In [None]:
import numpy as np
import pandas as pd
import matplotlib
import datetime
import time
from itertools import islice

In [None]:
def parse_timestamp(el, fmt='%d.%m.%Y'):
    """Parse a date string into a ``datetime.datetime``.

    Parameters
    ----------
    el : str
        Date text to parse, e.g. ``'31.07.2021'``.
    fmt : str, optional
        ``strptime`` format string. Defaults to day.month.year so the
        function works on its own instead of relying on a module-level
        ``fmt`` that is only defined in a later cell.

    Returns
    -------
    datetime.datetime
        The parsed timestamp (time of day is midnight for date-only formats).

    Raises
    ------
    ValueError
        If ``el`` does not match ``fmt``.
    """
    return datetime.datetime.strptime(el, fmt)

In [None]:
# strptime pattern for day.month.year date strings such as '01.01.2020'.
fmt = "%d.%m.%Y"

In [None]:
def days_between(d1, d2):
    """Return the non-negative day difference between two dates.

    The ``days`` component of ``d2 - d1`` is taken first and then made
    positive, so the argument order does not affect the sign of the result.
    """
    delta = d2 - d1
    day_count = delta.days
    return abs(day_count)

In [None]:
# Load the countermeasures table from a semicolon-delimited CSV file.
url = "countermeasures.csv"
df = pd.read_csv(
    url,
    sep=';',
    low_memory=False,
)

In [None]:
# Show every column when a frame is displayed (None removes the column limit).
# The exact option key is used; the previous 'display.max_columns()' only
# worked because pandas treats the name as a regular expression.
pd.set_option('display.max_columns', None)

In [None]:
# Display the column labels of the loaded countermeasures frame.
df.columns

In [None]:
# Remove four columns from the countermeasures frame (modifies df in place).
df.drop(
    columns=['amusment_venues', 'administrative_fine', 'shopping_malls', 'work'],
    inplace=True,
)

In [None]:
# Adding a row at the beginning of the data frame. Data will start from January 2020.
# Every existing label is moved up by one to free label 0, the new row is
# written there, and the frame is sorted by label so that row comes first.
df.index = df.index + 1
df.loc[0] = [
    '01.01.2020', 'open', 'open', 'open', 'open', 'allowed',
    'allowed', 'allowed', 'no', 'allowed', 'allowed', 'open',
    'open', 'open', 'open', 'not_mandatory', 'no',
]
df = df.sort_index()

In [None]:
# Adding a row at the end of the data frame. Data will end with July 2021.
# The row is written at label len(df) instead of a hard-coded 42, so it is
# appended after the last row whatever the number of rows in the CSV
# (a fixed label would silently overwrite a real row in a longer file).
# NOTE(review): assumes the index is the contiguous 0..n-1 range left by the
# preceding "add first row" cell, and that this cell is run once per kernel.
df.loc[len(df)] = ['31.07.2021', 'openw/meas', 'openw/meas', 'openw/tst_vacc', 'openw/lim_ppl', 'sport_1000',
                   'allowed_1000max', 'allowed_100pplw/tst_imm', 'yes', 'allowed', 'ra_testM_vac_10dqua', 'open',
                   'open', 'openw/limit', 'limited_visitsw/mask_test', 'mandatory_medical', 'no']

In [None]:
# Convert the 'entry_into_force' strings to datetimes in one vectorised call.
# The explicit format (module-level `fmt`, day.month.year) avoids any
# day/month guessing, and re-running the cell on already-parsed values is harmless.
df['entry_into_force'] = pd.to_datetime(df['entry_into_force'], format=fmt)

In [None]:
# Sanity check: print day, month and year of the entry stored at label 0.
first_entry = df['entry_into_force'][0]
for part in (first_entry.day, first_entry.month, first_entry.year):
    print(part)

In [None]:
# Empty frame that will receive one row of regulations per calendar day
# between January 2020 and July 2021.
daily_columns = [
    'entry_into_force',
    'cultural_venues',
    'gastronomy',
    'essential_retail',
    'non-essential_retail',
    'major_events',
    'events_and_gatherings',
    'public_gatherings',
    'traceability',
    'tourism',
    'traveling',
    'school_facilities',
    'university_facilities',
    'sport_facilities',
    'medical_facilities',
    'mouth_and_nose_covering',
    'curfew',
]
new_df = pd.DataFrame(columns=daily_columns)

In [None]:
# Seed the daily frame with the first countermeasures row.
# DataFrame.append was removed in pandas 2.0, so the row is turned into a
# one-row frame and concatenated instead. infer_objects() restores proper
# column dtypes (e.g. datetime64 for the date) that the transposed
# object-typed row would otherwise lose.
row_a = df.iloc[0]
new_df = pd.concat([new_df, row_a.to_frame().T]).infer_objects()

In [None]:
# Expand the countermeasures to one row per calendar day.
# For each pair of consecutive source rows, the earlier row's regulations are
# repeated for every day strictly between the two dates, then the later row
# itself is added. Rows are collected in a list and concatenated once:
# DataFrame.append was removed in pandas 2.0 and growing a frame row by row
# is quadratic.
daily_rows = []
for _, row in islice(df.iterrows(), 1, None):  # first row is already in new_df
    n = days_between(row_a['entry_into_force'], row['entry_into_force'])
    gap_start = row_a['entry_into_force']
    carried = row_a.to_dict()
    for i in range(1, n):
        # Same regulations as the previous source row, date advanced by i days.
        daily_rows.append({**carried,
                           'entry_into_force': gap_start + datetime.timedelta(days=i)})
    daily_rows.append(row.to_dict())
    row_a = row

if daily_rows:
    # Restrict to new_df's columns so the column order stays unchanged.
    new_df = pd.concat(
        [new_df, pd.DataFrame(daily_rows, columns=new_df.columns)],
        ignore_index=True,
    )

In [None]:
# Renumber the rows 0..n-1, discarding the old index labels.
new_df = new_df.reset_index(drop=True)

In [None]:
# Spot-check the filled frame: rows labelled 70 through 120
# (.loc slices by label, so both ends are included).
new_df.loc[70:120]

In [None]:
# Rename the date column to the plain name 'date', the key used later when
# the two data sets are merged.
new_df = new_df.rename(columns={'entry_into_force': 'date'})

In [None]:
# Load the infection numbers (daily new infections) from a comma-delimited CSV file.
url2 = "Cologne numbers.csv"
df_num = pd.read_csv(
    url2,
    sep=',',
    low_memory=False,
)

In [None]:
# Reverse the row order and renumber the rows 0..n-1.
df_num = df_num.iloc[::-1].reset_index(drop=True)

In [None]:
# Adding a row at the beginning of the data frame. Data will start from January 2020.
# Existing labels are moved up by one, the new row takes label 0, and the
# frame is sorted by label so the new row comes first.
df_num.index = df_num.index + 1
df_num.loc[0] = ['01.01.2020', '0']
df_num = df_num.sort_index()

In [None]:
# Inspect the raw 'new_infections' value stored at index label 2.
df_num['new_infections'][2]

In [None]:
# Cells that contain exactly '-' are replaced with the integer 0.
df_num['new_infections'] = df_num['new_infections'].replace({'-': 0})

In [None]:
# Frequency of each distinct 'new_infections' value, with missing values counted too.
df_num['new_infections'].value_counts(dropna=False)

In [None]:
def to_integer(s):
    """Convert ``s`` to ``int`` using the built-in constructor.

    Any exception raised by ``int`` (e.g. ``ValueError`` for non-numeric
    text) propagates to the caller.
    """
    return int(s)

In [None]:
# Convert every 'new_infections' entry to int via the to_integer helper.
df_num['new_infections'] = df_num['new_infections'].map(to_integer)

In [None]:
# Convert the 'date' strings to datetimes in one vectorised call.
# The explicit format (module-level `fmt`, day.month.year) avoids any
# day/month guessing, and re-running the cell on already-parsed values is harmless.
df_num['date'] = pd.to_datetime(df_num['date'], format=fmt)

In [None]:
# Check the element type of the parsed 'date' column (value at index label 0).
type(df_num['date'][0])

In [None]:
# Empty frame that will hold one infection count per calendar day.
infection_columns = ['date', 'new_infections']
new_num = pd.DataFrame(columns=infection_columns)

In [None]:
# Seed the daily infection frame with the first source row.
# DataFrame.append was removed in pandas 2.0, so the row is turned into a
# one-row frame and concatenated instead. infer_objects() restores proper
# column dtypes (datetime64 / integer) that the transposed object-typed row
# would otherwise lose.
row_b = df_num.iloc[0]
new_num = pd.concat([new_num, row_b.to_frame().T]).infer_objects()

In [None]:
# Expand the infection numbers to one row per calendar day.
# For each pair of consecutive source rows, the earlier row's count is
# repeated for every day strictly between the two dates, then the later row
# itself is added. Rows are collected in a list and concatenated once:
# DataFrame.append was removed in pandas 2.0 and growing a frame row by row
# is quadratic.
daily_counts = []
for _, row in islice(df_num.iterrows(), 1, None):  # first row is already in new_num
    n = days_between(row_b['date'], row['date'])
    gap_start = row_b['date']
    carried_count = row_b['new_infections']
    for i in range(1, n):
        # Previous count carried forward, date advanced by i days.
        daily_counts.append({'date': gap_start + datetime.timedelta(days=i),
                             'new_infections': carried_count})
    daily_counts.append(row.to_dict())
    row_b = row

if daily_counts:
    # Restrict to new_num's columns so the column order stays unchanged.
    new_num = pd.concat(
        [new_num, pd.DataFrame(daily_counts, columns=new_num.columns)],
        ignore_index=True,
    )

In [None]:
# Show the first 20 rows of the source infection table.
df_num[:20]

In [None]:
# Renumber the rows 0..n-1, discarding the old index labels.
new_num = new_num.reset_index(drop=True)

In [None]:
# Preview the first rows of the daily infection frame.
new_num.head()

In [None]:
# Combine countermeasures and new-infection numbers on the shared 'date'
# column; the outer join keeps dates that appear in only one of the frames.
complete_df = new_df.merge(new_num, how='outer', on='date')

In [None]:
# Display the column labels of the merged frame.
complete_df.columns

In [None]:
# Write the merged frame to a semicolon-delimited CSV file without the index column.
complete_df.to_csv('covid19_unified_set.csv', sep=';', index=False)