In [1]:
import pandas as pd
import numpy as np
from epiweeks import Week, Year

In [2]:
PATH_POPULATION = 'data/Demographic/Population.csv'

In [3]:
# Function to convert dates to epiweeks
def date_to_epiweek(date):
    return Week.fromdate(date).startdate().strftime('%Y%W')

In [4]:
df = pd.read_csv(PATH_POPULATION, skiprows=80, names=['Date', 'population', 'variation'])[:-11]
df['Date'] = pd.to_datetime(df['Date'])
# Convert dates in the original dataframe to epiweeks
df['epiweek'] = df['Date'].apply(date_to_epiweek)
df


Unnamed: 0,Date,population,variation,epiweek
0,2014-12-31,12825000,0.9,201451
1,2015-12-31,12941000,0.9,201551
2,2016-12-31,13057000,0.9,201651
3,2017-12-31,13175000,0.9,201752
4,2018-12-31,13293000,0.9,201852
5,2019-12-31,13374000,0.61,201951
6,2020-12-31,13458000,0.63,202051
7,2021-12-31,13544000,0.64,202151
8,2022-12-31,13634000,0.66,202251
9,2023-12-31,13728000,0.69,202352


In [5]:
# Generate all dates from the first week of 2015 to just beyond the last week of 2023, by adding 7 days at a time
start_date = pd.to_datetime("2015-01-01")
end_date = pd.to_datetime("2023-12-31")

# Initialize list to hold all the dates
all_dates = []

# Start with the initial date and keep adding 7 days until we surpass the end date
current_date = start_date
while current_date <= end_date:
    all_dates.append(current_date)
    current_date += pd.Timedelta(days=7)

all_dates.append(end_date)

# convert all dates to epiweeks
all_epiweeks = [date_to_epiweek(date) for date in all_dates]

# Create a dataframe with all epiweeks and the corresponding dates
all_epiweeks_df = pd.DataFrame({'epiweek': all_epiweeks})
all_epiweeks_df.drop_duplicates(subset='epiweek', inplace=True)
all_epiweeks_df

Unnamed: 0,epiweek
0,201451
1,201500
2,201501
3,201502
4,201503
...,...
466,202348
467,202349
468,202350
469,202351


In [6]:

# Merge the population data with the epiweeks
df = all_epiweeks_df.merge(df, on='epiweek', how='left')[['epiweek', 'population', 'variation']]

# Interpolate the missing values for population and variation
df['population'] = df['population'].interpolate()
df['variation'] = df['variation'].interpolate()

df

Unnamed: 0,epiweek,population,variation
0,201451,1.282500e+07,0.900000
1,201500,1.282723e+07,0.900000
2,201501,1.282946e+07,0.900000
3,201502,1.283169e+07,0.900000
4,201503,1.283392e+07,0.900000
...,...,...,...
466,202348,1.372091e+07,0.687736
467,202349,1.372268e+07,0.688302
468,202350,1.372445e+07,0.688868
469,202351,1.372623e+07,0.689434


In [7]:
df.to_csv('data/Demographic/demographic.csv', index=False)