In [1]:
import numpy as np
import pandas as pd
import datetime

In [2]:
# Load the yearly urban-population table and index it by year.
# Rows containing any missing value are discarded before the columns are renamed.
path = '../raw_data/urbanpop_1960-2019_yearly.csv'

df = (
    pd.read_csv(path)
    .dropna(how='any')
    .rename(columns={'Indicator Code': 'Year', 'SP.URB.TOTL': 'pop'})
    .astype({'Year': int})
    .set_index('Year')
)
# Turn the integer years into timestamps (January 1st of each year).
df.index = pd.to_datetime(df.index, format='%Y')
df

Unnamed: 0_level_0,pop
Year,Unnamed: 1_level_1
1960-01-01,126462473.0
1961-01-01,129276215.0
1962-01-01,131988693.0
1963-01-01,134615404.0
1964-01-01,137215986.0
1965-01-01,139663053.0
1966-01-01,142008703.0
1967-01-01,144288757.0
1968-01-01,146463196.0
1969-01-01,148629124.0


In [3]:
# Build an ascending list of weekly dates by stepping backwards, 7 days at a
# time, from a fixed anchor date until the first timestamp in `df` is reached.
# Only weeks earlier than (last timestamp in `df` + 4 weeks) are kept.
anchor = datetime.datetime(2021, 1, 17)
one_week = datetime.timedelta(days=7)
earliest = df.index[0]
cutoff = df.index[-1] + datetime.timedelta(weeks=4)

dates = []
week = anchor
while week > earliest:
    if week < cutoff:
        dates.append(week)
    week -= one_week

# Collected newest-first; flip to chronological order.
dates = dates[::-1]
dates

[datetime.datetime(1960, 1, 3, 0, 0),
 datetime.datetime(1960, 1, 10, 0, 0),
 datetime.datetime(1960, 1, 17, 0, 0),
 datetime.datetime(1960, 1, 24, 0, 0),
 datetime.datetime(1960, 1, 31, 0, 0),
 datetime.datetime(1960, 2, 7, 0, 0),
 datetime.datetime(1960, 2, 14, 0, 0),
 datetime.datetime(1960, 2, 21, 0, 0),
 datetime.datetime(1960, 2, 28, 0, 0),
 datetime.datetime(1960, 3, 6, 0, 0),
 datetime.datetime(1960, 3, 13, 0, 0),
 datetime.datetime(1960, 3, 20, 0, 0),
 datetime.datetime(1960, 3, 27, 0, 0),
 datetime.datetime(1960, 4, 3, 0, 0),
 datetime.datetime(1960, 4, 10, 0, 0),
 datetime.datetime(1960, 4, 17, 0, 0),
 datetime.datetime(1960, 4, 24, 0, 0),
 datetime.datetime(1960, 5, 1, 0, 0),
 datetime.datetime(1960, 5, 8, 0, 0),
 datetime.datetime(1960, 5, 15, 0, 0),
 datetime.datetime(1960, 5, 22, 0, 0),
 datetime.datetime(1960, 5, 29, 0, 0),
 datetime.datetime(1960, 6, 5, 0, 0),
 datetime.datetime(1960, 6, 12, 0, 0),
 datetime.datetime(1960, 6, 19, 0, 0),
 datetime.datetime(1960, 6, 26, 

In [4]:
# Linearly interpolate the yearly 'pop' values in `df` onto a weekly grid.
#
# For each calendar year, starting from the first weekly date of that year in
# `dates`, the number of weekly steps needed to reach the following year is
# found (50-53), and the difference between the two January-1st values is
# spread evenly across those steps.
#
# NOTE(review): every output key is stamped one calendar year AFTER the week it
# was derived from (year + 1, same month/day, then whole weeks added), so the
# value read for year Y is emitted under year Y + 1. Confirm this offset is
# intended before changing it.
ONE_WEEK = datetime.timedelta(weeks=1)

urban_pop = {}
pos = 0

while pos < len(dates):
    first_week = dates[pos]

    # Smallest number of weeks (a year has ~52.14 on average) that moves
    # `first_week` into a different calendar year; 0 if none is found.
    n_weeks = next(
        (k for k in range(50, 54)
         if (first_week + k * ONE_WEEK).year != first_week.year),
        0,
    )
    assert n_weeks != 0

    year_start = datetime.datetime(first_week.year, 1, 1)
    year_end = datetime.datetime((first_week + n_weeks * ONE_WEEK).year, 1, 1)

    # Stop once the following year's value is beyond the data in `df`.
    if year_end > df.index[-1]:
        break

    pop_start = df.loc[year_start, 'pop']
    pop_end = df.loc[year_end, 'pop']
    step = (pop_end - pop_start) / n_weeks

    # First output date: same month/day as `first_week`, one year later.
    shifted_first = datetime.datetime(
        first_week.year + 1, first_week.month, first_week.day
    )
    urban_pop.update(
        (shifted_first + k * ONE_WEEK, pop_start + k * step)
        for k in range(n_weeks)
    )

    # Jump to the first weekly date of the next calendar year.
    pos += n_weeks

urban_pop

{datetime.datetime(1961, 1, 3, 0, 0): 126462473.0,
 datetime.datetime(1961, 1, 10, 0, 0): 126516583.42307693,
 datetime.datetime(1961, 1, 17, 0, 0): 126570693.84615384,
 datetime.datetime(1961, 1, 24, 0, 0): 126624804.26923077,
 datetime.datetime(1961, 1, 31, 0, 0): 126678914.6923077,
 datetime.datetime(1961, 2, 7, 0, 0): 126733025.11538461,
 datetime.datetime(1961, 2, 14, 0, 0): 126787135.53846154,
 datetime.datetime(1961, 2, 21, 0, 0): 126841245.96153846,
 datetime.datetime(1961, 2, 28, 0, 0): 126895356.38461539,
 datetime.datetime(1961, 3, 7, 0, 0): 126949466.8076923,
 datetime.datetime(1961, 3, 14, 0, 0): 127003577.23076923,
 datetime.datetime(1961, 3, 21, 0, 0): 127057687.65384616,
 datetime.datetime(1961, 3, 28, 0, 0): 127111798.07692307,
 datetime.datetime(1961, 4, 4, 0, 0): 127165908.5,
 datetime.datetime(1961, 4, 11, 0, 0): 127220018.92307693,
 datetime.datetime(1961, 4, 18, 0, 0): 127274129.34615384,
 datetime.datetime(1961, 4, 25, 0, 0): 127328239.76923077,
 datetime.datetim

In [5]:
# Two-column frame: one row per interpolated week, in insertion order.
week_labels = list(urban_pop)
weekly_values = list(urban_pop.values())
res = pd.DataFrame({'Week': week_labels, 'urban_pop': weekly_values})
res

Unnamed: 0,Week,urban_pop
0,1961-01-03,1.264625e+08
1,1961-01-10,1.265166e+08
2,1961-01-17,1.265707e+08
3,1961-01-24,1.266248e+08
4,1961-01-31,1.266789e+08
...,...,...
3074,2019-12-02,2.704762e+08
3075,2019-12-09,2.705136e+08
3076,2019-12-16,2.705509e+08
3077,2019-12-23,2.705883e+08


In [6]:
# Write the weekly series to disk without the positional index column.
output_path = "../cleansed_data/US_urban_population_61_19.csv"
res.to_csv(output_path, index=False)