https://docs.safegraph.com/docs/open-census-data#section-census-demographic-data

In [1]:
import os
from requests import get
import pandas as pd
import geopandas as gpd
from timeit import default_timer as timer
import numpy as np

In [2]:
path_to_data='/scratch/spf248/covid/data'

# MX

In [3]:
country='MX'

In [4]:
print('Load data')
start = timer()
data=pd.read_csv(os.path.join(path_to_data,'admin',country,'data.csv'),dtype='object')
shp=gpd.read_file(os.path.join(path_to_data,'admin',country,'MX_4326.shp'))
admin2city=pd.read_excel(os.path.join(path_to_data,'admin',country,'city2adminGHSL.xlsx'))
print("Done in", round(timer()-start), "sec")

Load data
Done in 27 sec


In [5]:
print('Prepare data')
start = timer()

df=shp.merge(data,on='ID_n',how='left').merge(admin2city.drop_duplicates('ID_n'),on='ID_n',how='left')
df['pct_65_mas']=df['pob65_mas'].astype(float).divide(df['pobtot'].astype(float))
df=df.rename(columns={
'ID_n':'id_',
'pobtot':'pop',
'index_pca':'wealth_index',
'ENTIDAD':'entidad_code',
'NOM_ENT':'entidad_name',
'MUN':'mun_code',
'NOM_MUN':'entidad_name',
'LOC':'loc_code',
'NOM_LOC':'loc_name',
}).drop(['pca','pob65_mas','pob15_64'],1)

df.rename(columns=lambda x:x.upper() if x!='geometry' else x,inplace=True)
for col in ['POP','AREA_KM2','POP_DENS','WEALTH_INDEX']:
    df[col]=df[col].astype(float)

print("Done in", round(timer()-start), "sec")

Prepare data
Done in 2 sec


In [6]:
print('Save data')
start = timer()
df.to_file(os.path.join(path_to_data,'admin',country,'admin.geojson'), driver='GeoJSON')
df.to_csv(os.path.join(path_to_data,'admin',country,'admin.csv'), index=False, header=True)
print("Done in", round(timer()-start), "sec")

Save data
Done in 207 sec


In [7]:
df.info()

<class 'geopandas.geodataframe.GeoDataFrame'>
Int64Index: 345116 entries, 0 to 345115
Data columns (total 15 columns):
 #   Column        Non-Null Count   Dtype   
---  ------        --------------   -----   
 0   ID_           345116 non-null  object  
 1   geometry      345116 non-null  geometry
 2   POP           238282 non-null  float64 
 3   AREA_KM2      238282 non-null  float64 
 4   POP_DENS      238282 non-null  float64 
 5   WEALTH_INDEX  238282 non-null  float64 
 6   ENTIDAD_CODE  238282 non-null  object  
 7   ENTIDAD_NAME  238282 non-null  object  
 8   MUN_CODE      238282 non-null  object  
 9   ENTIDAD_NAME  238282 non-null  object  
 10  LOC_CODE      238282 non-null  object  
 11  LOC_NAME      238282 non-null  object  
 12  UC_NM_MN      34209 non-null   object  
 13  UC_NM_LST     34209 non-null   object  
 14  PCT_65_MAS    235627 non-null  float64 
dtypes: float64(5), geometry(1), object(9)
memory usage: 42.1+ MB


In [8]:
df.head()

Unnamed: 0,ID_,geometry,POP,AREA_KM2,POP_DENS,WEALTH_INDEX,ENTIDAD_CODE,ENTIDAD_NAME,MUN_CODE,ENTIDAD_NAME.1,LOC_CODE,LOC_NAME,UC_NM_MN,UC_NM_LST,PCT_65_MAS
0,50230309,"POLYGON ((-102.80543 28.20811, -102.80212 28.1...",,,,,,,,,,,,,
1,80470057,"POLYGON ((-108.57084 28.04528, -108.58444 27.9...",1.0,19.834613,0.050417,0.492315,8.0,Chihuahua,47.0,Moris,57.0,Los Lobos,,,0.0
2,240150002,"POLYGON ((-101.00987 23.10039, -101.01985 23.0...",574.0,12.837316,44.713398,0.242564,24.0,San Luis Potosí,15.0,Charcas,2.0,Álvaro Obregón (Estación los Charcos),,,0.156794
3,270120004,"POLYGON ((-92.29374 17.75034, -92.29052 17.743...",664.0,11.723325,56.639224,0.195166,27.0,Tabasco,12.0,Macuspana,4.0,Alcalde Mayor,,,0.063253
4,201510005,"POLYGON ((-97.52340 17.97540, -97.52057 17.960...",,,,,,,,,,,,,


# ID

In [9]:
country='ID'

In [10]:
print('Load data')
start = timer()
data=pd.read_csv(os.path.join(path_to_data,'admin',country,'data.csv'),dtype='object')
shp=gpd.read_file(os.path.join(path_to_data,'admin',country,'ID_4326.shp'))
admin2city=pd.read_excel(os.path.join(path_to_data,'admin',country,'city2adminGHSL.xlsx'))
print("Done in", round(timer()-start), "sec")

Load data
Done in 12 sec


In [11]:
print('Prepare data')
start = timer()

df=shp[['ADM4_PCODE','geometry']].merge(data,on='ADM4_PCODE',how='left').merge(admin2city.drop_duplicates('ADM4_PCODE'),on='ADM4_PCODE',how='left')
df=df.rename(columns={'Pop':'pop','index':'wealth_index','a_km2':'area_km2'}).drop(['pca'],1)
df.rename(columns=lambda x:x.upper() if x!='geometry' else x,inplace=True)
for col in ['POP','AGE','AREA_KM2','POP_DENS','WEALTH_INDEX']:
    df[col]=df[col].astype(float)

print("Done in", round(timer()-start), "sec")

Prepare data
Done in 2 sec


In [12]:
print('Save data')
start = timer()
df.to_file(os.path.join(path_to_data,'admin',country,'admin.geojson'), driver='GeoJSON')
df.to_csv(os.path.join(path_to_data,'admin',country,'admin.csv'), index=False, header=True)
print("Done in", round(timer()-start), "sec")

Save data
Done in 217 sec


In [13]:
df.info()

<class 'geopandas.geodataframe.GeoDataFrame'>
Int64Index: 81912 entries, 0 to 81911
Data columns (total 16 columns):
 #   Column        Non-Null Count  Dtype   
---  ------        --------------  -----   
 0   ADM4_PCODE    81912 non-null  object  
 1   geometry      81912 non-null  geometry
 2   AGE           72300 non-null  float64 
 3   WEALTH_INDEX  72300 non-null  float64 
 4   POP           72300 non-null  float64 
 5   AREA_KM2      72300 non-null  float64 
 6   POP_DENS      72300 non-null  float64 
 7   ADM4_EN       72300 non-null  object  
 8   ADM3_EN       72300 non-null  object  
 9   ADM3_PCODE    72300 non-null  object  
 10  ADM2_EN       72300 non-null  object  
 11  ADM2_PCODE    72300 non-null  object  
 12  ADM1_EN       72300 non-null  object  
 13  ADM1_PCODE    72300 non-null  object  
 14  UC_NM_MN      17194 non-null  object  
 15  UC_NM_LST     17198 non-null  object  
dtypes: float64(5), geometry(1), object(10)
memory usage: 10.6+ MB


In [14]:
df.head()

Unnamed: 0,ADM4_PCODE,geometry,AGE,WEALTH_INDEX,POP,AREA_KM2,POP_DENS,ADM4_EN,ADM3_EN,ADM3_PCODE,ADM2_EN,ADM2_PCODE,ADM1_EN,ADM1_PCODE,UC_NM_MN,UC_NM_LST
0,ID1671060006,"POLYGON ((104.82248 -2.98102, 104.82165 -2.985...",26.0,0.48757,5792.0,2.086247,2776.276967,1 Ilir,Ilir Timur II,ID1671060,Kota Palembang,ID1671,Sumatera Selatan,ID16,Palembang,Palembang
1,ID1671020008,"POLYGON ((104.75516 -3.01316, 104.75480 -3.013...",25.0,0.499906,12058.0,0.209298,57611.629454,1 Ulu,Seberang Ulu I,ID1671020,Kota Palembang,ID1671,Sumatera Selatan,ID16,Palembang,Palembang
2,ID1671062001,"POLYGON ((104.77081 -2.98185, 104.77082 -2.981...",,,,,,,,,,,,,Palembang,Palembang
3,ID1671062002,"POLYGON ((104.77081 -2.98185, 104.77070 -2.981...",,,,,,,,,,,,,Palembang,Palembang
4,ID1671030014,"POLYGON ((104.77317 -2.99340, 104.77316 -2.993...",24.0,0.409127,7543.0,0.227913,33096.015996,11 Ulu,Seberang Ulu II,ID1671030,Kota Palembang,ID1671,Sumatera Selatan,ID16,Palembang,Palembang
