In [1]:
import datetime
import os

import numpy as np
import pandas as pd
import geopandas as gpd

import plotly.express as px

# Never commit access tokens to the notebook: read the Mapbox token from the
# MAPBOX_ACCESS_TOKEN environment variable instead. According to the Plotly
# docs, the 'carto-positron' map style used by the choropleths in this notebook
# does not need a token, so the token is only registered when one is provided.
MAPBOX_TOKEN = os.environ.get("MAPBOX_ACCESS_TOKEN")
if MAPBOX_TOKEN:
    px.set_mapbox_access_token(MAPBOX_TOKEN)

In [2]:
# Load the ComEd readings (file `comed_201801.csv`) and total the energy per
# ZIP code and timestamp.
df_data = pd.read_csv('data/comed_month/comed_201801.csv')
df_data['date_time'] = pd.to_datetime(df_data.date_time)

df_data = df_data.groupby(['zip5', 'date_time']).energy.sum().reset_index()
df_data['weekday'] = df_data.date_time.dt.weekday  # pandas convention: 0 = Monday ... 6 = Sunday

# ZIP-code polygons; GEOID20 is cast to int so it can be joined against `zip5`
# (assumes `zip5` is read from the CSV as an integer -- TODO confirm).
gdf_zc = gpd.read_file('data/geo/Chicago_ZC.geojson')
gdf_zc['GEOID20'] = gdf_zc['GEOID20'].astype(int)

# The inner merge keeps only readings whose ZIP code has a polygon.
gdf_data = pd.merge(gdf_zc, df_data, left_on='GEOID20', right_on='zip5')
# numeric_only=True: the merged frame still carries the geometry and timestamp
# columns, which cannot be summed. pandas >= 2.0 raises on them instead of
# silently dropping them, so restrict the sum to numeric columns explicitly.
gdf_data = gdf_data.groupby(['zip5', 'weekday']).sum(numeric_only=True).reset_index()
gdf_data['zip5'] = gdf_data['zip5'].astype(str)

# ACS table: row 0 is stored in `dict_names` (column code -> row-0 value,
# presumably the human-readable label) and then dropped from the data.
df_acs = pd.read_csv('data/census/Census_Clean_Zip5_IL_Sex_Age_Ethnicity_2018.csv')
dict_names = dict(zip(df_acs.columns, df_acs.loc[0].values))
df_acs = df_acs.drop(0)
# Drop the first six characters of NAME to obtain the ZIP code
# (presumably a 'ZCTA5 ' prefix -- verify against the CSV).
df_acs['zip5'] = df_acs.NAME.str[6:]

# One row per (ZIP code, weekday), with the ACS columns attached.
gdf_data = pd.merge(gdf_data, df_acs, on='zip5')

gdf_counties = gpd.read_file('data/geo/US_counties.json')


FileNotFoundError: [Errno 2] No such file or directory: 'data/comed_month/comed_201801.csv'

In [None]:
# Energy per person for workdays versus weekends, one row per (ZIP code, day type).
# Every selected column is cast to int first (the ACS columns are merged in as text).
df_plot = (
    gdf_data[['zip5', 'weekday', 'energy', 'DP05_0001E', 'DP05_0001M']]
    .astype(int)
    .assign(type_day=lambda frame: np.where(frame.weekday <= 4, 'workday', 'weekend'))
)
#df_plot['type_day'] = np.where(df_plot.weekday==6, 'sunday', df_plot['type_day'])
df_plot = (
    df_plot
    .groupby(['zip5', 'type_day'])
    .agg({'energy': 'sum', 'DP05_0001E': 'sum', 'weekday': 'count'})
    .reset_index()
)

# ZIP codes whose polygon is spatially joined to a county named "Cook".
gdf_zc_c = gpd.sjoin(gdf_zc, gdf_counties, how='left')
city_zc = gdf_zc_c.query('NAME=="Cook"')['GEOID20'].to_numpy()

# NOTE(review): DP05_0001E is summed over the weekday rows of each group and
# `weekday` holds the number of those rows, so `energy_pp_pd` ends up divided
# by the row count twice -- confirm this is the intended per-day normalisation.
df_plot['energy_pp'] = df_plot['energy'] / df_plot['DP05_0001E']
df_plot['Class'] = np.where(df_plot['zip5'].isin(city_zc), 'Urban', 'Suburban')

df_plot['energy_pp_pd'] = df_plot['energy_pp'] / df_plot['weekday']
df_plot['zip5'] = df_plot['zip5'].astype(str)

In [None]:
# Scatter of energy_pp_pd against the row index, coloured and marked by day type.
fig = px.scatter(
    df_plot,
    x=df_plot.index,
    y='energy_pp_pd',
    color='type_day',
    symbol='type_day',
    hover_data=['zip5'],
    width=800,
    height=800,
)
fig.show()

In [None]:
# Bar chart of energy_pp_pd per ZIP code, coloured by day type.
fig = px.bar(
    df_plot,
    x='zip5',
    y='energy_pp_pd',
    color='type_day',
    hover_data=['zip5'],
    width=1600,
    height=800,
)
fig.show()

In [None]:
# Total energy and summed DP05_0001E per ZIP code; every selected column is
# cast to int before aggregating.
df_plot = (
    gdf_data[['zip5', 'weekday', 'energy', 'DP05_0001E', 'DP05_0001M']]
    .astype(int)
    .groupby(['zip5'])
    .agg({'energy': 'sum', 'DP05_0001E': 'sum'})
    .reset_index()
)

# ZIP codes whose polygon is spatially joined to a county named "Cook".
gdf_zc_c = gpd.sjoin(gdf_zc, gdf_counties, how='left')
city_zc = gdf_zc_c.query('NAME=="Cook"')['GEOID20'].to_numpy()

# NOTE(review): DP05_0001E is summed over all rows of a ZIP code here, so the
# denominator grows with the number of rows per ZIP code -- confirm intended.
df_plot['energy_pp'] = df_plot['energy'] / df_plot['DP05_0001E']
df_plot['Class'] = np.where(df_plot['zip5'].isin(city_zc), 'Urban', 'Suburban')


In [None]:
# Choropleth of energy_pp per ZIP code, colour scale clipped to 0-30.
fig = px.choropleth_mapbox(
    df_plot,
    geojson=gdf_zc,
    featureidkey='properties.GEOID20',
    locations='zip5',
    color="energy_pp",
    range_color=(0, 30),
    opacity=0.5,
    mapbox_style='carto-positron',
    center=dict(lat=41.6, lon=-88.99),
    zoom=6,
    width=1000,
    height=700,
)
# NOTE(review): update_geos configures `geo` subplots, while this figure is
# mapbox-based -- presumably this call has no visible effect; confirm.
fig.update_geos(fitbounds="locations", visible=False)
fig.show()
#fig.write_html('20180131_animation.html')

In [None]:
# Same choropleth of energy_pp per ZIP code, with a wider colour scale (0-60).
fig = px.choropleth_mapbox(
    df_plot,
    geojson=gdf_zc,
    featureidkey='properties.GEOID20',
    locations='zip5',
    color="energy_pp",
    range_color=(0, 60),
    opacity=0.5,
    mapbox_style='carto-positron',
    center=dict(lat=41.6, lon=-88.99),
    zoom=6,
    width=1000,
    height=700,
)
# NOTE(review): update_geos configures `geo` subplots, while this figure is
# mapbox-based -- presumably this call has no visible effect; confirm.
fig.update_geos(fitbounds="locations", visible=False)
fig.show()

In [None]:
# List every ACS column code next to the value stored for it in `dict_names`.
for column_code, column_label in dict_names.items():
    print(column_code, column_label)


## Energy consumption per person by age range

In [None]:
# ACS DP05 columns plotted as age ranges (see `dict_names` for what each code holds).
columns_age = [
    "DP05_0005E", "DP05_0006E", "DP05_0007E", "DP05_0008E", "DP05_0009E",
    "DP05_0010E", "DP05_0011E", "DP05_0012E", "DP05_0013E", "DP05_0014E",
    "DP05_0015E", "DP05_0016E", "DP05_0017E",
]

# Cast to int (the ACS columns are merged in as text) and total per ZIP code.
df_plot = gdf_data[['zip5', 'energy', "DP05_0001E"] + columns_age].astype(int)
df_plot = df_plot.groupby('zip5').sum().reset_index()

# Express every age column as a fraction of DP05_0001E. Numerator and
# denominator are summed over the same rows, so the ratio is unaffected.
df_plot[columns_age] = df_plot[columns_age].div(df_plot['DP05_0001E'], axis=0)

df_plot['energy_pp'] = df_plot['energy'] / df_plot['DP05_0001E']

# Long format: one row per (ZIP code, age column) for colouring by age range.
df_plot = pd.melt(df_plot, id_vars=['zip5', 'energy_pp'], value_vars=columns_age,
                  var_name='age_range', value_name='people')

# The unused `df = px.data.iris()` copy-paste leftover was removed.
fig = px.scatter(df_plot, x="people", y="energy_pp", color="age_range",
                 hover_data=['zip5'])
fig.show()

## Energy consumption per person by sex

In [None]:
# ACS DP05 columns plotted by sex (see `dict_names` for what each code holds).
columns_sex = ["DP05_0002E", "DP05_0003E"]

# Cast to int (the ACS columns are merged in as text) and total per ZIP code.
df_plot = gdf_data[['zip5', 'energy', "DP05_0001E"] + columns_sex].astype(int)
df_plot = df_plot.groupby('zip5').sum().reset_index()

# Express every sex column as a fraction of DP05_0001E. Numerator and
# denominator are summed over the same rows, so the ratio is unaffected.
df_plot[columns_sex] = df_plot[columns_sex].div(df_plot['DP05_0001E'], axis=0)

df_plot['energy_pp'] = df_plot['energy'] / df_plot['DP05_0001E']

# Long format: one row per (ZIP code, sex column) for colouring by sex.
df_plot = pd.melt(df_plot, id_vars=['zip5', 'energy_pp'], value_vars=columns_sex,
                  var_name='sex', value_name='people')

# The unused `df = px.data.iris()` copy-paste leftover was removed.
fig = px.scatter(df_plot, x="people", y="energy_pp", color="sex",
                 hover_data=['zip5'])
fig.show()

## Energy consumption per person by (limited) age range and sex

In [None]:
# ACS DP05 columns plotted by sex within a limited age range
# (see `dict_names` for what each code holds).
columns_sex_age = ["DP05_0026E", "DP05_0027E", "DP05_0030E", "DP05_0031E"]

# Cast to int (the ACS columns are merged in as text) and total per ZIP code.
df_plot = gdf_data[['zip5', 'energy', "DP05_0001E"] + columns_sex_age].astype(int)
df_plot = df_plot.groupby('zip5').sum().reset_index()

# Express every column as a fraction of DP05_0001E. Numerator and
# denominator are summed over the same rows, so the ratio is unaffected.
df_plot[columns_sex_age] = df_plot[columns_sex_age].div(df_plot['DP05_0001E'], axis=0)

df_plot['energy_pp'] = df_plot['energy'] / df_plot['DP05_0001E']

# Long format: one row per (ZIP code, sex/age column) for colouring.
df_plot = pd.melt(df_plot, id_vars=['zip5', 'energy_pp'], value_vars=columns_sex_age,
                  var_name='sex_age', value_name='people')

# The unused `df = px.data.iris()` copy-paste leftover was removed.
fig = px.scatter(df_plot, x="people", y="energy_pp", color="sex_age",
                 hover_data=['zip5'])
fig.show()

## Energy consumption per person by ethnicity/race

In [None]:
# ACS DP05 columns plotted by race (see `dict_names` for what each code holds).
columns_race = ["DP05_0037E", 'DP05_0038E', 'DP05_0039E', 'DP05_0044E', 'DP05_0052E', 'DP05_0057E']

# Cast to int (the ACS columns are merged in as text) and total per ZIP code.
df_plot = gdf_data[['zip5', 'energy', "DP05_0001E"] + columns_race].astype(int)
df_plot = df_plot.groupby('zip5').sum().reset_index()

# Express every race column as a fraction of DP05_0001E. Numerator and
# denominator are summed over the same rows, so the ratio is unaffected.
df_plot[columns_race] = df_plot[columns_race].div(df_plot['DP05_0001E'], axis=0)

df_plot['energy_pp'] = df_plot['energy'] / df_plot['DP05_0001E']

# Long format: one row per (ZIP code, race column) for colouring by race.
df_plot = pd.melt(df_plot, id_vars=['zip5', 'energy_pp'], value_vars=columns_race,
                  var_name='race', value_name='people')

# The unused `df = px.data.iris()` copy-paste leftover was removed.
fig = px.scatter(df_plot, x="people", y="energy_pp", color="race",
                 hover_data=['zip5'])
fig.show()

In [None]:
# Rebuild `gdf_data` from scratch, this time attaching the ACS
# household/family/income table instead of the demographics one.
df_data = pd.read_csv('data/comed_month/comed_201801.csv')
df_data['date_time'] = pd.to_datetime(df_data.date_time)

df_data = df_data.groupby(['zip5', 'date_time']).energy.sum().reset_index()
df_data['weekday'] = df_data.date_time.dt.weekday  # pandas convention: 0 = Monday ... 6 = Sunday

# ZIP-code polygons; GEOID20 is cast to int so it can be joined against `zip5`
# (assumes `zip5` is read from the CSV as an integer -- TODO confirm).
gdf_zc = gpd.read_file('data/geo/Chicago_ZC.geojson')
gdf_zc['GEOID20'] = gdf_zc['GEOID20'].astype(int)

# The inner merge keeps only readings whose ZIP code has a polygon.
gdf_data = pd.merge(gdf_zc, df_data, left_on='GEOID20', right_on='zip5')
# numeric_only=True: the merged frame still carries the geometry and timestamp
# columns, which cannot be summed. pandas >= 2.0 raises on them instead of
# silently dropping them, so restrict the sum to numeric columns explicitly.
gdf_data = gdf_data.groupby(['zip5', 'weekday']).sum(numeric_only=True).reset_index()
gdf_data['zip5'] = gdf_data['zip5'].astype(str)

# ACS table: row 0 is stored in `dict_names` (column code -> row-0 value,
# presumably the human-readable label) and then dropped from the data.
df_acs = pd.read_csv('data/census/Census_Clean_Zip5_IL_Household&Family_Married&Nonmarried_Income_2018.csv')
dict_names = dict(zip(df_acs.columns, df_acs.loc[0].values))
df_acs = df_acs.drop(0)
# Drop the first six characters of NAME to obtain the ZIP code
# (presumably a 'ZCTA5 ' prefix -- verify against the CSV).
df_acs['zip5'] = df_acs.NAME.str[6:]

# One row per (ZIP code, weekday), with the ACS columns attached.
gdf_data = pd.merge(gdf_data, df_acs, on='zip5')

gdf_counties = gpd.read_file('data/geo/US_counties.json')

## Energy consumption per person by income

In [None]:
# ACS S1901 columns plotted on the x axis (see `dict_names` for what each code holds).
columns_income = ['S1901_C01_012E', 'S1901_C01_013E']

# DP05_0001E (the per-person denominator) is not in the table currently merged
# into `gdf_data`, so it is re-read from the demographics file.
df_pop = pd.read_csv('data/census/Census_Clean_Zip5_IL_Sex_Age_Ethnicity_2018.csv')
df_pop = df_pop.drop(0)  # row 0 is not data (it is used for `dict_names` elsewhere)
df_pop['zip5'] = df_pop.NAME.str[6:].astype(int)

# Per-row energy per person, then averaged over the rows of each ZIP code.
df_plot = gdf_data[['zip5', 'energy'] + columns_income].astype(int)
df_plot = pd.merge(df_plot, df_pop[['zip5', 'DP05_0001E']], on='zip5').astype(int)
df_plot['energy_pp'] = df_plot['energy'] / df_plot['DP05_0001E']
df_plot = df_plot.groupby('zip5').mean().reset_index()

# NOTE(review): unlike the other demographic cells, these columns are not
# divided by DP05_0001E, yet the melted value is still called 'people' --
# presumably these are income amounts rather than head counts; confirm and
# relabel if so.
df_plot = pd.melt(df_plot, id_vars=['zip5', 'energy_pp'], value_vars=columns_income,
                  var_name='income', value_name='people')

# The unused `df = px.data.iris()` copy-paste leftover was removed.
fig = px.scatter(df_plot, x="people", y="energy_pp", color="income",
                 hover_data=['zip5'])
fig.show()

## Energy consumption per person by education level

In [None]:
# Rebuild `gdf_data` from scratch, this time attaching the ACS
# education-level table.
df_data = pd.read_csv('data/comed_month/comed_201801.csv')
df_data['date_time'] = pd.to_datetime(df_data.date_time)

df_data = df_data.groupby(['zip5', 'date_time']).energy.sum().reset_index()
df_data['weekday'] = df_data.date_time.dt.weekday  # pandas convention: 0 = Monday ... 6 = Sunday

# ZIP-code polygons; GEOID20 is cast to int so it can be joined against `zip5`
# (assumes `zip5` is read from the CSV as an integer -- TODO confirm).
gdf_zc = gpd.read_file('data/geo/Chicago_ZC.geojson')
gdf_zc['GEOID20'] = gdf_zc['GEOID20'].astype(int)

# The inner merge keeps only readings whose ZIP code has a polygon.
gdf_data = pd.merge(gdf_zc, df_data, left_on='GEOID20', right_on='zip5')
# numeric_only=True: the merged frame still carries the geometry and timestamp
# columns, which cannot be summed. pandas >= 2.0 raises on them instead of
# silently dropping them, so restrict the sum to numeric columns explicitly.
gdf_data = gdf_data.groupby(['zip5', 'weekday']).sum(numeric_only=True).reset_index()
gdf_data['zip5'] = gdf_data['zip5'].astype(str)

# ACS table: row 0 is stored in `dict_names` (column code -> row-0 value,
# presumably the human-readable label) and then dropped from the data.
df_acs = pd.read_csv('data/census/Census_Clean_Zip5_IL_EducationLevel_byAge_byIncome_Ethnicity_bySex.csv')
dict_names = dict(zip(df_acs.columns, df_acs.loc[0].values))
df_acs = df_acs.drop(0)
# Drop the first six characters of NAME to obtain the ZIP code
# (presumably a 'ZCTA5 ' prefix -- verify against the CSV).
df_acs['zip5'] = df_acs.NAME.str[6:]

# One row per (ZIP code, weekday), with the ACS columns attached.
gdf_data = pd.merge(gdf_data, df_acs, on='zip5')

gdf_counties = gpd.read_file('data/geo/US_counties.json')

In [None]:
# ACS S1501 columns plotted as education levels (see `dict_names` for what each code holds).
columns_education = ['S1501_C01_002E', 'S1501_C01_003E', 'S1501_C01_004E', 'S1501_C01_005E', 'S1501_C01_007E',
                     'S1501_C01_008E', 'S1501_C01_009E', 'S1501_C01_010E', 'S1501_C01_011E', 'S1501_C01_012E',
                     'S1501_C01_013E', 'S1501_C01_014E', 'S1501_C01_015E']

# DP05_0001E (the per-person denominator) is not in the table currently merged
# into `gdf_data`, so it is re-read from the demographics file.
df_pop = pd.read_csv('data/census/Census_Clean_Zip5_IL_Sex_Age_Ethnicity_2018.csv')
df_pop = df_pop.drop(0)  # row 0 is not data (it is used for `dict_names` elsewhere)
df_pop['zip5'] = df_pop.NAME.str[6:].astype(int)

# Cast to int (the ACS columns are merged in as text), then average over the
# rows of each ZIP code.
df_plot = gdf_data[['zip5', 'energy'] + columns_education].astype(int)
df_plot = df_plot.groupby('zip5').mean().reset_index()

# Attach DP05_0001E; the cast back to int also truncates the averaged values.
df_plot = pd.merge(df_plot, df_pop[['zip5', 'DP05_0001E']], on='zip5').astype(int)
df_plot['energy_pp'] = df_plot['energy'] / df_plot['DP05_0001E']

# Express every education column as a fraction of DP05_0001E.
df_plot[columns_education] = df_plot[columns_education].div(df_plot['DP05_0001E'], axis=0)

# Long format: one row per (ZIP code, education column) for colouring.
df_plot = pd.melt(df_plot, id_vars=['zip5', 'energy_pp'], value_vars=columns_education,
                  var_name='education', value_name='people')

# The unused `df = px.data.iris()` copy-paste leftover was removed.
fig = px.scatter(df_plot, x="people", y="energy_pp", color="education",
                 hover_data=['zip5'])
fig.show()

In [None]:
# Display the current contents of `df_plot` as the cell output.
df_plot