In [1]:
# Import dependencies
import pandas as pd
%matplotlib notebook 
import matplotlib.pyplot as plt

In [2]:
# Load HEAT_mean_2010_merged_income.csv into a DataFrame.
# The path is relative, so the CSV must sit in the notebook's working directory.
# Later cells read the 'COMMUNITY AREA NAME', 'PER CAPITA INCOME',
# 'THERM MEAN 2010', 'TOTAL POPULATION' and 'Unnamed: 0' columns from it.
file = 'HEAT_mean_2010_merged_income.csv'
income_therm_mean = pd.read_csv(file)

In [3]:
# Group income_therm_mean by 'COMMUNITY AREA NAME' and average every
# remaining column within each community area.
# 'COMMUNITY AREA NAME' becomes the index of the result.
# NOTE(review): if the CSV carries any other non-numeric column, .mean() raises
# TypeError on pandas >= 2.0 -- confirm the schema or pass numeric_only=True.
area_income_mean = income_therm_mean.groupby('COMMUNITY AREA NAME').mean()

In [4]:
# Tidy area_income_mean in a single chain:
#   1. move 'COMMUNITY AREA NAME' out of the index and back into a column
#   2. drop the leftover 'Unnamed: 0' column
#   3. order the community areas by ascending per capita income
area_income_mean = (
    area_income_mean
    .reset_index()
    .drop(columns='Unnamed: 0')
    .sort_values(by='PER CAPITA INCOME')
)

# Summary statistics (including min and max) of the per-neighborhood means
area_income_mean_sum = area_income_mean.describe()
area_income_mean_sum

Unnamed: 0,PER CAPITA INCOME,THERM MEAN 2010,TOTAL POPULATION
count,77.0,77.0,77.0
mean,25106.74026,3214.85879,158.777985
std,14952.672297,6078.637448,306.343074
min,8535.0,1295.355759,45.851685
25%,15467.0,1664.790658,77.282932
50%,20489.0,1952.904097,99.049479
75%,29026.0,2624.55065,142.782087
max,87163.0,53087.466667,2677.166667


In [5]:
# Bin edges for the per capita income brackets: 0 through 90,000 in 10,000 steps
bins = list(range(0, 90001, 10000))

# One label per pair of neighbouring edges ("0 to 10k", "10k to 20k", ...).
# The lowest edge is written as a bare "0" (no "k" suffix).
incomes = [
    f"{lower // 1000}k to {upper // 1000}k" if lower else f"0 to {upper // 1000}k"
    for lower, upper in zip(bins, bins[1:])
]

In [6]:
# Label every community area with the income bracket its per capita income
# falls into, and store the labels in a new column of area_income_mean
area_income_mean['PER CAPITA INCOME GROUPS'] = pd.cut(
    x=area_income_mean["PER CAPITA INCOME"],
    bins=bins,
    labels=incomes,
)

In [7]:
# Make note of the overview of 'THERM MEAN 2010' for all neighborhoods
# (one row per community area), as a baseline to compare against the
# per-income-bracket summary computed later
area_income_mean['THERM MEAN 2010'].describe()

count       77.000000
mean      3214.858790
std       6078.637448
min       1295.355759
25%       1664.790658
50%       1952.904097
75%       2624.550650
max      53087.466667
Name: THERM MEAN 2010, dtype: float64

In [8]:
# Group area_income_mean to reflect income groups and average
# 'THERM MEAN 2010' across the community areas in each bracket.
#
# The value column is selected *before* aggregating: area_income_mean still
# holds the string column 'COMMUNITY AREA NAME', and averaging the whole frame
# raises TypeError on pandas >= 2.0 (older versions silently dropped it).
# observed=False keeps every income bracket in the result, matching the
# historical default for categorical group keys.
per_capita_income_grp = (
    area_income_mean
    .groupby("PER CAPITA INCOME GROUPS", observed=False)[['THERM MEAN 2010']]
    .mean()
)

# Check per_capita_income_grp DataFrame
per_capita_income_grp

Unnamed: 0_level_0,THERM MEAN 2010
PER CAPITA INCOME GROUPS,Unnamed: 1_level_1
0 to 10k,1898.48929
10k to 20k,2096.748075
20k to 30k,2514.008691
30k to 40k,3065.539731
40k to 50k,2155.695061
50k to 60k,2539.799858
60k to 70k,31730.568636
70k to 80k,3102.251293
80k to 90k,8935.582759


In [9]:
# Move 'PER CAPITA INCOME GROUPS' out of the index and into a regular column
# of per_capita_income_grp (it will be combined with other DataFrames later)
per_capita_income_grp = per_capita_income_grp.reset_index()

# Make note of how 'THERM MEAN 2010' changed for income brackets
per_capita_income_grp['THERM MEAN 2010'].describe()

count        9.000000
mean      6448.742599
std       9726.491212
min       1898.489290
25%       2155.695061
50%       2539.799858
75%       3102.251293
max      31730.568636
Name: THERM MEAN 2010, dtype: float64

In [10]:
# Neighborhood count per income bracket, built in one chain:
#   - count the rows of every column within each bracket
#   - keep a single column ('PER CAPITA INCOME') as the count
#   - reset the index so the DataFrame can be merged later
#   - rename the column to reflect the actual data
count_per_capita_income = (
    area_income_mean
    .groupby("PER CAPITA INCOME GROUPS")
    .count()[['PER CAPITA INCOME']]
    .reset_index()
    .rename(columns={'PER CAPITA INCOME': 'NEIGHBORHOOD COUNT'})
)
count_per_capita_income.head(2)

Unnamed: 0,PER CAPITA INCOME GROUPS,NEIGHBORHOOD COUNT
0,0 to 10k,2
1,10k to 20k,34


In [11]:
# Total population per income bracket: the sum, within each bracket, of the
# mean total population of its community areas.
#   - sum every column per bracket
#   - keep only the 'TOTAL POPULATION' column
#   - reset the index so the DataFrame can be merged later
total_population = (
    area_income_mean
    .groupby("PER CAPITA INCOME GROUPS")
    .sum()[['TOTAL POPULATION']]
    .reset_index()
)

# Check total_population DataFrame
total_population.head(2)

Unnamed: 0,PER CAPITA INCOME GROUPS,TOTAL POPULATION
0,0 to 10k,218.338134
1,10k to 20k,3521.236517


In [12]:
# Combine DataFrames:
# per_capita_income_grp, count_per_capita_income, total_population
#
# The summary is built as a NEW DataFrame by merging on the income-bracket
# label. Plain assignment (summary = per_capita_income_grp) only creates an
# alias, so adding columns would also modify per_capita_income_grp, and the
# columns would be matched purely by positional index instead of by bracket.
# validate='one_to_one' makes the merge fail loudly if a bracket is duplicated.
#
# NOTE: the variable name says "KWH" but the values are therms; the name is
#       kept so any later reference to it keeps working.
# NOTE: 'MEAN TOTAL POPULATION' holds the per-bracket SUM of the community
#       areas' mean total population.

bracket_key = 'PER CAPITA INCOME GROUPS'

summary_meanKWH_residential = (
    per_capita_income_grp[[bracket_key, 'THERM MEAN 2010']]
    .merge(
        count_per_capita_income[[bracket_key, 'NEIGHBORHOOD COUNT']],
        on=bracket_key, how='left', validate='one_to_one',
    )
    .merge(
        total_population[[bracket_key, 'TOTAL POPULATION']]
        .rename(columns={'TOTAL POPULATION': 'MEAN TOTAL POPULATION'}),
        on=bracket_key, how='left', validate='one_to_one',
    )
)

summary_meanKWH_residential

Unnamed: 0,PER CAPITA INCOME GROUPS,THERM MEAN 2010,NEIGHBORHOOD COUNT,MEAN TOTAL POPULATION
0,0 to 10k,1898.48929,2,218.338134
1,10k to 20k,2096.748075,34,3521.236517
2,20k to 30k,2514.008691,23,2488.405846
3,30k to 40k,3065.539731,9,1416.560965
4,40k to 50k,2155.695061,3,292.398094
5,50k to 60k,2539.799858,2,247.956346
6,60k to 70k,31730.568636,2,3322.80303
7,70k to 80k,3102.251293,1,167.70344
8,80k to 90k,8935.582759,1,550.502463


In [15]:
# Create bar plot from per_capita_income_grp
# Compare income brackets to THERM MEAN 2010

# Create plot for per_capita_income_grp and keep the Axes handle so the
# styling below targets this figure explicitly
x = 'PER CAPITA INCOME GROUPS'
y = 'THERM MEAN 2010'
title = 'Chicago - Avg Natural Gas Usage 2010, Grouped by Per Capita Income Brackets'
ax = per_capita_income_grp.plot(kind='bar', x=x, y=y, figsize=(7,10),
                                rot=40, title=title)

# Add y-axis grid lines to make reading graph easier
ax.yaxis.grid(True, linestyle='--')

# Adjust location of legend
ax.legend(loc='upper left')

# y Label
# 1 Therm = 100,000 British Thermal Units (BTUs)
# Source: https://accel.peoplesgasdelivery.com/home/gas_rates.aspx 
ax.set_ylabel('Natural Gas Used (therms)')

# Actually CALL tight_layout: a bare `plt.tight_layout` (no parentheses) only
# echoes the function object and never adjusts the figure margins.
ax.get_figure().tight_layout()

<IPython.core.display.Javascript object>

<function matplotlib.pyplot.tight_layout(pad=1.08, h_pad=None, w_pad=None, rect=None)>

In [16]:
# List the community areas (name and per capita income) that fall in the
# income brackets of interest: 60k to 70k, above 80k, and below 10k.
# Rows are visited in the DataFrame's current order.
for area, income in zip(area_income_mean['COMMUNITY AREA NAME'],
                        area_income_mean['PER CAPITA INCOME']):
    # The three ranges are mutually exclusive, so at most one matches
    if 60000 < income < 70000:
        bracket = "60k to 70k"
    elif income > 80000:
        bracket = ">80k"
    elif income < 10000:
        bracket = "<10k"
    else:
        # Not a bracket of interest
        continue

    print(f"Community areas with PCI {bracket}")
    print(f"{area} per capita income is {income}")
    print(f"-------------------------------------")

Community areas with PCI <10k
Riverdale per capita income is 8535.0
-------------------------------------
Community areas with PCI <10k
Fuller Park per capita income is 9016.0
-------------------------------------
Community areas with PCI 60k to 70k
Near South Side per capita income is 60593.0
-------------------------------------
Community areas with PCI 60k to 70k
Loop per capita income is 67699.0
-------------------------------------
Community areas with PCI >80k
Near North Side per capita income is 87163.0
-------------------------------------


In [17]:
# Save figure
# plt.savefig writes pyplot's *current* figure, so this depends on the bar
# plot created in an earlier cell still being the active figure.
# NOTE(review): with a non-interactive/inline backend the figure may already be
# closed when this cell runs, producing a blank image -- confirm, or save from
# the plotting cell instead.
# The '../Graphs' directory must already exist.
plt.savefig('../Graphs/HEAT_Avg_Per_Capita_Income.png')