In [14]:
#Import modules
import getpass
import os
import pprint as pp
from urllib.parse import quote

import pandas as pd
from sqlalchemy import create_engine
from sqlalchemy import Column, String, Integer
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import sessionmaker


In [15]:
# Due to an API key technical error, the raw data was pulled manually from American FactFinder:
# the list of ZIP codes, total population by ZIP, and population classified as below poverty level by ZIP.
# Import the data. header=1 takes the column names from the second line of each file.
# Columns are renamed by name (not by position) because `usecols` returns columns in file order,
# so a positional `.columns = [...]` assignment would silently mislabel them if that order differed.
poverty_source_col = "Below poverty level; Estimate; Population for whom poverty status is determined"
ACS_Poverty = (
    pd.read_csv(
        "fact_finder_raw_data/ACS_17_5YR_S1701_with_ann.csv",
        header=1,
        usecols=["Id", "Geography", poverty_source_col],
    )
    .rename(columns={poverty_source_col: "Total_Poverty"})
    [["Id", "Geography", "Total_Poverty"]]
)

ACS_Zip = pd.read_csv(
    "fact_finder_raw_data/ACS_17_5YR_G001_with_ann.csv",
    header=1,
    usecols=["Id", "Geography"],
)

population_source_col = "Estimate; Total"
ACS_Population = (
    pd.read_csv(
        "fact_finder_raw_data/ACS_17_5YR_B01003_with_ann.csv",
        header=1,
        usecols=["Id", "Geography", population_source_col],
    )
    .rename(columns={population_source_col: "Total_Population"})
    [["Id", "Geography", "Total_Population"]]
)

In [16]:
# Preview the population table loaded from the B01003 file (long frames are abbreviated in the output).
ACS_Population

Unnamed: 0,Id,Geography,Total_Population
0,8600000US00601,ZCTA5 00601,17599
1,8600000US00602,ZCTA5 00602,39209
2,8600000US00603,ZCTA5 00603,50135
3,8600000US00606,ZCTA5 00606,6304
4,8600000US00610,ZCTA5 00610,27590
...,...,...,...
33115,8600000US99923,ZCTA5 99923,0
33116,8600000US99925,ZCTA5 99925,901
33117,8600000US99926,ZCTA5 99926,1684
33118,8600000US99927,ZCTA5 99927,59


In [17]:
# Preview the ZIP/geography table loaded from the G001 file (long frames are abbreviated in the output).
ACS_Zip

Unnamed: 0,Id,Geography
0,8600000US00601,ZCTA5 00601
1,8600000US00602,ZCTA5 00602
2,8600000US00603,ZCTA5 00603
3,8600000US00606,ZCTA5 00606
4,8600000US00610,ZCTA5 00610
...,...,...
33115,8600000US99923,ZCTA5 99923
33116,8600000US99925,ZCTA5 99925
33117,8600000US99926,ZCTA5 99926
33118,8600000US99927,ZCTA5 99927


In [18]:
# Preview the poverty table loaded from the S1701 file (long frames are abbreviated in the output).
ACS_Poverty

Unnamed: 0,Id,Geography,Total_Poverty
0,8600000US00601,ZCTA5 00601,11282
1,8600000US00602,ZCTA5 00602,20428
2,8600000US00603,ZCTA5 00603,25176
3,8600000US00606,ZCTA5 00606,4092
4,8600000US00610,ZCTA5 00610,12553
...,...,...,...
33115,8600000US99923,ZCTA5 99923,0
33116,8600000US99925,ZCTA5 99925,210
33117,8600000US99926,ZCTA5 99926,224
33118,8600000US99927,ZCTA5 99927,49


In [19]:
# Connect to PostgreSQL and create the declarative base used by the ORM table classes.
# Connection settings are read from the environment so that no credential is stored in the
# notebook; if CENSUS_DB_PASSWORD is not set, the password is requested interactively.
db_user = os.environ.get("CENSUS_DB_USER", "postgres")
db_password = os.environ.get("CENSUS_DB_PASSWORD") or getpass.getpass("PostgreSQL password: ")
db_host = os.environ.get("CENSUS_DB_HOST", "localhost")
db_port = os.environ.get("CENSUS_DB_PORT", "5432")
db_name = os.environ.get("CENSUS_DB_NAME", "census")

# The dialect name is "postgresql"; the legacy "postgres" alias is rejected by newer
# SQLAlchemy releases. User and password are percent-encoded so characters such as
# "#", "@" or "/" cannot be misread as URL delimiters.
db_string = "postgresql+psycopg2://{user}:{password}@{host}:{port}/{name}".format(
    user=quote(db_user, safe=""),
    password=quote(db_password, safe=""),
    host=db_host,
    port=db_port,
    name=db_name,
)

db = create_engine(db_string)
base = declarative_base()

class Zip(base):
    """ORM mapping for the 'Zip' table: one row per geography identifier."""
    __tablename__ = 'Zip'

    # Geography identifier used as the primary key (e.g. '8600000US00601' in the loaded data).
    Id = Column(String, primary_key=True)
    # Human-readable geography label (e.g. 'ZCTA5 00601' in the loaded data).
    Geography = Column(String)

class Population(base):
    """ORM mapping for the 'Population' table: total population per geography identifier."""
    __tablename__ = 'Population'

    # Geography identifier used as the primary key; also the join key against Poverty.Id.
    Id = Column(String, primary_key=True)
    # Human-readable geography label (e.g. 'ZCTA5 00601' in the loaded data).
    Geography = Column(String)
    # Total population estimate for the geography.
    Total_Population =  Column(Integer)

class Poverty(base):
    """ORM mapping for the 'Poverty' table: population below poverty level per geography identifier."""
    __tablename__ = 'Poverty'

    # Geography identifier used as the primary key; also the join key against Population.Id.
    Id = Column(String, primary_key=True)
    # Human-readable geography label (e.g. 'ZCTA5 00601' in the loaded data).
    Geography = Column(String)
    # Estimated number of people below the poverty level for the geography.
    Total_Poverty = Column(Integer)

In [20]:
# Create any mapped tables that do not exist yet, then open an ORM session bound to the engine.
base.metadata.create_all(bind=db)
Session = sessionmaker(bind=db)
session = Session()

In [21]:
# Insert the three DataFrames into their PostgreSQL tables.
# Each table has a primary key on "Id", so a plain append fails with a duplicate-key error
# whenever this cell is re-run (e.g. Restart & Run All). Existing rows are therefore deleted
# first, in the same transaction as the inserts: if any step fails, everything is rolled
# back and the previously loaded rows are left untouched.
table_loads = [
    (Poverty, ACS_Poverty),
    (Population, ACS_Population),
    (Zip, ACS_Zip),
]
with db.begin() as connection:
    for model, frame in table_loads:
        connection.execute(model.__table__.delete())
        frame.to_sql(model.__tablename__, con=connection, if_exists='append', index=False)

In [22]:
# Inner-join Population to Poverty on Id and fetch every matching row
# (Id, Geography, Total_Population, Total_Poverty).
q = (
    session.query(
        Population.Id,
        Population.Geography,
        Population.Total_Population,
        Poverty.Total_Poverty,
    )
    .join(Poverty, Population.Id == Poverty.Id)
    .all()
)

In [23]:
# Convert the query result to a DataFrame and calculate the poverty rate.
# Column names are given explicitly (matching the order of the query's selected columns) so the
# frame has the expected schema even when the query returns no rows, and does not depend on how
# pandas introspects SQLAlchemy row objects.
result_columns = ['Id', 'Geography', 'Total_Population', 'Total_Poverty']
test_df = pd.DataFrame([tuple(row) for row in q], columns=result_columns)

# Mask non-positive populations so the rate is NaN (undefined) there, rather than inf when a
# zero-population area reports a non-zero poverty count.
population = test_df['Total_Population']
test_df['Poverty_rate'] = test_df['Total_Poverty'] / population.where(population > 0)
test_df

Unnamed: 0,Id,Geography,Total_Population,Total_Poverty,Poverty_rate
0,8600000US00601,ZCTA5 00601,17599,11282,0.641059
1,8600000US00602,ZCTA5 00602,39209,20428,0.521003
2,8600000US00603,ZCTA5 00603,50135,25176,0.502164
3,8600000US00606,ZCTA5 00606,6304,4092,0.649112
4,8600000US00610,ZCTA5 00610,27590,12553,0.454984
...,...,...,...,...,...
33115,8600000US99923,ZCTA5 99923,0,0,
33116,8600000US99925,ZCTA5 99925,901,210,0.233074
33117,8600000US99926,ZCTA5 99926,1684,224,0.133017
33118,8600000US99927,ZCTA5 99927,59,49,0.830508


In [24]:
# Keep the first 10 rows after sorting by total population, then poverty rate, both descending.
# NOTE(review): Poverty_rate is only a tie-breaker between equal populations here, so this is
# effectively the 10 most populous areas regardless of poverty — confirm that this matches the
# intended "most afflicted" ranking.
test_df = (
    test_df
    .sort_values(by=['Total_Population', 'Poverty_rate'], ascending=[False, False])
    .iloc[:10]
)

In [27]:
# Write the top-10 frame to PostgreSQL, replacing the table if it already exists.
test_df.to_sql(
    name="Top_10_Poverty",
    con=db,
    if_exists='replace',
    index=False,
)

In [28]:
# Show the 10 rows that were uploaded to the "Top_10_Poverty" table.
test_df

Unnamed: 0,Id,Geography,Total_Population,Total_Poverty,Poverty_rate
26837,8600000US77449,ZCTA5 77449,119204,11234,0.094242
2877,8600000US11368,ZCTA5 11368,114647,23391,0.204026
20721,8600000US60629,ZCTA5 60629,114129,23084,0.202262
27790,8600000US79936,ZCTA5 79936,111918,15937,0.142399
30019,8600000US90011,ZCTA5 90011,108051,38839,0.359451
2888,8600000US11385,ZCTA5 11385,106598,13306,0.124824
30142,8600000US90650,ZCTA5 90650,106404,14546,0.136705
26874,8600000US77494,ZCTA5 77494,105854,4095,0.038685
30225,8600000US91331,ZCTA5 91331,105696,20060,0.18979
26735,8600000US77084,ZCTA5 77084,104582,12261,0.117238
