In [2]:
import os

import pandas as pd
from census import Census
from sqlalchemy import create_engine

from config import api_key
# Census API client; `year` selects the data vintage used by later queries.
c = Census(
    api_key,
    year=2017,
)

In [3]:
# ACS 5-year variable codes to request. Most are given readable names below;
# "B15003_022E" is fetched but never renamed or used in this notebook
# (presumably an educational-attainment count -- verify against the ACS
# variable list).
acs_variables = (
    "NAME",
    "B19013_001E",
    "B01003_001E",
    "B01002_001E",
    "B19301_001E",
    "B17001_002E",
    "B23025_005E",
    "B25077_001E",
    "B15003_022E",
)

# Request one record per ZIP Code Tabulation Area.
geography = {'for': 'zip code tabulation area:*'}
census_data = c.acs5.get(acs_variables, geography)

# Build a DataFrame from the returned records.
census_pd = pd.DataFrame(census_data)

# Rename the ACS variable codes (and the geography column) to readable names.
readable_names = {
    "B01003_001E": "population",
    "B01002_001E": "median_age",
    "B19013_001E": "household_income",
    "B19301_001E": "per_capita_income",
    "B17001_002E": "poverty_count",
    "B23025_005E": "unemployment_count",
    "B25077_001E": "median_home_value",
    "NAME": "Name",
    "zip code tabulation area": "zip_code",
}
census_pd = census_pd.rename(columns=readable_names)

In [16]:
# Columns carried forward into the cleaned table (zip_code becomes the index).
cleaned_columns = [
    "zip_code",
    "population",
    "median_home_value",
    "per_capita_income",
    "household_income",
    "poverty_count",
    "unemployment_count",
]

# Inclusive ZIP range kept for the analysis (presumably Florida -- confirm).
ZIP_CODE_MIN, ZIP_CODE_MAX = 32004, 34997

# Select, drop incomplete rows, de-duplicate on ZIP, then cast everything to
# int in one step. Chaining (instead of inplace=True) keeps the cell
# idempotent: re-running it always starts again from census_pd.
census_cleaned = (
    census_pd[cleaned_columns]
    .dropna()
    .drop_duplicates(subset="zip_code")
    .astype(int)
)

# Keep only rows that are usable for the analysis:
#   * no negative values -- the ACS reports large negative placeholders
#     (e.g. -666666666) when an estimate is unavailable, and these would
#     otherwise be stored as if they were real incomes;
#   * ZIP code inside the target range;
#   * a positive median home value;
#   * a positive population, so the poverty rate below never divides by zero.
is_valid_row = (
    census_cleaned.ge(0).all(axis=1)
    & census_cleaned["zip_code"].between(ZIP_CODE_MIN, ZIP_CODE_MAX)
    & census_cleaned["median_home_value"].gt(0)
    & census_cleaned["population"].gt(0)
)
census_cleaned = census_cleaned[is_valid_row]

# Poverty rate as a percentage with one decimal place. Scaling to percent
# BEFORE rounding avoids float noise such as 5.800000000000001 that
# `round(ratio, 3) * 100` produces.
poverty_rate = (
    census_cleaned["poverty_count"] / census_cleaned["population"] * 100
).round(1)

census_cleaned = (
    census_cleaned
    .assign(poverty_rate=poverty_rate)
    .set_index("zip_code")
)

census_cleaned.head(10)

Unnamed: 0_level_0,population,median_home_value,per_capita_income,household_income,poverty_count,unemployment_count,poverty_rate
zip_code,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
32008,4808,74900,21277,42235,766,51,15.9
32009,3647,154900,25970,65469,313,40,8.6
32011,14446,157900,27153,61176,1492,376,10.3
32024,19479,120200,25607,49825,2643,618,13.6
32025,22427,109700,20110,43891,2002,583,8.9
32033,4683,136300,24873,49107,1077,348,23.0
32034,33695,294400,41397,68533,4061,983,12.1
32038,9891,99700,22686,41325,1564,433,15.8
32040,7553,131600,22052,56886,1352,303,17.9
32043,24381,168000,28857,59960,2730,944,11.2


In [6]:
# Database target in "user:password@host:port/database" form.
# Set the CENSUS_DB_CONNECTION environment variable to supply real
# credentials without committing them to the notebook; the fallback is the
# original local-development value so existing setups keep working.
connection_string = os.environ.get(
    "CENSUS_DB_CONNECTION",
    "postgres:postgres@localhost:5432/Project_Data",
)
engine = create_engine(f"postgresql://{connection_string}")

In [7]:
# Write the cleaned rows to the `census_data` table, including the zip_code
# index as a column. NOTE: with if_exists='append', re-running this cell
# inserts the same rows again.
census_cleaned.to_sql(
    name='census_data',
    con=engine,
    if_exists='append',
    index=True,
)