In [1]:
import pandas as pd

import sqlalchemy
from sqlalchemy.ext.automap import automap_base
from sqlalchemy.orm import Session
from sqlalchemy import create_engine, inspect

In [4]:
#DB URL
username = "postgres"
password = "BonyPonies21!" 
db_name = "Residential_Rent_DB"
conn_url = f"postgresql://{username}:{password}@localhost/{db_name}"
    
engine = create_engine(conn_url)

#Reflect Database into ORM classes
Base = automap_base()
Base.prepare(engine, reflect=True)
Base.classes.keys()

['state_county_master', 'county_zillow_rental_prices']

In [5]:
# read in the master counties data, create a DataFrame, create new 'county_state' column
path = './Resources/Master_Counties.csv'
df = pd.read_csv(path)
# rename the 'state_name' column


df.to_csv(path, index=False)

In [6]:
keep_col = ['state_name','county']
new_df = df[keep_col].drop_duplicates(keep='first')
new_df = new_df.rename(columns={"state_name":"state"})
new_df.head()
counties_path = './Resources/uscounty.csv'
new_df.to_csv(counties_path, index=False)

In [7]:
#Populate the state_county_master

state_county_df = pd.read_csv(counties_path)
state_county_df.dropna(axis=0, inplace=True, how='any')

#Bulk insert the records into the State_County_Master table
state_county_df.to_sql('state_county_master', engine, index=False, if_exists="append")

In [8]:
# zillow steps to take:

# import the data from csv - Randy and Elsa - done by Tuesday
path = './Resources/price.csv'
zillow_info = pd.read_csv(path)

# ----------- TRANSFORM - clean the data --------------
# group rows by county - to get the mean of all records for a county - Randy and Elsa  - done by Tuesday
zillow_info = zillow_info.groupby(['County','State']).mean()

# drop city code and population rank columns from the DataFrame - Mark and Anthony
zillow_info = zillow_info.drop(['City Code','Population Rank'], axis=1)

# transpose the rent information from columns into rows
zillow_info_transpose = zillow_info.T
zillow_info_transpose.head()

# ------------ LOAD - get the data into a sql database -------------
# convert into vertical data set instead of horizontal - dates need to be rows instead of columns... or something...
# read csv 
# put data into dictionaries
# create data frames from the dictionaries
# load data frames into database using sqlalchemy, session.query
# key is foreign key - county and state, combined - references "master county table" 
## update county and state information to match master table - change state to full name not an abbreviation
## then create foreign key

County,Acadia,Accomack,Ada,Adair,Adams,Adams,Adams,Adams,Adams,Adams,...,Yell,Yellow Medicine,Yellowstone,Yolo,York,York,York,York,Yuba,Yuma
State,LA,VA,ID,IA,CO,IA,IL,MS,NE,OH,...,AR,MN,MT,CA,ME,PA,SC,VA,CA,AZ
Nov-10,1077.25,1269.75,,1039.0,1248.214286,933.0,,,827.0,782.5,...,755.0,1178.5,,1579.8,,1219.951613,1197.9,1715.0,1361.75,946.0
Dec-10,1071.5,1276.75,,999.0,1251.928571,925.0,,,842.0,785.0,...,759.5,1185.5,,1580.6,,1217.774194,1225.2,1707.5,1364.5,938.75
Jan-11,1076.25,1291.75,,974.0,1259.857143,924.0,,,855.0,788.25,...,760.0,1188.5,,1554.2,,1221.193548,1239.0,1708.5,1368.0,931.5
Feb-11,1081.5,1308.0,,961.0,1273.5,928.0,,,854.0,789.25,...,759.0,1191.5,,1523.8,,1229.354839,1215.9,1710.0,1372.25,936.5
Mar-11,1084.25,1313.0,1055.833333,961.0,1288.285714,936.0,,,863.0,790.25,...,757.5,1196.0,,1489.2,,1237.209677,1184.4,1715.5,1372.25,941.75


In [16]:
# evictionlab data steps to take:

# import the data from csv - Sidneyh and Harsh and Jason - done by Tuesday
newPath = './Resources/all-counties.csv'
evictionLab = pd.read_csv(newPath)

# once the data is imported everyone can feel free to work on the data independently

# TO DO - do we need to have the mean here or is that not really correct?
# TO DO - the 'County_State' column still has the word 'County' among other words 
# ('Borough', 'Census Area', etc) in all the rows... if we use this as a key we must remove that

evictionLab = evictionLab.rename(columns={"name": "County", "parent.location": "State"})
evictionLab['County'] = evictionLab['County'].str.replace('County', '' )

#original groupby command below:
#evictionLab_info = evictionLab.groupby(['County','State', "year"]).mean()

evictionLab = evictionLab.drop(['GEOID', 'low.flag', 'imputed', 'subbed'], axis=1)

#----------- TRANSFORM - clean the data -------------

# change column names to be more descriptive AND format - Scout and Wesley 

Eviction_df = evictionLab.rename(columns={
    "poverty.rate" : "below poverty line pct", 
    "renter.occupied.households" : "renter occupied households", 
    "pct.renter.occupied" : "renter occupied pct", 
    "median.gross.rent" : "median gross rent", 
    "median.household.income" : "median household income",
    "median.property.value" : "median property value", 
    "rent.burden":"rent burden pct", "pct.white" : "white pct", 
    "pct.af.am" : "african american pct",
    "pct.hispanic" : "hispanic pct", 
    "pct.am.ind" : "american indian pct", 
    "pct.asian" : "asian pct", 
    "pct.nh.pi" : "pacific islander pct",
    "pct.multiple" : "multiple race pct", 
    "pct.other" : "other race pct", 
    "eviction.filings": "eviction filings", 
    "evictions" : "evictions",
    "eviction.rate" : "eviction rate", 
    "eviction.filing.rate": "eviction filing rate"
})

Eviction_df

#---------- LOAD ------------
# key is foreign key - county and state, combined - references "master county table"
## update county and state information to match master table - keep full state name
## then create foreign key

# compare this median.gross.rent to rent information from zillow





Unnamed: 0,year,County,State,population,below poverty line pct,renter occupied households,renter occupied pct,median gross rent,median household income,median property value,...,hispanic pct,american indian pct,asian pct,pacific islander pct,multiple race pct,other race pct,eviction filings,evictions,eviction rate,eviction filing rate
0,2009,Aleutians East Borough,Alaska,2959.0,8.37,263,58.59,654.0,56250.0,122100.0,...,9.50,17.17,47.48,0.34,5.58,0.14,0.0,,,0.00
1,2010,Aleutians East Borough,Alaska,3141.0,6.28,268,48.46,847.0,58125.0,120900.0,...,12.26,27.67,35.43,0.60,3.72,0.03,0.0,,,0.00
2,2013,Aleutians East Borough,Alaska,3304.0,12.74,274,46.52,930.0,61518.0,126100.0,...,12.05,29.00,30.51,1.12,3.18,0.00,0.0,,,0.00
3,2015,Aleutians East Borough,Alaska,3304.0,12.74,278,46.52,930.0,61518.0,126100.0,...,12.05,29.00,30.51,1.12,3.18,0.00,0.0,,,0.00
4,2016,Aleutians West Census Area,Alaska,5684.0,5.81,849,67.75,1270.0,84306.0,217500.0,...,10.96,12.95,36.77,2.45,4.40,0.11,0.0,,,0.00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
12525,2016,Uinta,Wyoming,20930.0,10.81,2195,26.95,641.0,56569.0,176700.0,...,8.88,0.38,0.12,0.00,2.62,0.00,16.0,16.0,0.73,0.73
12526,2013,Washakie,Wyoming,8400.0,10.08,974,26.25,605.0,47652.0,160800.0,...,13.99,0.25,0.17,0.00,2.52,0.00,1.0,1.0,0.10,0.10
12527,2014,Washakie,Wyoming,8400.0,10.08,988,26.25,605.0,47652.0,160800.0,...,13.99,0.25,0.17,0.00,2.52,0.00,3.0,3.0,0.30,0.30
12528,2015,Washakie,Wyoming,8400.0,10.08,1002,26.25,605.0,47652.0,160800.0,...,13.99,0.25,0.17,0.00,2.52,0.00,3.0,3.0,0.30,0.30


In [17]:
# create two new DataFrames - county_renters_evictions and county_demographics
# these will be used to populate the sql database tables with the corresponding names
# add all the columns that are expected in each DB table, making sure the names match the DB table column names

county_renters_evictions = pd.DataFrame({
    'state': Eviction_df['State'],
    'county': Eviction_df['County'],
    'year_recorded': Eviction_df['year'],
    'number_of_renter_households': Eviction_df["renter occupied households"],
    'renter_occupied_pct': Eviction_df['renter occupied pct'],
    'median_gross_rent': Eviction_df['median gross rent'],
    'median_property_value': Eviction_df['median property value'],
    'rent_burden': Eviction_df['rent burden pct'],
    'eviction_filings': Eviction_df['eviction filings'],
    'actual_evictions': Eviction_df['evictions'], 
    'eviction_rate': Eviction_df['eviction rate'], 
    'eviction_filing_rate': Eviction_df['eviction filing rate']
})

county_demographics = pd.DataFrame({
    'state': Eviction_df['State'],
    'county': Eviction_df['County'],
    'year_recorded': Eviction_df['year'],
    'population': Eviction_df['population'],
    'poverty_rate_pct': Eviction_df['below poverty line pct'], 
    'median_household_income': Eviction_df['median household income'], 
    'white_pct': Eviction_df['white pct'], 
    'af_am_pct': Eviction_df['african american pct'], 
    'hispanic_pct': Eviction_df['hispanic pct'], 
    'am_ind_pct': Eviction_df['american indian pct'], 
    'asian_pct': Eviction_df['asian pct'], 
    'hawaiin_pac_isl_pct': Eviction_df['pacific islander pct'], 
    'multiple_race_pct': Eviction_df['multiple race pct'], 
    'other_races_pct': Eviction_df['other race pct']
})

In [None]:
#Populate the COUNTY_ZILLOW_RENTAL_PRICES table
#Populate the COUNTY_DEMOGRAPHICS table
#Populate the COUNTY_RENTERS_EVICTIONS table

In [None]:
county_renters_evictions 

In [None]:
county_demographics

In [18]:
county_renters_evictions.to_sql('county_renters_evictions', engine, index=False, if_exists="append")

IntegrityError: (psycopg2.errors.ForeignKeyViolation) insert or update on table "county_renters_evictions" violates foreign key constraint "state_county_3"
DETAIL:  Key (state, county)=(Alaska, Aleutians East Borough) is not present in table "state_county_master".

[SQL: INSERT INTO county_renters_evictions (state, county, year_recorded, number_of_renter_households, renter_occupied_pct, median_gross_rent, median_property_value, rent_burden, eviction_filings, actual_evictions, eviction_rate, eviction_filing_rate) VALUES (%(state)s, %(county)s, %(year_recorded)s, %(number_of_renter_households)s, %(renter_occupied_pct)s, %(median_gross_rent)s, %(median_property_value)s, %(rent_burden)s, %(eviction_filings)s, %(actual_evictions)s, %(eviction_rate)s, %(eviction_filing_rate)s)]
[parameters: ({'state': 'Alaska', 'county': 'Aleutians East Borough', 'year_recorded': 2009, 'number_of_renter_households': 263, 'renter_occupied_pct': 58.59, 'median_gross_rent': 654.0, 'median_property_value': 122100.0, 'rent_burden': 18.0, 'eviction_filings': 0.0, 'actual_evictions': None, 'eviction_rate': None, 'eviction_filing_rate': 0.0}, {'state': 'Alaska', 'county': 'Aleutians East Borough', 'year_recorded': 2010, 'number_of_renter_households': 268, 'renter_occupied_pct': 48.46, 'median_gross_rent': 847.0, 'median_property_value': 120900.0, 'rent_burden': 20.6, 'eviction_filings': 0.0, 'actual_evictions': None, 'eviction_rate': None, 'eviction_filing_rate': 0.0}, {'state': 'Alaska', 'county': 'Aleutians East Borough', 'year_recorded': 2013, 'number_of_renter_households': 274, 'renter_occupied_pct': 46.52, 'median_gross_rent': 930.0, 'median_property_value': 126100.0, 'rent_burden': 22.0, 'eviction_filings': 0.0, 'actual_evictions': None, 'eviction_rate': None, 'eviction_filing_rate': 0.0}, {'state': 'Alaska', 'county': 'Aleutians East Borough', 'year_recorded': 2015, 'number_of_renter_households': 278, 'renter_occupied_pct': 46.52, 'median_gross_rent': 930.0, 'median_property_value': 126100.0, 'rent_burden': 22.0, 'eviction_filings': 0.0, 'actual_evictions': None, 'eviction_rate': None, 'eviction_filing_rate': 0.0}, {'state': 'Alaska', 'county': 'Aleutians West Census Area', 'year_recorded': 2016, 'number_of_renter_households': 849, 'renter_occupied_pct': 67.75, 'median_gross_rent': 1270.0, 'median_property_value': 217500.0, 'rent_burden': 19.4, 'eviction_filings': 0.0, 'actual_evictions': None, 'eviction_rate': None, 'eviction_filing_rate': 0.0}, {'state': 'Alaska', 'county': 'Bristol Bay Borough', 'year_recorded': 2014, 'number_of_renter_households': 208, 'renter_occupied_pct': 45.89, 'median_gross_rent': 1063.0, 'median_property_value': 187500.0, 'rent_burden': 21.4, 'eviction_filings': 0.0, 'actual_evictions': None, 'eviction_rate': None, 'eviction_filing_rate': 0.0}, {'state': 'Alaska', 'county': 'Denali Borough', 'year_recorded': 2000, 'number_of_renter_households': 274, 'renter_occupied_pct': 34.9, 'median_gross_rent': 568.0, 'median_property_value': 103400.0, 'rent_burden': 15.8, 'eviction_filings': None, 'actual_evictions': None, 'eviction_rate': None, 'eviction_filing_rate': None}, {'state': 'Alaska', 'county': 'Denali Borough', 'year_recorded': 2001, 'number_of_renter_households': 267, 'renter_occupied_pct': 34.9, 'median_gross_rent': 568.0, 'median_property_value': 103400.0, 'rent_burden': 15.8, 'eviction_filings': None, 'actual_evictions': None, 'eviction_rate': None, 'eviction_filing_rate': None}  ... displaying 10 of 12530 total bound parameter sets ...  {'state': 'Wyoming', 'county': 'Washakie ', 'year_recorded': 2015, 'number_of_renter_households': 1002, 'renter_occupied_pct': 26.25, 'median_gross_rent': 605.0, 'median_property_value': 160800.0, 'rent_burden': 23.8, 'eviction_filings': 3.0, 'actual_evictions': 3.0, 'eviction_rate': 0.3, 'eviction_filing_rate': 0.3}, {'state': 'Wyoming', 'county': 'Washakie ', 'year_recorded': 2016, 'number_of_renter_households': 1016, 'renter_occupied_pct': 26.25, 'median_gross_rent': 605.0, 'median_property_value': 160800.0, 'rent_burden': 23.8, 'eviction_filings': 6.0, 'actual_evictions': 5.0, 'eviction_rate': 0.49, 'eviction_filing_rate': 0.59})]
(Background on this error at: https://sqlalche.me/e/14/gkpj)