In [61]:
import pandas as pd
from sqlalchemy import create_engine
# Relative locations of the four raw CSV inputs, in the order:
# homeless counts, rent price, population by state, rent price per square foot.
csvHL, csvRP, csvpop, csvPPSQFT = (
    "Resources/2007-2016-Homelessnewss-USA.csv",
    "Resources/price.csv",
    "Resources/datasets_1697_3211_Population-by-state (1).csv",
    "Resources/pricepersqft.csv",
)

In [66]:
# Read every CSV path into memory with pandas, one result per input file.
# The reads happen in the same order as the names on the left-hand side.
homeless_data, rentprice_data, population_data, pricepersqft_data = (
    pd.read_csv(csv_path)
    for csv_path in (csvHL, csvRP, csvpop, csvPPSQFT)
)

In [101]:
# Wrap each loaded dataset in a DataFrame under the *_df name used by later cells.
homeless_df, rentprice_df, population_df, pricepersqft_df = map(
    pd.DataFrame,
    (homeless_data, rentprice_data, population_data, pricepersqft_data),
)


In [170]:
#Transform Homeless CSV data to match Zillow Data Form

# Keep only the rows whose measure is the overall homeless count.
homelessfiltered_df = homeless_df.loc[homeless_df['Measures'] == "Total Homeless"]

# Pivot to one row per State and one column per Year, then move State back
# into a regular column.
# NOTE(review): aggfunc='first' keeps only the first Count found for each
# State/Year pair. If the CSV holds several rows per state and year, this is
# NOT a state-wide total -- confirm against the source data.
homeless_pdf = homelessfiltered_df.pivot_table(values='Count',
                                               index=homelessfiltered_df.State,
                                               columns='Year',
                                               aggfunc='first'
                                              ).reset_index()

# Rename the year columns to HR2007 ... HR2016 (State stays first).
# The positional rename expects exactly ten year columns after the pivot.
hr_columns = ["HR" + str(year) for year in range(2007, 2017)]
homeless_pdf.columns = ['State'] + hr_columns

# Strip commas from the counts and cast them to float so they can be used
# in arithmetic.
homeless_pdf[hr_columns] = homeless_pdf[hr_columns].replace(',', '', regex=True).astype(float)

#filter Homeless DataFrame to prep for Join with Rent Data, cleaned down to show variance and % CHG from 2011 to 2016
homeless_final = homeless_pdf.filter(['State', 'HR2011', 'HR2016'])
# Compute on homeless_final itself: the previously referenced name
# `homeless_filter` is never defined in this notebook, so the cell failed
# with a NameError on a fresh kernel.
homeless_final["Var"] = homeless_final["HR2016"] - homeless_final["HR2011"]
homeless_final["% CHG"] = homeless_final["Var"] / homeless_final["HR2011"] * 100
homeless_final.head()


Unnamed: 0,State,HR2011,HR2016,Var,% CHG
0,AK,1223.0,1105.0,-118.0,-9.648406
1,AL,1950.0,1228.0,-722.0,-37.025641
2,AR,1276.0,808.0,-468.0,-36.677116
3,AZ,2047.0,2240.0,193.0,9.428432
4,CA,7067.0,6524.0,-543.0,-7.6836


In [171]:

# Perform Groupby on Rent Data by State with Average rent price
# numeric_only=True limits the average to numeric columns. pandas 2.0+ raises
# a TypeError when mean() meets a non-numeric column instead of silently
# dropping it, so the explicit flag keeps this cell working across versions.
rentprice_mean = rentprice_df.groupby(['State']).mean(numeric_only=True)

# Take the January column of each year 2011-2016 and rename it to RP<year>
# to match the HR<year> format from the homeless DF.
# 'State' is the groupby index at this point (not a column), so only the
# month columns are selected; a missing month raises a KeyError here rather
# than a length mismatch on a positional rename.
rent_years = range(2011, 2017)
january_columns = ['January {}'.format(year) for year in rent_years]
rent_column_names = {'January {}'.format(year): 'RP{}'.format(year) for year in rent_years}
rentprice_mean_years = (
    rentprice_mean
    .loc[:, january_columns]
    .rename(columns=rent_column_names)
)

# Reduce to the 2011 and 2016 endpoints and derive the absolute and
# percentage change between them.
rentprice_final = rentprice_mean_years.filter(['RP2011', 'RP2016'])
rentprice_final["RP Var"] = rentprice_final["RP2016"] - rentprice_final["RP2011"]
rentprice_final["RP % CHG"] = rentprice_final["RP Var"] / rentprice_final["RP2011"] * 100
# Move State from the index back into a regular column for the later join.
rentprice_final = rentprice_final.reset_index()
rentprice_final.head()

Unnamed: 0,State,RP2011,RP2016,RP Var,RP % CHG
0,AK,,1644.0,,
1,AL,959.351648,1023.484305,64.132657,6.685
2,AR,828.77305,943.687075,114.914025,13.86556
3,AZ,1197.033613,1233.605042,36.571429,3.055171
4,CA,2019.433604,2322.005376,302.571772,14.983002


In [199]:
# Left-join the rent figures onto the homeless figures, matching on State.
combined_df = homeless_final.set_index('State').join(
    rentprice_final.set_index('State')
)

# Drop rows with any missing value and restore State as a regular column.
combined_clean = combined_df.dropna().reset_index()

# Display-only formatting: dollar amounts, plain counts, and percentages.
# The underlying values in combined_clean are left untouched.
combined_final = combined_clean.style.format({
    **dict.fromkeys(["RP2011", "RP2016", "RP Var"], "${:20,.0f}"),
    **dict.fromkeys(["HR2011", "HR2016", "Var"], "{:20,.0f}"),
    **dict.fromkeys(["RP % CHG", "% CHG"], "{:20,.1f}%"),
})

combined_final

Unnamed: 0,State,HR2011,HR2016,Var,% CHG,RP2011,RP2016,RP Var,RP % CHG
0,AL,1950,1228,-722,-37.0%,$ 959,"$ 1,023",$ 64,6.7%
1,AR,1276,808,-468,-36.7%,$ 829,$ 944,$ 115,13.9%
2,AZ,2047,2240,193,9.4%,"$ 1,197","$ 1,234",$ 37,3.1%
3,CA,7067,6524,-543,-7.7%,"$ 2,019","$ 2,322",$ 303,15.0%
4,CO,9283,3520,-5763,-62.1%,"$ 1,398","$ 1,712",$ 315,22.5%
5,CT,1005,886,-119,-11.8%,"$ 1,733","$ 1,834",$ 101,5.9%
6,DC,6546,8350,1804,27.6%,"$ 2,241","$ 2,535",$ 294,13.1%
7,DE,1035,1070,35,3.4%,"$ 1,329","$ 1,352",$ 23,1.7%
8,FL,1152,1468,316,27.4%,"$ 1,400","$ 1,642",$ 242,17.3%
9,GA,6805,4063,-2742,-40.3%,"$ 1,116","$ 1,156",$ 39,3.5%


In [203]:
# Render the unformatted combined_clean table as an HTML <table> string and
# print the raw markup (print is intentional: the text itself is the output).
html = combined_clean.to_html()
print(html)


<table border="1" class="dataframe">
  <thead>
    <tr style="text-align: right;">
      <th></th>
      <th>State</th>
      <th>HR2011</th>
      <th>HR2016</th>
      <th>Var</th>
      <th>% CHG</th>
      <th>RP2011</th>
      <th>RP2016</th>
      <th>RP Var</th>
      <th>RP % CHG</th>
    </tr>
  </thead>
  <tbody>
    <tr>
      <th>0</th>
      <td>AL</td>
      <td>1950.0</td>
      <td>1228.0</td>
      <td>-722.0</td>
      <td>-37.025641</td>
      <td>959.351648</td>
      <td>1023.484305</td>
      <td>64.132657</td>
      <td>6.685000</td>
    </tr>
    <tr>
      <th>1</th>
      <td>AR</td>
      <td>1276.0</td>
      <td>808.0</td>
      <td>-468.0</td>
      <td>-36.677116</td>
      <td>828.773050</td>
      <td>943.687075</td>
      <td>114.914025</td>
      <td>13.865560</td>
    </tr>
    <tr>
      <th>2</th>
      <td>AZ</td>
      <td>2047.0</td>
      <td>2240.0</td>
      <td>193.0</td>
      <td>9.428432</td>
      <td>1197.033613</td>
      <td>1233.6050

In [109]:
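# DataFrames to hand off to Sam:
#   homeless_pdf         - total homeless count by state, one HR<year> column
#                          per year (holds 2007 to 2016; 2011 to 2016 is the range of interest)
#   rentprice_mean_years - mean rent price by state, one RP<year> column
#                          per year, 2011 to 2016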