### Zillow Rental Data Analysis

In [1]:
import pandas as pd
import json
import geopandas as gpd
import plotly.express as px
import plotly.graph_objects as go
import matplotlib.pyplot as plt
import matplotlib as mpl
import numpy as np
# import plotly
# import orca
import os
# plotly.io.orca.config.executable = r'C:\Users\steve\Anaconda3\pkgs\plotly-4.10.0-py_0\site-packages\plotly\io\orca'

In [None]:
# `display` and `HTML` are part of the public IPython.display module;
# importing them from IPython.core.display is deprecated.
from IPython.display import display, HTML

# Widen the notebook container so wide tables and maps are not clipped.
display(HTML("<style>.container { width:88% !important; }</style>"))
# Show every row and column when a DataFrame is rendered.
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)
# Relative paths used further down resolve against this directory.
os.chdir(r'C:\Users\steve\GitHub\rp-covid-migration')

In [None]:
# Load the Zillow Observed Rent Index (ZORI) table from the local CSV export.
df = pd.read_csv(
    filepath_or_buffer=r'C:\Users\steve\Documents\Internship\Zip_ZORI_AllHomesPlusMultifamily_SSA.csv'
)

In [None]:
# Path to the ZCTA boundary GeoJSON, relative to the working directory.
geozip = r'data\shapefiles\31CR_ZCTAs.geojson'

# Raw GeoJSON dict; it is handed to plotly as the `geojson` argument later on.
# The context manager closes the file handle that a bare open() would leak,
# and GeoJSON is UTF-8 by specification, so decode it explicitly.
with open(geozip, 'r', encoding='utf-8') as geojson_file:
    zips = json.load(geojson_file)

# The same file as a GeoDataFrame; `geozip` is rebound from the path to the frame.
geozip = gpd.read_file(geozip, dtype={"ZCTA5CE10": str})

# Show the first feature to check which property keys are available.
zips['features'][0]

In [None]:
# Quick visual check of the geometries read into `geozip`.
geozip.plot()

## Y-o-Y % change and net change for January - present

In [None]:
# Pick the monthly columns for 2019 and 2020 by their 'YYYY-MM' label instead
# of by position, so the selection does not silently shift if columns are
# added to or removed from the CSV.
list_date = [
    col for col in df.columns.tolist()
    if str(col).startswith(('2019-', '2020-'))
]
# Wide -> long: one row per (RegionName, month label) with the index in `value`.
df = pd.melt(df, id_vars=['RegionName'], value_vars=list_date)
# Inner join keeps only zip codes present in both the boundary file and ZORI.
gdf = pd.merge(geozip, df, left_on='ZipInt', right_on='RegionName', how='inner')
gdf = gdf.drop(columns=['ALAND10', 'AWATER10'])

# Split the 'YYYY-MM' label into a string year and an integer month.
new = gdf["variable"].str.split("-", n=1, expand=True)
gdf["year"] = new[0]
gdf["month"] = new[1].astype(int)
# Keep months 1-9 only (presumably so both years cover the same months -- confirm).
gdf = gdf[gdf["month"] <= 9]
gdf.head(21)

In [None]:
# Inspect the column dtypes of the merged frame (year is split from a string, month is cast to int).
gdf.dtypes

In [None]:
# Mean rent index per zip code and year (input for the yearly comparison).
dff = gdf.groupby(['year', 'GEOID10'], as_index=False)['value'].mean()

# Mean rent index per zip code, year and month (input for the monthly comparison).
dff_month = gdf.groupby(['year', 'month', 'GEOID10'], as_index=False)['value'].mean()

# Goal: percent difference per zip code between 2019 and 2020,
#   (value_2020 - value_2019) / value_2019 * 100
dff_month.head()

In [None]:
# Reshape the monthly means to one row per (zip code, month) with one column
# per year, then keep only rows that have a value for both 2019 and 2020.
# The yearly frame `dff` is not pivoted in this cell.
dff_month = (
    dff_month
    .pivot_table(index=['GEOID10', 'month'], columns=['year'], values='value')
    .reset_index()
    .dropna(subset=['2019', '2020'])
)
dff_month.head(100)

In [None]:
def calcs_zillow(df, y0, y1):
    """Add change columns comparing ``y1`` against ``y0`` to ``df`` in place.

    Args:
        df: DataFrame that receives the new columns.
        y0: Baseline values.
        y1: Comparison values.

    Columns written:
        yoy_pct: change relative to ``y0``, in percent.
        net_ch: absolute change, ``y1 - y0``.
        net_pct: change relative to ``y1``, in percent.

    Returns:
        None; ``df`` is modified in place.
    """
    change = y1 - y0
    df['yoy_pct'] = change / y0 * 100
    df['net_ch'] = change
    # Read the stored column back so the division uses df's own index alignment.
    df['net_pct'] = df['net_ch'] / y1 * 100
# Compare the 2020 column against the 2019 column; the result columns are added to dff_month in place.
calcs_zillow(dff_month, dff_month['2019'], dff_month['2020'])

In [None]:
# Preview the first rows of dff_month.
dff_month.head()

In [None]:
# Store month as a string (presumably so it matches the string keys of months_dict used below -- confirm).
dff_month['month'] = dff_month['month'].astype(str)
dff_month.dtypes

In [None]:
# Summary statistics for the numeric columns of dff_month.
dff_month.describe()

In [None]:
# One static choropleth PNG per month. Each map is drawn from the rows passed
# in for that month, and the month name comes from the function argument, so
# every PNG carries its own data and title.
months_dict = {'1':'January',
               '2':'February',
               '3':'March',
               '4':'April',
               '5':'May',
               '6':'June',
               '7':'July',
               '8':'August',
               '9':'September'}
legend = 'Year-over-Year<br>% change'


def month_maps_zillow(dataframe, month):
    """Write a choropleth PNG of year-over-year rent change for one month.

    Args:
        dataframe: Rows to draw; must provide ``GEOID10`` and ``yoy_pct`` columns.
        month: Month name used in the map title and in the output file name.

    Returns:
        The plotly figure that was written to ``Maps/Zip_zillow_<month>.png``.
    """
    fignew = go.Figure(go.Choroplethmapbox(
        geojson = zips, locations = dataframe['GEOID10'],
        featureidkey="properties.GEOID10",
        # Use the rows passed in, not the full global table.
        z = dataframe['yoy_pct'],
        colorscale = 'RdBu',
        marker_line_width = 0.15,
        marker_line_color = 'lightgray',
        zmin = -30, zmid = 0, zmax = 30,
        colorbar_title = legend))
    fignew.update_layout(mapbox_zoom = 7.5,
                      mapbox_center = {'lat':40.7, 'lon':-73.95},
                      mapbox_style="carto-darkmatter",
                      width = 1250,
                      height = 1080,
                      legend = dict(yanchor = "top", y = 0.6, xanchor = "left", x = 0.10))
    fignew.add_annotation(text = f'Rent Change, Month of {month}<br>2019 vs 2020',
                         align = 'left', x = 0.03, y = 0.97,
                         showarrow = False,
                         bordercolor = None,
                         bgcolor = 'black',
                         font = dict(family = 'Arial', color = 'white', size = 24))
    # Make sure the output folder exists before writing the image.
    os.makedirs('Maps', exist_ok = True)
    fignew.write_image(f'Maps/Zip_zillow_{month}.png')
    return fignew


for m, mn in months_dict.items():
    # Select rows by the month column; the cast to str makes the comparison
    # work whether `month` is stored as int or as str.
    dataframe = dff_month[dff_month['month'].astype(str) == m]
    month_maps_zillow(dataframe, mn)

In [None]:
# Choropleth of the yearly year-over-year change. `locations` and `z` must come
# from the same frame so each value lines up with its zip code; previously
# `locations` was taken from dff_month while `z` came from dff.
# NOTE(review): dff.yoy_pct is assigned in a later cell of this notebook -- confirm the intended run order.
fignew = go.Figure(go.Choroplethmapbox(
        geojson = zips, locations = dff.GEOID10,
        featureidkey="properties.GEOID10",
        z = dff.yoy_pct,
        colorscale = 'RdBu',
        marker_line_width = 0.2,
        marker_line_color = 'lightgray',
        zmin = -25, zmid = 0, zmax = 25,
        colorbar_title = 'Year-over-Year<br>% change'))
fignew.update_layout(mapbox_zoom = 6.45,
                      mapbox_center = {'lat':40.7, 'lon':-73.95},
                      mapbox_style="carto-darkmatter",
                      width = 950,
                      height = 780,
                      legend = dict(yanchor = "top", y = 0.6, xanchor = "left", x = 0.10))
fignew.show()

In [None]:
# `dff` leaves the groupby in long form (year, GEOID10, value). The
# calculations below need one column per year, so pivot first; the guard keeps
# the cell safe to re-run once the frame is already wide.
if '2019' not in dff.columns:
    dff = dff.pivot_table(values='value', index='GEOID10', columns='year').reset_index()

# Percent change relative to 2019, absolute change, and change relative to 2020.
dff['yoy_pct'] = ((dff['2020']-dff['2019'])/dff['2019'])*100
dff['net_change'] = dff['2020']-dff['2019']
dff['net_pct'] = (dff['net_change']/dff['2020'])*100

In [None]:
# Summary statistics for the numeric columns of dff.
dff.describe()

In [None]:
# Static choropleth of the yearly year-over-year rent change per zip code.
fignew = go.Figure(
    data=[
        go.Choroplethmapbox(
            geojson=zips,
            featureidkey="properties.GEOID10",
            locations=dff['GEOID10'],
            z=dff['yoy_pct'],
            colorscale='RdBu',
            zmin=-25,
            zmid=0,
            zmax=25,
            marker=dict(line=dict(width=0.2, color='lightgray')),
            colorbar=dict(title='Year-over-Year<br>% change'),
        )
    ]
)
fignew.update_layout(
    mapbox=dict(
        zoom=6.45,
        center={'lat': 40.7, 'lon': -73.95},
        style="carto-darkmatter",
    ),
    width=950,
    height=780,
    legend=dict(yanchor="top", y=0.6, xanchor="left", x=0.10),
)
fignew.show()

In [None]:
# Animated choropleth: one frame per month of year-over-year rent change.
fig = px.choropleth_mapbox(
    data_frame=dff_month,
    geojson=zips,
    # Property in each GeoJSON feature that is matched against `locations`.
    featureidkey="properties.GEOID10",
    locations='GEOID10',
    color='yoy_pct',
    animation_frame='month',
    color_continuous_scale="RdBu",
    range_color=[-30, 30],
)
fig.update_layout(
    mapbox=dict(
        zoom=9.45,
        center={'lat': 40.7, 'lon': -73.95},
        style="carto-darkmatter",
    ),
    width=950,
    height=780,
    legend=dict(yanchor="top", y=0.6, xanchor="left", x=0.10),
)
fig.show()