In [24]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import plotly
from config import user, apikey
plotly.tools.set_credentials_file(username=user, api_key=apikey)
import plotly.plotly as py
import plotly.figure_factory as ff

# Load the two input tables from the working directory. Both CSVs are decoded
# as ISO-8859-1, so the encoding is named once and shared by the two reads.
CSV_ENCODING = 'ISO-8859-1'

data = pd.read_csv("real_estate_db.csv", encoding=CSV_ENCODING)
all_fips = pd.read_csv("all-geocodes-v2016.csv", encoding=CSV_ENCODING)

In [25]:
# Keep only the rent_gt_40 measure plus the state/county identifiers.
# .copy() makes this an independent frame: the following cells overwrite and
# add columns on rent_40_df, and doing that on a bare selection of `data`
# triggers pandas' SettingWithCopyWarning.
rent_40_df = data[["rent_gt_40", "STATEID", "COUNTYID", "state"]].copy()
rent_40_df.head()

Unnamed: 0,rent_gt_40,STATEID,COUNTYID,state
0,0.15135,2,16,Alaska
1,0.20455,2,20,Alaska
2,0.54368,2,20,Alaska
3,0.27286,2,20,Alaska
4,0.24829,2,20,Alaska


In [26]:
# Zero-pad the numeric IDs to fixed width (2-digit state, 3-digit county) so
# they can be concatenated into a FIPS string in a later cell.
# The columns are replaced wholesale rather than written through
# `.loc[:, col] = ...`: writing strings into an integer column in place is a
# dtype-incompatible set that newer pandas versions deprecate.
# "{:03d}" is the same format the original spelled as "{:003d}".
rent_40_df["STATEID"] = rent_40_df["STATEID"].map("{:02d}".format)
rent_40_df["COUNTYID"] = rent_40_df["COUNTYID"].map("{:03d}".format)

In [27]:
# Preview the first rows again after the ID columns were reformatted.
rent_40_df.head()

Unnamed: 0,rent_gt_40,STATEID,COUNTYID,state
0,0.15135,2,16,Alaska
1,0.20455,2,20,Alaska
2,0.54368,2,20,Alaska
3,0.27286,2,20,Alaska
4,0.24829,2,20,Alaska


In [28]:
# County FIPS key = state ID string followed by county ID string.
state_part = rent_40_df["STATEID"].map(str)
county_part = rent_40_df["COUNTYID"].map(str)
rent_40_df["FIPS"] = state_part + county_part

In [29]:
# Discard every row that has a missing value in any of the kept columns.
rent_40_clean = rent_40_df.dropna(axis=0, how="any")
rent_40_clean.head()

Unnamed: 0,rent_gt_40,STATEID,COUNTYID,state,FIPS
0,0.15135,2,16,Alaska,2016
1,0.20455,2,20,Alaska,2020
2,0.54368,2,20,Alaska,2020
3,0.27286,2,20,Alaska,2020
4,0.24829,2,20,Alaska,2020


In [36]:
# Average rent_gt_40 per county FIPS code.
# Only the numeric measure is selected before aggregating: the grouped frame
# also carries string columns (STATEID, COUNTYID, state), and calling .mean()
# across all of them raises a TypeError on pandas >= 2.0, where non-numeric
# columns are no longer dropped silently. The result has exactly the
# FIPS and rent_gt_40 columns shown in the cell output.
grouped_data = rent_40_clean.groupby(["FIPS"])
mean_rent_40 = grouped_data["rent_gt_40"].mean().reset_index()
mean_rent_40.head()

Unnamed: 0,FIPS,rent_gt_40
0,1001,0.3489
1,1003,0.312674
2,1005,0.386574
3,1007,0.25448
4,1009,0.36105


In [31]:
#Add leading zeros
# State codes are padded to 2 digits and county codes to 3 digits.
# The columns are replaced wholesale rather than written through
# `.loc[:, col] = ...`: writing strings into an integer column in place is a
# dtype-incompatible set that newer pandas versions deprecate.
# "{:03d}" is the same format the original spelled as "{:003d}".
all_fips["State Code (FIPS)"] = all_fips["State Code (FIPS)"].map("{:02d}".format)
all_fips["County Code (FIPS)"] = all_fips["County Code (FIPS)"].map("{:03d}".format)

In [32]:
# Full FIPS key: the state code string followed by the county code string.
state_code = all_fips["State Code (FIPS)"].map(str)
county_code = all_fips["County Code (FIPS)"].map(str)
all_fips["FIPS"] = state_code + county_code

# Keep only counties: rows whose county code is "000" are filtered out.
is_county_row = all_fips["County Code (FIPS)"] != "000"
county_fips = all_fips[is_county_row].reset_index()

# Single-column frame holding just the FIPS key.
clean_fips = county_fips.loc[:, ["FIPS"]]
clean_fips.head()

Unnamed: 0,FIPS
0,1001
1,1003
2,1005
3,1007
4,1009


In [33]:
# Outer-join the county list with the per-county means on the FIPS key, so
# counties that have no mean keep a row (with NaN in rent_gt_40 for now).
# FIPS is the only column clean_fips has, so naming it explicitly matches the
# key pandas would otherwise infer.
merge_table = pd.merge(clean_fips, mean_rent_40, how="outer", on="FIPS")

#Fill missing counties with median
# The filled column is assigned back instead of calling
# fillna(inplace=True) on `merge_table["rent_gt_40"]`: that chained in-place
# call operates on a temporary object and leaves merge_table unchanged when
# pandas Copy-on-Write is active.
median_rent_40 = mean_rent_40["rent_gt_40"].median()
merge_table["rent_gt_40"] = merge_table["rent_gt_40"].fillna(median_rent_40)

merge_table.head()

Unnamed: 0,FIPS,rent_gt_40
0,1001,0.3489
1,1003,0.312674
2,1005,0.386574
3,1007,0.25448
4,1009,0.36105


In [71]:
# Split the county table into four regional frames, keyed on the 2-digit
# state prefix of each FIPS code. The groups line up with the U.S. Census
# Bureau regions.
# Fix: prefix "04" (Arizona) appeared in none of the four lists, so Arizona
# counties were absent from every regional frame. It is added to the West.
state_prefix = merge_table["FIPS"].str[:2]

northeast = merge_table.loc[state_prefix.isin(
    ["09", "23", "25", "33", "44", "50", "34", "36", "42"]
)].reset_index(drop=True)

midwest = merge_table.loc[state_prefix.isin(
    ["17", "18", "26", "39", "55", "19", "20", "27", "29", "31", "38", "46"]
)].reset_index(drop=True)

south = merge_table.loc[state_prefix.isin(
    ["10", "11", "12", "13", "24", "37", "45", "51", "54", "01", "21", "28", "47", "05", "22", "40", "48"]
)].reset_index(drop=True)

west = merge_table.loc[state_prefix.isin(
    ["04", "08", "16", "30", "32", "35", "49", "56", "02", "06", "15", "41", "53"]
)].reset_index(drop=True)


west.head()

Unnamed: 0,FIPS,rent_gt_40
0,2013,0.309533
1,2016,0.15135
2,2020,0.232252
3,2050,0.1599
4,2060,0.11278


# Northeast

In [66]:
# County choropleth of rent_gt_40 for the Northeast frame.
# 16 colours, listed from green to red.
colorscale = ["00E53D","00E219","09DF00","2BDD00","4DDA00","6DD800","8DD500","ACD300",
              "CBD000","CEB300","CB9200","C97100","C65000","C43100","C11200","BF000A"]

# One fewer endpoint than colours, evenly spaced from 0 up to 0.86364 (the
# largest county mean listed in the top-100 table further down).
endpts = list(np.linspace(0, 0.86364, len(colorscale) - 1))
fips = northeast["FIPS"]
values = northeast["rent_gt_40"]
# Fix: "Rhode Island" was missing from the scope although its state prefix
# ("44") is part of the northeast frame.
scope = ["Connecticut", "Maine", "Massachusetts", "New Hampshire", "Rhode Island",
         "New Jersey", "Vermont", "New York", "Pennsylvania"]

fig = ff.create_choropleth(fips=fips, values=values, binning_endpoints=endpts, scope=scope,
                           colorscale=colorscale, county_outline={'color': 'rgb(255,255,255)', 'width': 0.5})
py.iplot(fig, filename='northeast')


Sorting because non-concatenation axis is not aligned. A future version
of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=True'.




Woah there! Look at all those points! Due to browser limitations, the Plotly SVG drawing functions have a hard time graphing more than 500k data points for line charts, or 40k points for other types of charts. Here are some suggestions:
(1) Use the `plotly.graph_objs.Scattergl` trace object to generate a WebGl graph.
(2) Trying using the image API to return an image instead of a graph URL
(3) Use matplotlib
(4) See if you can create your visualization with fewer data points




The draw time for this plot will be slow for all clients.



Estimated Draw Time Too Long



# Midwest

In [70]:
# County choropleth of rent_gt_40 for the Midwest frame.
# 16 colours, listed from green to red.
colorscale = ["00E53D","00E219","09DF00","2BDD00","4DDA00","6DD800","8DD500","ACD300",
              "CBD000","CEB300","CB9200","C97100","C65000","C43100","C11200","BF000A"]

# One fewer endpoint than colours, evenly spaced from 0 up to 0.86364 (the
# largest county mean listed in the top-100 table further down).
endpts = list(np.linspace(0, 0.86364, len(colorscale) - 1))
fips = midwest["FIPS"]
values = midwest["rent_gt_40"]
# Fix: "Iowa" was missing from the scope although its state prefix ("19") is
# part of the midwest frame.
scope = ["Illinois", "Indiana", "Michigan", "Ohio", "Wisconsin", "Iowa", "Kansas", "Minnesota",
         "Missouri", "Nebraska", "North Dakota", "South Dakota"]

fig = ff.create_choropleth(fips=fips, values=values, binning_endpoints=endpts, scope=scope,
                           colorscale=colorscale, county_outline={'color': 'rgb(255,255,255)', 'width': 0.25})
py.iplot(fig, filename='midwest')


Sorting because non-concatenation axis is not aligned. A future version
of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=True'.




Woah there! Look at all those points! Due to browser limitations, the Plotly SVG drawing functions have a hard time graphing more than 500k data points for line charts, or 40k points for other types of charts. Here are some suggestions:
(1) Use the `plotly.graph_objs.Scattergl` trace object to generate a WebGl graph.
(2) Trying using the image API to return an image instead of a graph URL
(3) Use matplotlib
(4) See if you can create your visualization with fewer data points




The draw time for this plot will be slow for all clients.



Estimated Draw Time Too Long



# South

In [72]:
# County choropleth of rent_gt_40 for the South frame.
# 16 colours, listed from green to red.
colorscale = [
    "00E53D", "00E219", "09DF00", "2BDD00", "4DDA00", "6DD800", "8DD500", "ACD300",
    "CBD000", "CEB300", "CB9200", "C97100", "C65000", "C43100", "C11200", "BF000A",
]

# One fewer endpoint than colours, evenly spaced between 0 and 0.86364.
endpts = list(np.linspace(0, 0.86364, len(colorscale) - 1))
fips = south["FIPS"]
values = south["rent_gt_40"]
scope = [
    "Delaware", "District of Columbia", "Florida", "Georgia", "Maryland", "North Carolina",
    "South Carolina", "Virginia", "West Virginia", "Alabama", "Kentucky", "Mississippi", "Tennessee",
    "Arkansas", "Louisiana", "Oklahoma", "Texas",
]

fig = ff.create_choropleth(
    fips=fips,
    values=values,
    binning_endpoints=endpts,
    scope=scope,
    colorscale=colorscale,
    county_outline=dict(color='rgb(255,255,255)', width=0.25),
)
py.iplot(fig, filename='south')


Sorting because non-concatenation axis is not aligned. A future version
of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=True'.





# West

In [75]:
# County choropleth of rent_gt_40 for the West frame.
# 16 colours, listed from green to red.
colorscale = ["00E53D","00E219","09DF00","2BDD00","4DDA00","6DD800","8DD500","ACD300",
              "CBD000","CEB300","CB9200","C97100","C65000","C43100","C11200","BF000A"]

# One fewer endpoint than colours, evenly spaced from 0 up to 0.86364 (the
# largest county mean listed in the top-100 table further down).
endpts = list(np.linspace(0, 0.86364, len(colorscale) - 1))
fips = west["FIPS"]
values = west["rent_gt_40"]
# Fix: "Arizona" was missing from the scope, leaving a hole in the Western
# map between California/Nevada/Utah and New Mexico.
scope = ["Arizona", "Colorado", "Idaho", "Montana", "Nevada", "New Mexico", "Utah", "Wyoming", "Alaska",
         "California", "Hawaii", "Oregon", "Washington"]

fig = ff.create_choropleth(fips=fips, values=values, binning_endpoints=endpts, scope=scope,
                           colorscale=colorscale, county_outline={'color': 'rgb(255,255,255)', 'width': 0.25})
py.iplot(fig, filename='west')


Sorting because non-concatenation axis is not aligned. A future version
of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=True'.





In [None]:
# Territories (unfinished placeholder cell)

In [12]:
# Nationwide county choropleth of rent_gt_40 built from the full merge_table.
# NOTE(review): the output recorded for this cell is a PlotlyError
# ("'sort' is not allowed in 'layout'"); that looks like a plotly/pandas
# version problem rather than a problem with these arguments - confirm.
# 16 colours, listed from green to red.
colorscale = [
    "00E53D", "00E219", "09DF00", "2BDD00", "4DDA00", "6DD800", "8DD500", "ACD300",
    "CBD000", "CEB300", "CB9200", "C97100", "C65000", "C43100", "C11200", "BF000A",
]

# One fewer endpoint than colours, evenly spaced between 0 and 0.86364.
endpts = list(np.linspace(0, 0.86364, len(colorscale) - 1))
fips = merge_table["FIPS"]
values = merge_table["rent_gt_40"]

fig = ff.create_choropleth(
    fips=fips,
    values=values,
    scope=['usa'],
    binning_endpoints=endpts,
    colorscale=colorscale,
    show_state_data=False,
    show_hover=True,
    centroid_marker=dict(opacity=0),
    county_outline=dict(color='rgb(255,255,255)', width=0.5),
    asp=2.9,
    title='Percentage of Population whose Rent Exceeds 40% of Income',
    legend_title='Percentage of Population',
)

py.iplot(fig, filename='choropleth_full_usa')


Sorting because non-concatenation axis is not aligned. A future version
of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=True'.





PlotlyError: Invalid 'figure_or_data' argument. Plotly will not be able to properly parse the resulting JSON. If you want to send this 'figure_or_data' to Plotly anyway (not recommended), you can set 'validate=False' as a plot option.
Here's why you're seeing this error:

'sort' is not allowed in 'layout'

Path To Error: ['layout']['sort']

Valid attributes for 'layout' at path ['layout'] under parents ['figure']:

    ['angularaxis', 'annotations', 'autosize', 'bargap', 'bargroupgap',
    'barmode', 'barnorm', 'boxgap', 'boxgroupgap', 'boxmode', 'calendar',
    'colorway', 'datarevision', 'direction', 'dragmode', 'font', 'geo',
    'grid', 'height', 'hiddenlabels', 'hiddenlabelssrc', 'hidesources',
    'hoverdistance', 'hoverlabel', 'hovermode', 'images', 'legend',
    'mapbox', 'margin', 'orientation', 'paper_bgcolor', 'plot_bgcolor',
    'polar', 'radialaxis', 'scene', 'selectdirection', 'separators',
    'shapes', 'showlegend', 'sliders', 'spikedistance', 'ternary', 'title',
    'titlefont', 'updatemenus', 'violingap', 'violingroupgap',
    'violinmode', 'width', 'xaxis', 'yaxis']

Run `<layout-object>.help('attribute')` on any of the above.
'<layout-object>' is the object at ['layout']

In [49]:
# The 100 counties with the highest mean rent_gt_40, renumbered from 0.
hundred = mean_rent_40.sort_values(by="rent_gt_40", ascending=False).iloc[:100]
top_100 = hundred.reset_index(drop=True)
top_100.head()

Unnamed: 0,FIPS,rent_gt_40
0,26003,0.86364
1,51007,0.8232
2,72019,0.79675
3,51750,0.78646
4,28111,0.77647


In [50]:
# Tally the top-100 counties by region, using the 2-digit state prefix of each
# FIPS code. Each region list collects the rent_gt_40 values of its counties;
# prefixes that match no region (e.g. "72" in the table above) go to `territory`.
# NOTE: northeast/midwest/south/west are rebound here to plain lists; earlier
# in the notebook the same names hold the regional DataFrames.
northeast = []
midwest = []
south = []
west = []
territory = []

# Fix: prefix "04" (Arizona) was missing from the West group, so any Arizona
# county in the top 100 was counted as a territory.
bucket_by_prefix = {}
for bucket, prefixes in (
    (northeast, ("09", "23", "25", "33", "44", "50", "34", "36", "42")),
    (midwest, ("17", "18", "26", "39", "55", "19", "20", "27", "29", "31", "38", "46")),
    (south, ("10", "11", "12", "13", "24", "37", "45", "51", "54", "01", "21", "28", "47", "05", "22", "40", "48")),
    (west, ("04", "08", "16", "30", "32", "35", "49", "56", "02", "06", "15", "41", "53")),
):
    for prefix in prefixes:
        bucket_by_prefix[prefix] = bucket

# Walk the FIPS codes and their values in step; this replaces the hand-kept
# row counter that was used to look each value up again.
for fips_code, rent_share in zip(top_100["FIPS"], top_100["rent_gt_40"]):
    bucket_by_prefix.get(fips_code[:2], territory).append(rent_share)

print(len(northeast))
print(len(midwest))
print(len(south))
print(len(west))
print(len(territory))



1
10
63
10
16
