In [1]:
# Importing libraries

import pandas as pd
import numpy as np
import requests
import json
import os

# Visualization
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import plotly
%matplotlib inline

# Ignore warnings
import warnings
warnings.filterwarnings("ignore")

# sklearn
from sklearn.model_selection import train_test_split

# stats
from scipy.stats import pearsonr, spearmanr
import statistics as s

# functions
import acquire
import prepare

In [2]:
# Load the raw Zillow frame through the project-local `acquire` module.
rawdf = acquire.get_zillow_data()

In [5]:
# Show the raw frame; pandas abbreviates long frames to their first and last rows.
rawdf

Unnamed: 0,location,bedrooms,bathrooms,square_feet,lot_size,fips_code,year_built,assessed_value,tax_amount
0,"-118,221043, 34,184517",3.0,3.0,2538.0,39238.0,6037.0,1966.0,878288.0,9635.37
1,"-117,616376, 33,644714",4.0,2.5,2761.0,5800.0,6059.0,1998.0,556255.0,8381.04
2,"-118,737528, 34,262618",3.0,2.5,1371.0,4321.0,6111.0,2012.0,404542.0,4705.74
3,"-118,336609, 34,181076",3.0,2.0,1676.0,7025.0,6037.0,1940.0,283892.0,3151.85
4,"-118,294446, 33,814856",4.0,3.0,2352.0,5398.0,6037.0,1989.0,293034.0,3839.25
...,...,...,...,...,...,...,...,...,...
52436,"-118,145291, 34,134922",3.0,1.0,1448.0,5063.0,6037.0,1912.0,39763.0,759.61
52437,"-118,807277, 34,168646",5.0,5.5,6393.0,44431.0,6111.0,1991.0,3250000.0,34966.66
52438,"-117,822461, 33,626142",4.0,4.5,2883.0,5909.0,6059.0,2004.0,1831829.0,23025.28
52439,"-117,871235, 33,719480",2.0,1.0,1095.0,6250.0,6059.0,1949.0,202108.0,2522.32


In [4]:
# Clean the raw frame with the project-local `prepare` module.
# NOTE(review): the recorded run of this cell ended with
# "AttributeError: 'DataFrame' object has no attribute 'has_pool'", and the
# displayed raw frame has no has_pool column. Presumably prep_zillow reads a
# column that acquire no longer returns; confirm and fix, because the cells
# that use `df` below cannot run until this one succeeds.
df = prepare.prep_zillow(rawdf)

AttributeError: 'DataFrame' object has no attribute 'has_pool'

In [None]:
# Preview the first rows of the prepared frame.
df.head()

In [None]:
# Unpack the three frames returned by the project's split helper.
train, validate, test = prepare.split(df)

In [None]:
# Order the training rows by living area, smallest first.
# Rebinding instead of inplace=True keeps the cell safe to re-run and avoids
# mutating a frame that may be a slice of `df`.
train = train.sort_values(by='square_feet')

In [None]:
# Hand-drawn GeoJSON FeatureCollection holding a single Polygon feature.
# The ring is closed: its last coordinate pair repeats the first.
zillow_json = {
    "type": "FeatureCollection",
    "features": [
        {
            "type": "Feature",
            "properties": {},
            "geometry": {
                "type": "Polygon",
                # One ring, one [x, y] pair per line.
                "coordinates": [[
                    [-119.33898925781251, 34.31621838080741],
                    [-119.31427001953125, 34.19817309627726],
                    [-118.74847412109375, 33.868135032968624],
                    [-117.74322509765624, 33.4039312002347],
                    [-117.58392333984375, 33.43144133557529],
                    [-117.60040283203125, 33.881817226884806],
                    [-118.11950683593749, 34.266296360583546],
                    [-118.99291992187499, 34.45221847282654],
                    [-119.30877685546876, 34.4861839632883],
                    [-119.33898925781251, 34.31621838080741],
                ]],
            },
        },
    ],
}

In [None]:
# Re-type fips_code as object dtype before the following cells turn it into text.
rawdf['fips_code'] = rawdf['fips_code'].astype('object')

In [None]:
# Build the five-character FIPS string (state + county), zero-padded on the left.
# Going through int() drops the float's ".0" suffix (6037.0 -> '06037' rather
# than '06037.0'), and na_action='ignore' leaves missing codes as NaN instead of
# producing the text '0nan'.
rawdf['fips'] = rawdf['fips_code'].map(
    lambda code: '{:05d}'.format(int(float(code))),
    na_action='ignore',
)

In [None]:
# Keep only the first five characters of the string code in a separate FIPS column.
rawdf['FIPS'] = rawdf['fips'].str[:5]

In [None]:
# Convert the five-character code to an integer (the leading zero is dropped).
rawdf['FIPS'] = rawdf['FIPS'].astype(int)

In [None]:
# Discard every row that contains a missing value; this rebinds `rawdf`.
# NOTE(review): this runs after the FIPS int cast above, so a missing
# fips_code would already have failed there; consider dropping nulls first.
rawdf = rawdf.dropna()

In [None]:
# County-level choropleth of assessed value.
# The frame holds one row per parcel, so it is reduced to one median value per
# county; each location is then passed to the map exactly once.
county_values = (
    rawdf.assign(county_fips=rawdf['fips'].str[:5])  # 5-character state+county code
    .groupby('county_fips', as_index=False)['assessed_value']
    .median()
)

# NOTE(review): the only feature in zillow_json has no "id" and empty
# "properties", so no value in `locations` can be matched to a shape. Use a
# county GeoJSON keyed by FIPS (or add ids / featureidkey) for the map to fill.
fig = px.choropleth(county_values, geojson=zillow_json, locations='county_fips',
                    color='assessed_value',
                    color_continuous_scale='Viridis',
                    # No range_color: the old (0, 12) bound was far below the
                    # dollar-valued data, so the scale is left to auto-fit.
                    scope='usa',
                    # Keys must be column names; relabel the plotted column.
                    labels={'assessed_value': 'assessed value'}
                    )
fig.update_layout(margin={'r': 0, 't': 0, 'l': 0, 'b': 0})
fig.show()

In [None]:
import plotly.figure_factory as ff

In [None]:
# Tag every row with a constant state name; the choropleth cells below filter on it.
rawdf['stname']='California'

In [None]:
# County choropleth of assessed value for the rows tagged 'California'.
df_sample_r = rawdf[rawdf['stname'] == 'California']

# Collapse the parcel rows to one median per county so every FIPS code is
# passed once with a single value (previously one entry per parcel was sent).
county_median = df_sample_r.groupby('FIPS')['assessed_value'].median()
values = county_median.tolist()
fips = county_median.index.tolist()

# Four evenly spaced bin edges between the smallest and largest county value.
endpts = list(np.mgrid[min(values):max(values):4j])
colorscale = ["#030512","#1d1d3b","#323268","#3d4b94","#3e6ab0",
              "#4989bc","#60a7c7","#85c5d3","#b7e0e4","#eafcfd"]
fig = ff.create_choropleth(
    fips=fips, values=values, scope=['California'], show_state_data=True,
    colorscale=colorscale, binning_endpoints=endpts, round_legend_values=True,
    plot_bgcolor='rgb(229,229,229)',
    paper_bgcolor='rgb(229,229,229)',
    # The plotted quantity is assessed value, not population.
    legend_title='Median Assessed Value by County',
    county_outline={'color': 'rgb(255,255,255)', 'width': 0.5},
    exponent_format=True,
)
fig.layout.template = None
fig.show()

In [None]:
import plotly.figure_factory as ff

# Minimal figure-factory example: a fixed set of FIPS codes, each coloured by
# its position in the list.
fips = [
    '06021',
    '06023',
    '06027',
    '06029',
    '06033',
    '06059',
    '06047',
    '06049',
    '06051',
    '06055',
    '06061',
]
values = range(len(fips))

fig = ff.create_choropleth(
    fips=fips,
    values=values,
)
fig.layout.template = None
fig.show()

In [None]:
# County choropleth of assessed value for the rows tagged 'California',
# followed by a legend/annotation position tweak.
df_sample_r = rawdf[rawdf['stname'] == 'California']

# Collapse the parcel rows to one median per county so every FIPS code is
# passed once with a single value (previously one entry per parcel was sent).
county_median = df_sample_r.groupby('FIPS')['assessed_value'].median()
values = county_median.tolist()
fips = county_median.index.tolist()

colorscale = ["#030512","#1d1d3b","#323268","#3d4b94","#3e6ab0",
              "#4989bc","#60a7c7","#85c5d3","#b7e0e4","#eafcfd"]
# Four evenly spaced bin edges between the smallest and largest county value.
endpts = list(np.mgrid[min(values):max(values):4j])
fig = ff.create_choropleth(fips=fips, values=values, scope=['California'], show_state_data=True,
    colorscale=colorscale, binning_endpoints=endpts, round_legend_values=True,
    plot_bgcolor='rgb(229,229,229)',
    paper_bgcolor='rgb(229,229,229)',
    # The plotted quantity is assessed value, not population.
    legend_title='Median Assessed Value by County',
    county_outline={'color': 'rgb(255,255,255)', 'width': 0.5},
    exponent_format=True,
)
fig.update_layout(
    legend_x = 0,
    annotations = {'x': -0.12, 'xanchor': 'left'}
)

fig.layout.template = None
fig.show()