In [59]:
import json
import numpy as np
import pandas as pd
import plotly
import plotly.express as px

In [60]:
# Tabular data that the rest of the notebook filters, aggregates and maps.
df = pd.read_json('../data/model_data/data.json')

# GeoJSON is UTF-8 by specification. Passing the encoding explicitly keeps the
# load independent of the platform's locale default (e.g. cp1252 on Windows),
# which could otherwise fail on, or mis-decode, non-ASCII feature names.
with open('../map/map2.geojson', encoding='utf-8') as file:
    neighborhoods = json.load(file)

## preliminary cleaning

In [61]:
# Discard rows that are clearly bad: keep a row only when its
# 'distance_to_CCTC' is below 20 and its 'neighborhood' is not null.
is_near_cctc = df['distance_to_CCTC'] < 20
has_neighborhood = df['neighborhood'].notnull()
df = df[is_near_cctc & has_neighborhood]

In [81]:
# Neighborhood labels in display order: alphabetical, with the catch-all
# 'Other Surrounding Areas' bucket always placed last (it is appended even if
# no row carries that label).
nbhds = np.append(
    np.sort(df.loc[df['neighborhood'] != 'Other Surrounding Areas', 'neighborhood'].unique()),
    'Other Surrounding Areas',
)

# Mean price and number of non-null addresses per neighborhood, computed in a
# single grouped pass instead of re-filtering the whole frame for every label.
nbhd_stats = df.groupby('neighborhood').agg(
    price=('price', 'mean'),
    n=('address', 'count'),
)

# Reorder to the display order above. A label with no rows shows up as NaN
# after reindexing, so its count is filled with 0 (its mean price stays NaN).
df_nbhd = nbhd_stats.reindex(nbhds).rename_axis('neighborhood').reset_index()
df_nbhd['price'] = df_nbhd['price'].round(2)
df_nbhd['n'] = df_nbhd['n'].fillna(0).astype('int64')

df_nbhd

Unnamed: 0,neighborhood,price,n
0,East Packard,2636.36,317
1,Elbel,1711.92,557
2,Germantown,1757.26,407
3,North Burns Park,1269.37,271
4,North Ingalls,2016.21,309
5,Northside,1368.3,132
6,Old Fourth Ward,1570.22,509
7,Old West Side,1641.83,93
8,Oxbridge,3745.0,15
9,South University,1861.06,200


##  map of neighborhoods, colored by average price

In [84]:
# Shade each GeoJSON feature by the mean price of its neighborhood. Rows of
# df_nbhd are matched to features by comparing the 'neighborhood' column with
# each feature's properties.name.
map_center = {'lat': 42.283497, 'lon': -83.735136}
price_scale = ['green', 'yellow', 'orange', 'red']

fig = px.choropleth_mapbox(
    df_nbhd,
    geojson=neighborhoods,
    featureidkey='properties.name',
    locations='neighborhood',
    color='price',
    color_continuous_scale=price_scale,
    hover_data=['neighborhood', 'price', 'n'],
    opacity=0.7,
    mapbox_style="carto-positron",
    center=map_center,
    zoom=12.7,
)

In [85]:
# Remove the figure margins on all four sides, then render the map.
fig.update_layout(margin=dict(r=0, t=0, l=0, b=0))
fig.show()

## map showing location of each "valid" listing

In [86]:
# One marker per remaining row of df, positioned by its 'lat'/'lon' columns and
# colored by neighborhood; hovering shows the address and the price.
fig = px.scatter_mapbox(
    df,
    lat='lat',
    lon='lon',
    color='neighborhood',
    color_discrete_sequence=plotly.colors.qualitative.Alphabet,
    hover_name='address',
    hover_data=['price'],
    mapbox_style="carto-positron",
    zoom=12,
)

In [87]:
# Note: some noise remains in this plot (miscoded listings); nothing here
# corrects for it.
fig.update_layout(margin=dict(r=0, t=0, l=0, b=0))
fig.show()

## additional ideas
- neighborhoods by number of listings (density)
- neighborhoods by variability of price
- ???