In [1]:
import json
import os

import geopandas as gpd
import numpy as np
import pandas as pd
import plotly
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots

In [2]:
# One path for the neighborhood boundary file so both loaders below read the
# same file (the GeoDataFrame load previously pointed at a bare
# 'map2.geojson' in the working directory).
MAP_PATH = '../map/map2.geojson'

# listing-level data
df = pd.read_json('../data/model_data/data.json')

# raw GeoJSON dict, passed to the plotly choropleth traces further down
with open(MAP_PATH) as file:
    neighborhoods = json.load(file)

# the same boundaries as a GeoDataFrame
geojson_data = gpd.read_file(MAP_PATH)

In [84]:
# Flatten every neighborhood's outer ring into two parallel coordinate lists.
# A None entry follows each ring so consecutive polygons stay separated in the
# flat lists (presumably so a line trace breaks between them).
lat, lon = [], []
coords = gpd.GeoSeries.from_file('../map/map2.geojson')
for polygon in coords:
    ring_x, ring_y = polygon.exterior.coords.xy
    lat += [*ring_y, None]
    lon += [*ring_x, None]

## preliminary cleaning

In [3]:
def fix_bed(x):
    """Normalize a raw bed-count value.

    Parameters
    ----------
    x : object
        Raw value from the ``bed`` column (number, numeric string, the
        literal ``'room'``, or anything else).

    Returns
    -------
    float or int
        ``float(x)`` when ``x`` converts to a non-zero number, ``1`` when
        ``x`` is the string ``'room'``, and ``np.nan`` for zero or for any
        value that cannot be converted.
    """
    try:
        beds = float(x)
    except (TypeError, ValueError, OverflowError):
        # Only conversion failures are treated as "not a number"; a bare
        # ``except`` here would also swallow KeyboardInterrupt / SystemExit.
        if x == 'room':
            return 1
        return np.nan

    # a bed count of zero is treated as missing
    if beds == 0:
        return np.nan
    return beds

# normalize every bed value with fix_bed and store the column as float
df['bed'] = df['bed'].apply(fix_bed).astype(float)

In [4]:
# drop obviously erroneous data points:
# keep rows with distance_to_CCTC below 20 and a non-null neighborhood ...
df = df.loc[df['distance_to_CCTC'].lt(20) & df['neighborhood'].notna()]
# ... and, of those, only listings priced above 300
df = df.loc[df['price'].gt(300)]

In [5]:
# calculate price per bed
# Vectorized replacement for the row-by-row loop: divide price by bed count,
# and keep that figure only where it is above 300. Everywhere else -- including
# rows whose bed count is NaN, where the ratio is NaN and the comparison is
# False -- fall back to the full listing price.
ppb = df['price'] / df['bed']
ppb = ppb.where(ppb > 300, df['price'])

df['ppb'] = ppb

In [244]:
# neighborhood names in sorted order, with the 'Other Surrounding Areas'
# bucket always placed last
nbhds = np.append(
    np.sort(df.loc[df['neighborhood'] != 'Other Surrounding Areas', 'neighborhood'].unique()),
    'Other Surrounding Areas',
)

# mean price for each neighborhood (rounded to 2 decimals) and
# number of listings (non-null addresses) for each neighborhood
mean_price = [round(df.loc[df['neighborhood'] == name, 'price'].mean(), 2) for name in nbhds]
n = [df.loc[df['neighborhood'] == name, 'address'].count() for name in nbhds]

df_nbhd = pd.DataFrame({'neighborhood': nbhds, 'price': mean_price, 'n': n})

df_nbhd

Unnamed: 0,neighborhood,price,n
0,East Packard,2636.36,197
1,Elbel,1721.84,345
2,Germantown,1757.26,299
3,North Burns Park,1269.37,257
4,North Ingalls,2016.21,183
5,Northside,1368.3,105
6,Old Fourth Ward,1570.22,384
7,Old West Side,1664.21,70
8,Oxbridge,3745.0,8
9,South University,1861.06,125


## map of neighborhoods, colored by average price

In [245]:
# choropleth of the neighborhood polygons, shaded by mean listing price;
# rows of df_nbhd are matched to GeoJSON features through properties.name
fig = px.choropleth_mapbox(
    df_nbhd,
    geojson=neighborhoods,
    featureidkey='properties.name',
    locations='neighborhood',
    color='price',
    color_continuous_scale='viridis',
    hover_data=['neighborhood', 'price', 'n'],
    opacity=0.7,
    mapbox_style='carto-positron',
    center=dict(lat=42.283497, lon=-83.735136),
    zoom=12.7,
)

In [246]:
# remove the figure margins, render inline, and export a standalone HTML copy
fig.update_layout(margin=dict(r=0, t=0, l=0, b=0))
fig.show()
plotly.offline.plot(
    fig,
    filename='./plotly-maps/neighborhood-map.html',
    auto_open=False,
)

'./plotly-maps/neighborhood-map.html'

## map of neighborhoods, colored by average price per bed

In [247]:
# sorted neighborhood names, 'Other Surrounding Areas' appended at the end
is_named_nbhd = df['neighborhood'] != 'Other Surrounding Areas'
nbhds = np.append(np.sort(df[is_named_nbhd].neighborhood.unique()), 'Other Surrounding Areas')

# mean price per bed for each neighborhood (infinite values excluded from the
# mean), number of listings for each neighborhood (all rows with an address)
mean_price, n = [], []
for name in nbhds:
    in_nbhd = df['neighborhood'] == name
    finite_ppb = df['ppb'] != np.inf
    mean_price.append(round(df[in_nbhd & finite_ppb].ppb.mean(), 2))
    n.append(df[in_nbhd].address.count())

df_nbhd = pd.DataFrame({'neighborhood': nbhds, 'ppb': mean_price, 'n': n})

df_nbhd

Unnamed: 0,neighborhood,ppb,n
0,East Packard,864.75,197
1,Elbel,852.79,345
2,Germantown,1132.48,299
3,North Burns Park,780.35,257
4,North Ingalls,891.59,183
5,Northside,868.44,105
6,Old Fourth Ward,1091.94,384
7,Old West Side,1081.04,70
8,Oxbridge,748.91,8
9,South University,993.06,125


In [253]:
# same choropleth as the price map, shaded by mean price per bed instead
fig = px.choropleth_mapbox(
    df_nbhd,
    geojson=neighborhoods,
    locations='neighborhood',
    featureidkey='properties.name',
    color='ppb',
    color_continuous_scale='viridis',
    opacity=0.7,
    hover_data=['neighborhood', 'ppb', 'n'],
    mapbox_style='carto-positron',
    zoom=12.7,
    center=dict(lat=42.283497, lon=-83.735136),
)

In [254]:
# remove the figure margins, render inline, and export a standalone HTML copy
fig.update_layout(margin=dict(r=0, t=0, l=0, b=0))
fig.show()
plotly.offline.plot(
    fig,
    filename='./plotly-maps/neighborhood-map-ppb.html',
    auto_open=False,
)

'./plotly-maps/neighborhood-map-ppb.html'

## map showing location of each listing by neighborhood

In [256]:
# Build a NEW list for the palette. `plotly.colors.qualitative.Prism` is the
# library's own module-level list; extending it in place would grow that shared
# palette on every re-run of this cell and change it for any other figure.
color_seq = list(plotly.colors.qualitative.Prism) + list(plotly.colors.qualitative.Set3[:3])

# one marker per listing, colored by its neighborhood
fig = px.scatter_mapbox(df, lat='lat', lon='lon',
                        hover_name='address', hover_data=['ppb'],
                        color='neighborhood',
                        color_discrete_sequence=color_seq,
                        mapbox_style="carto-positron", zoom=12.7,
                        center={'lat': 42.283497, 'lon': -83.735136})

In [257]:
# as you can see, there is some noise (miscoded listings) -- what can ya do!
# remove the figure margins, render inline, and export a standalone HTML copy
fig.update_layout(margin=dict(r=0, t=0, l=0, b=0))
fig.show()
plotly.offline.plot(
    fig,
    filename='./plotly-maps/listing-by-nbhd.html',
    auto_open=False,
)

'./plotly-maps/listing-by-nbhd.html'

## map showing each listing by price per bed

In [260]:
# The Mapbox "light" basemap used below needs an access token. Read it from
# the environment instead of committing the credential to the notebook.
token = os.environ.get('MAPBOX_ACCESS_TOKEN')
if not token:
    raise RuntimeError(
        'Set the MAPBOX_ACCESS_TOKEN environment variable to a Mapbox access '
        'token before running this cell.'
    )

# neighborhood polygons shaded by mean price per bed (from df_nbhd)
choro = go.Choroplethmapbox(
    geojson=neighborhoods,
    z=df_nbhd['ppb'],
    locations=list(df_nbhd['neighborhood']),
    featureidkey='properties.name',
    colorscale='viridis',
    zmax=2000,
    zmin=600,
    marker_opacity=0.3,
)

# one marker per listing, colored by its own price per bed with the same
# viridis scale and 600-2000 range as the polygons
scatt = go.Scattermapbox(
    lat=df['lat'],
    lon=df['lon'],
    mode='markers+text',
    marker={
        'color': df['ppb'],
        'colorscale': 'viridis',
        'cmax': 2000,
        'cmin': 600,
    },
    hoverinfo='text',
    hovertext=df['address'],
    customdata=df['ppb'],
    hovertemplate=(
        "<b>%{hovertext}</b><br>"
        "Price per Bed: $%{customdata:.2f}<br>"
        "<extra></extra>"
    ),
)

layout = go.Layout(
    title_text='housing map',
    title_x=0.5,
    width=1200,
    height=700,
    mapbox=dict(
        center={'lat': 42.283497, 'lon': -83.735136},
        accesstoken=token,
        zoom=12.3,
        style="light",
    ),
)

fig = go.Figure(data=[choro, scatt], layout=layout)
fig.show()
plotly.offline.plot(fig, filename='./plotly-maps/neighborhood-ppb-scatter.html', auto_open=False)

'./plotly-maps/neighborhood-ppb-scatter.html'

## map of listings by property type

In [153]:
# top 5 most common property types, and the listings that belong to them
types = df['property_type'].value_counts().head(5).index.tolist()
df_prop = df.loc[df['property_type'].isin(types)]

In [200]:
# display the property types selected above
types 

['apartment', 'house', 'duplex', 'room', 'condo']

In [331]:
# The Mapbox "light" basemap used by the layout further down needs an access
# token. Read it from the environment instead of committing the credential to
# the notebook.
token = os.environ.get('MAPBOX_ACCESS_TOKEN')
if not token:
    raise RuntimeError(
        'Set the MAPBOX_ACCESS_TOKEN environment variable to a Mapbox access '
        'token before running this cell.'
    )

# neighborhood outlines drawn as filled line shapes from the flat lon/lat
# lists built earlier in the notebook
boundaries = go.Scattermapbox(
    fill="toself", mode='lines',
    lon=lon, lat=lat,
    fillcolor='rgba(137, 196, 244, 0.1)',
    name='neighborhood boundaries',
    line={'color': 'rgba(0, 103, 205, 0.2)', 'width': 1.5}
)

In [332]:
# one marker trace per property type, each with its own color from the
# Vivid qualitative palette (colors assigned in the order of `types`)
markers = []
for color, prop_type in zip(px.colors.qualitative.Vivid, types):
    listings = df_prop.loc[df_prop['property_type'] == prop_type]
    markers.append(
        go.Scattermapbox(
            lat=listings['lat'],
            lon=listings['lon'],
            mode='markers+text',
            marker=dict(color=color, opacity=1),
            hoverinfo='text',
            hovertext=listings['address'],
            name=prop_type,
        )
    )

In [333]:
# boundaries first, followed by the per-type marker traces
traces = [boundaries, *markers]

In [334]:
# figure layout: fixed size, centered title, Mapbox "light" basemap using the
# access token defined earlier
layout = go.Layout(
    title_text='housing map',
    title_x=0.5,
    width=1200,
    height=700,
    mapbox=dict(
        center=dict(lat=42.283497, lon=-83.735136),
        accesstoken=token,
        zoom=12.3,
        style='light',
    ),
)

fig = go.Figure(data=traces, layout=layout)

# render inline and export a standalone HTML copy
fig.show()
plotly.offline.plot(
    fig,
    filename='./plotly-maps/neighborhood-prop_type-scatter.html',
    auto_open=False,
)

'./plotly-maps/neighborhood-prop_type-scatter.html'

## additional ideas
- neighborhoods by number of listings (density)
- neighborhoods by variability of price
- ???