In [42]:
import numpy as np
import pandas as pd
import geopandas as gpd
import carto2gpd
import math as mt
import holoviews as hv
import hvplot.pandas

import altair as alt
import folium
from folium.plugins import MarkerCluster
from matplotlib import pyplot as plt


# Chicago data by block group

In [74]:
# Block-group socioeconomic data. set_crs(..., allow_override=True) only
# relabels the frame as EPSG:3857; it does not transform any coordinates.
# NOTE(review): this assumes the stored coordinates are already in EPSG:3857 — confirm.
chicago_data = gpd.read_file('data/chicago_data.geojson')
chicago_data = chicago_data.set_crs("epsg:3857", allow_override=True)

# Bus ridership is stored in EPSG:4326, so reproject it to EPSG:3857 to match.
bus_ridership = gpd.read_file('data/bus_ridership.geojson').to_crs("epsg:3857")

In [77]:
# Data cleaning: replace error values with the column average.
# The value -666666666.0 marks an invalid entry in median rent, median income
# and median age; each one is replaced by the mean of that column's valid rows.
MISSING_SENTINEL = -666666666.0
SENTINEL_COLUMNS = ['median_rent', 'median_income', 'median_age']

for column in SENTINEL_COLUMNS:
    cleaned = chicago_data[column].replace(MISSING_SENTINEL, np.nan)
    # Series.mean() skips NaN, so the sentinel rows do not bias the fill value.
    mean = cleaned.mean()
    chicago_data[column] = cleaned.fillna(mean)

# Cell output: mean median age after imputation.
chicago_data['median_age'].mean()

37.117421302285464

In [78]:
# Object-dtype columns are removed from the analysis frame, except the GEOID key.
object_columns = chicago_data.select_dtypes("object").columns
var_list = object_columns[~object_columns.isin(['GEOID'])]

# Drop those columns together with 'state', 'county', 'block group' and 'geometry'.
chicago_data_filter = chicago_data.drop(
    columns=[*var_list, 'state', 'county', 'block group', 'geometry']
)

chicago_data_filter.dtypes


GEOID                            object
total_pop                       float64
median_rent                     float64
median_income                   float64
median_age                      float64
educational_attainment_pop25    float64
bachelor_degree                 float64
master_degree                   float64
professional_degree             float64
doctoral_degree                 float64
white                           float64
african_american                float64
tenure_owner                    float64
with_children                   float64
tract                           float64
boardings                       float64
alightings                      float64
dtype: object

In [79]:
# Preview the filtered frame (GEOID plus the numeric columns).
chicago_data_filter

Unnamed: 0,GEOID,total_pop,median_rent,median_income,median_age,educational_attainment_pop25,bachelor_degree,master_degree,professional_degree,doctoral_degree,white,african_american,tenure_owner,with_children,tract,boardings,alightings
0,170313018023,1560.0,800.00000,36250.0,28.7,869.0,87.0,8.0,0.0,0.0,850.0,0.0,141.0,386.0,301802.0,165.0,54.0
1,170313018022,1178.0,796.00000,29643.0,25.4,599.0,25.0,0.0,0.0,1.0,677.0,33.0,64.0,334.0,301802.0,10.0,71.0
2,170310714002,2018.0,1628.00000,159205.0,38.8,1798.0,556.0,496.0,149.0,82.0,1666.0,208.0,568.0,101.0,71400.0,160.0,169.0
3,170310714004,739.0,1695.00000,238421.0,35.1,597.0,192.0,201.0,150.0,19.0,562.0,0.0,295.0,117.0,71400.0,255.0,153.0
4,170310702002,1187.0,1431.00000,181736.0,30.8,819.0,315.0,103.0,117.0,33.0,1116.0,49.0,242.0,214.0,70200.0,92.0,137.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2322,170311710002,731.0,1145.33871,136382.0,54.0,590.0,67.0,65.0,0.0,16.0,566.0,0.0,284.0,75.0,171000.0,29.0,99.0
2323,170311710003,1022.0,1145.33871,113295.0,46.8,718.0,167.0,126.0,0.0,14.0,589.0,311.0,293.0,161.0,171000.0,24.0,82.0
2324,170311710005,1178.0,769.00000,84766.0,49.9,885.0,142.0,58.0,15.0,0.0,862.0,39.0,347.0,137.0,171000.0,31.0,30.0
2325,170314606001,1200.0,840.00000,39837.0,36.7,819.0,103.0,22.0,5.0,1.0,317.0,876.0,210.0,181.0,460600.0,26.0,96.0


In [80]:
# Scatter-plot helper (hvplot) used by the interactive explorer
def chicago_data_plot(x='boardings', y='alightings', color='#058805'):
    """Return an hvplot scatter of two ``chicago_data_filter`` columns.

    Parameters
    ----------
    x, y : str
        Names of the columns drawn on the horizontal and vertical axes.
    color : str
        Color passed to hvplot for the points.
    """
    scatter_options = dict(x=x, y=y, c=color, padding=0.1)
    return chicago_data_filter.hvplot.scatter(**scatter_options)

# Plottable columns: every numeric column of the filtered frame. Selecting by
# dtype instead of slicing off position 0 keeps the GEOID key out even if it is
# not the first column, and ignores any string column (e.g. 'labels') added later.
columns = list(chicago_data_filter.select_dtypes('number').columns)
columns

['total_pop',
 'median_rent',
 'median_income',
 'median_age',
 'educational_attainment_pop25',
 'bachelor_degree',
 'master_degree',
 'professional_degree',
 'doctoral_degree',
 'white',
 'african_american',
 'tenure_owner',
 'with_children',
 'tract',
 'boardings',
 'alightings']

In [81]:
# Load panel and enable interactive features.
# pn.extension() must run before any panel widget or layout is displayed in
# the notebook.
import panel as pn
pn.extension()

In [82]:
# Color picker that supplies the `color` argument of chicago_data_plot
color = pn.widgets.ColorPicker(value='#4f4fdf', name='Color')

# Let panel generate the controls: x and y each choose among `columns`
layout = pn.interact(chicago_data_plot, x=columns, y=columns, color=color)

# Dashboard: title stacked above layout[0] on the left, layout[1] on the right
sidebar = pn.Column('## Chicago Socioeconomic Explorer', layout[0])
interact_dashboard = pn.Row(sidebar, layout[1])
interact_dashboard

In [85]:
# Dropdown choosing which column's distribution is shown
x = pn.widgets.Select(options=columns, name='x')

@pn.depends(x)
def plot(x):
    """Return a histogram of the ``chicago_data`` column named by ``x``.

    Declared as depending on the ``x`` selector, so panel re-evaluates it with
    the selector's current value.
    """
    hist_options = dict(
        subplots=True,
        padding=0.1,
        responsive=True,
        min_height=500,
        size=100,
    )
    return chicago_data.hvplot.hist(x, **hist_options)

# Titled histogram explorer: selector box above the plot. The layout is the
# cell's final expression and is marked servable, so it is both displayed in the
# notebook and included when the notebook is served with `panel serve`.
settings = pn.Row(pn.WidgetBox(x))
pn.Column(
    '### Chicago Socioeconomic Explorer',
    settings,
    plot,
    width_policy='max'
).servable()

In [86]:
## K-means clustering
from sklearn.cluster import KMeans

# Controls: the two columns to plot and the number of clusters to fit
x = pn.widgets.Select(options=columns, name='x')
y = pn.widgets.Select(options=columns, name='y')
n_clusters = pn.widgets.IntSlider(value=3, start=1, end=8, name='n_clusters')

@pn.depends(x.param.value, y.param.value, n_clusters.param.value)
def get_clusters(x, y, n_clusters):
    """Cluster the rows of ``chicago_data_filter`` with K-means and plot them.

    Parameters
    ----------
    x, y : str
        Columns shown on the horizontal and vertical axes of the scatter plot.
    n_clusters : int
        Number of clusters to fit.

    Returns
    -------
    An overlay of two hvplot scatters: the rows coloured by cluster label, and
    the per-cluster column means drawn as black 'x' markers.
    """
    # Fit on every numeric column. Selecting by dtype keeps the GEOID string
    # (and any 'labels' column left in the frame by an earlier run) out of the
    # feature matrix and keeps 'alightings' in it, so every callback clusters
    # on the same features.
    # NOTE(review): features are unscaled and still include 'tract', which
    # looks like an identifier — consider dropping it and standardising.
    features = chicago_data_filter.select_dtypes('number')

    # Fixed seed and explicit n_init so repeated callbacks with the same
    # settings give the same clusters regardless of library defaults.
    kmeans = KMeans(n_clusters=n_clusters, n_init=10, random_state=0)
    labels = kmeans.fit(features.values).labels_.astype('str')

    # assign() returns a new frame, so the shared chicago_data_filter is not
    # modified from inside a plotting callback.
    clustered = chicago_data_filter.assign(labels=labels)

    # numeric_only skips the object-dtype GEOID column when averaging.
    centers = clustered.groupby('labels').mean(numeric_only=True)

    return (clustered.sort_values('labels').hvplot.scatter(x, y, c='labels', size=100, height=500) *
            centers.hvplot.scatter(x, y, marker='x', color='black', size=400,
                                   padding=0.1, line_width=5))

# Page title above a row holding the control box and the live cluster plot
kmeans_controls = pn.WidgetBox(x, y, n_clusters)
kmeans_view = pn.Row(kmeans_controls, get_clusters)
pn.Column('# K-Means Clustering', kmeans_view).servable()