# Assignment03. Exploring Urban Mobility with Spatial Data

## Load the Packages

In [36]:
!pip install fiona
!pip install geopandas
!pip install sodapy -q
!pip install adjustText
!pip install plotly --upgrade
!pip install contextily
!pip install mapclassify
!pip install chart-studio



In [37]:
# Ignore unimportant system warnings
import warnings
warnings.filterwarnings("ignore")

# We will use Pandas, Numpy, and Matplotlib which is a package for visualization with Python
import pandas as pd
import numpy as np

# Load a required package 
# This is a library for accessing and parsing data through URLs
from urllib.parse import urlencode
import urllib.request, json 
from bs4 import BeautifulSoup # for web scraping
import fiona
import plotly.express as px
import geopandas as gpd
import adjustText as aT
import seaborn as sns # visualization styling package
import plotly.graph_objects as go
import urllib, json
import chart_studio.plotly as py
import chart_studio.tools as tls
import os
from sodapy import Socrata
from IPython.display import Image, display
from shapely.geometry import Polygon
import time
from shapely.geometry import shape
import contextily as ctx

# A magic function that renders figures inline in the notebook
%matplotlib inline 

from matplotlib import pyplot as plt

#Chart Studio API
import chart_studio

# Credentials are read from the environment so that no secret is stored in
# the notebook (or in its version history).
# NOTE(review): an API key was previously hardcoded in this cell; treat it as
# exposed and regenerate it in the Chart Studio account settings.
username = os.environ.get('CHART_STUDIO_USERNAME', 'bkirthi')
api_key = os.environ.get('CHART_STUDIO_API_KEY')
if not api_key:
    # Fail early with an actionable message instead of a later upload error.
    raise RuntimeError(
        "Set the CHART_STUDIO_API_KEY environment variable "
        "(and optionally CHART_STUDIO_USERNAME) before running this cell."
    )

# Persist the credentials in Chart Studio's local credentials file.
chart_studio.tools.set_credentials_file(username=username,
                                        api_key=api_key)



## Get Community Area Boundaries and Project the Data

Boundaries—Community Areas (current) | City of Chicago | Data Portal. (n.d.). Chicago. Retrieved November 22, 2021, from https://data.cityofchicago.org/Facilities-Geographic-Boundaries/Boundaries-Community-Areas-current-/cauq-8yn6


In [55]:
!git clone https://github.com/kirthi-b/Chicago-Rideshare

Cloning into 'Chicago-Rideshare'...
remote: Enumerating objects: 22, done.[K
remote: Counting objects: 100% (22/22), done.[K
remote: Compressing objects: 100% (18/18), done.[K
remote: Total 22 (delta 2), reused 22 (delta 2), pack-reused 0[K
Unpacking objects: 100% (22/22), done.


In [101]:
# Load the community-area boundaries from the cloned repository.
# NOTE(review): GDAL's driver is named "ESRI Shapefile"; confirm that the
# installed I/O engine accepts the "shapefile" alias used here.
boundaries_dir = "/content/Chicago-Rideshare/Data/CA_Boundaries/"
gdf = gpd.read_file(boundaries_dir, driver="shapefile")

# Store the area number as an integer so it can be used as a join key.
gdf = gdf.assign(area_num_1=gdf['area_num_1'].astype(int))
gdf.head()

Unnamed: 0,area,area_num_1,area_numbe,comarea,comarea_id,community,perimeter,shape_area,shape_len,geometry
0,0.0,35,35,0.0,0.0,DOUGLAS,0.0,46004620.0,31027.05451,"POLYGON ((-87.60914 41.84469, -87.60915 41.844..."
1,0.0,36,36,0.0,0.0,OAKLAND,0.0,16913960.0,19565.506153,"POLYGON ((-87.59215 41.81693, -87.59231 41.816..."
2,0.0,37,37,0.0,0.0,FULLER PARK,0.0,19916700.0,25339.08975,"POLYGON ((-87.62880 41.80189, -87.62879 41.801..."
3,0.0,38,38,0.0,0.0,GRAND BOULEVARD,0.0,48492500.0,28196.837157,"POLYGON ((-87.60671 41.81681, -87.60670 41.816..."
4,0.0,39,39,0.0,0.0,KENWOOD,0.0,29071740.0,23325.167906,"POLYGON ((-87.59215 41.81693, -87.59215 41.816..."


In [63]:
# Show the coordinate reference system of the boundaries: identifier, name, datum
# (one value per line).
print(gdf.crs, gdf.crs.name, gdf.crs.datum, sep="\n")

epsg:4326
WGS 84
World Geodetic System 1984 ensemble


For Chicago, we should generally use a projected coordinate system: NAD83 / Illinois East (State Plane, FIPS 1201; EPSG:26971).

In [64]:
# Reproject the boundaries to EPSG:26971, then print the new CRS
# (identifier, name, datum - one value per line) to confirm the change.
gdf = gdf.to_crs(epsg=26971)
print(gdf.crs, gdf.crs.name, gdf.crs.datum, sep="\n")

epsg:26971
NAD83 / Illinois East
North American Datum 1983


## Get Rideshare Data

Transportation Network Providers—Trips | City of Chicago | Data Portal. (n.d.). Chicago. Retrieved November 22, 2021, from https://data.cityofchicago.org/Transportation/Transportation-Network-Providers-Trips/m6dm-c72p


In [40]:
# CSV endpoint of the Chicago Data Portal resource m6dm-c72p.
# The trailing '?' lets a URL-encoded query string be appended directly.
API = 'https://data.cityofchicago.org/resource/m6dm-c72p.csv?'

In [76]:
# Build the query: columns to return, the date window (rows without a pickup
# community area are excluded), the sort order, and the maximum number of rows.
selected_columns = 'trip_start_timestamp, trip_seconds, trip_miles, pickup_community_area, pickup_centroid_latitude, pickup_centroid_longitude'
row_filter = 'trip_start_timestamp >= "2020-10-01" and  trip_start_timestamp < "2021-11-01" and pickup_community_area IS NOT NULL '
row_limit = 5000  # change to 42000000 later

query = {
    '$select': selected_columns,
    '$where': row_filter,
    '$order': 'trip_start_timestamp',
    '$limit': row_limit,
}

# Download the matching trips as CSV straight into a DataFrame.
request_url = API + urlencode(query)
rs_df = pd.read_csv(request_url)
rs_df.head()

Unnamed: 0,trip_start_timestamp,trip_seconds,trip_miles,pickup_community_area,pickup_centroid_latitude,pickup_centroid_longitude
0,2020-10-01T00:00:00.000,798,3.6284,38,41.812949,-87.61786
1,2020-10-01T00:00:00.000,1895,7.1873,7,41.922686,-87.649489
2,2020-10-01T00:00:00.000,247,1.0866,6,41.944227,-87.655998
3,2020-10-01T00:00:00.000,787,3.6098,43,41.761578,-87.572782
4,2020-10-01T00:00:00.000,768,4.96,5,41.947792,-87.683835


In [77]:
# Split the trip start timestamp into separate calendar-date and time-of-day columns.
# The timestamp is read from rs_df itself (the previous code referenced an
# undefined `df`, which only worked with leftover kernel state) and is parsed once.
trip_start = pd.to_datetime(rs_df['trip_start_timestamp'])
rs_df['Date'] = trip_start.dt.date
rs_df['Time'] = trip_start.dt.time

In [78]:
# Drop trips that have any missing value and report the row count before and after.
# dropna() returns a new DataFrame, so the result must be assigned back;
# otherwise no rows are removed and both counts are identical.
print(rs_df.shape[0])
rs_df = rs_df.dropna()
print(rs_df.shape[0])

5000
5000


In [104]:
# Pickups per community area
# Count trips by pickup area, expose the area number under the join-key name
# 'area_num_1', make sure it is numeric, and order the table by area number.
pickup_ca = (
    rs_df['pickup_community_area']
    .value_counts()
    .rename_axis('area_num_1')
    .reset_index(name='Pickups')
    .assign(area_num_1=lambda counts: pd.to_numeric(counts['area_num_1']))
    .sort_values('area_num_1', ascending=True)
)
pickup_ca.head()

Unnamed: 0,area_num_1,Pickups
16,1,82
30,2,56
8,3,125
31,4,55
42,5,41


## Get Median Income Data

Community Data Snapshots Raw Data, July 2021 Release with 2020 Supplement—CMAP Data Hub. (n.d.). Retrieved November 22, 2021, from https://datahub.cmap.illinois.gov/dataset/community-data-snapshots-raw-data


In [99]:
# Read the income table from the cloned repository and rename its community-area
# column ("CA") to the join key used by the boundary data.
income_csv = "/content/Chicago-Rideshare/Data/Median_Income.csv"
inc_df = pd.read_csv(income_csv).rename(columns={"CA": "area_num_1"})
inc_df.head()

Unnamed: 0,area_num_1,CA_NAME,INCOME,NONWHITE
0,14,Albany Park,61759,68
1,57,Archer Heights,50458,85
2,34,Armour Square,31663,86
3,70,Ashburn,70223,91
4,71,Auburn Gresham,34396,99


## Add Median Income data to GeoDataFrame

In [102]:
# Attach the income table's columns to each community area.
# Inner join on the area number: only areas present in both tables are kept.
gdf = gdf.merge(right=inc_df, how='inner', on='area_num_1')
gdf.head()

Unnamed: 0,area,area_num_1,area_numbe,comarea,comarea_id,community,perimeter,shape_area,shape_len,geometry,CA_NAME,INCOME,NONWHITE
0,0.0,35,35,0.0,0.0,DOUGLAS,0.0,46004620.0,31027.05451,"POLYGON ((-87.60914 41.84469, -87.60915 41.844...",Douglas,31856,89
1,0.0,36,36,0.0,0.0,OAKLAND,0.0,16913960.0,19565.506153,"POLYGON ((-87.59215 41.81693, -87.59231 41.816...",Oakland,32844,97
2,0.0,37,37,0.0,0.0,FULLER PARK,0.0,19916700.0,25339.08975,"POLYGON ((-87.62880 41.80189, -87.62879 41.801...",Fuller Park,23148,96
3,0.0,38,38,0.0,0.0,GRAND BOULEVARD,0.0,48492500.0,28196.837157,"POLYGON ((-87.60671 41.81681, -87.60670 41.816...",Grand Boulevard,33503,96
4,0.0,39,39,0.0,0.0,KENWOOD,0.0,29071740.0,23325.167906,"POLYGON ((-87.59215 41.81693, -87.59215 41.816...",Kenwood,49114,81


## Add Pickup data to GeoDataFrame

In [105]:
# Attach the pickup counts to each community area.
# - A left join keeps every community area; with the default inner join, areas
#   without any pickup in the sample silently disappeared from the map data.
# - Any 'Pickups' column left over from a previous run is dropped first so the
#   cell can be re-run without producing Pickups_x / Pickups_y duplicates.
gdf = (
    gdf.drop(columns='Pickups', errors='ignore')
       .merge(pickup_ca, on='area_num_1', how='left')
)

# Areas with no matching row had no pickups: record them as 0 and keep integer counts.
gdf['Pickups'] = gdf['Pickups'].fillna(0).astype('int64')
gdf.head()

Unnamed: 0,area,area_num_1,area_numbe,comarea,comarea_id,community,perimeter,shape_area,shape_len,geometry,CA_NAME,INCOME,NONWHITE,Pickups
0,0.0,35,35,0.0,0.0,DOUGLAS,0.0,46004620.0,31027.05451,"POLYGON ((-87.60914 41.84469, -87.60915 41.844...",Douglas,31856,89,41
1,0.0,36,36,0.0,0.0,OAKLAND,0.0,16913960.0,19565.506153,"POLYGON ((-87.59215 41.81693, -87.59231 41.816...",Oakland,32844,97,11
2,0.0,37,37,0.0,0.0,FULLER PARK,0.0,19916700.0,25339.08975,"POLYGON ((-87.62880 41.80189, -87.62879 41.801...",Fuller Park,23148,96,15
3,0.0,38,38,0.0,0.0,GRAND BOULEVARD,0.0,48492500.0,28196.837157,"POLYGON ((-87.60671 41.81681, -87.60670 41.816...",Grand Boulevard,33503,96,50
4,0.0,39,39,0.0,0.0,KENWOOD,0.0,29071740.0,23325.167906,"POLYGON ((-87.59215 41.81693, -87.59215 41.816...",Kenwood,49114,81,33


## Add Community Area Centroid to GeoDataFrame

In [109]:
# Store the centroid of every community-area geometry in its own column.
# The points are expressed in whatever CRS gdf currently uses.
area_centroids = gdf.geometry.centroid
gdf['centroid'] = area_centroids
gdf.head()

Unnamed: 0,area,area_num_1,area_numbe,comarea,comarea_id,community,perimeter,shape_area,shape_len,geometry,CA_NAME,INCOME,NONWHITE,Pickups,centroid
0,0.0,35,35,0.0,0.0,DOUGLAS,0.0,46004620.0,31027.05451,"POLYGON ((-87.60914 41.84469, -87.60915 41.844...",Douglas,31856,89,41,POINT (-87.61868 41.83512)
1,0.0,36,36,0.0,0.0,OAKLAND,0.0,16913960.0,19565.506153,"POLYGON ((-87.59215 41.81693, -87.59231 41.816...",Oakland,32844,97,11,POINT (-87.60322 41.82375)
2,0.0,37,37,0.0,0.0,FULLER PARK,0.0,19916700.0,25339.08975,"POLYGON ((-87.62880 41.80189, -87.62879 41.801...",Fuller Park,23148,96,15,POINT (-87.63242 41.80909)
3,0.0,38,38,0.0,0.0,GRAND BOULEVARD,0.0,48492500.0,28196.837157,"POLYGON ((-87.60671 41.81681, -87.60670 41.816...",Grand Boulevard,33503,96,50,POINT (-87.61786 41.81295)
4,0.0,39,39,0.0,0.0,KENWOOD,0.0,29071740.0,23325.167906,"POLYGON ((-87.59215 41.81693, -87.59215 41.816...",Kenwood,49114,81,33,POINT (-87.59618 41.80892)


In [112]:
# Register the Mapbox token with Plotly Express. The file is opened with a
# context manager so the handle is closed, and surrounding whitespace
# (e.g. a trailing newline) is stripped from the token.
with open("./.mapbox_token.txt") as token_file:
    px.set_mapbox_access_token(token_file.read().strip())

# scatter_geo expects latitude/longitude in degrees. gdf may be in a projected
# CRS, so compute the centroids in gdf's CRS and convert the points to
# WGS 84 (EPSG:4326) before plotting.
centroids_wgs84 = gdf.geometry.centroid.to_crs(epsg=4326)

# One marker per community area, labelled with the community name on hover.
fig = px.scatter_geo(gdf,
                    lat=centroids_wgs84.y,
                    lon=centroids_wgs84.x,
                    hover_name="community")
fig.show()

In [None]:
# Register the Mapbox token with Plotly Express. The file is opened with a
# context manager so the handle is closed, and surrounding whitespace
# (e.g. a trailing newline) is stripped from the token.
with open("./.mapbox_token.txt") as token_file:
    px.set_mapbox_access_token(token_file.read().strip())