# Assignment03. Exploring Urban Mobility with Spatial Data

## Load the Packages

In [1]:
!pip install fiona
!pip install geopandas
!pip install folium
!pip install sodapy -q
!pip install adjustText
!pip install plotly --upgrade
!pip install contextily
!pip install mapclassify
!pip install chart-studio

Collecting fiona
  Downloading Fiona-1.8.20-cp37-cp37m-manylinux1_x86_64.whl (15.4 MB)
[K     |████████████████████████████████| 15.4 MB 5.3 MB/s 
[?25hCollecting munch
  Downloading munch-2.5.0-py2.py3-none-any.whl (10 kB)
Collecting cligj>=0.5
  Downloading cligj-0.7.2-py3-none-any.whl (7.1 kB)
Collecting click-plugins>=1.0
  Downloading click_plugins-1.1.1-py2.py3-none-any.whl (7.5 kB)
Installing collected packages: munch, cligj, click-plugins, fiona
Successfully installed click-plugins-1.1.1 cligj-0.7.2 fiona-1.8.20 munch-2.5.0
Collecting geopandas
  Downloading geopandas-0.10.2-py2.py3-none-any.whl (1.0 MB)
[K     |████████████████████████████████| 1.0 MB 5.3 MB/s 
[?25hCollecting pyproj>=2.2.0
  Downloading pyproj-3.2.1-cp37-cp37m-manylinux2010_x86_64.whl (6.3 MB)
[K     |████████████████████████████████| 6.3 MB 26.0 MB/s 
Installing collected packages: pyproj, geopandas
Successfully installed geopandas-0.10.2 pyproj-3.2.1
Collecting adjustText
  Downloading adjustText-0.7.3

In [2]:
# To ignore unimportant system warnings
import warnings
warnings.filterwarnings("ignore")

# We will use Pandas, Numpy, and Matplotlib which is a package for visualization with Python
import pandas as pd
import numpy as np

# Load a required package 
# This is a library for accessing and parsing data through URLs
from urllib.parse import urlencode
import urllib.request, json 
from bs4 import BeautifulSoup # for web scraping
import fiona
import plotly.express as px
import geopandas as gpd
import adjustText as aT
import seaborn as sns # visualization styling package
import plotly.graph_objects as go
import urllib, json
import chart_studio.plotly as py
import chart_studio.tools as tls
import os
from sodapy import Socrata
from IPython.display import Image, display
from shapely.geometry import Polygon
import time
from shapely.geometry import shape
import contextily as ctx


# A magic function that renders the figure in a notebook
%matplotlib inline 

from matplotlib import pyplot as plt

# Chart Studio API
# Credentials are read from environment variables so that no secret is stored
# in the notebook. Set CHART_STUDIO_API_KEY (and optionally
# CHART_STUDIO_USERNAME) before running this cell.
# NOTE(review): the key that was previously hardcoded here has been exposed in
# the notebook history and should be revoked/regenerated.
import chart_studio
username = os.environ.get('CHART_STUDIO_USERNAME', 'bkirthi')
api_key = os.environ.get('CHART_STUDIO_API_KEY')
if api_key:
    chart_studio.tools.set_credentials_file(username=username,
                                            api_key=api_key)
else:
    # Best effort: the rest of the notebook does not need Chart Studio to run.
    print('CHART_STUDIO_API_KEY is not set; skipping Chart Studio credential setup.')



In [3]:
import folium
from folium.plugins import HeatMap

## Get Community Area Boundaries and Project the Data

Boundaries—Community Areas (current) | City of Chicago | Data Portal. (n.d.). Chicago. Retrieved November 22, 2021, from https://data.cityofchicago.org/Facilities-Geographic-Boundaries/Boundaries-Community-Areas-current-/cauq-8yn6


In [4]:
!git clone https://github.com/kirthi-b/Chicago-Rideshare

Cloning into 'Chicago-Rideshare'...
remote: Enumerating objects: 27, done.[K
remote: Counting objects: 100% (27/27), done.
remote: Compressing objects: 100% (23/23), done.[K
remote: Total 27 (delta 3), reused 26 (delta 2), pack-reused 0[K
Unpacking objects: 100% (27/27), done.


In [5]:
# Read the community-area boundary shapefile from the cloned repository directory.
gdf = gpd.read_file("/content/Chicago-Rideshare/Data/CA_Boundaries/", driver = "shapefile")
# Cast the area number to int: 'area_num_1' is the join key used by the later merges.
gdf['area_num_1'] = gdf['area_num_1'].astype(int)
gdf.head()

Unnamed: 0,area,area_num_1,area_numbe,comarea,comarea_id,community,perimeter,shape_area,shape_len,geometry
0,0.0,35,35,0.0,0.0,DOUGLAS,0.0,46004620.0,31027.05451,"POLYGON ((-87.60914 41.84469, -87.60915 41.844..."
1,0.0,36,36,0.0,0.0,OAKLAND,0.0,16913960.0,19565.506153,"POLYGON ((-87.59215 41.81693, -87.59231 41.816..."
2,0.0,37,37,0.0,0.0,FULLER PARK,0.0,19916700.0,25339.08975,"POLYGON ((-87.62880 41.80189, -87.62879 41.801..."
3,0.0,38,38,0.0,0.0,GRAND BOULEVARD,0.0,48492500.0,28196.837157,"POLYGON ((-87.60671 41.81681, -87.60670 41.816..."
4,0.0,39,39,0.0,0.0,KENWOOD,0.0,29071740.0,23325.167906,"POLYGON ((-87.59215 41.81693, -87.59215 41.816..."


In [6]:
# Show the coordinate reference system of the boundaries: identifier, name, datum.
for crs_attr in (gdf.crs, gdf.crs.name, gdf.crs.datum):
    print(crs_attr)

epsg:4326
WGS 84
World Geodetic System 1984 ensemble


## Get Rideshare Data

Transportation Network Providers—Trips | City of Chicago | Data Portal. (n.d.). Chicago. Retrieved November 22, 2021, from https://data.cityofchicago.org/Transportation/Transportation-Network-Providers-Trips/m6dm-c72p


In [7]:
# CSV endpoint of the City of Chicago data portal for dataset m6dm-c72p;
# the URL-encoded query parameters are appended after the trailing '?'.
API = 'https://data.cityofchicago.org/resource/m6dm-c72p.csv?'

In [None]:
# Columns requested from the trips dataset (joined into the $select clause).
select_columns = [
    'trip_start_timestamp',
    'trip_seconds',
    'trip_miles',
    'pickup_community_area',
    'pickup_centroid_latitude',
    'pickup_centroid_longitude',
]

# Trips from 2020-10-01 up to (not including) 2021-11-01 that have a pickup
# community area, ordered by start time. The limit is an upper bound on the
# number of rows returned; lower it for a quick test run.
query = {
    '$select': ', '.join(select_columns),
    '$where': 'trip_start_timestamp >= "2020-10-01" and  trip_start_timestamp < "2021-11-01" and pickup_community_area IS NOT NULL ',
    '$order': 'trip_start_timestamp',
    '$limit': 42000000,
}

request_url = API + urlencode(query)
rs_df = pd.read_csv(request_url)
rs_df.head()

In [None]:
# Parse the timestamp column once (it was previously parsed twice, which is
# expensive on tens of millions of rows) and derive the date and time parts.
trip_start = pd.to_datetime(rs_df['trip_start_timestamp'])
rs_df['Date'] = trip_start.dt.date
rs_df['Time'] = trip_start.dt.time

In [None]:
# Drop rows with any missing value and report the row count before and after.
# `DataFrame.dropna()` returns a new frame; the result must be assigned,
# otherwise nothing is removed and both counts are identical.
print(rs_df.shape[0])
rs_df = rs_df.dropna()
print(rs_df.shape[0])

In [None]:
#pickups per community area
pickup_ca = rs_df['pickup_community_area'].value_counts().to_frame('nc_freq')
pickup_ca = pd.DataFrame(pickup_ca).reset_index()
pickup_ca.columns = ['area_num_1', 'Pickups']
pickup_ca['area_num_1'] = pd.to_numeric(pickup_ca['area_num_1'])
pickup_ca.sort_values('area_num_1',inplace=True, ascending=True)
pickup_ca.head()

## Get Median Income Data

Community Data Snapshots Raw Data, July 2021 Release with 2020 Supplement—CMAP Data Hub. (n.d.). Retrieved November 22, 2021, from https://datahub.cmap.illinois.gov/dataset/community-data-snapshots-raw-data


In [None]:
# Load the income table from the cloned repository and rename its 'CA' column
# to 'area_num_1', the key shared with the boundary GeoDataFrame.
income_csv = "/content/Chicago-Rideshare/Data/Median_Income.csv"
inc_df = pd.read_csv(income_csv).rename(columns={"CA": "area_num_1"})
inc_df.head()

## Add Median Income data to GeoDataFrame

In [None]:
# Inner-join the income table onto the boundaries by community-area number.
# NOTE: this rebinds `gdf`; re-running the cell without re-running the load
# cell merges the income columns a second time.
gdf = gdf.merge(inc_df, on ='area_num_1')
gdf.head()

## Add Pickup data to GeoDataFrame

In [None]:
# Inner-join the per-area pickup counts onto the boundaries.
gdf = gdf.merge(pickup_ca, on ='area_num_1')
# Complement of the NONWHITE column.
# assumes NONWHITE is a percentage on a 0-100 scale — TODO confirm against the income CSV
gdf['WHITE'] = 100 - gdf['NONWHITE']
gdf.head()

## Centroid

In [None]:
# Compute each community area's centroid.
# Centroids taken directly on longitude/latitude degrees are geometrically
# distorted (geopandas warns about this), so the polygons are projected to a
# planar CRS (EPSG:26971, NAD83 / Illinois East, metres), the centroid is
# taken there, and the points are converted back to the frame's own CRS.
projected_centroids = gdf["geometry"].to_crs(epsg=26971).centroid
gdf["center"] = projected_centroids.to_crs(gdf.crs)

# Point version of the frame: same attributes, geometry = centroid.
gdf_points = gdf.copy()
gdf_points = gdf_points.set_geometry("center")

# NOTE(review): these two column names are swapped relative to their content:
# geometry.x is the longitude and geometry.y is the latitude. They are left
# unchanged because the map cells further down index them in the matching
# (swapped) order; rename both sides together if this is cleaned up.
gdf_points['latitude'] = gdf_points.geometry.x
gdf_points['longitude'] = gdf_points.geometry.y

In [None]:
# Choropleth of the INCOME column with one yellow bubble per community area,
# sized by its number of pickups, and a label at each centroid.
ax = gdf.plot("INCOME", legend=True, figsize=(20, 20), cmap='GnBu')
ax.set_facecolor('white')
ax.set_axis_off()

bubble_sizes = 0.001 * gdf['Pickups']
gdf_points.plot(ax=ax, color='yellow', markersize=bubble_sizes, alpha=.5)

# One text label per centroid; adjust_text then nudges them apart.
centroid_labels = zip(gdf_points.geometry.x, gdf_points.geometry.y, gdf_points["community"])
texts = [
    plt.text(lon, lat, name, fontsize=8, color='black')
    for lon, lat, name in centroid_labels
]

leader_line = dict(arrowstyle="-", color='gray', lw=0.5)
aT.adjust_text(texts, force_points=0.3, force_text=0.8,
               expand_points=(1, 1), expand_text=(1, 1),
               arrowprops=leader_line)


In [None]:
# Choropleth of the WHITE column with one yellow bubble per community area,
# sized by its number of pickups, and a label at each centroid.
ax = gdf.plot("WHITE", legend=True, figsize=(20, 20), cmap='GnBu')
ax.set_facecolor('white')
ax.set_axis_off()

bubble_sizes = 0.001 * gdf['Pickups']
gdf_points.plot(ax=ax, color='yellow', markersize=bubble_sizes, alpha=.5)

# One text label per centroid; adjust_text then nudges them apart.
centroid_labels = zip(gdf_points.geometry.x, gdf_points.geometry.y, gdf_points["community"])
texts = [
    plt.text(lon, lat, name, fontsize=8, color='black')
    for lon, lat, name in centroid_labels
]

leader_line = dict(arrowstyle="-", color='gray', lw=0.5)
aT.adjust_text(texts, force_points=0.3, force_text=0.8,
               expand_points=(1, 1), expand_text=(1, 1),
               arrowprops=leader_line)

#l1 = plt.scatter([],[], s=10*10, color='yellow')
#l2 = plt.scatter([],[], s=50*10, color='yellow')
#l3 = plt.scatter([],[], s=100*10, color='yellow')
#l4 = plt.scatter([],[], s=200*10, color='yellow')

#labels = ["10", "50", "100", "200"]

#leg = plt.legend([l1, l2, l3, l4], labels, ncol=4, frameon=True, fontsize=12,
#handlelength=2, loc = 8, borderpad = 1.8,
#handletextpad=3, title='Legend', scatterpoints = 1)

## Folium Map

In [None]:
# Heat map of pickups: one weighted point per distinct pickup centroid,
# where the weight is the number of trips starting there.
base_map = folium.Map(location=[41.8781, -87.6298], zoom_start=11,
                      tiles='cartodbdark_matter')

# Helper column so that summing per centroid yields a trip count.
rs_df['count'] = 1

centroid_cols = ['pickup_centroid_latitude', 'pickup_centroid_longitude']
pickup_weights = (
    rs_df[centroid_cols + ['count']]
    .groupby(centroid_cols)
    .sum()
    .reset_index()
)

hm = HeatMap(pickup_weights.values.tolist(), radius=5, max_zoom=11)
hm.add_to(base_map)

base_map

In [None]:
# Write the pickup heat map to an HTML file in the cloned repository directory.
base_map.save('/content/Chicago-Rideshare/folium_heatmap_pickups.html')

In [None]:
# Bubble map with three layers per community area (pickups, median income,
# white share), each drawn as a circle centred on the area's centroid.
base_map = folium.Map(location=[41.8781, -87.6298], zoom_start = 11,
                     tiles='cartodbdark_matter')

# (column, radius scale, colour, popup prefix, popup suffix) for each layer.
# Layers are added in this order, so later layers are drawn on top.
bubble_layers = [
    ('Pickups', 0.0005, 'palegreen', 'No. of Pickups: ', ''),
    ('INCOME', 0.01, 'turquoise', 'Median Income: $', ''),
    ('WHITE', 10, 'dodgerblue', 'Percentage of Whites: ', '%'),
]

for column, radius_scale, colour, popup_prefix, popup_suffix in bubble_layers:
    # Coordinates come straight from the centroid geometry: folium expects
    # [latitude, longitude], i.e. [y, x].
    for lat, lon, value in zip(gdf_points.geometry.y, gdf_points.geometry.x, gdf_points[column]):
        folium.Circle(
            location=[lat, lon],
            radius=int(value * radius_scale),
            color=colour,
            popup=popup_prefix + str(value) + popup_suffix,
            fill=True,
            weight=1,
            opacity=1,
            # The option is spelled `fill_opacity`; the previous `fillopacity`
            # was not recognised, so the intended 0.9 was never applied.
            fill_opacity=0.9,
            fill_color=colour
            ).add_to(base_map)

base_map

In [None]:
# Write the combined bubble map to an HTML file in the cloned repository directory.
base_map.save('/content/Chicago-Rideshare/folium_bubble_all.html')

In [None]:
# Bubble map of pickups: one circle per community area, centred on its
# centroid, with the radius proportional to the number of pickups.
base_map = folium.Map(location=[41.8781, -87.6298], zoom_start = 11,
                     tiles='cartodbdark_matter')

# folium expects [latitude, longitude], i.e. [y, x] of the centroid geometry.
for lat, lon, pickups in zip(gdf_points.geometry.y, gdf_points.geometry.x, gdf_points['Pickups']):
    folium.Circle(
        location=[lat, lon],
        radius=int(pickups * 0.0005),
        color='palegreen',
        popup='No. of Pickups: ' + str(pickups),
        fill=True,
        weight=1,
        opacity=1,
        # The option is spelled `fill_opacity`; the previous `fillopacity`
        # was not recognised, so the intended 0.9 was never applied.
        fill_opacity=0.9,
        fill_color='palegreen'
        ).add_to(base_map)

base_map

In [None]:
# Write the pickups bubble map to an HTML file in the cloned repository directory.
base_map.save('/content/Chicago-Rideshare/folium_bubble_pickups.html')

In [None]:
# Bubble map of median income: one circle per community area, centred on its
# centroid, with the radius proportional to the INCOME column.
base_map = folium.Map(location=[41.8781, -87.6298], zoom_start = 11,
                     tiles='cartodbdark_matter')

# folium expects [latitude, longitude], i.e. [y, x] of the centroid geometry.
for lat, lon, income in zip(gdf_points.geometry.y, gdf_points.geometry.x, gdf_points['INCOME']):
    folium.Circle(
        location=[lat, lon],
        radius=int(income * 0.01),
        color='turquoise',
        popup='Median Income: $' + str(income),
        fill=True,
        weight=1,
        opacity=1,
        # The option is spelled `fill_opacity`; the previous `fillopacity`
        # was not recognised, so the intended 0.9 was never applied.
        fill_opacity=0.9,
        fill_color='turquoise'
        ).add_to(base_map)

base_map

In [None]:
# Write the income bubble map to an HTML file in the cloned repository directory.
base_map.save('/content/Chicago-Rideshare/folium_bubble_income.html')

In [None]:
# Bubble map of the WHITE column: one circle per community area, centred on
# its centroid, with the radius proportional to the percentage.
base_map = folium.Map(location=[41.8781, -87.6298], zoom_start = 11,
                     tiles='cartodbdark_matter')

# folium expects [latitude, longitude], i.e. [y, x] of the centroid geometry.
for lat, lon, white_pct in zip(gdf_points.geometry.y, gdf_points.geometry.x, gdf_points['WHITE']):
    folium.Circle(
        location=[lat, lon],
        radius=int(white_pct * 10),
        color='dodgerblue',
        popup='Percentage of Whites: ' + str(white_pct) + '%',
        fill=True,
        weight=1,
        opacity=1,
        # The option is spelled `fill_opacity`; the previous `fillopacity`
        # was not recognised, so the intended 0.9 was never applied.
        fill_opacity=0.9,
        fill_color='dodgerblue'
        ).add_to(base_map)

base_map

In [None]:
# Write the white-share bubble map to an HTML file in the cloned repository directory.
base_map.save('/content/Chicago-Rideshare/folium_bubble_whites.html')

## Charts

In [None]:
# Preview the trips frame (now including the derived Date, Time and count columns).
rs_df.head()

In [None]:
# Bar charts of the 20 busiest community areas by number of trips.
rides = rs_df.copy()

# Let pandas infer the timestamp format, as the earlier Date/Time cell does.
# The previous explicit format ('%Y/%m/%Y ...') repeated %Y and could not
# match the data.
rides['trip_start_timestamp'] = pd.to_datetime(rides['trip_start_timestamp'])

# NOTE(review): this path is relative to a Kaggle-style '../input' directory
# and is not part of the cloned repository — confirm the file is available.
census = pd.read_csv('../input/chicago-census-socioecon-commarea-2012/Chicago_Census_SociaEcon_CommArea_2008_2012.csv')
census.columns = census.columns.str.replace(' ','_').str.lower()

# Map community-area number -> name.
# `pd.Series(values, index=other)` REINDEXES an existing Series instead of
# relabelling it, which shifted every name by one row; build the lookup with
# set_index so each number is paired with the name on the same row.
census_areas = census.dropna(subset=['community_area_number'])
comm_dict = (
    census_areas
    .assign(community_area_number=census_areas['community_area_number'].astype(int))
    .set_index('community_area_number')['community_area_name']
    .to_dict()
)
# .map(dict) yields NaN for unknown ids instead of raising KeyError.
rides['pickup_community_area_name'] = rides['pickup_community_area'].map(comm_dict)

rides['weekday'] = rides['trip_start_timestamp'].dt.weekday

# Four-hour time-of-day buckets:
# 0: 05-08h, 1: 09-12h, 2: 13-16h, 3: 17-20h, 4: 21-23h, 5: 00-04h.
hour = rides['trip_start_timestamp'].dt.hour
rides['time'] = 0
rides.loc[hour <= 4, 'time'] = 5
rides.loc[(hour > 4) & (hour <= 8), 'time'] = 0
rides.loc[(hour > 8) & (hour <= 12), 'time'] = 1
rides.loc[(hour > 12) & (hour <= 16), 'time'] = 2
rides.loc[(hour > 16) & (hour <= 20), 'time'] = 3
rides.loc[hour > 20, 'time'] = 4

# Trips per pickup area. Rows are counted with .size() because the download
# query does not select a 'trip_id' column. Both group-bys are sorted by the
# same key, so the counts line up with the rows of `pickup_locations`.
pickup_locations = rides.groupby('pickup_community_area')[['pickup_community_area_name','pickup_centroid_latitude','pickup_centroid_longitude']].first().reset_index()
pickup_locations['trips'] = rides.groupby('pickup_community_area').size().values

# The drop-off chart needs 'dropoff_community_area', which the download query
# above does not request; it is only drawn when that column is present.
has_dropoffs = 'dropoff_community_area' in rides.columns

fig, axes = plt.subplots(nrows=2 if has_dropoffs else 1, ncols=1, figsize=(15, 15), squeeze=False)
ax1 = axes[0, 0]
pickup_locations.sort_values('trips',ascending=False).head(20).plot(kind='bar',y='trips',x='pickup_community_area_name',legend=None,
                                                                    color=sns.cubehelix_palette(20,start=3,rot=-.25,reverse=True),ax=ax1)
ax1.set_xlabel('Community Area')
ax1.set_ylabel('Number of Pickups')
ax1.set_title('Most Rides by Community Area (Pickups)')

if has_dropoffs:
    rides['dropoff_community_area_name'] = rides['dropoff_community_area'].map(comm_dict)
    dropoff_locations = rides.groupby('dropoff_community_area')[['dropoff_community_area_name']].first().reset_index()
    dropoff_locations['trips'] = rides.groupby('dropoff_community_area').size().values

    ax2 = axes[1, 0]
    dropoff_locations.sort_values('trips',ascending=False).head(20).plot(kind='bar',x='dropoff_community_area_name',y='trips',legend=None,ax=ax2,
                                                                         color=sns.cubehelix_palette(20,reverse=True))
    ax2.set_xlabel('Community Area')
    ax2.set_ylabel('Number of Trips')
    ax2.set_title('Most Rides by Community Area (Dropoffs)')
    fig.subplots_adjust(hspace=0.5)

plt.show()

## Multiple Linear Regression

In [None]:
from sklearn import linear_model

# Ordinary least squares: number of pickups explained by median income and
# white share, one observation per community area.
feature_cols = ['INCOME', 'WHITE']
X = gdf_points[feature_cols]
y = gdf_points['Pickups']

regr = linear_model.LinearRegression()
regr.fit(X, y)

# Predict the number of pickups for one hypothetical community area.
# The sample is passed as a DataFrame with the same column names the model
# was fitted on, so each value is unambiguously tied to its feature.
# NOTE(review): WHITE is computed as 100 - NONWHITE, so 1300 looks outside its
# plausible range, and 2300 is a very low INCOME — confirm the intended sample.
sample = pd.DataFrame([[2300, 1300]], columns=feature_cols)
predicted_pickups = regr.predict(sample)

# Legacy name kept so any later cell that still references it keeps working.
predictedCO2 = predicted_pickups

print(predicted_pickups)