Loading Data 
1. Air Quality Data

In [None]:
!pip install fiona
!pip install geopandas
!pip install folium
!pip install sodapy -q
!pip install adjustText
!pip install plotly --upgrade
!pip install contextily
!pip install mapclassify
!pip install chart-studio



In [None]:
# To ignore unimporant system warnings
import warnings
warnings.filterwarnings("ignore")

# We will use Pandas, Numpy, and Matplotlib which is a package for visualization with Python
import pandas as pd
import numpy as np

# Load a required package 
# This is a library for accessing and parsing data through URLs
from urllib.parse import urlencode
import urllib.request, json 
from bs4 import BeautifulSoup # for web scraping
import fiona
import plotly.express as px
import geopandas as gpd
import adjustText as aT
import seaborn as sns # visualization styling package
import plotly.graph_objects as go
import urllib, json
import chart_studio.plotly as py
import chart_studio.tools as tls
import os
from sodapy import Socrata
from IPython.display import Image, display
from shapely.geometry import Polygon
import time
from shapely.geometry import shape
import contextily as ctx


# A magic functin that renders the figure in a notebook 
%matplotlib inline 

from matplotlib import pyplot as plt



In [None]:
import folium
from folium.plugins import HeatMap

Getting NYC Air Quality Data from NYC Open Data Portal : https://data.cityofnewyork.us/Environment/Air-Quality/c3uy-2p5r

In [None]:
API = 'https://data.cityofnewyork.us/resource/c3uy-2p5r.csv?'

In [None]:
query = {'$select':'*',
         '$where': 'start_date > "2014-12-31"',
         '$order': 'unique_id',
         '$limit': 14500} #change to 414500 later

aq_df = pd.read_csv(API + urlencode(query))
aq_df.head()

Unnamed: 0,unique_id,indicator_id,name,measure,measure_info,geo_type_name,geo_join_id,geo_place_name,time_period,start_date,data_value,message
0,179718,642,Boiler Emissions- Total NOx Emissions,Number per km2,number,UHF42,504,South Beach - Tottenville,2015,2015-01-01T00:00:00.000,2.0,
1,179719,642,Boiler Emissions- Total NOx Emissions,Number per km2,number,UHF42,503,Willowbrook,2015,2015-01-01T00:00:00.000,2.1,
2,179720,642,Boiler Emissions- Total NOx Emissions,Number per km2,number,UHF42,501,Port Richmond,2015,2015-01-01T00:00:00.000,2.8,
3,179721,642,Boiler Emissions- Total NOx Emissions,Number per km2,number,UHF42,502,Stapleton - St. George,2015,2015-01-01T00:00:00.000,4.6,
4,179722,642,Boiler Emissions- Total NOx Emissions,Number per km2,number,UHF42,410,Rockaways,2015,2015-01-01T00:00:00.000,6.1,


In [None]:
aq_df['Date'] = pd.to_datetime(aq_df['start_date']).dt.date
aq_df['Time'] = pd.to_datetime(aq_df['start_date']).dt.time

In [None]:
print(aq_df.shape[0])
aq_df.dropna()
print(aq_df.shape[0])

4752
4752


In [None]:
aq_df.head()

Unnamed: 0,unique_id,indicator_id,name,measure,measure_info,geo_type_name,geo_join_id,geo_place_name,time_period,start_date,data_value,message,Date,Time
0,179718,642,Boiler Emissions- Total NOx Emissions,Number per km2,number,UHF42,504,South Beach - Tottenville,2015,2015-01-01T00:00:00.000,2.0,,2015-01-01,00:00:00
1,179719,642,Boiler Emissions- Total NOx Emissions,Number per km2,number,UHF42,503,Willowbrook,2015,2015-01-01T00:00:00.000,2.1,,2015-01-01,00:00:00
2,179720,642,Boiler Emissions- Total NOx Emissions,Number per km2,number,UHF42,501,Port Richmond,2015,2015-01-01T00:00:00.000,2.8,,2015-01-01,00:00:00
3,179721,642,Boiler Emissions- Total NOx Emissions,Number per km2,number,UHF42,502,Stapleton - St. George,2015,2015-01-01T00:00:00.000,4.6,,2015-01-01,00:00:00
4,179722,642,Boiler Emissions- Total NOx Emissions,Number per km2,number,UHF42,410,Rockaways,2015,2015-01-01T00:00:00.000,6.1,,2015-01-01,00:00:00


In [None]:
aq_df.dtypes

unique_id           int64
indicator_id        int64
name               object
measure            object
measure_info       object
geo_type_name      object
geo_join_id         int64
geo_place_name     object
time_period        object
start_date         object
data_value        float64
message           float64
Date               object
Time               object
dtype: object

In [None]:
aq_df = aq_df.rename(columns={"geo_join_id":"boro_cd"})

In [None]:
!git clone https://github.com/kirthi-b/QoL_NYC_Children/

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
gdf = gpd.read_file("/content/QoL_NYC_Children/Environmental/Air_Quality/Community Districts.zip")

In [None]:
gdf.head()

Unnamed: 0,boro_cd,shape_area,shape_leng,geometry
0,206.0,42664310.0,35875.711733,"POLYGON ((-73.87185 40.84376, -73.87192 40.843..."
1,404.0,65739660.0,37018.373718,"POLYGON ((-73.84751 40.73901, -73.84801 40.738..."
2,203.0,44796870.0,33500.069106,"POLYGON ((-73.88072 40.83752, -73.88074 40.837..."
3,304.0,56662610.0,37007.806599,"POLYGON ((-73.89647 40.68234, -73.89653 40.682..."
4,205.0,38316980.0,29443.048128,"POLYGON ((-73.89138 40.86170, -73.89142 40.861..."


In [None]:
gdf.dtypes

boro_cd        float64
shape_area     float64
shape_leng     float64
geometry      geometry
dtype: object

In [None]:
gdf = gdf.astype({"boro_cd":'int'})
gdf.head()

Unnamed: 0,boro_cd,shape_area,shape_leng,geometry
0,206,42664310.0,35875.711733,"POLYGON ((-73.87185 40.84376, -73.87192 40.843..."
1,404,65739660.0,37018.373718,"POLYGON ((-73.84751 40.73901, -73.84801 40.738..."
2,203,44796870.0,33500.069106,"POLYGON ((-73.88072 40.83752, -73.88074 40.837..."
3,304,56662610.0,37007.806599,"POLYGON ((-73.89647 40.68234, -73.89653 40.682..."
4,205,38316980.0,29443.048128,"POLYGON ((-73.89138 40.86170, -73.89142 40.861..."


In [None]:
gdf_join = gdf.merge(aq_df, on="boro_cd", how='left')
gdf_join.head()


Unnamed: 0,boro_cd,shape_area,shape_leng,geometry,unique_id,indicator_id,name,measure,measure_info,geo_type_name,geo_place_name,time_period,start_date,data_value,message,Date,Time
0,206,42664311.5,35875.711733,"POLYGON ((-73.87185 40.84376, -73.87192 40.843...",179744.0,642.0,Boiler Emissions- Total NOx Emissions,Number per km2,number,UHF42,Borough Park,2015,2015-01-01T00:00:00.000,34.4,,2015-01-01,00:00:00
1,206,42664311.5,35875.711733,"POLYGON ((-73.87185 40.84376, -73.87192 40.843...",179792.0,640.0,Boiler Emissions- Total SO2 Emissions,Number per km2,number,UHF42,Borough Park,2015,2015-01-01T00:00:00.000,1.1,,2015-01-01,00:00:00
2,206,42664311.5,35875.711733,"POLYGON ((-73.87185 40.84376, -73.87192 40.843...",179840.0,641.0,Boiler Emissions- Total PM2.5 Emissions,Number per km2,number,UHF42,Borough Park,2015,2015-01-01T00:00:00.000,0.2,,2015-01-01,00:00:00
3,206,42664311.5,35875.711733,"POLYGON ((-73.87185 40.84376, -73.87192 40.843...",325103.0,645.0,Traffic Density- Annual Vehicle Miles Traveled...,million miles,per km2,UHF42,Borough Park,2016,2016-01-01T00:00:00.000,0.6,,2016-01-01,00:00:00
4,206,42664311.5,35875.711733,"POLYGON ((-73.87185 40.84376, -73.87192 40.843...",325151.0,645.0,Traffic Density- Annual Vehicle Miles Traveled...,million miles,per km2,CD,Belmont and East Tremont (CD6),2016,2016-01-01T00:00:00.000,3.8,,2016-01-01,00:00:00


In [None]:
gdf_join.drop_duplicates()
gdf_join.drop(columns=['message','start_date'])

Unnamed: 0,boro_cd,shape_area,shape_leng,geometry,unique_id,indicator_id,name,measure,measure_info,geo_type_name,geo_place_name,time_period,data_value,Date,Time
0,206,4.266431e+07,35875.711733,"POLYGON ((-73.87185 40.84376, -73.87192 40.843...",179744.0,642.0,Boiler Emissions- Total NOx Emissions,Number per km2,number,UHF42,Borough Park,2015,34.40,2015-01-01,00:00:00
1,206,4.266431e+07,35875.711733,"POLYGON ((-73.87185 40.84376, -73.87192 40.843...",179792.0,640.0,Boiler Emissions- Total SO2 Emissions,Number per km2,number,UHF42,Borough Park,2015,1.10,2015-01-01,00:00:00
2,206,4.266431e+07,35875.711733,"POLYGON ((-73.87185 40.84376, -73.87192 40.843...",179840.0,641.0,Boiler Emissions- Total PM2.5 Emissions,Number per km2,number,UHF42,Borough Park,2015,0.20,2015-01-01,00:00:00
3,206,4.266431e+07,35875.711733,"POLYGON ((-73.87185 40.84376, -73.87192 40.843...",325103.0,645.0,Traffic Density- Annual Vehicle Miles Traveled...,million miles,per km2,UHF42,Borough Park,2016,0.60,2016-01-01,00:00:00
4,206,4.266431e+07,35875.711733,"POLYGON ((-73.87185 40.84376, -73.87192 40.843...",325151.0,645.0,Traffic Density- Annual Vehicle Miles Traveled...,million miles,per km2,CD,Belmont and East Tremont (CD6),2016,3.80,2016-01-01,00:00:00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4269,209,1.142849e+08,62243.281229,"POLYGON ((-73.83979 40.83562, -73.83967 40.835...",605826.0,365.0,Fine Particulate Matter (PM2.5),Mean,mcg per cubic meter,UHF34,Bensonhurst - Bay Ridge,Summer 2018,7.83,2018-06-01,00:00:00
4270,209,1.142849e+08,62243.281229,"POLYGON ((-73.83979 40.83562, -73.83967 40.835...",605827.0,365.0,Fine Particulate Matter (PM2.5),Mean,mcg per cubic meter,UHF34,Bensonhurst - Bay Ridge,Winter 2017-18,8.10,2017-12-01,00:00:00
4271,209,1.142849e+08,62243.281229,"POLYGON ((-73.83979 40.83562, -73.83967 40.835...",605954.0,365.0,Fine Particulate Matter (PM2.5),Mean,mcg per cubic meter,CD,Parkchester and Soundview (CD9),Annual Average 2018,7.11,2018-01-01,00:00:00
4272,209,1.142849e+08,62243.281229,"POLYGON ((-73.83979 40.83562, -73.83967 40.835...",605955.0,365.0,Fine Particulate Matter (PM2.5),Mean,mcg per cubic meter,CD,Parkchester and Soundview (CD9),Summer 2018,8.50,2018-06-01,00:00:00


In [None]:
gdf_join.dropna()
print(gdf_join.shape[0])

4274


In [None]:
gdf_join.dtypes

boro_cd              int64
shape_area         float64
shape_leng         float64
geometry          geometry
unique_id          float64
indicator_id       float64
name                object
measure             object
measure_info        object
geo_type_name       object
geo_place_name      object
time_period         object
start_date          object
data_value         float64
message            float64
Date                object
Time                object
dtype: object

**2.NYC Parks Data**