In [1]:
import gmaps
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import scipy.stats as st

# Google developer API key
from Config import gkey

# Register the Google Maps API key with gmaps before any figure is built.
# `gkey` is imported from the local Config module rather than hardcoded here.
gmaps.configure(api_key=gkey)


In [2]:
# Create coordinates for the NY State map.
# Single-element list holding one pair; the same two numbers are used as the
# map center when the figure is created (presumably (latitude, longitude)).
coordinates = [(42.9915, -76.0718)]


In [3]:
# Layout of the map widget: a fixed 500x500 px box with a thin border,
# centered horizontally on the page.
figure_layout = {
    'width': '500px',
    'height': '500px',
    'border': '1px solid black',
    'padding': '1px',
    'margin': '0 auto 0 auto'
}

# Build the base map. The center is read from `coordinates` (defined in the
# previous cell) instead of repeating the literal pair, so the map location
# has a single source of truth.
fig = gmaps.figure(
    zoom_level=6.5,
    center=coordinates[0],
    layout=figure_layout,
    map_type="HYBRID",
)
fig

Figure(layout=FigureLayout(border='1px solid black', height='500px', margin='0 auto 0 auto', padding='1px', wi…

In [4]:
# Load the air-quality index table and discard any row with a missing value.
AQI_df = pd.read_csv('../Se7en/AQIndex.csv').dropna()

# Bucket the rows by year, then pull out the 2009 bucket for this analysis.
aqi_group = AQI_df.groupby("Year")
aqi_2009_df = aqi_group.get_group(2009)
aqi_2009_df.head()

Unnamed: 0.1,Unnamed: 0,zip,AQI,Category Number,Category Name,Pollutant,Year
0,0,12032,90,2,Moderate,OZONE,2009
1,1,12108,90,2,Moderate,OZONE,2009
2,2,12134,90,2,Moderate,OZONE,2009
3,3,12139,90,2,Moderate,OZONE,2009
4,4,12164,90,2,Moderate,OZONE,2009


In [5]:
# Load the zip-code -> latitude/longitude lookup table.
# The "Zip" column is renamed to "zip" so it matches the key column of the
# AQI table, and rows containing any NaN value are dropped.
zip_df = (
    pd.read_csv('../Se7en/us-zip-code-latitude-and-longitude.csv')
    .rename(columns={"Zip": "zip"})
    .dropna()
)
zip_df.head()

Unnamed: 0,zip,City,State,Latitude,Longitude,Timezone,Daylight savings time flag
0,10001,New York,NY,40.750742,-73.99653,-5,1
1,10002,New York,NY,40.71704,-73.987,-5,1
2,10003,New York,NY,40.732509,-73.98935,-5,1
3,10005,New York,NY,40.706019,-74.00858,-5,1
4,10006,New York,NY,40.707904,-74.01342,-5,1


In [6]:
# Attach zip-code coordinates to the 2009 AQI rows.
# The join key is the single shared "zip" column (previously listed twice as
# ['zip', 'zip'], which was redundant).
# A left join keeps every AQI row; rows whose zip has no match in zip_df come
# back with NaN in the zip_df columns and are removed by the dropna() below.
new_aqi_df = pd.merge(aqi_2009_df, zip_df, on="zip", how="left").dropna()
new_aqi_df.head()

Unnamed: 0.1,Unnamed: 0,zip,AQI,Category Number,Category Name,Pollutant,Year,City,State,Latitude,Longitude,Timezone,Daylight savings time flag
0,0,12032,90,2,Moderate,OZONE,2009,Caroga Lake,NY,43.158567,-74.49646,-5.0,1.0
1,1,12108,90,2,Moderate,OZONE,2009,Lake Pleasant,NY,43.525388,-74.40875,-5.0,1.0
2,2,12134,90,2,Moderate,OZONE,2009,Northville,NY,43.21892,-74.14822,-5.0,1.0
3,3,12139,90,2,Moderate,OZONE,2009,Piseco,NY,43.40276,-74.56972,-5.0,1.0
4,4,12164,90,2,Moderate,OZONE,2009,Speculator,NY,43.515611,-74.37431,-5.0,1.0


In [7]:
# Load the vehicle records from the gzip-compressed CSV and keep only rows
# without any NaN value.
vehicle_df = pd.read_csv("vehicle_data.csv.gz", compression="gzip").dropna()

# Restrict to vehicles whose model year (cast to int) is 2009 or earlier.
vehicle_year_df = vehicle_df.loc[vehicle_df['model_year'].astype(int).le(2009)]
vehicle_year_df.head()

Unnamed: 0,vin,registration_class,zip,model_year,body_type,fuel_type
0,999407G3573,PAS,12866,1976,SUBN,GAS
1,9992313,PAS,13316,1924,CONV,GAS
2,998867,PAS,11501,1952,2DSD,GAS
3,9983692,PAS,10940,1936,2DSD,GAS
4,99782010525,PAS,11225,1978,2DSD,GAS


In [8]:
# Vehicle age as of 2009, computed from the model year.
veh_age = 2009 - vehicle_year_df['model_year'].astype(int)

# Build a new DataFrame with the extra 'vehicle_age' column.
# .assign() returns a fresh frame, so vehicle_year_df is left untouched.
# The earlier pd.DataFrame(vehicle_year_df) wrapper did not copy the data, so
# assigning a column on it wrote into a filtered slice (SettingWithCopyWarning
# risk and aliasing with vehicle_year_df).
new_vehicle_df = vehicle_year_df.assign(vehicle_age=veh_age)
new_vehicle_df.head()

Unnamed: 0,vin,registration_class,zip,model_year,body_type,fuel_type,vehicle_age
0,999407G3573,PAS,12866,1976,SUBN,GAS,33
1,9992313,PAS,13316,1924,CONV,GAS,85
2,998867,PAS,11501,1952,2DSD,GAS,57
3,9983692,PAS,10940,1936,2DSD,GAS,73
4,99782010525,PAS,11225,1978,2DSD,GAS,31


In [9]:
# Merge new_aqi_df with new_vehicle_df on zip code.
# The join key is the single shared "zip" column (previously listed twice as
# ['zip', 'zip'], which was redundant).
# The result has one row per (AQI row, matching vehicle) pair, so AQI/location
# values repeat for every vehicle registered in that zip.
# A left join keeps AQI rows with no vehicles; those carry NaN in the vehicle
# columns and are removed by the dropna() below.
density_aqi_2009_df = pd.merge(
    new_aqi_df, new_vehicle_df, on="zip", how="left"
).dropna()
density_aqi_2009_df.head()

Unnamed: 0.1,Unnamed: 0,zip,AQI,Category Number,Category Name,Pollutant,Year,City,State,Latitude,Longitude,Timezone,Daylight savings time flag,vin,registration_class,model_year,body_type,fuel_type,vehicle_age
0,0,12032,90,2,Moderate,OZONE,2009,Caroga Lake,NY,43.158567,-74.49646,-5.0,1.0,8T03C193065,PAS,1968.0,CONV,GAS,41.0
1,0,12032,90,2,Moderate,OZONE,2009,Caroga Lake,NY,43.158567,-74.49646,-5.0,1.0,8T01C214689,PAS,1968.0,2DSD,GAS,41.0
2,0,12032,90,2,Moderate,OZONE,2009,Caroga Lake,NY,43.158567,-74.49646,-5.0,1.0,7T03S242307,PAS,1967.0,CONV,GAS,42.0
3,0,12032,90,2,Moderate,OZONE,2009,Caroga Lake,NY,43.158567,-74.49646,-5.0,1.0,6MPCT01Z8P8644612,PAS,1993.0,CONV,GAS,16.0
4,0,12032,90,2,Moderate,OZONE,2009,Caroga Lake,NY,43.158567,-74.49646,-5.0,1.0,57548132568,PAS,1962.0,UTIL,GAS,47.0


In [11]:

# Heatmap inputs: one point per row of the merged frame, weighted by that
# row's AQI value (cast to float).
locations = density_aqi_2009_df.loc[:, ["Latitude", "Longitude"]]
aqi = density_aqi_2009_df["AQI"].astype(float)

# Build the weighted heatmap layer.
heat_layer = gmaps.heatmap_layer(
    locations,
    weights=aqi,
    max_intensity=500,
    point_radius=0.05,
    dissipating=False,
)

# Attach the layer to the figure created earlier and display it.
# NOTE(review): every execution of this cell calls add_layer on the same
# `fig`, so re-running it presumably stacks another copy of the layer - confirm.
fig.add_layer(heat_layer)
fig