In [1]:
!pip install gmaps
%matplotlib notebook
import os 
import csv
import pandas as pd 
import numpy as np
import matplotlib.pyplot as plt
import requests
import json
import gmaps

from config import gkey

# Hand the Google API key imported from the local config module to gmaps.
gmaps.configure(api_key=gkey)



In [2]:
# File name of the SFO aircraft noise complaint CSV (a relative path, so it is
# resolved against the notebook's working directory).
noise_complaint_file = "SFO_Aircraft_Noise_Complaint_Data.csv"

In [3]:
# Read the complaint CSV into a DataFrame and preview the first rows.
noise_complaint_df = pd.read_csv(noise_complaint_file)
noise_complaint_df.head()

Unnamed: 0,Year,Month,Community,Total Complaints,Total Number of Callers
0,2005,1,Alameda,7.0,1.0
1,2005,1,Atherton,242.0,1.0
2,2005,1,Belmont,75.0,1.0
3,2005,1,Berkeley,8.0,1.0
4,2005,1,Brisbane,239.0,7.0


In [4]:
# Count the non-null entries in every column; a column whose total is lower
# than the others contains missing values.
noise_complaint_df.notna().sum()

Year                       4749
Month                      4749
Community                  4749
Total Complaints           4748
Total Number of Callers    4748
dtype: int64

In [5]:
# Keep only the rows in which every column is populated, then re-count to
# confirm that all columns now report the same number of entries.
complete_rows = noise_complaint_df.notna().all(axis=1)
noise_complaint_df = noise_complaint_df[complete_rows]
noise_complaint_df.count()

Year                       4748
Month                      4748
Community                  4748
Total Complaints           4748
Total Number of Callers    4748
dtype: int64

In [6]:
# Inspect the dtype pandas inferred for each column.
noise_complaint_df.dtypes

Year                         int64
Month                        int64
Community                   object
Total Complaints           float64
Total Number of Callers    float64
dtype: object

In [7]:
# Store Year and Month as strings (object dtype) instead of integers.
for date_part in ('Year', 'Month'):
    noise_complaint_df[date_part] = noise_complaint_df[date_part].astype(str)

noise_complaint_df.dtypes

Year                        object
Month                       object
Community                   object
Total Complaints           float64
Total Number of Callers    float64
dtype: object

In [8]:
# Restrict the data to the 2008-2018 window. Year holds strings at this
# point, so the bounds are given (and compared) as strings as well.
year_labels = noise_complaint_df['Year']
in_study_window = (year_labels >= '2008') & (year_labels <= '2018')
noise_complaint_df = noise_complaint_df.loc[in_study_window, :]

# Confirm which years survived the filter.
noise_complaint_df['Year'].unique()

array(['2008', '2009', '2010', '2011', '2012', '2013', '2014', '2015',
       '2016', '2017', '2018'], dtype=object)

In [9]:
# Display the complaint records that remain after the 2008-2018 year filter.
noise_complaint_df

Unnamed: 0,Year,Month,Community,Total Complaints,Total Number of Callers
607,2008,1,Belmont,3.0,2.0
608,2008,1,Brisbane,158.0,5.0
609,2008,1,Burlingame,5.0,3.0
610,2008,1,Daly City,388.0,3.0
611,2008,1,Fairfax,1.0,1.0
...,...,...,...,...,...
4299,2018,12,South San Francisco,46.0,9.0
4300,2018,12,Sunnyvale,576.0,5.0
4301,2018,12,Union City,613.0,1.0
4302,2018,12,Watsonville,207.0,1.0


In [10]:
# Sum 'Total Complaints' into a Community x Year grid; community/year
# combinations without any records are filled with 0.
complaint_pivot = noise_complaint_df.pivot_table(
    values=['Total Complaints'],
    index=['Community'],
    columns=['Year'],
    aggfunc='sum',
    fill_value=0,
)

complaint_pivot

Unnamed: 0_level_0,Total Complaints,Total Complaints,Total Complaints,Total Complaints,Total Complaints,Total Complaints,Total Complaints,Total Complaints,Total Complaints,Total Complaints,Total Complaints
Year,2008,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018
Community,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2
Alameda,10,2,8,13,9,11,7,10,423,496,218
Albany,0,0,0,0,6,0,0,0,0,0,2
Antioch,0,0,0,0,0,0,0,0,0,2,0
Aptos,0,0,0,0,0,0,0,1975,3439,8835,8162
Aromas,0,0,0,0,1,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...
Tracy,0,0,0,0,0,0,0,1,1,0,0
Union City,0,0,0,1,0,0,0,1,2,386,2906
Walnut Creek,0,0,1,0,0,3,0,7,2,0,0
Watsonville,0,0,0,0,0,0,0,11,1765,1941,2580


In [11]:
# Convert the pivot back into a flat data frame: strip the outer
# 'Total Complaints' header level so the columns are plain year labels, clear
# the leftover columns name, and turn the Community index into a column.
#
# The MultiIndex guard keeps the cell re-runnable. The header is flattened in
# place, so on a second run the columns are already single-level and an
# unconditional droplevel(0) would raise (an index cannot lose its only level).
if isinstance(complaint_pivot.columns, pd.MultiIndex):
    complaint_pivot.columns = complaint_pivot.columns.droplevel(0)
complaint_pivot.columns.name = None
new_complaint_df = complaint_pivot.reset_index()

new_complaint_df

Unnamed: 0,Community,2008,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018
0,Alameda,10,2,8,13,9,11,7,10,423,496,218
1,Albany,0,0,0,0,6,0,0,0,0,0,2
2,Antioch,0,0,0,0,0,0,0,0,0,2,0
3,Aptos,0,0,0,0,0,0,0,1975,3439,8835,8162
4,Aromas,0,0,0,0,1,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...
112,Tracy,0,0,0,0,0,0,0,1,1,0,0
113,Union City,0,0,0,1,0,0,0,1,2,386,2906
114,Walnut Creek,0,0,1,0,0,3,0,7,2,0,0
115,Watsonville,0,0,0,0,0,0,0,11,1765,1941,2580


In [12]:
# Add the columns needed for geocoding: a constant State, plus empty Lat / Lng
# placeholders. Then put the location columns ahead of the yearly counts.
year_columns = [str(year) for year in range(2008, 2019)]

new_complaint_df['State'] = 'CA'
for coord in ('Lat', 'Lng'):
    new_complaint_df[coord] = ''

new_complaint_df = new_complaint_df[['Community', 'State', 'Lat', 'Lng'] + year_columns]

In [13]:
# Geocode every community with the Google Geocoding API and store the
# coordinates in the Lat / Lng columns of new_complaint_df.

# Endpoint is the same for every request, so it is defined once up front.
base_url = "https://maps.googleapis.com/maps/api/geocode/json"

# Request parameters; the "address" entry is replaced on each iteration.
params = {"key": gkey}

# (address, API status) pairs for communities that returned no match.
not_geocoded = []

for index, row in new_complaint_df.iterrows():
    city = row["Community"]
    state = row["State"]
    params["address"] = f"{city}, {state}"

    # The timeout keeps a single stalled request from hanging the whole cell;
    # raise_for_status surfaces HTTP-level failures instead of letting them
    # show up later as a confusing JSON/key error.
    response = requests.get(base_url, params=params, timeout=10)
    response.raise_for_status()
    cities_lat_lng = response.json()

    results = cities_lat_lng.get("results", [])
    if not results:
        # No match (or the request was rejected): record NaN rather than
        # aborting the loop with an IndexError, and remember the address so it
        # can be reported. Rows left as NaN still convert cleanly to float but
        # have no usable coordinates.
        new_complaint_df.loc[index, "Lat"] = np.nan
        new_complaint_df.loc[index, "Lng"] = np.nan
        not_geocoded.append((params["address"], cities_lat_lng.get("status")))
        continue

    # Use the first (best) match returned by the API.
    location = results[0]["geometry"]["location"]
    new_complaint_df.loc[index, "Lat"] = location["lat"]
    new_complaint_df.loc[index, "Lng"] = location["lng"]

if not_geocoded:
    print(f"Could not geocode {len(not_geocoded)} communities: {not_geocoded}")

In [14]:
# Display the frame now that the geocoding loop has written Lat / Lng values.
new_complaint_df

Unnamed: 0,Community,State,Lat,Lng,2008,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018
0,Alameda,CA,37.7799,-122.282,10,2,8,13,9,11,7,10,423,496,218
1,Albany,CA,37.8867,-122.298,0,0,0,0,6,0,0,0,0,0,2
2,Antioch,CA,38.0049,-121.806,0,0,0,0,0,0,0,0,0,2,0
3,Aptos,CA,36.9772,-121.899,0,0,0,0,0,0,0,1975,3439,8835,8162
4,Aromas,CA,36.8886,-121.643,0,0,0,0,1,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
112,Tracy,CA,37.7397,-121.425,0,0,0,0,0,0,0,1,1,0,0
113,Union City,CA,37.5934,-122.044,0,0,0,1,0,0,0,1,2,386,2906
114,Walnut Creek,CA,37.9101,-122.065,0,0,1,0,0,3,0,7,2,0,0
115,Watsonville,CA,36.9102,-121.757,0,0,0,0,0,0,0,11,1765,1941,2580


In [15]:
# Convert Lat and Lng to floats. Both columns were created as empty strings,
# so they are object dtype until cast.
for coord in ('Lat', 'Lng'):
    new_complaint_df[coord] = new_complaint_df[coord].astype(float)

new_complaint_df.dtypes


Community     object
State         object
Lat          float64
Lng          float64
2008           int64
2009           int64
2010           int64
2011           int64
2012           int64
2013           int64
2014           int64
2015           int64
2016           int64
2017           int64
2018           int64
dtype: object

In [16]:
# Coordinates shared by every heat map, followed by one complaint-count
# Series per year (2008 through 2018) to use as heat-map weights.
locations = new_complaint_df[['Lat', 'Lng']]

(
    two_thousand_eight_complaints,
    two_thousand_nine_complaints,
    two_thousand_ten_complaints,
    two_thousand_eleven_complaints,
    two_thousand_twelve_complaints,
    two_thousand_thirteen_complaints,
    two_thousand_fourteen_complaints,
    two_thousand_fifteen_complaints,
    two_thousand_sixteen_complaints,
    two_thousand_seventeen_complaints,
    two_thousand_eighteen_complaints,
) = (new_complaint_df[str(year)] for year in range(2008, 2019))

In [17]:
# 2008 heat map: one point per community, weighted by its 2008 complaints.
heat_layer = gmaps.heatmap_layer(
    locations,
    weights=two_thousand_eight_complaints,
    dissipating=False,
    max_intensity=10,
    point_radius=0.01,
)

fig = gmaps.figure()
fig.add_layer(heat_layer)
fig

Figure(layout=FigureLayout(height='420px'))

In [20]:
# Communities ranked by their 2008 complaint count; show the ten highest.
top_2008 = (
    new_complaint_df[['Community', '2008']]
    .sort_values(by='2008', ascending=False)
    .reset_index(drop=True)
)
top_2008.head(10)

Unnamed: 0,Community,2008
0,Brisbane,3849
1,Daly City,1757
2,San Carlos,590
3,Redwood City,332
4,Foster City,319
5,Pacifica,306
6,San Francisco,146
7,Palo Alto,113
8,South San Franciscio,70
9,Woodside,67


In [21]:
# 2018 heat map: one point per community, weighted by its 2018 complaints.
heat_options = dict(dissipating=False, max_intensity=10, point_radius=0.01)

fig = gmaps.figure()
heat_layer = gmaps.heatmap_layer(
    locations,
    weights=two_thousand_eighteen_complaints,
    **heat_options,
)
fig.add_layer(heat_layer)
fig

Figure(layout=FigureLayout(height='420px'))

In [90]:
# Communities ranked by their 2018 complaint count; show the ten highest.
top_2018 = (
    new_complaint_df[['Community', '2018']]
    .sort_values(by='2018', ascending=False)
    .reset_index(drop=True)
)

# Render the counts with thousands separators (this turns them into strings).
with_separators = '{:,.0f}'.format
top_2018['2018'] = top_2018['2018'].map(with_separators)

top_2018.head(10)

Unnamed: 0,Community,2018
0,Palo Alto,610745
1,Los Altos,275434
2,Los Gatos,256522
3,Santa Cruz,248581
4,Scotts Valley,143796
5,Los Altos Hills,108978
6,Soquel,96981
7,Oakland,94871
8,Portola Valley,80912
9,Pacifica,67112


In [113]:
# Rank the communities by complaint count for every year, then report the
# single community with the most complaints in each year.
complaint_years = [str(year) for year in range(2008, 2019)]


def _rank_communities(complaints, year):
    """Return the communities ordered by their complaint count in one year.

    Parameters
    ----------
    complaints : pandas.DataFrame
        Frame with a 'Community' column and one column per year label.
    year : str
        Label of the year column to rank by, e.g. '2008'.

    Returns
    -------
    pandas.DataFrame
        Columns 'Community' and ``year``, sorted from most to fewest
        complaints, with every row labelled ``year`` in an index named 'Year'.
    """
    ranked = (
        complaints[['Community', year]]
        .sort_values(by=year, ascending=False)
        .reset_index(drop=True)
    )
    ranked['Year'] = year
    return ranked.set_index(['Year'])


#sort by largest complaints for each year
top_by_year = {
    year: _rank_communities(new_complaint_df, year) for year in complaint_years
}

# Per-year names, kept available for any cell that refers to them directly.
(
    top_2008, top_2009, top_2010, top_2011, top_2012, top_2013,
    top_2014, top_2015, top_2016, top_2017, top_2018,
) = (top_by_year[year] for year in complaint_years)

#top city with most complaints each year
first_by_year = {year: top_by_year[year].head(1) for year in complaint_years}

(
    first_2008, first_2009, first_2010, first_2011, first_2012, first_2013,
    first_2014, first_2015, first_2016, first_2017, first_2018,
) = (first_by_year[year] for year in complaint_years)

# Stack the yearly winners. Each row only carries a value in its own year
# column, so the other year columns come out as NaN and are zero-filled.
first = pd.concat([first_by_year[year] for year in complaint_years], sort=True)
first = first.fillna(0)

#create dataframe to find community with most complaints each year
# Sum across each ROW (axis=1): with all other years zeroed, that is the
# winner's count for its own year. A column-wise sum would only produce the
# right rows because the 'Year' index labels happen to equal the column names.
first['# Total of Complaints'] = first[complaint_years].sum(axis=1)

first['# Total of Complaints'] = first['# Total of Complaints'].map("{:,.0f}".format)
first = first[['Community','# Total of Complaints']]
first


Unnamed: 0_level_0,Community,# Total of Complaints
Year,Unnamed: 1_level_1,Unnamed: 2_level_1
2008,Brisbane,3849
2009,Brisbane,2768
2010,Brisbane,5150
2011,Brisbane,5319
2012,Brisbane,3630
2013,Brisbane,5324
2014,Brisbane,9268
2015,Los Gatos,213850
2016,Palo Alto,863149
2017,Palo Alto,610940


In [114]:
# Separate frame used for the 11-year averages: the community, its location,
# and the complaint counts for 2008 through 2018.
year_columns = [str(year) for year in range(2008, 2019)]
average_df = new_complaint_df[['Community', 'State', 'Lat', 'Lng'] + year_columns]

average_df

Unnamed: 0,Community,State,Lat,Lng,2008,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018
0,Alameda,CA,37.779872,-122.282185,10,2,8,13,9,11,7,10,423,496,218
1,Albany,CA,37.886704,-122.297756,0,0,0,0,6,0,0,0,0,0,2
2,Antioch,CA,38.004921,-121.805789,0,0,0,0,0,0,0,0,0,2,0
3,Aptos,CA,36.977173,-121.899402,0,0,0,0,0,0,0,1975,3439,8835,8162
4,Aromas,CA,36.888565,-121.643001,0,0,0,0,1,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
112,Tracy,CA,37.739651,-121.425223,0,0,0,0,0,0,0,1,1,0,0
113,Union City,CA,37.593356,-122.043861,0,0,0,1,0,0,0,1,2,386,2906
114,Walnut Creek,CA,37.910078,-122.065182,0,0,1,0,0,3,0,7,2,0,0
115,Watsonville,CA,36.910231,-121.756895,0,0,0,0,0,0,0,11,1765,1941,2580


In [115]:
# Calculate each community's mean yearly complaint count over 2008-2018.
yearly_counts = average_df[[str(year) for year in range(2008, 2019)]]
average_df['Average # of Complaints'] = yearly_counts.mean(axis=1)

# Inputs for the averaged heat map: coordinates and their weights.
lat_lng = average_df[['Lat', 'Lng']]
average_count = average_df['Average # of Complaints']
lat_lng

Unnamed: 0,Lat,Lng
0,37.779872,-122.282185
1,37.886704,-122.297756
2,38.004921,-121.805789
3,36.977173,-121.899402
4,36.888565,-121.643001
...,...,...
112,37.739651,-121.425223
113,37.593356,-122.043861
114,37.910078,-122.065182
115,36.910231,-121.756895


In [116]:
# Heat map of the 11-year average: one point per community, weighted by its
# average yearly complaint count.
heat_layer = gmaps.heatmap_layer(
    lat_lng,
    weights=average_count,
    dissipating=False,
    max_intensity=10,
    point_radius=0.01,
)

fig = gmaps.figure()
fig.add_layer(heat_layer)
fig

Figure(layout=FigureLayout(height='420px'))

In [31]:
# Order the communities from highest to lowest average and list the top ten.
avg_col = 'Average # of Complaints'
average_df = average_df.sort_values(avg_col, ascending=False).reset_index()

top_10_cities = average_df[['Community', avg_col]].head(10)

# Render the averages as whole numbers with thousands separators.
top_10_cities[avg_col] = top_10_cities[avg_col].map("{:,.0f}".format)
top_10_cities

Unnamed: 0,Community,Average # of Complaints
0,Los Gatos,255141
1,Palo Alto,199500
2,Santa Cruz,185468
3,Los Altos,139411
4,Scotts Valley,131788
5,Soquel,95164
6,Los Altos Hills,65010
7,Portola Valley,32340
8,Capitola,31800
9,Oakland,26678
