# **Air Pollution in Chicago, CY2010**
---
## Setup

In [1]:
# Dependencies and setup
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import requests
import json
import gmaps
import os
import time

# Import API key
from config import gkey

## **Question 1: Which Chicago neighborhoods submitted the highest number of air pollution complaints in 2010?**
-----

### Bar Chart of Air Pollution Complaints by Neighborhood

In [2]:
# TODO(Caroline): add the bar chart of air pollution complaints by neighborhood

### Heat Map of Air Pollution Complaints by Neighborhood

In [3]:
# Load the merged dataset (path is relative to the notebook's working directory)
merged_dataset = "Resources/merged_data.csv"
air_pollution_df = pd.read_csv(merged_dataset)
# Count complaints per neighborhood; count() tallies the non-null "COMPLAINT ID" values in each group
neighborhood = air_pollution_df.groupby("Neighborhood")
complaint_count = neighborhood["COMPLAINT ID"].count()
# Build a dataframe of complaint counts by neighborhood in descending order, plus location columns:
#   Region             - search string passed to the geocoder ("<Neighborhood>, Chicago, IL")
#   Latitude/Longitude - NaN placeholders (not empty strings) so the columns stay numeric and a
#                        neighborhood that is never geocoded is recognisable as missing data
neighborhood_df = (
    complaint_count.to_frame("Number of Complaints")
    .sort_values("Number of Complaints", ascending = False)
    .reset_index()
    .assign(
        Region = lambda counts: counts["Neighborhood"].astype(str) + ", Chicago, IL",
        Latitude = np.nan,
        Longitude = np.nan,
    )
)
# Show the first rows for reference
neighborhood_df.head()

Unnamed: 0,Neighborhood,Number of Complaints,Region,Latitude,Longitude
0,West Englewood,14,"West Englewood, Chicago, IL",,
1,Uptown,12,"Uptown, Chicago, IL",,
2,New City,9,"New City, Chicago, IL",,
3,Chatham,9,"Chatham, Chicago, IL",,
4,Loop,9,"Loop, Chicago, IL",,


In [4]:
print("---------------------------")
print("Beginning data retrieval...")
print("---------------------------")
# Build partial query URL
base_url = "https://maps.googleapis.com/maps/api/geocode/json"
params = {"key" : gkey}
# Pause between requests (rate limiting) and maximum wait for a single response, in seconds
REQUEST_DELAY_SECONDS = 5
REQUEST_TIMEOUT_SECONDS = 10


def _geocode_address(search_address):
    """Look up one address with the geocoding endpoint at base_url.

    Parameters
    ----------
    search_address : str
        Free-text address sent as the "address" query parameter.

    Returns
    -------
    tuple
        (latitude, longitude) taken from the first result, or (nan, nan) when
        the request fails or the response contains no usable result.
    """
    query = {**params, "address": search_address}
    try:
        response = requests.get(base_url, params = query, timeout = REQUEST_TIMEOUT_SECONDS)
        response.raise_for_status()
        neighborhood_data = response.json()
    except (requests.exceptions.RequestException, ValueError) as error:
        # Print only the exception type: requests error messages embed the full URL, API key included
        print(f"Request failed ({type(error).__name__}). Skipping...")
        return np.nan, np.nan
    try:
        location = neighborhood_data["results"][0]["geometry"]["location"]
        return location["lat"], location["lng"]
    except (KeyError, IndexError, TypeError):
        # Report the API status so quota/key problems are not mistaken for an unknown neighborhood
        status = neighborhood_data.get("status") if isinstance(neighborhood_data, dict) else None
        print(f"Neighborhood not found (API status: {status}). Skipping...")
        return np.nan, np.nan


# Geocode every Region in neighborhood_df, collecting coordinates in row order
latitudes = []
longitudes = []
for index, search_address in neighborhood_df["Region"].items():
    print(f"Retrieving Results for Index {index}: {search_address}.")
    latitude, longitude = _geocode_address(search_address)
    latitudes.append(latitude)
    longitudes.append(longitude)
    print("-------------")
    # Wait between API requests
    time.sleep(REQUEST_DELAY_SECONDS)
# Write the coordinates back in one step; rows that could not be geocoded hold NaN
neighborhood_df["Latitude"] = latitudes
neighborhood_df["Longitude"] = longitudes
print("---------------------------")
print("Data retrieval complete.")
print("---------------------------")

---------------------------
Beginning data retrieval...
---------------------------
Retrieving Results for Index 0: West Englewood, Chicago, IL.
-------------
Retrieving Results for Index 1: Uptown, Chicago, IL.
-------------
Retrieving Results for Index 2: New City, Chicago, IL.
-------------
Retrieving Results for Index 3: Chatham, Chicago, IL.
-------------
Retrieving Results for Index 4: Loop, Chicago, IL.
-------------
Retrieving Results for Index 5: Dunning, Chicago, IL.
-------------
Retrieving Results for Index 6: Logan Square, Chicago, IL.
-------------
Retrieving Results for Index 7: Near West Side, Chicago, IL.
-------------
Retrieving Results for Index 8: Lake View, Chicago, IL.
-------------
Retrieving Results for Index 9: South Shore, Chicago, IL.
-------------
Retrieving Results for Index 10: West Garfield Park, Chicago, IL.
-------------
Retrieving Results for Index 11: Irving Park, Chicago, IL.
-------------
Retrieving Results for Index 12: Greater Grand Crossing, Chic

In [5]:
# Display neighborhood_df to confirm the Latitude and Longitude columns were filled in by the geocoding step
# (a bare expression on the last line of a cell is rendered as the cell's output)
neighborhood_df

Unnamed: 0,Neighborhood,Number of Complaints,Region,Latitude,Longitude
0,West Englewood,14,"West Englewood, Chicago, IL",41.779516,-87.664291
1,Uptown,12,"Uptown, Chicago, IL",41.96654,-87.65334
2,New City,9,"New City, Chicago, IL",41.806677,-87.667976
3,Chatham,9,"Chatham, Chicago, IL",41.74012,-87.614636
4,Loop,9,"Loop, Chicago, IL",41.878635,-87.625055
5,Dunning,8,"Dunning, Chicago, IL",41.94526,-87.807
6,Logan Square,7,"Logan Square, Chicago, IL",41.92306,-87.709291
7,Near West Side,7,"Near West Side, Chicago, IL",41.866846,-87.666409
8,Lake View,7,"Lake View, Chicago, IL",41.939781,-87.658927
9,South Shore,7,"South Shore, Chicago, IL",41.758993,-87.570026


In [17]:
# Configure gmaps
gmaps.configure(api_key = gkey)
# Keep only rows with usable coordinates: coerce to numeric (blank or non-numeric values become NaN)
# and drop rows where either coordinate is missing, so one failed lookup cannot break the layer
geocoded_df = neighborhood_df.assign(
    Latitude = pd.to_numeric(neighborhood_df["Latitude"], errors = "coerce"),
    Longitude = pd.to_numeric(neighborhood_df["Longitude"], errors = "coerce"),
).dropna(subset = ["Latitude", "Longitude"])
if geocoded_df.empty:
    raise ValueError("No neighborhoods have coordinates; run the geocoding cell before plotting the heat map.")
# Store latitude and longitude in locations; weights come from the same rows so they stay aligned
locations = geocoded_df[["Latitude", "Longitude"]]
complaints = geocoded_df["Number of Complaints"].astype(float)
# Plot heatmap; max_intensity follows the largest complaint count instead of a hard-coded value
complaints_heatmap = gmaps.figure()
heat_layer = gmaps.heatmap_layer(
    locations,
    weights = complaints,
    dissipating = False,
    max_intensity = float(complaints.max()),
    point_radius = 0.02,
)
# Add layer
complaints_heatmap.add_layer(heat_layer)
# Display figure
complaints_heatmap

Figure(layout=FigureLayout(height='420px'))

## **Question 2: Did the number of air pollution complaints vary by neighborhood income level?**
-----

### Boxplot of Complaints: Low Income vs High Income

In [18]:
# TODO(Nicole): add the boxplot of complaints, low income vs high income

### Scatter Plot of Complaints by Income Level

In [19]:
# TODO(Caroline): add the scatter plot of complaints by income level with its linear regression

## **Question 3: Did the number of air pollution complaints vary across minority communities?**
-----

### Races by Neighborhood

In [20]:
# TODO(Devyn): add the races-by-neighborhood analysis

### Line Graph by Race

In [21]:
# TODO(Devyn): add the line graph by race