In [8]:
pip install pandas

Note: you may need to restart the kernel to use updated packages.


In [75]:
#Baltimore/Washington International: 39.1754, -76.668297
import requests
import json

# Define the API endpoint
url = "https://api.openaq.org/v2/measurements"

# Define the parameters for the request
parameters = {
    "parameter": "pm25",  # PM2.5 data only
    "date_from": "2023-05-01",
    "date_to": "2023-08-31",
    "coordinates": "39.1754,-76.668297",
    "radius": 7500,  # 7.5km radius
    "limit": 80000  # Limit the number of data points
}

# Send the GET request to the API
response = requests.get(url, params=parameters)

# Check if the request was successful (status code 200)
if response.status_code == 200:
    # Parse the JSON response
    data = response.json()
    
    # Save the data to a JSON file
    with open('Baltimore_Washington_data.json', 'w') as f:
        json.dump(data, f)
        
    print("Data saved successfully to Baltimore_Washington_data.json")
else:
    print("Error:", response.status_code)


Data saved successfully to Baltimore_Washington_data.json


In [19]:
####    Task: Transform, filter and store the OpenAQ data as CSV


import pandas as pd
import json

# Load the JSON data
with open('Baltimore_Washington_data.json') as f:
    data = json.load(f)

# Normalize the 'results' part of the JSON data
results = data['results']
df = pd.json_normalize(results)

# Convert the date field to datetime64 if it exists
if 'date.local' in df.columns:
    df['local_time'] = pd.to_datetime(df['date.local'])

# Check if the necessary columns are present in the DataFrame
expected_columns = ['location', 'parameter', 'value', 'local_time']
missing_columns = [col for col in expected_columns if col not in df.columns]

# If any columns are missing, print a message and handle the issue accordingly
if missing_columns:
    print(f"Missing columns: {missing_columns}")
else:
    # Extract latitude and longitude from the nested 'coordinates' fields
    df['sensor_lat'] = df['coordinates.latitude']
    df['sensor_lon'] = df['coordinates.longitude']

    # Reduce the DataFrame to include only specified fields
    df = df[expected_columns + ['sensor_lat', 'sensor_lon']]

    # Store it to a file called: 20230606_detroit_downtown_7_5km_aq.csv
    filename = 'Baltimore_Washington_7.5km_data.csv'
    df.to_csv(filename, index=False)
    print(f"Data stored in '{filename}' successfully.")


Data stored in 'Baltimore_Washington_7.5km_data.csv' successfully.


In [21]:
# Sensordensity and intensity of PM 2.5 on June 6 2023
import pandas as pd

# Load data from both datasets into DataFrames
baltimore_df = pd.read_csv('Baltimore_Washington_7.5km_data.csv')
detroit_df = pd.read_csv('20230606_detroit_downtown_7_5km_aq.csv')

# Filter data for June 6, 2023
baltimore_june6_df = baltimore_df[baltimore_df['local_time'].str.startswith("2023-06-06")]
detroit_june6_df = detroit_df[detroit_df['local_time'].str.startswith("2023-06-06")]

# Calculate sensor density for Baltimore/Washington International
baltimore_sensor_density = len(baltimore_june6_df)

# Calculate intensity of PM2.5 for Baltimore/Washington International
baltimore_pm25_intensity = baltimore_june6_df['value'].mean()

# Calculate sensor density for Detroit
detroit_sensor_density = len(detroit_june6_df)

# Calculate intensity of PM2.5 for Detroit
detroit_pm25_intensity = detroit_june6_df['value'].mean()

print("Sensor Density for Baltimore/Washington International:", baltimore_sensor_density)
print("Intensity of PM2.5 for Baltimore/Washington International:", baltimore_pm25_intensity)
print("Sensor Density for Detroit:", detroit_sensor_density)
print("Intensity of PM2.5 for Detroit:", detroit_pm25_intensity)


Sensor Density for Baltimore/Washington International: 655
Intensity of PM2.5 for Baltimore/Washington International: 37.30946564885496
Sensor Density for Detroit: 5186
Intensity of PM2.5 for Detroit: 72.8891245661396


Similarites:

Both locations possess functioning air quality monitoring systems, evident from their reported sensor density values. Additionally, they have recorded PM2.5 levels, indicating active efforts to monitor and assess air pollution levels. 
These similarities underscore shared initiatives to monitor and mitigate air pollution in urban environments.

Differences:

Detroit demonstrates a notably higher sensor density of 5186 compared to Baltimore/Washington International's 655, suggesting a more extensive air quality monitoring network in Detroit.
Additionally, Detroit reports a higher intensity of PM2.5 at 72.89 µg/m³, contrasting with Baltimore/Washington International's lower PM2.5 intensity of 37.31 µg/m³, indicating varying levels of air pollution severity between the two locations.

In [None]:
url = "https://api.openaq.org/v2/measurements?date_from=2023-05-01%2000%3A00%3A00&date_to=2023-08-31%2023%3A59%3A59&limit=50000&page=1&offset=0&sort=desc&parameter_id=2&coordinates=42.3643%2C-71.005203&radius=7500&order_by=datetime"


In [15]:
## Boston Logan International: 42.3643, -71.005203

import json
import requests

# Define the API endpoint
url = "https://api.openaq.org/v2/measurements"

# Define the parameters for the request
parameters = {
    "parameter": "pm25",  # PM2.5 data only
    "date_from": "2023-05-01T00:00:00",  # Include time for start date
    "date_to": "2023-08-31T23:59:59",    # Include time for end date
    "coordinates": "42.3643,-71.005203",
    "radius": 7500,  # 7.5km radius
    "limit": 35000
}

# Send the GET request to the API
response = requests.get(url, params=parameters)

# Check if the request was successful (status code 200)
if response.status_code == 200:
    # Parse the JSON response
    data = response.json()

    # Save the data to a JSON file
    with open('Boston_data.json', 'w') as f:
        json.dump(data, f)
        
    print("Data saved successfully to Boston_data.json")
else:
    print("Error:", response.status_code)


Data saved successfully to Boston_data.json


In [16]:
####    Task: Transform, filter and store the OpenAQ data as CSV


import pandas as pd
import json

# Load the JSON data
with open('Boston_data.json') as f:
    data = json.load(f)

# Normalize the 'results' part of the JSON data
results = data['results']
df = pd.json_normalize(results)

# Convert the date field to datetime64 if it exists
if 'date.local' in df.columns:
    df['local_time'] = pd.to_datetime(df['date.local'])

# Check if the necessary columns are present in the DataFrame
expected_columns = ['location', 'parameter', 'value', 'local_time']
missing_columns = [col for col in expected_columns if col not in df.columns]

# If any columns are missing, print a message and handle the issue accordingly
if missing_columns:
    print(f"Missing columns: {missing_columns}")
else:
    # Extract latitude and longitude from the nested 'coordinates' fields
    df['sensor_lat'] = df['coordinates.latitude']
    df['sensor_lon'] = df['coordinates.longitude']

    # Reduce the DataFrame to include only specified fields
    df = df[expected_columns + ['sensor_lat', 'sensor_lon']]

    # Store it to a file called: 20230606_detroit_downtown_7_5km_aq.csv
    filename = 'Boston_Logan_7_5km_aq.csv'
    df.to_csv(filename, index=False)
    print(f"Data stored in '{filename}' successfully.")


Data stored in 'Boston_Logan_7_5km_aq.csv' successfully.


In [23]:
# Sensor density and intensity of PM 2.5 on June 6 2023

import pandas as pd

# Load data from both datasets into DataFrames
boston_df = pd.read_csv('Boston_Logan_7_5km_aq.csv')
detroit_df = pd.read_csv('20230606_detroit_downtown_7_5km_aq.csv')

# Filter data for June 6, 2023
boston_june6_df = boston_df[boston_df['local_time'].str.startswith("2023-06-06")]
detroit_june6_df = detroit_df[detroit_df['local_time'].str.startswith("2023-06-06")]

# Calculate sensor density for Boston International
boston_sensor_density = len(boston_june6_df)

# Calculate intensity of PM2.5 for Boston International
boston_pm25_intensity = boston_june6_df['value'].mean()

# Calculate sensor density for Detroit
detroit_sensor_density = len(detroit_june6_df)

# Calculate intensity of PM2.5 for Detroit
detroit_pm25_intensity = detroit_june6_df['value'].mean()

print("Sensor Density for Boston International:", boston_sensor_density)
print("Intensity of PM2.5 for Boston International:", boston_pm25_intensity)
print("Sensor Density for Detroit:", detroit_sensor_density)
print("Intensity of PM2.5 for Detroit:", detroit_pm25_intensity)


Sensor Density for Boston International: 692
Intensity of PM2.5 for Boston International: 29.277167630057804
Sensor Density for Detroit: 5186
Intensity of PM2.5 for Detroit: 72.8891245661396



Similarites:
Both Boston International and Detroit monitor air quality on June 06 related to PM2.5 pollution.
Both cities utilize monitoring systems to track air quality data.

Differences:
Boston International has a lower sensor density (692) compared to Detroit (5186).

Detroit experiences a significantly higher intensity of PM2.5 pollution (72.889 µg/m³) compared to Boston International (29.277 µg/m³).

In [47]:
# Philadelphia International Airport:39.8719, -75.2411  
# Data from May to June

import requests
import json
import time  # Import the time module

url = "https://api.openaq.org/v2/measurements?date_from=2023-05-01%2000%3A00%3A00&date_to=2023-06-30%2023%3A59%3A59&limit=3000&page=1&offset=0&sort=desc&parameter_id=2&coordinates=39.8719%2C-75.2411&radius=7500&order_by=datetime"


headers = {"accept": "application/json"}

response = requests.get(url, headers=headers)

# Check if the request was successful
if response.status_code == 200:
    # Save the response content (JSON) to a file
    with open('PHILA_mayjune_data.json', 'w') as json_file:
        json.dump(response.json(), json_file, indent=4)
    print("Data saved to PHILA_mayjune_data")
else:
    print("Error occurred while fetching data:", response.text)

time.sleep(2)  # Add a sleep time of 2 seconds

Data saved to PHILA_mayjune_data


Extracted data with date ranges from May to June and July to August and mereged those two into single json file named Philadelphia_data.json. I am unable to extract all the data at once it is throwing error so i broke the daterange itno two different ranges.

In [48]:
# Philadelphia International Airport:39.8719, -75.2411  
# Data from July to August

import requests
import json
import time  # Import the time module

url = "https://api.openaq.org/v2/measurements?date_from=2023-07-01%2000%3A00%3A00&date_to=2023-08-31%2023%3A59%3A59&limit=3000&page=1&offset=0&sort=desc&parameter_id=2&coordinates=39.8719%2C-75.2411&radius=7500&order_by=datetime"


headers = {"accept": "application/json"}

response = requests.get(url, headers=headers)

# Check if the request was successful
if response.status_code == 200:
    # Save the response content (JSON) to a file
    with open('PHILA_julyaug_data.json', 'w') as json_file:
        json.dump(response.json(), json_file, indent=4)
    print("Data saved to PHILA_julyaug_data")
else:
    print("Error occurred while fetching data:", response.text)

time.sleep(2)  # Add a sleep time of 2 seconds

Data saved to PHILA_julyaug_data


In [55]:
import json

# Read the content of the first JSON file (May-June data)
with open('PHILA_mayjune_data.json', 'r') as file:
    mayjune_data = json.load(file)
    print("May-June data length:", len(mayjune_data["results"]))

# Read the content of the second JSON file (July-August data)
with open('PHILA_julyaug_data.json', 'r') as file:
    julyaug_data = json.load(file)
    print("July-August data length:", len(julyaug_data["results"]))

# Combine "results" data from both JSON files into a single list
combined_results = mayjune_data["results"] + julyaug_data["results"]

# Construct a new dictionary with combined "results"
combined_data = {
    "meta": {
        "source": "PHILA_mayjune_data and PHILA_julyaug_data"
    },
    "results": combined_results
}

# Write combined data to a JSON file
with open('Philadelphia_data.json', 'w') as file:
    json.dump(combined_data, file)

print("Combined JSON file 'Philadelphia_data.json' created successfully.")


May-June data length: 1382
July-August data length: 1443
Combined JSON file 'Philadelphia_data.json' created successfully.


In [56]:
####    Task: Transform, filter and store the OpenAQ data as CSV


import pandas as pd
import json

# Load the JSON data
with open('Philadelphia_data.json') as f:
    data = json.load(f)

# Normalize the 'results' part of the JSON data
results = data['results']
df = pd.json_normalize(results)

# Convert the date field to datetime64 if it exists
if 'date.local' in df.columns:
    df['local_time'] = pd.to_datetime(df['date.local'])

# Check if the necessary columns are present in the DataFrame
expected_columns = ['location', 'parameter', 'value', 'local_time']
missing_columns = [col for col in expected_columns if col not in df.columns]

# If any columns are missing, print a message and handle the issue accordingly
if missing_columns:
    print(f"Missing columns: {missing_columns}")
else:
    # Extract latitude and longitude from the nested 'coordinates' fields
    df['sensor_lat'] = df['coordinates.latitude']
    df['sensor_lon'] = df['coordinates.longitude']

    # Reduce the DataFrame to include only specified fields
    df = df[expected_columns + ['sensor_lat', 'sensor_lon']]

    # Store it to a file called: 20230606_detroit_downtown_7_5km_aq.csv
    filename = 'Philadelphia_7.5km_data.csv'
    df.to_csv(filename, index=False)
    print(f"Data stored in '{filename}' successfully.")


Data stored in 'Philadelphia_7.5km_data.csv' successfully.


In [58]:
# Sensor density and intensity of PM 2.5 on June 6 2023
import pandas as pd

# Load data from both datasets into DataFrames
Philadelphia_df = pd.read_csv('Philadelphia_7.5km_data.csv')
detroit_df = pd.read_csv('20230606_detroit_downtown_7_5km_aq.csv')

# Filter data for June 6, 2023
Philadelphia_june6_df = Philadelphia_df[Philadelphia_df['local_time'].str.startswith("2023-06-06")]
detroit_june6_df = detroit_df[detroit_df['local_time'].str.startswith("2023-06-06")]

# Calculate sensor density for Baltimore/Washington International
Philadelphia_sensor_density = len(Philadelphia_june6_df)

# Calculate intensity of PM2.5 for Baltimore/Washington International
Philadelphia_pm25_intensity = Philadelphia_june6_df['value'].mean()

# Calculate sensor density for Detroit
detroit_sensor_density = len(detroit_june6_df)

# Calculate intensity of PM2.5 for Detroit
detroit_pm25_intensity = detroit_june6_df['value'].mean()

print("Sensor Density for Philadelphia International Airport:", Philadelphia_sensor_density)
print("Intensity of PM2.5 for Philadelphia International Airport:", Philadelphia_pm25_intensity)
print("Sensor Density for Detroit:", detroit_sensor_density)
print("Intensity of PM2.5 for Detroit:", detroit_pm25_intensity)


Sensor Density for Philadelphia International Airport: 10
Intensity of PM2.5 for Philadelphia International Airport: 41.19
Sensor Density for Detroit: 5186
Intensity of PM2.5 for Detroit: 72.8891245661396



Comment:
Philadelphia International Airport's sensor density of 10 is remarkably lower compared to Detroit's extensive network of 5186 sensors. 

Despite this, Philadelphia experiences a significant intensity of PM2.5 pollution, recording 41.19 µg/m³ on June 6.
In contrast, Detroit faces a higher intensity of PM2.5 pollution at 72.89 µg/m³, showcasing more severe air quality challenges.
These disparities emphasize varying levels of air quality monitoring infrastructure and pollution levels between the two locations.

In [64]:
# Newark Liberty International Airport:  40.692501, -74.168701
# Data from May to June

import requests
import json
import time  # Import the time module

url = "https://api.openaq.org/v2/measurements?date_from=2023-05-01%2000%3A00%3A00&date_to=2023-06-30%2023%3A59%3A59&limit=35000&page=1&offset=0&sort=desc&parameter_id=2&coordinates=40.692501%2C-74.168701&radius=7500&order_by=datetime"


headers = {"accept": "application/json"}

response = requests.get(url, headers=headers)

# Check if the request was successful
if response.status_code == 200:
    # Save the response content (JSON) to a file
    with open('Newark_mayjune_data.json', 'w') as json_file:
        json.dump(response.json(), json_file, indent=4)
    print("Data saved to Newark_mayjune_data")
else:
    print("Error occurred while fetching data:", response.text)

time.sleep(2)  # Add a sleep time of 2 seconds

Data saved to Newark_mayjune_data


Extracted data with date ranges from May to June and July to August and mereged those two into single json file named NEWARK_data.json. I am unable to extract all the data at once it is throwing error so i broke the daterange itno two different ranges.

In [69]:
# Newark Liberty International Airport:  40.692501, -74.168701
# Data from July to August

import requests
import json
import time  # Import the time module

url = "https://api.openaq.org/v2/measurements?date_from=2023-07-01%2000%3A00%3A00&date_to=2023-08-31%2023%3A59%3A59&limit=20000&page=1&offset=0&sort=desc&parameter_id=2&coordinates=40.692501%2C-74.168701&radius=7500&order_by=datetime"


headers = {"accept": "application/json"}

response = requests.get(url, headers=headers)

# Check if the request was successful
if response.status_code == 200:
    # Save the response content (JSON) to a file
    with open('Newark_Julyaug_data.json', 'w') as json_file:
        json.dump(response.json(), json_file, indent=4)
    print("Data saved to Newark_Julyaug_data")
else:
    print("Error occurred while fetching data:", response.text)

time.sleep(2)  # Add a sleep time of 2 seconds

Data saved to Newark_Julyaug_data


In [70]:
import json

# Read the content of the first JSON file (May-June data)
with open('Newark_mayjune_data.json', 'r') as file:
    mayjune_data = json.load(file)
    print("May-June data length:", len(mayjune_data["results"]))

# Read the content of the second JSON file (July-August data)
with open('Newark_Julyaug_data.json', 'r') as file:
    julyaug_data = json.load(file)
    print("July-August data length:", len(julyaug_data["results"]))

# Combine "results" data from both JSON files into a single list
combined_results = mayjune_data["results"] + julyaug_data["results"]

# Construct a new dictionary with combined "results"
combined_data = {
    "meta": {
        "source": "Newark_mayjune_data and Newark_Julyaug_data"
    },
    "results": combined_results
}

# Write combined data to a JSON file
with open('NEWARK_data.json', 'w') as file:
    json.dump(combined_data, file)

print("Combined JSON file 'NEWARK_data.json' created successfully.")


May-June data length: 35000
July-August data length: 20000
Combined JSON file 'NEWARK_data.json' created successfully.


In [71]:
####    Task: Transform, filter and store the OpenAQ data as CSV


import pandas as pd
import json

# Load the JSON data
with open('NEWARK_data.json') as f:
    data = json.load(f)

# Normalize the 'results' part of the JSON data
results = data['results']
df = pd.json_normalize(results)

# Convert the date field to datetime64 if it exists
if 'date.local' in df.columns:
    df['local_time'] = pd.to_datetime(df['date.local'])

# Check if the necessary columns are present in the DataFrame
expected_columns = ['location', 'parameter', 'value', 'local_time']
missing_columns = [col for col in expected_columns if col not in df.columns]

# If any columns are missing, print a message and handle the issue accordingly
if missing_columns:
    print(f"Missing columns: {missing_columns}")
else:
    # Extract latitude and longitude from the nested 'coordinates' fields
    df['sensor_lat'] = df['coordinates.latitude']
    df['sensor_lon'] = df['coordinates.longitude']

    # Reduce the DataFrame to include only specified fields
    df = df[expected_columns + ['sensor_lat', 'sensor_lon']]

    # Store it to a file called: 20230606_detroit_downtown_7_5km_aq.csv
    filename = 'NEWARK_7.5km_data.csv'
    df.to_csv(filename, index=False)
    print(f"Data stored in '{filename}' successfully.")


Data stored in 'NEWARK_7.5km_data.csv' successfully.


In [74]:
import pandas as pd

# Load data from both datasets into DataFrames
Newark_df = pd.read_csv('NEWARK_7.5km_data.csv')
detroit_df = pd.read_csv('20230606_detroit_downtown_7_5km_aq.csv')

# Filter data for June 6, 2023
Newark_june6_df = Newark_df.loc[Newark_df['local_time'].str.startswith("2023-06-06")]
detroit_june6_df = detroit_df.loc[detroit_df['local_time'].str.startswith("2023-06-06")]

# Calculate sensor density for Newark International Airport
Newark_sensor_density = len(Newark_june6_df)

# Calculate intensity of PM2.5 for Newark International Airport
Newark_pm25_intensity = Newark_june6_df['value'].mean()

# Calculate sensor density for Detroit
detroit_sensor_density = len(detroit_june6_df)

# Calculate intensity of PM2.5 for Detroit
detroit_pm25_intensity = detroit_june6_df['value'].mean()

print("Sensor Density for Newark International Airport:", Newark_sensor_density)
print("Intensity of PM2.5 for Newark International Airport:", Newark_pm25_intensity)
print("Sensor Density for Detroit:", detroit_sensor_density)
print("Intensity of PM2.5 for Detroit:", detroit_pm25_intensity)


Sensor Density for Newark International Airport: 453
Intensity of PM2.5 for Newark International Airport: 54.55607064017661
Sensor Density for Detroit: 5186
Intensity of PM2.5 for Detroit: 72.8891245661396



Comment:

On June 6, 2023, both Newark International Airport and Detroit had active air quality monitoring systems.
However, Detroit has higher sensor density, 5186 sensors compared to Newark's 453.
This stark difference suggests Detroit's more comprehensive coverage in monitoring air quality within its vicinity.
Despite Newark's lower sensor density, it experienced a notable intensity of PM2.5 pollution, recording 54.56 µg/m³. Conversely, Detroit faced a significantly higher intensity of PM2.5 pollution at 72.89 µg/m³. 
These disparities underscore the varying levels of air quality monitoring infrastructure and pollution levels between the two locations, 