In [None]:
# Dependencies and Setup
import matplotlib.pyplot as plt
import pandas as pd, datetime
import numpy as np
import requests
import time
from scipy.stats import linregress
from ydata_profiling import ProfileReport
from pathlib import Path


# Import the OpenWeatherMap API key
from api_keys import airpollution_api_key 


# Import citipy to determine the cities based on latitude and longitude
from citipy import citipy

# Fall back to a placeholder string when api_keys supplies an empty/falsy key,
# so later cells can still build a request URL.
if not airpollution_api_key:
    airpollution_api_key = "YOUR_API_KEY"


In [None]:
# Read the site metadata and the measurement table from the Resources folder
ntn_metadata = pd.read_csv("../Resources/cleaned-ntn-metadata.csv")
ntn_all = pd.read_csv("../Resources/cleaned-NTN-ALL-a-s-ueq.csv")

# Left-join the measurements onto the metadata using the shared 'siteID' key,
# keeping every metadata row even when it has no matching measurement
ntn_data = ntn_metadata.merge(ntn_all, how="left", on="siteID")
ntn_data.head()

In [None]:
# Review data types: print the DataFrame summary (columns and their dtypes)
# for the merged ntn_data frame
ntn_data.info()

In [None]:
# Describe the data: display the summary statistics produced by
# DataFrame.describe() for the merged ntn_data frame
ntn_data.describe()

In [None]:
# Count the missing values in each column of the merged data
ntn_data.isna().sum()

In [None]:
# Capitalize the coordinate column names ("latitude" -> "Latitude",
# "longitude" -> "Longitude"); later cells look the columns up by these names
coordinate_names = {"latitude": "Latitude", "longitude": "Longitude"}
ntn_data = ntn_data.rename(columns=coordinate_names)
ntn_data.head()

In [None]:
# Flag rows whose (Latitude, Longitude) pair already appeared in an earlier row,
# then count how many rows were flagged
duplicates = ntn_data.duplicated(["Latitude", "Longitude"])
duplicates.sum()

In [None]:
# Build the ydata_profiling report for the merged data and render it
# inside the notebook
profile = ProfileReport(ntn_data, title="Acid Rain Profile Report")
profile.to_notebook_iframe()

In [None]:
# Write the acid rain profiling report to an HTML file in the parent folder
profile.to_file(output_file=Path("../acid_rain_profile_report.html"))

In [None]:
# Write the acid rain profiling report to a JSON file in the parent folder
profile.to_file(output_file=Path("../acid_rain_profile_report.json"))

In [None]:
# Show the dtypes of the two coordinate columns to confirm they are numeric
# before they are interpolated into API request URLs
ntn_data.dtypes[["Latitude", "Longitude"]]

In [None]:


# Collect one row of OpenWeatherMap air pollution readings per row of ntn_data.
# Every row of ntn_data contributes exactly one entry (NaN-filled when the
# lookup fails), so the list stays positionally aligned with ntn_data.
acid_rain_air_pollution = []

# HTTPS keeps the API key in the query string from travelling in clear text.
AIR_POLLUTION_URL = "https://api.openweathermap.org/data/2.5/air_pollution"
# Without a timeout a stalled connection would hang the cell indefinitely.
REQUEST_TIMEOUT_SECONDS = 10
# Pollutant fields read from the response, in output-column order.
POLLUTANT_KEYS = ["co", "no", "no2", "o3", "so2", "pm2_5", "pm10", "nh3"]
# Placeholder for a failed lookup: lat, lon and every pollutant set to NaN.
MISSING_ROW = [np.nan] * (2 + len(POLLUTANT_KEYS))

# Successful responses keyed by (lat, lon); rows that repeat a coordinate pair
# reuse the stored answer instead of issuing an identical request again.
fetched_rows = {}

for i in ntn_data.index:
    # Get the latitude and longitude
    lat = ntn_data.loc[i, "Latitude"]
    lon = ntn_data.loc[i, "Longitude"]

    row = fetched_rows.get((lat, lon))
    if row is None:
        # Use the OpenWeatherMap API to get the air pollution data
        url = f"{AIR_POLLUTION_URL}?lat={lat}&lon={lon}&appid={airpollution_api_key}"
        try:
            acid_rain_ap = requests.get(url, timeout=REQUEST_TIMEOUT_SECONDS).json()

            # Check that the response contains the expected fields and at
            # least one reading (an empty "list" counts as no data)
            if (
                isinstance(acid_rain_ap, dict)
                and "coord" in acid_rain_ap
                and acid_rain_ap.get("list")
            ):
                coordinates = acid_rain_ap["coord"]
                components = acid_rain_ap["list"][0]["components"]
                # Built in a single expression so a missing key leaves `row`
                # as None rather than a partially filled list
                row = [
                    coordinates["lat"],
                    coordinates["lon"],
                    *(components[key] for key in POLLUTANT_KEYS),
                ]
                fetched_rows[(lat, lon)] = row
            else:
                print(f"No data for index {i}")
        except (requests.RequestException, ValueError, KeyError, IndexError, TypeError) as e:
            # The exception text from requests can embed the full URL, which
            # contains the API key; mask it before it reaches the cell output
            safe_message = str(e).replace(str(airpollution_api_key), "***")
            print(f"Error fetching data for index {i}: {safe_message}")

    acid_rain_air_pollution.append(row if row is not None else MISSING_ROW)

# Preview the first few rows only; the full list is one entry per ntn_data row
acid_rain_air_pollution[:5]


In [None]:
# Turn the collected API rows into a DataFrame: two coordinate columns
# followed by one column per pollutant, in the order the rows were built
columns = [
    "Latitude", "Longitude",
    "CO", "NO", "NO2", "O3",
    "SO2", "PM2.5", "PM10", "NH3",
]
rain_air_pollution_df = pd.DataFrame(data=acid_rain_air_pollution, columns=columns)
rain_air_pollution_df.head()

In [None]:
# Review the data types: print the DataFrame summary (columns and their dtypes)
# for the air pollution frame
rain_air_pollution_df.info()

In [None]:
# Combine the acid rain data with the air pollution data side by side.
# pd.concat(axis=1) pairs rows by index label, not by coordinates, so the two
# frames must have one row per record in the same order. If any rows are
# missing from rain_air_pollution_df, every later row would be attached to the
# wrong site without an error, so fail loudly instead.
if len(rain_air_pollution_df) != len(ntn_data):
    raise ValueError(
        f"Row count mismatch: ntn_data has {len(ntn_data)} rows but "
        f"rain_air_pollution_df has {len(rain_air_pollution_df)}; "
        "a column-wise concat would pair pollution readings with the wrong sites."
    )

# NOTE(review): both inputs carry "Latitude" and "Longitude" columns, so the
# result contains each of those names twice - confirm the SQL load expects that.
acid_rain_ap_df = pd.concat([ntn_data, rain_air_pollution_df], axis=1)
acid_rain_ap_df.head()

In [None]:
# Save acid_rain_ap_df to a CSV file to use for the SQL database
output_csv = Path("../acid_rain_cleaned_data/acid_rain_air_pollution.csv")
# to_csv does not create missing folders; make sure the target directory
# exists so the export also works on a fresh checkout
output_csv.parent.mkdir(parents=True, exist_ok=True)
acid_rain_ap_df.to_csv(output_csv, index=False)