In [1]:
# Import 3rd party libraries
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from datetime import datetime
import re

# Configure Notebook
# Render matplotlib figures inline in the notebook output.
%matplotlib inline
# Apply the 'fivethirtyeight' matplotlib style sheet to every figure below.
plt.style.use('fivethirtyeight')
# Apply seaborn's "notebook" plotting context; kept after plt.style.use()
# because both calls write matplotlib rcParams, so their order matters.
sns.set_context("notebook")
import warnings
# Silence every warning for the rest of the session.
# NOTE(review): this also hides deprecation warnings from pandas/seaborn.
warnings.filterwarnings('ignore')

In [5]:
# Load the Farmington Community Hall AQ monitor data, one CSV per year, into pandas DataFrames.
# The second row of each CSV contains the header names, hence header=[1].
# All three files are read with paths relative to the notebook's working directory
# (no machine-specific absolute paths); year2 and year3 must be loaded here because
# the cleaning and concat cells below use them.
year1 = pd.read_csv('2019_aq_data.csv', header=[1])
year2 = pd.read_csv('2020_aq_data.csv', header=[1])
year3 = pd.read_csv('2021_aq_data.csv', header=[1])

year1.tail()

FileNotFoundError: [Errno 2] File b'2019_aq_data.csv' does not exist: b'2019_aq_data.csv'

In [None]:
# Trim the trailing 8 rows of every yearly frame; they contain data summaries.
# TODO: check that it is always the last 8 rows for other files.
# Each name is reassigned from itself, so re-running this cell trims another 8 rows.
year1, year2, year3 = (frame[:-8] for frame in (year1, year2, year3))

In [None]:
# Skip the first row of every yearly frame (it contains the units).
# Each name is reassigned from itself, so re-running this cell drops another row.
year1, year2, year3 = (frame.iloc[1:] for frame in (year1, year2, year3))

In [None]:
# Stack the three yearly frames into one frame with a fresh 0..n-1 index
FarmingtonCommunityHall_AQ = pd.concat(
    (year1, year2, year3),
    ignore_index=True,
)
FarmingtonCommunityHall_AQ.tail()

In [None]:
# Set column dtypes: Date and Time become strings, pollutant columns become numeric
FarmingtonCommunityHall_AQ[["Date", "Time"]] = FarmingtonCommunityHall_AQ[["Date", "Time"]].astype(str)

pollutant_columns = ["NOx", "SO2", "NO", "NO2", "TRS", "O3"]
FarmingtonCommunityHall_AQ[pollutant_columns] = (
    FarmingtonCommunityHall_AQ[pollutant_columns].apply(pd.to_numeric)
)

In [None]:
# Turn the Date and Time columns into a DatetimeIndex
# Named datetime_format (not `format`) so the builtin format() is not shadowed
# for the rest of the notebook.
datetime_format = '%m/%d/%Y %I:%M %p'

# Join "Date" and "Time" into a single string per row, separated by a space.
FarmingtonCommunityHall_AQ['Date'] = FarmingtonCommunityHall_AQ[['Date', 'Time']].agg(' '.join, axis=1)

# Rewrite '24:00' as '12:00' so it can be parsed with %I (12-hour clock).
# NOTE(review): confirm how the source files encode midnight (AM/PM marker and
# whether the date should roll over to the next day) -- not verifiable from here.
FarmingtonCommunityHall_AQ['Date'] = FarmingtonCommunityHall_AQ['Date'].str.replace('24:00', '12:00')

# The joined column already holds Python strings, so it is parsed directly
# (no extra cast to the pandas "string" dtype, which needs pandas >= 1.0).
FarmingtonCommunityHall_AQ['Datetime'] = pd.to_datetime(FarmingtonCommunityHall_AQ['Date'], format=datetime_format)
FarmingtonCommunityHall_AQ = FarmingtonCommunityHall_AQ.set_index(pd.DatetimeIndex(FarmingtonCommunityHall_AQ['Datetime']))

# Reassign instead of inplace=True; the helper columns are no longer needed
# once the index is set.
FarmingtonCommunityHall_AQ = FarmingtonCommunityHall_AQ.drop(['Time', 'Date', 'Datetime'], axis=1)

# View the df
FarmingtonCommunityHall_AQ.head(24)

In [None]:
# Fill missing values by linear interpolation down each column
FarmingtonCommunityHall_AQ = FarmingtonCommunityHall_AQ.interpolate(
    method="linear",
    axis="index",
)
FarmingtonCommunityHall_AQ.head()

In [None]:
# Count the missing values remaining in each column
FarmingtonCommunityHall_AQ.isna().sum(axis=0)

In [None]:
# Daily mean of every pollutant: resample to calendar-day bins ('D') and average
FarmingtonCommunityHall_AQ_Daily = (
    FarmingtonCommunityHall_AQ
    .resample(rule='D')
    .mean()
)
FarmingtonCommunityHall_AQ_Daily.head()

In [None]:
# Monthly mean of every pollutant: resample to month-end bins ('M') and average
FarmingtonCommunityHall_AQ_Monthly = (
    FarmingtonCommunityHall_AQ
    .resample(rule='M')
    .mean()
)
FarmingtonCommunityHall_AQ_Monthly.head()

In [None]:
# Plot the daily and monthly mean O3 series on one set of axes
fig, ax = plt.subplots(figsize=(10, 5))
ax.plot(FarmingtonCommunityHall_AQ_Daily.index, FarmingtonCommunityHall_AQ_Daily['O3'], 'gray', label='Daily Mean')
ax.plot(FarmingtonCommunityHall_AQ_Monthly.index, FarmingtonCommunityHall_AQ_Monthly['O3'], 'r', label='Monthly Mean')

ax.set_ylabel('O3 Concentration (ppb)')
ax.set_xlabel('Date', fontsize=14)

ax.legend()
ax.set_title('Daily Ozone (ppb) Concentration', fontsize=16)
plt.show()

In [None]:
# Plot the daily and monthly mean NOx series on one set of axes
fig, ax = plt.subplots(figsize=(10, 5))

ax.plot(FarmingtonCommunityHall_AQ_Daily.index, FarmingtonCommunityHall_AQ_Daily['NOx'], 'gray', label='Daily Mean')
ax.plot(FarmingtonCommunityHall_AQ_Monthly.index, FarmingtonCommunityHall_AQ_Monthly['NOx'], 'r', label='Monthly Mean')

ax.set_xlabel('Date', fontsize=14)
ax.set_ylabel('NOx (ppb)')

# The labels passed to plot() are only drawn when a legend is requested;
# without this call the gray/red lines cannot be told apart.
ax.legend()
ax.set_title('Daily NOx Concentration', fontsize=16)
plt.show()

In [None]:
# Plot the daily and monthly mean SO2 series on one set of axes
fig, ax = plt.subplots(figsize=(10, 5))

ax.plot(FarmingtonCommunityHall_AQ_Daily.index, FarmingtonCommunityHall_AQ_Daily['SO2'], 'gray', label='Daily Mean')
ax.plot(FarmingtonCommunityHall_AQ_Monthly.index, FarmingtonCommunityHall_AQ_Monthly['SO2'], 'r', label='Monthly Mean')

ax.set_xlabel('Date', fontsize=14)
ax.set_ylabel('SO2 (ppb)')

# The labels passed to plot() are only drawn when a legend is requested;
# without this call the gray/red lines cannot be told apart.
ax.legend()
ax.set_title('Daily SO2 Concentration', fontsize=16)
plt.show()

In [None]:
# Plot the daily and monthly mean NO series on one set of axes
fig, ax = plt.subplots(figsize=(10, 5))

ax.plot(FarmingtonCommunityHall_AQ_Daily.index, FarmingtonCommunityHall_AQ_Daily['NO'], 'gray', label='Daily Mean')
ax.plot(FarmingtonCommunityHall_AQ_Monthly.index, FarmingtonCommunityHall_AQ_Monthly['NO'], 'r', label='Monthly Mean')

ax.set_xlabel('Date', fontsize=14)
ax.set_ylabel('NO (ppb)')

# The labels passed to plot() are only drawn when a legend is requested;
# without this call the gray/red lines cannot be told apart.
ax.legend()
ax.set_title('Daily NO Concentration', fontsize=16)
plt.show()

In [None]:
# Plot the daily and monthly mean NO2 series on one set of axes
fig, ax = plt.subplots(figsize=(10, 5))

ax.plot(FarmingtonCommunityHall_AQ_Daily.index, FarmingtonCommunityHall_AQ_Daily['NO2'], 'gray', label='Daily Mean')
ax.plot(FarmingtonCommunityHall_AQ_Monthly.index, FarmingtonCommunityHall_AQ_Monthly['NO2'], 'r', label='Monthly Mean')

ax.set_xlabel('Date', fontsize=14)
ax.set_ylabel('NO2 (ppb)')

# The labels passed to plot() are only drawn when a legend is requested;
# without this call the gray/red lines cannot be told apart.
ax.legend()
ax.set_title('Daily NO2 Concentration', fontsize=16)
plt.show()

In [None]:
# Plot the daily and monthly mean TRS series on one set of axes
fig, ax = plt.subplots(figsize=(10, 5))

ax.plot(FarmingtonCommunityHall_AQ_Daily.index, FarmingtonCommunityHall_AQ_Daily['TRS'], 'gray', label='Daily Mean')
ax.plot(FarmingtonCommunityHall_AQ_Monthly.index, FarmingtonCommunityHall_AQ_Monthly['TRS'], 'r', label='Monthly Mean')

ax.set_xlabel('Date', fontsize=14)
ax.set_ylabel('TRS (ppb)')

# The labels passed to plot() are only drawn when a legend is requested;
# without this call the gray/red lines cannot be told apart.
ax.legend()
ax.set_title('Daily TRS Concentration', fontsize=16)
plt.show()

In [None]:
# Split the frame for the boxplots: every column except O3, and O3 on its own
no_O3 = FarmingtonCommunityHall_AQ.drop(columns='O3')
O3 = FarmingtonCommunityHall_AQ.loc[:, ['O3']]
O3.head()

In [None]:
# Box plot of each non-O3 pollutant's concentrations over the 3 year period.
# The frame is melted to long form (variable / value); outlier markers are hidden.
sns.boxplot(
    data=no_O3.melt(),
    x="variable",
    y="value",
    showfliers=False,
)

In [None]:
# Distribution for O3 over 3 year period
# Plots the O3-only frame; passing no_O3 here would just redraw the previous
# figure and leave O3 out entirely.
# The frame is melted to long form (variable / value); outlier markers are hidden.
sns.boxplot(x="variable", y="value", data=pd.melt(O3),
            showfliers=False)