In [1]:
#Import dependancies
import pandas as pd
import matplotlib.pyplot as plt
import scipy.stats as st
import numpy as np
%matplotlib inline

csv_path = "aqi v2 data.csv"
aqi_ca20 = pd.read_csv(csv_path)
aqi_ca20

Unnamed: 0,Date,2020 AQI Value,Main Pollutant,Site Name,Site ID,Source,20-year High (1980-2019),20-year Low (1980-2019),5-year Median (2015-2019),New Date
0,1/1/2020,102,PM2.5,Long Beach-Route 710 Near Road,06-037-4008,AirNow,180,52,145,1-Jan
1,1/2/2020,66,PM2.5,Long Beach-Route 710 Near Road,06-037-4008,AirNow,155,37,93,2-Jan
2,1/3/2020,87,PM2.5,Long Beach-Route 710 Near Road,06-037-4008,AirNow,110,24,100,3-Jan
3,1/4/2020,99,PM2.5,Long Beach-Route 710 Near Road,06-037-4008,AirNow,118,47,85,4-Jan
4,1/5/2020,96,PM2.5,Long Beach-Route 710 Near Road,06-037-4008,AirNow,102,39,57,5-Jan
...,...,...,...,...,...,...,...,...,...,...
109,4/20/2020,43,Ozone,Pasadena,06-037-2005,AirNow,119,42,67,20-Apr
110,4/21/2020,48,PM2.5,Glendora,06-037-0016,AirNow,179,43,87,21-Apr
111,4/22/2020,101,Ozone,Glendora,06-037-0016,AirNow,161,43,61,22-Apr
112,4/23/2020,140,Ozone,Glendora,06-037-0016,AirNow,122,45,69,


In [2]:
#Reviewing column names
aqi_ca20.columns

Index(['Date', '2020 AQI Value', 'Main Pollutant', 'Site Name', 'Site ID',
       'Source', '20-year High (1980-2019)', '20-year Low (1980-2019)',
       '5-year Median (2015-2019)', 'New Date'],
      dtype='object')

In [3]:
#Dropping columns that are not needed
aqi_ca20 = aqi_ca20.drop(['Site ID','Source'],axis=1)

In [4]:
#Reviewing datatypes for yearly values
aqi_ca20.dtypes

Date                         object
2020 AQI Value                int64
Main Pollutant               object
Site Name                    object
20-year High (1980-2019)      int64
20-year Low (1980-2019)       int64
5-year Median (2015-2019)     int64
New Date                     object
dtype: object

In [5]:
#Changing 5-year Median datatype to integer. Received error. 
aqi_ca20 = aqi_ca20[('5-year Median (2015-2019)')].astype(int)

In [6]:
#After requesting assistance, was guided in utilizing the below try/except to locate the culprit: "."
for item in list(aqi_ca20['5-year Median (2015-2019)']):
    try:
        int(item)
    except:
        print(f"Could not converted to int. Here is the number{item}")

KeyError: '5-year Median (2015-2019)'

In [None]:
#Obliterate that pesky outlier.
#aqi_ca20.loc[aqi_ca20['5-year Median (2015-2019)'] == ".",:]
#tmp = aqi_ca20.drop(index=59)

In [None]:
#Confirm obliteration.
tmp.loc[tmp['5-year Median (2015-2019)'] == ".",:]

In [None]:
#Resume my previous quest to change the datatype. Success. 
aqi_ca20.dtypes

In [None]:
#Create groupbys to utilize in dataframe. 
main_pollutant_groupy = aqi_ca20.groupby(["Main Pollutant"])

total_pollutant = main_pollutant_groupy["Main Pollutant"].count()
avg_2020 = main_pollutant_groupy["2020 AQI Value"].mean().map("{:.0f}".format)
highest_in_20Y = main_pollutant_groupy["20-year High (1980-2019)"].max()
lowest_in_20Y = main_pollutant_groupy["20-year Low (1980-2019)"].min()

highest_in_5Y = main_pollutant_groupy['5-year Median (2015-2019)'].max()
lowest_in_5Y = main_pollutant_groupy['5-year Median (2015-2019)'].min()

#Creating DataFrame
pollutants = pd.DataFrame({
                                "Pollutant Totals in 2020": total_pollutant,
                                "Average in 2020": avg_2020,
                                "Highest in 20 Years": highest_in_20Y,
                                "Lowest in 20 Years": lowest_in_20Y,
                                "Highest in 5 Years": highest_in_5Y,
                                "Lowest in 5 Years": lowest_in_5Y})

pollutantsants

In [None]:
aqi_ca20.describe()

In [None]:
#Print statements

print(f"The total Air Quality Index (AQI)/day caused by Ozone in 2020 is {pollutants.iat[0,0]}")
print(f"The total Air Quality Index (AQI)/day caused by PM2.5 in 2020 is {pollutants.iat[1,0]}")
print(f"---------------------------------------------------------------")
print(f"The average Air Quality Index (AQI) caused by Ozone in 2020 is {pollutants.iat[0,1]}")
print(f"The average Air Quality Index (AQI) caused by PM2.5 in 2020 is {pollutants.iat[1,1]}")
print(f"---------------------------------------------------------------")
print(f"The highest Air Quality Index (AQI) caused by Ozone in the last 20 years is {pollutants.iat[0,2]}")
print(f"The highest Air Quality Index (AQI) caused by PM2.5 in the last 20 years is {pollutants.iat[1,2]}")
print(f"---------------------------------------------------------------")
print(f"The lowest Air Quality Index (AQI) caused by Ozone in the last 20 years is {pollutants.iat[0,3]}")
print(f"The lowest Air Quality Index (AQI) caused by PM2.5 in the last 20 years is {pollutants.iat[1,3]}")
print(f"---------------------------------------------------------------")
print(f"The highest Air Quality Index (AQI) caused by Ozone in the last 5 years is {pollutants.iat[0,4]}")
print(f"The highest Air Quality Index (AQI) caused by PM2.5 in the last 5 years is {pollutants.iat[1,4]}")
print(f"---------------------------------------------------------------")
print(f"The lowest Air Quality Index (AQI) caused by Ozone in the last 5 years is {pollutants.iat[0,5]}")
print(f"The lowest Air Quality Index (AQI) caused by PM2.5 in the last 5 years is {pollutants.iat[1,5]}")

In [None]:
#2020 Current Pollutation Level Line Plot
aqi_ca20.plot(kind= "line", x='Date', y='2020 AQI Value', marker = "*", color = "red", \
                figsize=(20,5), title= "2020 CA Air Quality Index (AQI)")
plt.ylabel("2020 AQI Value")
plt.savefig("2020_CA_AQI Values.png")
plt.show()

In [None]:
#1980-2019 Highest Pollution Level Line Plot
aqi_ca20.plot(kind= "line",x= 'New Date', y='20-year High (1980-2019)', marker = "*", color = "red", \
                figsize=(20,8),use_index=True, title= "1980-2019 CA Highest Air Quality Index (AQI)")
plt.ylabel("AQI Value")
plt.show()

In [None]:
#1980-2019 Lowest Pollution Level Line Plot
aqi_ca20.plot(kind= "line", x='New Date', y='20-year Low (1980-2019)', marker = "*", color = "red", \
                figsize=(20,8), title= "1980-2019 CA Lowest Air Quality Index (AQI)")
plt.ylabel("AQI Value")
plt.show()

In [None]:
#Last Five Years Average Pollution Level
aqi_ca20.plot(kind= "line", x='New Date', y='5-year Median (2015-2019)',marker = "*", color = "red", \
                figsize=(20,7), title= "Last 5 Years Average AQI Level (CA)")
plt.ylabel("AQI Value")
plt.savefig("CA_AQI_Last Five Years.png")
plt.show()