In [1]:
#google.colab's drive module mounts Google Drive; pandas parses the csv file
from google.colab import drive
import pandas as pd

drive.mount('/content/drive')

#folder in gdrive that holds the csv, followed by the full path of the csv inside it
filedir = '/content/drive/MyDrive/fwe458/datasets/'
fname = "".join([filedir, "MaunaLoaCO2_monthly_sitename.csv"])

#load the contents of the csv file into a dataframe
df = pd.read_csv(fname)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [2]:
#preview the top five rows of the dataframe
df.head(n=5)

Unnamed: 0,Year,Month,Decimal_date,Monthly_average,deseasonalized,numofdays,stdofdays,site
0,1958,3,1958.2027,315.7,314.43,-1,-9.99,MaunaLoa
1,1958,4,1958.2877,317.45,315.16,-1,-9.99,MaunaLoa
2,1958,5,1958.3699,317.51,314.71,-1,-9.99,MaunaLoa
3,1958,6,1958.4548,317.24,315.14,-1,-9.99,MaunaLoa
4,1958,7,1958.537,315.86,315.18,-1,-9.99,MaunaLoa


In [10]:
#drop every row whose 'Year' is 1958 or 2022; the result replaces the initial dataframe
excluded_years = [1958, 2022]
df = df.loc[~df["Year"].isin(excluded_years)]

#keep only the rows where "numofdays" and "stdofdays" are both non-negative
#(a single combined mask selects the same rows as filtering on each column in turn)
non_negative_days = (df["numofdays"] >= 0) & (df["stdofdays"] >= 0)
df = df.loc[non_negative_days]

#print the first 5 rows, then the last 5 rows, of the filtered dataframe
print(df.head())
print(df.tail())

     Year  Month  Decimal_date  Monthly_average  deseasonalized  numofdays  \
194  1974      5     1974.3750           333.19          330.22         13   
195  1974      6     1974.4583           332.20          329.78         25   
196  1974      7     1974.5417           331.07          330.21         24   
197  1974      8     1974.6250           329.15          330.54         26   
198  1974      9     1974.7083           327.33          330.44         22   

     stdofdays      site CO2_Category  
194       0.31  MaunaLoa          Low  
195       0.37  MaunaLoa          Low  
196       0.24  MaunaLoa          Low  
197       0.31  MaunaLoa          Low  
198       0.47  MaunaLoa          Low  
     Year  Month  Decimal_date  Monthly_average  deseasonalized  numofdays  \
761  2021      8     2021.6250           414.47          416.49         26   
762  2021      9     2021.7083           413.30          416.90         27   
763  2021     10     2021.7917           413.93          

In [4]:
#one value per year: the mean of that year's "Monthly_average" entries
annual_average = (
    df.groupby('Year')['Monthly_average']
    .agg('mean')
)
annual_average.head()

Unnamed: 0_level_0,Monthly_average
Year,Unnamed: 1_level_1
1974,329.76375
1975,331.160909
1976,332.026667
1977,333.843333
1978,335.415


In [5]:
#one value per year: the mean of that year's "deseasonalized" entries
deseasonalized_annual_average = (
    df.groupby('Year')['deseasonalized']
    .agg('mean')
)
deseasonalized_annual_average.head()

Unnamed: 0_level_0,deseasonalized
Year,Unnamed: 1_level_1
1974,330.34625
1975,331.072727
1976,332.028333
1977,333.841667
1978,335.413333


In [6]:
#one value per year: the standard deviation of that year's "Monthly_average" entries
std_of_months = (
    df.groupby("Year")["Monthly_average"]
    .agg("std")
)
std_of_months.head()

Unnamed: 0_level_0,Monthly_average
Year,Unnamed: 1_level_1
1974,2.203789
1975,1.943751
1976,2.08561
1977,1.931882
1978,1.995014


In [7]:
def co2_level(ppm, low_threshold=350, high_threshold=400):
    """Classify a CO2 concentration as 'Low', 'Moderate', or 'High'.

    Parameters
    ----------
    ppm : float
        The CO2 value to classify.
    low_threshold : float, optional
        Values strictly below this are 'Low'. Defaults to 350.
    high_threshold : float, optional
        Values that are not 'Low' and are strictly below this are 'Moderate';
        everything else is 'High'. Defaults to 400.

    Returns
    -------
    str or float
        'Low', 'Moderate', or 'High'. A NaN input is returned unchanged so
        that a missing measurement stays missing instead of being labelled.
    """
    #NaN is the only value that is not equal to itself; without this guard
    #both comparisons below are False for NaN and it would be labelled 'High'
    if ppm != ppm:
        return ppm
    #below the lower cut-off
    if ppm < low_threshold:
        return 'Low'
    #at or above the lower cut-off but below the upper cut-off
    if ppm < high_threshold:
        return 'Moderate'
    #at or above the upper cut-off
    return 'High'

In [8]:
#run co2_level once for every value in the 'Monthly_average' column
co2_labels = df['Monthly_average'].apply(co2_level)
#attach the returned strings to the dataframe as a column called 'CO2_Category'
df['CO2_Category'] = co2_labels

df.head()

Unnamed: 0,Year,Month,Decimal_date,Monthly_average,deseasonalized,numofdays,stdofdays,site,CO2_Category
194,1974,5,1974.375,333.19,330.22,13,0.31,MaunaLoa,Low
195,1974,6,1974.4583,332.2,329.78,25,0.37,MaunaLoa,Low
196,1974,7,1974.5417,331.07,330.21,24,0.24,MaunaLoa,Low
197,1974,8,1974.625,329.15,330.54,26,0.31,MaunaLoa,Low
198,1974,9,1974.7083,327.33,330.44,22,0.47,MaunaLoa,Low


In [9]:
#write the dataframe out as a csv file at a fixed location in gdrive
#NOTE(review): the file name says "annual", but df holds one row per month -- confirm this is the frame meant to be saved
output_path = "/content/drive/MyDrive/fwe458/Kenton_MaunaLoaCO2_annual_Hw3.csv"
df.to_csv(output_path)