In [38]:
# Import data: read the monthly CO2 CSV into a pandas DataFrame

import pandas as pd  # DataFrame library used by every cell below

file_path = 'MaunaLoaCO2_monthly_sitename.csv'  # relative path, resolved against the working directory
data = pd.read_csv(filepath_or_buffer=file_path)  # parse the whole file; first CSV row supplies the column names

print(data.head(5))  # preview the first five rows to check that the columns parsed as expected

   Year  Month  Decimal_date  Monthly_average  deseasonalized  numofdays  \
0  1958      3     1958.2027           315.70          314.43         -1   
1  1958      4     1958.2877           317.45          315.16         -1   
2  1958      5     1958.3699           317.51          314.71         -1   
3  1958      6     1958.4548           317.24          315.14         -1   
4  1958      7     1958.5370           315.86          315.18         -1   

   stdofdays      site  
0      -9.99  MaunaLoa  
1      -9.99  MaunaLoa  
2      -9.99  MaunaLoa  
3      -9.99  MaunaLoa  
4      -9.99  MaunaLoa  


In [39]:
# Filter data: drop every row that belongs to the years 1958 or 2022
# NOTE(review): presumably these are excluded because they are partial years
# (the preview above shows 1958 starting in March) - confirm for 2022.

# isin() flags rows whose Year is one of the listed values; ~ inverts the mask so those rows are removed
filtered_data = data[~data['Year'].isin([1958, 2022])]
print(filtered_data)

     Year  Month  Decimal_date  Monthly_average  deseasonalized  numofdays  \
10   1959      1     1959.0411           315.58          315.55         -1   
11   1959      2     1959.1260           316.48          315.86         -1   
12   1959      3     1959.2027           316.65          315.38         -1   
13   1959      4     1959.2877           317.72          315.41         -1   
14   1959      5     1959.3699           318.29          315.49         -1   
..    ...    ...           ...              ...             ...        ...   
761  2021      8     2021.6250           414.47          416.49         26   
762  2021      9     2021.7083           413.30          416.90         27   
763  2021     10     2021.7917           413.93          417.30         29   
764  2021     11     2021.8750           415.01          417.07         30   
765  2021     12     2021.9583           416.71          417.44         28   

     stdofdays      site  
10       -9.99  MaunaLoa  
11       

In [40]:
# Filter data: keep only rows where both "numofdays" and "stdofdays" are non-negative
# NOTE(review): the negative entries printed earlier (-1 and -9.99) look like
# missing-value placeholders - confirm against the dataset description.

# ge(0) tests each of the two columns element-wise; all(axis=1) requires both to pass for a row to be kept
filtered_data = filtered_data[filtered_data[['numofdays', 'stdofdays']].ge(0).all(axis=1)]
print(filtered_data)

     Year  Month  Decimal_date  Monthly_average  deseasonalized  numofdays  \
194  1974      5     1974.3750           333.19          330.22         13   
195  1974      6     1974.4583           332.20          329.78         25   
196  1974      7     1974.5417           331.07          330.21         24   
197  1974      8     1974.6250           329.15          330.54         26   
198  1974      9     1974.7083           327.33          330.44         22   
..    ...    ...           ...              ...             ...        ...   
761  2021      8     2021.6250           414.47          416.49         26   
762  2021      9     2021.7083           413.30          416.90         27   
763  2021     10     2021.7917           413.93          417.30         29   
764  2021     11     2021.8750           415.01          417.07         30   
765  2021     12     2021.9583           416.71          417.44         28   

     stdofdays      site  
194       0.31  MaunaLoa  
195      

In [58]:
# Compute annual statistics: mean of "Monthly_average" for each year

# Group the filtered rows by Year and average the monthly values that remain
# in each group; as_index=False keeps Year as an ordinary column.
annual_average = (
    filtered_data
    .groupby('Year', as_index=False)
    .agg(annual_average=('Monthly_average', 'mean'))
)
print(annual_average.head())  # preview only; the result itself lives in `annual_average`

   Year  annual_average
0  1974      329.763750
1  1975      331.160909
2  1976      332.026667
3  1977      333.843333
4  1978      335.415000


In [56]:
# Compute annual statistics: mean of the "deseasonalized" column for each year

# Same per-year grouping as for the raw monthly averages, applied to the
# deseasonalized series; as_index=False keeps Year as an ordinary column.
deseasonalized_annual_average = (
    filtered_data
    .groupby('Year', as_index=False)
    .agg(deseasonalized_annual_average=('deseasonalized', 'mean'))
)
print(deseasonalized_annual_average.head())  # preview the first rows of the result

   Year  deseasonalized_annual_average
0  1974                     330.346250
1  1975                     331.072727
2  1976                     332.028333
3  1977                     333.841667
4  1978                     335.413333


In [54]:
# Compute annual statistics: standard deviation of the monthly values within each year
# Note: a year contributes only the months that survived the filtering above,
# so some years are computed from fewer than 12 values.

# 'std' is the pandas groupby standard deviation with its default settings
std_of_months = (
    filtered_data
    .groupby('Year', as_index=False)
    .agg(std_of_months=('Monthly_average', 'std'))
)
print(std_of_months.head())  # preview the first rows of the result

   Year  std_of_months
0  1974       2.203789
1  1975       1.943751
2  1976       2.085610
3  1977       1.931882
4  1978       1.995014


In [68]:
# Apply Custom Function 

def CO2_category(CO2_level):
    """Classify a CO2 concentration value as 'Low', 'Moderate' or 'High'.

    Parameters
    ----------
    CO2_level : number
        The concentration value to classify (here, an annual average).

    Returns
    -------
    str
        'Low'      if CO2_level is below 350,
        'Moderate' if CO2_level is between 350 and 400 (both inclusive),
        'High'     if CO2_level is above 400.
    """
    if CO2_level < 350:
        return 'Low'
    # The previous test `350 >= CO2_level >= 400` could never be true (no number
    # is both <= 350 and >= 400), so 'Moderate' was unreachable and every value
    # of 350 or more was labelled 'High'. Values below 350 have already returned
    # above, so a single upper-bound check selects the 350-400 band.
    if CO2_level <= 400:
        return 'Moderate'
    return 'High'

# Combine the three per-year tables into one DataFrame, joining on Year, and
# append a 'CO2_Category' column by applying CO2_category to each annual average.
annual_stats = (
    annual_average
    .merge(deseasonalized_annual_average, on='Year')
    .merge(std_of_months, on='Year')
    .assign(CO2_Category=lambda stats: stats['annual_average'].apply(CO2_category))
)

print(annual_stats.head())  # preview: the three statistics plus the new category column

   Year  annual_average  deseasonalized_annual_average  std_of_months  \
0  1974      329.763750                     330.346250       2.203789   
1  1975      331.160909                     331.072727       1.943751   
2  1976      332.026667                     332.028333       2.085610   
3  1977      333.843333                     333.841667       1.931882   
4  1978      335.415000                     335.413333       1.995014   

  CO2_Category  
0          Low  
1          Low  
2          Low  
3          Low  
4          Low  


In [72]:
# Export the combined annual statistics to a CSV file in the working directory
output_file = 'Feyereisen_MaunaLoaCO2_annual_Hw3.csv'
# index=False leaves the DataFrame's row index out of the file
annual_stats.to_csv(path_or_buf=output_file, index=False)