In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
pd.set_option('display.max_rows', None)

In [2]:
# Function to filter and aggregate pollution data
def filter_and_average_pollution_data(pollution, states=('New Mexico',), counties=None):
    """Average the '75th Percentile' per state, county, parameter and unit.

    Parameters
    ----------
    pollution : pandas.DataFrame
        Must contain the columns 'State Name', 'County Name',
        'Parameter Name', 'Units of Measure' and '75th Percentile'.
    states : str or iterable of str, optional
        State names to keep. Defaults to ``('New Mexico',)``, which reproduces
        the filter that used to be hard-coded here.
    counties : str or iterable of str or None, optional
        County names to keep. ``None`` (the default) applies no county filter.

    Returns
    -------
    pandas.DataFrame
        One row per (state, county, parameter, unit) combination with the mean
        of '75th Percentile' for that combination. The grouping keys are
        ordinary columns, not the index.
    """
    key_columns = ['State Name', 'County Name', 'Parameter Name', 'Units of Measure']
    value_column = '75th Percentile'

    # A bare string is not list-like for Series.isin, so wrap it.
    if isinstance(states, str):
        states = [states]
    if isinstance(counties, str):
        counties = [counties]

    # Build a single row mask from the requested filters.
    keep = pollution['State Name'].isin(list(states))
    if counties is not None:
        keep = keep & pollution['County Name'].isin(list(counties))

    # Keep only the grouping keys and the value being averaged.
    county_pollution = pollution.loc[keep, key_columns + [value_column]]

    # groupby uses its default dropna=True, so rows with a missing key value
    # are excluded from the result.
    averaged_pollution = county_pollution.groupby(
        key_columns,
        as_index=False
    ).agg({value_column: 'mean'})

    return averaged_pollution

In [4]:
# Read the year-2000 annual monitor summary, then reduce it to per-county means.
pollution_2000 = pd.read_csv(
    "./data/annual_conc_by_monitor_2000.csv",
    low_memory=False,
)
x = filter_and_average_pollution_data(pollution_2000)

# Final expression of the cell: rendered as the cell's output table.
x

Unnamed: 0,State Name,County Name,Parameter Name,Units of Measure,75th Percentile
0,New Mexico,Bernalillo,Ambient Max Temperature,Degrees Centigrade,33.2
1,New Mexico,Bernalillo,Ambient Min Temperature,Degrees Centigrade,14.6
2,New Mexico,Bernalillo,Average Ambient Pressure,Millimeters (mercury),641.0
3,New Mexico,Bernalillo,Average Ambient Temperature,Degrees Centigrade,24.3
4,New Mexico,Bernalillo,Carbon monoxide,Parts per million,1.05
5,New Mexico,Bernalillo,Elapsed Sample Time,Minutes,1440.0
6,New Mexico,Bernalillo,Nitrogen dioxide (NO2),Parts per billion,34.5
7,New Mexico,Bernalillo,Ozone,Parts per million,0.057143
8,New Mexico,Bernalillo,PM10 - LC,Micrograms/cubic meter (LC),32.875
9,New Mexico,Bernalillo,PM10 Total 0-10um STP,Micrograms/cubic meter (25 C),34.857143


In [6]:
# One frame per pollutant of interest, selected by exact 'Parameter Name'.
ozone = x[x['Parameter Name'].eq('Ozone')]
pm_2_5 = x[x['Parameter Name'].eq('PM2.5 - Local Conditions')]
NO2 = x[x['Parameter Name'].eq('Nitrogen dioxide (NO2)')]

# A notebook cell renders only its final expression, so NO2 is the table shown;
# ozone and pm_2_5 are assigned but not displayed by this cell.
NO2

Unnamed: 0,State Name,County Name,Parameter Name,Units of Measure,75th Percentile
6,New Mexico,Bernalillo,Nitrogen dioxide (NO2),Parts per billion,34.5
138,New Mexico,Dona Ana,Nitrogen dioxide (NO2),Parts per billion,16.625
150,New Mexico,Eddy,Nitrogen dioxide (NO2),Parts per billion,12.25
284,New Mexico,San Juan,Nitrogen dioxide (NO2),Parts per billion,21.75
297,New Mexico,Sandoval,Nitrogen dioxide (NO2),Parts per billion,23.0


In [8]:
# Load the annual monitor summaries for 2000-2009 with one comprehension rather
# than ten copy-pasted read_csv calls. The dict is keyed by year so that later
# code can iterate over the years directly.
pollution_by_year = {
    year: pd.read_csv(f"./data/annual_conc_by_monitor_{year}.csv", low_memory=False)
    for year in range(2000, 2010)
}

# Rebind the original per-year names so cells that reference them keep working.
(
    pollution_2000,
    pollution_2001,
    pollution_2002,
    pollution_2003,
    pollution_2004,
    pollution_2005,
    pollution_2006,
    pollution_2007,
    pollution_2008,
    pollution_2009,
) = (pollution_by_year[year] for year in range(2000, 2010))

In [2]:
import pandas as pd

# Function to process each year's dataset and extract the average values of the parameters
def extract_avg_pollution_data(year, data_dir="./data"):
    """Compute the mean '75th Percentile' of four pollutants for one year's file.

    Reads ``<data_dir>/annual_conc_by_monitor_<year>.csv`` and, for each
    pollutant, averages the '75th Percentile' column over every row whose
    'Parameter Name' matches. No state or county filter is applied here.

    Parameters
    ----------
    year : int or str
        Year used to build the file name.
    data_dir : str, optional
        Directory holding the yearly CSV files. Defaults to "./data", the
        location that used to be hard-coded.

    Returns
    -------
    dict or None
        Keys 'Year', 'Ozone', 'PM2.5', 'Nitrogen dioxide (NO2)' and
        'Sulphur dioxide (SO2)'. A pollutant's value is NaN when no row
        matches it. Returns None, after printing a message, when the file
        does not exist.
    """
    file_path = f"{data_dir}/annual_conc_by_monitor_{year}.csv"

    # Only the read can raise FileNotFoundError, so only the read is guarded.
    try:
        data = pd.read_csv(file_path, low_memory=False)
    except FileNotFoundError:
        print(f"File for year {year} not found: {file_path}")
        return None

    parameter = data['Parameter Name']
    percentile = data['75th Percentile']

    # regex=False makes this a literal, case-insensitive substring test. With
    # the default regex=True the '.' in 'PM2.5' matches any character, so a
    # name such as 'PM2x5' would be counted as PM2.5.
    # NOTE(review): a substring match pools every parameter whose name mentions
    # PM2.5, which may mix measurements that differ from
    # 'PM2.5 - Local Conditions' — confirm that pooling is intended.
    is_pm25 = parameter.str.contains('PM2.5', case=False, na=False, regex=False)

    return {
        'Year': year,
        'Ozone': percentile[parameter == 'Ozone'].mean(),
        'PM2.5': percentile[is_pm25].mean(),
        'Nitrogen dioxide (NO2)': percentile[parameter == 'Nitrogen dioxide (NO2)'].mean(),
        'Sulphur dioxide (SO2)': percentile[parameter == 'Sulfur dioxide'].mean(),
    }

# Accumulates one summary dict per year that could be processed.
result_data = []

# Years 2000 through 2009 inclusive; a year with no result is skipped.
for year in range(2000, 2010):
    result = extract_avg_pollution_data(year)
    if not result:
        continue
    result_data.append(result)

# One row per processed year.
result_df = pd.DataFrame(result_data)

# Echo the raw values as a NumPy array.
np_data = result_df.to_numpy()
print(np_data)

# Write the table to CSV without the index column, then print the table.
output_file = 'average_pollution_2000_to_2009.csv'
result_df.to_csv(output_file, index=False)
print(result_df)


[[2.00000000e+03 5.73701271e-02 1.03857931e+01 2.76721239e+01
  8.88917940e+00]
 [2.00100000e+03 5.78386640e-02 7.84976405e+00 2.74883047e+01
  8.34517312e+00]
 [2.00200000e+03 5.95713748e-02 7.15607588e+00 2.63748908e+01
  7.57184305e+00]
 [2.00300000e+03 5.70080321e-02 5.83638309e+00 2.52690265e+01
  7.60047089e+00]
 [2.00400000e+03 5.39388935e-02 5.73644730e+00 2.35680828e+01
  7.23862844e+00]
 [2.00500000e+03 5.77952149e-02 6.07994238e+00 2.39489955e+01
  7.14059244e+00]
 [2.00600000e+03 5.68845065e-02 5.75730556e+00 2.29469178e+01
  6.37173873e+00]
 [2.00700000e+03 5.73987921e-02 6.88428250e+00 2.20985126e+01
  6.03620102e+00]
 [2.00800000e+03 5.52106780e-02 5.47465062e+00 2.13231039e+01
  5.99026621e+00]
 [2.00900000e+03 5.15104670e-02 4.76454682e+00 1.94442029e+01
  5.10104214e+00]]
   Year     Ozone      PM2.5  Nitrogen dioxide (NO2)  Sulphur dioxide (SO2)
0  2000  0.057370  10.385793               27.672124               8.889179
1  2001  0.057839   7.849764               27.4