In [13]:
import pandas as pd

def process_aqi_data(input_csv_path, output_csv_path):
    """
    Reduce an AQI CSV to one row per calendar month holding that month's maximum AQI.

    The input file is read with pandas, rows are grouped by their 'year' and
    'month' values, and the largest 'aqi' of each group is written to a new CSV
    with the columns 'Year', 'Month', 'aqi' (in that order, no index column).

    Parameters:
    - input_csv_path: Path to the input CSV file. It must contain the columns
      'year', 'month' and 'aqi'; any other column (e.g. 'day') is ignored.
    - output_csv_path: Path where the processed CSV file will be saved.

    Returns:
    - None. The result is written to output_csv_path and a confirmation
      message is printed.

    Raises:
    - KeyError: if one of the required columns is absent from the input file.
    """
    required_columns = ('year', 'month', 'aqi')

    # Load the dataset
    raw = pd.read_csv(input_csv_path)

    # Fail early with a message that names the missing columns instead of
    # surfacing a bare KeyError from deep inside the transformation.
    missing = [name for name in required_columns if name not in raw.columns]
    if missing:
        raise KeyError(
            f"{input_csv_path} is missing required column(s): {missing}"
        )

    # Build the grouping keys as text. The month is zero-padded to two
    # characters so the written value keeps the 'MM' form and the text
    # ordering used by groupby matches calendar order.
    keyed = pd.DataFrame({
        'Year': raw['year'].astype(str),
        'Month': raw['month'].astype(str).str.zfill(2),
        'aqi': raw['aqi'],
    })

    # Group on both keys directly rather than joining them into one string
    # and splitting it again; this also works when the input has no rows.
    monthly_aqi_max = keyed.groupby(['Year', 'Month'])['aqi'].max().reset_index()

    # Fix the column order of the output: 'Year', 'Month', 'aqi'
    monthly_aqi_max = monthly_aqi_max[['Year', 'Month', 'aqi']]

    # Save the processed data to a new CSV file
    monthly_aqi_max.to_csv(output_csv_path, index=False)

    print(f"Processed data has been saved to {output_csv_path}")


In [23]:
# Aggregate the raw Visalia AQI readings into monthly maxima.
process_aqi_data(
    input_csv_path='./csv/aqi_raw_Visalia.csv',
    output_csv_path='./csv/aqi_cleaned_Visalia.csv',
)

Processed data has been saved to ./csv/aqi_cleaned_Visalia.csv
