# Data Visualization with Matplotlib

## The following code was used to visualize our data with Matplotlib before moving on to ParaView.

In [2]:
import matplotlib.pyplot as plt
import pandas as pd

# Load the air-quality dataset from a CSV file given as a relative path.
# The code below reads its 'Name', 'Geo Place Name', 'Start_Date' and
# 'Data Value' columns.
air_quality_df = pd.read_csv('Air_Quality_V12.csv')

# Identifying top locations for each specified pollutant
def identify_top_locations_for_pollutants(df, pollutants, top_n=3):
    """Map each pollutant to the locations with the highest recorded values.

    For every pollutant, the rows whose 'Name' equals that pollutant are
    grouped by 'Geo Place Name'; each location is scored by its maximum
    'Data Value', and the ``top_n`` best-scoring locations are kept.

    Args:
        df: DataFrame with 'Name', 'Geo Place Name' and 'Data Value' columns.
        pollutants: Iterable of values to match against the 'Name' column.
        top_n: Maximum number of locations to keep per pollutant.

    Returns:
        Dict mapping each pollutant to a list of location names ordered from
        highest to lowest peak value. A pollutant with no matching rows maps
        to an empty list.
    """
    def _peak_locations(pollutant_name):
        # Restrict to this pollutant, then score each place by its peak value.
        matching_rows = df[df['Name'] == pollutant_name]
        peak_by_place = matching_rows.groupby('Geo Place Name')['Data Value'].max()
        ranked = peak_by_place.sort_values(ascending=False)
        return ranked.head(top_n).index.tolist()

    return {name: _peak_locations(name) for name in pollutants}

# Function to plot and save the changes in pollutant levels over time for the top locations
def _chronological_series(location_data):
    """Return ('Start_Date', 'Data Value') series ordered by date for plotting.

    Non-numeric, non-datetime 'Start_Date' values (e.g. the strings produced
    by reading a CSV without date parsing) are converted with
    ``pd.to_datetime`` so that Matplotlib draws a real time axis instead of a
    categorical one. If the values cannot be parsed as dates, the original
    values are returned in their original order.
    """
    dates = location_data['Start_Date']
    already_orderable = (
        pd.api.types.is_datetime64_any_dtype(dates)
        or pd.api.types.is_numeric_dtype(dates)
    )
    if not already_orderable:
        try:
            dates = pd.to_datetime(dates)
        except (ValueError, TypeError):
            # Not parseable as dates: keep the previous behaviour untouched.
            return location_data['Start_Date'], location_data['Data Value']

    # Stable sort keeps the file order of rows that share the same date.
    ordered = location_data.assign(Start_Date=dates).sort_values('Start_Date', kind='stable')
    return ordered['Start_Date'], ordered['Data Value']


def plot_and_save_pollutant_trends(df, pollutant, locations, save_path):
    """Plot one pollutant's values over time for several locations and save a PNG.

    One line with circular markers is drawn per entry in ``locations``, using
    the rows of ``df`` whose 'Name' equals ``pollutant`` and whose
    'Geo Place Name' equals that location. 'Start_Date' is the x-axis and
    'Data Value' the y-axis; points are plotted in chronological order.

    Args:
        df: DataFrame with 'Name', 'Geo Place Name', 'Start_Date' and
            'Data Value' columns.
        pollutant: Value of the 'Name' column to plot; also shown in the title.
        locations: Iterable of 'Geo Place Name' values, one line per entry.
        save_path: Destination handed to ``savefig``. The image is always
            written as a 300-dpi PNG because the format is passed explicitly.

    Returns:
        None. The figure is closed before returning, even if plotting or
        saving raises.
    """
    fig = plt.figure(figsize=(12, 6))
    try:
        for location in locations:
            location_data = df[(df['Name'] == pollutant) & (df['Geo Place Name'] == location)]
            dates, values = _chronological_series(location_data)
            plt.plot(dates, values, label=location, marker='o')

        plt.title(f'{pollutant} Levels Over Time in Top Locations')
        plt.xlabel('Time')
        plt.ylabel('Data Value')
        plt.xticks(rotation=45)
        plt.legend(title='Location')
        plt.grid(True)
        plt.tight_layout()

        plt.savefig(save_path, format='png', dpi=300)
    finally:
        # Close this specific figure so a failure above does not leak it.
        plt.close(fig)

# Pollutants to chart; each string is compared against the 'Name' column
pollutants = [
    "Sulfur Dioxide (SO2)",
    "Nitrogen Dioxide (NO2)",
    "Fine Particulate Matter (PM2.5)",
    "Ozone (O3)",
]

# For every pollutant, pick the locations with the highest recorded values
# (the function's default of three locations per pollutant applies)
top_locations_for_pollutants = identify_top_locations_for_pollutants(
    air_quality_df,
    pollutants,
)

# Save one '<pollutant>_trends.png' chart per pollutant (relative path)
for pollutant, locations in top_locations_for_pollutants.items():
    save_path = f'{pollutant}_trends.png'
    plot_and_save_pollutant_trends(
        air_quality_df,
        pollutant,
        locations,
        save_path,
    )
