In [None]:
# Install missing libraries (if not already installed)
!pip install plotly seaborn --quiet

# Importing required libraries
import pandas as pd

# Ensure plots are displayed properly in Colab
%matplotlib inline

print("✅ Libraries successfully installed and imported!")

✅ Libraries successfully installed and imported!


In [None]:
# Mount Google Drive into the Colab filesystem at /content/drive.
# NOTE(review): the data-loading cells visible in this notebook read from relative
# or "/content/..." paths rather than from /content/drive — confirm the mount is needed.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


Scatter Plot

In [None]:
import pandas as pd

# ---- Wildfires: number of recorded fires per year ----
wildfire_df = pd.read_csv("expanded-usa-wildfire-dataset.csv")

# A year is required: take it from DISCOVERY_DATE when present, otherwise from FIRE_YEAR.
# NOTE(review): DISCOVERY_DATE is parsed with pd.to_datetime defaults — confirm the
# column is stored in a format that parses as calendar dates.
if "DISCOVERY_DATE" not in wildfire_df and "FIRE_YEAR" not in wildfire_df:
    raise ValueError("No valid date column found in wildfire dataset.")
wildfire_df["Year"] = (
    pd.to_datetime(wildfire_df["DISCOVERY_DATE"]).dt.year
    if "DISCOVERY_DATE" in wildfire_df
    else wildfire_df["FIRE_YEAR"]
)

# One row per year, with the row count stored as "Wildfire_Count".
wildfire_summary = (
    wildfire_df.groupby("Year")
    .size()
    .reset_index(name="Wildfire_Count")
)

# ---- Air quality: highest AQI value seen in each year ----
aqi_df = pd.read_csv("pollution_2000_2023.csv")

# Per-pollutant AQI columns that feed the yearly maximum.
aqi_columns = ["O3 AQI", "CO AQI", "SO2 AQI", "NO2 AQI"]

# Derive "Year" from "Date" when present; otherwise an existing "Year" column is required.
if "Date" not in aqi_df and "Year" not in aqi_df:
    raise ValueError("No valid date column found in AQI dataset.")
if "Date" in aqi_df:
    aqi_df["Year"] = pd.to_datetime(aqi_df["Date"]).dt.year

# Yearly maximum per pollutant, then "Overall_AQI" = the largest of those maxima.
aqi_summary = (
    aqi_df.groupby("Year")[aqi_columns]
    .max()
    .reset_index()
    .assign(Overall_AQI=lambda yearly: yearly[aqi_columns].max(axis=1))
)

# ---- Combine: keep only the years present in both summaries ----
merged_df = wildfire_summary.merge(
    aqi_summary[["Year", "Overall_AQI"]],
    on="Year",
    how="inner",
)

# Persist the combined table for the scatter plot.
merged_df.to_csv("wildfire_vs_aqi.csv", index=False)

print("CSV file saved as wildfire_vs_aqi.csv")

CSV file saved as wildfire_vs_aqi.csv


Bubble Chart

In [None]:
import pandas as pd

# Load the wildfire dataset
# NOTE(review): this uses an absolute "/content/..." path, while the earlier cell reads
# the same file name via a relative path — the two agree only when the working
# directory is /content.
wildfire_data_path = "/content/expanded-usa-wildfire-dataset.csv"
wildfire_df = pd.read_csv(wildfire_data_path)

# Define fire size classification: nominal (min, max) size range for each class,
# listed from smallest to largest (presumably acres — confirm against the dataset).
# The nominal ranges are not contiguous (e.g. B ends at 9.9 and C starts at 10);
# classify_fire_size() closes those gaps when assigning a class.
fire_size_classes = {
    "A": (0, 0.25),
    "B": (0.26, 9.9),
    "C": (10, 99.9),
    "D": (100, 299.9),
    "E": (300, 999.9),
    "F": (1000, 4999.9),
    "G": (5000, float("inf"))
}

# Function to classify fire size
def classify_fire_size(size):
    """Return the size-class letter for a fire size, or "Unknown".

    Every non-negative size maps to exactly one class, using contiguous
    thresholds derived from ``fire_size_classes``:

    * the smallest class covers sizes up to and including its nominal maximum;
    * every later class extends up to (but not including) the nominal minimum
      of the class after it;
    * the largest class has no upper limit.

    Values that fall inside a nominal range get the same class as a plain
    range lookup; values between two nominal ranges (for example 9.95 or
    0.255) are classified instead of falling through to "Unknown".

    Args:
        size: Fire size as a number.

    Returns:
        One of the keys of ``fire_size_classes``, or "Unknown" when ``size``
        is negative or NaN.
    """
    # NaN and negative sizes both fail this comparison and cannot be classified.
    if not size >= 0:
        return "Unknown"

    categories = list(fire_size_classes)

    # Smallest class: inclusive of its own nominal upper bound.
    smallest = categories[0]
    if size <= fire_size_classes[smallest][1]:
        return smallest

    # Each later class ends where the next class begins (exclusive).
    for category, following in zip(categories[1:], categories[2:]):
        if size < fire_size_classes[following][0]:
            return category

    # Nothing above the largest class's lower bound remains unclassified.
    return categories[-1]

# Tag every fire with its size class, derived from FIRE_SIZE.
wildfire_df["Fire_Size_Class"] = wildfire_df["FIRE_SIZE"].map(classify_fire_size)

# Tally fires per size class as a two-column table.
fire_size_distribution = (
    wildfire_df["Fire_Size_Class"]
    .value_counts()
    .reset_index()
    .set_axis(["Fire Size Class", "Wildfire Count"], axis=1)
)

# Write the full wildfire table (including the new class column) to CSV.
# NOTE(review): fire_size_distribution is computed above but is not what gets saved —
# confirm that exporting the whole table is intended.
cleaned_file_path = "cleaned_wildfire_Bubble_data.csv"
wildfire_df.to_csv(cleaned_file_path, index=False)

print(f"Processed data saved successfully: {cleaned_file_path}")

Processed data saved successfully: cleaned_wildfire_Bubble_data.csv


In [None]:
import pandas as pd

# Per-pollutant AQI columns considered when computing the overall value.
aqi_columns = ["O3 AQI", "CO AQI", "SO2 AQI", "NO2 AQI"]

# Read the AQI dataset and add "Overall_AQI": the row-wise maximum across the
# pollutant columns listed above.
aqi_df = pd.read_csv("pollution_2000_2023.csv").assign(
    Overall_AQI=lambda frame: frame[aqi_columns].max(axis=1)
)

# AQI category name -> inclusive (lowest, highest) AQI value for that category.
aqi_categories = {
    "Good": (0, 50),
    "Moderate": (51, 100),
    "Unhealthy for Sensitive Groups": (101, 150),
    "Unhealthy": (151, 200),
    "Very Unhealthy": (201, 300),
    "Hazardous": (301, float("inf"))
}

# Map a single AQI value to its category name.
def categorize_aqi(aqi_value):
    """Return the name of the AQI category whose range contains ``aqi_value``.

    Categories are checked in the order they appear in ``aqi_categories`` and
    both ends of each range are inclusive. Values that fall in no range
    (negative numbers, NaN, or fractional values between two ranges such as
    50.5) yield "Unknown".
    """
    matching = (
        label
        for label, (lowest, highest) in aqi_categories.items()
        if lowest <= aqi_value <= highest
    )
    return next(matching, "Unknown")

# Label every row with the category of its overall AQI value.
aqi_df["AQI Category"] = aqi_df["Overall_AQI"].map(categorize_aqi)

if "Population" not in aqi_df:
    # No population column: use the number of rows per category as the exposure proxy.
    aqi_exposure = aqi_df["AQI Category"].value_counts().reset_index()
    aqi_exposure.columns = ["AQI Category", "Exposure Count"]
else:
    # Population column present: total the population within each category.
    aqi_exposure = (
        aqi_df.groupby("AQI Category")["Population"]
        .sum()
        .reset_index()
    )

# Write the per-category exposure table to CSV.
aqi_exposure.to_csv("aqi_population_exposure.csv", index=False)

print("CSV file saved as aqi_population_exposure.csv")

CSV file saved as aqi_population_exposure.csv
