In [2]:
import pandas as pd

# Read the combined AQI forecast table (all stations in one file)
df = pd.read_csv("combined_AQI_forecast.csv")

# Fail fast when the forecast column is missing; otherwise keep a handle on it
if 'AQI_Forecast' in df.columns:
    forecast = df['AQI_Forecast']
else:
    raise ValueError("The file does not contain an 'AQI_Forecast' column.")

# Summary statistics (count, mean, std, min, quartiles, max) of the forecasts
aqi_stats = forecast.describe()
print("AQI_Forecast statistics:", aqi_stats, sep="\n")

# Use the 75th percentile of the forecast distribution as the reference
# threshold, i.e. the upper quartile is treated as the high-risk region.
reference_aqi = forecast.quantile(0.75)
print("Selected reference_aqi threshold (75th percentile):", reference_aqi)


AQI_Forecast statistics:
count    336.000000
mean      26.692124
std       10.039737
min        4.276920
25%       19.551462
50%       26.480328
75%       32.327350
max       45.845084
Name: AQI_Forecast, dtype: float64
Selected reference_aqi threshold (75th percentile): 32.32734960961196


In [3]:
import os
import pandas as pd

# Per-station AQI forecast files to convert into HRI files
file_names = [
    "AQI_forecast_Vellore.csv",
    "AQI_forecast_Velachery.csv",
    "AQI_forecast_Tirupur.csv",
    "AQI_forecast_salem.csv",
    "AQI_forecast_Royapuram.csv",
    "AQI_forecast_Ramanathapuram.csv",
    "AQI_forecast_Perungudi.csv",
    "AQI_forecast_Manali.csv",
    "AQI_forecast_Ooty.csv",
    "AQI_forecast_Kodungaiyur.csv",
    "AQI_forecast_Alandur.csv",
    "AQI_forecast_Crescent_chengalpattu.csv"
]

# Define input and output directories
input_dir = "/content/"
output_dir = "/content/results/"

# Create the output directory if it does not exist
os.makedirs(output_dir, exist_ok=True)

# For every station file: add an 'HRI' column (forecast AQI divided by
# `reference_aqi`, which must already be defined by an earlier cell) and write
# the result to `output_dir`. Unreadable or malformed files are reported and
# skipped so one bad file does not stop the batch.
for file_name in file_names:
    file_path = os.path.join(input_dir, file_name)

    # Read into a loop-local name so the notebook-level `df` is not clobbered
    try:
        station_df = pd.read_csv(file_path)
    except Exception as e:
        print(f"Error reading {file_name}: {e}")
        continue

    # The HRI can only be computed when the forecast column is present
    if 'AQI_Forecast' not in station_df.columns:
        print(f"'AQI_Forecast' column not found in {file_name}. Skipping.")
        continue

    # Calculate the HRI: ratio of AQI to the reference value
    station_df['HRI'] = station_df['AQI_Forecast'] / reference_aqi

    # Build the output name by swapping "AQI_" for "HRI_"
    # (e.g. AQI_forecast_Ooty.csv -> HRI_forecast_Ooty.csv). Only the first
    # occurrence is replaced so a station name containing "AQI_" stays intact.
    output_file_name = file_name.replace("AQI_", "HRI_", 1)
    output_file_path = os.path.join(output_dir, output_file_name)

    # Save the updated DataFrame to the output file
    try:
        station_df.to_csv(output_file_path, index=False)
        print(f"Processed and saved: {output_file_path}")
    except Exception as e:
        print(f"Error saving {output_file_name}: {e}")


Processed and saved: /content/results/HRI_forecast_Vellore.csv
Processed and saved: /content/results/HRI_forecast_Velachery.csv
Processed and saved: /content/results/HRI_forecast_Tirupur.csv
Processed and saved: /content/results/HRI_forecast_salem.csv
Processed and saved: /content/results/HRI_forecast_Royapuram.csv
Processed and saved: /content/results/HRI_forecast_Ramanathapuram.csv
Processed and saved: /content/results/HRI_forecast_Perungudi.csv
Processed and saved: /content/results/HRI_forecast_Manali.csv
Processed and saved: /content/results/HRI_forecast_Ooty.csv
Processed and saved: /content/results/HRI_forecast_Kodungaiyur.csv
Processed and saved: /content/results/HRI_forecast_Alandur.csv
Processed and saved: /content/results/HRI_forecast_Crescent_chengalpattu.csv


In [4]:
import os
import pandas as pd

# Per-station HRI files to combine into a single table
file_names = [
    "HRI_forecast_Vellore.csv",
    "HRI_forecast_Velachery.csv",
    "HRI_forecast_Tirupur.csv",
    "HRI_forecast_salem.csv",
    "HRI_forecast_Royapuram.csv",
    "HRI_forecast_Ramanathapuram.csv",
    "HRI_forecast_Perungudi.csv",
    "HRI_forecast_Manali.csv",
    "HRI_forecast_Ooty.csv",
    "HRI_forecast_Kodungaiyur.csv",
    "HRI_forecast_Alandur.csv",
    "HRI_forecast_Crescent_chengalpattu.csv"
]

# The HRI files are written to /content/results/ by the HRI-generation cell,
# so they must be read from there. Reading from /content/ only works if stale
# copies happen to exist and fails on a fresh Restart & Run All.
hri_dir = '/content/results'

# File-name pattern used to recover the station name: HRI_forecast_<station>.csv
prefix = "HRI_forecast_"
suffix = ".csv"

# List to hold DataFrames from each file
dfs = []

# Load each station file and tag its rows with the station name
for file in file_names:
    file_path = os.path.join(hri_dir, file)
    # Loop-local name so the notebook-level `df` is not overwritten
    station_df = pd.read_csv(file_path)

    # Strip the fixed prefix/suffix to get the station name
    if file.startswith(prefix) and file.endswith(suffix):
        station_name = file[len(prefix):-len(suffix)]
    else:
        station_name = file  # fallback if file naming doesn't match

    station_df['Location'] = station_name
    dfs.append(station_df)

# Concatenate all DataFrames into a single DataFrame with a fresh 0..n-1 index
combined_df = pd.concat(dfs, ignore_index=True)

# Save the combined DataFrame to a new CSV file in the /content/ directory
combined_csv_path = '/content/combined_HRI_forecast.csv'
combined_df.to_csv(combined_csv_path, index=False)

print(f"Combined CSV file created successfully at {combined_csv_path}")


Combined CSV file created successfully at /content/combined_HRI_forecast.csv


In [5]:
# Load the combined HRI forecast CSV from /content/ into `df`
df = pd.read_csv("/content/combined_HRI_forecast.csv")

In [6]:
# Count rows per 'Location' value (quick check of how many forecast rows each station has)
df['Location'].value_counts()

Unnamed: 0_level_0,count
Location,Unnamed: 1_level_1
Vellore,28
Velachery,28
Tirupur,28
salem,28
Royapuram,28
Ramanathapuram,28
Perungudi,28
Manali,28
Ooty,28
Kodungaiyur,28
