In [17]:
import pandas as pd
import joblib
import numpy as np


In [18]:
# Restore the previously fitted AQI model from disk (presumably a Ridge regressor,
# judging by the file name). joblib artifacts are pickle-based, so this file must
# come from a trusted source.
model_file = 'aqi_predictor_model_ridge.pkl'
model = joblib.load(model_file)

In [20]:
# Read the dataset whose column layout and column means are used further down
# to fill in features that the forecast does not supply.
training_csv = 'merged_forecast_aqi.csv'
training_data = pd.read_csv(training_csv)

In [21]:
# Discard the 'time' and 'date' columns when present; errors='ignore' keeps this
# cell safe to re-run after the columns are already gone.
non_feature_columns = ['time', 'date']
training_data = training_data.drop(columns=non_feature_columns, errors='ignore')

In [22]:
# Everything except the AQI target column is treated as a predictor.
target_column = 'us_aqi (USAQI)'
X_train = training_data.drop(labels=[target_column], axis='columns')

# Column layout and per-column means, reused later to shape and fill the input row.
feature_columns = X_train.columns
feature_means = X_train.mean()

In [23]:
# Show both name lists side by side so any mismatch between the model's expected
# features and the training columns is visible.
print("Expected features by model:", [*model.feature_names_in_])
print("Available features from training data:", [*feature_columns])

Expected features by model: ['temperature_2m (°C)', 'relative_humidity_2m (%)', 'dew_point_2m (°C)', 'apparent_temperature (°C)', 'precipitation (mm)', 'rain (mm)', 'snowfall (cm)', 'snow_depth (m)', 'weather_code (wmo code)', 'pressure_msl (hPa)', 'surface_pressure (hPa)', 'cloud_cover (%)', 'cloud_cover_low (%)', 'cloud_cover_mid (%)', 'cloud_cover_high (%)', 'et0_fao_evapotranspiration (mm)', 'vapour_pressure_deficit (kPa)', 'wind_speed_10m (km/h)', 'wind_speed_100m (km/h)', 'wind_direction_10m (°)', 'wind_direction_100m (°)', 'wind_gusts_10m (km/h)', 'soil_temperature_0_to_7cm (°C)', 'soil_temperature_7_to_28cm (°C)', 'soil_temperature_28_to_100cm (°C)', 'soil_temperature_100_to_255cm (°C)', 'soil_moisture_0_to_7cm (m³/m³)', 'soil_moisture_7_to_28cm (m³/m³)', 'soil_moisture_28_to_100cm (m³/m³)', 'soil_moisture_100_to_255cm (m³/m³)', 'prev_us_aqi']
Available features from training data: ['temperature_2m (°C)', 'relative_humidity_2m (%)', 'dew_point_2m (°C)', 'apparent_temperature (°

In [24]:
# Forecast values for the single row to score, keyed by column name.
# NOTE(review): of these keys only 'prev_us_aqi' appears in the model's expected
# feature list printed above; the '_max', '_min' and '_mean' names match no
# feature, so those three values are dropped when the frame is later restricted
# to feature_columns and the corresponding features are mean-filled instead.
# Confirm the intended feature names.
new_forecast = dict([
    ('temperature_2m_max (°C)', 30.0),
    ('temperature_2m_min (°C)', 20.0),
    ('wind_speed_10m_mean (km/h)', 12.0),
    ('prev_us_aqi', 105),
    # Features not listed here are filled with their training means later on
])

In [25]:
# Wrap the forecast dict in a list so it becomes a one-row DataFrame
input_df = pd.DataFrame(data=[new_forecast])

In [26]:
# Report supplied inputs whose names match no training feature: they are
# discarded when the frame is later restricted to feature_columns, so without
# this message a misnamed key would silently be replaced by the training mean.
unrecognized_inputs = [col for col in input_df.columns if col not in feature_columns]
if unrecognized_inputs:
    print(f"Warning: inputs not used by the model (no matching feature): {unrecognized_inputs}")

# Append every feature the forecast did not provide as a NaN column in one
# reindex call instead of inserting the columns one at a time.
absent_features = [col for col in feature_columns if col not in input_df.columns]
input_df = input_df.reindex(columns=[*input_df.columns, *absent_features])


In [27]:
# Keep only the training features, arranged in the training column order
input_df = input_df.loc[:, feature_columns]

In [28]:
# Replace each remaining NaN with the training mean of its own column
# (fillna with a Series matches fill values to columns by label).
input_df = input_df.fillna(value=feature_means)

In [29]:
# Verify that the input shape and feature count match what the model expects
# One print call, one line per fact: frame shape, the model's feature count,
# and the number of columns actually present in the input.
print(
    f"\nInput shape: {input_df.shape}",
    f"Model expects {len(model.feature_names_in_)} features",
    f"Input has {input_df.shape[1]} features",
    sep="\n",
)


Input shape: (1, 31)
Model expects 31 features
Input has 31 features


In [30]:
# Compare the prepared columns with the feature names recorded on the fitted model
expected_features = list(model.feature_names_in_)
missing_features = set(expected_features) - set(input_df.columns)
extra_features = set(input_df.columns) - set(expected_features)

if missing_features:
    print(f"Missing features: {missing_features}")
if extra_features:
    print(f"Extra features that will be removed: {extra_features}")

# Align the frame with the model on every run, not only when extras exist:
# scikit-learn checks feature names *and their order* at predict time, so a
# complete but differently ordered frame must be reordered as well. Selecting
# only the expected names that are present also drops extras without raising a
# KeyError when some features are missing (those are reported above).
present_features = [name for name in expected_features if name in input_df.columns]
input_df = input_df[present_features]


In [31]:
def aqi_category(aqi):
    """Return the AQI category label (with its colour) for a numeric AQI value.

    Each band includes its upper bound: 50 is "Good (Green)", 100 is
    "Moderate (Yellow)", and anything above 300 is "Hazardous (Maroon)".

    Args:
        aqi: Numeric AQI value (Python or NumPy scalar).

    Returns:
        The category label string for the band containing ``aqi``.

    Raises:
        ValueError: If ``aqi`` is NaN. NaN fails every ``<=`` comparison, so it
            would otherwise fall through to the "Hazardous (Maroon)" label.
    """
    if np.isnan(aqi):
        raise ValueError("predicted AQI is NaN; cannot assign a category")

    # (inclusive upper bound, label) pairs in ascending order
    bands = (
        (50, "Good (Green)"),
        (100, "Moderate (Yellow)"),
        (150, "Unhealthy for Sensitive Groups (Orange)"),
        (200, "Unhealthy (Red)"),
        (300, "Very Unhealthy (Purple)"),
    )
    for upper_bound, label in bands:
        if aqi <= upper_bound:
            return label
    return "Hazardous (Maroon)"


try:
    # predict returns one value per input row; the frame holds a single row
    predicted_aqi = model.predict(input_df)[0]
    print(f"\n🌫️ Predicted AQI: {predicted_aqi:.2f}")

    # Interpret the AQI value
    quality = aqi_category(predicted_aqi)
    print(f"Air Quality: {quality}")

except Exception as e:
    # Any failure (including a NaN prediction) is reported as a message
    # instead of a traceback.
    print(f"Error making prediction: {e}")



🌫️ Predicted AQI: 104.83
Air Quality: Unhealthy for Sensitive Groups (Orange)
