<a href="https://colab.research.google.com/github/santro5048/Air-quality/blob/main/Coding.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error, r2_score

# Sample synthetic data generation function
def generate_sample_data(n_samples=1000, random_state=42):
    np.random.seed(random_state)
    # Features: concentrations of pollutants (PM2.5, PM10, NO2, CO, O3), temperature, humidity
    data = pd.DataFrame({
        'PM2.5': np.random.uniform(5, 150, n_samples),   # micrograms/m3
        'PM10': np.random.uniform(10, 200, n_samples),   # micrograms/m3
        'NO2': np.random.uniform(5, 100, n_samples),     # ppb
        'CO': np.random.uniform(0.2, 2.0, n_samples),    # ppm
        'O3': np.random.uniform(10, 100, n_samples),     # ppb
        'Temperature': np.random.uniform(10, 35, n_samples), # Celsius
        'Humidity': np.random.uniform(20, 90, n_samples)     # percent
    })
    # Simplistic target: AQI estimation using weighted sum + noise
    data['AQI'] = (0.4*data['PM2.5'] + 0.3*data['PM10'] + 0.1*data['NO2'] +
                   10*data['CO'] + 0.1*data['O3'] +
                   0.05*data['Temperature'] - 0.02*data['Humidity'] +
                   np.random.normal(0, 5, n_samples))
    data['AQI'] = data['AQI'].clip(0, 300)  # AQI values capped between 0 and 300
    return data

# Function to classify AQI level
def classify_aqi(aqi):
    if aqi <= 50:
        return "Good"
    elif aqi <= 100:
        return "Moderate"
    elif aqi <= 150:
        return "Unhealthy for Sensitive Groups"
    elif aqi <= 200:
        return "Unhealthy"
    elif aqi <= 300:
        return "Very Unhealthy"
    else:
        return "Hazardous"

def main():
    print("Generating sample air quality data...")
    data = generate_sample_data()

    features = ['PM2.5', 'PM10', 'NO2', 'CO', 'O3', 'Temperature', 'Humidity']
    target = 'AQI'

    X = data[features]
    y = data[target]

    # Split the data
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    print("Training Random Forest model to predict AQI...")
    model = RandomForestRegressor(n_estimators=100, random_state=42)
    model.fit(X_train, y_train)

    print("Predicting test data AQI values...")
    y_pred = model.predict(X_test)

    # Evaluate performance
    mae = mean_absolute_error(y_test, y_pred)
    r2 = r2_score(y_test, y_pred)
    print(f"Mean Absolute Error: {mae:.2f}")
    print(f"R^2 Score: {r2:.2f}")

    # Provide environmental insights on a small sample
    sample_preds = pd.DataFrame({
        'Predicted_AQI': y_pred,
        'AQI_Level': [classify_aqi(aqi) for aqi in y_pred]
    })

    print("\nSample predictions with air quality classification:")
    print(sample_preds.head(10).to_string(index=False))

    print("\nInsights:")
    counts = sample_preds['AQI_Level'].value_counts()
    for level in ['Good', 'Moderate', 'Unhealthy for Sensitive Groups', 'Unhealthy', 'Very Unhealthy', 'Hazardous']:
        count = counts.get(level, 0)
        if count > 0:
            print(f"- {count} of 10 sample predictions fall into the '{level}' category.")

    print("\nThis ML model can help environmental agencies forecast air quality levels"
          " to issue warnings and take measures to reduce pollution exposure.")

if __name__ == "__main__":
    main()

Generating sample air quality data...
Training Random Forest model to predict AQI...
Predicting test data AQI values...
Mean Absolute Error: 5.12
R^2 Score: 0.93

Sample predictions with air quality classification:
 Predicted_AQI                      AQI_Level
    101.456191 Unhealthy for Sensitive Groups
     89.024495                       Moderate
    110.770847 Unhealthy for Sensitive Groups
     83.457898                       Moderate
     85.065920                       Moderate
     90.306034                       Moderate
     54.760979                       Moderate
     85.865508                       Moderate
     79.739970                       Moderate
    108.478062 Unhealthy for Sensitive Groups

Insights:
- 26 of 10 sample predictions fall into the 'Good' category.
- 130 of 10 sample predictions fall into the 'Moderate' category.
- 44 of 10 sample predictions fall into the 'Unhealthy for Sensitive Groups' category.

