# 🌿 Environment Monitoring & Pollution Control — Simple Classification Project
Predict air quality category (Good/Moderate/Unhealthy) from sensor readings.

This notebook is **Colab-ready**. Follow the steps below.

## 1) Setup
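If you're running in **Google Colab**, run the cell below and upload the dataset file `env_pollution_sample.csv` when prompted. If you're running locally, place `env_pollution_sample.csv` in the same folder as this notebook before running the cell.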

In [None]:

import sys, os
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.metrics import accuracy_score, classification_report, ConfusionMatrixDisplay
from sklearn.ensemble import RandomForestClassifier
import matplotlib.pyplot as plt

# Detect whether the notebook is running inside Google Colab.
# The `google.colab` package is normally only importable there; any failure
# while importing it is deliberately treated as "not in Colab" so the cell
# falls back to the local-file path instead of crashing.
try:
    from google.colab import files  # type: ignore
    IN_COLAB = True
except Exception:
    IN_COLAB = False

if IN_COLAB:
    # Opens the browser upload widget; the first uploaded file name is used.
    uploaded = files.upload()  # Upload env_pollution_sample.csv
    if not uploaded:
        # A cancelled/empty upload would otherwise surface as a bare
        # StopIteration from next(iter(...)), which tells the user nothing.
        raise FileNotFoundError(
            "No file was uploaded. Re-run this cell and upload "
            "env_pollution_sample.csv when prompted."
        )
    csv_name = next(iter(uploaded))
else:
    # If running locally, ensure the CSV is in the same folder
    csv_name = "env_pollution_sample.csv"
    if not os.path.isfile(csv_name):
        # Fail early with an actionable message rather than a raw read error.
        raise FileNotFoundError(
            f"'{csv_name}' was not found in {os.getcwd()!r}. "
            "Place the dataset next to this notebook and re-run the cell."
        )

print("Using CSV:", csv_name)
df = pd.read_csv(csv_name)
# Last expression of the cell: renders the first rows as the cell output.
df.head()


## 2) Explore the data

In [None]:

# Quick structural overview of the loaded frame: dimensions and column names.
frame_shape = df.shape
column_names = df.columns.tolist()
print("Shape:", frame_shape)
print("Columns:", column_names)

# Summary statistics across every column (include='all' covers non-numeric ones too).
summary_stats = df.describe(include="all")
print("\nBasic statistics:")
display(summary_stats)

# Number of rows per target class, to check how balanced the labels are.
class_counts = df["AQI_Class"].value_counts()
print("\nClass distribution:")
print(class_counts)


## 3) Train a simple classifier

In [None]:

# Target is the AQI_Class column; every remaining column is used as a feature.
y = df["AQI_Class"]
X = df.drop(columns=["AQI_Class"])

# Hold out 20% of the rows for evaluation. Stratifying on y keeps the class
# proportions the same in both splits; the fixed seed makes the split repeatable.
split_parts = train_test_split(X, y, test_size=0.2, random_state=7, stratify=y)
X_train, X_test, y_train, y_test = split_parts

# Two-step pipeline: standardize the features, then fit a 200-tree random forest.
scaler_step = ("scaler", StandardScaler())
forest_step = ("rf", RandomForestClassifier(n_estimators=200, random_state=7))
pipe = Pipeline(steps=[scaler_step, forest_step])

# Fit on the training split only, then predict the held-out rows.
pipe.fit(X_train, y_train)
y_pred = pipe.predict(X_test)

# Overall accuracy on the held-out rows, shown to four decimals.
acc = accuracy_score(y_test, y_pred)
print("Test Accuracy:", round(acc, 4))

# Per-class precision / recall / F1 summary.
report_text = classification_report(y_test, y_pred)
print("\nClassification Report:\n")
print(report_text)

# Confusion matrix for the fitted pipeline on the test split.
ConfusionMatrixDisplay.from_estimator(pipe, X_test, y_test)
plt.gca().set_title("Confusion Matrix - AQI Classifier")
plt.tight_layout()
plt.show()


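## 4) Try your own inputs

The cell below runs the trained model on the first five rows of the test set. To try your own readings, replace `sample` with a DataFrame that has the same feature columns.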

In [None]:

# Use the first five held-out rows as example inputs; copy them so the
# sample is independent of X_test.
sample = X_test.head(5).copy()
print("Sample inputs:")
display(sample)

# Run the fitted pipeline on the sample rows and show the predicted classes.
sample_predictions = pipe.predict(sample)
print("Predictions:")
print(sample_predictions)
