In [None]:
import os

import matplotlib.pyplot as plt
import pandas as pd

In [None]:
# Load the raw sensor log and preview its first rows.
df = pd.read_csv("air_quality.csv")
df.head()

In [None]:
# Display summary statistics for the loaded frame as a quick sanity check.
df.describe()

In [None]:
# Parse the "ts" column as numeric; pd.to_numeric raises on values it cannot parse.
# NOTE(review): the next cell converts "ts" again together with the other sensor
# columns, so this cell looks redundant.
df["ts"] = pd.to_numeric(df["ts"])

In [None]:
# Columns that must be numeric before any feature is derived from them.
cols = ["mq", "temp", "hum", "ts"]
# Convert each listed column; pd.to_numeric raises if a value cannot be parsed.
df = df.assign(**{col: pd.to_numeric(df[col]) for col in cols})

In [None]:
# Feature engineering on the raw gas ("mq"), temperature and humidity readings.
WINDOW = 10  # number of samples in the rolling window

df = df.assign(
    # Gas reading divided by the temperature-humidity product
    # (+1 keeps the divisor non-zero when the product is 0).
    gas_norm=lambda d: d["mq"] / (d["temp"] * d["hum"] + 1),
    # Rolling statistics (last WINDOW samples)
    rolling_mean_10=lambda d: d["mq"].rolling(WINDOW).mean(),
    rolling_std_10=lambda d: d["mq"].rolling(WINDOW).std(),
    # Rate of change: absolute step, then the step relative to the previous
    # reading (1e-5 guards against a zero previous reading).
    gas_diff=lambda d: d["mq"].diff(),
    gas_diff_norm=lambda d: d["gas_diff"] / (d["mq"].shift(1) + 1e-5),
    # Humidity adjusted index
    hum_adjusted_gas=lambda d: d["mq"] * (1 + d["hum"] / 100),
    # Pairwise interaction features
    temp_hum=lambda d: d["temp"] * d["hum"],
    temp_gas=lambda d: d["temp"] * d["mq"],
    hum_gas=lambda d: d["hum"] * d["mq"],
)

In [None]:
# Remove every row that has a missing value in any column (this includes the
# leading rows where the rolling / diff features are undefined), then renumber
# the rows from 0.
df = df.dropna(axis=0, how="any")
df = df.reset_index(drop=True)

In [None]:
# Persist the cleaned feature table; index=False leaves the row index out of the file.
df.to_csv("air_quality_features.csv", index=False)

In [None]:
# Overlay the three raw sensor channels on one axis, indexed by sample number.
fig, ax = plt.subplots()
for column in ("mq", "hum", "temp"):
    ax.plot(df.index, df[column])
ax.set(
    xlabel="Sample Index",
    ylabel="Value",
    title="Raw Sensor Signals Over Time",
)
# Legend entries follow the plotting order above.
ax.legend(["MQ Gas", "Humidity", "Temperature"])
plt.show()

In [None]:
# Compare the normalised gas signal with the rolling mean / std of the raw reading.
fig, ax = plt.subplots()
for column in ("gas_norm", "rolling_mean_10", "rolling_std_10"):
    ax.plot(df.index, df[column])
ax.set(
    xlabel="Sample Index",
    ylabel="Value",
    title="Gas Normalization & Rolling Statistics",
)
# Legend entries follow the plotting order above.
ax.legend(["Gas Norm", "Rolling Mean (10)", "Rolling Std (10)"])
plt.show()

In [None]:
# Show the sample-to-sample gas change, both absolute and relative.
fig, ax = plt.subplots()
for column in ("gas_diff", "gas_diff_norm"):
    ax.plot(df.index, df[column])
ax.set(
    xlabel="Sample Index",
    ylabel="Delta",
    title="Gas Change Rate",
)
# Legend entries follow the plotting order above.
ax.legend(["Gas Diff", "Gas Diff Norm"])
plt.show()

In [None]:
# Plot the engineered gas interaction features against the sample index.
plt.figure()
plt.plot(df.index, df["hum_adjusted_gas"])
plt.plot(df.index, df["hum_gas"])
plt.plot(df.index, df["temp_gas"])
plt.xlabel("Sample Index")
plt.ylabel("Value")
plt.title("Engineered Gas Interaction Features")
# The multiplication sign was stored as mis-decoded UTF-8 ("Ã—"); a \N{...}
# escape keeps the source ASCII-only so the label cannot be mangled again.
plt.legend(["Hum Adjusted Gas", "Hum \N{MULTIPLICATION SIGN} Gas", "Temp \N{MULTIPLICATION SIGN} Gas"])
plt.show()


In [None]:
# Heatmap of the pairwise correlations between the numeric columns.
plt.figure()
# Restrict to numeric columns explicitly: since pandas 2.0, DataFrame.corr()
# raises when the frame holds a non-numeric column instead of silently skipping
# it. select_dtypes behaves the same on old and new pandas versions.
corr = df.select_dtypes(include="number").corr()
plt.imshow(corr)
plt.colorbar()
# One tick per column, labelled with the column name on both axes.
plt.xticks(range(len(corr.columns)), corr.columns, rotation=90)
plt.yticks(range(len(corr.columns)), corr.columns)
plt.title("Feature Correlation Heatmap")
plt.show()

In [None]:
# Engineered columns used as model inputs, in a fixed order.
FEATURES = [
    # normalised gas level
    "gas_norm",
    # rolling statistics of the raw gas reading
    "rolling_mean_10", "rolling_std_10",
    # sample-to-sample change
    "gas_diff", "gas_diff_norm",
    # humidity / temperature interactions
    "hum_adjusted_gas", "temp_hum", "temp_gas", "hum_gas",
]

In [None]:
# Input table and output folder used by the training cells.
DATA_PATH = "air_quality_features.csv"
MODEL_DIR = "models"

# joblib.dump does not create missing parent directories, so make sure the
# output folder exists before any model is written into it.
os.makedirs(MODEL_DIR, exist_ok=True)

# Reload the saved feature table and select the model input columns.
df = pd.read_csv(DATA_PATH)
X = df[FEATURES]

In [None]:
import numpy as np
import joblib
from sklearn.ensemble import IsolationForest
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report,confusion_matrix

In [None]:
# Fit an Isolation Forest on the engineered features and persist it.
# The leading symbol was stored as mis-decoded UTF-8 ("ðŸ”¹"); a \N{...} escape
# keeps the source ASCII-only so the message cannot be mangled again.
print("\n\N{SMALL BLUE DIAMOND} Training Anomaly Detection Model...")
anomaly_model = IsolationForest(
    n_estimators=300,
    contamination=0.01,  # assumed share of anomalous samples in the data
    random_state=42,     # fixed seed for reproducible trees
)
anomaly_model.fit(X)
joblib.dump(anomaly_model, f"{MODEL_DIR}/anomaly_model.joblib")

In [None]:
def air_quality_label(mq):
    """Map a raw MQ gas reading to an air-quality class.

    Returns 0 (Good) for readings below 220, 1 (Moderate) below 260,
    2 (Poor) below 300 and 3 (Hazardous) for everything else.
    """
    upper_bounds = (220, 260, 300)  # exclusive upper limits of classes 0, 1, 2
    for label, bound in enumerate(upper_bounds):
        if mq < bound:
            return label
    return len(upper_bounds)  # 3 = Hazardous
# Label every sample from its raw gas reading, then train and evaluate a
# shallow decision tree on the engineered features.
# NOTE(review): the label is a deterministic function of "mq"; if the features
# are derived from the same reading, the validation score may mostly reflect
# how well the tree recovers the thresholds - confirm this is intended.
df["aq_label"] = df["mq"].apply(air_quality_label)
y = df["aq_label"]

# Stratified 80/20 split so every class keeps its share in both parts.
X_train, X_val, y_train, y_val = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# The leading symbols of the messages below were stored as mis-decoded UTF-8
# ("ðŸ”¹", "ðŸ“Š"); \N{...} escapes keep the source ASCII-only.
print("\N{SMALL BLUE DIAMOND} Training AQ classifier (Decision Tree)...")
model = DecisionTreeClassifier(max_depth=5, min_samples_leaf=30, random_state=42)
model.fit(X_train, y_train)

y_pred = model.predict(X_val)
print("\n\N{BAR CHART} Classification Report:")
print(classification_report(y_val, y_pred))

# Persist the classifier together with the feature order it was trained on.
joblib.dump(model, f"{MODEL_DIR}/aq_classifier_tree.joblib")
joblib.dump(FEATURES, f"{MODEL_DIR}/features.joblib")