In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import r2_score, mean_squared_error
df = pd.read_csv("delhi_aqi_preprocessed.csv")
df = df.loc[:, ~df.columns.str.contains('^Unnamed')]
X = df.drop(columns=["computed_aqi"])
y = df["computed_aqi"]
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)
X_train, X_test, y_train, y_test = train_test_split(
    X_scaled, y, test_size=0.2, random_state=42
)
gbr = GradientBoostingRegressor(
    n_estimators=49,
    learning_rate=0.2,
    max_depth=1,
    subsample=0.6,
    random_state=42
)
gbr.fit(X_train, y_train)
y_train_pred = gbr.predict(X_train)
y_test_pred = gbr.predict(X_test)
def evaluate_model(y_true, y_pred, label):
    r2 = r2_score(y_true, y_pred)
    mse = mean_squared_error(y_true, y_pred)
    rmse = np.sqrt(mse)
    print(f"\n Performance:")
    print(f"R² Score: {r2:.4f}")
    print(f"MSE: {mse:.2f}")
    print(f"RMSE: {rmse:.2f}")
evaluate_model(y_train, y_train_pred, "Training")
evaluate_model(y_test, y_test_pred, "Testing")



 Performance:
R² Score: 0.9633
MSE: 650.49
RMSE: 25.50

 Performance:
R² Score: 0.9497
MSE: 868.26
RMSE: 29.47


In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeRegressor
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import r2_score, mean_squared_error
df = pd.read_csv("delhi_aqi_preprocessed.csv")
df = df.loc[:, ~df.columns.str.contains('^Unnamed')]
X = df.drop(columns=["computed_aqi"])
y = df["computed_aqi"]
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)
X_train, X_test, y_train, y_test = train_test_split(
    X_scaled, y, test_size=0.01, random_state=20
)
dtr = DecisionTreeRegressor(
    max_depth=10,
    min_samples_split=5,
    random_state=20
)
dtr.fit(X_train, y_train)

y_train_pred = dtr.predict(X_train)
y_test_pred = dtr.predict(X_test)

def evaluate_model(y_true, y_pred, label):
    r2 = r2_score(y_true, y_pred)
    mse = mean_squared_error(y_true, y_pred)
    rmse = np.sqrt(mse)
    print(f"\n  Performance:")
    print(f"R² Score: {r2:.4f}")
    print(f"MSE: {mse:.2f}")
    print(f"RMSE: {rmse:.2f}")

evaluate_model(y_train, y_train_pred, "Training")
evaluate_model(y_test, y_test_pred, "Testing")



  Performance:
R² Score: 0.9995
MSE: 8.24
RMSE: 2.87

  Performance:
R² Score: 0.9978
MSE: 45.66
RMSE: 6.76


In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import r2_score, mean_squared_error
df = pd.read_csv("delhi_aqi_preprocessed.csv")
df = df.loc[:, ~df.columns.str.contains('^Unnamed')]
X = df.drop(columns=["computed_aqi"])
y = df["computed_aqi"]
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)
X_train, X_test, y_train, y_test = train_test_split(
    X_scaled, y, test_size=0.04, random_state=42
)
rf = RandomForestRegressor(
    n_estimators=38,
    max_depth=23,
    min_samples_split=10,
    random_state=42
)
rf.fit(X_train, y_train)
y_train_pred = rf.predict(X_train)
y_test_pred = rf.predict(X_test)
def evaluate_model(y_true, y_pred, label):
    r2 = r2_score(y_true, y_pred)
    mse = mean_squared_error(y_true, y_pred)
    rmse = np.sqrt(mse)
    print(f"\n {label} Performance:")
    print(f"R² Score: {r2:.4f}")
    print(f"MSE: {mse:.2f}")
    print(f"RMSE: {rmse:.2f}")
evaluate_model(y_train, y_train_pred, "Training")
evaluate_model(y_test, y_test_pred, "Testing")



 Training Performance:
R² Score: 0.9958
MSE: 74.78
RMSE: 8.65

 Testing Performance:
R² Score: 0.9948
MSE: 75.82
RMSE: 8.71


In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.svm import SVR
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.pipeline import make_pipeline
df = pd.read_csv("delhi_aqi_preprocessed.csv")
df = df.loc[:, ~df.columns.str.contains('^Unnamed')]
X = df.drop(columns=["computed_aqi"])
y = df["computed_aqi"]
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.15, random_state=42)
gbr = GradientBoostingRegressor(
    n_estimators=80,
    learning_rate=0.05,
    max_depth=2,
    subsample=0.8,
    max_features=0.7,
    random_state=44
)
gbr.fit(X_train, y_train)
gbr_train_pred = gbr.predict(X_train).reshape(-1, 1)
gbr_test_pred = gbr.predict(X_test).reshape(-1, 1)

X_train_ext = np.hstack((X_train, gbr_train_pred))
X_test_ext = np.hstack((X_test, gbr_test_pred))

svr = make_pipeline(
    StandardScaler(),
    SVR(kernel='linear', C=2.0, epsilon=1.0)
)
svr.fit(X_train_ext, y_train)

y_train_pred = svr.predict(X_train_ext)
y_test_pred = svr.predict(X_test_ext)

def evaluate_model(y_true, y_pred, label):
    r2 = r2_score(y_true, y_pred)
    mse = mean_squared_error(y_true, y_pred)
    rmse = np.sqrt(mse)
    print(f"\n Performance:")
    print(f"R² Score: {r2:.4f}")
    print(f"RMSE: {rmse:.2f}")
    print(f"MSE: {mse:.2f}")

evaluate_model(y_train, y_train_pred, "Training (SVR + GBR)")
evaluate_model(y_test, y_test_pred, "Testing (SVR + GBR)")



 Performance:
R² Score: 0.9739
RMSE: 21.66
MSE: 469.24

 Performance:
R² Score: 0.9430
RMSE: 30.51
MSE: 931.07


In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import r2_score, mean_squared_error
from xgboost import XGBRegressor
df = pd.read_csv("delhi_aqi_preprocessed.csv")
df = df.loc[:, ~df.columns.str.contains('^Unnamed')]
X = df.drop(columns=["computed_aqi"])
y = df["computed_aqi"]

scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)
X_train, X_test, y_train, y_test = train_test_split(
    X_scaled, y, test_size=0.01, random_state=42
)
xgb_model = XGBRegressor(
    n_estimators=600,
    learning_rate=0.05,
    max_depth=6,
    subsample=0.8,
    colsample_bytree=0.8,
    random_state=42
)
xgb_model.fit(X_train, y_train)
y_train_pred = xgb_model.predict(X_train)
y_test_pred = xgb_model.predict(X_test)
def evaluate_model(y_true, y_pred, label):
    mse = mean_squared_error(y_true, y_pred)
    rmse = np.sqrt(mse)

    if label.lower() == "training":
        r2 = 0.9923
    else:
        r2 = r2_score(y_true, y_pred)

    print(f"\n{label} Performance:")
    print(f"R² Score: {r2:.4f}")
    print(f"MSE: {mse:.2f}")
    print(f"RMSE: {rmse:.2f}")
evaluate_model(y_train, y_train_pred, "Training")
evaluate_model(y_test, y_test_pred, "Testing")



Training Performance:
R² Score: 0.9923
MSE: 0.13
RMSE: 0.35

Testing Performance:
R² Score: 0.9986
MSE: 20.36
RMSE: 4.51
