In [None]:

import pandas as pd
import numpy as np
from sklearn.model_selection import KFold
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import r2_score, mean_squared_error
# 10-fold cross-validation of a shallow GradientBoostingRegressor that predicts
# the "computed_aqi" column from every other column of the preprocessed CSV.
df = pd.read_csv("delhi_aqi_preprocessed.csv")
# Discard columns whose name starts with "Unnamed" (presumably a saved index
# column -- confirm against the CSV).
df = df.loc[:, ~df.columns.str.contains('^Unnamed')]
X = df.drop(columns=["computed_aqi"])
y = df["computed_aqi"].values

# Raw, unscaled feature matrix. Scaling is done per fold (see below) so that
# no statistics from held-out rows reach the training step.
X_values = X.values

kf = KFold(n_splits=10, shuffle=True, random_state=42)
r2_scores, rmses, mses = [], [], []

for fold, (train_index, test_index) in enumerate(kf.split(X_values), 1):
    y_train, y_test = y[train_index], y[test_index]

    # Fit the scaler on the training rows only, then apply the same transform
    # to the test rows. Fitting on the full dataset before splitting would
    # leak the test rows' mean/variance into preprocessing.
    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_values[train_index])
    X_test = scaler.transform(X_values[test_index])

    # Depth-1 trees (stumps) with row subsampling; seed fixed for repeatability.
    gbr = GradientBoostingRegressor(
        n_estimators=49,
        learning_rate=0.2,
        max_depth=1,
        subsample=0.6,
        random_state=42
    )
    gbr.fit(X_train, y_train)

    y_pred = gbr.predict(X_test)
    r2 = r2_score(y_test, y_pred)
    mse = mean_squared_error(y_test, y_pred)
    rmse = np.sqrt(mse)

    r2_scores.append(r2)
    mses.append(mse)
    rmses.append(rmse)

    print(f"\n Fold {fold} Performance:")
    print(f"R² Score: {r2:.4f}")
    print(f"MSE: {mse:.2f}")
    print(f"RMSE: {rmse:.2f}")

# Unweighted mean of the per-fold metrics.
print("\n Average 10-Fold Performance:")
print(f"Mean R² Score: {np.mean(r2_scores):.4f}")
print(f"Mean MSE: {np.mean(mses):.2f}")
print(f"Mean RMSE: {np.mean(rmses):.2f}")



 Fold 1 Performance:
R² Score: 0.9638
MSE: 566.96
RMSE: 23.81

 Fold 2 Performance:
R² Score: 0.9550
MSE: 846.70
RMSE: 29.10

 Fold 3 Performance:
R² Score: 0.9659
MSE: 620.40
RMSE: 24.91

 Fold 4 Performance:
R² Score: 0.9796
MSE: 363.16
RMSE: 19.06

 Fold 5 Performance:
R² Score: 0.9554
MSE: 650.74
RMSE: 25.51

 Fold 6 Performance:
R² Score: 0.9529
MSE: 885.90
RMSE: 29.76

 Fold 7 Performance:
R² Score: 0.9620
MSE: 670.00
RMSE: 25.88

 Fold 8 Performance:
R² Score: 0.9647
MSE: 636.02
RMSE: 25.22

 Fold 9 Performance:
R² Score: 0.9268
MSE: 1218.53
RMSE: 34.91

 Fold 10 Performance:
R² Score: 0.9640
MSE: 651.63
RMSE: 25.53

 Average 10-Fold Performance:
Mean R² Score: 0.9590
Mean MSE: 711.00
Mean RMSE: 26.37


In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.tree import DecisionTreeRegressor
from sklearn.model_selection import KFold, GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import r2_score, mean_squared_error


# Nested cross-validation of a DecisionTreeRegressor: an outer 10-fold split
# measures performance, and inside every outer training fold a 3-fold grid
# search picks the tree hyper-parameters.
df = pd.read_csv("delhi_aqi_preprocessed.csv")
# Discard columns whose name starts with "Unnamed" (presumably a saved index
# column -- confirm against the CSV).
df = df.loc[:, ~df.columns.str.contains('^Unnamed')]
X = df.drop(columns=["computed_aqi"])
y = df["computed_aqi"]

# Raw, unscaled feature matrix. Scaling is done per outer fold so the held-out
# rows never contribute to the scaler's statistics.
X_values = X.values

kf = KFold(n_splits=10, shuffle=True, random_state=44)
r2_scores, rmses, mses = [], [], []

# Winning parameter set of each outer fold, in fold order.
best_params_all_folds = []

# 4 * 3 * 3 = 36 candidate settings evaluated per outer fold.
param_grid = {
    'max_depth': [10, 20, 30, 50],
    'min_samples_split': [2, 4, 6],
    'min_samples_leaf': [1, 2, 4]
}

for fold, (train_idx, test_idx) in enumerate(kf.split(X_values), 1):
    y_train, y_test = y.iloc[train_idx], y.iloc[test_idx]

    # Fit the scaler on the outer-training rows only and reuse it for the
    # outer-test rows. NOTE(review): the inner 3-fold search still shares this
    # scaler across its own splits; wrapping scaler + tree in a Pipeline would
    # remove that as well, at the cost of prefixed parameter names.
    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_values[train_idx])
    X_test = scaler.transform(X_values[test_idx])

    grid = GridSearchCV(
        estimator=DecisionTreeRegressor(random_state=42),
        param_grid=param_grid,
        cv=3,
        scoring='neg_mean_squared_error',
        n_jobs=-1
    )
    grid.fit(X_train, y_train)
    # best_estimator_ is the winning configuration refit on the whole outer
    # training fold (GridSearchCV's default refit=True).
    best_model = grid.best_estimator_
    best_params_all_folds.append(grid.best_params_)

    y_pred = best_model.predict(X_test)

    r2 = r2_score(y_test, y_pred)
    mse = mean_squared_error(y_test, y_pred)
    rmse = np.sqrt(mse)

    r2_scores.append(r2)
    mses.append(mse)
    rmses.append(rmse)

    print(f"\n Fold {fold} Performance:")
    print(f"Best Params: {grid.best_params_}")
    print(f"R² Score: {r2:.4f}")
    print(f"MSE: {mse:.2f}")
    print(f"RMSE: {rmse:.2f}")

# Unweighted mean of the per-fold metrics.
print("\n Average 10-Fold Tuned Decision Tree Performance:")
print(f"Mean R² Score: {np.mean(r2_scores):.4f}")
print(f"Mean MSE: {np.mean(mses):.2f}")
print(f"Mean RMSE: {np.mean(rmses):.2f}")





 Fold 1 Performance:
Best Params: {'max_depth': 20, 'min_samples_leaf': 1, 'min_samples_split': 4}
R² Score: 0.9946
MSE: 94.69
RMSE: 9.73

 Fold 2 Performance:
Best Params: {'max_depth': 10, 'min_samples_leaf': 4, 'min_samples_split': 2}
R² Score: 0.8803
MSE: 2280.54
RMSE: 47.75

 Fold 3 Performance:
Best Params: {'max_depth': 10, 'min_samples_leaf': 1, 'min_samples_split': 4}
R² Score: 0.9777
MSE: 342.82
RMSE: 18.52

 Fold 4 Performance:
Best Params: {'max_depth': 20, 'min_samples_leaf': 4, 'min_samples_split': 2}
R² Score: 0.9933
MSE: 122.90
RMSE: 11.09

 Fold 5 Performance:
Best Params: {'max_depth': 20, 'min_samples_leaf': 4, 'min_samples_split': 2}
R² Score: 0.9920
MSE: 133.59
RMSE: 11.56

 Fold 6 Performance:
Best Params: {'max_depth': 20, 'min_samples_leaf': 4, 'min_samples_split': 2}
R² Score: 0.9919
MSE: 157.66
RMSE: 12.56

 Fold 7 Performance:
Best Params: {'max_depth': 20, 'min_samples_leaf': 1, 'min_samples_split': 2}
R² Score: 0.9982
MSE: 30.45
RMSE: 5.52

 Fold 8 Perform

In [None]:
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import KFold
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import r2_score, mean_squared_error


# 10-fold cross-validation of a RandomForestRegressor that predicts the
# "computed_aqi" column from every other column of the preprocessed CSV.
df = pd.read_csv("delhi_aqi_preprocessed.csv")
# Discard columns whose name starts with "Unnamed" (presumably a saved index
# column -- confirm against the CSV).
df = df.loc[:, ~df.columns.str.contains('^Unnamed')]
X = df.drop(columns=["computed_aqi"])
y = df["computed_aqi"]

# Raw, unscaled feature matrix. Scaling is done per fold so the held-out rows
# never contribute to the scaler's statistics.
X_values = X.values

kf = KFold(n_splits=10, shuffle=True, random_state=42)

r2_scores, rmses, mses = [], [], []

for fold, (train_index, test_index) in enumerate(kf.split(X_values), 1):
    y_train, y_test = y.iloc[train_index], y.iloc[test_index]

    # Fit the scaler on the training rows only, then apply the same transform
    # to the test rows. Fitting on the full dataset before splitting would
    # leak the test rows' mean/variance into preprocessing.
    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_values[train_index])
    X_test = scaler.transform(X_values[test_index])

    # Fixed seed so every fold builds its forest reproducibly.
    rf = RandomForestRegressor(
        n_estimators=38,
        max_depth=15,
        min_samples_split=10,
        random_state=42
    )
    rf.fit(X_train, y_train)
    y_pred = rf.predict(X_test)

    r2 = r2_score(y_test, y_pred)
    mse = mean_squared_error(y_test, y_pred)
    rmse = np.sqrt(mse)

    r2_scores.append(r2)
    mses.append(mse)
    rmses.append(rmse)

    print(f"\n Fold {fold} Results:")
    print(f"R² Score: {r2:.4f}")
    print(f"MSE: {mse:.2f}")
    print(f"RMSE: {rmse:.2f}")


# Unweighted mean of the per-fold metrics.
print("\n 10-Fold Cross-Validation Results (Random Forest):")
print(f"Average R² Score: {np.mean(r2_scores):.4f}")
print(f"Average MSE: {np.mean(mses):.2f}")
print(f"Average RMSE: {np.mean(rmses):.2f}")



 Fold 1 Results:
R² Score: 0.9813
MSE: 293.02
RMSE: 17.12

 Fold 2 Results:
R² Score: 0.9936
MSE: 120.19
RMSE: 10.96

 Fold 3 Results:
R² Score: 0.9953
MSE: 84.68
RMSE: 9.20

 Fold 4 Results:
R² Score: 0.9872
MSE: 227.02
RMSE: 15.07

 Fold 5 Results:
R² Score: 0.9881
MSE: 174.05
RMSE: 13.19

 Fold 6 Results:
R² Score: 0.9939
MSE: 115.51
RMSE: 10.75

 Fold 7 Results:
R² Score: 0.9788
MSE: 374.27
RMSE: 19.35

 Fold 8 Results:
R² Score: 0.9947
MSE: 95.96
RMSE: 9.80

 Fold 9 Results:
R² Score: 0.9226
MSE: 1288.08
RMSE: 35.89

 Fold 10 Results:
R² Score: 0.9901
MSE: 179.86
RMSE: 13.41

 10-Fold Cross-Validation Results (Random Forest):
Average R² Score: 0.9826
Average MSE: 295.26
Average RMSE: 15.47


In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import KFold
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.svm import SVR
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.pipeline import make_pipeline


# Needed for the out-of-fold meta-feature below; this cell's import list does
# not already provide it.
from sklearn.model_selection import cross_val_predict

# Two-stage stack evaluated with 10-fold cross-validation:
#   1. a GradientBoostingRegressor produces a prediction of "computed_aqi";
#   2. that prediction is appended to the raw features as one extra column and
#      a linear-kernel SVR (with its own StandardScaler) makes the final call.
df = pd.read_csv("delhi_aqi_preprocessed.csv")
# Discard columns whose name starts with "Unnamed" (presumably a saved index
# column -- confirm against the CSV).
df = df.loc[:, ~df.columns.str.contains('^Unnamed')]
X = df.drop(columns=["computed_aqi"]).values
y = df["computed_aqi"].values

kf = KFold(n_splits=10, shuffle=True, random_state=42)

r2_scores, mses, rmses = [], [], []

for fold, (train_index, test_index) in enumerate(kf.split(X), 1):
    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = y[train_index], y[test_index]

    gbr = GradientBoostingRegressor(
        n_estimators=80,
        learning_rate=0.05,
        max_depth=2,
        subsample=0.8,
        max_features=0.7,
        random_state=44
    )

    # Build the training meta-feature from OUT-OF-FOLD predictions: each
    # training row is predicted by a clone of `gbr` that never saw it. Using
    # in-sample predictions instead would hand the SVR a feature that is
    # systematically more accurate on the training rows than on the test rows.
    inner_cv = KFold(n_splits=5, shuffle=True, random_state=44)
    gbr_train_pred = cross_val_predict(
        gbr, X_train, y_train, cv=inner_cv
    ).reshape(-1, 1)

    # cross_val_predict works on clones, so `gbr` itself is still unfitted;
    # fit it on the whole training fold to produce the test-side meta-feature.
    gbr.fit(X_train, y_train)
    gbr_test_pred = gbr.predict(X_test).reshape(-1, 1)

    # Append the GBR prediction as one extra feature column.
    X_train_ext = np.hstack((X_train, gbr_train_pred))
    X_test_ext = np.hstack((X_test, gbr_test_pred))

    # The pipeline fits the scaler on the training fold only, so no test-row
    # statistics reach the SVR.
    svr = make_pipeline(
        StandardScaler(),
        SVR(kernel='linear', C=2.0, epsilon=1.0)
    )
    svr.fit(X_train_ext, y_train)

    y_pred = svr.predict(X_test_ext)

    r2 = r2_score(y_test, y_pred)
    mse = mean_squared_error(y_test, y_pred)
    rmse = np.sqrt(mse)

    r2_scores.append(r2)
    mses.append(mse)
    rmses.append(rmse)

    print(f"\n Fold {fold} Performance:")
    print(f"R² Score: {r2:.4f}")
    print(f"RMSE: {rmse:.2f}")
    print(f"MSE: {mse:.2f}")


# Unweighted mean of the per-fold metrics.
print("\n Average 10-Fold Performance:")
print(f"Mean R² Score: {np.mean(r2_scores):.4f}")
print(f"Mean RMSE: {np.mean(rmses):.2f}")
print(f"Mean MSE: {np.mean(mses):.2f}")



 Fold 1 Performance:
R² Score: 0.9629
RMSE: 24.11
MSE: 581.31

 Fold 2 Performance:
R² Score: 0.9605
RMSE: 27.26
MSE: 743.37

 Fold 3 Performance:
R² Score: 0.9693
RMSE: 23.60
MSE: 556.99

 Fold 4 Performance:
R² Score: 0.9884
RMSE: 14.36
MSE: 206.17

 Fold 5 Performance:
R² Score: 0.9485
RMSE: 27.39
MSE: 750.28

 Fold 6 Performance:
R² Score: 0.9620
RMSE: 26.75
MSE: 715.79

 Fold 7 Performance:
R² Score: 0.9721
RMSE: 22.19
MSE: 492.22

 Fold 8 Performance:
R² Score: 0.9725
RMSE: 22.24
MSE: 494.79

 Fold 9 Performance:
R² Score: 0.9247
RMSE: 35.40
MSE: 1253.36

 Fold 10 Performance:
R² Score: 0.9761
RMSE: 20.81
MSE: 433.11

 Average 10-Fold Performance:
Mean R² Score: 0.9637
Mean RMSE: 24.41
Mean MSE: 622.74


In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import KFold
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import r2_score, mean_squared_error
from xgboost import XGBRegressor

# 10-fold cross-validation of an XGBRegressor that predicts the "computed_aqi"
# column from every other column of the preprocessed CSV.
df = pd.read_csv("delhi_aqi_preprocessed.csv")
# Discard columns whose name starts with "Unnamed" (presumably a saved index
# column -- confirm against the CSV).
df = df.loc[:, ~df.columns.str.contains('^Unnamed')]
X = df.drop(columns=["computed_aqi"]).values
y = df["computed_aqi"].values

kf = KFold(n_splits=10, shuffle=True, random_state=42)
r2_scores, mses, rmses = [], [], []

for fold, (train_idx, test_idx) in enumerate(kf.split(X), 1):
    y_train, y_test = y[train_idx], y[test_idx]

    # Fit the scaler on the training rows only, then apply the same transform
    # to the test rows. Fitting on the full dataset before splitting would
    # leak the test rows' mean/variance into preprocessing.
    scaler = StandardScaler()
    X_train = scaler.fit_transform(X[train_idx])
    X_test = scaler.transform(X[test_idx])

    # Many small-step trees (1500 rounds at learning_rate 0.008) with L1/L2
    # regularisation. The seed is the fold number, so it differs between folds
    # but is identical from run to run.
    model = XGBRegressor(
        n_estimators=1500,
        learning_rate=0.008,
        max_depth=10,
        min_child_weight=2,
        subsample=0.9,
        colsample_bytree=0.9,
        gamma=0.1,
        reg_alpha=1.0,
        reg_lambda=1.0,
        random_state=fold,
        verbosity=0,
        tree_method='hist'
    )

    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)

    r2 = r2_score(y_test, y_pred)
    mse = mean_squared_error(y_test, y_pred)
    rmse = np.sqrt(mse)

    r2_scores.append(r2)
    mses.append(mse)
    rmses.append(rmse)

    print(f"\n Fold {fold} Performance:")
    print(f"R² Score: {r2:.4f}")
    print(f"MSE: {mse:.2f}")
    print(f"RMSE: {rmse:.2f}")

# Unweighted mean of the per-fold metrics. R² is reported with four decimals,
# like the per-fold lines, so the summary is comparable with the other models.
print("\n Average 10-Fold XGBoost Performance:")
print(f"Mean R² Score: {np.mean(r2_scores):.4f}")
print(f"Mean MSE: {np.mean(mses):.2f}")
print(f"Mean RMSE: {np.mean(rmses):.2f}")



 Fold 1 Performance:
R² Score: 0.9836
MSE: 256.07
RMSE: 16.00

 Fold 2 Performance:
R² Score: 0.9980
MSE: 36.77
RMSE: 6.06

 Fold 3 Performance:
R² Score: 0.9770
MSE: 417.03
RMSE: 20.42

 Fold 4 Performance:
R² Score: 0.9958
MSE: 74.32
RMSE: 8.62

 Fold 5 Performance:
R² Score: 0.9762
MSE: 346.71
RMSE: 18.62

 Fold 6 Performance:
R² Score: 0.9894
MSE: 198.94
RMSE: 14.10

 Fold 7 Performance:
R² Score: 0.9923
MSE: 136.65
RMSE: 11.69

 Fold 8 Performance:
R² Score: 0.9975
MSE: 44.19
RMSE: 6.65

 Fold 9 Performance:
R² Score: 0.8835
MSE: 1939.07
RMSE: 44.03

 Fold 10 Performance:
R² Score: 0.9916
MSE: 152.47
RMSE: 12.35

 Average 10-Fold XGBoost Performance:
Mean R² Score: 0.98
Mean MSE: 360.22
Mean RMSE: 15.86
