<a href="https://colab.research.google.com/github/kavinraam/Rail-Index-Prediction-Model/blob/main/Model_y.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install pandas scikit-learn xgboost lightgbm --quiet

In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, RandomizedSearchCV
from sklearn.ensemble import RandomForestRegressor
from xgboost import XGBRegressor
from lightgbm import LGBMRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score


In [None]:
# Location of the training CSV.
# NOTE(review): the path lives under /content/drive, so Google Drive presumably
# has to be mounted before this cell runs; no mount call is visible here - confirm.
training_csv_path = "/content/drive/My Drive/datasets/training_data.csv"
df = pd.read_csv(training_csv_path)

In [None]:
# Predictor columns and target taken straight from the loaded frame.
# NOTE: the following cell rebuilds X and y after dropping rows with missing
# values, so this selection is superseded before any model sees it.
feature_columns = ["SECCODE", "LINECODE", "KMFROM", "BLOCKNO", "PARAM", "RI1", "GMT"]
X = df[feature_columns]
y = df["RI2"]

In [None]:
# Name the predictor columns and the target once so the row filter and the
# two selections below cannot drift apart.
feature_columns = ["SECCODE", "LINECODE", "KMFROM", "BLOCKNO", "PARAM", "RI1", "GMT"]
target_column = "RI2"

# Drop every row that is missing a predictor or the target, then rebuild X and y
# from the cleaned frame.
df = df.dropna(subset=feature_columns + [target_column])
X = df[feature_columns]
y = df[target_column]

In [None]:
# One-hot encode the feature frame with pandas' default column selection.
X_encoded = pd.get_dummies(data=X)

In [None]:
# Hold out 30% of the rows for evaluation; the fixed seed keeps the split reproducible.
split_parts = train_test_split(X_encoded, y, test_size=0.3, random_state=42)
X_train, X_test, y_train, y_test = split_parts

In [None]:
# Candidate hyperparameter values sampled by the randomized searches.

# Random forest candidates.
rf_params = {
    "n_estimators": [50, 100, 200],
    "max_depth": [None, 10, 20],
    "min_samples_split": [2, 5, 10],
}

# XGBoost candidates.
xgb_params = {
    "n_estimators": [50, 100, 200],
    "max_depth": [3, 5, 7],
    "learning_rate": [0.01, 0.1, 0.2],
}

# LightGBM candidates.
lgb_params = {
    "n_estimators": [50, 100, 200],
    "max_depth": [-1, 10, 20],
    "learning_rate": [0.01, 0.1, 0.2],
}

In [None]:
def _run_randomized_search(estimator, param_distributions, X, y):
    """Fit a RandomizedSearchCV for ``estimator`` and return the fitted search.

    All three model searches share the same settings (10 sampled candidates,
    3-fold cross-validation, R² scoring, seed 42, all available cores), so they
    are kept in one place here instead of being repeated per model.

    Args:
        estimator: Unfitted regressor to tune.
        param_distributions: Mapping of parameter name to candidate values.
        X: Training features.
        y: Training target.

    Returns:
        The fitted ``RandomizedSearchCV`` instance.
    """
    search = RandomizedSearchCV(
        estimator,
        param_distributions,
        n_iter=10,
        cv=3,
        scoring='r2',
        random_state=42,
        n_jobs=-1,
    )
    search.fit(X, y)
    return search


# Tune each model family on the training split, in the same order as before.
rf_search = _run_randomized_search(
    RandomForestRegressor(random_state=42), rf_params, X_train, y_train
)
xgb_search = _run_randomized_search(
    XGBRegressor(random_state=42, verbosity=0), xgb_params, X_train, y_train
)
lgb_search = _run_randomized_search(
    LGBMRegressor(random_state=42), lgb_params, X_train, y_train
)

[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.018331 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 901
[LightGBM] [Info] Number of data points in the train set: 283049, number of used features: 6
[LightGBM] [Info] Start training from score 2.344881


In [None]:
def evaluate_model(name, model, X_test, y_test):
    """Print R², MAE, MSE and RMSE for ``model`` on a held-out set.

    Args:
        name: Label printed in the report header.
        model: Fitted estimator exposing ``predict``.
        X_test: Features passed to ``model.predict``.
        y_test: True target values the predictions are compared against.

    Returns:
        None. The metrics are written to stdout, formatted to four decimals.
    """
    preds = model.predict(X_test)
    # Score MSE once; RMSE is derived from it rather than recomputed.
    mse = mean_squared_error(y_test, preds)
    print(f"\n{name} Performance:")
    print(f"R² Score: {r2_score(y_test, preds):.4f}")
    print(f"MAE     : {mean_absolute_error(y_test, preds):.4f}")
    print(f"MSE     : {mse:.4f}")
    print(f"RMSE    : {np.sqrt(mse):.4f}")


In [None]:
# Report test-set metrics for the best estimator found by each search.
tuned_searches = (
    ("Random Forest", rf_search),
    ("XGBoost", xgb_search),
    ("LightGBM", lgb_search),
)
for model_label, fitted_search in tuned_searches:
    evaluate_model(model_label, fitted_search.best_estimator_, X_test, y_test)


Random Forest Performance:
R² Score: 0.5457
MAE     : 0.0912
MSE     : 0.0254
RMSE    : 0.1594

XGBoost Performance:
R² Score: 0.5059
MAE     : 0.1013
MSE     : 0.0276
RMSE    : 0.1663

LightGBM Performance:
R² Score: 0.4684
MAE     : 0.1063
MSE     : 0.0297
RMSE    : 0.1725
