In [4]:
from sklearn.model_selection import train_test_split,cross_val_score,GridSearchCV
from sklearn.linear_model import LinearRegression,Lasso,Ridge
from sklearn.ensemble import GradientBoostingRegressor,RandomForestRegressor,BaggingRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import root_mean_squared_error,r2_score,mean_absolute_percentage_error
from xgboost import XGBRegressor

from scripts.data_ingestion import DataIngestion
from scripts.data_transformation import DataTransformation

from helpers.config import load_config
from helpers.logger import logger


config = load_config()

data = DataIngestion(config).fetch_fred_data()


data.head(10)





Unnamed: 0,FEDFUNDS,date
0,0.8,1954-07-01
1,1.22,1954-08-01
2,1.07,1954-09-01
3,0.85,1954-10-01
4,0.83,1954-11-01
5,1.28,1954-12-01
6,1.39,1955-01-01
7,1.29,1955-02-01
8,1.35,1955-03-01
9,1.43,1955-04-01


### Data Transformation

In [5]:
train,test = DataTransformation(config).transform()


train.shape,test.shape

2025-12-05 14:52:23 - INFO - helpers.logger - 44 - shape of training data: (857, 1)
2025-12-05 14:52:23 - INFO - helpers.logger - 53 - Length of training data: 685
2025-12-05 14:52:23 - INFO - helpers.logger - 54 - Length of testing data: 172
2025-12-05 14:52:23 - INFO - helpers.logger - 73 - Shape of training data scaled: (685, 1)
2025-12-05 14:52:23 - INFO - helpers.logger - 74 - Shape of testing data scaled: (172, 1)


((685, 1), (172, 1))

In [11]:
# model trainer

class ModelTrainer:
    """Train models for MLFlow and hyper-parameter tuning."""
    
    def __init__(
        self,config: dict, data_transformation: DataTransformation | None = None,
        data_ingestion: DataIngestion | None = None
    ):
        """
        Initializing ModelTrainer class.
        
        Args:
            config (dict): Configuration file.
            data_transformation (DataTransformation):  A instance of the DataTransformation class.
        """
        
        self.config = config or load_config()
        self.data_transformation = data_transformation or DataTransformation(self.config)
        self.data_ingestion = data_ingestion or DataIngestion(self.config)  
        
        
        
    def load_models(self):
        """
        params and models loaded for GridSearchCV.
        
        Returns:
            models: sklearn model's for training.
            params: parameters for hyperparameter tuning.
        """
        
        params = {
            "LinearRegression_params": {
                "fit_intercept":[True],
                "copy_X": [True,False],
                "n_jobs": [1000,1500,2000],
                "positive": [True,False]
            },
            "Lasso_params": {
                "alpha": [1e-15,1e-10,1e-8,1e-3,1e-2,1,5,10,20,30,35,40,45,50,55,100]
            },
            "Ridge_params": {
                "alpha": [1e-15,1e-10,1e-8,1e-3,1e-2,1,5,10,20,30,35,40,45,50,55,100]
            },
            "GradientBoostingRegressor_params": {
                "n_estimators": [50, 100, 200],
                "learning_rate": [0.01, 0.1, 0.2],
                "max_depth": [3, 4, 5],
                "min_samples_split": [1, 5,10]
            },
            "RandomForestRegressor_params": {
                "n_estimators": [50, 100, 200],
                "min_samples_leaf": [1,2,4],
                "max_features": ['sqrt', 'log2', None]
            },
            "BaggingRegressor_params": {
                "n_estimators": [50,100,200],
                "max_samples": [1.0,0.8,0.6],
                "max_features": [1.0,0.8,0.6]
            },
            "XGBRegressor_params": {
                "n_estimators": [100,200,300],
                "max_depth": [3,5,9],
                "min_child_weight": [1,3,5],
                "learning_rate": [0.01, 0.1, 0.2],
                "subsample": [0.6, 0.8],
                "colsample_bytree": [0.6, 0.8]
            },
            "DecisionTreeRegressor_params": {
                "max_depth": [None,10,15],
                "min_samples_split": [2,5,10],
                "min_samples_leaf": [1,2,5]
            },
        }
        
        # models with hyper-parameters
        
        models = {
            "LinearRegression":(LinearRegression(),params["LinearRegression_params"]),
            "Lasso":(Lasso(),params["Lasso_params"]),
            "Ridge": (Ridge(), params["Ridge_params"]),
            "GradientBoostingRegressor":(GradientBoostingRegressor(),params["GradientBoostingRegressor_params"]),
            "RandomForestRegressor": (RandomForestRegressor(),params["RandomForestRegressor_params"]),
            "BaggingRegressor":(BaggingRegressor(),params["BaggingRegressor_params"]),
            "XGBRegressor": (XGBRegressor(),params["XGBRegressor_params"]),
            "DecisionTreeRegressor":(DecisionTreeRegressor(),params["DecisionTreeRegressor_params"])
        }
        return params,models
    
    def split(self):
        """Split data for training and testing"""
        # get data through DataIngestion
        
        data = self.data_ingestion.fetch_fred_data()
        
        # features and targets
        
        X = data.drop("FEDFUNDS",axis=1)
        y = data["FEDFUNDS"]
        
        # train/test split
        
        X_train,X_test,y_train,y_test = train_test_split(X,y,test_size=0.20,random_state=1)
        
        
        # MinMax Scaler
        scaler = MinMaxScaler()
        
        
        # scaled training and testing data
        
        X_train_scaled = scaler.fit_transform(X_train)
        X_test_scaled = scaler.transform(X_test)
        
        return X_train_scaled,X_test_scaled,y_train,y_test
        
    
    def train(self):
        """Train models through GridSearchCV"""
        params,models = self.load_models()
        
        # get X_train_scaled,X_test_scaled,y_train,y_test
        
        X_train_scaled,X_test_scaled,y_train,y_test = self.split()
        
        
        
        
        for model_name,(model,params) in models.items():
            
            
            # grid-searcg
            grid_search = GridSearchCV(model,params,cv=4,scoring="neg_mean_squared_error",n_jobs=-1)
            # fit grid search
            grid_search.fit(X_train_scaled,y_train)
            
            y_pred = grid_search.predict(X_test_scaled)
            
            r2 = r2_score(y_test,y_pred)
            print(f"R2 Score->Model Name: {model_name} {r2*100:.2f}")
            
            mape = mean_absolute_percentage_error(y_test,y_pred)
            print(f"Mean-Absolute Percentage Error: {model_name}--{mape:.4}")
            # best params
            
            best_params = grid_search.best_params_
            
            # best score
            
            best_score = grid_search.best_score_
            
            print(f"Best Score for model: {model_name}<===>{best_score}")
            
            print(f"Best Params for model: {model_name}<====>{best_params}")
            
            
            
            
if __name__ == "__main__":
    config = load_config()
    mt = ModelTrainer(config)
    mt.train()     
            
        
            


R2 Score->Model Name: LinearRegression 10.67
Mean-Absolute Percentage Error: LinearRegression--4.528
Best Score for model: LinearRegression<===>-11.192741097421976
Best Params for model: LinearRegression<====>{'copy_X': True, 'fit_intercept': True, 'n_jobs': 1000, 'positive': False}
R2 Score->Model Name: Lasso 10.67
Mean-Absolute Percentage Error: Lasso--4.533
Best Score for model: Lasso<===>-11.192740368106886
Best Params for model: Lasso<====>{'alpha': 0.001}
R2 Score->Model Name: Ridge 10.67
Mean-Absolute Percentage Error: Ridge--4.555
Best Score for model: Ridge<===>-11.191853092713757
Best Params for model: Ridge<====>{'alpha': 1}


  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
108 fits failed out of a total of 324.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
108 fits failed with the following error:
Traceback (most recent call last):
  File "/home/nickkats1/Fed-Funds-Forecast/venv/lib/python3.12/site-packages/sklearn/model_selection/_validation.py", line 859, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/home/nickkats1/Fed-Funds-Forecast/venv/lib/python3.12/site-packages/sklearn/base.py", line 1358, in wrapper
    estimator._validate_params()
  File "/home/nickkats1/Fed-Funds-Forecast/venv/lib/python3.12/site-packages/sklearn/base.py", line 471, in _validate_params
    validate_parameter_con

R2 Score->Model Name: GradientBoostingRegressor 96.47
Mean-Absolute Percentage Error: GradientBoostingRegressor--0.08108
Best Score for model: GradientBoostingRegressor<===>-0.2745225135044963
Best Params for model: GradientBoostingRegressor<====>{'learning_rate': 0.2, 'max_depth': 5, 'min_samples_split': 10, 'n_estimators': 100}
R2 Score->Model Name: RandomForestRegressor 97.98
Mean-Absolute Percentage Error: RandomForestRegressor--0.05693
Best Score for model: RandomForestRegressor<===>-0.2004035046388381
Best Params for model: RandomForestRegressor<====>{'max_features': 'log2', 'min_samples_leaf': 1, 'n_estimators': 100}
R2 Score->Model Name: BaggingRegressor 98.19
Mean-Absolute Percentage Error: BaggingRegressor--0.05452
Best Score for model: BaggingRegressor<===>-0.20077764146164837
Best Params for model: BaggingRegressor<====>{'max_features': 0.6, 'max_samples': 0.8, 'n_estimators': 50}
R2 Score->Model Name: XGBRegressor 98.11
Mean-Absolute Percentage Error: XGBRegressor--0.09141