In [1]:
from dotenv import load_dotenv
load_dotenv()
import os
from sklearn.model_selection import train_test_split,cross_val_score,GridSearchCV
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.linear_model import LinearRegression,Lasso,Ridge
from sklearn.ensemble import GradientBoostingRegressor,RandomForestRegressor,BaggingRegressor
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import root_mean_squared_error,r2_score,mean_absolute_percentage_error
import mlflow
import fredapi as fr
import fredapi.fred as fr
from xgboost import XGBRegressor

FRED_API_KEY = os.getenv("FRED_API_KEY")


fred = fr.Fred(FRED_API_KEY)



ffr = fred.get_series("FEDFUNDS")
ffr.name = "Fed Funds Rate"

ffr = fred.get_series("FEDFUNDS")
ffr.name = "Fed Funds Rate"

data = pd.DataFrame(ffr).dropna()
data = data.reset_index()
data['Date'] = data['index']


data.drop("index",axis=1,inplace=True)
data.head(10)




Note: You have installed the 'manylinux2014' variant of XGBoost. Certain features such as GPU algorithms or federated learning are not available. To use these features, please upgrade to a recent Linux distro with glibc 2.28+, and install the 'manylinux_2_28' variant.


Unnamed: 0,Fed Funds Rate,Date
0,0.8,1954-07-01
1,1.22,1954-08-01
2,1.07,1954-09-01
3,0.85,1954-10-01
4,0.83,1954-11-01
5,1.28,1954-12-01
6,1.39,1955-01-01
7,1.29,1955-02-01
8,1.35,1955-03-01
9,1.43,1955-04-01


In [2]:
X = data.drop("Fed Funds Rate",axis=1)
y = data['Fed Funds Rate']

X_train,x_test,y_train,y_test = train_test_split(X,y,test_size=0.13,random_state=42)

scaler = MinMaxScaler()

X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(x_test)


def evaluate(y_test,pred,model_name,r2,rmse,cv_scores):


    result = {
        "Model": model_name,
        "R2": r2,
        "RMSE": rmse,
        "Cross-val Scores":cv_scores.mean()
    }

    return result


model_dict = []
models = {
    "LinearRegression":LinearRegression(),
    "lasso":Lasso(),
    "ridge":Ridge(),
    "GradientBoostingRegressor":GradientBoostingRegressor(),
    "BaggingRegressor":BaggingRegressor(),
    "xBGboostRegression":XGBRegressor(),
    "randomforest":RandomForestRegressor(),
}


for model_name,model in models.items():
    model.fit(X_train_scaled,y_train)
    pred = model.predict(X_test_scaled)
    mse = root_mean_squared_error(y_test,pred)
    r2 = r2_score(y_test,pred)
    cv_scores = cross_val_score(model, X_train_scaled,y_train,cv=10,scoring="neg_mean_squared_error")
    model_results = evaluate(y_test, pred, model_name,r2,mse,cv_scores)
    model_dict.append(model_results)



df_results = pd.DataFrame(model_dict)
print(df_results.head(10))



                       Model        R2      RMSE  Cross-val Scores
0           LinearRegression  0.068586  2.998522        -11.641676
1                      lasso -0.004599  3.114099        -13.050213
2                      ridge  0.069840  2.996504        -11.641329
3  GradientBoostingRegressor  0.965586  0.576376         -0.446371
4           BaggingRegressor  0.983353  0.400870         -0.211027
5         xBGboostRegression  0.961390  0.610500         -0.291863
6               randomforest  0.991771  0.281846         -0.210217


In [3]:
#hyper-parameter tuning

mlflow.set_experiment("fed-funds-rate-gridsearch")

linearregression_params = {
'copy_X': [True,False], 
'fit_intercept': [True,False], 
'n_jobs': [1000,5000,10000], 
'positive': [True,False]}



lasso_params = {
    'alpha': [1e-15,1e-10,1e-8,1e-3,1e-2,1,5,10,20,30,35,40,45,50,55,100]
    }


ridge_params = {
    'alpha':[1e-15,1e-10,1e-8,1e-3,1e-2,1,5,10,20,30,35,40,45,50,55,100]
    }



gradientboosting_params = {
    'n_estimators': [50,100,200],
    'learning_rate': [0.01,0.1,0.2],
    'max_depth': [3,4,5],
    'min_samples_split': [2,5,10]
    }

bagginregression_params = {
    'n_estimators': [50,100,200],
    'max_samples' : [1.0,0.8,0.6],
    'max_features': [1.0,0.8,0.6]
}



xgboost_grid = {
    'n_estimators': [100, 200, 500],
    'learning_rate': [0.01, 0.1],
    'max_depth': [3, 5, 7],
    'min_child_weight': [1, 3],
    'subsample': [0.8, 1.0],
    'colsample_bytree': [0.8, 1.0],
    'reg_alpha': [0, 0.1],
    'reg_lambda': [0, 0.1]
}


random_forest_params = {
    'n_estimators': [50,100,200],
    'min_samples_leaf':[1,2,4],
    'max_features': ['sqrt','log2',None],

}








models = {
    "LinearRegression":(LinearRegression(),linearregression_params),
    "lasso":(Lasso(),lasso_params),
    "ridge":(Ridge(),ridge_params),
    "GradientBoostingRegresser":(GradientBoostingRegressor(),gradientboosting_params),
    "baggingREgressor":(BaggingRegressor(),bagginregression_params),
    "xGBRegressor":(XGBRegressor(),xgboost_grid),
    "randomforestregressor":(RandomForestRegressor(),random_forest_params),
    }

best_scores = []

for model_name,(model,params) in models.items():
    grid_search = GridSearchCV(model,params,scoring="neg_root_mean_squared_error",cv=4,n_jobs=-1)
    grid_search.fit(X_train_scaled,y_train)
    print(f'Best Params for {model_name}: {grid_search.best_params_}')
    print(f'Best Score for {model_name}: {grid_search.best_score_}')
    best_scores.append({"Model":model_name,"neg_root_mean_squared_log_error":grid_search.best_score_})
    with mlflow.start_run(run_name=model_name):
        mlflow.log_params(grid_search.best_params_)
        mlflow.log_metric("best_score", grid_search.best_score_)
        mlflow.sklearn.log_model(grid_search.best_estimator_, model_name)
        best_scores.append({"Model": model_name, "Best Score": grid_search.best_score_})
        mlflow.sklearn.log_model(grid_search.best_estimator_, model_name)
        best_scores.append({"Model": model_name, "Best Score": grid_search.best_score_,"Best Params":grid_search.best_params_})


best_scores_df = pd.DataFrame(best_scores)



2025/09/05 06:56:15 INFO mlflow.tracking.fluent: Experiment with name 'fed-funds-rate-gridsearch' does not exist. Creating a new experiment.


Best Params for LinearRegression: {'copy_X': True, 'fit_intercept': True, 'n_jobs': 1000, 'positive': False}
Best Score for LinearRegression: -3.4008840280728942


  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(


Best Params for lasso: {'alpha': 0.001}
Best Score for lasso: -3.4008836413326518




Best Params for ridge: {'alpha': 0.01}
Best Score for ridge: -3.4008831870826928




Best Params for GradientBoostingRegresser: {'learning_rate': 0.1, 'max_depth': 5, 'min_samples_split': 2, 'n_estimators': 100}
Best Score for GradientBoostingRegresser: -0.5917671437158843




Best Params for baggingREgressor: {'max_features': 1.0, 'max_samples': 0.8, 'n_estimators': 200}
Best Score for baggingREgressor: -0.4738485137548393


Note: You have installed the 'manylinux2014' variant of XGBoost. Certain features such as GPU algorithms or federated learning are not available. To use these features, please upgrade to a recent Linux distro with glibc 2.28+, and install the 'manylinux_2_28' variant.
Note: You have installed the 'manylinux2014' variant of XGBoost. Certain features such as GPU algorithms or federated learning are not available. To use these features, please upgrade to a recent Linux distro with glibc 2.28+, and install the 'manylinux_2_28' variant.
Note: You have installed the 'manylinux2014' variant of XGBoost. Certain features such as GPU algorithms or federated learning are not available. To use these features, please upgrade to a recent Linux distro with glibc 2.28+, and install the 'manylinux_2_28' variant.
Note: You have installed the 'manylinux2014' variant of XGBoost. Certain features such as GPU algorithms or federated learning are not available. To use these features, please upgrade to a rece

Best Params for xGBRegressor: {'colsample_bytree': 0.8, 'learning_rate': 0.01, 'max_depth': 7, 'min_child_weight': 1, 'n_estimators': 500, 'reg_alpha': 0.1, 'reg_lambda': 0, 'subsample': 0.8}
Best Score for xGBRegressor: -0.6209462009593243




Best Params for randomforestregressor: {'max_features': 'sqrt', 'min_samples_leaf': 1, 'n_estimators': 100}
Best Score for randomforestregressor: -0.4839220956382824




In [4]:
Best_Model = RandomForestRegressor(max_features="log2",min_samples_leaf=1,n_estimators=50).fit(X_train_scaled,y_train)

pred = Best_Model.predict(X_test_scaled)
print(f'R2 Score From Best Model and HyperParameter Tuning: {r2_score(y_test,pred)*100:.2f}%')
print(f'Root Mean Squared Error from Best Model with Optimized HyperParamters: {root_mean_squared_error(y_test,pred)}')

R2 Score From Best Model and HyperParameter Tuning: 99.01%
Root Mean Squared Error from Best Model with Optimized HyperParamters: 0.3096497779750536


In [6]:
pred_vs_actual = pd.DataFrame({
    'Actual': y_test.values,
    'Predicted': pred
}, index=y_test.index)




pred_vs_actual.sort_index(inplace=True)



# Root Mean-Squared Error
rmse = root_mean_squared_error(y_test,pred)
print(f'Root Mean-Squared Error: {rmse:.4}')

# R2 Score
r2 = r2_score(y_test,pred)
print(f'R2 Score: {r2*100:.2f}%')

# mean absolute percentage error
mape = mean_absolute_percentage_error(y_test,pred)
print(f'Mean Absolute Percentage Error: {mape:.4f}')



print('Predicted Vs Actual Prices\n')

print(pred_vs_actual.head(20))
print(pred_vs_actual.tail(20))

Root Mean-Squared Error: 0.3096
R2 Score: 99.01%
Mean Absolute Percentage Error: 0.0570
Predicted Vs Actual Prices

     Actual  Predicted
23     2.71     2.7368
30     2.84     2.9388
31     3.00     2.9564
33     3.00     2.9886
39     3.50     3.4022
49     1.53     1.4958
63     3.98     3.8158
65     3.99     3.9340
66     3.99     3.8898
67     3.97     3.8574
72     3.23     3.3242
76     2.44     2.5294
77     1.98     2.4706
78     1.45     2.3714
86     1.88     2.1608
96     2.71     2.7132
109    3.49     3.0310
110    3.48     3.4350
120    3.42     3.4970
136    4.10     4.0756
     Actual  Predicted
706    0.11     0.1290
709    0.08     0.0890
713    0.09     0.0786
733    0.14     0.1354
740    0.36     0.3744
746    0.40     0.4050
753    0.90     0.8912
767    1.82     1.8088
778    2.39     2.4078
788    0.65     1.2410
792    0.09     0.0810
802    0.06     0.0742
808    0.08     0.0800
819    3.08     2.9784
830    5.33     5.3216
833    5.33     5.3300
835    5.3