In [1]:
from dotenv import load_dotenv
load_dotenv()
import os
from sklearn.model_selection import train_test_split,cross_val_score,GridSearchCV
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.linear_model import LinearRegression,Lasso,Ridge
from sklearn.ensemble import GradientBoostingRegressor,RandomForestRegressor,BaggingRegressor
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import root_mean_squared_error,r2_score,mean_absolute_percentage_error
import mlflow
import fredapi.fred as fr
from xgboost import XGBRegressor

FRED_API_KEY = os.getenv("FRED_API_KEY")


fred = fr.Fred(FRED_API_KEY)




ffr = fred.get_series("FEDFUNDS")
ffr.name = "Fed Funds Rate"

data = pd.DataFrame(ffr).dropna()
data = data.reset_index()
data['Date'] = data['index']


data.drop("index",axis=1,inplace=True)
data.head(10)




Unnamed: 0,Fed Funds Rate,Date
0,0.8,1954-07-01
1,1.22,1954-08-01
2,1.07,1954-09-01
3,0.85,1954-10-01
4,0.83,1954-11-01
5,1.28,1954-12-01
6,1.39,1955-01-01
7,1.29,1955-02-01
8,1.35,1955-03-01
9,1.43,1955-04-01


In [2]:
X = data.drop("Fed Funds Rate",axis=1)
y = data['Fed Funds Rate']

X_train,x_test,y_train,y_test = train_test_split(X,y,test_size=0.13,random_state=42)

scaler = MinMaxScaler()

X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(x_test)


def evaluate(y_test,pred,model_name,r2,rmse,cv_scores):


    result = {
        "Model": model_name,
        "R2": r2,
        "RMSE": rmse,
        "Cross-val Scores":cv_scores.mean()
    }

    return result


model_dict = []
models = {
    "LinearRegression":LinearRegression(),
    "lasso":Lasso(),
    "ridge":Ridge(),
    "GradientBoostingRegressor":GradientBoostingRegressor(),
    "BaggingRegressor":BaggingRegressor(),
    "xBGboostRegression":XGBRegressor(),
    "randomforest":RandomForestRegressor(),
}


for model_name,model in models.items():
    model.fit(X_train_scaled,y_train)
    pred = model.predict(X_test_scaled)
    mse = root_mean_squared_error(y_test,pred)
    r2 = r2_score(y_test,pred)
    cv_scores = cross_val_score(model, X_train_scaled,y_train,cv=10,scoring="neg_mean_squared_error")
    model_results = evaluate(y_test, pred, model_name,r2,mse,cv_scores)
    model_dict.append(model_results)



df_results = pd.DataFrame(model_dict)
print(df_results.head(10))



                       Model        R2      RMSE  Cross-val Scores
0           LinearRegression  0.130245  3.128840        -11.498943
1                      lasso -0.005820  3.364689        -12.773805
2                      ridge  0.130205  3.128912        -11.498621
3  GradientBoostingRegressor  0.972741  0.553915         -0.438449
4           BaggingRegressor  0.992204  0.296226         -0.196567
5         xBGboostRegression  0.956078  0.703115         -0.323921
6               randomforest  0.992578  0.289031         -0.188335


In [3]:
#hyper-parameter tuning

mlflow.set_experiment("fed-funds-rate-gridsearch-1")

linearregression_params = {
'copy_X': [True,False], 
'fit_intercept': [True,False], 
'n_jobs': [1000,5000,10000], 
'positive': [True,False]}



lasso_params = {
    'alpha': [1e-15,1e-10,1e-8,1e-3,1e-2,1,5,10,20,30,35,40,45,50,55,100]
    }


ridge_params = {
    'alpha':[1e-15,1e-10,1e-8,1e-3,1e-2,1,5,10,20,30,35,40,45,50,55,100]
    }



gradientboosting_params = {
    'n_estimators': [50,100,200],
    'learning_rate': [0.01,0.1,0.2],
    'max_depth': [3,4,5],
    'min_samples_split': [2,5,10]
    }

bagginregression_params = {
    'n_estimators': [50,100,200],
    'max_samples' : [1.0,0.8,0.6],
    'max_features': [1.0,0.8,0.6]
}



xgboost_grid = {
    'n_estimators': [100, 200, 500],
    'learning_rate': [0.01, 0.1],
    'max_depth': [3, 5, 7],
    'min_child_weight': [1, 3],
    'subsample': [0.8, 1.0],
    'colsample_bytree': [0.8, 1.0],
    'reg_alpha': [0, 0.1],
    'reg_lambda': [0, 0.1]
}


random_forest_params = {
    'n_estimators': [50,100,200],
    'min_samples_leaf':[1,2,4],
    'max_features': ['sqrt','log2',None],

}








models = {
    "LinearRegression":(LinearRegression(),linearregression_params),
    "lasso":(Lasso(),lasso_params),
    "ridge":(Ridge(),ridge_params),
    "GradientBoostingRegresser":(GradientBoostingRegressor(),gradientboosting_params),
    "baggingREgressor":(BaggingRegressor(),bagginregression_params),
    "xGBRegressor":(XGBRegressor(),xgboost_grid),
    "randomforestregressor":(RandomForestRegressor(),random_forest_params),
    }

best_scores = []

for model_name,(model,params) in models.items():
    grid_search = GridSearchCV(model,params,scoring="neg_root_mean_squared_error",cv=4,n_jobs=-1)
    grid_search.fit(X_train_scaled,y_train)
    print(f'Best Params for {model_name}: {grid_search.best_params_}')
    print(f'Best Score for {model_name}: {grid_search.best_score_}')
    best_scores.append({"Model":model_name,"neg_root_mean_squared_log_error":grid_search.best_score_})
    with mlflow.start_run(run_name=model_name):
        mlflow.log_params(grid_search.best_params_)
        mlflow.log_metric("best_score", grid_search.best_score_)
        mlflow.sklearn.log_model(grid_search.best_estimator_, model_name)
        best_scores.append({"Model": model_name, "Best Score": grid_search.best_score_})
        mlflow.sklearn.log_model(grid_search.best_estimator_, model_name)
        best_scores.append({"Model": model_name, "Best Score": grid_search.best_score_,"Best Params":grid_search.best_params_})


best_scores_df = pd.DataFrame(best_scores)



  return FileStore(store_uri, store_uri)


Best Params for LinearRegression: {'copy_X': True, 'fit_intercept': True, 'n_jobs': 1000, 'positive': False}
Best Score for LinearRegression: -3.3835043508598472


  model = cd_fast.enet_coordinate_descent(


Best Params for lasso: {'alpha': 1e-15}
Best Score for lasso: -3.383504350859847




Best Params for ridge: {'alpha': 0.01}
Best Score for ridge: -3.3835035782734266




Best Params for GradientBoostingRegresser: {'learning_rate': 0.2, 'max_depth': 5, 'min_samples_split': 10, 'n_estimators': 50}
Best Score for GradientBoostingRegresser: -0.582503613124471




Best Params for baggingREgressor: {'max_features': 0.6, 'max_samples': 1.0, 'n_estimators': 50}
Best Score for baggingREgressor: -0.4622947046984237




Best Params for xGBRegressor: {'colsample_bytree': 0.8, 'learning_rate': 0.1, 'max_depth': 7, 'min_child_weight': 3, 'n_estimators': 100, 'reg_alpha': 0, 'reg_lambda': 0, 'subsample': 1.0}
Best Score for xGBRegressor: -0.6756738509920328




Best Params for randomforestregressor: {'max_features': 'sqrt', 'min_samples_leaf': 1, 'n_estimators': 200}
Best Score for randomforestregressor: -0.4714184852249341




In [4]:
Best_Model = RandomForestRegressor(max_features="log2",min_samples_leaf=1,n_estimators=50).fit(X_train_scaled,y_train)

pred = Best_Model.predict(X_test_scaled)
print(f'R2 Score Best Model: {r2_score(y_test,pred)*100:.2f}%')
print(f'Root Mean Squared Error Best Model: {root_mean_squared_error(y_test,pred)}')
print(f'Mean Absolute Percentage Error Best Model: {mean_absolute_percentage_error(y_test,pred)}')

R2 Score Best Model: 99.19%
Root Mean Squared Error Best Model: 0.3011688610350394
Mean Absolute Percentage Error Best Model: 0.0744068942779333


In [5]:
pred_vs_actual = pd.DataFrame({
    'Actual': y_test.values,
    'Predicted': pred
}, index=y_test.index)




pred_vs_actual.sort_index(inplace=True)



# Root Mean-Squared Error
rmse = root_mean_squared_error(y_test,pred)
print(f'Root Mean-Squared Error: {rmse:.4}')

# R2 Score
r2 = r2_score(y_test,pred)
print(f'R2 Score: {r2*100:.2f}%')

# mean absolute percentage error
mape = mean_absolute_percentage_error(y_test,pred)
print(f'Mean Absolute Percentage Error: {mape:.4f}')



print('Predicted Vs Actual Prices\n')

print(pred_vs_actual.head(20))
print(pred_vs_actual.tail(20))

Root Mean-Squared Error: 0.3012
R2 Score: 99.19%
Mean Absolute Percentage Error: 0.0744
Predicted Vs Actual Prices

     Actual  Predicted
23     2.71     2.7340
30     2.84     2.9444
31     3.00     2.9584
33     3.00     2.9886
39     3.50     3.4010
49     1.53     0.9216
63     3.98     3.7702
65     3.99     3.8920
66     3.99     3.8642
67     3.97     3.8482
76     2.44     2.1394
78     1.45     2.0624
86     1.88     2.1956
96     2.71     2.7260
110    3.48     3.4824
120    3.42     3.4964
136    4.10     4.1644
137    4.32     4.3336
139    4.60     4.6280
168    6.03     6.0588
     Actual  Predicted
745    0.40     0.3930
755    1.04     1.0856
759    1.15     1.1716
777    2.42     2.3964
783    1.83     1.9226
787    1.58     0.8286
804    0.10     0.0804
811    0.08     0.1616
814    0.77     0.7186
815    1.21     1.5102
820    3.78     2.9192
821    4.10     3.5106
822    4.33     4.5110
832    5.33     5.3300
839    5.33     5.3300
842    5.13     4.9578
846    4.3