In [1]:
from dotenv import load_dotenv
load_dotenv(".env")
import os
from sklearn.model_selection import train_test_split,cross_val_score,GridSearchCV
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.linear_model import LinearRegression,Lasso,Ridge
from sklearn.ensemble import GradientBoostingRegressor,RandomForestRegressor,BaggingRegressor
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import root_mean_squared_error,r2_score
import mlflow
import mlflow.sklearn
from docs.untitled import fred_api_key
import fredapi as fr
from fredapi import Fred
from xgboost import XGBRegressor
from catboost import CatBoostRegressor


fred = fr.Fred(fred_api_key)



ffr = fred.get_series("FEDFUNDS")
ffr.name = "Fed Funds Rate"

ffr = fred.get_series("FEDFUNDS")
ffr.name = "Fed Funds Rate"

data = pd.DataFrame(ffr).dropna()
data = data.reset_index()
data['Date'] = data['index']


data.drop("index",axis=1,inplace=True)
data.head(10)




Unnamed: 0,Fed Funds Rate,Date
0,0.8,1954-07-01
1,1.22,1954-08-01
2,1.07,1954-09-01
3,0.85,1954-10-01
4,0.83,1954-11-01
5,1.28,1954-12-01
6,1.39,1955-01-01
7,1.29,1955-02-01
8,1.35,1955-03-01
9,1.43,1955-04-01


In [2]:
X = data.drop("Fed Funds Rate",axis=1)
y = data['Fed Funds Rate']
X_train,x_test,y_train,y_test = train_test_split(X,y,test_size=.20,random_state=42)
scaler = MinMaxScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(x_test)


def evaluate(y_test,pred,model_name,r2,rmse,cv_scores):


    result = {
        "Model": model_name,
        "R2": r2,
        "RMSE": rmse,
        "Cross-val Scores":cv_scores.mean()
    }

    return result


model_dict = []
models = {
    "LinearRegression":LinearRegression(),
    "lasso":Lasso(),
    "ridge":Ridge(),
    "GradientBoostingRegressor":GradientBoostingRegressor(),
    "BaggingRegressor":BaggingRegressor(),
    "xBGboostRegression":XGBRegressor(),
    "randomforest":RandomForestRegressor(),
    "catboostregressor":CatBoostRegressor()
}


for model_name,model in models.items():
    model.fit(X_train_scaled,y_train)
    pred = model.predict(X_test_scaled)
    mse = root_mean_squared_error(y_test,pred)
    r2 = r2_score(y_test,pred)
    cv_scores = cross_val_score(model, X_train_scaled,y_train,cv=10,scoring="neg_mean_squared_error")
    model_results = evaluate(y_test, pred, model_name,r2,mse,cv_scores)
    model_dict.append(model_results)



df_results = pd.DataFrame(model_dict)
print(df_results.head(10))



Learning rate set to 0.038532
0:	learn: 3.4810263	total: 66.6ms	remaining: 1m 6s
1:	learn: 3.3950453	total: 68.4ms	remaining: 34.1s
2:	learn: 3.3164867	total: 70.4ms	remaining: 23.4s
3:	learn: 3.2406933	total: 73ms	remaining: 18.2s
4:	learn: 3.1624593	total: 76.4ms	remaining: 15.2s
5:	learn: 3.0948053	total: 79.1ms	remaining: 13.1s
6:	learn: 3.0282369	total: 80.1ms	remaining: 11.4s
7:	learn: 2.9620085	total: 81.7ms	remaining: 10.1s
8:	learn: 2.9065543	total: 83.9ms	remaining: 9.24s
9:	learn: 2.8360356	total: 86.4ms	remaining: 8.55s
10:	learn: 2.7773128	total: 87.5ms	remaining: 7.87s
11:	learn: 2.7170666	total: 89.9ms	remaining: 7.4s
12:	learn: 2.6620001	total: 91.1ms	remaining: 6.91s
13:	learn: 2.6100537	total: 92ms	remaining: 6.47s
14:	learn: 2.5654613	total: 92.8ms	remaining: 6.1s
15:	learn: 2.5133436	total: 94.1ms	remaining: 5.79s
16:	learn: 2.4618138	total: 96.2ms	remaining: 5.56s
17:	learn: 2.4133392	total: 97.2ms	remaining: 5.3s
18:	learn: 2.3669223	total: 98.2ms	remaining: 5.07s

In [3]:
#hyper-parameter tuning

mlflow.set_experiment("fed-funds-rate-gridsearch")

linearregression_params = {
'copy_X': [True,False], 
'fit_intercept': [True,False], 
'n_jobs': [1000,5000,10000], 
'positive': [True,False]}



lasso_params = {
    'alpha': [1e-15,1e-10,1e-8,1e-3,1e-2,1,5,10,20,30,35,40,45,50,55,100]
    }


ridge_params = {
    'alpha':[1e-15,1e-10,1e-8,1e-3,1e-2,1,5,10,20,30,35,40,45,50,55,100]
    }



gradientboosting_params = {
    'n_estimators': [50,100,200],
    'learning_rate': [0.01,0.1,0.2],
    'max_depth': [3,4,5],
    'min_samples_split': [2,5,10]
    }

bagginregression_params = {
    'n_estimators': [50,100,200],
    'max_samples' : [1.0,0.8,0.6],
    'max_features': [1.0,0.8,0.6]
}



xgboost_grid = {
    'n_estimators': [100, 200, 500],
    'learning_rate': [0.01, 0.1],
    'max_depth': [3, 5, 7],
    'min_child_weight': [1, 3],
    'subsample': [0.8, 1.0],
    'colsample_bytree': [0.8, 1.0],
    'reg_alpha': [0, 0.1],
    'reg_lambda': [0, 0.1]
}


random_forest_params = {
    'n_estimators': [50,100,200],
    'min_samples_leaf':[1,2,4],
    'max_features': ['sqrt','log2',None],

}

catboost_params = {
    'iterations': [100, 200, 300],
    'learning_rate': [0.01, 0.05, 0.1],
    'depth': [4, 6, 8],
    'l2_leaf_reg': [1, 3, 5]
}







models = {
    "LinearRegression":(LinearRegression(),linearregression_params),
    "lasso":(Lasso(),lasso_params),
    "ridge":(Ridge(),ridge_params),
    "GradientBoostingRegresser":(GradientBoostingRegressor(),gradientboosting_params),
    "baggingREgressor":(BaggingRegressor(),bagginregression_params),
    "xGBRegressor":(XGBRegressor(),xgboost_grid),
    "randomforestregressor":(RandomForestRegressor(),random_forest_params),
    "catboostregressor":(CatBoostRegressor(),catboost_params)
    }

best_scores = []

for model_name,(model,params) in models.items():
    grid_search = GridSearchCV(model,params,scoring="neg_root_mean_squared_error",cv=4,n_jobs=-1)
    grid_search.fit(X_train_scaled,y_train)
    print(f'Best Params for {model_name}: {grid_search.best_params_}')
    print(f'Best Score for {model_name}: {grid_search.best_score_}')
    best_scores.append({"Model":model_name,"neg_root_mean_squared_log_error":grid_search.best_score_})
    with mlflow.start_run(run_name=model_name):
        mlflow.log_params(grid_search.best_params_)
        mlflow.log_metric("best_score", grid_search.best_score_)
        mlflow.sklearn.log_model(grid_search.best_estimator_, model_name)
        best_scores.append({"Model": model_name, "Best Score": grid_search.best_score_})
        mlflow.sklearn.log_model(grid_search.best_estimator_, model_name)
        best_scores.append({"Model": model_name, "Best Score": grid_search.best_score_,"Best Params":grid_search.best_params_})


best_scores_df = pd.DataFrame(best_scores)



Best Params for LinearRegression: {'copy_X': True, 'fit_intercept': True, 'n_jobs': 1000, 'positive': False}
Best Score for LinearRegression: -3.3703840667767557




Best Params for lasso: {'alpha': 1e-15}
Best Score for lasso: -3.3703840667767557




Best Params for ridge: {'alpha': 0.01}
Best Score for ridge: -3.3703830823312417




Best Params for GradientBoostingRegresser: {'learning_rate': 0.1, 'max_depth': 5, 'min_samples_split': 2, 'n_estimators': 100}
Best Score for GradientBoostingRegresser: -0.6206149578764485




Best Params for baggingREgressor: {'max_features': 0.8, 'max_samples': 1.0, 'n_estimators': 200}
Best Score for baggingREgressor: -0.5143181449680573




Best Params for xGBRegressor: {'colsample_bytree': 0.8, 'learning_rate': 0.1, 'max_depth': 5, 'min_child_weight': 1, 'n_estimators': 200, 'reg_alpha': 0.1, 'reg_lambda': 0.1, 'subsample': 0.8}
Best Score for xGBRegressor: -0.6124578526326955




Best Params for randomforestregressor: {'max_features': 'sqrt', 'min_samples_leaf': 1, 'n_estimators': 200}
Best Score for randomforestregressor: -0.5219952691945119




0:	learn: 3.3214568	total: 1.85ms	remaining: 554ms
1:	learn: 3.1186631	total: 3.49ms	remaining: 520ms
2:	learn: 2.9279846	total: 5.29ms	remaining: 524ms
3:	learn: 2.7226035	total: 7.01ms	remaining: 519ms
4:	learn: 2.5722868	total: 8.61ms	remaining: 508ms
5:	learn: 2.4208530	total: 10.3ms	remaining: 505ms
6:	learn: 2.2940142	total: 12.1ms	remaining: 507ms
7:	learn: 2.1577730	total: 13.7ms	remaining: 499ms
8:	learn: 2.0540134	total: 15.2ms	remaining: 490ms
9:	learn: 1.9657100	total: 16.7ms	remaining: 484ms
10:	learn: 1.8773250	total: 18.4ms	remaining: 483ms
11:	learn: 1.7881346	total: 20ms	remaining: 480ms
12:	learn: 1.7303053	total: 21.5ms	remaining: 475ms
13:	learn: 1.6641123	total: 23.1ms	remaining: 472ms
14:	learn: 1.6206352	total: 24.7ms	remaining: 470ms
15:	learn: 1.5752638	total: 26.5ms	remaining: 470ms
16:	learn: 1.5413835	total: 28.2ms	remaining: 469ms
17:	learn: 1.4885976	total: 29.8ms	remaining: 466ms
18:	learn: 1.4472190	total: 31.6ms	remaining: 468ms
19:	learn: 1.4089081	tot



In [4]:
Best_Model = RandomForestRegressor(max_features="sqrt",min_samples_leaf=1,n_estimators=200).fit(X_train_scaled,y_train)

pred = Best_Model.predict(X_test_scaled)
print(f'R2 Score From Best Model and HyperParameter Tuning: {r2_score(y_test,pred)*100:.2f}%')
print(f'Root Mean Squared Error from Best Model with Optimized HyperParamters: {root_mean_squared_error(y_test,pred)}')

R2 Score From Best Model and HyperParameter Tuning: 98.98%
Root Mean Squared Error from Best Model with Optimized HyperParamters: 0.35004488406856216


In [5]:
pred_vs_actual = pd.DataFrame({
    'Actual': y_test.values,
    'Predicted': pred
}, index=y_test.index)





pred_vs_actual.sort_index(inplace=True)
print('Predicted Vs Actual Prices\n')
print(pred_vs_actual)

print(pred_vs_actual.head(20))
print(pred_vs_actual.tail(20))

Predicted Vs Actual Prices

     Actual  Predicted
5      1.28    1.00970
7      1.29    1.36290
10     1.43    1.50130
23     2.71    2.74005
29     2.94    2.90155
..      ...        ...
835    5.33    5.33000
838    5.33    5.33000
847    4.33    4.36635
848    4.33    4.35395
850    4.33    4.34420

[171 rows x 2 columns]
    Actual  Predicted
5     1.28    1.00970
7     1.29    1.36290
10    1.43    1.50130
23    2.71    2.74005
29    2.94    2.90155
30    2.84    2.94445
31    3.00    2.96150
33    3.00    2.99275
39    3.50    3.36690
49    1.53    1.46615
54    2.48    2.45305
63    3.98    3.76375
65    3.99    3.91785
66    3.99    3.87450
67    3.97    3.85070
72    3.23    3.27755
76    2.44    2.51545
77    1.98    2.44860
78    1.45    2.38440
81    1.49    2.03685
     Actual  Predicted
744    0.39    0.38725
750    0.65    0.59525
751    0.66    0.79360
765    1.69    1.61495
767    1.82    1.86780
773    2.27    2.25335
786    1.55    1.46675
792    0.09    0.08430
800