In [20]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import sklearn
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
from sklearn.decomposition import LatentDirichletAllocation
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, classification_report
from sklearn.ensemble import RandomForestRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.metrics import mean_squared_error as mse
from sklearn.metrics import mean_absolute_error as mae
from sklearn.metrics import r2_score as r2s
from sklearn.model_selection import GridSearchCV
from sklearn.preprocessing import MinMaxScaler

In [21]:
inflation = pd.read_csv('Initial Data.csv', index_col="Period")
inflation.head()

Unnamed: 0_level_0,Yield Curve,Production Index,Housing Starts,Cost of Living Index,Unemployment,Consumer Price Index
Period,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Jan.00,0.62,96.01601,118.4,1466,10.1,98.0
Feb.00,0.53,96.31606,118.2,1476,10.2,98.8
Mar.00,0.45,95.51592,118.4,1485,10.2,99.3
Apr.00,0.22,97.01617,119.3,1490,10.0,99.6
May.00,0.11,101.2169,120.5,1497,9.8,99.9


In [3]:
inflation.info()

<class 'pandas.core.frame.DataFrame'>
Index: 277 entries, Jan.00 to Jan.23
Data columns (total 6 columns):
 #   Column                Non-Null Count  Dtype  
---  ------                --------------  -----  
 0   Yield Curve           277 non-null    float64
 1   Production Index      277 non-null    float64
 2   Housing Starts        277 non-null    float64
 3   Cost of Living Index  277 non-null    int64  
 4   Unemployment          277 non-null    float64
 5   Consumer Price Index  277 non-null    float64
dtypes: float64(5), int64(1)
memory usage: 15.1+ KB


Decision Tree (Unoptimized)

In [22]:
X = inflation.drop('Consumer Price Index', axis=1)
y = inflation['Consumer Price Index']


#creating the x and y value for the dataset and creating a test and training set from them

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=42)
scaler = MinMaxScaler()
scaler.fit (X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

inflation_tree = DecisionTreeRegressor(random_state=42)

inflation_tree.fit(X_train_scaled, y_train)

y_pred_decision = inflation_tree.predict(X_test_scaled)
mse_tree = mse(y_test, y_pred_decision)
mae_tree = mae(y_test, y_pred_decision)
rsq_tree = r2s(y_test, y_pred_decision)
print ('MSE of Decision Tree: ', mse_tree)
print ('MAE of Decision Tree', mae_tree)
print ('R-Squared of Decision Tree', rsq_tree)


MSE of Decision Tree:  0.17828571428571438
MAE of Decision Tree 0.254285714285713
R-Squared of Decision Tree 0.9989019762554627


In [5]:
print('Depth of tree:', inflation_tree.tree_.max_depth)
print('Number of nodes in tree:', inflation_tree.tree_.node_count)

Depth of tree: 11
Number of nodes in tree: 311


In [23]:
param_dt = {'max_depth': [2, 4, 6, 8, 10, 11], 'min_samples_leaf': [1, 2, 5, 10, 15, 20, 25]}
grid_dt = GridSearchCV(estimator=inflation_tree, param_grid=param_dt, scoring='neg_mean_squared_error')
grid_result = grid_dt.fit(X_train_scaled, y_train)
print ('Best Param:', grid_result.best_params_ )

Best Param: {'max_depth': 8, 'min_samples_leaf': 1}


Optimized Decision Tree

In [29]:
inflation_tree = DecisionTreeRegressor(random_state=42, max_depth=11, min_samples_leaf=1)

inflation_tree.fit(X_train_scaled, y_train)

y_pred_decision = inflation_tree.predict(X_test_scaled)
mse_tree = mse(y_test, y_pred_decision)
mae_tree = mae(y_test, y_pred_decision)
rsq_tree = r2s(y_test, y_pred_decision)
print ('MSE of Decision Tree: ', mse_tree)
print ('MAE of Decision Tree', mae_tree)
print ('R-Squared of Decision Tree', rsq_tree)


MSE of Decision Tree:  0.17828571428571438
MAE of Decision Tree 0.254285714285713
R-Squared of Decision Tree 0.9989019762554627


Unoptimized Random Forest

In [8]:

inflation_random = RandomForestRegressor(n_estimators=200, random_state=0)
inflation_random.fit(X_train_scaled, y_train)
y_pred_rand = inflation_random.predict(X_test_scaled)
mse_rand = mse(y_test, y_pred_rand)
mae_rand = mae(y_test, y_pred_rand)
r2 = r2s(y_test, y_pred_rand)
print ('MSE of Random Forest:',mse_rand )
print ('MAE of Random Forest:',mae_rand )
print ('R-squared', r2)

MSE of Random Forest: 0.20709222142860118
MAE of Random Forest: 0.22328571428571828
R-squared 0.9987245631129303


In [9]:
grid = dict(max_depth = [1, 5, 10, 15], min_samples_leaf = [1, 5, 10, 15, 20])

forest_est = RandomForestRegressor(n_estimators = 200, random_state = 42)

grid_search = GridSearchCV(estimator=inflation_random, param_grid=grid, scoring='neg_mean_squared_error')

grid_result = grid_search.fit(X, y)

# Print out the best result
print("Best result is obtained using", grid_result.best_params_)

Best result is obtained using {'max_depth': 10, 'min_samples_leaf': 1}


Optimized Random Forest

In [46]:
inflation_random = RandomForestRegressor(n_estimators=200, random_state=42, max_depth=10, min_samples_leaf=1)
inflation_random.fit(X_train, y_train)
y_pred_rand = inflation_random.predict(X_test)
mse_rand = mse(y_test, y_pred_rand)
mae_rand = mae(y_test, y_pred_rand)
r2 = r2s(y_test, y_pred_rand)
print ('MSE of Random Forest:',mse_rand )
print ('MAE of Random Forest:',mae_rand )
print ('R-squared', r2)

MSE of Random Forest: 0.2553169205601701
MAE of Random Forest: 0.24244551020409924
R-squared 0.9984275574614581


LASSO

In [1]:
from sklearn.linear_model import Lasso

inflation_lasso = Lasso(alpha=0.1, random_state=42)
inflation_lasso.fit(X_train_scaled, y_train)
y_pred_lasso = inflation_lasso.predict(X_test_scaled)
mse_lasso = mse(y_test, y_pred_lasso)
mae_lasso = mae(y_test, y_pred_lasso)
r2_lasso = r2s(y_test, y_pred_lasso)
print ('MSE of LASSO:',mse_lasso )
print ('MAE of LASSO Forest:',mae_lasso )
print ('R-Squared of Lasso', r2_lasso)


NameError: name 'X_train_scaled' is not defined

LASSO After Manual Tuning

In [12]:
#After manual tuning
from sklearn.linear_model import Lasso

inflation_lasso = Lasso(alpha=0.001, random_state=42)
inflation_lasso.fit(X_train_scaled, y_train)
y_pred_lasso = inflation_lasso.predict(X_test_scaled)
mse_lasso = mse(y_test, y_pred_lasso)
mae_lasso = mae(y_test, y_pred_lasso)
r2_lasso = r2s(y_test, y_pred_lasso)
print ('MSE of LASSO:',mse_lasso )
print ('MAE of LASSO Forest:',mae_lasso )
print ('R-Squared of Lasso', r2_lasso)


MSE of LASSO: 0.00681481976052625
MAE of LASSO Forest: 0.041498808482829476
R-Squared of Lasso 0.9999580289764562


Unoptimized SVR

In [13]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVR
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

# Load the dataset
data = pd.read_csv('Initial Data.csv')

# Extract the X and Y columns
X = data[['Yield Curve', 'Production Index', 'Housing Starts', 'Cost of Living Index', 'Unemployment']]
y = data['Consumer Price Index']

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=42)

# Scale the X data
scaler = MinMaxScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Create an SVR model with chosen hyperparameters
svr = SVR(kernel='linear', C=1, epsilon=0.01)

# Train the SVR model on the training data
svr.fit(X_train_scaled, y_train)

# Evaluate the performance of the model on the testing data
y_pred = svr.predict(X_test_scaled)
mse = mean_squared_error(y_test, y_pred)
mae = mean_absolute_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print('MSE:', mse)
print('MAE:', mae)
print('R-squared:', r2)


MSE: 8.65326160964029
MAE: 2.6037055553570068
R-squared: 0.9431157526935037


In [14]:
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVR
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

# Load the dataset
data = inflation

# Extract the X and Y columns
X = data[['Yield Curve', 'Production Index', 'Housing Starts', 'Cost of Living Index', 'Unemployment']]
y = data['Consumer Price Index']

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=42)

# Scale the X data
scaler = MinMaxScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Define a dictionary of hyperparameters and their possible values
param_grid = {'kernel': ['linear', 'rbf'], 'C': [0.1, 1, 2, 3, 4, 5, 10], 'epsilon': [0.01, 0.1, 1]}

# Create an SVR model
svr = SVR()

# Use GridSearchCV to find the best combination of hyperparameters
grid_search = GridSearchCV(svr, param_grid, cv=5)
grid_search.fit(X_train_scaled, y_train)

# Get the best hyperparameters and model
best_params = grid_search.best_params_
best_model = grid_search.best_estimator_

print (best_params)
print (best_model)


{'C': 10, 'epsilon': 0.01, 'kernel': 'linear'}
SVR(C=10, epsilon=0.01, kernel='linear')


Optimized SVR after Manual Tuning

In [15]:
svr = SVR (C=20, epsilon=0.0341, kernel='linear')

# Train the SVR model on the training data
svr.fit(X_train_scaled, y_train)

# Evaluate the performance of the model on the testing data
y_pred = svr.predict(X_test_scaled)
mse = mean_squared_error(y_test, y_pred)
mae = mean_absolute_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print('MSE:', mse)
print('MAE:', mae)
print('R-squared:', r2)



MSE: 0.007804155513819126
MAE: 0.04485142497050496
R-squared: 0.9999486975510169


In [None]:
import pandas as pd
import xgboost as xgb
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error as mse
from sklearn.metrics import mean_absolute_error as mae
from sklearn.metrics import r2_score as r2s
from sklearn.model_selection import GridSearchCV
from sklearn.preprocessing import MinMaxScaler

In [None]:

data = pd.read_csv('Initial Data.csv', index_col='Period')
data.head()

Unnamed: 0_level_0,Yield Curve,Production Index,Housing Starts,Cost of Living Index,Unemployment,Consumer Price Index
Period,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Jan.00,0.62,96.01601,118.4,1466,10.1,98.0
Feb.00,0.53,96.31606,118.2,1476,10.2,98.8
Mar.00,0.45,95.51592,118.4,1485,10.2,99.3
Apr.00,0.22,97.01617,119.3,1490,10.0,99.6
May.00,0.11,101.2169,120.5,1497,9.8,99.9


In [None]:
X = data.drop('Consumer Price Index', axis=1)
y = data['Consumer Price Index']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=42)

scaler = MinMaxScaler()
scaler.fit (X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)



In [None]:
param_grid = {'learning_rate': [0.005, 0.0005, 0.05, 0.1, 0.15],
              'max_depth': [1, 2, 3, 4, 5],
              'min_child_weight': [1, 2, 3, 4, 5]}


XG Boost Unoptimized

In [None]:
xgb_model = xgb.XGBRegressor(objective='reg:squarederror', n_estimators=1000)
xgb_model.fit(X_train_scaled, y_train)
y_pred_xg = xgb_model.predict(X_test_scaled)
mse_xg = mse(y_test, y_pred_xg )
mae_xg = mae(y_test, y_pred_xg )
rsq_xg = r2s(y_test, y_pred_xg )
print('MSE: ', mse_xg)
print ('MAE: ', mae_xg)
print ('R-squared: ', rsq_xg)

NameError: name 'X_train_scaled' is not defined

In [None]:
from sklearn.model_selection import GridSearchCV


In [None]:
grid_search = GridSearchCV(xgb_model, param_grid, cv=5, scoring='neg_mean_squared_error', n_jobs=-1)


In [None]:
grid_search.fit(X_train_scaled, y_train)


GridSearchCV(cv=5,
             estimator=XGBRegressor(base_score=None, booster=None,
                                    callbacks=None, colsample_bylevel=None,
                                    colsample_bynode=None,
                                    colsample_bytree=None,
                                    early_stopping_rounds=None,
                                    enable_categorical=False, eval_metric=None,
                                    feature_types=None, gamma=None, gpu_id=None,
                                    grow_policy=None, importance_type=None,
                                    interaction_constraints=None,
                                    learning_rate=None, m...
                                    max_cat_to_onehot=None, max_delta_step=None,
                                    max_depth=None, max_leaves=None,
                                    min_child_weight=None, missing=nan,
                                    monotone_constraints=None,
       

In [None]:
print('Best hyperparameters:', grid_search.best_params_)
print('Best mean squared error:', -grid_search.best_score_)


Best hyperparameters: {'learning_rate': 0.05, 'max_depth': 3, 'min_child_weight': 1}
Best mean squared error: 0.1307192546575024


Optimized XG Boost

In [None]:
xgb_model = xgb.XGBRegressor(objective='reg:squarederror', n_estimators=1000, learning_rate=0.05, max_depth=3, min_child_weight=1)
xgb_model.fit(X_train_scaled, y_train)

XGBRegressor(base_score=None, booster=None, callbacks=None,
             colsample_bylevel=None, colsample_bynode=None,
             colsample_bytree=None, early_stopping_rounds=None,
             enable_categorical=False, eval_metric=None, feature_types=None,
             gamma=None, gpu_id=None, grow_policy=None, importance_type=None,
             interaction_constraints=None, learning_rate=0.05, max_bin=None,
             max_cat_threshold=None, max_cat_to_onehot=None,
             max_delta_step=None, max_depth=3, max_leaves=None,
             min_child_weight=1, missing=nan, monotone_constraints=None,
             n_estimators=1000, n_jobs=None, num_parallel_tree=None,
             predictor=None, random_state=None, ...)

In [None]:
y_pred_xg = xgb_model.predict(X_test_scaled)
mse_xg = mse(y_test, y_pred_xg )
mae_xg = mae(y_test, y_pred_xg )
rsq_xg = r2s(y_test, y_pred_xg )
print('MSE: ', mse_xg)
print ('MAE: ', mae_xg)
print ('R-squared: ', rsq_xg)

MSE:  0.09411105604273555
MAE:  0.20003764561244425
R-squared:  0.999420390048791


Unoptimized MLP Regressor (Interation Increased for Network to Merge)

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPRegressor
from sklearn.metrics import mean_squared_error as mse
from sklearn.metrics import mean_absolute_error as mae
from sklearn.metrics import r2_score as r2s




# Train the artificial neural network
#model = MLPRegressor(hidden_layer_sizes=(50, 50), activation='relu',max_iter=2000, alpha=0.01, random_state=42)
model = MLPRegressor(random_state=42, max_iter=3000)
model.fit(X_train_scaled, y_train)

# Predict the Consumer Price Index using the trained model
y_pred_mlp = model.predict(X_test_scaled)

# Calculate the Mean Squared Error
mse_mlp = mse(y_test, y_pred_mlp)
mae_mlp = mae(y_test, y_pred_mlp)
rsq_mlp = r2s(y_test, y_pred_mlp)

# Print the Mean Squared Error
print('Mean Squared Error: ', mse_mlp)
print('Mean Absolute Error: ', mae_mlp)
print('R-squared: ', rsq_mlp)



Mean Squared Error:  30.136037147075797
Mean Absolute Error:  4.223595006133948
R-squared:  0.8143985653235436


Optimized MLP Regressor after Grid Search

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPRegressor
from sklearn.metrics import mean_squared_error as mse
from sklearn.metrics import mean_absolute_error as mae
from sklearn.metrics import r2_score as r2s


# Train the artificial neural network
model = MLPRegressor(hidden_layer_sizes=(50, 50), activation='relu', solver='lbfgs', max_iter=4000, learning_rate='constant', alpha=0.01, random_state=42)
model.fit(X_train_scaled, y_train)

# Predict the Consumer Price Index using the trained model
y_pred_mlp = model.predict(X_test_scaled)

# Calculate the Mean Squared Error
mse_mlp = mse(y_test, y_pred_mlp)
mae_mlp = mae(y_test, y_pred_mlp)
rsq_mlp = r2s(y_test, y_pred_mlp)

# Print the Mean Squared Error
print('Mean Squared Error: ', mse_mlp)
print('Mean Absolute Error: ', mae_mlp)
print('R-squared: ', rsq_mlp)



Mean Squared Error:  0.005138004622001014
Mean Absolute Error:  0.04378723011136917
R-squared:  0.9999683561237809
