In [6]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import sklearn
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
from sklearn.decomposition import LatentDirichletAllocation
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, classification_report
from sklearn.ensemble import RandomForestRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.metrics import mean_squared_error as mse
from sklearn.metrics import mean_absolute_error as mae
from sklearn.metrics import r2_score as r2s
from sklearn.model_selection import GridSearchCV
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import cross_val_score

In [7]:
inflation = pd.read_csv('Initial Data.csv', index_col="Period")
inflation.head()

Unnamed: 0_level_0,Yield Curve,Production Index,Housing Starts,Cost of Living Index,Unemployment,Consumer Price Index
Period,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Jan.00,0.62,96.01601,118.4,1466,10.1,98.0
Feb.00,0.53,96.31606,118.2,1476,10.2,98.8
Mar.00,0.45,95.51592,118.4,1485,10.2,99.3
Apr.00,0.22,97.01617,119.3,1490,10.0,99.6
May.00,0.11,101.2169,120.5,1497,9.8,99.9


In [8]:
inflation.info()

<class 'pandas.core.frame.DataFrame'>
Index: 277 entries, Jan.00 to Jan.23
Data columns (total 6 columns):
 #   Column                Non-Null Count  Dtype  
---  ------                --------------  -----  
 0   Yield Curve           277 non-null    float64
 1   Production Index      277 non-null    float64
 2   Housing Starts        277 non-null    float64
 3   Cost of Living Index  277 non-null    int64  
 4   Unemployment          277 non-null    float64
 5   Consumer Price Index  277 non-null    float64
dtypes: float64(5), int64(1)
memory usage: 15.1+ KB


In [9]:
X = inflation.drop('Consumer Price Index', axis=1)
y = inflation['Consumer Price Index']


#creating the x and y value for the dataset and creating a test and training set from them

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=42)
scaler = MinMaxScaler()
scaler.fit (X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

inflation_tree = DecisionTreeRegressor(random_state=42,)

inflation_tree.fit(X_train_scaled, y_train)

y_pred_decision = inflation_tree.predict(X_test_scaled)
mse_tree = mse(y_test, y_pred_decision)
mae_tree = mae(y_test, y_pred_decision)
rsq_tree = r2s(y_test, y_pred_decision)
print ('MSE of Decision Tree: ', mse_tree)
print ('MAE of Decision Tree', mae_tree)
print ('R-Squared of Decision Tree', rsq_tree)


MSE of Decision Tree:  0.17828571428571438
MAE of Decision Tree 0.254285714285713
R-Squared of Decision Tree 0.9989019762554627


In [10]:
scaler.fit (X)
X_scaled = scaler.transform(X)

In [11]:

dt_regressor = DecisionTreeRegressor(random_state=42, max_depth=10, min_samples_leaf=1, criterion='mse', splitter='random')

# Perform 5-fold cross-validation
cv_scores = cross_val_score(dt_regressor, X_scaled, y, cv=5, scoring='neg_mean_squared_error')

# Convert the scores from negative MSE to positive MSE
cv_scores = -cv_scores

# Print the cross-validation scores
print('CV Scores:', cv_scores)
print('Mean CV Score:', cv_scores.mean())
print('Standard Deviation:', cv_scores.std())

CV Scores: [10.81464286 16.89785714  7.176       4.50072727 59.95309091]
Mean CV Score: 19.86846363636364
Standard Deviation: 20.468705241600528


In [24]:

rf_regressor = RandomForestRegressor(n_estimators=100, random_state=42, max_depth=15, min_samples_leaf=1, max_features='auto')

# Perform 5-fold cross-validation
cv_scores = cross_val_score(rf_regressor, X_scaled, y, cv=5, scoring='neg_mean_squared_error')

# Convert the scores from negative MSE to positive MSE
cv_scores = -cv_scores

# Print the cross-validation scores
print('CV Scores:', cv_scores)
print('Mean CV Score:', cv_scores.mean())
print('Standard Deviation:', cv_scores.std())

CV Scores: [ 8.4966212   6.75061104 11.26479407  0.95283924 64.19587884]
Mean CV Score: 18.33214887551951
Standard Deviation: 23.179039051800352


In [27]:
from sklearn.linear_model import Lasso
lasso_regressor = Lasso(alpha=0.01, fit_intercept=True, max_iter=1000, normalize=False, random_state=42)

# Perform 5-fold cross-validation
cv_scores = cross_val_score(lasso_regressor, X_scaled, y, cv=5, scoring='neg_mean_squared_error')

# Convert the scores from negative MSE to positive MSE
cv_scores = -cv_scores

# Print the cross-validation scores
print('CV Scores:', cv_scores)
print('Mean CV Score:', cv_scores.mean())
print('Standard Deviation:', cv_scores.std())

CV Scores: [0.03105434 0.0034206  0.0011875  0.00285309 0.01286097]
Mean CV Score: 0.010275300164671743
Standard Deviation: 0.011163500271523737


In [32]:
from sklearn.svm import SVR

svr_regressor = SVR(C=10, epsilon=0.01, kernel='linear')

# Perform 5-fold cross-validation
cv_scores = cross_val_score(svr_regressor, X_scaled, y, cv=5, scoring='neg_mean_squared_error')

# Convert the scores from negative MSE to positive MSE
cv_scores = -cv_scores

# Print the cross-validation scores
print('CV Scores:', cv_scores)
print('Mean CV Score:', cv_scores.mean())
print('Standard Deviation:', cv_scores.std())

CV Scores: [0.021287   0.00191872 0.00121423 0.00141208 0.00192051]
Mean CV Score: 0.005550507762689505
Standard Deviation: 0.00787316725950222


In [36]:
import xgboost as XGB

xgb_regressor = XGB.XGBRegressor(learning_rate=0.1, max_depth=5, min_child_weight=1)

# Perform 5-fold cross-validation
cv_scores = cross_val_score(xgb_regressor, X_scaled, y, cv=5, scoring='neg_mean_squared_error')

# Convert the scores from negative MSE to positive MSE
cv_scores = -cv_scores

# Print the cross-validation scores
print('CV Scores:', cv_scores)
print('Mean CV Score:', cv_scores.mean())
print('Standard Deviation:', cv_scores.std())


CV Scores: [ 7.35763556  5.57527565  4.85606994  0.41213436 62.3991253 ]
Mean CV Score: 16.120048163598266
Standard Deviation: 23.252272338257587


In [35]:
from sklearn.neural_network import MLPRegressor

mlp_regressor = MLPRegressor(hidden_layer_sizes=(50, 50), activation='relu', solver='lbfgs', max_iter=4000, learning_rate='constant', alpha=0.01, random_state=42)

# Perform 5-fold cross-validation
cv_scores = cross_val_score(mlp_regressor, X_scaled, y, cv=5, scoring='neg_mean_squared_error')

# Convert the scores from negative MSE to positive MSE
cv_scores = -cv_scores

# Print the cross-validation scores
print('CV Scores:', cv_scores)
print('Mean CV Score:', cv_scores.mean())
print('Standard Deviation:', cv_scores.std())

CV Scores: [0.02075719 0.08652317 0.00130235 0.0014936  0.001278  ]
Mean CV Score: 0.022270859793079033
Standard Deviation: 0.03299310077349042


In [5]:
print('Depth of tree:', inflation_tree.tree_.max_depth)
print('Number of nodes in tree:', inflation_tree.tree_.node_count)

Depth of tree: 11
Number of nodes in tree: 311


In [6]:
param_dt = {'max_depth': [2, 4, 6, 8, 10, 11], 'min_samples_leaf': [1, 2, 5, 10, 15, 20, 25], 'criterion': ['mse', 'friedman_mse'], 'splitter': ['best', 'random'], }
grid_dt = GridSearchCV(estimator=inflation_tree, param_grid=param_dt, scoring='neg_mean_squared_error')
grid_result = grid_dt.fit(X_train_scaled, y_train)
print ('Best Param:', grid_result.best_params_ )

Best Param: {'criterion': 'mse', 'max_depth': 10, 'min_samples_leaf': 1, 'splitter': 'random'}


In [None]:
(random_state=42, max_depth=10, min_samples_leaf=1, criterion='mse', splitter='random')

In [7]:
inflation_tree = DecisionTreeRegressor(random_state=42, max_depth=11, min_samples_leaf=1, criterion='mse', splitter='best')

inflation_tree.fit(X_train_scaled, y_train)

y_pred_decision = inflation_tree.predict(X_test_scaled)
mse_tree = mse(y_test, y_pred_decision)
mae_tree = mae(y_test, y_pred_decision)
rsq_tree = r2s(y_test, y_pred_decision)
print ('MSE of Decision Tree: ', mse_tree)
print ('MAE of Decision Tree', mae_tree)
print ('R-Squared of Decision Tree', rsq_tree)


MSE of Decision Tree:  0.17828571428571438
MAE of Decision Tree 0.254285714285713
R-Squared of Decision Tree 0.9989019762554627


In [8]:

inflation_random = RandomForestRegressor(random_state=42)
inflation_random.fit(X_train_scaled, y_train)
y_pred_rand = inflation_random.predict(X_test_scaled)
mse_rand = mse(y_test, y_pred_rand)
mae_rand = mae(y_test, y_pred_rand)
r2 = r2s(y_test, y_pred_rand)
print ('MSE of Random Forest:',mse_rand )
print ('MAE of Random Forest:',mae_rand )
print ('R-squared', r2)

MSE of Random Forest: 0.29725137142858754
MAE of Random Forest: 0.25228571428572116
R-squared 0.9981692921093959


In [9]:
grid = dict(max_depth = [1, 5, 10, 15], min_samples_leaf = [1, 5, 10, 15, 20], n_estimators = [100, 200, 300], max_features= ['auto', 'sqrt', 'log2'])

forest_est = RandomForestRegressor(random_state = 42)

grid_search = GridSearchCV(estimator=inflation_random, param_grid=grid, scoring='neg_mean_squared_error')

grid_result = grid_search.fit(X, y)

# Print out the best result
print("Best result is obtained using", grid_result.best_params_)

Best result is obtained using {'max_depth': 15, 'max_features': 'auto', 'min_samples_leaf': 1, 'n_estimators': 100}


In [10]:
inflation_random = RandomForestRegressor(n_estimators=200, random_state=42, max_depth=10, min_samples_leaf=1, max_features='auto')
inflation_random.fit(X_train, y_train)
y_pred_rand = inflation_random.predict(X_test)
mse_rand = mse(y_test, y_pred_rand)
mae_rand = mae(y_test, y_pred_rand)
r2 = r2s(y_test, y_pred_rand)
print ('MSE of Random Forest:',mse_rand )
print ('MAE of Random Forest:',mae_rand )
print ('R-squared', r2)

MSE of Random Forest: 0.2553169205601701
MAE of Random Forest: 0.24244551020409924
R-squared 0.9984275574614581


In [11]:
inflation_random = RandomForestRegressor(n_estimators=100, random_state=42, max_depth=15, min_samples_leaf=1, max_features='auto')
inflation_random.fit(X_train, y_train)
y_pred_rand = inflation_random.predict(X_test)
mse_rand = mse(y_test, y_pred_rand)
mae_rand = mae(y_test, y_pred_rand)
r2 = r2s(y_test, y_pred_rand)
print ('MSE of Random Forest:',mse_rand )
print ('MAE of Random Forest:',mae_rand )
print ('R-squared', r2)

MSE of Random Forest: 0.30137754285716073
MAE of Random Forest: 0.2608285714285783
R-squared 0.9981438798983236


In [12]:
from sklearn.linear_model import Lasso

inflation_lasso = Lasso(random_state=42, )
inflation_lasso.fit(X_train_scaled, y_train)
y_pred_lasso = inflation_lasso.predict(X_test_scaled)
mse_lasso = mse(y_test, y_pred_lasso)
mae_lasso = mae(y_test, y_pred_lasso)
r2_lasso = r2s(y_test, y_pred_lasso)
print ('MSE of LASSO:',mse_lasso )
print ('MAE of LASSO Forest:',mae_lasso )
print ('R-Squared of Lasso', r2_lasso)


MSE of LASSO: 22.591925512483385
MAE of LASSO Forest: 4.242300042507948
R-Squared of Lasso 0.8608611422013919


In [13]:
from sklearn.linear_model import Lasso
from sklearn.model_selection import GridSearchCV

# Define the parameter grid to search over
param_grid = {
    'alpha': [0.01, 0.1, 0.5, 1.0, 2.0, 5.0],
    'fit_intercept': [True, False],
    'normalize': [True, False],
    'max_iter': [1000, 5000, 10000],
}


# Create a GridSearchCV object
grid_search = GridSearchCV(inflation_lasso, param_grid, cv=5)

# Fit the GridSearchCV object to the training data
grid_search.fit(X_train, y_train)

# Print the best parameter combination found
print("Best parameter combination:", grid_search.best_params_)


Best parameter combination: {'alpha': 0.01, 'fit_intercept': False, 'max_iter': 5000, 'normalize': True}


In [14]:
#After manual tuning
from sklearn.linear_model import Lasso

inflation_lasso = Lasso(alpha=0.01, fit_intercept=True, max_iter=1000, normalize=False, random_state=42)
inflation_lasso.fit(X_train_scaled, y_train)
y_pred_lasso = inflation_lasso.predict(X_test_scaled)
mse_lasso = mse(y_test, y_pred_lasso)
mae_lasso = mae(y_test, y_pred_lasso)
r2_lasso = r2s(y_test, y_pred_lasso)
print ('MSE of LASSO:',mse_lasso )
print ('MAE of LASSO Forest:',mae_lasso )
print ('R-Squared of Lasso', r2_lasso)


MSE of LASSO: 0.011009331479272933
MAE of LASSO Forest: 0.06671267319508647
R-Squared of Lasso 0.9999321958720913


In [15]:
inflation_lasso = Lasso(alpha=0.01, fit_intercept=False, max_iter=5000, normalize=True, random_state=42)
inflation_lasso.fit(X_train_scaled, y_train)
y_pred_lasso = inflation_lasso.predict(X_test_scaled)
mse_lasso = mse(y_test, y_pred_lasso)
mae_lasso = mae(y_test, y_pred_lasso)
r2_lasso = r2s(y_test, y_pred_lasso)
print ('MSE of LASSO:',mse_lasso )
print ('MAE of LASSO Forest:',mae_lasso )
print ('R-Squared of Lasso', r2_lasso)

MSE of LASSO: 146.16507737840195
MAE of LASSO Forest: 9.483979663512144
R-Squared of Lasso 0.0998004173996373


In [16]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVR
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

# Load the dataset
data = pd.read_csv('Initial Data.csv')

# Extract the X and Y columns
X = data[['Yield Curve', 'Production Index', 'Housing Starts', 'Cost of Living Index', 'Unemployment']]
y = data['Consumer Price Index']

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=42)

# Scale the X data
scaler = MinMaxScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Create an SVR model with chosen hyperparameters
svr = SVR(kernel='linear', C=1, epsilon=0.01, )

# Train the SVR model on the training data
svr.fit(X_train_scaled, y_train)

# Evaluate the performance of the model on the testing data
y_pred = svr.predict(X_test_scaled)
mse = mean_squared_error(y_test, y_pred)
mae = mean_absolute_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print('MSE:', mse)
print('MAE:', mae)
print('R-squared:', r2)


MSE: 6.821893575349972
MAE: 2.242964061611064
R-squared: 0.957985410337274


In [17]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVR
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

# Load the dataset
data = pd.read_csv('Initial Data.csv')

# Extract the X and Y columns
X = data[['Yield Curve', 'Production Index', 'Housing Starts', 'Cost of Living Index', 'Unemployment']]
y = data['Consumer Price Index']

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=42)

# Scale the X data
scaler = MinMaxScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Create an SVR model with chosen hyperparameters
svr = SVR()

# Train the SVR model on the training data
svr.fit(X_train_scaled, y_train)

# Evaluate the performance of the model on the testing data
y_pred = svr.predict(X_test_scaled)
mse = mean_squared_error(y_test, y_pred)
mae = mean_absolute_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print('MSE:', mse)
print('MAE:', mae)
print('R-squared:', r2)


MSE: 10.676229437066187
MAE: 1.5744103005857342
R-squared: 0.9342473766280589


In [18]:
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVR
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

# Load the dataset
data = inflation

# Extract the X and Y columns
X = data[['Yield Curve', 'Production Index', 'Housing Starts', 'Cost of Living Index', 'Unemployment']]
y = data['Consumer Price Index']

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=42)

# Scale the X data
scaler = MinMaxScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Define a dictionary of hyperparameters and their possible values
param_grid = {'kernel': ['linear', 'poly', 'rbf', 'sigmoid'], 'C': [0.1, 1, 2, 3, 4, 5, 10], 'epsilon': [0.01, 0.1, 1]}

# Create an SVR model
svr = SVR()

# Use GridSearchCV to find the best combination of hyperparameters
grid_search = GridSearchCV(svr, param_grid, cv=5)
grid_search.fit(X_train_scaled, y_train)

# Get the best hyperparameters and model
best_params = grid_search.best_params_
best_model = grid_search.best_estimator_

print (best_params)
print (best_model)


{'C': 10, 'epsilon': 0.01, 'kernel': 'linear'}
SVR(C=10, epsilon=0.01, kernel='linear')


In [19]:
svr = SVR (C=10, epsilon=0.01, kernel='linear')

# Train the SVR model on the training data
svr.fit(X_train_scaled, y_train)

# Evaluate the performance of the model on the testing data
y_pred = svr.predict(X_test_scaled)
mse = mean_squared_error(y_test, y_pred)
mae = mean_absolute_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print('MSE:', mse)
print('MAE:', mae)
print('R-squared:', r2)



MSE: 0.0070528073078013195
MAE: 0.04232232962774946
R-squared: 0.9999565632618371


In [20]:
svr = SVR (C=20, epsilon=0.0341, kernel='linear')

# Train the SVR model on the training data
svr.fit(X_train_scaled, y_train)

# Evaluate the performance of the model on the testing data
y_pred = svr.predict(X_test_scaled)
mse = mean_squared_error(y_test, y_pred)
mae = mean_absolute_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print('MSE:', mse)
print('MAE:', mae)
print('R-squared:', r2)



MSE: 0.006657303867834006
MAE: 0.04092340885671218
R-squared: 0.999958999083293
