In [34]:
import pandas as pd
from sklearn.linear_model import LinearRegression, Ridge
from sklearn.kernel_ridge import KernelRidge
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.neural_network import MLPRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score, max_error
import random
import numpy as np

In [21]:
mbd = pd.read_csv("MBD_CFIP_20monthlag.csv")
mbd = mbd.to_numpy()

In [22]:
# train/test split
X = mbd[:,2:-1]
y = mbd[:,-1]
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

In [23]:
# SMAPE method for evaluation
def smape(y_test, y_pred):
    numerator = np.abs(y_test-y_pred)
    denominator = (y_test + np.abs(y_pred)) /200
    return np.mean(numerator/denominator)

In [8]:
# baseline with no model. predictions made by averaging the predictors

y_pred = []
for obs in X_test:
    pred = np.mean(obs)
    y_pred.append(pred)

print("SMAPE: %.8f" % smape(y_test, y_pred))

SMAPE: 6.54930402


In [9]:
# linear regression
regr = LinearRegression()
regr.fit(X_train, y_train)
y_pred = regr.predict(X_test)
print("Coefficients: \n", regr.coef_)
# The mean squared error
print("Mean squared error: %.8f" % mean_squared_error(y_test, y_pred))
# The coefficient of determination: 1 is perfect prediction
print("Coefficient of determination: %.8f" % r2_score(y_test, y_pred))
# The SMAPE
print("SMAPE: %.8f" % smape(y_test, y_pred))

Coefficients: 
 [-0.01596354 -0.01583519  0.07535571  0.87028049 -0.84683723 -0.02839363
 -0.00253333 -0.0766711   0.03250409 -0.00720642  0.01937076 -0.01103107
 -0.03394624  0.01858472 -0.0033079   0.03385775 -0.07751818  0.06346416
  0.04346912  0.96908421]
Mean squared error: 0.35632800
Coefficient of determination: 0.98943040
SMAPE: 2.95944509


In [17]:
# ridge regression

clf = Ridge(alpha=0.5)
clf.fit(X_train, y_train)
y_pred_ridge = clf.predict(X_test)

print("Coefficients: \n", clf.coef_)
# The mean squared error
print("Mean squared error: %.8f" % mean_squared_error(y_test, y_pred_ridge))
# The coefficient of determination: 1 is perfect prediction
print("Coefficient of determination: %.8f" % r2_score(y_test, y_pred_ridge))
# The SMAPE
print("SMAPE: %.8f" % smape(y_test, y_pred_ridge))

Coefficients: 
 [-0.01759878 -0.00606177  0.02808749  0.95683365 -0.96293702  0.0310421
 -0.00918779 -0.07774767  0.07342838 -0.02727953  0.01182001 -0.03078566
  0.011391   -0.00430099 -0.0127546   0.0416841  -0.07653172  0.06523247
  0.01478216  0.99394433]
Mean squared error: 0.49301848
Coefficient of determination: 0.98762319
SMAPE: 2.97157731


In [33]:
# kernel

kern = KernelRidge(alpha=0.5)
kern.fit(X_train, y_train)
y_pred_kernel = kern.predict(X_test)


# The mean squared error
print("Mean squared error: %.8f" % mean_squared_error(y_test, y_pred_kernel))
# The coefficient of determination: 1 is perfect prediction
print("Coefficient of determination: %.8f" % r2_score(y_test, y_pred_kernel))
# The SMAPE
print("SMAPE: %.8f" % smape(y_test, y_pred_kernel))

Mean squared error: 1.46180896
Coefficient of determination: 0.96284832
SMAPE: 3.35358495


In [18]:
# decision tree regressor

tree = DecisionTreeRegressor(random_state=42)
tree.fit(X_train,y_train)
y_pred_tree = tree.predict(X_test)

# The SMAPE
print("SMAPE: %.8f" % smape(y_test, y_pred_tree))

SMAPE: nan


  return np.mean(numerator/denominator)


In [25]:
# gradient boosting regressor

gbr = GradientBoostingRegressor()
gbr.fit(X_train,y_train)
y_pred_gbr = gbr.predict(X_test)

# The SMAPE
print("SMAPE: %.8f" % smape(y_test, y_pred_gbr))

SMAPE: 2.15732004


In [32]:
# multi-layer perceptron regressor
nn = MLPRegressor()
nn.fit(X_train,y_train)
y_pred_nn = nn.predict(X_test)

# The SMAPE
print("SMAPE: %.8f" % smape(y_test, y_pred_nn))

SMAPE: 4.44827606
