In [5]:
import pandas as pd
import numpy as np
from sklearn.model_selection import TimeSeriesSplit
from sklearn.preprocessing import StandardScaler
from sklearn.tree          import DecisionTreeRegressor
from sklearn.ensemble      import RandomForestRegressor
from sklearn.linear_model  import LinearRegression
from sklearn import metrics
import time


In [2]:
# Read the comma-separated values in x.csv into a NumPy array, skipping the
# first (header) line of the file.
x = np.genfromtxt(
    "x.csv",
    skip_header=1,
    delimiter=",",
)
# Echo the array dimensions as the cell's output.
x.shape

(4330, 1560)

In [3]:
# Read the comma-separated values in y.csv into a NumPy array, skipping the
# first (header) line of the file.
y = np.genfromtxt(
    "y.csv",
    skip_header=1,
    delimiter=",",
)
# Echo the array dimensions as the cell's output.
y.shape

(4330,)

In [6]:

# Chronological hold-out split. TimeSeriesSplit yields successive
# (train_index, test_index) pairs; only the final pair is used by the rest of
# the notebook, so it is taken directly instead of re-slicing x and y on every
# fold and throwing all but the last result away.
# NOTE(review): test_size=2 leaves only two samples for evaluation, so the
# reported metrics rest on very little data — confirm this is intended.
tscv = TimeSeriesSplit(test_size=2)
train_index, test_index = list(tscv.split(x))[-1]

x_train, x_test = x[train_index], x[test_index]
y_train, y_test = y[train_index], y[test_index]

In [7]:
# Learn the scaling statistics from the training fold only, then apply that
# same fitted transform to both folds so the test fold never influences the fit.
sc = StandardScaler().fit(x_train)
x_train, x_test = sc.transform(x_train), sc.transform(x_test)

In [8]:
# Candidate regressors, keyed by the display name used in the results table.
# The tree-based estimators are randomised, so a fixed seed is passed to make
# the reported metrics reproducible across kernel restarts.
RANDOM_STATE = 42

regressors = {
    "Decision Tree": DecisionTreeRegressor(random_state=RANDOM_STATE),
    "Random Forest": RandomForestRegressor(n_estimators=100, random_state=RANDOM_STATE),
    'Linear Regression': LinearRegression(),
}

In [9]:
# Fit every candidate model on the training fold, score it on the test fold,
# and collect one row of metrics per model into `results`.

# Span of the training targets, used to normalise the error below. The plain
# max - min difference is used because abs(max) - abs(min) is not the range
# once the targets contain negative values.
rang = y_train.max() - y_train.min()

# Rows are gathered in a list and turned into a DataFrame once at the end:
# DataFrame.append was removed in pandas 2.0 and copies the frame on each call.
records = []
for model_name, model in regressors.items():

    # Time the fit only; perf_counter is the monotonic clock meant for
    # measuring intervals.
    start_time = time.perf_counter()
    model.fit(x_train, y_train)
    total_time = time.perf_counter() - start_time

    pred = model.predict(x_test)
    mse = metrics.mean_squared_error(y_test, pred)

    # NOTE(review): " % error" divides a squared-unit quantity (MSE) by the
    # target range, so it is neither dimensionless nor a percentage; consider
    # sqrt(MSE) / rang or MAE / rang. Formula kept as-is pending confirmation.
    records.append({"Model":    model_name,
                    "MSE":      mse,
                    "MAB":      metrics.mean_absolute_error(y_test, pred),
                    " % error": mse / rang,
                    "Time":     total_time})

# Explicit column list keeps the column order even if `regressors` is empty.
results = pd.DataFrame(records, columns=['Model', 'MSE', 'MAB', " % error", 'Time'])
### END SOLUTION


# Rank the models from lowest to highest MSE and number the rows from 1.
results_ord = results.sort_values(by=['MSE'], ascending=True, ignore_index=True)
results_ord.index += 1

print(results_ord)

# A Styler renders only when it is the cell's final expression, so the bar
# chart is placed last instead of being discarded mid-cell. The subset names
# the columns that actually exist in the table: the absolute-error column is
# 'MAB' (there is no 'MAE' column).
results_ord.style.bar(subset=['MSE', 'MAB'], vmin=0, vmax=100, color='#5fba7d')

               Model        MSE       MAB   % error        Time
1      Decision Tree   5.000000  2.000000  0.050000    2.767743
2      Random Forest  19.335200  3.340000  0.193352  144.555327
3  Linear Regression  21.463348  4.606609  0.214633    4.883895
