## Appendix: Simple models with dummy variables
*Tim Braams (8460701), Vinh Phan (8462380), Maximilian Pintilie (8462780), Rahul Singh (8464147), Kartik Vijay (8463465), Diego Zucchino (8345420)*   
  
This notebook repeats the simple-model analysis of the 90_appendix notebook, but adds dummy variables for the asset type to capture crypto-specific details.

In [24]:
import os
import pandas as pd
from sklearn.metrics import mean_squared_error, mean_absolute_error

import scripts
import scripts.ResultStore as rs
import scripts.FitPredict as fp
# Print floats with 8 decimal places so the very small error values in the
# result tables remain distinguishable.
pd.options.display.precision = 8

### Load data

In [2]:
# Read the pooled train and test splits (paths are relative to this notebook).
train, test = (
    pd.read_json(json_path)
    for json_path in (
        "../02_Pool_Data/pooled_train_300.json",
        "../02_Pool_Data/pooled_test_300.json",
    )
)

In [3]:
# Keep only training rows that contain no zero in any column (this also removes
# rows with a zero return).
# NOTE(review): the filter is applied to the training split only and also
# inspects the "index" column -- confirm both are intended.
train = train.loc[~(train == 0).any(axis=1)]

# Columns that are not model features: row id, target and the raw asset label
# (the asset label is replaced by one-hot dummy columns below).
non_feature_cols = ["index", "returns", "asset"]

y_train = train["returns"]
asset_dummies_train = pd.get_dummies(train["asset"])
X_train = pd.concat(
    [train.drop(non_feature_cols, axis=1), asset_dummies_train], axis=1
)

y_test = test["returns"]
# Encode the test assets against the *training* dummy columns. Calling
# get_dummies independently on each split yields a different column set or
# order whenever an asset is present in only one split (e.g. after the zero
# filter above), which would break or silently misalign prediction.
# Assets missing from the test split become all-zero columns; assets unseen in
# training are dropped because the fitted models have no coefficient for them.
asset_dummies_test = pd.get_dummies(test["asset"]).reindex(
    columns=asset_dummies_train.columns, fill_value=0
)
X_test = pd.concat(
    [test.drop(non_feature_cols, axis=1), asset_dummies_test], axis=1
)

### Result store

In [4]:
# Collector passed to every fit_predict call below via `store=`.
# load_if_exists=False presumably starts from an empty store instead of
# reloading previously saved results -- confirm in scripts/ResultStore.
results = rs.ResultStore(
    load_if_exists=False,
)

### Baseline

In [6]:
from sklearn.dummy import DummyRegressor
# Baseline: DummyRegressor(strategy="mean") ignores the features and always
# predicts the mean of the target it was fitted on.
# fp.fit_predict returns a (model, predictions, results) triple; presumably it
# fits on X_fit/y_fit, evaluates on X_validate/y_validate and records the run
# in `store` -- confirm in scripts/FitPredict.
dummy_model, dummy_predictions, dummy_results = fp.fit_predict(
    DummyRegressor(strategy="mean"),
    X_fit=X_train,
    y_fit=y_train,
    X_validate=X_test,
    y_validate=y_test,
    store=results,
)

### Linear Regression
https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.LinearRegression.html#sklearn.linear_model.LinearRegression

In [8]:
from sklearn.linear_model import LinearRegression
# Ordinary least squares with scikit-learn defaults, run through the same
# fit/evaluate helper and result store as the baseline.
linear_model, linear_predictions, linear_results = fp.fit_predict(
    LinearRegression(),
    X_fit=X_train,
    y_fit=y_train,
    X_validate=X_test,
    y_validate=y_test,
    store=results,
)

### Support Vector Machine
https://scikit-learn.org/stable/modules/generated/sklearn.svm.SVR.html#sklearn.svm.SVR

In [9]:
from sklearn.svm import SVR

In [10]:
# RBF-kernel support vector regression with fixed hyperparameters.
# NOTE(review): epsilon=0.1 is large compared with the return magnitudes shown
# in the results table (around 1e-3), and SVR ranks last there, below the mean
# baseline -- the hyperparameters and feature scaling may deserve a second look.
svr_model, svr_predictions, svr_results = fp.fit_predict(
    SVR(kernel="rbf", C=1, gamma=0.1, epsilon=0.1),
    X_fit=X_train,
    y_fit=y_train,
    X_validate=X_test,
    y_validate=y_test,
    store=results,
)

### ElasticNet
https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.ElasticNet.html#sklearn.linear_model.ElasticNet

In [11]:
from sklearn.linear_model import ElasticNet

In [12]:
# Elastic net with scikit-learn defaults.
# NOTE(review): in the results table its predictions and errors are identical
# to the DummyRegressor baseline, so the default penalty (alpha=1.0 in the
# stored meta) appears to shrink every coefficient to zero.
elastic_model, elastic_predictions, elastic_results = fp.fit_predict(
    ElasticNet(),
    X_fit=X_train,
    y_fit=y_train,
    X_validate=X_test,
    y_validate=y_test,
    store=results,
)

### PLS Regression
https://scikit-learn.org/stable/modules/generated/sklearn.cross_decomposition.PLSRegression.html#sklearn.cross_decomposition.PLSRegression

In [13]:
from sklearn.cross_decomposition import PLSRegression

In [14]:
# Partial least squares regression with scikit-learn defaults.
# Its stored predictions are nested one-element lists (see the results table),
# unlike the flat lists of the other models.
pls_model, pls_predictions, pls_results = fp.fit_predict(
    PLSRegression(),
    X_fit=X_train,
    y_fit=y_train,
    X_validate=X_test,
    y_validate=y_test,
    store=results,
)

### Gradient Boosting
https://scikit-learn.org/stable/modules/generated/sklearn.ensemble.GradientBoostingRegressor.html#sklearn.ensemble.GradientBoostingRegressor

In [15]:
from sklearn.ensemble import GradientBoostingRegressor

In [16]:
# Gradient-boosted regression trees (scikit-learn implementation, defaults).
gbr_model, gbr_predictions, gbr_results = fp.fit_predict(
    GradientBoostingRegressor(),
    X_fit=X_train,
    y_fit=y_train,
    X_validate=X_test,
    y_validate=y_test,
    store=results,
)

### XGBoost
https://xgboost.readthedocs.io/en/stable/python/python_api.html#module-xgboost.sklearn

In [17]:
from xgboost import XGBRegressor

In [18]:
# Gradient boosting via XGBoost's scikit-learn wrapper, default settings.
xgb_model, xgb_predictions, xgb_results = fp.fit_predict(
    XGBRegressor(),
    X_fit=X_train,
    y_fit=y_train,
    X_validate=X_test,
    y_validate=y_test,
    store=results,
)

### MLP
https://scikit-learn.org/stable/modules/generated/sklearn.neural_network.MLPRegressor.html#sklearn.neural_network.MLPRegressor

In [19]:
from sklearn.neural_network import MLPRegressor

In [20]:
# Multi-layer perceptron with scikit-learn defaults.
# MLPRegressor is stochastic (random weight initialisation and batch
# shuffling), so the seed is fixed: without it every Restart & Run All yields
# different scores and the comparison with the no-dummy results is not
# reproducible. The seed value itself is arbitrary.
mlp_model, mlp_predictions, mlp_results = fp.fit_predict(
    MLPRegressor(random_state=42),
    X_fit=X_train,
    y_fit=y_train,
    X_validate=X_test,
    y_validate=y_test,
    store=results,
)

### Results
The results are very similar to, or worse than, those of the same models without dummies: only the MLP scores slightly better, while SVR gets clearly worse. As we want to keep our feature set small, we will not use dummies in the further analysis.

In [25]:
# Models fitted with the asset dummies, sorted ascending by `mse`.
results.get_df().sort_values(by="mse")

Unnamed: 0,model,predictions,truth,meta,mae,mse,train_mae,train_mse
5,GradientBoostingRegressor,"[0.001105356449188105, -0.000500846874069192, ...","[0.0, 0.006618656200000001, -0.003158927, -0.0...","{'alpha': 0.9, 'ccp_alpha': 0.0, 'criterion': ...",0.00573692,7.544e-05,0.00733018,0.00014625
6,XGBRegressor,"[0.00086879876, -0.00023733264, -0.00050619536...","[0.0, 0.006618656200000001, -0.003158927, -0.0...","{'objective': 'reg:squarederror', 'base_score'...",0.00576461,7.606e-05,0.00673906,0.00010763
4,PLSRegression,"[[0.001723354516958207], [-0.00080048045726182...","[0.0, 0.006618656200000001, -0.003158927, -0.0...","{'copy': True, 'max_iter': 500, 'n_components'...",0.00578427,7.615e-05,0.00739002,0.00015421
1,LinearRegression,"[0.0015773773193359375, -0.0008907318115234375...","[0.0, 0.006618656200000001, -0.003158927, -0.0...","{'copy_X': True, 'fit_intercept': True, 'n_job...",0.00578664,7.623e-05,0.00739102,0.0001542
7,MLPRegressor,"[0.0010900525827550397, 4.730280171024066e-05,...","[0.0, 0.006618656200000001, -0.003158927, -0.0...","{'activation': 'relu', 'alpha': 0.0001, 'batch...",0.0058045,7.675e-05,0.00739775,0.00015091
0,DummyRegressor,"[0.00016026363705720398, 0.0001602636370572039...","[0.0, 0.006618656200000001, -0.003158927, -0.0...","{'constant': None, 'quantile': None, 'strategy...",0.00580505,7.749e-05,0.0074441,0.00015741
3,ElasticNet,"[0.00016026363705720398, 0.0001602636370572039...","[0.0, 0.006618656200000001, -0.003158927, -0.0...","{'alpha': 1.0, 'copy_X': True, 'fit_intercept'...",0.00580505,7.749e-05,0.0074441,0.00015741
2,SVR,"[0.008887106052815175, 0.022087976711780644, 0...","[0.0, 0.006618656200000001, -0.003158927, -0.0...","{'C': 1, 'cache_size': 200, 'coef0': 0.0, 'deg...",0.00994051,0.00016555,0.01093575,0.00023965


In [26]:
# Reference run without dummies, loaded from the previously saved
# "simple.json" store and sorted the same way for a side-by-side comparison.
(
    rs.ResultStore(load_if_exists=True, path="results", name="simple.json")
    .get_df()
    .sort_values("mse")
)

Unnamed: 0,model,predictions,truth,meta,mae,mse,train_mae,train_mse
5,GradientBoostingRegressor,"[0.0011792028000000001, -0.0005076767, 2.86101...","[0.0, 0.006618656200000001, -0.003158927, -0.0...","{'alpha': 0.9, 'ccp_alpha': 0.0, 'criterion': ...",0.00573726,7.544e-05,0.00733144,0.00014627
6,XGBRegressor,"[0.0011033476, -0.00040828990000000003, 0.0003...","[0.0, 0.006618656200000001, -0.003158927, -0.0...","{'objective': 'reg:squarederror', 'base_score'...",0.0057615,7.602e-05,0.00674717,0.00010793
4,PLSRegression,"[[0.0012872598], [-0.0012361877], [0.001067160...","[0.0, 0.006618656200000001, -0.003158927, -0.0...","{'copy': True, 'max_iter': 500, 'n_components'...",0.00577798,7.611e-05,0.00738981,0.00015421
1,LinearRegression,"[0.001156612, -0.0013134642000000001, 0.000941...","[0.0, 0.006618656200000001, -0.003158927, -0.0...","{'copy_X': True, 'fit_intercept': True, 'n_job...",0.00578076,7.618e-05,0.00739078,0.0001542
0,DummyRegressor,"[0.0001602636, 0.0001602636, 0.0001602636, 0.0...","[0.0, 0.006618656200000001, -0.003158927, -0.0...","{'constant': None, 'quantile': None, 'strategy...",0.00580505,7.749e-05,0.0074441,0.00015741
3,ElasticNet,"[0.0001602636, 0.0001602636, 0.0001602636, 0.0...","[0.0, 0.006618656200000001, -0.003158927, -0.0...","{'alpha': 1.0, 'copy_X': True, 'fit_intercept'...",0.00580505,7.749e-05,0.0074441,0.00015741
7,MLPRegressor,"[-0.0007679914, -0.0033770513, -0.000556813900...","[0.0, 0.006618656200000001, -0.003158927, -0.0...","{'activation': 'relu', 'alpha': 0.0001, 'batch...",0.00607021,7.944e-05,0.00757301,0.00014817
2,SVR,"[-0.0013342614000000001, 0.0107576862, 0.00588...","[0.0, 0.006618656200000001, -0.003158927, -0.0...","{'C': 1, 'cache_size': 200, 'coef0': 0.0, 'deg...",0.00768159,0.00011426,0.0097136,0.00021763


In [27]:
# Persist this notebook's results under the same `path` as the no-dummy store
# but with its own `name`, so the two runs do not overwrite each other.
results.save(
    name="simple_dummy.json",
    path="results",
)