In [27]:
import pandas as pd
import numpy as np
import xgboost
from lightgbm import LGBMRegressor
from mlxtend.regressor import StackingRegressor
from sklearn.linear_model import Lasso
from sklearn.model_selection import train_test_split
from sklearn.compose import make_column_transformer
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import RobustScaler
from sklearn.pipeline import make_pipeline
from sklearn.model_selection import cross_val_score
from sklearn.ensemble import GradientBoostingRegressor

In [2]:
# header=None: the first line of each file is data, so pandas labels the
# columns with integers (0, 1, 2, ...).
train = pd.read_csv(
    '../data/acc_data/training_data.csv',
    header=None,
)
test = pd.read_csv(
    '../data/acc_data/validation_data.csv',
    header=None,
)

In [4]:
# Preview the first rows; the columns carry integer labels because the CSV was read with header=None.
train.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13
0,6.791859,13.960164,-0.193163,48.522705,-0.589755,-16.461533,-1.630623,0.330035,0.0,0.0,0.0,0.0,0.0,0.0
1,6.800353,13.996486,-0.190413,48.528838,-0.60414,-16.450406,-1.691989,0.298628,-0.110716,0.610561,0.312486,-0.08451,-0.361382,-0.027366
2,6.800085,14.002038,-0.174075,48.533003,-0.599635,-16.425717,-1.718877,0.251782,-0.245641,0.267517,0.466097,0.002662,-0.055242,-0.162555
3,6.800586,14.011322,-0.175465,48.535982,-0.576815,-16.399336,-1.747659,0.225503,-0.262477,0.286367,0.261464,-0.004982,-0.092371,0.013831
4,6.813845,14.013416,-0.177808,48.531692,-0.565499,-16.385513,-1.769202,0.216323,-0.137533,0.214349,0.091333,-0.131924,-0.020837,0.023308


In [5]:
# Label-based slicing with .loc is end-inclusive: columns 0..10 are the eleven
# input features and column 11 is the regression target.
X = train.loc[:, :10]
y = train.loc[:, 11]

# Fix the seed so the split -- and every cross-validation score computed from
# it -- is reproducible across kernel restarts.
# NOTE(review): consecutive rows in train.head() look strongly correlated; if
# the rows are an ordered time series, a shuffled split can leak information
# between the train and test parts -- confirm whether shuffle=False is wanted.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)

#### Baseline 

In [6]:
# Baseline: standardise the features, then fit a shallow, heavily boosted
# XGBoost regressor.
model_XGB = make_pipeline(
    StandardScaler(),
    xgboost.XGBRegressor(
        n_estimators=2500,
        max_depth=3,
        learning_rate=0.05,
        gamma=0.05,
        reg_alpha=0.5,
        reg_lambda=0.85,
    ),
)
# The scorer returns negated median absolute errors; flip the sign so the
# printed value is the error itself (lower is better).
print(
    ' Extreme Gradient Boosting score is: ',
    -cross_val_score(
        model_XGB,
        X_train,
        y_train,
        scoring='neg_median_absolute_error',
        cv=5,
        n_jobs=-1,
    ).mean(),
    '\n',
)

 Extreme Gradient Boosting score is:  0.06162930818793768 



#### Improved model

In [8]:
# Per-column summary statistics (count, mean, std, min, quartiles, max) of the training frame.
train.describe()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13
count,14357.0,14357.0,14357.0,14357.0,14357.0,14357.0,14357.0,14357.0,14357.0,14357.0,14357.0,14357.0,14357.0,14357.0
mean,0.840527,0.771505,-0.028302,23.721384,0.023528,-0.643423,-0.5122529,0.029318,-0.06926359,0.013354,0.001254,0.034509,0.016406,-0.00135
std,4.161684,3.559132,0.204225,15.39005,0.409618,5.627637,3.839009,0.184759,4.976719,6.039478,0.346368,1.2133,1.077456,0.23618
min,-19.144207,-18.422171,-1.398902,3.696221,-1.496821,-16.461533,-14.07972,-1.471033,-137.6873,-198.995666,-7.289127,-88.897156,-49.423364,-6.089816
25%,-0.08346,-0.078568,-0.031507,11.115929,-0.234398,-2.542259,-0.7390222,-0.007657,-0.3227395,-0.217211,-0.050771,-0.077659,-0.111728,-0.047309
50%,0.0,0.0,0.0,18.384793,0.055931,-0.009222,4.435016e-13,0.000111,-2.569414e-18,0.0,0.0,0.0,0.0,0.0
75%,1.541917,1.35456,0.006665,30.833862,0.277699,0.122885,0.663506,0.053991,0.17217,0.18702,0.052367,0.129667,0.163521,0.045713
max,20.270119,14.04448,1.459988,81.817653,1.576822,37.50263,23.28099,1.363321,137.6417,186.610716,7.753264,36.52789,66.786547,3.31584


In [7]:
# Same XGBoost configuration as the baseline, but scaled with RobustScaler
# instead of StandardScaler to check whether that lowers the error.
model_XGB = make_pipeline(
    RobustScaler(),
    xgboost.XGBRegressor(
        n_estimators=2500,
        max_depth=3,
        learning_rate=0.05,
        gamma=0.05,
        reg_alpha=0.5,
        reg_lambda=0.85,
    ),
)
# The scorer returns negated median absolute errors; flip the sign so the
# printed value is the error itself (lower is better).
print(
    ' Extreme Gradient Boosting score is: ',
    -cross_val_score(
        model_XGB,
        X_train,
        y_train,
        scoring='neg_median_absolute_error',
        cv=5,
        n_jobs=-1,
    ).mean(),
    '\n',
)

 Extreme Gradient Boosting score is:  0.06033010548122959 



We can see a small improvement (the median absolute error drops from about 0.0616 to 0.0603), so we will keep the RobustScaler. Next we will try three different boosting models and compare how they perform. These models are:
* XGBoost
* Gradient Boosting Regressor
* LightGBM Regressor

In [21]:
def boosting_models(X_train, y_train):
    """Cross-validate three gradient-boosting pipelines and return them.

    Each estimator sits behind a ``RobustScaler`` in a pipeline and is scored
    with 5-fold cross-validation using the median absolute error. The mean
    error over the folds is printed for every model (lower is better).

    Parameters
    ----------
    X_train : array-like of shape (n_samples, n_features)
        Training features. Any input accepted by ``cross_val_score`` works;
        a pandas DataFrame is not required.
    y_train : array-like of shape (n_samples,)
        Training target.

    Returns
    -------
    tuple
        ``(model_GBC, model_LGB, model_XGB)``: the scikit-learn gradient
        boosting, LightGBM and XGBoost pipelines, in that order.
    """
    # NOTE: per the scikit-learn docs, ``alpha`` only affects
    # GradientBoostingRegressor when loss is 'huber' or 'quantile'; with the
    # default loss used here it has no effect.
    model_GBC = make_pipeline(
        RobustScaler(),
        GradientBoostingRegressor(alpha=0.85, n_estimators=1000, max_depth=3),
    )
    model_LGB = make_pipeline(
        RobustScaler(),
        LGBMRegressor(objective='regression', num_leaves=5, learning_rate=0.05, n_estimators=800),
    )
    model_XGB = make_pipeline(
        RobustScaler(),
        xgboost.XGBRegressor(gamma=0.05, learning_rate=0.05, max_depth=3, n_estimators=2500,
                             reg_alpha=0.5, reg_lambda=0.85),
    )

    # One (label, pipeline) pair per model keeps the evaluation settings in a
    # single place instead of three copy-pasted calls.
    labelled_models = (
        ('Gradient Boosting score is: ', model_GBC),
        ('Light Gradient Boosting score is: ', model_LGB),
        (' Extreme Gradient Boosting score is: ', model_XGB),
    )
    for label, model in labelled_models:
        fold_scores = cross_val_score(
            model, X_train, y_train,
            scoring='neg_median_absolute_error', cv=5, n_jobs=-1,
        )
        # The scorer yields negated errors; negate the mean to report the error.
        print(label, -np.mean(fold_scores), '\n')

    return model_GBC, model_LGB, model_XGB

In [22]:
# Score the three boosting pipelines on the training split and keep the returned pipeline objects.
model_GBC, model_LGB, model_XGB = boosting_models(X_train,y_train)

Gradient Boosting score is:  0.05699416235667115 

Light Gradient Boosting score is:  0.10865916634379383 

 Extreme Gradient Boosting score is:  0.06033010548122959 



In [28]:
def stacking_model(X_train, y_train):
    """Cross-validate a stacked ensemble and return its pipeline.

    The ensemble stacks a LightGBM regressor, a Lasso and an XGBoost regressor
    under a Lasso meta-regressor, all behind a ``RobustScaler``. It is scored
    with 5-fold cross-validation on the median absolute error; the mean error
    and the standard deviation of the fold scores are printed.

    Parameters
    ----------
    X_train : array-like of shape (n_samples, n_features)
        Training features. Any input accepted by ``cross_val_score`` works;
        a pandas DataFrame is not required.
    y_train : array-like of shape (n_samples,)
        Training target.

    Returns
    -------
    Pipeline
        The ``RobustScaler`` + ``StackingRegressor`` pipeline.
    """
    LGB = LGBMRegressor(objective='regression', num_leaves=5, learning_rate=0.05, n_estimators=800)
    XGB = xgboost.XGBRegressor(gamma=0.05, learning_rate=0.05, max_depth=3, n_estimators=2500,
                               reg_alpha=0.5, reg_lambda=0.85)
    # Separate Lasso instances so the base learner and the meta learner never
    # share one estimator object.
    base_lasso = Lasso(alpha=1e-3)
    meta_lasso = Lasso(alpha=1e-3)

    # NOTE(review): GradientBoostingRegressor had the lowest error of the
    # single models in this notebook but is not part of the stack (it was
    # previously constructed here and never used) -- confirm whether it should
    # be one of the base regressors.
    model_SR = make_pipeline(
        RobustScaler(),
        StackingRegressor(regressors=[LGB, base_lasso, XGB], meta_regressor=meta_lasso),
    )

    scores = cross_val_score(
        model_SR, X_train, y_train,
        scoring='neg_median_absolute_error', cv=5, n_jobs=-1,
    )
    # The scorer yields negated errors; negate the mean to report the error.
    print("Stacking Regressor score: ", -scores.mean(), ' std: ', scores.std())

    return model_SR

In [29]:
# Score the stacked ensemble on the training split and keep the returned pipeline object.
model_SR = stacking_model(X_train,y_train)

Stacking Regressor score:  0.0600101442077855  std:  0.0033729054580565942


In [None]:
# For every pair of consecutive rows, average the difference of the first ten
# columns (positions 0-9). ``aux[i]`` is ``i`` when the mean difference between
# row ``i`` and row ``i + 1`` is non-zero, and 0 otherwise.
# Only len(train) - 1 such pairs exist: iterating up to len(train) made
# ``train.iloc[i + 1]`` run past the last row and raise IndexError.
# NOTE(review): ``iloc[:, :10]`` is end-exclusive (10 columns) whereas the model
# features are selected with ``loc[:, :10]`` (11 columns) -- confirm which set
# of columns is intended here.
leading_cols = train.iloc[:, :10].to_numpy()
mean_step = (leading_cols[:-1] - leading_cols[1:]).mean(axis=1)
aux = [i if step != 0 else 0 for i, step in enumerate(mean_step)]