In [None]:
import pandas as pd
import matplotlib.pyplot as plt 

# Load the prepared train/test tables; the first CSV column becomes the index.
train_data, test_data = (
    pd.read_csv(csv_path, index_col=0)
    for csv_path in (
        '../data/final_data/train_data.csv',
        '../data/final_data/test_data.csv',
    )
)

In [None]:
from sklearn.ensemble import GradientBoostingRegressor, RandomForestRegressor
from sklearn.kernel_ridge import KernelRidge
import xgboost as xgb
import lightgbm as lgb
from mlxtend.regressor import StackingCVRegressor

In [None]:
# Base learners shared by the cross-validation, voting and stacking cells below.
# Hyperparameters are hand-picked constants; only reproducibility and
# deprecated-argument issues are addressed here.

# Kernel ridge regression with a degree-2 polynomial kernel.
kernel_ridge = KernelRidge(alpha=0.6, kernel='polynomial', degree=2, coef0=2.5)

# Gradient boosting with Huber loss and a fixed seed.
gradient_boosting = GradientBoostingRegressor(
    n_estimators=3000,
    learning_rate=0.05,
    max_depth=4,
    max_features='sqrt',
    min_samples_leaf=15,
    min_samples_split=10,
    loss='huber',
    random_state=5,
)

# Seeded so that repeated runs give the same forest (the original left it
# unseeded, which made its CV scores differ from run to run).
random_forest = RandomForestRegressor(n_estimators=3000, random_state=42)

# `silent` and `nthread` are deprecated spellings in XGBoost's scikit-learn
# wrapper; `verbosity=0` (silent) and `n_jobs=-1` are the supported equivalents.
model_xgb = xgb.XGBRegressor(
    colsample_bytree=0.4603,
    gamma=0.0468,
    learning_rate=0.05,
    max_depth=3,
    min_child_weight=1.7817,
    n_estimators=2200,
    reg_alpha=0.4640,
    reg_lambda=0.8571,
    subsample=0.5213,
    verbosity=0,
    random_state=7,
    n_jobs=-1,
)

# LightGBM regressor; bagging and feature sub-sampling seeds are fixed.
model_lgb = lgb.LGBMRegressor(
    objective='regression',
    num_leaves=5,
    learning_rate=0.05,
    n_estimators=720,
    max_bin=55,
    bagging_fraction=0.8,
    bagging_freq=5,
    feature_fraction=0.2319,
    feature_fraction_seed=9,
    bagging_seed=9,
    min_data_in_leaf=6,
    min_sum_hessian_in_leaf=11,
)

# Stacked ensemble: the five base learners feed a LightGBM meta-regressor that
# also sees the original features. `random_state` pins the shuffled internal
# CV folds so the stacked score is reproducible.
stack_gen = StackingCVRegressor(
    regressors=(kernel_ridge, gradient_boosting, random_forest, model_xgb, model_lgb),
    meta_regressor=model_lgb,
    use_features_in_secondary=True,
    random_state=42,
)

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler,RobustScaler
# Positional split: the first four columns are used as the regression targets,
# everything from column 4 onward as features.
# NOTE(review): the same 4-column offset is applied to test_data, so it is
# presumably laid out like train_data - confirm.
train_target = train_data.iloc[:, :4]
train_features = train_data.iloc[:, 4:]
test_features = test_data.iloc[:, 4:]

# Standard scaling: fitted on the training features only, then applied to test.
ssc = StandardScaler()
train_data_ssc = ssc.fit_transform(train_features)
test_data_ssc = ssc.transform(test_features)

# Robust scaling of the same features, fitted the same way.
rbs = RobustScaler()
train_data_rbs = rbs.fit_transform(train_features)
test_data_rbs = rbs.transform(test_features)

In [None]:
import numpy as np
from sklearn.model_selection import cross_val_score, KFold
# Cross-validated mean absolute error of every candidate model on each of the
# four target columns, using the standard-scaled training features.
kfold = KFold()
candidate_models = [kernel_ridge, gradient_boosting, random_forest,
                    model_xgb, model_lgb, stack_gen]

for i, target_name in enumerate(train_target.columns):
    for model in candidate_models:
        # Pass the splitter explicitly: the original built `kfold` but never
        # used it, leaving the fold strategy to cross_val_score's default.
        result = cross_val_score(
            model,
            train_data_ssc,
            train_target.iloc[:, i],
            scoring='neg_mean_absolute_error',
            cv=kfold,
        )
        # The scorer returns negated MAE; flip the sign and say which target
        # the score belongs to so the output is readable on its own.
        print(f'[{target_name}] {type(model).__name__}')
        print(np.mean(result) * -1)

In [None]:
from sklearn.ensemble import VotingRegressor
# Averaging ensemble over kernel ridge and the three boosted models.
vot_estimators = [
    ('m1', kernel_ridge),
    ('m2', model_xgb),
    ('m3', model_lgb),
    ('m4', gradient_boosting),
]
vot_model = VotingRegressor(vot_estimators)

In [None]:
import numpy as np
from sklearn.model_selection import cross_val_score, KFold
# Cross-validated mean absolute error of the voting ensemble on each of the
# four target columns, using the standard-scaled training features.
kfold = KFold()

for i, target_name in enumerate(train_target.columns):
    # Pass the splitter explicitly: the original built `kfold` but never used
    # it, leaving the fold strategy to cross_val_score's default.
    result = cross_val_score(
        vot_model,
        train_data_ssc,
        train_target.iloc[:, i],
        scoring='neg_mean_absolute_error',
        cv=kfold,
    )
    # The scorer returns negated MAE; flip the sign and label the target.
    print(f'[{target_name}] {type(vot_model).__name__}')
    print(np.mean(result) * -1)

In [None]:
def _make_voting_regressor():
    """Return a new, unfitted VotingRegressor over the four base learners.

    The base estimator objects are shared between the returned ensembles;
    per the scikit-learn documentation, fitting a VotingRegressor fits clones
    of them, so the ensembles do not interfere with each other.
    """
    return VotingRegressor(
        [('m1', kernel_ridge),
         ('m2', model_xgb),
         ('m3', model_lgb),
         ('m4', gradient_boosting)]
    )


# One independent ensemble per target column (fitted in a later cell).
# All four now use the same estimator labels and order; the original's first
# ensemble used a shuffled labelling of the same four learners.
vot_model1 = _make_voting_regressor()
vot_model2 = _make_voting_regressor()
vot_model3 = _make_voting_regressor()
vot_model4 = _make_voting_regressor()

In [None]:
# One Series per target column, in positional order.
train_target_a, train_target_b, train_target_c, train_target_d = (
    train_target.iloc[:, col_pos] for col_pos in range(4)
)

# Fit each voting ensemble on the standard-scaled features against its own
# target. Calls are kept explicit so the last one still renders as cell output.
vot_model1.fit(X=train_data_ssc, y=train_target_a)
vot_model2.fit(X=train_data_ssc, y=train_target_b)
vot_model3.fit(X=train_data_ssc, y=train_target_c)
vot_model4.fit(X=train_data_ssc, y=train_target_d)

In [None]:
# Start from the sample submission and overwrite its prediction columns.
sample_submission_raw = pd.read_csv('../data/raw_data/sample_submission.csv')

# Columns 1..4 receive the predictions of vot_model1..4 respectively
# (column 0 is left untouched).
fitted_voters = (vot_model1, vot_model2, vot_model3, vot_model4)
for col_pos, voter in enumerate(fitted_voters, start=1):
    # Assign by column label so the column is replaced outright. Writing
    # floats through `iloc[:, k] = ...` tries to set values in place and hits
    # pandas' incompatible-dtype upcast deprecation if the placeholder column
    # was parsed as integers.
    col_name = sample_submission_raw.columns[col_pos]
    sample_submission_raw[col_name] = voter.predict(test_data_ssc)

In [None]:
# Write the filled-in submission to the working directory, without the index.
sample_submission_raw.to_csv(
    path_or_buf='voting_withboosts.csv',
    index=False,
)

In [None]:
# NOTE(review): bare numeric literal with no label - presumably a score
# recorded for the 'voting_withboosts.csv' submission; confirm and annotate.
1.8989104582