In [1]:
import numpy as np
import pandas as pd
import math

import seaborn as sns
from sklearn.model_selection import train_test_split
from matplotlib import pyplot as plt
from scipy.stats import chi2_contingency
from sklearn.preprocessing import StandardScaler

import warnings
warnings.filterwarnings('ignore')

%matplotlib inline

In [2]:
# Read the three competition CSVs (train, test, sample submission), each
# indexed by its 'id' column.
train, test, submission = (
    pd.read_csv(csv_path, index_col='id')
    for csv_path in (
        '../data/train.csv',
        '../data/test.csv',
        '../data/sample_submission.csv',
    )
)

In [3]:
# Keep only the rows whose Y18 value is missing, then discard five X columns
# together with the Y18 column (which is entirely null for these rows).
train_upper = (
    train.loc[train['Y18'].isna()]
    .drop(columns=['X04', 'X14', 'X16', 'X19', 'X36', 'Y18'])
)

In [4]:
from xgboost import XGBRegressor
from lightgbm import LGBMRegressor
from sklearn.linear_model import Lasso

# Base learners used by the stacking cells below.

# Gradient-boosted trees (XGBoost).
# - 'reg:linear' is the deprecated alias of the squared-error objective; the
#   documented name 'reg:squarederror' is used instead (same loss).
# - The None-valued legacy keywords (missing, nthread, seed, silent) have been
#   dropped: None only asked for the library default, and `silent`, `nthread`
#   and `seed` are superseded by `verbosity`, `n_jobs` and `random_state`,
#   which are set explicitly here.
xgbr = XGBRegressor(
    base_score=0.5,
    booster='gbtree',
    colsample_bylevel=1,
    colsample_bynode=1,
    colsample_bytree=0.7,
    gamma=0,
    importance_type='gain',
    learning_rate=0.1,
    max_delta_step=0,
    max_depth=3,
    min_child_weight=1,
    n_estimators=1500,
    n_jobs=1,
    objective='reg:squarederror',
    random_state=0,
    reg_alpha=0,
    reg_lambda=1,
    scale_pos_weight=1,
    subsample=1,
    verbosity=1,
)

# Gradient-boosted trees (LightGBM).
# NOTE(review): `colsample_bylevel` does not appear in LightGBM's documented
# parameter list (its column-sampling knobs are colsample_bytree /
# colsample_bynode) - confirm whether this keyword has any effect.
lgbmr = LGBMRegressor(
    colsample_bylevel=0.5,
    colsample_bytree=0.7,
    max_depth=3,
    n_estimators=1500,
)

# L1-regularised linear model.
lasso_reg = Lasso(alpha=0.001)

In [5]:
# Names of the target columns; every other column of train_upper is treated
# as a model input.
target_var_list = ['Y00', 'Y01', 'Y02', 'Y03', 'Y04', 'Y05', 'Y06', 'Y07', 'Y08', 'Y09', 'Y10', 'Y11', 'Y12', 'Y13', 'Y14', 'Y15', 'Y16', 'Y17']
columnList = train_upper.columns

input_var = [col for col in columnList if col not in target_var_list]

In [6]:
# Feature matrix restricted to the input columns, paired with the Y00 column
# as the target.
X_features, X_target = train_upper[input_var], train_upper['Y00']

In [7]:
from sklearn.metrics import mean_squared_error

def model_auto(model, features, target, test):
    """Fit ``model`` on ``(features, target)`` and predict for ``test``.

    Args:
        model: Object exposing ``fit(X, y)`` and ``predict(X)``; it is fitted
            in place.
        features: Training inputs passed to ``fit``.
        target: Training targets passed to ``fit``.
        test: Inputs passed to ``predict``.

    Returns:
        Whatever ``model.predict(test)`` returns.
    """
    model.fit(features, target)
    return model.predict(test)

def _stack_prediction_columns(pred_list):
    """Return a DataFrame holding one column per prediction array in ``pred_list``."""
    if not pred_list:
        # np.column_stack rejects an empty sequence; mirror "no models" with
        # an empty frame instead of raising.
        return pd.DataFrame()
    return pd.DataFrame(np.column_stack(pred_list))


def model_list_auto(models, features, target, test, cv=None):
    """Build level-one (stacking) features from a list of base models.

    Every model is fitted on ``(features, target)``. Its predictions for the
    training rows and for ``test`` each become one column of the returned
    frames, in the order of ``models``.

    Args:
        models: Iterable of estimators exposing ``fit`` / ``predict``. They
            are fitted in place.
        features: Training inputs.
        target: Training targets.
        test: Inputs to predict with the fitted models.
        cv: ``None`` (default) keeps the original behaviour: training-row
            predictions come from the model fitted on those same rows. These
            in-sample predictions are optimistic, so a meta-model trained on
            them sees leaked target information. Any other value is forwarded
            to ``sklearn.model_selection.cross_val_predict`` as ``cv`` so the
            training-row predictions are out-of-fold; the model is then
            refitted on all rows before predicting ``test``.

    Returns:
        ``(df, testDf)``: two DataFrames with one column per model (columns
        labelled 0..k-1 and a default integer row index). ``df`` has one row
        per training row, ``testDf`` one row per row of ``test``.
    """
    print("model_list_auto working")

    if cv is not None:
        # Imported lazily so the default path needs nothing beyond the models.
        from sklearn.model_selection import cross_val_predict

    predList = []
    testPredList = []
    for model in models:
        if cv is None:
            model.fit(features, target)
            pred = model.predict(features)
        else:
            # cross_val_predict fits clones, so `model` itself still has to
            # be fitted on the full training data afterwards.
            pred = cross_val_predict(model, features, target, cv=cv)
            model.fit(features, target)
        predList.append(pred)
        testPredList.append(model.predict(test))

    # Stack the arrays directly instead of round-tripping through nested
    # Python lists.
    df = _stack_prediction_columns(predList)
    testDf = _stack_prediction_columns(testPredList)

    return df, testDf

def stack_model_pred(model, new_features, target, test):
    """Fit the meta-model on stacked features, report its fit, predict ``test``.

    Args:
        model: Estimator exposing ``fit`` / ``predict``; fitted in place.
        new_features: Stacked (level-one) training features.
        target: Training targets.
        test: Stacked features to predict after fitting.

    Returns:
        ``(model, pred)``: the same, now fitted, ``model`` object and its
        predictions for ``test``.

    Note:
        The printed MSE / RMSE are computed on ``new_features``, i.e. on the
        very rows the model was just fitted on, so they are training-set
        metrics.
    """
    model.fit(new_features, target)

    # Predictions for the training rows, used only for the printed metrics.
    fitted_values = model.predict(new_features)
    mse = mean_squared_error(target, fitted_values)

    print('스태킹 회귀 모델 최종 MSE :: ', mse)
    print('스태킹 회귀 모델 최종 RMSE :: ', np.sqrt(mse))

    return model, model.predict(test)


In [8]:
from sklearn.base import clone

# Base learners whose predictions become the stacked (level-one) features.
models = [xgbr, lgbmr, lasso_reg]

X_features = train_upper[input_var]

X_test = test[input_var]

target_var_list = ['Y00', 'Y01', 'Y02', 'Y03', 'Y04', 'Y05', 'Y06', 'Y07', 'Y08', 'Y09', 'Y10', 'Y11', 'Y12', 'Y13', 'Y14', 'Y15', 'Y16', 'Y17']

# One stacked model per target column; its predictions for the test rows are
# written back into `test` under the target's name.
for target_col in target_var_list:
    print(target_col)
    X_target = train_upper[target_col]
    df, testDf = model_list_auto(models, X_features, X_target, X_test)

    # `lgbmr` is also one of the base learners. Passing the same object as the
    # meta-learner would make `new_model` an alias of it, which the next
    # iteration refits on the raw features. An unfitted clone with identical
    # hyper-parameters keeps the meta-learner separate.
    new_model, pred = stack_model_pred(clone(lgbmr), df, X_target, testDf)

    print("pred :: ", pred)
    test[target_col] = pred

Y00
model_list_auto working
스태킹 회귀 모델 최종 MSE ::  0.00706108807218099
스태킹 회귀 모델 최종 RMSE ::  0.08403028068607762
pred ::  [22.00443911 21.97700646 22.02244313 ... 26.76287981 26.70915937
 26.68748062]
Y01
model_list_auto working
스태킹 회귀 모델 최종 MSE ::  0.0323480521479486
스태킹 회귀 모델 최종 RMSE ::  0.17985564252463307
pred ::  [21.00159679 20.94890849 20.59378287 ... 29.91451393 29.47359784
 29.53539374]
Y02
model_list_auto working
스태킹 회귀 모델 최종 MSE ::  0.030797651626318267
스태킹 회귀 모델 최종 RMSE ::  0.175492597069843
pred ::  [21.00944098 21.11095363 21.01047573 ... 29.64048289 29.59694748
 29.62760473]
Y03
model_list_auto working
스태킹 회귀 모델 최종 MSE ::  0.01157121600628213
스태킹 회귀 모델 최종 RMSE ::  0.10756958680910757
pred ::  [24.47054264 24.48290912 24.4926577  ... 28.51954787 28.77436748
 28.65263328]
Y04
model_list_auto working
스태킹 회귀 모델 최종 MSE ::  0.012751248141837035
스태킹 회귀 모델 최종 RMSE ::  0.11292142463605848
pred ::  [24.47551425 24.63788826 24.46165656 ... 28.59583784 28.69620366
 28.68406602]
Y05
mo

In [None]:
# Keep only the rows that do have a Y18 value, then discard five X columns
# together with the Y18 column itself.
trainY18 = (
    train.loc[train['Y18'].notna()]
    .drop(columns=['X04', 'X14', 'X16', 'X19', 'X36', 'Y18'])
)