<a href="https://colab.research.google.com/github/jwc22-11/lgdacon/blob/main/HM/PolynomialFeatures3.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
import random
import os
import numpy as np

from sklearn.linear_model import LinearRegression
from sklearn.multioutput import MultiOutputRegressor
from lightgbm import LGBMRegressor
from sklearn.model_selection import KFold
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.model_selection import RandomizedSearchCV
import sklearn.metrics as metrics
from sklearn.metrics import make_scorer
from sklearn.metrics import mean_squared_error

from sklearn.preprocessing import PolynomialFeatures

In [None]:
def seed_everything(seed):
    """Seed the global Python and NumPy RNGs and export PYTHONHASHSEED.

    Args:
        seed: Value passed to ``random.seed`` and ``np.random.seed``; its
            string form is stored in ``os.environ['PYTHONHASHSEED']``.
    """
    os.environ['PYTHONHASHSEED'] = str(seed)
    for seeder in (random.seed, np.random.seed):
        seeder(seed)


seed_everything(42)  # fix the seed for the whole notebook

In [None]:
def lg_nrmse(gt, preds):
    """Weighted sum of per-target NRMSE over the first 14 target columns.

    For each column, NRMSE is the RMSE between ``gt`` and ``preds`` divided
    by the mean absolute ground-truth value of that column. The first eight
    columns (Y_01..Y_08) are weighted 1.2 (a 20% extra weight), the
    remaining six (Y_09..Y_14) are weighted 1.0.

    Args:
        gt: 2-D array-like of ground-truth values with at least 14 columns
            (NumPy array or DataFrame; the 'ID' column must not be included).
        preds: 2-D array-like of predictions with the same number of rows
            and at least 14 columns.

    Returns:
        The weighted NRMSE score as a float (lower is better).

    Raises:
        ValueError: If ``gt`` and ``preds`` have a different number of rows.
        IndexError: If either input has fewer than 14 columns.
    """
    gt = np.asarray(gt, dtype=float)
    preds = np.asarray(preds, dtype=float)
    if gt.shape[0] != preds.shape[0]:
        # Without this check NumPy could silently broadcast mismatched inputs.
        raise ValueError(
            f'gt and preds must have the same number of rows, '
            f'got {gt.shape[0]} and {preds.shape[0]}'
        )

    all_nrmse = []
    for idx in range(14):  # one column per target; 'ID' is not part of the inputs
        # RMSE is computed directly with NumPy: scikit-learn's
        # `mean_squared_error(..., squared=False)` flag was deprecated in 1.4
        # and removed in 1.6.
        rmse = np.sqrt(np.mean((gt[:, idx] - preds[:, idx]) ** 2))
        nrmse = rmse / np.mean(np.abs(gt[:, idx]))
        all_nrmse.append(nrmse)
    score = 1.2 * np.sum(all_nrmse[:8]) + 1.0 * np.sum(all_nrmse[8:14])
    return score

In [None]:
# List the working directory to confirm the competition CSV files are present.
%ls

[0m[01;34msample_data[0m/  sample_submission.csv  test.csv  train.csv


In [None]:
# Read the competition tables and discard the 'ID' column from each.
train_df = pd.read_csv('train.csv').drop('ID', axis=1)
test_df = pd.read_csv('test.csv').drop('ID', axis=1)

In [None]:
# Preview the training frame (X_* input columns followed by Y_* target columns).
train_df

Unnamed: 0,X_01,X_02,X_03,X_04,X_05,X_06,X_07,X_08,X_09,X_10,...,Y_05,Y_06,Y_07,Y_08,Y_09,Y_10,Y_11,Y_12,Y_13,Y_14
0,70.544,103.320,67.47,1,101.892,74.983,29.45,62.38,245.71,0.0,...,29.632,16.083,4.276,-25.381,-25.529,-22.769,23.792,-25.470,-25.409,-25.304
1,69.524,103.321,65.17,1,101.944,72.943,28.73,61.23,233.61,0.0,...,33.179,16.736,3.229,-26.619,-26.523,-22.574,24.691,-26.253,-26.497,-26.438
2,72.583,103.320,64.07,1,103.153,72.943,28.81,105.77,272.20,0.0,...,31.801,17.080,2.839,-26.238,-26.216,-22.169,24.649,-26.285,-26.215,-26.370
3,71.563,103.320,67.57,1,101.971,77.022,28.92,115.21,255.36,0.0,...,34.503,17.143,3.144,-25.426,-25.079,-21.765,24.913,-25.254,-25.021,-25.345
4,69.524,103.320,63.57,1,101.981,70.904,29.68,103.38,241.46,0.0,...,32.602,17.569,3.138,-25.376,-25.242,-21.072,25.299,-25.072,-25.195,-24.974
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
39602,66.465,103.320,62.27,1,103.150,66.825,30.20,77.83,298.05,0.0,...,29.194,16.582,3.410,-26.486,-26.581,-22.772,24.261,-26.491,-26.584,-26.580
39603,66.465,103.321,62.77,1,102.021,66.825,29.21,102.25,270.67,0.0,...,29.859,15.659,3.406,-27.308,-27.203,-24.674,23.427,-27.250,-27.334,-27.325
39604,68.504,103.320,64.67,1,103.144,68.864,29.96,102.61,198.07,0.0,...,24.720,16.823,3.215,-26.502,-26.687,-22.577,24.301,-26.388,-26.425,-26.601
39605,66.465,103.320,63.67,1,102.025,67.845,30.30,112.60,275.52,0.0,...,26.412,15.757,4.216,-26.760,-26.634,-24.066,23.305,-26.536,-26.751,-26.635


In [None]:
# Preview the test frame (X_* input columns only).
test_df

Unnamed: 0,X_01,X_02,X_03,X_04,X_05,X_06,X_07,X_08,X_09,X_10,...,X_47,X_48,X_49,X_50,X_51,X_52,X_53,X_54,X_55,X_56
0,68.504,103.321,76.67,1,101.867,73.963,30.51,63.57,239.80,0.0,...,1,1,17227.63,138.130429,129.460682,141.506570,133.427229,129.711498,133.138096,121.859684
1,67.485,103.320,69.37,1,101.992,67.845,28.03,116.99,189.23,0.0,...,1,1,17134.53,136.148839,128.266277,145.911745,131.196417,132.411480,133.629025,124.178623
2,69.524,103.320,68.97,1,101.884,77.022,29.65,205.68,214.93,0.0,...,1,1,14860.83,120.447446,119.988804,132.099908,120.450155,130.051708,128.252972,114.475628
3,69.524,103.320,65.87,1,101.866,73.963,28.15,103.38,180.80,0.0,...,1,1,15252.53,133.994695,125.069180,147.507669,123.142653,125.963665,139.666592,126.589253
4,73.603,103.321,66.67,1,101.891,74.983,29.92,71.20,231.93,0.0,...,1,1,10752.23,137.918202,135.116192,138.600473,127.173033,137.252712,134.411335,124.020016
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
39603,68.504,103.320,63.97,1,103.157,68.864,29.49,116.35,284.16,0.0,...,1,1,62123.53,127.741246,126.494312,139.119905,125.271109,128.284572,140.176945,128.292843
39604,68.504,103.320,61.37,1,103.137,68.864,32.29,116.28,272.41,0.0,...,1,1,61844.13,127.767377,124.062809,138.238664,119.879393,127.322529,137.312047,131.570614
39605,69.524,103.320,63.67,1,103.149,69.884,30.00,113.05,295.54,0.0,...,1,1,60277.53,128.593640,124.774037,138.659624,123.999571,126.075542,135.656132,127.671108
39606,67.485,103.321,61.77,1,103.148,67.845,32.05,115.05,267.26,0.0,...,1,1,60236.73,121.110646,125.471699,134.989984,120.889578,129.296909,132.673977,131.882893


In [None]:
# Split the training frame by column name: columns whose name matches 'X'
# are the input features, columns whose name matches 'Y' are the targets.
train_x, train_y = (train_df.filter(regex=pattern) for pattern in ('X', 'Y'))

In [None]:
# Learn the polynomial feature expansion on the training inputs (no constant
# bias column) and apply it to them. The fitted `poly` is kept so the same
# expansion can be applied to the test inputs.
poly = PolynomialFeatures(include_bias=False).fit(train_x)
train_poly = poly.transform(train_x)

In [None]:
# Check the size of the expanded training matrix: (rows, polynomial features).
train_poly.shape

(39607, 1652)

In [None]:
# Hold out 20% of the expanded training rows for validation; the fixed
# random_state keeps the split identical between runs.
train_input, val_input, train_target, val_target = train_test_split(
    train_poly,
    train_y,
    test_size=0.2,
    random_state=42,
)

In [None]:
# Report the shape of every split, one per line.
print(
    f'train_input : {train_input.shape}',
    f'val_input : {val_input.shape}',
    f'train_target  : {train_target.shape}',
    f'val_target  : {val_target.shape}',
    sep='\n',
)

train_input : (31685, 1652)
val_input : (7922, 1652)
train_target  : (31685, 14)
val_target  : (7922, 14)


In [None]:
# Inspect the names of the generated features (original columns, squares and
# pairwise products).
poly.get_feature_names_out()

array(['X_01', 'X_02', 'X_03', ..., 'X_55^2', 'X_55 X_56', 'X_56^2'],
      dtype=object)

In [None]:
# Apply the expansion fitted on the training inputs to the test inputs;
# `poly` is not re-fitted here.
test_poly = poly.transform(test_df)

In [None]:
# The column count should match the expanded training matrix.
test_poly.shape

(39608, 1652)

In [None]:
#Regression Model Fit
lgbm = MultiOutputRegressor(LGBMRegressor(n_estimators=1000,
                                            learning_rate=0.01, 
                                            subsample=0.8, 
                                            colsample_bytree = 0.8, 
                                            max_depth=7)).fit(train_input, train_target)
print('Done.')

Done.


In [None]:
# Predict all targets for the held-out validation rows.
pred = lgbm.predict(val_input)
print('Done.')

Done.


In [None]:
# Convert the validation targets to a NumPy array for positional column
# indexing in lg_nrmse. np.asarray is used instead of DataFrame.to_numpy()
# so the cell is idempotent: re-running it on an array is a no-op rather
# than an AttributeError.
val_target = np.asarray(val_target)

In [None]:
# Validation score under the weighted-NRMSE metric defined above (lower is better).
lg_nrmse(val_target, pred)

1.9342842996921956

In [None]:
# Predict all targets for the expanded test inputs.
preds = lgbm.predict(test_poly)

In [None]:
# Load the submission template; its non-'ID' columns are overwritten with predictions below.
submit = pd.read_csv('sample_submission.csv')

In [None]:
# Copy the predictions into the submission template, one target column at a
# time. Only the non-'ID' columns are enumerated, so the position in `preds`
# no longer depends on 'ID' being the first column (the previous `idx-1`
# offset would wrap around to the last prediction column otherwise).
target_cols = [col for col in submit.columns if col != 'ID']
if len(target_cols) != preds.shape[1]:
    raise ValueError(
        f'submission has {len(target_cols)} target columns but '
        f'predictions have {preds.shape[1]}'
    )
for idx, col in enumerate(target_cols):
    submit[col] = preds[:, idx]
print('Done.')

Done.


In [None]:
# Write the LightGBM submission without the DataFrame index column.
submit.to_csv('submit_lgbm_pf.csv', index=False)

In [None]:
# Load three submission files for ensembling, dropping the 'ID' column.
# NOTE(review): this rebinds `lgbm` from the fitted model to a DataFrame, so
# the earlier `lgbm.predict(...)` cells cannot be re-run after this one
# without re-fitting. 'm0818.csv' and 'm0819b.csv' are not written by this
# notebook and must already exist in the working directory.
lgbm = pd.read_csv('submit_lgbm_pf.csv').drop(columns='ID')
rf = pd.read_csv('m0818.csv').drop(columns='ID')
xgb = pd.read_csv('m0819b.csv').drop(columns='ID')

In [None]:
# Preview the predictions read back from 'submit_lgbm_pf.csv'.
lgbm

Unnamed: 0,Y_01,Y_02,Y_03,Y_04,Y_05,Y_06,Y_07,Y_08,Y_09,Y_10,Y_11,Y_12,Y_13,Y_14
0,1.485566,1.219847,1.137039,12.391696,31.142293,16.748061,3.262882,-26.121726,-26.198622,-22.193217,24.390115,-26.089774,-26.049972,-26.105262
1,1.503325,1.225551,1.165086,13.466374,30.742130,16.704805,3.198988,-26.101601,-26.164538,-22.279934,24.393268,-26.030689,-26.045008,-26.048865
2,1.500275,1.199047,1.053089,15.628127,31.892709,16.938427,3.121985,-25.926064,-26.138442,-22.310620,24.624132,-25.922341,-26.032021,-26.051523
3,1.446046,1.116497,1.007423,15.217888,32.438783,17.079224,3.067622,-25.637234,-25.747562,-21.830725,24.823450,-25.673040,-25.633919,-25.648983
4,1.329818,1.022184,1.004612,15.012274,31.566459,17.014805,3.099347,-25.727355,-25.648915,-21.866644,24.866550,-25.652889,-25.590279,-25.588001
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
39603,1.220686,0.946531,0.942489,13.040639,31.476130,16.722322,3.161358,-26.453836,-26.433538,-22.887078,24.416911,-26.338228,-26.396787,-26.419548
39604,1.192472,0.868487,0.901778,13.893704,31.197945,16.757917,3.175841,-26.358071,-26.377189,-22.787866,24.546632,-26.337682,-26.336597,-26.322168
39605,1.208239,0.940601,0.966392,13.005679,31.310212,16.653299,3.160484,-26.513758,-26.552152,-22.981710,24.242284,-26.440065,-26.461687,-26.455113
39606,1.167273,0.794061,0.895328,13.018794,31.391013,16.705228,3.187878,-26.436850,-26.454713,-22.897532,24.499967,-26.396604,-26.415812,-26.403379


In [None]:
# Preview the predictions read from 'm0818.csv'.
rf

Unnamed: 0,Y_01,Y_02,Y_03,Y_04,Y_05,Y_06,Y_07,Y_08,Y_09,Y_10,Y_11,Y_12,Y_13,Y_14
0,1.438107,1.199771,1.121014,14.370218,31.244155,16.755128,3.141357,-26.090938,-26.205975,-22.341878,24.458312,-26.098112,-26.018970,-26.105003
1,1.464131,1.215498,1.164229,13.625130,30.923053,16.628905,3.151577,-26.182050,-26.247850,-22.248992,24.334064,-26.148851,-26.167335,-26.165044
2,1.339500,1.141190,1.025548,14.229651,31.756741,16.887573,3.091448,-25.854888,-25.927949,-22.307935,24.536325,-25.801750,-25.860793,-25.853513
3,1.476512,1.144012,1.037104,14.805853,31.785293,17.102236,3.102744,-25.670000,-25.696937,-21.824645,24.860282,-25.685292,-25.685004,-25.657764
4,1.413517,1.051658,1.044588,14.849921,31.728607,16.947440,3.148017,-25.690171,-25.679639,-22.254959,24.640117,-25.644878,-25.606978,-25.696756
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
39603,1.256519,0.962741,0.972590,12.664860,30.662966,16.511296,3.195536,-26.643464,-26.605611,-23.055031,24.242662,-26.472262,-26.538601,-26.521781
39604,1.274067,0.880770,0.953750,14.160787,31.347877,16.709145,3.174681,-26.429520,-26.486183,-22.882203,24.348047,-26.417314,-26.405402,-26.414946
39605,1.263372,0.927841,0.957998,13.047354,31.127292,16.636129,3.170676,-26.560164,-26.567615,-22.848750,24.207950,-26.465291,-26.430005,-26.461016
39606,1.210503,0.885401,0.895156,13.586503,31.285840,16.655163,3.189133,-26.502781,-26.512278,-22.893862,24.408930,-26.398803,-26.445256,-26.467016


In [None]:
# Preview the predictions read from 'm0819b.csv'.
xgb

Unnamed: 0,Y_01,Y_02,Y_03,Y_04,Y_05,Y_06,Y_07,Y_08,Y_09,Y_10,Y_11,Y_12,Y_13,Y_14
0,1.416645,1.351831,1.128694,15.309586,31.924870,16.738285,3.122261,-26.043660,-26.075373,-21.905447,24.548798,-25.934320,-25.805878,-25.722555
1,1.449433,1.263718,1.066043,13.250424,30.685540,16.516954,3.157964,-26.366280,-25.999483,-22.483927,24.270473,-26.122278,-26.122625,-26.137680
2,1.503237,1.151469,1.166494,12.274327,30.922773,16.053535,3.047713,-25.806952,-25.862627,-22.050863,23.910475,-25.637207,-25.686895,-25.706427
3,1.518115,1.253980,1.042606,14.488544,32.532757,16.966760,3.044385,-25.687975,-25.787415,-21.602709,25.171537,-25.691093,-25.656834,-25.547110
4,1.399159,1.013148,0.961070,15.435185,30.787102,16.840288,3.220014,-25.678564,-25.666555,-22.171703,24.650885,-25.624279,-25.470665,-25.571980
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
39603,1.286503,0.967967,1.022471,12.246459,30.342403,16.524420,3.205255,-26.655699,-26.544653,-23.139116,24.193184,-26.493078,-26.476768,-26.515831
39604,1.319660,0.842030,0.942301,14.094836,31.268484,16.944420,3.098076,-26.435293,-26.333910,-22.955954,24.474314,-26.345140,-26.399070,-26.386465
39605,1.255467,0.844903,1.048103,13.117178,30.843540,16.499327,3.135324,-26.535090,-26.547125,-22.884537,24.186214,-26.531273,-26.491938,-26.546083
39606,1.228233,0.856860,0.888184,13.464694,31.541695,16.865368,3.148652,-26.508875,-26.488552,-22.905367,24.461494,-26.419437,-26.460468,-26.524086


In [None]:
# Equal-weight ensemble: element-wise mean of the three prediction frames.
sum_ = lgbm + xgb + rf
div_ = sum_ / 3
div_

Unnamed: 0,Y_01,Y_02,Y_03,Y_04,Y_05,Y_06,Y_07,Y_08,Y_09,Y_10,Y_11,Y_12,Y_13,Y_14
0,1.446773,1.257150,1.128916,14.023833,31.437106,16.747158,3.175500,-26.085441,-26.159990,-22.146847,24.465741,-26.040736,-25.958273,-25.977607
1,1.472296,1.234922,1.131786,13.447309,30.783574,16.616888,3.169510,-26.216644,-26.137290,-22.337618,24.332602,-26.100606,-26.111656,-26.117196
2,1.447670,1.163902,1.081710,14.044035,31.524074,16.626512,3.087049,-25.862635,-25.976339,-22.223139,24.356977,-25.787099,-25.859903,-25.870487
3,1.480224,1.171496,1.029044,14.837428,32.252278,17.049407,3.071583,-25.665070,-25.743971,-21.752693,24.951756,-25.683142,-25.658586,-25.617952
4,1.380832,1.028997,1.003423,15.099127,31.360722,16.934178,3.155792,-25.698697,-25.665036,-22.097768,24.719184,-25.640682,-25.555974,-25.618912
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
39603,1.254570,0.959080,0.979183,12.650653,30.827166,16.586013,3.187383,-26.584333,-26.527934,-23.027075,24.284252,-26.434523,-26.470719,-26.485720
39604,1.262066,0.863763,0.932610,14.049776,31.271435,16.803827,3.149533,-26.407628,-26.399094,-22.875341,24.456331,-26.366712,-26.380356,-26.374526
39605,1.242359,0.904448,0.990831,13.056737,31.093681,16.596252,3.155495,-26.536337,-26.555631,-22.904999,24.212149,-26.478876,-26.461210,-26.487404
39606,1.202003,0.845441,0.892889,13.356664,31.406182,16.741920,3.175221,-26.482835,-26.485181,-22.898920,24.456797,-26.404948,-26.440512,-26.464827


In [None]:
# Re-attach the 'ID' column from the submission template to the averaged
# predictions (column-wise concat, aligned on the row index) and write the
# ensemble submission without the index column.
submit_ = pd.concat([submit[['ID']], div_], axis=1)
submit_.to_csv('assemble_a.csv', index=False)