In [1]:
!pip install lightgbm

Collecting lightgbm
  Downloading lightgbm-3.3.2-py3-none-manylinux1_x86_64.whl (2.0 MB)
[K     |████████████████████████████████| 2.0 MB 2.6 MB/s eta 0:00:01
Installing collected packages: lightgbm
Successfully installed lightgbm-3.3.2
You should consider upgrading via the '/usr/bin/python3 -m pip install --upgrade pip' command.[0m


In [2]:
import pandas as pd
import random
import os
import numpy as np

from sklearn.linear_model import LinearRegression
from sklearn.multioutput import MultiOutputRegressor
from lightgbm import LGBMRegressor
from sklearn.model_selection import KFold
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.model_selection import RandomizedSearchCV
import sklearn.metrics as metrics
from sklearn.metrics import make_scorer
from sklearn.metrics import mean_squared_error

In [3]:
def seed_everything(seed):
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
seed_everything(42) # Seed 고정

In [4]:
# train csv
zip_dir = '/Data1/Radar'
meta_dir = os.path.join(zip_dir, 'meta')
x_feature = pd.read_csv(meta_dir+'/x_feature_info.csv')
train = pd.read_csv(os.path.join(zip_dir,'train.csv'))
test = pd.read_csv(os.path.join(zip_dir,'test.csv'))
submission = pd.read_csv(zip_dir+'/sample_submission.csv')

In [6]:
train_x = train.filter(regex='X') # Input : X Featrue
train_y = train.filter(regex='Y') # Output : Y Feature

In [7]:
def lg_nrmse(gt, preds):
    # 각 Y Feature별 NRMSE 총합
    # Y_01 ~ Y_08 까지 20% 가중치 부여
    all_nrmse = []
    for idx in range(14): # ignore 'ID'
        rmse = metrics.mean_squared_error(gt[:,idx], preds[:,idx], squared=False)
        nrmse = rmse/np.mean(np.abs(gt[:,idx]))
        all_nrmse.append(nrmse)
    score = 1.2 * np.sum(all_nrmse[:8]) + 1.0 * np.sum(all_nrmse[8:14])
    return score


In [8]:
train_x = train_x.to_numpy()
train_y = train_y.to_numpy()

In [9]:
train_input, val_input, train_target, val_target = train_test_split(
    train_x, train_y, test_size=0.1, random_state=42)

In [10]:
print(train_input.shape)
print(val_input.shape)
print(train_target.shape)
print(val_target.shape)

(35646, 56)
(3961, 56)
(35646, 14)
(3961, 14)


In [11]:
#Regression Model Fit
lgbm = MultiOutputRegressor(LGBMRegressor(n_estimators=1000,
                                            learning_rate=0.01, 
                                            subsample=0.8, 
                                            colsample_bytree = 0.8, 
                                            max_depth=7)).fit(train_input, train_target)
print('Done.')

Done.


In [12]:
pred = lgbm.predict(val_input)
print('Done.')

Done.


In [13]:
lg_nrmse(val_target, pred)

1.9319556929357877

In [14]:
test_x = test.drop(columns=['ID'])
preds = lgbm.predict(test_x)

In [16]:
for idx, col in enumerate(submission.columns):
    if col=='ID':
        continue
    submission[col] = preds[:,idx-1]
print('Done.')

Done.


In [18]:
submission.to_csv('submit_lgbm_cv.csv', index=False)

In [24]:
lgbm = pd.read_csv('submit_lgbm_cv.csv').drop(['ID'], axis=1)
rf = pd.read_csv('m0818.csv').drop(['ID'],axis=1)
xgb = pd.read_csv('m0819b.csv').drop(['ID'],axis=1)

In [25]:
lgbm

Unnamed: 0,Y_01,Y_02,Y_03,Y_04,Y_05,Y_06,Y_07,Y_08,Y_09,Y_10,Y_11,Y_12,Y_13,Y_14
0,1.477964,1.227491,1.112148,13.956719,30.760950,16.605729,3.163129,-26.099476,-26.203594,-22.161177,24.507797,-26.047327,-26.005118,-26.101966
1,1.508476,1.247797,1.152528,13.531251,31.281792,16.524662,3.128987,-26.176329,-26.185211,-22.317694,24.329813,-26.113116,-26.147701,-26.106472
2,1.440670,1.149246,1.124144,14.858978,32.018522,16.820061,3.033383,-25.972865,-25.946134,-22.156501,24.625912,-25.921987,-25.836796,-25.952562
3,1.435722,1.138737,1.040493,15.182287,32.531971,17.168880,3.086526,-25.661874,-25.688992,-21.762365,24.825684,-25.668129,-25.577033,-25.696524
4,1.357759,0.997454,0.979350,14.952485,31.654701,16.980702,3.102281,-25.744126,-25.687231,-21.943312,24.735232,-25.651149,-25.708456,-25.639153
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
39603,1.256066,0.965677,0.974071,13.112978,30.928482,16.742145,3.185256,-26.495789,-26.445445,-22.930987,24.329692,-26.401532,-26.423824,-26.449650
39604,1.222782,0.888287,0.928781,13.722461,31.332855,16.737509,3.238470,-26.369704,-26.387233,-22.763891,24.442407,-26.265619,-26.305457,-26.324807
39605,1.208889,0.916278,0.940126,13.082896,31.142432,16.636401,3.172073,-26.566547,-26.536115,-22.874651,24.225258,-26.493664,-26.500992,-26.514059
39606,1.220573,0.885381,0.936945,12.952038,31.307722,16.760299,3.205706,-26.539855,-26.523410,-22.780509,24.453523,-26.483325,-26.516758,-26.528424


In [26]:
rf

Unnamed: 0,Y_01,Y_02,Y_03,Y_04,Y_05,Y_06,Y_07,Y_08,Y_09,Y_10,Y_11,Y_12,Y_13,Y_14
0,1.438107,1.199771,1.121014,14.370218,31.244155,16.755128,3.141357,-26.090938,-26.205975,-22.341878,24.458312,-26.098112,-26.018970,-26.105003
1,1.464131,1.215498,1.164229,13.625130,30.923053,16.628905,3.151577,-26.182050,-26.247850,-22.248992,24.334064,-26.148851,-26.167335,-26.165044
2,1.339500,1.141190,1.025548,14.229651,31.756741,16.887573,3.091448,-25.854888,-25.927949,-22.307935,24.536325,-25.801750,-25.860793,-25.853513
3,1.476512,1.144012,1.037104,14.805853,31.785293,17.102236,3.102744,-25.670000,-25.696937,-21.824645,24.860282,-25.685292,-25.685004,-25.657764
4,1.413517,1.051658,1.044588,14.849921,31.728607,16.947440,3.148017,-25.690171,-25.679639,-22.254959,24.640117,-25.644878,-25.606978,-25.696756
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
39603,1.256519,0.962741,0.972590,12.664860,30.662966,16.511296,3.195536,-26.643464,-26.605611,-23.055031,24.242662,-26.472262,-26.538601,-26.521781
39604,1.274067,0.880770,0.953750,14.160787,31.347877,16.709145,3.174681,-26.429520,-26.486183,-22.882203,24.348047,-26.417314,-26.405402,-26.414946
39605,1.263372,0.927841,0.957998,13.047354,31.127292,16.636129,3.170676,-26.560164,-26.567615,-22.848750,24.207950,-26.465291,-26.430005,-26.461016
39606,1.210503,0.885401,0.895156,13.586503,31.285840,16.655163,3.189133,-26.502781,-26.512278,-22.893862,24.408930,-26.398803,-26.445256,-26.467016


In [27]:
xgb

Unnamed: 0,Y_01,Y_02,Y_03,Y_04,Y_05,Y_06,Y_07,Y_08,Y_09,Y_10,Y_11,Y_12,Y_13,Y_14
0,1.416645,1.351831,1.128694,15.309586,31.924870,16.738285,3.122261,-26.043660,-26.075373,-21.905447,24.548798,-25.934320,-25.805878,-25.722555
1,1.449433,1.263718,1.066043,13.250424,30.685540,16.516954,3.157964,-26.366280,-25.999483,-22.483927,24.270473,-26.122278,-26.122625,-26.137680
2,1.503237,1.151469,1.166494,12.274327,30.922773,16.053535,3.047713,-25.806952,-25.862627,-22.050863,23.910475,-25.637207,-25.686895,-25.706427
3,1.518115,1.253980,1.042606,14.488544,32.532757,16.966760,3.044385,-25.687975,-25.787415,-21.602709,25.171537,-25.691093,-25.656834,-25.547110
4,1.399159,1.013148,0.961070,15.435185,30.787102,16.840288,3.220014,-25.678564,-25.666555,-22.171703,24.650885,-25.624279,-25.470665,-25.571980
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
39603,1.286503,0.967967,1.022471,12.246459,30.342403,16.524420,3.205255,-26.655699,-26.544653,-23.139116,24.193184,-26.493078,-26.476768,-26.515831
39604,1.319660,0.842030,0.942301,14.094836,31.268484,16.944420,3.098076,-26.435293,-26.333910,-22.955954,24.474314,-26.345140,-26.399070,-26.386465
39605,1.255467,0.844903,1.048103,13.117178,30.843540,16.499327,3.135324,-26.535090,-26.547125,-22.884537,24.186214,-26.531273,-26.491938,-26.546083
39606,1.228233,0.856860,0.888184,13.464694,31.541695,16.865368,3.148652,-26.508875,-26.488552,-22.905367,24.461494,-26.419437,-26.460468,-26.524086


In [30]:
sum_ = lgbm + xgb + rf ;
div_ = sum_ / 3 ; div_

Unnamed: 0,Y_01,Y_02,Y_03,Y_04,Y_05,Y_06,Y_07,Y_08,Y_09,Y_10,Y_11,Y_12,Y_13,Y_14
0,1.444239,1.259698,1.120619,14.545508,31.309992,16.699714,3.142249,-26.078025,-26.161647,-22.136167,24.504969,-26.026587,-25.943322,-25.976508
1,1.474013,1.242338,1.127600,13.468935,30.963461,16.556840,3.146176,-26.241553,-26.144181,-22.350204,24.311450,-26.128082,-26.145887,-26.136399
2,1.427802,1.147302,1.105395,13.787652,31.566012,16.587056,3.057515,-25.878235,-25.912237,-22.171766,24.357571,-25.786981,-25.794828,-25.837501
3,1.476783,1.178910,1.040068,14.825561,32.283340,17.079292,3.077885,-25.673283,-25.724448,-21.729906,24.952501,-25.681505,-25.639624,-25.633799
4,1.390145,1.020753,0.995003,15.079197,31.390136,16.922810,3.156771,-25.704287,-25.677808,-22.123324,24.675411,-25.640102,-25.595366,-25.635963
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
39603,1.266363,0.965462,0.989711,12.674766,30.644617,16.592621,3.195349,-26.598317,-26.531903,-23.041711,24.255179,-26.455624,-26.479731,-26.495754
39604,1.272170,0.870363,0.941611,13.992695,31.316405,16.797025,3.170409,-26.411506,-26.402442,-22.867349,24.421589,-26.342691,-26.369976,-26.375406
39605,1.242576,0.896340,0.982076,13.082476,31.037754,16.590619,3.159358,-26.553934,-26.550285,-22.869313,24.206474,-26.496742,-26.474312,-26.507053
39606,1.219769,0.875881,0.906761,13.334412,31.378419,16.760277,3.181164,-26.517170,-26.508080,-22.859913,24.441316,-26.433855,-26.474161,-26.506509


In [34]:
submit_ = pd.concat([submission[['ID']], div_], axis=1)
submit_.to_csv('m0823a.csv', index=False)