In [27]:
import pandas as pd
import numpy as np 

import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

In [28]:
train_features = pd.read_csv('../../Data/KAERI/train_features.csv')
train_target = pd.read_csv('../../Data/KAERI/train_target.csv', index_col = 'id')
test_features = pd.read_csv('../../Data/KAERI/test_features.csv')

In [29]:
def preprocessing_KAERI(data) :
    '''
    data: train_features.csv or test_features.csv
    
    return: Random Forest 모델 입력용 데이터
    '''
    
    # 충돌체 별로 0.000116 초 까지의 가속도 데이터만 활용해보기 
    _data = data.groupby('id').head(20)
    
    # string 형태로 변환
    _data['Time'] = _data['Time'].astype('str')
    
    # Random Forest 모델에 입력 할 수 있는 1차원 형태로 가속도 데이터 변환
    _data = _data.pivot_table(index = 'id', columns = 'Time', values = ['S1', 'S2', 'S3', 'S4'])
    
    # column 명 변환
    _data.columns = ['_'.join(col) for col in _data.columns.values]
    
    return _data

In [30]:
train_features = preprocessing_KAERI(train_features)
test_features = preprocessing_KAERI(test_features)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  if sys.path[0] == '':


In [55]:
import sklearn
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split

In [110]:
X_train, X_test, y_train, y_test = train_test_split(train_features, train_target, test_size=0.2, shuffle=123)

In [57]:
import lightgbm as lgb

In [60]:
train_data_X = lgb.Dataset(X_train, label=y_train.X)
test_data_X = lgb.Dataset(X_test, label=y_test.X)

train_data_Y = lgb.Dataset(X_train, label=y_train.Y)
test_data_Y = lgb.Dataset(X_test, label=y_test.Y)

train_data_M = lgb.Dataset(X_train, label=y_train.M)
test_data_M = lgb.Dataset(X_test, label=y_test.M)

train_data_V = lgb.Dataset(X_train, label=y_train.V)
test_data_V = lgb.Dataset(X_test, label=y_test.V)

In [74]:
params = {'learning_rate': 0.01, 
          'max_depth': 16, 
          'boosting': 'gbdt', 
          'objective': 'regression', 
          'metric': 'mse', 
          'is_training_metric': True, 
          'num_leaves': 144, 
          'feature_fraction': 0.9, 
          'bagging_fraction': 0.7, 
          'bagging_freq': 5, 
          'seed':2018}

In [89]:
params1 = {    
    'objective': 'binary',
    'boosting': 'gbdt',
    'metrics':'auc'
} 

In [90]:
model_X = lgb.train(params1, train_data_X, 5000, test_data_X, verbose_eval=10, early_stopping_rounds=500)

Training until validation scores don't improve for 500 rounds
[10]	valid_0's auc: 0.990869
[20]	valid_0's auc: 0.992102
[30]	valid_0's auc: 0.993489
[40]	valid_0's auc: 0.993502
[50]	valid_0's auc: 0.993502
[60]	valid_0's auc: 0.99345
[70]	valid_0's auc: 0.993334
[80]	valid_0's auc: 0.993074
[90]	valid_0's auc: 0.993113
[100]	valid_0's auc: 0.993204
[110]	valid_0's auc: 0.993113
[120]	valid_0's auc: 0.993035
[130]	valid_0's auc: 0.993009
[140]	valid_0's auc: 0.992945
[150]	valid_0's auc: 0.992893
[160]	valid_0's auc: 0.992737
[170]	valid_0's auc: 0.992335
[180]	valid_0's auc: 0.992102
[190]	valid_0's auc: 0.992011
[200]	valid_0's auc: 0.991985
[210]	valid_0's auc: 0.992037
[220]	valid_0's auc: 0.992011
[230]	valid_0's auc: 0.992063
[240]	valid_0's auc: 0.992037
[250]	valid_0's auc: 0.991972
[260]	valid_0's auc: 0.991868
[270]	valid_0's auc: 0.991894
[280]	valid_0's auc: 0.991894
[290]	valid_0's auc: 0.991855
[300]	valid_0's auc: 0.991803
[310]	valid_0's auc: 0.99179
[320]	valid_0's auc

In [76]:
model_Y = lgb.train(params, train_data_Y, 2000, test_data_Y, verbose_eval=100, early_stopping_rounds=100)

Training until validation scores don't improve for 100 rounds
[100]	valid_0's l2: 9631.1
[200]	valid_0's l2: 1309.23
[300]	valid_0's l2: 184.373
[400]	valid_0's l2: 30.3582
[500]	valid_0's l2: 8.52455
[600]	valid_0's l2: 5.12762
[700]	valid_0's l2: 4.29488
[800]	valid_0's l2: 4.00324
[900]	valid_0's l2: 3.78555
[1000]	valid_0's l2: 3.6628
[1100]	valid_0's l2: 3.57839
[1200]	valid_0's l2: 3.52502
[1300]	valid_0's l2: 3.4258
[1400]	valid_0's l2: 3.35416
[1500]	valid_0's l2: 3.2624
[1600]	valid_0's l2: 3.18965
[1700]	valid_0's l2: 3.1627
[1800]	valid_0's l2: 3.15466
[1900]	valid_0's l2: 3.13541
[2000]	valid_0's l2: 3.10194
Did not meet early stopping. Best iteration is:
[1970]	valid_0's l2: 3.10133


In [77]:
model_M = lgb.train(params, train_data_M, 2000, test_data_M, verbose_eval=100, early_stopping_rounds=100)

Training until validation scores don't improve for 100 rounds
[100]	valid_0's l2: 1279.01
[200]	valid_0's l2: 940.354
[300]	valid_0's l2: 794.82
[400]	valid_0's l2: 714.66
[500]	valid_0's l2: 652.092
[600]	valid_0's l2: 611.214
[700]	valid_0's l2: 579.729
[800]	valid_0's l2: 555.08
[900]	valid_0's l2: 532.334
[1000]	valid_0's l2: 513.306
[1100]	valid_0's l2: 495.94
[1200]	valid_0's l2: 480.888
[1300]	valid_0's l2: 466.765
[1400]	valid_0's l2: 453.782
[1500]	valid_0's l2: 440.452
[1600]	valid_0's l2: 430.042
[1700]	valid_0's l2: 420.387
[1800]	valid_0's l2: 412.806
[1900]	valid_0's l2: 403.801
[2000]	valid_0's l2: 396.395
Did not meet early stopping. Best iteration is:
[1998]	valid_0's l2: 396.366


In [78]:
model_V = lgb.train(params, train_data_V, 2000, test_data_V, verbose_eval=100, early_stopping_rounds=100)

Training until validation scores don't improve for 100 rounds
[100]	valid_0's l2: 0.0211927
[200]	valid_0's l2: 0.00748897
[300]	valid_0's l2: 0.00396722
[400]	valid_0's l2: 0.00266541
[500]	valid_0's l2: 0.00204731
[600]	valid_0's l2: 0.00174317
[700]	valid_0's l2: 0.00151635
[800]	valid_0's l2: 0.00138147
[900]	valid_0's l2: 0.00128213
[1000]	valid_0's l2: 0.00121045
[1100]	valid_0's l2: 0.00115406
[1200]	valid_0's l2: 0.0011024
[1300]	valid_0's l2: 0.00106721
[1400]	valid_0's l2: 0.00102903
[1500]	valid_0's l2: 0.00100398
[1600]	valid_0's l2: 0.000981972
[1700]	valid_0's l2: 0.000960625
[1800]	valid_0's l2: 0.000940758
[1900]	valid_0's l2: 0.000922538
[2000]	valid_0's l2: 0.000907647
Did not meet early stopping. Best iteration is:
[2000]	valid_0's l2: 0.000907647


In [115]:
y_train.V

2143    0.4
1630    0.6
491     0.8
2378    0.8
471     1.0
       ... 
2423    0.8
1678    1.0
2261    0.6
883     0.8
1901    0.8
Name: V, Length: 2240, dtype: float64

In [117]:
X = model_X.predict(test_features)
Y = model_Y.predict(test_features)
M = model_M.predict(test_features)
V = model_V.predict(test_features)

In [118]:
num = np.arange(2800, 3500, 1)

In [119]:
submit = pd.DataFrame({'id': num, 'X': X, 'Y': Y, 'M': M, 'V': V})

In [120]:
submit.to_csv('submission/07.csv', index = False)