In [1]:
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split as tts
from sklearn.model_selection import RandomizedSearchCV
from sklearn.multioutput import MultiOutputRegressor
from sklearn.metrics import make_scorer
from sklearn  import metrics
from sklearn.ensemble import RandomForestRegressor
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

In [2]:
# train csv
zip_dir = '/Data1/Radar'
meta_dir = os.path.join(zip_dir, 'meta')
x_feature = pd.read_csv(meta_dir+'/x_feature_info.csv')
train = pd.read_csv(os.path.join(zip_dir,'train.csv'))
test = pd.read_csv(os.path.join(zip_dir,'test.csv'))
submission = pd.read_csv(zip_dir+'/sample_submission.csv')

In [3]:
def lg_nrmse(gt, preds):
    # 각 Y Feature별 NRMSE 총합
    # Y_01 ~ Y_08 까지 20% 가중치 부여
    all_nrmse = []
    for idx in range(14): # ignore 'ID'
        rmse = metrics.mean_squared_error(gt[:,idx], preds[:,idx], squared=False)
        nrmse = rmse/np.mean(np.abs(gt[:,idx]))
        all_nrmse.append(nrmse)
    score = 1.2 * np.sum(all_nrmse[:8]) + 1.0 * np.sum(all_nrmse[8:14])
    return score


In [4]:
train[train.columns[-14:]]

Unnamed: 0,Y_01,Y_02,Y_03,Y_04,Y_05,Y_06,Y_07,Y_08,Y_09,Y_10,Y_11,Y_12,Y_13,Y_14
0,2.056,1.456,1.680,10.502,29.632,16.083,4.276,-25.381,-25.529,-22.769,23.792,-25.470,-25.409,-25.304
1,1.446,1.184,1.268,18.507,33.179,16.736,3.229,-26.619,-26.523,-22.574,24.691,-26.253,-26.497,-26.438
2,1.251,0.665,0.782,14.082,31.801,17.080,2.839,-26.238,-26.216,-22.169,24.649,-26.285,-26.215,-26.370
3,1.464,1.079,1.052,16.975,34.503,17.143,3.144,-25.426,-25.079,-21.765,24.913,-25.254,-25.021,-25.345
4,0.983,0.646,0.689,15.047,32.602,17.569,3.138,-25.376,-25.242,-21.072,25.299,-25.072,-25.195,-24.974
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
39602,1.382,1.215,1.263,10.874,29.194,16.582,3.410,-26.486,-26.581,-22.772,24.261,-26.491,-26.584,-26.580
39603,1.482,0.606,1.083,8.759,29.859,15.659,3.406,-27.308,-27.203,-24.674,23.427,-27.250,-27.334,-27.325
39604,1.117,1.154,0.993,13.159,24.720,16.823,3.215,-26.502,-26.687,-22.577,24.301,-26.388,-26.425,-26.601
39605,0.895,0.187,0.477,9.123,26.412,15.757,4.216,-26.760,-26.634,-24.066,23.305,-26.536,-26.751,-26.635


In [5]:
train_x = np.array(train[train.columns[1:-14]])
print(f'train_x : {train_x.shape}')
train_y = np.array(train[train.columns[-14:]])
print(f'train_y : {train_y.shape}')

train_x : (39607, 56)
train_y : (39607, 14)


In [6]:
scaler = StandardScaler()
scaler.fit(train_x)
scaled_x = scaler.transform(train_x)
scaled_x.shape

(39607, 56)

In [7]:
label = np.array(train_y)
label.shape

(39607, 14)

# Split the dataset

In [8]:
x_train, x_test, y_train, y_test = tts(scaled_x, label, test_size = 0.2, random_state = 1)
print(f'x_train : {x_train.shape}')
print(f'y_train : {y_train.shape}')
print(f'x_test  : {x_test.shape}')
print(f'y_test  : {y_test.shape}')

x_train : (31685, 56)
y_train : (31685, 14)
x_test  : (7922, 56)
y_test  : (7922, 14)


# Regressor

In [9]:
gpus = tf.config.experimental.list_physical_devices('GPU')
if gpus:
  
  try:
    tf.config.experimental.set_visible_devices(gpus[3], 'GPU')
  except RuntimeError as e:
    
    print(e)
def get_model() :
    inputs = keras.Input(shape=(56,))

    x1 = layers.Dense(112, activation = 'swish')(inputs)
    x = layers.BatchNormalization()(x1)
    x = layers.Dropout(0.3)(x)
    x = x + x1
    
    x2 = layers.Dense(60, activation = 'swish')(x)
    x = layers.BatchNormalization()(x2)
    x = layers.Dropout(0.3)(x)
    x = x + x2

    x3 = layers.Dense(30, activation = 'swish')(x)
    x = layers.BatchNormalization()(x3)
    x = layers.Dropout(0.3)(x)
    x = x + x3
    x = layers.Dropout(0.3)(x)

    outputs = layers.Dense(14)(x)

    return keras.Model(inputs,outputs)

In [10]:
model = get_model()
model.compile(loss = 'mse',
             optimizer = 'adam',
             metrics=['mse'])

2022-08-23 11:39:35.407145: I tensorflow/core/platform/cpu_feature_guard.cc:142] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 AVX512F FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2022-08-23 11:39:36.133088: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1510] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 42866 MB memory:  -> device: 3, name: Quadro RTX 8000, pci bus id: 0000:40:00.0, compute capability: 7.5


In [17]:
callbacks = tf.keras.callbacks.ModelCheckpoint(
    filepath = 'm0821.ckpt',
    monitor = 'val_loss',
    verbose = 1,
    save_best_only = True,
    save_weights_only = True)

In [19]:
model.fit(x_train,y_train,
          validation_data=(x_test,y_test),
          epochs=300, batch_size = 512,
          callbacks=callbacks)

2022-08-23 11:49:21.792738: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:185] None of the MLIR Optimization Passes are enabled (registered 2)


Epoch 1/300

Epoch 00001: val_loss improved from inf to 90.63081, saving model to m0821.ckpt
Epoch 2/300

Epoch 00002: val_loss improved from 90.63081 to 26.58909, saving model to m0821.ckpt
Epoch 3/300

Epoch 00003: val_loss improved from 26.58909 to 17.45601, saving model to m0821.ckpt
Epoch 4/300

Epoch 00004: val_loss improved from 17.45601 to 9.37112, saving model to m0821.ckpt
Epoch 5/300

Epoch 00005: val_loss improved from 9.37112 to 6.94054, saving model to m0821.ckpt
Epoch 6/300

Epoch 00006: val_loss improved from 6.94054 to 4.87273, saving model to m0821.ckpt
Epoch 7/300

Epoch 00007: val_loss improved from 4.87273 to 4.30713, saving model to m0821.ckpt
Epoch 8/300

Epoch 00008: val_loss improved from 4.30713 to 3.89485, saving model to m0821.ckpt
Epoch 9/300

Epoch 00009: val_loss improved from 3.89485 to 3.65040, saving model to m0821.ckpt
Epoch 10/300

Epoch 00010: val_loss improved from 3.65040 to 2.74651, saving model to m0821.ckpt
Epoch 11/300

Epoch 00011: val_loss d

<keras.callbacks.History at 0x7f3354a5ebe0>

In [22]:
new_model = get_model()
new_model.load_weights('m0821.ckpt')
new_model.summary()

Model: "model_3"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_4 (InputLayer)            [(None, 56)]         0                                            
__________________________________________________________________________________________________
dense_12 (Dense)                (None, 112)          6384        input_4[0][0]                    
__________________________________________________________________________________________________
batch_normalization_9 (BatchNor (None, 112)          448         dense_12[0][0]                   
__________________________________________________________________________________________________
dropout_12 (Dropout)            (None, 112)          0           batch_normalization_9[0][0]      
____________________________________________________________________________________________

In [24]:
new_model.compile(loss = 'mse',
             optimizer = 'adam',
             metrics=['mse'])

In [26]:
new_model.evaluate(x_test,y_test)



[1.5257716178894043, 1.5257716178894043]

In [29]:
predicted = new_model.predict(x_test)

In [31]:
def lg_nrmse(gt, preds):
    # 각 Y Feature별 NRMSE 총합
    # Y_01 ~ Y_08 까지 20% 가중치 부여
    all_nrmse = []
    for idx in range(14): # ignore 'ID'
        rmse = metrics.mean_squared_error(gt[:,idx], preds[:,idx], squared=False)
        nrmse = rmse/np.mean(np.abs(gt[:,idx]))
        all_nrmse.append(nrmse)
    score = 1.2 * np.sum(all_nrmse[:8]) + 1.0 * np.sum(all_nrmse[8:14])
    return score

In [32]:
lg_nrmse(y_test,predicted)

2.0152360933191997