In [12]:
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split as tts
from sklearn.model_selection import RandomizedSearchCV
from sklearn.multioutput import MultiOutputRegressor
from sklearn.metrics import make_scorer
from sklearn  import metrics
from sklearn.ensemble import RandomForestRegressor
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

In [13]:
# Load the competition CSVs.
# The data root defaults to the original location but can be overridden with
# the RADAR_DATA_DIR environment variable so the notebook runs on other machines.
zip_dir = os.environ.get('RADAR_DATA_DIR', '/Data1/Radar')
meta_dir = os.path.join(zip_dir, 'meta')
# All paths are built with os.path.join for consistency.
x_feature = pd.read_csv(os.path.join(meta_dir, 'x_feature_info.csv'))
train = pd.read_csv(os.path.join(zip_dir, 'train.csv'))
test = pd.read_csv(os.path.join(zip_dir, 'test.csv'))
submission = pd.read_csv(os.path.join(zip_dir, 'sample_submission.csv'))

In [14]:
def lg_nrmse(gt, preds):
    """Weighted sum of per-target NRMSE over the first 14 target columns.

    For every column the NRMSE is RMSE(gt, preds) / mean(|gt|).
    Columns 0-7 (Y_01 .. Y_08) carry a 20% extra weight (factor 1.2);
    columns 8-13 are weighted 1.0.

    Parameters
    ----------
    gt : array-like, shape (n_samples, >=14)
        Ground-truth targets (no 'ID' column).
    preds : array-like, shape (n_samples, >=14)
        Predicted targets, column-aligned with ``gt``.

    Returns
    -------
    float
        ``1.2 * sum(nrmse[:8]) + 1.0 * sum(nrmse[8:14])``.

    Raises
    ------
    ValueError
        If either input is not 2-D, has fewer than 14 columns, or the two
        inputs differ in number of rows.
    """
    gt = np.asarray(gt, dtype=float)
    preds = np.asarray(preds, dtype=float)
    if gt.ndim != 2 or preds.ndim != 2:
        raise ValueError('gt and preds must both be 2-D arrays')
    if gt.shape[1] < 14 or preds.shape[1] < 14:
        raise ValueError('gt and preds must both have at least 14 target columns')
    if gt.shape[0] != preds.shape[0]:
        raise ValueError('gt and preds must have the same number of rows')

    gt, preds = gt[:, :14], preds[:, :14]
    # RMSE is computed with NumPy directly: the `squared=False` argument of
    # sklearn's mean_squared_error is deprecated (1.4) and removed (1.6).
    rmse = np.sqrt(np.mean((gt - preds) ** 2, axis=0))
    all_nrmse = rmse / np.mean(np.abs(gt), axis=0)
    score = 1.2 * np.sum(all_nrmse[:8]) + 1.0 * np.sum(all_nrmse[8:14])
    return score


In [15]:
# Preview the trailing 14 columns of `train` (the Y_01 .. Y_14 targets).
train.iloc[:, -14:]

Unnamed: 0,Y_01,Y_02,Y_03,Y_04,Y_05,Y_06,Y_07,Y_08,Y_09,Y_10,Y_11,Y_12,Y_13,Y_14
0,2.056,1.456,1.680,10.502,29.632,16.083,4.276,-25.381,-25.529,-22.769,23.792,-25.470,-25.409,-25.304
1,1.446,1.184,1.268,18.507,33.179,16.736,3.229,-26.619,-26.523,-22.574,24.691,-26.253,-26.497,-26.438
2,1.251,0.665,0.782,14.082,31.801,17.080,2.839,-26.238,-26.216,-22.169,24.649,-26.285,-26.215,-26.370
3,1.464,1.079,1.052,16.975,34.503,17.143,3.144,-25.426,-25.079,-21.765,24.913,-25.254,-25.021,-25.345
4,0.983,0.646,0.689,15.047,32.602,17.569,3.138,-25.376,-25.242,-21.072,25.299,-25.072,-25.195,-24.974
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
39602,1.382,1.215,1.263,10.874,29.194,16.582,3.410,-26.486,-26.581,-22.772,24.261,-26.491,-26.584,-26.580
39603,1.482,0.606,1.083,8.759,29.859,15.659,3.406,-27.308,-27.203,-24.674,23.427,-27.250,-27.334,-27.325
39604,1.117,1.154,0.993,13.159,24.720,16.823,3.215,-26.502,-26.687,-22.577,24.301,-26.388,-26.425,-26.601
39605,0.895,0.187,0.477,9.123,26.412,15.757,4.216,-26.760,-26.634,-24.066,23.305,-26.536,-26.751,-26.635


In [16]:
# The trailing 14 columns are the targets; column 0 is skipped
# (presumably the 'ID' column -- confirm against train.csv).
feature_cols = train.columns[1:-14]
target_cols = train.columns[-14:]

train_x = np.array(train[feature_cols])
train_y = np.array(train[target_cols])

print(f'train_x : {train_x.shape}')
print(f'train_y : {train_y.shape}')

train_x : (39607, 56)
train_y : (39607, 14)


In [17]:
# Standardize every feature column using statistics of the full training matrix.
# NOTE(review): the scaler is fitted before the train/validation split below,
# so validation rows contribute to the mean/std used for scaling.
scaler = StandardScaler().fit(train_x)
scaled_x = scaler.transform(train_x)
scaled_x.shape

(39607, 56)

In [18]:
# `label` is an independent copy of the target matrix.
label = np.array(train_y, copy=True)
label.shape

(39607, 14)

# Split the dataset

In [19]:
# Hold out 20% of the rows for validation; the seed is fixed for repeatability.
split_parts = tts(scaled_x, label, test_size=0.2, random_state=1)
x_train, x_test, y_train, y_test = split_parts

print(f'x_train : {x_train.shape}')
print(f'y_train : {y_train.shape}')
print(f'x_test  : {x_test.shape}')
print(f'y_test  : {y_test.shape}')

x_train : (31685, 56)
y_train : (31685, 14)
x_test  : (7922, 56)
y_test  : (7922, 14)


In [20]:
# Split the targets column-wise at index 8: the first 8 columns feed the
# 'first' head and the remaining 6 feed the 'second' head.
y_train_1, y_train_2 = np.split(y_train, [8], axis=1)
y_test_1, y_test_2 = np.split(y_test, [8], axis=1)

print(y_test_1.shape)

(7922, 8)


# Regressor

In [21]:
# Restrict TensorFlow to a single physical GPU.
GPU_INDEX = 3  # preferred device index on the original training machine
gpus = tf.config.list_physical_devices('GPU')
if gpus:
    # Fall back to the last available GPU instead of raising IndexError
    # when the machine has fewer than GPU_INDEX + 1 devices.
    selected_gpu = gpus[min(GPU_INDEX, len(gpus) - 1)]
    try:
        tf.config.set_visible_devices(selected_gpu, 'GPU')
    except RuntimeError as e:
        # Visible devices cannot be changed once the runtime is initialized.
        print(e)
def _residual_dense_block(x, units, dropout_rate):
    """Dense(swish) -> BatchNorm -> Dropout, with the Dense output added back as a skip."""
    dense_out = layers.Dense(units, activation='swish')(x)
    h = layers.BatchNormalization()(dense_out)
    h = layers.Dropout(dropout_rate)(h)
    return h + dense_out


def get_model(input_dim=56, dropout_rate=0.3):
    """Build the two-headed regression network.

    The trunk is three residual dense blocks of width 112, 60 and 30,
    followed by one more dropout. Two linear heads sit on top:
    'first' (8 outputs) and 'second' (6 outputs).

    Parameters
    ----------
    input_dim : int, default 56
        Number of input features.
    dropout_rate : float, default 0.3
        Dropout rate used in every block and before the heads.

    Returns
    -------
    keras.Model
        Model mapping ``inputs`` to ``[first, second]``.
    """
    inputs = keras.Input(shape=(input_dim,))

    x = inputs
    # Layers are created in the same order as the hand-unrolled version,
    # so auto-generated layer names and checkpoint layout are unchanged.
    for units in (112, 60, 30):
        x = _residual_dense_block(x, units, dropout_rate)
    x = layers.Dropout(dropout_rate)(x)

    outputs1 = layers.Dense(8, name='first')(x)
    outputs2 = layers.Dense(6, name='second')(x)

    return keras.Model(inputs, [outputs1, outputs2])

In [22]:
my_model = get_model()

# Both heads use MSE as loss and report RMSE; the 'first' head's loss is
# weighted 1.2 against 1.0 for 'second' in the total loss.
my_model.compile(
    optimizer=keras.optimizers.RMSprop(learning_rate=1e-3),
    loss={head: keras.losses.MeanSquaredError() for head in ('first', 'second')},
    metrics={head: [keras.metrics.RootMeanSquaredError()] for head in ('first', 'second')},
    loss_weights={"first": 1.2, "second": 1.0},
)

2022-08-23 14:28:08.812642: I tensorflow/core/platform/cpu_feature_guard.cc:142] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 AVX512F FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2022-08-23 14:28:09.492006: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1510] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 45537 MB memory:  -> device: 3, name: Quadro RTX 8000, pci bus id: 0000:40:00.0, compute capability: 7.5


In [24]:
# Model.fit documents `callbacks` as a list of callbacks, so wrap the
# checkpoint in one; further callbacks can then simply be appended.
# NOTE(review): the checkpoint tracks only the 'first' head's validation
# loss, not the combined `val_loss` -- confirm this is intended.
callbacks = [
    tf.keras.callbacks.ModelCheckpoint(
        filepath='m0821b.ckpt',
        monitor='val_first_loss',
        mode='min',
        verbose=1,
        save_best_only=True,
        save_weights_only=True,
    )
]

In [26]:
# Train both heads jointly, validating on the held-out split every epoch.
my_model.fit(
    x=x_train,
    y=[y_train_1, y_train_2],
    batch_size=512,
    epochs=1000,
    validation_data=(x_test, [y_test_1, y_test_2]),
    callbacks=callbacks,
)

2022-08-23 14:29:45.863748: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:185] None of the MLIR Optimization Passes are enabled (registered 2)


Epoch 1/1000

Epoch 00001: val_first_loss improved from inf to 45.55021, saving model to m0821b.ckpt
Epoch 2/1000

Epoch 00002: val_first_loss improved from 45.55021 to 16.99027, saving model to m0821b.ckpt
Epoch 3/1000

Epoch 00003: val_first_loss improved from 16.99027 to 9.92364, saving model to m0821b.ckpt
Epoch 4/1000

Epoch 00004: val_first_loss improved from 9.92364 to 5.13418, saving model to m0821b.ckpt
Epoch 5/1000

Epoch 00005: val_first_loss improved from 5.13418 to 3.53904, saving model to m0821b.ckpt
Epoch 6/1000

Epoch 00006: val_first_loss improved from 3.53904 to 3.14593, saving model to m0821b.ckpt
Epoch 7/1000

Epoch 00007: val_first_loss did not improve from 3.14593
Epoch 8/1000

Epoch 00008: val_first_loss improved from 3.14593 to 2.81951, saving model to m0821b.ckpt
Epoch 9/1000

Epoch 00009: val_first_loss did not improve from 2.81951
Epoch 10/1000

Epoch 00010: val_first_loss improved from 2.81951 to 2.65768, saving model to m0821b.ckpt
Epoch 11/1000

Epoch 0001

<keras.callbacks.History at 0x7f6cdb0253d0>

In [27]:
# Rebuild the same architecture and restore the weights that the
# ModelCheckpoint callback wrote to 'm0821b.ckpt' during training.
new_model = get_model()
new_model.load_weights('m0821b.ckpt')
new_model.summary()

Model: "model_1"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_2 (InputLayer)            [(None, 56)]         0                                            
__________________________________________________________________________________________________
dense_3 (Dense)                 (None, 112)          6384        input_2[0][0]                    
__________________________________________________________________________________________________
batch_normalization_3 (BatchNor (None, 112)          448         dense_3[0][0]                    
__________________________________________________________________________________________________
dropout_4 (Dropout)             (None, 112)          0           batch_normalization_3[0][0]      
____________________________________________________________________________________________

In [28]:
# Compile the restored model with the same losses, metrics and loss weights
# as the trained one so that evaluate() reports comparable numbers.
new_model.compile(
    optimizer=keras.optimizers.RMSprop(learning_rate=1e-3),
    loss={head: keras.losses.MeanSquaredError() for head in ('first', 'second')},
    metrics={head: [keras.metrics.RootMeanSquaredError()] for head in ('first', 'second')},
    loss_weights={"first": 1.2, "second": 1.0},
)

In [29]:
# Returned list: total weighted loss, 'first' loss, 'second' loss,
# then the RMSE metric of 'first' and of 'second'.
new_model.evaluate(x=x_test, y=[y_test_1, y_test_2])



[3.1507067680358887,
 2.1926376819610596,
 0.5195419192314148,
 1.4807556867599487,
 0.7207926511764526]

In [30]:
# Predict on the held-out split; the two-headed model yields one array per head.
predicted = new_model.predict(x_test)

In [34]:
# First element belongs to the 'first' head, second to the 'second' head.
res1, res2 = predicted[:2]

In [42]:
# Stack the two heads side by side to rebuild the 14-column target layout.
res = np.hstack((res1, res2))

In [31]:
def lg_nrmse(gt, preds):
    """Weighted sum of per-target NRMSE over the first 14 target columns.

    For every column the NRMSE is RMSE(gt, preds) / mean(|gt|).
    Columns 0-7 (Y_01 .. Y_08) carry a 20% extra weight (factor 1.2);
    columns 8-13 are weighted 1.0.

    Parameters
    ----------
    gt : array-like, shape (n_samples, >=14)
        Ground-truth targets (no 'ID' column).
    preds : array-like, shape (n_samples, >=14)
        Predicted targets, column-aligned with ``gt``.

    Returns
    -------
    float
        ``1.2 * sum(nrmse[:8]) + 1.0 * sum(nrmse[8:14])``.

    Raises
    ------
    ValueError
        If either input is not 2-D, has fewer than 14 columns, or the two
        inputs differ in number of rows.
    """
    gt = np.asarray(gt, dtype=float)
    preds = np.asarray(preds, dtype=float)
    if gt.ndim != 2 or preds.ndim != 2:
        raise ValueError('gt and preds must both be 2-D arrays')
    if gt.shape[1] < 14 or preds.shape[1] < 14:
        raise ValueError('gt and preds must both have at least 14 target columns')
    if gt.shape[0] != preds.shape[0]:
        raise ValueError('gt and preds must have the same number of rows')

    gt, preds = gt[:, :14], preds[:, :14]
    # RMSE is computed with NumPy directly: the `squared=False` argument of
    # sklearn's mean_squared_error is deprecated (1.4) and removed (1.6).
    rmse = np.sqrt(np.mean((gt - preds) ** 2, axis=0))
    all_nrmse = rmse / np.mean(np.abs(gt), axis=0)
    score = 1.2 * np.sum(all_nrmse[:8]) + 1.0 * np.sum(all_nrmse[8:14])
    return score

In [43]:
# Competition metric on the held-out split (lower is better).
lg_nrmse(gt=y_test, preds=res)

2.000898428503376

# Inference

In [50]:
# Select from `test` the same feature columns that were used for training.
test_x = np.array(test[train.columns[1:-14]])
test_x.shape

(39608, 56)

In [51]:
# Apply the scaler fitted on the training features to the test features.
# NOTE(review): this rebinds `scaled_x`, which earlier held the scaled training
# features; re-running the train/validation split cell after this one would
# pick up the test matrix instead.
scaled_x = scaler.transform(test_x)
scaled_x.shape

(39608, 56)

In [52]:
# Run the restored model on the scaled test features.
predicted_ = new_model.predict(scaled_x)

In [53]:
# First element belongs to the 'first' head, second to the 'second' head.
res1_, res2_ = predicted_[:2]

In [54]:
# Stack the two heads side by side to rebuild the 14-column target layout.
res_ = np.hstack((res1_, res2_))

In [55]:
# Column layout expected in the submission file, taken from the sample.
label_names = submission.columns
label_names

Index(['ID', 'Y_01', 'Y_02', 'Y_03', 'Y_04', 'Y_05', 'Y_06', 'Y_07', 'Y_08',
       'Y_09', 'Y_10', 'Y_11', 'Y_12', 'Y_13', 'Y_14'],
      dtype='object')

In [56]:
# pd.concat(axis=1) aligns on the index and pads with NaN on a mismatch,
# so verify the prediction matrix matches the sample submission first.
expected_shape = (len(submission), len(label_names) - 1)
if res_.shape != expected_shape:
    raise ValueError(
        f'prediction shape {res_.shape} does not match the expected {expected_shape}'
    )

# Reuse the submission's index so rows pair up positionally with their IDs.
submit = pd.concat(
    [submission[['ID']], pd.DataFrame(res_, index=submission.index)],
    axis=1,
)
submit.columns = label_names
submit.to_csv('m0821b.csv', index=False)