# Import

In [1]:
import pandas as pd
import numpy as np
import gc
import os
from sklearn.preprocessing import LabelEncoder
import sherpa
import sherpa.algorithms.bayesian_optimization as bayesian_optimization
import tempfile
from sklearn.metrics import mean_absolute_error

# to parquet

In [4]:
def csv_to_parquet(csv_path, save_name):
    df = pd.read_csv(csv_path)
    df.to_parquet(f'./{save_name}.parquet')
    del df
    gc.collect()
    print(save_name, 'Done.')

In [8]:
csv_to_parquet('./train.csv', 'train')
csv_to_parquet('./test.csv', 'test')

train Done.
test Done.


# Data load

In [21]:
from email.utils import parsedate


train = pd.read_parquet('./train.parquet')
test = pd.read_parquet('./test.parquet')

In [22]:
train.head()

Unnamed: 0,id,base_date,day_of_week,base_hour,road_in_use,lane_count,road_rating,road_name,multi_linked,connect_code,...,road_type,start_node_name,start_latitude,start_longitude,start_turn_restricted,end_node_name,end_latitude,end_longitude,end_turn_restricted,target
0,TRAIN_0000000,20220623,목,17,0,1,106,지방도1112호선,0,0,...,3,제3교래교,33.427747,126.662612,없음,제3교래교,33.427749,126.662335,없음,52.0
1,TRAIN_0000001,20220728,목,21,0,2,103,일반국도11호선,0,0,...,0,광양사거리,33.50073,126.529107,있음,KAL사거리,33.504811,126.52624,없음,30.0
2,TRAIN_0000002,20211010,일,7,0,2,103,일반국도16호선,0,0,...,0,창고천교,33.279145,126.368598,없음,상창육교,33.280072,126.362147,없음,61.0
3,TRAIN_0000003,20220311,금,13,0,2,107,태평로,0,0,...,0,남양리조트,33.246081,126.567204,없음,서현주택,33.245565,126.566228,없음,20.0
4,TRAIN_0000004,20211005,화,8,0,2,103,일반국도12호선,0,0,...,0,애월샷시,33.462214,126.326551,없음,애월입구,33.462677,126.330152,없음,38.0


# Preprocess

In [23]:
str_col = ['day_of_week','start_turn_restricted','end_turn_restricted']
for i in str_col:
    le = LabelEncoder()
    le=le.fit(train[i])
    train[i]=le.transform(train[i])
    
    for label in np.unique(test[i]):
        if label not in le.classes_: 
            le.classes_ = np.append(le.classes_, label)
    test[i]=le.transform(test[i])

In [24]:
y_train = train['target'] 

X_train = train.drop(['id','base_date', 'target','road_name', 'start_node_name', 'end_node_name','vehicle_restricted'], axis=1)

test = test.drop(['id','base_date', 'road_name', 'start_node_name', 'end_node_name','vehicle_restricted'], axis=1)

print(X_train.shape)
print(y_train.shape)
print(test.shape)

(4701217, 17)
(4701217,)
(291241, 17)


In [6]:
from tqdm import tqdm

def time_window(df, t, t_sep):
    seq_len = t
    seqence_length = seq_len + t_sep

    result = []
    for index in tqdm(range(len(df) - seqence_length)):
        result.append(df[index: index + seqence_length])

    return np.array(result)

In [7]:
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split


# train
# train, validation 분리 (8 : 2)
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.2, shuffle=True, random_state=119)

# scaling
scaler = MinMaxScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_val_scaled = scaler.transform(X_val)

# time series window 생성
X_train = time_window(X_train_scaled, 0, 1)
X_val = time_window(X_val_scaled, 0, 1)
y_train = time_window(y_train, 0, 1)
y_val = time_window(y_val, 0, 1)

# y의 길이와 같은 길이로 설정
X_train = X_train[:len(y_train)]
X_val = X_val[:len(y_val)]

100%|██████████| 3760972/3760972 [00:01<00:00, 2081353.85it/s]
100%|██████████| 940243/940243 [00:00<00:00, 2017729.79it/s]
100%|██████████| 3760972/3760972 [02:02<00:00, 30578.38it/s]
100%|██████████| 940243/940243 [00:27<00:00, 34246.36it/s]


# Modeling

## transformer 정의

### encoder

In [8]:
import tensorflow as tf
from tensorflow.keras import layers
import random

# 시드고정
tf.random.set_seed(42)
random.seed(42)
np.random.seed(42)

def transformer_encoder(inputs, head_size, num_heads, ff_dim, dropout):

    x = layers.LayerNormalization(epsilon=epsilon)(inputs) # 레이어 정규화
    x = layers.MultiHeadAttention(
        key_dim=head_size, num_heads=num_heads, dropout=dropout
    )(x, x)
    x = layers.Dropout(dropout)(x)
    res = x + inputs

    x = layers.LayerNormalization(epsilon=epsilon)(res)
    x = layers.Conv1D(filters=ff_dim, kernel_size=1, activation=activation)(x)
    x = layers.Dropout(dropout)(x)
    x = layers.Conv1D(filters=inputs.shape[-1], kernel_size=1)(x)
    return x + res

### build

In [9]:
def build_model(input_shape, head_size, num_heads, ff_dim, num_transformer_blocks, mlp_units, dropout=0, mlp_dropout=0):
    inputs = keras.Input(shape=input_shape)
    x = inputs
    for _ in range(num_transformer_blocks):
        x = transformer_encoder(x, head_size, num_heads, ff_dim, dropout)

    x = layers.GlobalAveragePooling1D(data_format="channels_first")(x)
    for dim in mlp_units:
        x = layers.Dense(dim, activation=activation)(x)
        x = layers.Dropout(mlp_dropout)(x)
    outputs = layers.Dense(1)(x)
    return keras.Model(inputs, outputs)

## Sherpa Setup

In [10]:
# parameter

epsilon = 1e-6
mlp_units = [64]
mlp_dropout = 0.1
dropout = 0.1
earlystop_patience = 10

parameters = [sherpa.Continuous('learning_rate', [1e-4, 1e-2], 'log'),
              sherpa.Choice('batch', [64, 128, 256, 512, 1024]),
              sherpa.Discrete('num_transformer_blocks', [2, 16]),
              sherpa.Discrete('ff_dim', [2, 16]),
              sherpa.Choice('head_size', [32, 64, 128, 256]),
              sherpa.Discrete('num_heads', [2, 16]),              
              sherpa.Choice('activation', ['relu', 'swish'])]
algorithm = alg = sherpa.algorithms.SuccessiveHalving(r=1, R=9, eta=3, s=0, max_finished_configs=1)
study = sherpa.Study(parameters=parameters,
                     algorithm=algorithm,
                     lower_is_better=False,
                     disable_dashboard=True)

In [11]:
model_dir = tempfile.mkdtemp()

## Hyperparameter Optimization

In [12]:
from tensorflow import keras

for trial in study:
    # Getting number of training epochs
    initial_epoch = {1: 0, 3: 1, 9: 4}[trial.parameters['resource']]
    epochs = trial.parameters['resource'] + initial_epoch

    print("-"*100)
    print(f"Trial:\t{trial.id}\nEpochs:\t{initial_epoch} to {epochs}\nParameters:{trial.parameters}\n")

    if trial.parameters['load_from'] == "":
        print(f"Creating new model for trial {trial.id}...\n")

        # Get hyperparameters
        learning_rate = trial.parameters['learning_rate']
        batch_size = trial.parameters['batch']
        activation = trial.parameters['activation']
        num_transformer_blocks = trial.parameters['num_transformer_blocks']
        ff_dim = trial.parameters['ff_dim']
        head_size = trial.parameters['head_size']
        num_heads = trial.parameters['num_heads']

        # Create model
        model = build_model(X_train.shape[1:],
                            head_size=head_size,
                            num_heads=num_heads,
                            ff_dim=ff_dim,
                            num_transformer_blocks=num_transformer_blocks,
                            mlp_units=mlp_units,
                            mlp_dropout=mlp_dropout,
                            dropout=dropout,
                            )

        model.compile(loss="mean_squared_error",
        optimizer=keras.optimizers.Adam(learning_rate=learning_rate))
        
    else:
        print(f"Loading model from: ", os.path.join(model_dir, trial.parameters['load_from']), "...\n")

        # Loading model
        model = tf.keras.models.load_model(os.path.join(model_dir, trial.parameters['load_from']))


    # Train model
    for i in range(initial_epoch, epochs):
        with tf.device('/device:GPU:0'):
            model.fit(
        X_train, y_train,
        initial_epoch=i,
        epochs = i+1,
        steps_per_epoch=len(X_train) / batch_size,
        batch_size=batch_size,
        validation_data=(X_val, y_val),
        validation_steps=len(X_val) / batch_size,
        shuffle=True)
        y_pred = model.predict(X_val)
        mae = mean_absolute_error(y_val, y_pred)

        print("MAE loss: ", mae)
        study.add_observation(trial=trial, iteration=i,
                              objective=-mae,
                              context={'loss': mae})
        

    study.finalize(trial=trial)
    print(f"Saving model at: ", os.path.join(model_dir, trial.parameters['save_to']))
    model.save(os.path.join(model_dir, trial.parameters['save_to']))

    study.save(model_dir)

----------------------------------------------------------------------------------------------------
Trial:	1
Epochs:	0 to 1
Parameters:{'learning_rate': 0.0031064565916570303, 'batch': 1024, 'num_transformer_blocks': 15, 'ff_dim': 12, 'head_size': 256, 'num_heads': 7, 'activation': 'relu', 'resource': 1, 'rung': 0, 'load_from': '', 'save_to': '1'}

Creating new model for trial 1...

MAE loss:  6.71792338698902
Saving model at:  C:\Users\nehcr\AppData\Local\Temp\tmpyzm0zve0\1


INFO:tensorflow:Assets written to: C:\Users\nehcr\AppData\Local\Temp\tmpyzm0zve0\1\assets


----------------------------------------------------------------------------------------------------
Trial:	2
Epochs:	0 to 1
Parameters:{'learning_rate': 0.0004610553724347624, 'batch': 128, 'num_transformer_blocks': 10, 'ff_dim': 8, 'head_size': 128, 'num_heads': 9, 'activation': 'swish', 'resource': 1, 'rung': 0, 'load_from': '', 'save_to': '2'}

Creating new model for trial 2...

MAE loss:  6.960507845091085
Saving model at:  C:\Users\nehcr\AppData\Local\Temp\tmpyzm0zve0\2


INFO:tensorflow:Assets written to: C:\Users\nehcr\AppData\Local\Temp\tmpyzm0zve0\2\assets


----------------------------------------------------------------------------------------------------
Trial:	3
Epochs:	0 to 1
Parameters:{'learning_rate': 0.005555620885158486, 'batch': 256, 'num_transformer_blocks': 14, 'ff_dim': 14, 'head_size': 128, 'num_heads': 3, 'activation': 'swish', 'resource': 1, 'rung': 0, 'load_from': '', 'save_to': '3'}

Creating new model for trial 3...

MAE loss:  6.129128399810737
Saving model at:  C:\Users\nehcr\AppData\Local\Temp\tmpyzm0zve0\3


INFO:tensorflow:Assets written to: C:\Users\nehcr\AppData\Local\Temp\tmpyzm0zve0\3\assets


----------------------------------------------------------------------------------------------------
Trial:	4
Epochs:	1 to 4
Parameters:{'learning_rate': 0.005555620885158486, 'batch': 256, 'num_transformer_blocks': 14, 'ff_dim': 14, 'head_size': 128, 'num_heads': 3, 'activation': 'swish', 'save_to': '4', 'resource': 3, 'rung': 1, 'load_from': '3'}

Loading model from:  C:\Users\nehcr\AppData\Local\Temp\tmpyzm0zve0\3 ...





Epoch 2/2
MAE loss:  5.941279230634241
Epoch 3/3
MAE loss:  5.779961556693815
Epoch 4/4
MAE loss:  5.8057123333255305
Saving model at:  C:\Users\nehcr\AppData\Local\Temp\tmpyzm0zve0\4


INFO:tensorflow:Assets written to: C:\Users\nehcr\AppData\Local\Temp\tmpyzm0zve0\4\assets


----------------------------------------------------------------------------------------------------
Trial:	5
Epochs:	0 to 1
Parameters:{'learning_rate': 0.0030459022368485356, 'batch': 512, 'num_transformer_blocks': 2, 'ff_dim': 14, 'head_size': 256, 'num_heads': 3, 'activation': 'swish', 'resource': 1, 'rung': 0, 'load_from': '', 'save_to': '5'}

Creating new model for trial 5...

MAE loss:  7.741712847250476
Saving model at:  C:\Users\nehcr\AppData\Local\Temp\tmpyzm0zve0\5


INFO:tensorflow:Assets written to: C:\Users\nehcr\AppData\Local\Temp\tmpyzm0zve0\5\assets


----------------------------------------------------------------------------------------------------
Trial:	6
Epochs:	0 to 1
Parameters:{'learning_rate': 0.0003181616306611627, 'batch': 128, 'num_transformer_blocks': 9, 'ff_dim': 7, 'head_size': 256, 'num_heads': 15, 'activation': 'relu', 'resource': 1, 'rung': 0, 'load_from': '', 'save_to': '6'}

Creating new model for trial 6...

MAE loss:  6.98564167288894
Saving model at:  C:\Users\nehcr\AppData\Local\Temp\tmpyzm0zve0\6


INFO:tensorflow:Assets written to: C:\Users\nehcr\AppData\Local\Temp\tmpyzm0zve0\6\assets


----------------------------------------------------------------------------------------------------
Trial:	7
Epochs:	0 to 1
Parameters:{'learning_rate': 0.0024089375621135873, 'batch': 1024, 'num_transformer_blocks': 13, 'ff_dim': 13, 'head_size': 64, 'num_heads': 3, 'activation': 'relu', 'resource': 1, 'rung': 0, 'load_from': '', 'save_to': '7'}

Creating new model for trial 7...

MAE loss:  6.497158513491242
Saving model at:  C:\Users\nehcr\AppData\Local\Temp\tmpyzm0zve0\7


INFO:tensorflow:Assets written to: C:\Users\nehcr\AppData\Local\Temp\tmpyzm0zve0\7\assets


----------------------------------------------------------------------------------------------------
Trial:	8
Epochs:	1 to 4
Parameters:{'learning_rate': 0.0024089375621135873, 'batch': 1024, 'num_transformer_blocks': 13, 'ff_dim': 13, 'head_size': 64, 'num_heads': 3, 'activation': 'relu', 'save_to': '8', 'resource': 3, 'rung': 1, 'load_from': '7'}

Loading model from:  C:\Users\nehcr\AppData\Local\Temp\tmpyzm0zve0\7 ...

Epoch 2/2
MAE loss:  6.2421968218324135
Epoch 3/3
MAE loss:  6.138065667525829
Epoch 4/4
MAE loss:  6.001417224086274
Saving model at:  C:\Users\nehcr\AppData\Local\Temp\tmpyzm0zve0\8


INFO:tensorflow:Assets written to: C:\Users\nehcr\AppData\Local\Temp\tmpyzm0zve0\8\assets


----------------------------------------------------------------------------------------------------
Trial:	9
Epochs:	0 to 1
Parameters:{'learning_rate': 0.0002762039976376295, 'batch': 64, 'num_transformer_blocks': 13, 'ff_dim': 4, 'head_size': 128, 'num_heads': 13, 'activation': 'relu', 'resource': 1, 'rung': 0, 'load_from': '', 'save_to': '9'}

Creating new model for trial 9...

MAE loss:  7.097218982612798
Saving model at:  C:\Users\nehcr\AppData\Local\Temp\tmpyzm0zve0\9


INFO:tensorflow:Assets written to: C:\Users\nehcr\AppData\Local\Temp\tmpyzm0zve0\9\assets


----------------------------------------------------------------------------------------------------
Trial:	10
Epochs:	0 to 1
Parameters:{'learning_rate': 0.0002767327634802866, 'batch': 1024, 'num_transformer_blocks': 2, 'ff_dim': 3, 'head_size': 32, 'num_heads': 8, 'activation': 'swish', 'resource': 1, 'rung': 0, 'load_from': '', 'save_to': '10'}

Creating new model for trial 10...

MAE loss:  10.246973569540495
Saving model at:  C:\Users\nehcr\AppData\Local\Temp\tmpyzm0zve0\10


INFO:tensorflow:Assets written to: C:\Users\nehcr\AppData\Local\Temp\tmpyzm0zve0\10\assets


----------------------------------------------------------------------------------------------------
Trial:	11
Epochs:	0 to 1
Parameters:{'learning_rate': 0.0003259048442058629, 'batch': 512, 'num_transformer_blocks': 2, 'ff_dim': 11, 'head_size': 256, 'num_heads': 7, 'activation': 'swish', 'resource': 1, 'rung': 0, 'load_from': '', 'save_to': '11'}

Creating new model for trial 11...

MAE loss:  8.384994529963663
Saving model at:  C:\Users\nehcr\AppData\Local\Temp\tmpyzm0zve0\11


INFO:tensorflow:Assets written to: C:\Users\nehcr\AppData\Local\Temp\tmpyzm0zve0\11\assets


----------------------------------------------------------------------------------------------------
Trial:	12
Epochs:	1 to 4
Parameters:{'learning_rate': 0.0031064565916570303, 'batch': 1024, 'num_transformer_blocks': 15, 'ff_dim': 12, 'head_size': 256, 'num_heads': 7, 'activation': 'relu', 'save_to': '12', 'resource': 3, 'rung': 1, 'load_from': '1'}

Loading model from:  C:\Users\nehcr\AppData\Local\Temp\tmpyzm0zve0\1 ...

Epoch 2/2
MAE loss:  6.29123868331996
Epoch 3/3
MAE loss:  6.0847129206288955
Epoch 4/4
MAE loss:  5.961845276088969
Saving model at:  C:\Users\nehcr\AppData\Local\Temp\tmpyzm0zve0\12


INFO:tensorflow:Assets written to: C:\Users\nehcr\AppData\Local\Temp\tmpyzm0zve0\12\assets


----------------------------------------------------------------------------------------------------
Trial:	13
Epochs:	4 to 13
Parameters:{'learning_rate': 0.005555620885158486, 'batch': 256, 'num_transformer_blocks': 14, 'ff_dim': 14, 'head_size': 128, 'num_heads': 3, 'activation': 'swish', 'save_to': '13', 'resource': 9, 'rung': 2, 'load_from': '4'}

Loading model from:  C:\Users\nehcr\AppData\Local\Temp\tmpyzm0zve0\4 ...





Epoch 5/5
MAE loss:  5.549729315793271
Epoch 6/6
MAE loss:  5.436175295636936
Epoch 7/7
MAE loss:  5.350047987986291
Epoch 8/8
MAE loss:  5.369793487335861
Epoch 9/9
MAE loss:  5.3723092691653775
Epoch 10/10
MAE loss:  5.292538710417435
Epoch 11/11
MAE loss:  5.259261885582947
Epoch 12/12
MAE loss:  5.235974853668758
Epoch 13/13
MAE loss:  5.249115452772651
Saving model at:  C:\Users\nehcr\AppData\Local\Temp\tmpyzm0zve0\13


INFO:tensorflow:Assets written to: C:\Users\nehcr\AppData\Local\Temp\tmpyzm0zve0\13\assets


In [13]:
study.get_best_result()

{'Trial-ID': 13,
 'Iteration': 11,
 'activation': 'swish',
 'batch': 256,
 'ff_dim': 14,
 'head_size': 128,
 'learning_rate': 0.005555620885158486,
 'load_from': '4',
 'num_heads': 3,
 'num_transformer_blocks': 14,
 'resource': 9,
 'rung': 2,
 'save_to': '13',
 'Objective': -5.235974853668758,
 'loss': 5.235974853668758}

# Predict

In [27]:
# test
# scaling
test_scaled = scaler.transform(test)

# reshape
test = test_scaled.reshape(test_scaled.shape[0], 1, test_scaled.shape[1])

In [29]:
# predict
model_test = tf.keras.models.load_model(os.path.join(model_dir, trial.parameters['save_to']))
pred = model_test.predict(test)

# 결과 저장
sample_submission = pd.read_csv('./sample_submission.csv')
sample_submission['target'] = pred
sample_submission.to_csv("./submit.csv", index = False)

sample_submission



Unnamed: 0,id,target
0,TEST_000000,29.396742
1,TEST_000001,43.523006
2,TEST_000002,60.596142
3,TEST_000003,32.455292
4,TEST_000004,42.918282
...,...,...
291236,TEST_291236,49.485130
291237,TEST_291237,53.355247
291238,TEST_291238,24.626883
291239,TEST_291239,25.863487
