In [1]:
# Report the GPU attached to this runtime (model, driver/CUDA version, memory in use).
!nvidia-smi

Thu Nov 10 06:08:21 2022       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 460.32.03    Driver Version: 460.32.03    CUDA Version: 11.2     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla T4            Off  | 00000000:00:04.0 Off |                    0 |
| N/A   59C    P8    10W /  70W |      0MiB / 15109MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

In [2]:
import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split, KFold

from sklearn.metrics import mean_absolute_error
from sklearn.preprocessing import LabelEncoder

import matplotlib.pyplot as plt
import seaborn as sns

from tqdm.notebook import tqdm
import gc

from sklearn.linear_model import LogisticRegression

In [3]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

In [4]:
def csv_to_parquet(csv_path, save_name):
    """Convert one CSV file into a Parquet file in the working directory.

    Parameters
    ----------
    csv_path : location of the CSV file, handed straight to ``pd.read_csv``.
    save_name : stem of the output file, written as ``./<save_name>.parquet``.

    Returns
    -------
    None. Prints ``<save_name> Done.`` once the Parquet file has been written.
    """
    # Read and write in one expression: the loaded frame is never bound to a
    # name, so it becomes collectable as soon as the statement finishes.
    pd.read_csv(csv_path).to_parquet(f'./{save_name}.parquet')
    gc.collect()
    print(save_name, 'Done.')

In [5]:
# Mount Google Drive into the Colab filesystem so that files under
# /content/drive can be read from this runtime.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [6]:
# Folder on the mounted Drive that holds every competition file. It is defined
# once so that moving the data only requires editing a single line.
DATA_DIR = '/content/drive/MyDrive/머신러닝 엔지니어링/데이콘/제주도 도로 교통량 예측/data'

# Full paths of the individual CSV files (plain strings, same values as before).
df_submission_path = f'{DATA_DIR}/sample_submission.csv'
df_train_path = f'{DATA_DIR}/df_train_V2.csv'
df_test_path = f'{DATA_DIR}/df_test_V2.csv'

In [7]:
# Convert both datasets; the outputs land in the working directory as
# ./train.parquet and ./test.parquet.
csv_to_parquet(csv_path=df_train_path, save_name='train')
csv_to_parquet(csv_path=df_test_path, save_name='test')

train Done.
test Done.


In [8]:
# Load the two converted datasets back from the working directory.
df_train, df_test = map(pd.read_parquet, ('./train.parquet', './test.parquet'))

In [9]:
# 'Unnamed: 0' is presumably the row index that was written out with the CSV
# (TODO confirm); it is removed so it is not fed to the model as a feature.
# Rebinding with errors='ignore' instead of inplace=True keeps this cell
# re-runnable: a second execution no longer raises KeyError because the column
# is already gone.
df_train = df_train.drop(columns='Unnamed: 0', errors='ignore')
df_test = df_test.drop(columns='Unnamed: 0', errors='ignore')

In [10]:
# Every test-frame column stored with the generic "object" dtype is treated as
# a categorical feature.
cat_cols = [column for column, dtype in df_test.dtypes.items() if dtype == "object"]
print("Number of Categorical features: ", len(cat_cols))

Number of Categorical features:  34


In [11]:
# Integer-encode every categorical column. Each encoder is fitted on the
# training values only. Labels that occur solely in the test set are appended
# after the fitted classes, so they get new, higher codes instead of making
# transform() fail on unseen values. Note that the model never sees those
# extra codes during training.
for col in cat_cols:
    le = LabelEncoder().fit(df_train[col])
    df_train[col] = le.transform(df_train[col])

    # A set gives constant-time membership checks; testing `label in ndarray`
    # scans the whole class array for every label, and appending labels one at
    # a time copies that array on every append.
    known_labels = set(le.classes_)
    unseen_labels = [
        label for label in np.unique(df_test[col]) if label not in known_labels
    ]
    if unseen_labels:
        # Single concatenation, preserving the sorted order np.unique produced.
        le.classes_ = np.concatenate(
            [le.classes_, np.array(unseen_labels, dtype=le.classes_.dtype)]
        )
    df_test[col] = le.transform(df_test[col])

In [12]:
# Separate the regression target from the feature columns of the training frame.
y = df_train.loc[:, 'target']
X = df_train.drop('target', axis='columns')

In [13]:
# Number of feature columns, i.e. the width of the network's input.
X.shape[1]

36

In [20]:
def build_model(input_dim=None, learning_rate=0.001):
  """Build and compile the fully connected regression network.

  The network is a funnel of Dense layers (36 -> 18 -> 9 -> 4 -> 1) with ReLU
  activations on the hidden layers, Dropout(0.5) after the 18-, 9- and 4-unit
  layers, and a single linear output unit.

  Args:
    input_dim: Number of input features. When omitted, the column count of the
      notebook-level feature frame ``X`` is used, which is what this function
      always did before the parameter existed.
    learning_rate: Learning rate handed to the Adam optimizer.

  Returns:
    A compiled ``keras.Sequential`` model with mean absolute error as the loss
    and MAE / MSE reported as metrics.
  """
  if input_dim is None:
    # Backward-compatible default: read the width from the global frame.
    input_dim = len(X.keys())

  model = keras.Sequential([
    # Explicit input spec instead of passing `input_shape=` to the first layer.
    keras.Input(shape=(input_dim,)),
    layers.Dense(36, activation='relu'),
    layers.Dense(18, activation='relu'),
    layers.Dropout(0.5),
    layers.Dense(9, activation='relu'),
    layers.Dropout(0.5),
    layers.Dense(4, activation='relu'),
    layers.Dropout(0.5),
    layers.Dense(1)
  ])

  optimizer = tf.keras.optimizers.Adam(learning_rate)

  # 'mae' is listed as a metric too so that the training history exposes the
  # 'mae' / 'val_mae' keys alongside 'loss'.
  model.compile(loss='mae',
                optimizer=optimizer,
                metrics=['mae', 'mse'])
  return model

In [21]:
# Instantiate a fresh, untrained network using build_model's defaults.
model = build_model()

In [22]:
# Print the layer stack with each layer's output shape and parameter count.
model.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_6 (Dense)             (None, 36)                1332      
                                                                 
 dense_7 (Dense)             (None, 18)                666       
                                                                 
 dropout_4 (Dropout)         (None, 18)                0         
                                                                 
 dense_8 (Dense)             (None, 9)                 171       
                                                                 
 dropout_5 (Dropout)         (None, 9)                 0         
                                                                 
 dense_9 (Dense)             (None, 4)                 40        
                                                                 
 dropout_6 (Dropout)         (None, 4)                

In [23]:
# Stop training once val_loss has gone 5 consecutive epochs without improving.
# restore_best_weights=True rolls the model back to its best epoch when the
# stop triggers; without it the model keeps the weights from the end of the
# patience window, and those are what later predictions would use.
early_stop = keras.callbacks.EarlyStopping(
    monitor='val_loss',
    patience=5,
    restore_best_weights=True,
)

# Multiply the learning rate by 0.1 after 3 epochs in which val_loss did not
# improve by at least 1e-4. The patience is shorter than early stopping's, so a
# reduced learning rate gets a chance to help before training is aborted.
reduce_lr = keras.callbacks.ReduceLROnPlateau(
    monitor='val_loss',
    factor=0.1,
    patience=3,
    verbose=0,
    mode='auto',
    min_delta=0.0001,
    cooldown=0,
    min_lr=0,
)

In [None]:
# Train for at most 10 epochs, holding out 0.5% of the rows as validation data
# so the val_loss-driven callbacks have a signal to monitor.
history = model.fit(
    X,
    y,
    epochs=10,
    validation_split=0.005,
    callbacks=[early_stop, reduce_lr],
    verbose=1,
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
   657/146179 [..............................] - ETA: 7:49 - loss: 13.2716 - mae: 13.2716 - mse: 255.0636

In [None]:
import matplotlib.pyplot as plt

def plot_history(history):
  """Plot the per-epoch MAE and MSE learning curves of a training run.

  Two stacked panels are drawn: mean absolute error on top, mean squared error
  below. Each panel shows the training curve and, when the run recorded
  validation metrics, the validation curve.

  The y-axis starts at 0 and its upper bound is left to matplotlib. Fixed
  upper limits would hide the curves whenever the errors exceed them, and the
  axis labels carry no unit because the target's unit is not known here.

  Args:
    history: Object with a ``history`` mapping of metric name to per-epoch
      values (must contain 'mae' and 'mse'; 'val_mae' / 'val_mse' are
      optional) and an ``epoch`` sequence of epoch indices.
  """
  hist = pd.DataFrame(history.history)
  hist['epoch'] = history.epoch

  fig, (ax_mae, ax_mse) = plt.subplots(2, 1, figsize=(8, 12))

  panels = (
      (ax_mae, 'mae', 'Mean Abs Error'),
      (ax_mse, 'mse', 'Mean Square Error'),
  )
  for ax, metric, ylabel in panels:
    ax.plot(hist['epoch'], hist[metric], label='Train Error')

    # Validation metrics only exist when the run used validation data.
    val_metric = f'val_{metric}'
    if val_metric in hist:
      ax.plot(hist['epoch'], hist[val_metric], label='Val Error')

    ax.set_xlabel('Epoch')
    ax.set_ylabel(ylabel)
    ax.set_ylim(bottom=0)
    ax.legend()

  plt.show()

# Draw the learning curves recorded in `history`.
plot_history(history)

In [None]:
# Select the test columns by the training feature names so the network gets
# exactly the features it was trained on, in the same order. The frame is
# handed over as a plain matrix, so a reordered or extra column would
# otherwise silently shift features or break the input shape.
y_pred = model.predict(df_test[X.columns])