In [1]:
!pip install wandb==0.16.0



In [2]:
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow import keras
from sklearn.model_selection import train_test_split
import wandb
from wandb.keras import WandbCallback
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense, SpatialDropout1D, Bidirectional
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import classification_report, confusion_matrix
import matplotlib.pyplot as plt
import seaborn as sns
from tensorflow.keras.utils import plot_model
import warnings

warnings.filterwarnings(action='ignore')

In [3]:
class_to_idx = {
    '협박 대화' : 0,
    '갈취 대화' : 1,
    '직장 내 괴롭힘 대화' : 2,
    '기타 괴롭힘 대화' : 3,
    '일반 대화' : 4
}

In [4]:
# Load data
train_df = pd.read_csv('custom_train.csv')
test_df = pd.read_csv('custom_test.csv')

# Prepare the data
max_words = 10000
max_len = 500
tokenizer = Tokenizer(num_words=max_words, lower=True)
tokenizer.fit_on_texts(train_df['conversation'].values)

X_train = tokenizer.texts_to_sequences(train_df['conversation'].values)
X_train = pad_sequences(X_train, maxlen=max_len)

train_df['label'] = train_df['class'].apply(lambda x: class_to_idx[x])

label_encoder = LabelEncoder()
y_train = label_encoder.fit_transform(train_df['label'])

X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.2, random_state=42)

y_train = tf.keras.utils.to_categorical(y_train, num_classes=len(np.unique(y_train)))
y_val = tf.keras.utils.to_categorical(y_val, num_classes=len(np.unique(y_val)))

X_test = tokenizer.texts_to_sequences(test_df['conversation'].values)
X_test = pad_sequences(X_test, maxlen=max_len)

In [6]:
wandb.login(key = '개인키')

[34m[1mwandb[0m: Currently logged in as: [33mjanghyeon06[0m ([33mjanghyeon[0m). Use [1m`wandb login --relogin`[0m to force relogin
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /aiffel/.netrc


True

In [7]:
sweep_config = {
    "name": "sweep_test_nlp",
    "metric": {"name": "val_loss", "goal": "minimize"},
    "method": "random",
    "parameters": {
        "learning_rate" : {
            "min" : 0.001,
            "max" : 0.1
            },
        "epoch" : {
            "distribution" : "int_uniform",
            "min" : 5,
            "max" : 10
            }
                    
        }
    }

In [8]:
def train():
    default_config = {
        "vocab" : 1000,
        "embeddings" : 64,
        "units_1" : 128,
        "units_2" : 128,
        "units_3" : 64,
        "class_num" : 5,
        "learning_rate" : 0.005,
        "optimizer" : "adam",
        "loss" : "categorical_crossentropy",
        "metrics" : ["accuracy"],
        "epoch" : 5,
        "batch_size" : 32
    }
    import os

    # 환경 변수 설정
    os.environ['WANDB_AGENT_DISABLE_FLAPPING'] = 'true'

    wandb.init(config = default_config)
    config = wandb.config

    # Model

    model = Sequential()
    model.add(Embedding(config.vocab, config.units_1, input_length=max_len))
    model.add(SpatialDropout1D(0.2))
    model.add(Bidirectional(LSTM(config.units_2, dropout=0.2, recurrent_dropout=0.2)))
    model.add(Dense(config.units_3, activation='relu'))
    model.add(Dense(len(np.unique(train_df['label'])), activation='softmax'))

    # 머신 러닝 학습때 여러가지 optimzier를 사용할 경우나 learning rate를 조절할 경우에는 아래와 같은 형태의 코드를 응용합니다.

    if config.optimizer == 'adam':
        optimizer = keras.optimizers.Adam(learning_rate = config.learning_rate)
    
    model.compile(optimizer = optimizer,
                  loss = config.loss,
                  metrics = config.metrics)

    # WandbCallback 함수는 후술합니다.
    
    EarlyStopping = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=3, min_delta=0.0001)
    
    model.fit(X_train, y_train,
              epochs = config.epoch,
              batch_size = config.batch_size,
              validation_split=0.2,
              callbacks = [EarlyStopping, WandbCallback()])
    
    validation_loss, validation_accuracy = model.evaluate(X_val, y_val, verbose=2)
    
    # wandb.log 함수 안에 기록하고 싶은 정보를 담습니다.
    
    wandb.log({"Validation Accuracy Rate: " : round(validation_accuracy * 100, 2),
               "Validation Error Rate: " : round((1 - validation_accuracy) * 100, 2)})

In [None]:
# entity와 project에 본인의 아이디와 프로젝트명을 입력하세요

sweep_id = wandb.sweep(sweep_config,
                       entity = 'janghyeon',
                       project = 'DLTHON1')

# run the sweep
wandb.agent(sweep_id,
            function=train,
            count=10)

Create sweep with ID: 3dvjoktg
Sweep URL: https://wandb.ai/janghyeon/DLTHON1/sweeps/3dvjoktg


[34m[1mwandb[0m: Agent Starting Run: p2sdwiro with config:
[34m[1mwandb[0m: 	epoch: 6
[34m[1mwandb[0m: 	learning_rate: 0.07732787456900941






Epoch 1/6
INFO:tensorflow:Assets written to: /aiffel/aiffel/wandb/run-20240626_020916-p2sdwiro/files/model-best/assets


[34m[1mwandb[0m: Adding directory to artifact (/aiffel/aiffel/wandb/run-20240626_020916-p2sdwiro/files/model-best)... Done. 0.0s


Epoch 2/6
Epoch 3/6
Epoch 4/6
32/32 - 8s - loss: 1.6130 - accuracy: 0.2040


VBox(children=(Label(value='11.499 MB of 11.499 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
Validation Accuracy Rate:,▁
Validation Error Rate:,▁
accuracy,▃▆▁█
epoch,▁▃▆█
loss,█▁▂▁
val_accuracy,▁██▁
val_loss,▁▆█▂

0,1
Validation Accuracy Rate:,20.4
Validation Error Rate:,79.6
accuracy,0.21937
best_epoch,0.0
best_val_loss,1.60533
epoch,3.0
loss,1.62373
val_accuracy,0.22401
val_loss,1.60877


[34m[1mwandb[0m: Agent Starting Run: pqq23mzo with config:
[34m[1mwandb[0m: 	epoch: 10
[34m[1mwandb[0m: 	learning_rate: 0.0316487819702937


Epoch 1/10
INFO:tensorflow:Assets written to: /aiffel/aiffel/wandb/run-20240626_022051-pqq23mzo/files/model-best/assets


[34m[1mwandb[0m: Adding directory to artifact (/aiffel/aiffel/wandb/run-20240626_022051-pqq23mzo/files/model-best)... Done. 0.0s


Epoch 2/10
INFO:tensorflow:Assets written to: /aiffel/aiffel/wandb/run-20240626_022051-pqq23mzo/files/model-best/assets


[34m[1mwandb[0m: Adding directory to artifact (/aiffel/aiffel/wandb/run-20240626_022051-pqq23mzo/files/model-best)... Done. 0.0s


Epoch 3/10
INFO:tensorflow:Assets written to: /aiffel/aiffel/wandb/run-20240626_022051-pqq23mzo/files/model-best/assets


[34m[1mwandb[0m: Adding directory to artifact (/aiffel/aiffel/wandb/run-20240626_022051-pqq23mzo/files/model-best)... Done. 0.0s


Epoch 4/10
INFO:tensorflow:Assets written to: /aiffel/aiffel/wandb/run-20240626_022051-pqq23mzo/files/model-best/assets


[34m[1mwandb[0m: Adding directory to artifact (/aiffel/aiffel/wandb/run-20240626_022051-pqq23mzo/files/model-best)... Done. 0.0s


Epoch 5/10
Epoch 6/10
INFO:tensorflow:Assets written to: /aiffel/aiffel/wandb/run-20240626_022051-pqq23mzo/files/model-best/assets


[34m[1mwandb[0m: Adding directory to artifact (/aiffel/aiffel/wandb/run-20240626_022051-pqq23mzo/files/model-best)... Done. 0.0s


Epoch 7/10
Epoch 8/10
Epoch 9/10
32/32 - 8s - loss: 1.2138 - accuracy: 0.3931


VBox(children=(Label(value='38.588 MB of 38.588 MB uploaded (0.066 MB deduped)\r'), FloatProgress(value=1.0, m…

0,1
Validation Accuracy Rate:,▁
Validation Error Rate:,▁
accuracy,▁▃▆▇▇▇▇██
epoch,▁▂▃▄▅▅▆▇█
loss,█▆▃▂▁▂▁▁▁
val_accuracy,▁▆███▇█▇█
val_loss,█▄▁▁▁▁▁▂▂

0,1
Validation Accuracy Rate:,39.31
Validation Error Rate:,60.69
accuracy,0.44678
best_epoch,5.0
best_val_loss,1.21462
epoch,8.0
loss,1.1381
val_accuracy,0.41089
val_loss,1.24747


[34m[1mwandb[0m: Agent Starting Run: udpaygii with config:
[34m[1mwandb[0m: 	epoch: 5
[34m[1mwandb[0m: 	learning_rate: 0.010300786147315354


Epoch 1/5
INFO:tensorflow:Assets written to: /aiffel/aiffel/wandb/run-20240626_024711-udpaygii/files/model-best/assets


[34m[1mwandb[0m: Adding directory to artifact (/aiffel/aiffel/wandb/run-20240626_024711-udpaygii/files/model-best)... Done. 0.0s


Epoch 2/5
INFO:tensorflow:Assets written to: /aiffel/aiffel/wandb/run-20240626_024711-udpaygii/files/model-best/assets


[34m[1mwandb[0m: Adding directory to artifact (/aiffel/aiffel/wandb/run-20240626_024711-udpaygii/files/model-best)... Done. 0.0s


Epoch 3/5
Epoch 4/5
INFO:tensorflow:Assets written to: /aiffel/aiffel/wandb/run-20240626_024711-udpaygii/files/model-best/assets


[34m[1mwandb[0m: Adding directory to artifact (/aiffel/aiffel/wandb/run-20240626_024711-udpaygii/files/model-best)... Done. 0.0s


Epoch 5/5
32/32 - 8s - loss: 0.6962 - accuracy: 0.7911


VBox(children=(Label(value='25.046 MB of 25.046 MB uploaded (0.033 MB deduped)\r'), FloatProgress(value=1.0, m…

0,1
Validation Accuracy Rate:,▁
Validation Error Rate:,▁
accuracy,▁▅▇██
epoch,▁▃▅▆█
loss,█▄▂▁▁
val_accuracy,▁▅▅██
val_loss,█▃▄▁▂

0,1
Validation Accuracy Rate:,79.11
Validation Error Rate:,20.89
accuracy,0.89016
best_epoch,3.0
best_val_loss,0.64531
epoch,4.0
loss,0.34705
val_accuracy,0.78094
val_loss,0.70166


[34m[1mwandb[0m: Agent Starting Run: pnt8rl0b with config:
[34m[1mwandb[0m: 	epoch: 5
[34m[1mwandb[0m: 	learning_rate: 0.031603631032838315


Epoch 1/5
INFO:tensorflow:Assets written to: /aiffel/aiffel/wandb/run-20240626_030158-pnt8rl0b/files/model-best/assets


[34m[1mwandb[0m: Adding directory to artifact (/aiffel/aiffel/wandb/run-20240626_030158-pnt8rl0b/files/model-best)... Done. 0.0s


Epoch 2/5
INFO:tensorflow:Assets written to: /aiffel/aiffel/wandb/run-20240626_030158-pnt8rl0b/files/model-best/assets


[34m[1mwandb[0m: Adding directory to artifact (/aiffel/aiffel/wandb/run-20240626_030158-pnt8rl0b/files/model-best)... Done. 0.0s


Epoch 3/5
INFO:tensorflow:Assets written to: /aiffel/aiffel/wandb/run-20240626_030158-pnt8rl0b/files/model-best/assets


[34m[1mwandb[0m: Adding directory to artifact (/aiffel/aiffel/wandb/run-20240626_030158-pnt8rl0b/files/model-best)... Done. 0.0s


Epoch 4/5
Epoch 5/5
32/32 - 8s - loss: 1.6125 - accuracy: 0.2149


VBox(children=(Label(value='25.046 MB of 25.046 MB uploaded (0.033 MB deduped)\r'), FloatProgress(value=1.0, m…

0,1
Validation Accuracy Rate:,▁
Validation Error Rate:,▁
accuracy,▁▆▅█▅
epoch,▁▃▅▆█
loss,█▁▁▁▁
val_accuracy,███▁▁
val_loss,█▄▁▁▄

0,1
Validation Accuracy Rate:,21.49
Validation Error Rate:,78.51
accuracy,0.21968
best_epoch,2.0
best_val_loss,1.61143
epoch,4.0
loss,1.60627
val_accuracy,0.20916
val_loss,1.61665


[34m[1mwandb[0m: Agent Starting Run: oynedlrw with config:
[34m[1mwandb[0m: 	epoch: 7
[34m[1mwandb[0m: 	learning_rate: 0.09617993341237918


Epoch 1/7
INFO:tensorflow:Assets written to: /aiffel/aiffel/wandb/run-20240626_031640-oynedlrw/files/model-best/assets


[34m[1mwandb[0m: Adding directory to artifact (/aiffel/aiffel/wandb/run-20240626_031640-oynedlrw/files/model-best)... Done. 0.0s


Epoch 2/7
  7/101 [=>............................] - ETA: 2:33 - loss: 1.6038 - accuracy: 0.2455