# Data and Library Importing

In [1]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

from sklearn.metrics import confusion_matrix,classification_report,f1_score

import time
import optuna
from optuna.integration import TFKerasPruningCallback
from optuna.visualization import plot_intermediate_values
from optuna.visualization import plot_optimization_history
from optuna.visualization import plot_param_importances
from optuna.visualization import plot_contour
from optuna.visualization import plot_pareto_front

from tensorflow.keras import Sequential
from tensorflow.keras.backend import clear_session
from tensorflow.keras.layers import Input,Dense,concatenate,Dropout,Conv1D,Bidirectional,Embedding,LSTM,SimpleRNN,GlobalAveragePooling1D
from tensorflow.keras.models import Model,load_model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.utils import plot_model
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.metrics import CategoricalCrossentropy
from tensorflow.random import set_seed

In [2]:
set_seed(123)
np.random.seed(123)

In [3]:
label_latih=np.load('../input/emot-dataset/label_latih.npy', allow_pickle=True)
label_validasi=np.load('../input/emot-dataset/label_validasi.npy', allow_pickle=True)
label_uji=np.load('../input/emot-dataset/label_uji.npy', allow_pickle=True)

In [4]:
embedding_dim = 450
max_length = 80
trunc_type = 'post'
padding_type = 'post'
oov_tok = "<OOV>"

In [5]:
teks_latih_pad=np.load('../input/emot-dataset/teks_latih_pad.npy', allow_pickle=True)
teks_latih=np.load('../input/emot-dataset/teks_latih.npy', allow_pickle=True)
teks_validasi_pad=np.load('../input/emot-dataset/teks_validasi_pad.npy', allow_pickle=True)
teks_validasi=np.load('../input/emot-dataset/teks_validasi.npy', allow_pickle=True)
teks_uji_pad=np.load('../input/emot-dataset/teks_uji_pad.npy', allow_pickle=True)
teks_uji=np.load('../input/emot-dataset/teks_uji.npy', allow_pickle=True)

# Modelling

In [6]:
jumlah_token=17039

In [7]:
def create_model(trial):
  lstm_units=np.zeros(2, dtype=int)
  lstm_units[0]=trial.suggest_int("lstm_units_L1", 16, 48)
  lstm_units[1]=trial.suggest_int("lstm_units_L2", 4, 20)
  do1=trial.suggest_float("dropout_L1",0,0.5)
  do2=trial.suggest_float("dropout_L2",0,0.5)
  lr=trial.suggest_float("learning_rate",1e-5,1e-2)

  inputs=Input(shape=(max_length,))
  layers=Embedding(jumlah_token, embedding_dim,input_length=max_length,trainable=False)(inputs)
  layers=Bidirectional(LSTM(lstm_units[0], return_sequences=True))(layers)
  layers=Dropout(do1)(layers)
  layers=Bidirectional(LSTM(lstm_units[1]))(layers)
  layers=Dropout(do2)(layers)
  layers=Dense(8, activation='relu')(layers)
  output=Dense(5, activation='softmax')(layers)

  model=Model(inputs=[inputs],outputs=[output])

  model.compile(optimizer=Adam(learning_rate=lr),loss="sparse_categorical_crossentropy",metrics=["accuracy"])

  return model

## Hyperband (F1-score)

In [8]:
#Fungsi Objektif
def objective_fun_f1(trial):
  model=create_model(trial)

  start=time.time()
  history=model.fit(teks_latih_pad,label_latih,epochs=25,batch_size=16,callbacks=[TFKerasPruningCallback(trial, "val_loss")],
                    validation_data=(teks_validasi_pad,label_validasi),verbose=0)
  end=time.time()

  Label_pred=model.predict(teks_uji_pad)
  label_pred=Label_pred.argmax(axis=1)
  fscore=f1_score(label_uji, label_pred, average='macro')
  duration=end-start
  model.save('Hyperband (F1-score) Model-{}.h5'.format(trial.number))

  print("F1-Score:",fscore)
  print("Duration:",duration)

  return fscore

In [9]:
study3=optuna.create_study(study_name="HB (F1-score)",
                          direction="maximize", 
                          #sampler=optuna.samplers.BaseSampler(), 
                          pruner=optuna.pruners.HyperbandPruner())
study3.optimize(objective_fun_f1,n_trials=25)

[32m[I 2023-04-05 06:07:41,612][0m A new study created in memory with name: HB (F1-score)[0m
[32m[I 2023-04-05 06:09:27,561][0m Trial 0 finished with value: 0.5059031713485374 and parameters: {'lstm_units_L1': 27, 'lstm_units_L2': 4, 'dropout_L1': 0.46082845920684734, 'dropout_L2': 0.4720206935205589, 'learning_rate': 0.006734945736820542}. Best is trial 0 with value: 0.5059031713485374.[0m


F1-Score: 0.5059031713485374
Duration: 98.39686226844788


[32m[I 2023-04-05 06:11:04,597][0m Trial 1 finished with value: 0.42671938130454035 and parameters: {'lstm_units_L1': 36, 'lstm_units_L2': 9, 'dropout_L1': 0.12719647239902315, 'dropout_L2': 0.14183396038515417, 'learning_rate': 0.0007209932457629583}. Best is trial 0 with value: 0.5059031713485374.[0m


F1-Score: 0.42671938130454035
Duration: 94.95648860931396


[32m[I 2023-04-05 06:12:41,004][0m Trial 2 finished with value: 0.5164978662292354 and parameters: {'lstm_units_L1': 33, 'lstm_units_L2': 12, 'dropout_L1': 0.1297269975986713, 'dropout_L2': 0.23342037880169175, 'learning_rate': 0.004303802773782116}. Best is trial 2 with value: 0.5164978662292354.[0m


F1-Score: 0.5164978662292354
Duration: 93.85601377487183


[32m[I 2023-04-05 06:14:19,250][0m Trial 3 finished with value: 0.5344231215147662 and parameters: {'lstm_units_L1': 43, 'lstm_units_L2': 7, 'dropout_L1': 0.4409196142800319, 'dropout_L2': 0.24465270409662043, 'learning_rate': 0.006700108460756799}. Best is trial 3 with value: 0.5344231215147662.[0m


F1-Score: 0.5344231215147662
Duration: 95.41437435150146


[32m[I 2023-04-05 06:14:33,081][0m Trial 4 pruned. Trial was pruned at epoch 1.[0m
[32m[I 2023-04-05 06:14:46,981][0m Trial 5 pruned. Trial was pruned at epoch 1.[0m
[32m[I 2023-04-05 06:15:02,722][0m Trial 6 pruned. Trial was pruned at epoch 1.[0m
[32m[I 2023-04-05 06:15:46,216][0m Trial 7 pruned. Trial was pruned at epoch 9.[0m
[32m[I 2023-04-05 06:16:33,063][0m Trial 8 pruned. Trial was pruned at epoch 9.[0m
[32m[I 2023-04-05 06:17:15,733][0m Trial 9 pruned. Trial was pruned at epoch 9.[0m
[32m[I 2023-04-05 06:17:57,168][0m Trial 10 pruned. Trial was pruned at epoch 9.[0m
[32m[I 2023-04-05 06:18:38,843][0m Trial 11 pruned. Trial was pruned at epoch 9.[0m
[32m[I 2023-04-05 06:18:57,444][0m Trial 12 pruned. Trial was pruned at epoch 1.[0m
[32m[I 2023-04-05 06:20:35,769][0m Trial 13 finished with value: 0.08 and parameters: {'lstm_units_L1': 23, 'lstm_units_L2': 19, 'dropout_L1': 0.1003077012496093, 'dropout_L2': 0.24704558358744272, 'learning_rate': 0.0047

F1-Score: 0.08
Duration: 94.41371297836304


[32m[I 2023-04-05 06:20:56,904][0m Trial 14 pruned. Trial was pruned at epoch 3.[0m
[32m[I 2023-04-05 06:21:43,431][0m Trial 15 pruned. Trial was pruned at epoch 9.[0m
[32m[I 2023-04-05 06:22:02,273][0m Trial 16 pruned. Trial was pruned at epoch 1.[0m
[32m[I 2023-04-05 06:23:41,460][0m Trial 17 finished with value: 0.35377847752693725 and parameters: {'lstm_units_L1': 37, 'lstm_units_L2': 8, 'dropout_L1': 0.3483068219698509, 'dropout_L2': 0.09793879777529371, 'learning_rate': 0.008053775724621097}. Best is trial 3 with value: 0.5344231215147662.[0m


F1-Score: 0.35377847752693725
Duration: 94.66019368171692


[32m[I 2023-04-05 06:23:55,184][0m Trial 18 pruned. Trial was pruned at epoch 1.[0m
[32m[I 2023-04-05 06:24:16,218][0m Trial 19 pruned. Trial was pruned at epoch 3.[0m
[32m[I 2023-04-05 06:25:03,755][0m Trial 20 pruned. Trial was pruned at epoch 9.[0m
[32m[I 2023-04-05 06:25:49,192][0m Trial 21 pruned. Trial was pruned at epoch 9.[0m
[32m[I 2023-04-05 06:26:36,558][0m Trial 22 pruned. Trial was pruned at epoch 9.[0m
[32m[I 2023-04-05 06:27:01,959][0m Trial 23 pruned. Trial was pruned at epoch 3.[0m
[32m[I 2023-04-05 06:27:20,392][0m Trial 24 pruned. Trial was pruned at epoch 1.[0m


In [10]:
plot_optimization_history(study3)

In [11]:
plot_intermediate_values(study3)

In [12]:
plot_param_importances(study3)

In [13]:
print("Best trial:")
trial = study3.best_trial

print("  Value: ", trial.value)

print("  Params: ")
for key, value in trial.params.items():
    print("    {}: {}".format(key, value))

Best trial:
  Value:  0.5344231215147662
  Params: 
    lstm_units_L1: 43
    lstm_units_L2: 7
    dropout_L1: 0.4409196142800319
    dropout_L2: 0.24465270409662043
    learning_rate: 0.006700108460756799


## Hyperband (Time)

In [14]:
#Fungsi Objektif
def objective_fun_time(trial):
  model=create_model(trial)

  start=time.time()
  history=model.fit(teks_latih_pad,label_latih,epochs=25,batch_size=16,callbacks=[TFKerasPruningCallback(trial, "val_loss")],
                    validation_data=(teks_validasi_pad,label_validasi),verbose=0)
  end=time.time()

  Label_pred=model.predict(teks_uji_pad)
  label_pred=Label_pred.argmax(axis=1)
  fscore=f1_score(label_uji, label_pred, average='macro')
  duration=end-start
  model.save('Hyperband (Model Training Time) Model-{}.h5'.format(trial.number))

  print("F1-Score:",fscore)
  print("Duration:",duration)

  return duration

In [15]:
study4=optuna.create_study(study_name="HB (Time)",
                          direction="minimize", 
                          #sampler=optuna.samplers.BaseSampler(), 
                          pruner=optuna.pruners.HyperbandPruner())
study4.optimize(objective_fun_time,n_trials=25)

[32m[I 2023-04-05 06:27:21,123][0m A new study created in memory with name: HB (Time)[0m
[32m[I 2023-04-05 06:28:57,945][0m Trial 0 finished with value: 94.70431709289551 and parameters: {'lstm_units_L1': 24, 'lstm_units_L2': 10, 'dropout_L1': 0.20902569696932932, 'dropout_L2': 0.24765636340515895, 'learning_rate': 0.00107035676816031}. Best is trial 0 with value: 94.70431709289551.[0m


F1-Score: 0.40938452954503496
Duration: 94.70431709289551


[32m[I 2023-04-05 06:30:34,143][0m Trial 1 finished with value: 93.36746597290039 and parameters: {'lstm_units_L1': 23, 'lstm_units_L2': 7, 'dropout_L1': 0.38133706820067537, 'dropout_L2': 0.34010691312508795, 'learning_rate': 0.009867156610579155}. Best is trial 1 with value: 93.36746597290039.[0m


F1-Score: 0.5022713676680401
Duration: 93.36746597290039


[32m[I 2023-04-05 06:32:12,228][0m Trial 2 finished with value: 96.0071051120758 and parameters: {'lstm_units_L1': 29, 'lstm_units_L2': 14, 'dropout_L1': 0.17114918277443697, 'dropout_L2': 0.33258013795135677, 'learning_rate': 0.0013607946613929944}. Best is trial 1 with value: 93.36746597290039.[0m


F1-Score: 0.4696009594523063
Duration: 96.0071051120758


[32m[I 2023-04-05 06:32:26,878][0m Trial 3 pruned. Trial was pruned at epoch 1.[0m
[32m[I 2023-04-05 06:34:03,578][0m Trial 4 finished with value: 93.93247818946838 and parameters: {'lstm_units_L1': 18, 'lstm_units_L2': 9, 'dropout_L1': 0.22534401421102324, 'dropout_L2': 0.31122566067733565, 'learning_rate': 0.0064931375834277796}. Best is trial 1 with value: 93.36746597290039.[0m


F1-Score: 0.46368069185208505
Duration: 93.93247818946838


[32m[I 2023-04-05 06:35:39,060][0m Trial 5 finished with value: 93.38126730918884 and parameters: {'lstm_units_L1': 27, 'lstm_units_L2': 6, 'dropout_L1': 0.4552614439491725, 'dropout_L2': 0.3108827524691228, 'learning_rate': 0.00665241276018211}. Best is trial 1 with value: 93.36746597290039.[0m


F1-Score: 0.5251218868597312
Duration: 93.38126730918884


[32m[I 2023-04-05 06:35:53,095][0m Trial 6 pruned. Trial was pruned at epoch 1.[0m
[32m[I 2023-04-05 06:36:36,825][0m Trial 7 pruned. Trial was pruned at epoch 9.[0m
[32m[I 2023-04-05 06:36:50,590][0m Trial 8 pruned. Trial was pruned at epoch 1.[0m
[32m[I 2023-04-05 06:37:04,811][0m Trial 9 pruned. Trial was pruned at epoch 1.[0m
[32m[I 2023-04-05 06:37:18,595][0m Trial 10 pruned. Trial was pruned at epoch 1.[0m
[32m[I 2023-04-05 06:37:34,500][0m Trial 11 pruned. Trial was pruned at epoch 1.[0m
[32m[I 2023-04-05 06:37:57,374][0m Trial 12 pruned. Trial was pruned at epoch 3.[0m
[32m[I 2023-04-05 06:39:37,119][0m Trial 13 finished with value: 93.10871744155884 and parameters: {'lstm_units_L1': 21, 'lstm_units_L2': 8, 'dropout_L1': 0.17295053010239997, 'dropout_L2': 0.49213091242605994, 'learning_rate': 0.003730588213964443}. Best is trial 13 with value: 93.10871744155884.[0m


F1-Score: 0.5323912147338993
Duration: 93.10871744155884


[32m[I 2023-04-05 06:39:58,370][0m Trial 14 pruned. Trial was pruned at epoch 3.[0m
[32m[I 2023-04-05 06:40:17,321][0m Trial 15 pruned. Trial was pruned at epoch 1.[0m
[32m[I 2023-04-05 06:41:00,877][0m Trial 16 pruned. Trial was pruned at epoch 9.[0m
[32m[I 2023-04-05 06:41:21,746][0m Trial 17 pruned. Trial was pruned at epoch 3.[0m
[32m[I 2023-04-05 06:41:35,430][0m Trial 18 pruned. Trial was pruned at epoch 1.[0m
[32m[I 2023-04-05 06:42:18,947][0m Trial 19 pruned. Trial was pruned at epoch 9.[0m
[32m[I 2023-04-05 06:43:06,295][0m Trial 20 pruned. Trial was pruned at epoch 9.[0m
[32m[I 2023-04-05 06:43:24,051][0m Trial 21 pruned. Trial was pruned at epoch 1.[0m
[32m[I 2023-04-05 06:45:01,132][0m Trial 22 finished with value: 95.03678846359253 and parameters: {'lstm_units_L1': 22, 'lstm_units_L2': 11, 'dropout_L1': 0.487412691851696, 'dropout_L2': 0.3428081351042633, 'learning_rate': 0.005517468699365767}. Best is trial 13 with value: 93.10871744155884.[0m


F1-Score: 0.4988866853598977
Duration: 95.03678846359253


[32m[I 2023-04-05 06:46:36,310][0m Trial 23 finished with value: 92.408527135849 and parameters: {'lstm_units_L1': 21, 'lstm_units_L2': 7, 'dropout_L1': 0.04612179488717438, 'dropout_L2': 0.42131641419863686, 'learning_rate': 0.008944491017474214}. Best is trial 23 with value: 92.408527135849.[0m


F1-Score: 0.2480213013572766
Duration: 92.408527135849


[32m[I 2023-04-05 06:47:16,496][0m Trial 24 pruned. Trial was pruned at epoch 9.[0m


In [16]:
plot_optimization_history(study4)

In [17]:
plot_intermediate_values(study4)

In [18]:
plot_param_importances(study4)

In [19]:
print("Best trial:")
trial = study4.best_trial

print("  Value: ", trial.value)

print("  Params: ")
for key, value in trial.params.items():
    print("    {}: {}".format(key, value))

Best trial:
  Value:  92.408527135849
  Params: 
    lstm_units_L1: 21
    lstm_units_L2: 7
    dropout_L1: 0.04612179488717438
    dropout_L2: 0.42131641419863686
    learning_rate: 0.008944491017474214
