# Data and Library Importing

In [1]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

from sklearn.metrics import confusion_matrix,classification_report,f1_score

import time
import optuna
from optuna.integration import TFKerasPruningCallback
from optuna.visualization import plot_intermediate_values
from optuna.visualization import plot_optimization_history
from optuna.visualization import plot_param_importances
from optuna.visualization import plot_contour
from optuna.visualization import plot_pareto_front

from tensorflow.keras import Sequential
from tensorflow.keras.backend import clear_session
from tensorflow.keras.layers import Input,Dense,concatenate,Dropout,Conv1D,Bidirectional,Embedding,LSTM,SimpleRNN,GlobalAveragePooling1D
from tensorflow.keras.models import Model,load_model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.utils import plot_model
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.metrics import CategoricalCrossentropy
from tensorflow.random import set_seed

In [2]:
set_seed(123)
np.random.seed(123)

In [3]:
label_latih=np.load('../input/smsa-dataset/label_latih.npy', allow_pickle=True)
label_validasi=np.load('../input/smsa-dataset/label_validasi.npy', allow_pickle=True)
label_uji=np.load('../input/smsa-dataset/label_uji.npy', allow_pickle=True)

In [4]:
embedding_dim = 300
max_length = 96
trunc_type = 'post'
padding_type = 'post'
oov_tok = "<OOV>"

In [5]:
teks_latih_pad=np.load('../input/smsa-dataset/teks_latih_pad.npy', allow_pickle=True)
teks_latih=np.load('../input/smsa-dataset/teks_latih.npy', allow_pickle=True)
teks_validasi_pad=np.load('../input/smsa-dataset/teks_validasi_pad.npy', allow_pickle=True)
teks_validasi=np.load('../input/smsa-dataset/teks_validasi.npy', allow_pickle=True)
teks_uji_pad=np.load('../input/smsa-dataset/teks_uji_pad.npy', allow_pickle=True)
teks_uji=np.load('../input/smsa-dataset/teks_uji.npy', allow_pickle=True)

# Modelling

In [6]:
jumlah_token=17274

In [7]:
def create_model(trial):
  lstm_units=np.zeros(2, dtype=int)
  lstm_units[0]=trial.suggest_int("lstm_units_L1", 32, 160)
  lstm_units[1]=trial.suggest_int("lstm_units_L2", 16, 80)
  do1=trial.suggest_float("dropout_L1",0,0.5)
  do2=trial.suggest_float("dropout_L2",0,0.5)
  lr=trial.suggest_float("learning_rate",1e-5,1e-2)

  inputs=Input(shape=(max_length,))
  layers=Embedding(jumlah_token, embedding_dim,input_length=max_length,trainable=False)(inputs)
  layers=Bidirectional(LSTM(lstm_units[0], return_sequences=True))(layers)
  layers=Dropout(do1)(layers)
  layers=Bidirectional(LSTM(lstm_units[1]))(layers)
  layers=Dropout(do2)(layers)
  layers=Dense(32, activation='relu')(layers)
  output=Dense(3, activation='softmax')(layers)

  model=Model(inputs=[inputs],outputs=[output])

  model.compile(optimizer=Adam(learning_rate=lr),loss="sparse_categorical_crossentropy",metrics=["accuracy"])

  return model

## Hyperband (F1-score)

In [8]:
#Fungsi Objektif
def objective_fun_f1(trial):
  model=create_model(trial)

  start=time.time()
  history=model.fit(teks_latih_pad,label_latih,epochs=25,batch_size=16,callbacks=[TFKerasPruningCallback(trial, "val_loss")],
                    validation_data=(teks_validasi_pad,label_validasi),verbose=0)
  end=time.time()

  Label_pred=model.predict(teks_uji_pad)
  label_pred=Label_pred.argmax(axis=1)
  fscore=f1_score(label_uji, label_pred, average='macro')
  duration=end-start
  model.save('Hyperband (F1-score) Model-{}.h5'.format(trial.number))

  print("F1-Score:",fscore)
  print("Duration:",duration)

  return fscore

In [9]:
study3=optuna.create_study(study_name="HB (F1-score)",
                          direction="maximize", 
                          #sampler=optuna.samplers.BaseSampler(), 
                          pruner=optuna.pruners.HyperbandPruner())
study3.optimize(objective_fun_f1,n_trials=25)

[32m[I 2023-04-05 06:07:34,818][0m A new study created in memory with name: HB (F1-score)[0m
[32m[I 2023-04-05 06:13:39,328][0m Trial 0 finished with value: 0.6324875458599432 and parameters: {'lstm_units_L1': 119, 'lstm_units_L2': 37, 'dropout_L1': 0.2899014726903366, 'dropout_L2': 0.005723234385587472, 'learning_rate': 0.0038230092490018066}. Best is trial 0 with value: 0.6324875458599432.[0m


F1-Score: 0.6324875458599432
Duration: 358.29896664619446


[32m[I 2023-04-05 06:19:30,862][0m Trial 1 finished with value: 0.679148959619439 and parameters: {'lstm_units_L1': 117, 'lstm_units_L2': 30, 'dropout_L1': 0.25070459437642734, 'dropout_L2': 0.24738466690328859, 'learning_rate': 0.009787945966645766}. Best is trial 1 with value: 0.679148959619439.[0m


F1-Score: 0.679148959619439
Duration: 349.3953528404236


[32m[I 2023-04-05 06:20:06,386][0m Trial 2 pruned. Trial was pruned at epoch 1.[0m
[32m[I 2023-04-05 06:20:38,347][0m Trial 3 pruned. Trial was pruned at epoch 1.[0m
[32m[I 2023-04-05 06:21:24,792][0m Trial 4 pruned. Trial was pruned at epoch 1.[0m
[32m[I 2023-04-05 06:27:45,543][0m Trial 5 finished with value: 0.6608183308218997 and parameters: {'lstm_units_L1': 88, 'lstm_units_L2': 73, 'dropout_L1': 0.09872879349573105, 'dropout_L2': 0.10694595972296822, 'learning_rate': 0.005989602145993093}. Best is trial 1 with value: 0.679148959619439.[0m


F1-Score: 0.6608183308218997
Duration: 364.642605304718


[32m[I 2023-04-05 06:33:42,876][0m Trial 6 finished with value: 0.19585687382297554 and parameters: {'lstm_units_L1': 147, 'lstm_units_L2': 73, 'dropout_L1': 0.27429298449901546, 'dropout_L2': 0.2092104544427567, 'learning_rate': 0.006999564603980996}. Best is trial 1 with value: 0.679148959619439.[0m


F1-Score: 0.19585687382297554
Duration: 354.5491921901703


[32m[I 2023-04-05 06:34:47,737][0m Trial 7 pruned. Trial was pruned at epoch 3.[0m
[32m[I 2023-04-05 06:35:23,952][0m Trial 8 pruned. Trial was pruned at epoch 1.[0m
[32m[I 2023-04-05 06:35:54,040][0m Trial 9 pruned. Trial was pruned at epoch 1.[0m
[32m[I 2023-04-05 06:42:23,117][0m Trial 10 finished with value: 0.5585914082173976 and parameters: {'lstm_units_L1': 111, 'lstm_units_L2': 49, 'dropout_L1': 0.02624092524568772, 'dropout_L2': 0.13220993566533112, 'learning_rate': 0.0004432596236695178}. Best is trial 1 with value: 0.679148959619439.[0m


F1-Score: 0.5585914082173976
Duration: 386.1034576892853


[32m[I 2023-04-05 06:42:55,857][0m Trial 11 pruned. Trial was pruned at epoch 1.[0m
[32m[I 2023-04-05 06:45:38,657][0m Trial 12 pruned. Trial was pruned at epoch 9.[0m
[32m[I 2023-04-05 06:46:36,748][0m Trial 13 pruned. Trial was pruned at epoch 3.[0m
[32m[I 2023-04-05 06:49:29,009][0m Trial 14 pruned. Trial was pruned at epoch 9.[0m
[32m[I 2023-04-05 06:50:23,394][0m Trial 15 pruned. Trial was pruned at epoch 3.[0m
[32m[I 2023-04-05 06:50:56,499][0m Trial 16 pruned. Trial was pruned at epoch 1.[0m
[32m[I 2023-04-05 06:51:55,613][0m Trial 17 pruned. Trial was pruned at epoch 3.[0m
[32m[I 2023-04-05 06:54:50,106][0m Trial 18 pruned. Trial was pruned at epoch 9.[0m
[32m[I 2023-04-05 06:55:22,593][0m Trial 19 pruned. Trial was pruned at epoch 1.[0m
[32m[I 2023-04-05 06:56:34,313][0m Trial 20 pruned. Trial was pruned at epoch 3.[0m
[32m[I 2023-04-05 06:58:47,966][0m Trial 21 pruned. Trial was pruned at epoch 9.[0m
[32m[I 2023-04-05 06:59:21,826][0m Trial 

In [10]:
plot_optimization_history(study3)

In [11]:
plot_intermediate_values(study3)

In [12]:
plot_param_importances(study3)

In [13]:
print("Best trial:")
trial = study3.best_trial

print("  Value: ", trial.value)

print("  Params: ")
for key, value in trial.params.items():
    print("    {}: {}".format(key, value))

Best trial:
  Value:  0.679148959619439
  Params: 
    lstm_units_L1: 117
    lstm_units_L2: 30
    dropout_L1: 0.25070459437642734
    dropout_L2: 0.24738466690328859
    learning_rate: 0.009787945966645766


## Hyperband (Time)

In [14]:
#Fungsi Objektif
def objective_fun_time(trial):
  model=create_model(trial)

  start=time.time()
  history=model.fit(teks_latih_pad,label_latih,epochs=25,batch_size=16,callbacks=[TFKerasPruningCallback(trial, "val_loss")],
                    validation_data=(teks_validasi_pad,label_validasi),verbose=0)
  end=time.time()

  Label_pred=model.predict(teks_uji_pad)
  label_pred=Label_pred.argmax(axis=1)
  fscore=f1_score(label_uji, label_pred, average='macro')
  duration=end-start
  model.save('Hyperband (Time) Model-{}.h5'.format(trial.number))

  print("F1-Score:",fscore)
  print("Duration:",duration)

  return duration

In [15]:
study4=optuna.create_study(study_name="HB (Time)",
                          direction="minimize", 
                          #sampler=optuna.samplers.BaseSampler(), 
                          pruner=optuna.pruners.HyperbandPruner())
study4.optimize(objective_fun_time,n_trials=25)

[32m[I 2023-04-05 07:02:48,376][0m A new study created in memory with name: HB (Time)[0m
[32m[I 2023-04-05 07:08:04,069][0m Trial 0 finished with value: 313.6685757637024 and parameters: {'lstm_units_L1': 39, 'lstm_units_L2': 66, 'dropout_L1': 0.25281743574250065, 'dropout_L2': 0.04646965700562372, 'learning_rate': 0.008016087948203921}. Best is trial 0 with value: 313.6685757637024.[0m


F1-Score: 0.7317743669595521
Duration: 313.6685757637024


[32m[I 2023-04-05 07:14:00,840][0m Trial 1 finished with value: 353.58001828193665 and parameters: {'lstm_units_L1': 157, 'lstm_units_L2': 77, 'dropout_L1': 0.04644126783526337, 'dropout_L2': 0.09132818216304905, 'learning_rate': 0.006128135929362766}. Best is trial 0 with value: 313.6685757637024.[0m


F1-Score: 0.6363493481312266
Duration: 353.58001828193665


[32m[I 2023-04-05 07:19:48,725][0m Trial 2 finished with value: 345.8484230041504 and parameters: {'lstm_units_L1': 121, 'lstm_units_L2': 65, 'dropout_L1': 0.1316239997978899, 'dropout_L2': 0.3962471304160229, 'learning_rate': 0.005716592806872711}. Best is trial 0 with value: 313.6685757637024.[0m


F1-Score: 0.6583703703703704
Duration: 345.8484230041504


[32m[I 2023-04-05 07:21:57,812][0m Trial 3 pruned. Trial was pruned at epoch 9.[0m
[32m[I 2023-04-05 07:22:46,897][0m Trial 4 pruned. Trial was pruned at epoch 1.[0m
[32m[I 2023-04-05 07:23:35,572][0m Trial 5 pruned. Trial was pruned at epoch 1.[0m
[32m[I 2023-04-05 07:24:07,174][0m Trial 6 pruned. Trial was pruned at epoch 1.[0m
[32m[I 2023-04-05 07:30:15,091][0m Trial 7 finished with value: 350.54929852485657 and parameters: {'lstm_units_L1': 90, 'lstm_units_L2': 68, 'dropout_L1': 0.09325428080755033, 'dropout_L2': 0.1753261631746459, 'learning_rate': 0.005157634178171709}. Best is trial 0 with value: 313.6685757637024.[0m


F1-Score: 0.6367487482196085
Duration: 350.54929852485657


[32m[I 2023-04-05 07:32:34,252][0m Trial 8 pruned. Trial was pruned at epoch 9.[0m
[32m[I 2023-04-05 07:34:45,525][0m Trial 9 pruned. Trial was pruned at epoch 9.[0m
[32m[I 2023-04-05 07:35:43,890][0m Trial 10 pruned. Trial was pruned at epoch 1.[0m
[32m[I 2023-04-05 07:36:16,590][0m Trial 11 pruned. Trial was pruned at epoch 1.[0m
[32m[I 2023-04-05 07:37:03,907][0m Trial 12 pruned. Trial was pruned at epoch 1.[0m
[32m[I 2023-04-05 07:39:30,422][0m Trial 13 pruned. Trial was pruned at epoch 9.[0m
[32m[I 2023-04-05 07:40:03,650][0m Trial 14 pruned. Trial was pruned at epoch 1.[0m
[32m[I 2023-04-05 07:42:37,176][0m Trial 15 pruned. Trial was pruned at epoch 9.[0m
[32m[I 2023-04-05 07:44:55,046][0m Trial 16 pruned. Trial was pruned at epoch 9.[0m
[32m[I 2023-04-05 07:45:27,571][0m Trial 17 pruned. Trial was pruned at epoch 1.[0m
[32m[I 2023-04-05 07:46:01,891][0m Trial 18 pruned. Trial was pruned at epoch 1.[0m
[32m[I 2023-04-05 07:47:13,716][0m Trial 19

F1-Score: 0.7007624237965383
Duration: 296.7433440685272


[32m[I 2023-04-05 07:53:14,789][0m Trial 21 pruned. Trial was pruned at epoch 1.[0m
[32m[I 2023-04-05 07:53:58,222][0m Trial 22 pruned. Trial was pruned at epoch 1.[0m
[32m[I 2023-04-05 07:59:37,808][0m Trial 23 finished with value: 337.58162784576416 and parameters: {'lstm_units_L1': 101, 'lstm_units_L2': 53, 'dropout_L1': 0.4327065444428516, 'dropout_L2': 0.33468506960997435, 'learning_rate': 0.00414030215307238}. Best is trial 20 with value: 296.7433440685272.[0m


F1-Score: 0.6670861057622034
Duration: 337.58162784576416


[32m[I 2023-04-05 08:00:37,198][0m Trial 24 pruned. Trial was pruned at epoch 3.[0m


In [16]:
plot_optimization_history(study4)

In [17]:
plot_intermediate_values(study4)

In [18]:
plot_param_importances(study4)

In [19]:
print("Best trial:")
trial = study4.best_trial

print("  Value: ", trial.value)

print("  Params: ")
for key, value in trial.params.items():
    print("    {}: {}".format(key, value))

Best trial:
  Value:  296.7433440685272
  Params: 
    lstm_units_L1: 34
    lstm_units_L2: 17
    dropout_L1: 0.45531966513431504
    dropout_L2: 0.0556451174810777
    learning_rate: 0.007307320674875555
