<a href="https://colab.research.google.com/github/naufalhawari/gemastik-data-minik-esteh/blob/main/esteh-juara-ver1ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Creating Dataset

## Installing Required Libraries

In [2]:
!pip install tensorflow_text

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting tensorflow_text
  Downloading tensorflow_text-2.12.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (6.0 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m6.0/6.0 MB[0m [31m90.7 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: tensorflow_text
Successfully installed tensorflow_text-2.12.1


In [3]:
!pip install sastrawi

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting sastrawi
  Downloading Sastrawi-1.0.1-py2.py3-none-any.whl (209 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m209.7/209.7 kB[0m [31m18.4 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: sastrawi
Successfully installed sastrawi-1.0.1


## Import Libraries

In [4]:
import pandas as pd
import numpy as np

import tensorflow as tf
import tensorflow_hub as hub
import tensorflow_text as text

import seaborn as sns
import matplotlib.pyplot as plt

# Show up to 50 columns when a wide DataFrame is displayed. "display.max_columns"
# is the documented option key; the previous dotted spelling only resolved
# because pandas treats the key as a regular expression.
pd.set_option("display.max_columns", 50)

## Retrieve and Preprocess Dataset

In [5]:
# file_path = "/content/drive/MyDrive/dataset/esteh-dataset.xlsx"
# URL of the raw review dataset (a CSV hosted on GitHub). In the cells visible
# here it is only consumed by the commented-out create_dataset / pd.read_csv calls.
file_path = "https://raw.githubusercontent.com/naufalhawari/dataset-collection/main/esteh-dataset-1728.csv"


In [6]:
from Sastrawi.Stemmer.StemmerFactory import StemmerFactory

_STEMMER = None  # Sastrawi stemmer, created lazily and shared by every stemming() call


def stemming(teks):
  """Return `teks` after passing it through the Sastrawi stemmer.

  The stemmer is built once, on first use, and reused afterwards. Building a
  new StemmerFactory and stemmer for every call is wasteful when this function
  is applied to a whole column row by row.

  Args:
    teks: The text to stem.

  Returns:
    The value returned by the stemmer's `stem` method for `teks`.
  """
  global _STEMMER
  if _STEMMER is None:
    _STEMMER = StemmerFactory().create_stemmer()

  return _STEMMER.stem(teks)


def create_dataset(file_path, sheet_names = None):
  """Read the raw review CSV and return it cleaned and de-duplicated.

  Processing order: rows with any missing value are dropped, the "keluhan"
  and "saran" columns are merged into a single "kritik" column (row-wise
  maximum) and then removed, the "ulasan" text is lower-cased and stemmed,
  and finally duplicated rows are removed (first occurrence kept).

  Args:
    file_path: Path or URL passed to `pd.read_csv`.
    sheet_names: Not used by this function; kept for call compatibility.

  Returns:
    The cleaned DataFrame.
  """
  return (
      pd.read_csv(file_path)
      .dropna()
      .assign(kritik = lambda frame: frame[["keluhan", "saran"]].max(axis = 1))
      .drop(columns = ["saran", "keluhan"])
      # Lower-case first, then stem each review.
      .assign(ulasan = lambda frame: frame["ulasan"].str.lower().apply(stemming))
      .drop_duplicates()
  )

In [7]:
# create new dataset
# df = create_dataset(file_path)

# import existing dataset
# df = pd.read_csv(file_path)
# df = df.loc[~df.duplicated()].copy()

# Load the existing train/test split from the CSV files hosted on GitHub.
train, test = (
    pd.read_csv(csv_url)
    for csv_url in (
        "https://raw.githubusercontent.com/naufalhawari/gemastik-data-minik-esteh/main/esteh-train.csv",
        "https://raw.githubusercontent.com/naufalhawari/gemastik-data-minik-esteh/main/esteh-test.csv",
    )
)

In [8]:
# Preview the first rows of the training split.
train.head()

Unnamed: 0,apresiasi,kritik,ulasan
0,0,1,dari cara masuk login nya eror terus di ulang ...
1,0,1,telah isi clm lalu aplikasi ini belum sempat s...
2,1,0,mantap sekali aplikasi bisa ngurus surat tanpa...
3,0,1,menu jobfair kaga bisa buka penuh tuh server h...
4,1,1,udah 4 bulan pake lumayan juga buat cari infor...


In [9]:
# Count fully duplicated rows in each split (train, test).
train.duplicated().sum(), test.duplicated().sum()

(0, 0)

In [10]:
# Number of rows in the train and test splits.
len(train), len(test)

(1348, 337)

## Splitting Dataset

In [11]:
# create new splitted data

# from sklearn.model_selection import train_test_split

# X = df.ulasan
# y = df.drop("ulasan", axis = 1)

# X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, stratify = df.apresiasi)

# train = y_train.copy()
# test = y_test.copy()
    
# train["ulasan"] = X_train
# test["ulasan"] = X_test


In [12]:
# save splitted data

# train.to_csv("esteh-train.csv", index = False)
# test.to_csv("esteh-test.csv", index = False)

In [13]:
# Separate the review text (model input) from every other column (the targets)
# in the already-split train and test frames.
TEXT_COLUMN = "ulasan"

X_train, y_train = train[TEXT_COLUMN], train.drop(columns = TEXT_COLUMN)
X_test, y_test = test[TEXT_COLUMN], test.drop(columns = TEXT_COLUMN)

# RNN Model

## Build Text Tokenizer

In [54]:
# Text-to-token-id layer whose vocabulary is capped at VOCAB_SIZE entries and
# learned from the training text only.
VOCAB_SIZE = 1200

encoder = tf.keras.layers.TextVectorization(max_tokens = VOCAB_SIZE)
encoder.adapt(X_train)

In [55]:
# Size of the vocabulary the encoder holds after adapt().
len(encoder.get_vocabulary())

1200

## Creating Model Architecture

In [56]:
RNN_UNITS = 64        # units in every recurrent layer
EMBEDDING_DIM = 100   # size of the learned token embeddings


def build_rnn_classifier(recurrent_layers, text_encoder):
  """Build one text classifier; variants differ only in their recurrent stack.

  Layer order: string input -> `text_encoder` -> masked embedding ->
  `recurrent_layers` -> Dense(32, relu) -> Dropout(0.2) -> Dense(2, sigmoid).

  Args:
    recurrent_layers: List of Keras layers inserted between the embedding and
      the dense head. Every layer except the last must return sequences.
    text_encoder: TextVectorization layer that has already been adapted; its
      vocabulary size sets the embedding's `input_dim`.

  Returns:
    An uncompiled `tf.keras.Sequential` model.
  """
  return tf.keras.Sequential([
      tf.keras.layers.Input(shape=(), dtype = "string"),
      text_encoder,
      tf.keras.layers.Embedding(
          input_dim=len(text_encoder.get_vocabulary()),
          output_dim=EMBEDDING_DIM,
          # Use masking to handle the variable sequence lengths
          mask_zero=True),
      *recurrent_layers,
      tf.keras.layers.Dense(32, activation='relu'),
      tf.keras.layers.Dropout(0.2),
      tf.keras.layers.Dense(2, activation='sigmoid'),
  ])


# LSTM family: single layer, bidirectional, and two stacked bidirectional layers.
lstm = build_rnn_classifier(
    [tf.keras.layers.LSTM(RNN_UNITS)],
    encoder,
)

bi_lstm = build_rnn_classifier(
    [tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(RNN_UNITS))],
    encoder,
)

stacked_bi_lstm = build_rnn_classifier(
    [
        # The first layer must emit the full sequence so the second can consume it.
        tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(RNN_UNITS, return_sequences = True)),
        tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(RNN_UNITS)),
    ],
    encoder,
)

# GRU family: same three shapes as the LSTM family.
gru = build_rnn_classifier(
    [tf.keras.layers.GRU(RNN_UNITS)],
    encoder,
)

bi_gru = build_rnn_classifier(
    [tf.keras.layers.Bidirectional(tf.keras.layers.GRU(RNN_UNITS))],
    encoder,
)

stacked_bi_gru = build_rnn_classifier(
    [
        tf.keras.layers.Bidirectional(tf.keras.layers.GRU(RNN_UNITS, return_sequences = True)),
        tf.keras.layers.Bidirectional(tf.keras.layers.GRU(RNN_UNITS)),
    ],
    encoder,
)

## Model Compiling

In [57]:
# All RNN variants are compiled with identical settings so their results can be
# compared directly. Each model receives its own optimizer, loss and metric
# instances. `metrics` is passed as a list, which is the form documented for
# Model.compile.
for rnn_model in (lstm, bi_lstm, stacked_bi_lstm, gru, bi_gru, stacked_bi_gru):
    rnn_model.compile(
        optimizer = tf.keras.optimizers.Adam(learning_rate = 0.00003),
        loss = tf.keras.losses.BinaryCrossentropy(),
        metrics = [tf.keras.metrics.BinaryAccuracy()],
    )


## Model Training

In [58]:
# Train the single-layer LSTM; the test split is evaluated after every epoch.
lstm.fit(
    X_train,
    y_train,
    batch_size = 16,
    epochs = 25,
    validation_data = (X_test, y_test),
)

Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25
Epoch 8/25
Epoch 9/25
Epoch 10/25
Epoch 11/25
Epoch 12/25
Epoch 13/25
Epoch 14/25
Epoch 15/25
Epoch 16/25
Epoch 17/25
Epoch 18/25
Epoch 19/25
Epoch 20/25
Epoch 21/25
Epoch 22/25
Epoch 23/25
Epoch 24/25
Epoch 25/25


<keras.callbacks.History at 0x7f3b202806d0>

In [59]:
# Train the bidirectional LSTM; the test split is evaluated after every epoch.
bi_lstm.fit(
    X_train,
    y_train,
    batch_size = 16,
    epochs = 25,
    validation_data = (X_test, y_test),
)

Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25
Epoch 8/25
Epoch 9/25
Epoch 10/25
Epoch 11/25
Epoch 12/25
Epoch 13/25
Epoch 14/25
Epoch 15/25
Epoch 16/25
Epoch 17/25
Epoch 18/25
Epoch 19/25
Epoch 20/25
Epoch 21/25
Epoch 22/25
Epoch 23/25
Epoch 24/25
Epoch 25/25


<keras.callbacks.History at 0x7f3aa10942e0>

In [60]:
# Train the stacked bidirectional LSTM; the test split is evaluated after every epoch.
stacked_bi_lstm.fit(
    X_train,
    y_train,
    batch_size = 16,
    epochs = 25,
    validation_data = (X_test, y_test),
)

Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25
Epoch 8/25
Epoch 9/25
Epoch 10/25
Epoch 11/25
Epoch 12/25
Epoch 13/25
Epoch 14/25
Epoch 15/25
Epoch 16/25
Epoch 17/25
Epoch 18/25
Epoch 19/25
Epoch 20/25
Epoch 21/25
Epoch 22/25
Epoch 23/25
Epoch 24/25
Epoch 25/25


<keras.callbacks.History at 0x7f3a86e48970>

In [61]:
# Train the single-layer GRU; the test split is evaluated after every epoch.
gru.fit(
    X_train,
    y_train,
    batch_size = 16,
    epochs = 25,
    validation_data = (X_test, y_test),
)

Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25
Epoch 8/25
Epoch 9/25
Epoch 10/25
Epoch 11/25
Epoch 12/25
Epoch 13/25
Epoch 14/25
Epoch 15/25
Epoch 16/25
Epoch 17/25
Epoch 18/25
Epoch 19/25
Epoch 20/25
Epoch 21/25
Epoch 22/25
Epoch 23/25
Epoch 24/25
Epoch 25/25


<keras.callbacks.History at 0x7f3a501a43d0>

In [62]:
# Train the bidirectional GRU; the test split is evaluated after every epoch.
bi_gru.fit(
    X_train,
    y_train,
    batch_size = 16,
    epochs = 25,
    validation_data = (X_test, y_test),
)

Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25
Epoch 8/25
Epoch 9/25
Epoch 10/25
Epoch 11/25
Epoch 12/25
Epoch 13/25
Epoch 14/25
Epoch 15/25
Epoch 16/25
Epoch 17/25
Epoch 18/25
Epoch 19/25
Epoch 20/25
Epoch 21/25
Epoch 22/25
Epoch 23/25
Epoch 24/25
Epoch 25/25


<keras.callbacks.History at 0x7f3a48572da0>

In [63]:
# Train the stacked bidirectional GRU; the test split is evaluated after every epoch.
stacked_bi_gru.fit(
    X_train,
    y_train,
    batch_size = 16,
    epochs = 25,
    validation_data = (X_test, y_test),
)

Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25
Epoch 8/25
Epoch 9/25
Epoch 10/25
Epoch 11/25
Epoch 12/25
Epoch 13/25
Epoch 14/25
Epoch 15/25
Epoch 16/25
Epoch 17/25
Epoch 18/25
Epoch 19/25
Epoch 20/25
Epoch 21/25
Epoch 22/25
Epoch 23/25
Epoch 24/25
Epoch 25/25


<keras.callbacks.History at 0x7f3a38705e40>

## Model Evaluation

In [64]:
from sklearn.metrics import classification_report

# Display names for the two output columns of the report.
class_labels = ["apresiasi", "kritik"]

# A label counts as predicted when its sigmoid score exceeds 0.5.
y_pred = (lstm.predict(X_test) > 0.5)
report = classification_report(y_test, y_pred, target_names=class_labels)
print(report)

              precision    recall  f1-score   support

   apresiasi       0.97      0.67      0.79       123
      kritik       0.95      0.92      0.93       262

   micro avg       0.95      0.84      0.89       385
   macro avg       0.96      0.80      0.86       385
weighted avg       0.95      0.84      0.89       385
 samples avg       0.96      0.89      0.91       385



In [65]:
# Bidirectional LSTM: threshold the sigmoid scores at 0.5 and report per-label metrics.
y_pred = (bi_lstm.predict(X_test) > 0.5)
report = classification_report(y_test, y_pred, target_names=class_labels)
print(report)

              precision    recall  f1-score   support

   apresiasi       0.92      0.64      0.76       123
      kritik       0.94      0.94      0.94       262

   micro avg       0.94      0.84      0.89       385
   macro avg       0.93      0.79      0.85       385
weighted avg       0.93      0.84      0.88       385
 samples avg       0.94      0.89      0.90       385



In [66]:
# Stacked bidirectional LSTM: threshold the sigmoid scores at 0.5 and report per-label metrics.
y_pred = (stacked_bi_lstm.predict(X_test) > 0.5)
report = classification_report(y_test, y_pred, target_names=class_labels)
print(report)

              precision    recall  f1-score   support

   apresiasi       0.94      0.76      0.84       123
      kritik       0.96      0.93      0.94       262

   micro avg       0.95      0.88      0.91       385
   macro avg       0.95      0.85      0.89       385
weighted avg       0.95      0.88      0.91       385
 samples avg       0.96      0.91      0.92       385



In [67]:
# GRU: threshold the sigmoid scores at 0.5 and report per-label metrics.
y_pred = (gru.predict(X_test) > 0.5)
report = classification_report(y_test, y_pred, target_names=class_labels)
print(report)

              precision    recall  f1-score   support

   apresiasi       0.86      0.59      0.70       123
      kritik       0.94      0.93      0.93       262

   micro avg       0.92      0.82      0.87       385
   macro avg       0.90      0.76      0.82       385
weighted avg       0.92      0.82      0.86       385
 samples avg       0.93      0.86      0.88       385



In [68]:
# Bidirectional GRU: threshold the sigmoid scores at 0.5 and report per-label metrics.
y_pred = (bi_gru.predict(X_test) > 0.5)
report = classification_report(y_test, y_pred, target_names=class_labels)
print(report)

              precision    recall  f1-score   support

   apresiasi       0.94      0.74      0.83       123
      kritik       0.93      0.96      0.95       262

   micro avg       0.93      0.89      0.91       385
   macro avg       0.94      0.85      0.89       385
weighted avg       0.93      0.89      0.91       385
 samples avg       0.95      0.92      0.93       385



In [69]:
# Stacked bidirectional GRU: threshold the sigmoid scores at 0.5 and report per-label metrics.
y_pred = (stacked_bi_gru.predict(X_test) > 0.5)
report = classification_report(y_test, y_pred, target_names=class_labels)
print(report)

              precision    recall  f1-score   support

   apresiasi       0.93      0.82      0.87       123
      kritik       0.95      0.95      0.95       262

   micro avg       0.95      0.91      0.93       385
   macro avg       0.94      0.89      0.91       385
weighted avg       0.95      0.91      0.93       385
 samples avg       0.96      0.94      0.94       385



## Save Model

In [70]:
# lstm.save("lstm-1")
# bi_lstm.save("bi-lstm-1")
# stacked_bi_lstm.save("stacked-bi-lstm-1")
# gru.save("gru-1")
# bi_gru.save("bi-gru-1")
# stacked_bi_gru.save("stacked-bi-gru-1")

# BERT Model

## Mapping BERT Preprocessor and Encoder

In [14]:
# Lookup table: short model name -> TF Hub URL of the encoder for that model.
# build_bert_model reads it to find the encoder handle; the keys must also exist
# in map_model_to_preprocess so the matching preprocessing model can be found.
map_name_to_handle = {
    'bert_en_uncased_L-12_H-768_A-12':
        'https://tfhub.dev/tensorflow/bert_en_uncased_L-12_H-768_A-12/3',
    'bert_en_cased_L-12_H-768_A-12':
        'https://tfhub.dev/tensorflow/bert_en_cased_L-12_H-768_A-12/3',
    'bert_multi_cased_L-12_H-768_A-12':
        'https://tfhub.dev/tensorflow/bert_multi_cased_L-12_H-768_A-12/3',
    'small_bert/bert_en_uncased_L-2_H-128_A-2':
        'https://tfhub.dev/tensorflow/small_bert/bert_en_uncased_L-2_H-128_A-2/1',
    'small_bert/bert_en_uncased_L-2_H-256_A-4':
        'https://tfhub.dev/tensorflow/small_bert/bert_en_uncased_L-2_H-256_A-4/1',
    'small_bert/bert_en_uncased_L-2_H-512_A-8':
        'https://tfhub.dev/tensorflow/small_bert/bert_en_uncased_L-2_H-512_A-8/1',
    'small_bert/bert_en_uncased_L-2_H-768_A-12':
        'https://tfhub.dev/tensorflow/small_bert/bert_en_uncased_L-2_H-768_A-12/1',
    'small_bert/bert_en_uncased_L-4_H-128_A-2':
        'https://tfhub.dev/tensorflow/small_bert/bert_en_uncased_L-4_H-128_A-2/1',
    'small_bert/bert_en_uncased_L-4_H-256_A-4':
        'https://tfhub.dev/tensorflow/small_bert/bert_en_uncased_L-4_H-256_A-4/1',
    'small_bert/bert_en_uncased_L-4_H-512_A-8':
        'https://tfhub.dev/tensorflow/small_bert/bert_en_uncased_L-4_H-512_A-8/1',
    'small_bert/bert_en_uncased_L-4_H-768_A-12':
        'https://tfhub.dev/tensorflow/small_bert/bert_en_uncased_L-4_H-768_A-12/1',
    'small_bert/bert_en_uncased_L-6_H-128_A-2':
        'https://tfhub.dev/tensorflow/small_bert/bert_en_uncased_L-6_H-128_A-2/1',
    'small_bert/bert_en_uncased_L-6_H-256_A-4':
        'https://tfhub.dev/tensorflow/small_bert/bert_en_uncased_L-6_H-256_A-4/1',
    'small_bert/bert_en_uncased_L-6_H-512_A-8':
        'https://tfhub.dev/tensorflow/small_bert/bert_en_uncased_L-6_H-512_A-8/1',
    'small_bert/bert_en_uncased_L-6_H-768_A-12':
        'https://tfhub.dev/tensorflow/small_bert/bert_en_uncased_L-6_H-768_A-12/1',
    'small_bert/bert_en_uncased_L-8_H-128_A-2':
        'https://tfhub.dev/tensorflow/small_bert/bert_en_uncased_L-8_H-128_A-2/1',
    'small_bert/bert_en_uncased_L-8_H-256_A-4':
        'https://tfhub.dev/tensorflow/small_bert/bert_en_uncased_L-8_H-256_A-4/1',
    'small_bert/bert_en_uncased_L-8_H-512_A-8':
        'https://tfhub.dev/tensorflow/small_bert/bert_en_uncased_L-8_H-512_A-8/1',
    'small_bert/bert_en_uncased_L-8_H-768_A-12':
        'https://tfhub.dev/tensorflow/small_bert/bert_en_uncased_L-8_H-768_A-12/1',
    'small_bert/bert_en_uncased_L-10_H-128_A-2':
        'https://tfhub.dev/tensorflow/small_bert/bert_en_uncased_L-10_H-128_A-2/1',
    'small_bert/bert_en_uncased_L-10_H-256_A-4':
        'https://tfhub.dev/tensorflow/small_bert/bert_en_uncased_L-10_H-256_A-4/1',
    'small_bert/bert_en_uncased_L-10_H-512_A-8':
        'https://tfhub.dev/tensorflow/small_bert/bert_en_uncased_L-10_H-512_A-8/1',
    'small_bert/bert_en_uncased_L-10_H-768_A-12':
        'https://tfhub.dev/tensorflow/small_bert/bert_en_uncased_L-10_H-768_A-12/1',
    'small_bert/bert_en_uncased_L-12_H-128_A-2':
        'https://tfhub.dev/tensorflow/small_bert/bert_en_uncased_L-12_H-128_A-2/1',
    'small_bert/bert_en_uncased_L-12_H-256_A-4':
        'https://tfhub.dev/tensorflow/small_bert/bert_en_uncased_L-12_H-256_A-4/1',
    'small_bert/bert_en_uncased_L-12_H-512_A-8':
        'https://tfhub.dev/tensorflow/small_bert/bert_en_uncased_L-12_H-512_A-8/1',
    'small_bert/bert_en_uncased_L-12_H-768_A-12':
        'https://tfhub.dev/tensorflow/small_bert/bert_en_uncased_L-12_H-768_A-12/1',
    'albert_en_base':
        'https://tfhub.dev/tensorflow/albert_en_base/2',
    'electra_small':
        'https://tfhub.dev/google/electra_small/2',
    'electra_base':
        'https://tfhub.dev/google/electra_base/2',
    'experts_pubmed':
        'https://tfhub.dev/google/experts/bert/pubmed/2',
    'experts_wiki_books':
        'https://tfhub.dev/google/experts/bert/wiki_books/2',
    'talking-heads_base':
        'https://tfhub.dev/tensorflow/talkheads_ggelu_bert_en_base/1',
}

# Lookup table: short model name -> TF Hub URL of the preprocessing model to
# pair with that encoder. It uses the same keys as map_name_to_handle;
# build_bert_model indexes both tables with the same name.
map_model_to_preprocess = {
    'bert_en_uncased_L-12_H-768_A-12':
        'https://tfhub.dev/tensorflow/bert_en_uncased_preprocess/3',
    'bert_en_cased_L-12_H-768_A-12':
        'https://tfhub.dev/tensorflow/bert_en_cased_preprocess/3',
    'small_bert/bert_en_uncased_L-2_H-128_A-2':
        'https://tfhub.dev/tensorflow/bert_en_uncased_preprocess/3',
    'small_bert/bert_en_uncased_L-2_H-256_A-4':
        'https://tfhub.dev/tensorflow/bert_en_uncased_preprocess/3',
    'small_bert/bert_en_uncased_L-2_H-512_A-8':
        'https://tfhub.dev/tensorflow/bert_en_uncased_preprocess/3',
    'small_bert/bert_en_uncased_L-2_H-768_A-12':
        'https://tfhub.dev/tensorflow/bert_en_uncased_preprocess/3',
    'small_bert/bert_en_uncased_L-4_H-128_A-2':
        'https://tfhub.dev/tensorflow/bert_en_uncased_preprocess/3',
    'small_bert/bert_en_uncased_L-4_H-256_A-4':
        'https://tfhub.dev/tensorflow/bert_en_uncased_preprocess/3',
    'small_bert/bert_en_uncased_L-4_H-512_A-8':
        'https://tfhub.dev/tensorflow/bert_en_uncased_preprocess/3',
    'small_bert/bert_en_uncased_L-4_H-768_A-12':
        'https://tfhub.dev/tensorflow/bert_en_uncased_preprocess/3',
    'small_bert/bert_en_uncased_L-6_H-128_A-2':
        'https://tfhub.dev/tensorflow/bert_en_uncased_preprocess/3',
    'small_bert/bert_en_uncased_L-6_H-256_A-4':
        'https://tfhub.dev/tensorflow/bert_en_uncased_preprocess/3',
    'small_bert/bert_en_uncased_L-6_H-512_A-8':
        'https://tfhub.dev/tensorflow/bert_en_uncased_preprocess/3',
    'small_bert/bert_en_uncased_L-6_H-768_A-12':
        'https://tfhub.dev/tensorflow/bert_en_uncased_preprocess/3',
    'small_bert/bert_en_uncased_L-8_H-128_A-2':
        'https://tfhub.dev/tensorflow/bert_en_uncased_preprocess/3',
    'small_bert/bert_en_uncased_L-8_H-256_A-4':
        'https://tfhub.dev/tensorflow/bert_en_uncased_preprocess/3',
    'small_bert/bert_en_uncased_L-8_H-512_A-8':
        'https://tfhub.dev/tensorflow/bert_en_uncased_preprocess/3',
    'small_bert/bert_en_uncased_L-8_H-768_A-12':
        'https://tfhub.dev/tensorflow/bert_en_uncased_preprocess/3',
    'small_bert/bert_en_uncased_L-10_H-128_A-2':
        'https://tfhub.dev/tensorflow/bert_en_uncased_preprocess/3',
    'small_bert/bert_en_uncased_L-10_H-256_A-4':
        'https://tfhub.dev/tensorflow/bert_en_uncased_preprocess/3',
    'small_bert/bert_en_uncased_L-10_H-512_A-8':
        'https://tfhub.dev/tensorflow/bert_en_uncased_preprocess/3',
    'small_bert/bert_en_uncased_L-10_H-768_A-12':
        'https://tfhub.dev/tensorflow/bert_en_uncased_preprocess/3',
    'small_bert/bert_en_uncased_L-12_H-128_A-2':
        'https://tfhub.dev/tensorflow/bert_en_uncased_preprocess/3',
    'small_bert/bert_en_uncased_L-12_H-256_A-4':
        'https://tfhub.dev/tensorflow/bert_en_uncased_preprocess/3',
    'small_bert/bert_en_uncased_L-12_H-512_A-8':
        'https://tfhub.dev/tensorflow/bert_en_uncased_preprocess/3',
    'small_bert/bert_en_uncased_L-12_H-768_A-12':
        'https://tfhub.dev/tensorflow/bert_en_uncased_preprocess/3',
    'bert_multi_cased_L-12_H-768_A-12':
        'https://tfhub.dev/tensorflow/bert_multi_cased_preprocess/3',
    'albert_en_base':
        'https://tfhub.dev/tensorflow/albert_en_preprocess/3',
    'electra_small':
        'https://tfhub.dev/tensorflow/bert_en_uncased_preprocess/3',
    'electra_base':
        'https://tfhub.dev/tensorflow/bert_en_uncased_preprocess/3',
    'experts_pubmed':
        'https://tfhub.dev/tensorflow/bert_en_uncased_preprocess/3',
    'experts_wiki_books':
        'https://tfhub.dev/tensorflow/bert_en_uncased_preprocess/3',
    'talking-heads_base':
        'https://tfhub.dev/tensorflow/bert_en_uncased_preprocess/3',
}

## Build Model Architecture

In [15]:
def build_bert_model(bert_model_name: str, dropout_rate: float = 0.2, num_labels: int = 2):
  """Build an end-to-end text classifier on top of a TF Hub encoder.

  The model takes raw strings, runs them through the preprocessing model paired
  with the chosen encoder, feeds the result to the trainable encoder, and puts
  a dropout + sigmoid dense head on the encoder's `pooled_output`.

  Args:
    bert_model_name: Key that must exist in both `map_name_to_handle` and
      `map_model_to_preprocess`.
    dropout_rate: Dropout rate applied to the pooled output. Defaults to 0.2,
      the value that used to be hard-coded.
    num_labels: Number of sigmoid output units. Defaults to 2, the value that
      used to be hard-coded.

  Returns:
    An uncompiled `tf.keras.Model` mapping a batch of strings to `num_labels`
    sigmoid scores.

  Raises:
    KeyError: If `bert_model_name` is missing from either lookup table.
  """
  tfhub_handle_encoder = map_name_to_handle[bert_model_name]
  tfhub_handle_preprocess = map_model_to_preprocess[bert_model_name]

  text_input = tf.keras.layers.Input(shape=(), dtype="string", name='text')
  preprocessing_layer = hub.KerasLayer(tfhub_handle_preprocess, name='preprocessing')
  encoder_inputs = preprocessing_layer(text_input)
  # trainable=True: the encoder weights are fine-tuned together with the head.
  encoder = hub.KerasLayer(tfhub_handle_encoder, trainable=True, name='BERT_encoder')
  outputs = encoder(encoder_inputs)
  net = outputs['pooled_output']
  net = tf.keras.layers.Dropout(dropout_rate)(net)
  net = tf.keras.layers.Dense(num_labels, activation='sigmoid', name='classifier')(net)
  return tf.keras.Model(text_input, net)

In [16]:
# Build one classifier per TF Hub encoder being compared.
# NOTE(review): all four handles are English checkpoints, while the sample
# reviews look Indonesian. Confirm this is intended (a multilingual handle is
# also listed in map_name_to_handle).
bert_uncased_model, small_bert_model, albert_model, electra_model = [
    build_bert_model(checkpoint_name)
    for checkpoint_name in (
        'bert_en_uncased_L-12_H-768_A-12',
        'small_bert/bert_en_uncased_L-4_H-512_A-8',
        'albert_en_base',
        'electra_base',
    )
]

## Model Compiling

In [17]:
# All transformer variants are compiled with identical settings so their results
# can be compared directly. Each model receives its own optimizer, loss and
# metric instances. `metrics` is passed as a list, which is the form documented
# for Model.compile.
for bert_model in (bert_uncased_model, small_bert_model, albert_model, electra_model):
    bert_model.compile(
        optimizer = tf.keras.optimizers.Adam(learning_rate = 0.00003),
        loss = tf.keras.losses.BinaryCrossentropy(),
        metrics = [tf.keras.metrics.BinaryAccuracy()],
    )

## Model Training

In [18]:
# Fine-tune the uncased BERT model; the test split is evaluated after every epoch.
bert_uncased_model.fit(
    X_train,
    y_train,
    batch_size = 16,
    epochs = 3,
    validation_data = (X_test, y_test),
)


Epoch 1/3
Epoch 2/3
Epoch 3/3


<keras.callbacks.History at 0x7f4841cd7a60>

In [19]:
# Fine-tune the small BERT model; the test split is evaluated after every epoch.
small_bert_model.fit(
    X_train,
    y_train,
    batch_size = 16,
    epochs = 3,
    validation_data = (X_test, y_test),
)

Epoch 1/3
Epoch 2/3
Epoch 3/3


<keras.callbacks.History at 0x7f483cfdcd30>

In [20]:
# Fine-tune the ALBERT model; the test split is evaluated after every epoch.
albert_model.fit(
    X_train,
    y_train,
    batch_size = 16,
    epochs = 3,
    validation_data = (X_test, y_test),
)

Epoch 1/3
Epoch 2/3
Epoch 3/3


<keras.callbacks.History at 0x7f47f85248b0>

In [21]:
# Fine-tune the ELECTRA model; the test split is evaluated after every epoch.
electra_model.fit(
    X_train,
    y_train,
    batch_size = 16,
    epochs = 3,
    validation_data = (X_test, y_test),
)

Epoch 1/3
Epoch 2/3
Epoch 3/3


<keras.callbacks.History at 0x7f47e83f6cb0>

## Model Evaluation

In [22]:
from sklearn.metrics import classification_report

# Display names for the two output columns of the report.
class_labels = ["apresiasi", "kritik"]

# A label counts as predicted when its sigmoid score exceeds 0.5.
y_pred = (bert_uncased_model.predict(X_test) > 0.5)
report = classification_report(y_test, y_pred, target_names=class_labels)
print(report)

              precision    recall  f1-score   support

   apresiasi       0.88      0.84      0.86       123
      kritik       0.94      0.96      0.95       262

   micro avg       0.92      0.92      0.92       385
   macro avg       0.91      0.90      0.90       385
weighted avg       0.92      0.92      0.92       385
 samples avg       0.94      0.94      0.93       385



  _warn_prf(average, modifier, msg_start, len(result))


In [23]:
# Small BERT: threshold the sigmoid scores at 0.5 and report per-label metrics.
y_pred = (small_bert_model.predict(X_test) > 0.5)
report = classification_report(y_test, y_pred, target_names=class_labels)
print(report)

              precision    recall  f1-score   support

   apresiasi       0.83      0.83      0.83       123
      kritik       0.95      0.92      0.94       262

   micro avg       0.91      0.89      0.90       385
   macro avg       0.89      0.88      0.88       385
weighted avg       0.91      0.89      0.90       385
 samples avg       0.93      0.92      0.91       385



In [24]:
# ALBERT: threshold the sigmoid scores at 0.5 and report per-label metrics.
y_pred = (albert_model.predict(X_test) > 0.5)
report = classification_report(y_test, y_pred, target_names=class_labels)
print(report)

              precision    recall  f1-score   support

   apresiasi       0.84      0.33      0.48       123
      kritik       0.87      0.95      0.91       262

   micro avg       0.86      0.76      0.80       385
   macro avg       0.85      0.64      0.69       385
weighted avg       0.86      0.76      0.77       385
 samples avg       0.86      0.79      0.81       385



In [25]:
# ELECTRA: threshold the sigmoid scores at 0.5 and report per-label metrics.
y_pred = (electra_model.predict(X_test) > 0.5)
report = classification_report(y_test, y_pred, target_names=class_labels)
print(report)

              precision    recall  f1-score   support

   apresiasi       0.91      0.70      0.79       123
      kritik       0.93      0.98      0.96       262

   micro avg       0.93      0.89      0.91       385
   macro avg       0.92      0.84      0.87       385
weighted avg       0.93      0.89      0.90       385
 samples avg       0.95      0.92      0.92       385



## Save Model

In [26]:
# bert_uncased_model.save("bert-1")
# small_bert_model.save("small-bert-1")
# albert_model.save("albert-1")
# electra_model.save("electra-1")