In [1]:
import sys
!{sys.executable} -m pip install --upgrade pip --quiet
!{sys.executable} -m pip install torch torchvision torchtext --quiet
!{sys.executable} -m pip install tensorflow --quiet
!{sys.executable} -m pip install scikit-learn --quiet
!{sys.executable} -m pip install matplotlib --quiet

import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

import matplotlib.pyplot as plt
from sklearn.metrics import (accuracy_score, precision_score, recall_score, f1_score,
                             roc_curve, auc)

print('Library berhasil diimport.')


[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.8/1.8 MB[0m [31m24.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m363.4/363.4 MB[0m [31m49.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m13.8/13.8 MB[0m [31m53.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m24.6/24.6 MB[0m [31m73.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m883.7/883.7 kB[0m [31m19.6 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m664.8/664.8 MB[0m [31m33.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m211.5/211.5 MB[0m [31m52.3 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m56.3/56.3 MB[0m [31m64.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

In [2]:
# Vocabulary cap passed to the IMDB loader, and the fixed length every review
# is padded/truncated to. Both are reused by later cells.
num_words = 30000
maxlen = 300

# The loader returns the train and test splits as (features, labels) pairs.
(x_train, y_train), (x_test, y_test) = keras.datasets.imdb.load_data(num_words=num_words)

# Bring both feature splits to the same fixed width in a single pass.
x_train, x_test = (
    keras.preprocessing.sequence.pad_sequences(split, maxlen=maxlen)
    for split in (x_train, x_test)
)

# Quick sanity check of the resulting array shapes.
print('Train shape:', x_train.shape, 'Label train:', y_train.shape)
print('Test shape :', x_test.shape, 'Label test :', y_test.shape)


Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/imdb.npz
[1m17464789/17464789[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step
Train shape: (25000, 300) Label train: (25000,)
Test shape : (25000, 300) Label test : (25000,)


In [3]:
class IMDBDataset(Dataset):
    """Map-style dataset pairing encoded reviews with their labels.

    Both inputs are converted to ``torch.LongTensor`` once, at construction
    time, so indexing only slices tensors that already exist.

    Args:
        X: array-like of token ids, one row per sample.
        y: array-like of labels, one entry per sample.
    """

    def __init__(self, X, y):
        # Features and targets are both stored as int64 tensors.
        self.X = torch.LongTensor(X)
        self.y = torch.LongTensor(y)

    def __len__(self):
        """Return the number of samples (length of the feature tensor)."""
        return len(self.X)

    def __getitem__(self, idx):
        """Return the ``(features, label)`` pair stored at position ``idx``."""
        sample, target = self.X[idx], self.y[idx]
        return sample, target

# Mini-batch size shared by the training and test loaders.
batch_size = 64

# Wrap each padded split (features + labels) in the tensor-backed dataset.
train_dataset, test_dataset = (
    IMDBDataset(x_train, y_train),
    IMDBDataset(x_test, y_test),
)

# Only the training loader shuffles; the test loader keeps the dataset order.
train_loader = DataLoader(dataset=train_dataset, shuffle=True, batch_size=batch_size)
test_loader = DataLoader(dataset=test_dataset, shuffle=False, batch_size=batch_size)

class RNNModel(nn.Module):
    """Sequence classifier: embedding -> vanilla RNN -> sigmoid-activated linear head.

    Args:
        vocab_size: number of rows in the embedding table.
        embed_dim: size of each token embedding.
        hidden_dim: size of the RNN hidden state.
        output_dim: number of units produced by the final linear layer.
    """

    def __init__(self, vocab_size, embed_dim=128, hidden_dim=128, output_dim=1):
        super().__init__()
        self.embedding = nn.Embedding(num_embeddings=vocab_size, embedding_dim=embed_dim)
        # batch_first=True: the recurrent layer works on (batch, step, feature) tensors.
        self.rnn = nn.RNN(embed_dim, hidden_dim, batch_first=True)
        self.fc = nn.Linear(in_features=hidden_dim, out_features=output_dim)

    def forward(self, x):
        """Return the sigmoid of the linear head applied to the last time step."""
        embedded = self.embedding(x)
        # Only the per-step outputs are used; the final hidden state is discarded.
        step_outputs, _ = self.rnn(embedded)
        last_step = step_outputs[:, -1, :]
        scores = self.fc(last_step)
        return torch.sigmoid(scores)

class LSTMModel(nn.Module):
    """Sequence classifier: embedding -> LSTM -> sigmoid-activated linear head.

    Args:
        vocab_size: number of rows in the embedding table.
        embed_dim: size of each token embedding.
        hidden_dim: size of the LSTM hidden state.
        output_dim: number of units produced by the final linear layer.
    """

    def __init__(self, vocab_size, embed_dim=128, hidden_dim=128, output_dim=1):
        super().__init__()
        self.embedding = nn.Embedding(num_embeddings=vocab_size, embedding_dim=embed_dim)
        # batch_first=True: the recurrent layer works on (batch, step, feature) tensors.
        self.lstm = nn.LSTM(embed_dim, hidden_dim, batch_first=True)
        self.fc = nn.Linear(in_features=hidden_dim, out_features=output_dim)

    def forward(self, x):
        """Return the sigmoid of the linear head applied to the last time step."""
        embedded = self.embedding(x)
        # The (hidden, cell) state tuple returned by the LSTM is not needed here.
        step_outputs, _ = self.lstm(embedded)
        last_step = step_outputs[:, -1, :]
        scores = self.fc(last_step)
        return torch.sigmoid(scores)

class GRUModel(nn.Module):
    """Sequence classifier: embedding -> GRU -> sigmoid-activated linear head.

    Args:
        vocab_size: number of rows in the embedding table.
        embed_dim: size of each token embedding.
        hidden_dim: size of the GRU hidden state.
        output_dim: number of units produced by the final linear layer.
    """

    def __init__(self, vocab_size, embed_dim=128, hidden_dim=128, output_dim=1):
        super().__init__()
        self.embedding = nn.Embedding(num_embeddings=vocab_size, embedding_dim=embed_dim)
        # batch_first=True: the recurrent layer works on (batch, step, feature) tensors.
        self.gru = nn.GRU(embed_dim, hidden_dim, batch_first=True)
        self.fc = nn.Linear(in_features=hidden_dim, out_features=output_dim)

    def forward(self, x):
        """Return the sigmoid of the linear head applied to the last time step."""
        embedded = self.embedding(x)
        # Only the per-step outputs are used; the final hidden state is discarded.
        step_outputs, _ = self.gru(embedded)
        last_step = step_outputs[:, -1, :]
        scores = self.fc(last_step)
        return torch.sigmoid(scores)


# Cell-level confirmation that the three model classes were defined.
print('PyTorch models (RNN, LSTM, GRU) defined.')

PyTorch models (RNN, LSTM, GRU) defined.


In [4]:
def train_pytorch_model(model, train_loader, test_loader, epochs=3, lr=0.001, device='cpu'):
    """Train a probability-output binary classifier with Adam and BCE loss.

    The model is moved to ``device`` and, for every epoch, optimised over
    ``train_loader`` and then scored (without gradients) over ``test_loader``.
    One progress line is printed per epoch.

    Args:
        model: ``nn.Module`` whose forward pass returns probabilities
            (``nn.BCELoss`` is applied directly to its output).
        train_loader: iterable of ``(X_batch, y_batch)`` used for optimisation.
        test_loader: iterable of ``(X_batch, y_batch)`` used for evaluation.
        epochs: number of passes over ``train_loader``.
        lr: learning rate handed to Adam.
        device: device the model and every batch are moved to.

    Returns:
        ``(train_losses, test_losses)``: two lists holding, per epoch, the
        loss averaged over the batches of the respective loader.
    """
    criterion = nn.BCELoss()
    optimizer = optim.Adam(model.parameters(), lr=lr)
    model.to(device)
    train_losses, test_losses = [], []

    for epoch in range(epochs):
        model.train()
        running_loss = 0.0
        for X_batch, y_batch in train_loader:
            X_batch = X_batch.to(device)
            y_batch = y_batch.to(device).float().reshape(-1)
            optimizer.zero_grad()
            # reshape(-1) rather than squeeze(): a trailing batch with a single
            # sample yields a (1, 1) output, which squeeze() would collapse to a
            # 0-dim tensor and make BCELoss reject the (1,) target.
            outputs = model(X_batch).reshape(-1)
            loss = criterion(outputs, y_batch)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
        train_loss = running_loss / len(train_loader)
        train_losses.append(train_loss)

        # Evaluation pass: no gradient tracking, layers switched to eval mode.
        model.eval()
        test_loss = 0.0
        with torch.no_grad():
            for X_batch, y_batch in test_loader:
                X_batch = X_batch.to(device)
                y_batch = y_batch.to(device).float().reshape(-1)
                outputs = model(X_batch).reshape(-1)
                loss = criterion(outputs, y_batch)
                test_loss += loss.item()
        test_loss = test_loss / len(test_loader)
        test_losses.append(test_loss)
        print(f"Epoch [{epoch+1}/{epochs}], Train Loss: {train_loss:.4f}, Test Loss: {test_loss:.4f}")

    return train_losses, test_losses

# Cell-level confirmation that the training helper was defined.
print('PyTorch training function defined.')

PyTorch training function defined.


In [5]:
# Prefer the GPU when PyTorch can see one, otherwise stay on the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'
print('Using device:', device)

# The three architectures are trained one after another with the same
# loaders, epoch count and learning rate.

# RNN
pytorch_rnn = RNNModel(vocab_size=num_words)
rnn_train_losses, rnn_test_losses = train_pytorch_model(
    pytorch_rnn, train_loader, test_loader, device=device, lr=0.001, epochs=3
)

# LSTM
pytorch_lstm = LSTMModel(vocab_size=num_words)
lstm_train_losses, lstm_test_losses = train_pytorch_model(
    pytorch_lstm, train_loader, test_loader, device=device, lr=0.001, epochs=3
)

# GRU
pytorch_gru = GRUModel(vocab_size=num_words)
gru_train_losses, gru_test_losses = train_pytorch_model(
    pytorch_gru, train_loader, test_loader, device=device, lr=0.001, epochs=3
)

Using device: cuda
Epoch [1/3], Train Loss: 0.6492, Test Loss: 0.6132
Epoch [2/3], Train Loss: 0.5799, Test Loss: 0.6173
Epoch [3/3], Train Loss: 0.5005, Test Loss: 0.5468
Epoch [1/3], Train Loss: 0.5788, Test Loss: 0.5640
Epoch [2/3], Train Loss: 0.4706, Test Loss: 0.4461
Epoch [3/3], Train Loss: 0.3716, Test Loss: 0.4616
Epoch [1/3], Train Loss: 0.5656, Test Loss: 0.6591
Epoch [2/3], Train Loss: 0.4616, Test Loss: 0.4246
Epoch [3/3], Train Loss: 0.2630, Test Loss: 0.3403


In [6]:
def create_tf_rnn_model(vocab_size, embed_dim=128, hidden_dim=128):
    """Build and compile a Keras Embedding -> SimpleRNN -> sigmoid Dense classifier.

    Args:
        vocab_size: input dimension of the embedding layer.
        embed_dim: output dimension of the embedding layer.
        hidden_dim: number of units in the SimpleRNN layer.

    Returns:
        A ``keras.Sequential`` model compiled with binary cross-entropy loss,
        the Adam optimizer and the accuracy metric.
    """
    model = keras.Sequential([
        # No `input_length`: the argument is deprecated in current Keras and
        # made this builder silently depend on the notebook-global `maxlen`.
        # The sequence length is taken from the data on the first call.
        layers.Embedding(vocab_size, embed_dim),
        # return_sequences=False: only the last step's output reaches the head.
        layers.SimpleRNN(hidden_dim, return_sequences=False),
        layers.Dense(1, activation='sigmoid')
    ])
    model.compile(
        loss='binary_crossentropy',
        optimizer='adam',
        metrics=['accuracy']
    )
    return model

def create_tf_lstm_model(vocab_size, embed_dim=128, hidden_dim=128):
    """Build and compile a Keras Embedding -> LSTM -> sigmoid Dense classifier.

    Args:
        vocab_size: input dimension of the embedding layer.
        embed_dim: output dimension of the embedding layer.
        hidden_dim: number of units in the LSTM layer.

    Returns:
        A ``keras.Sequential`` model compiled with binary cross-entropy loss,
        the Adam optimizer and the accuracy metric.
    """
    model = keras.Sequential([
        # No `input_length`: the argument is deprecated in current Keras and
        # made this builder silently depend on the notebook-global `maxlen`.
        # The sequence length is taken from the data on the first call.
        layers.Embedding(vocab_size, embed_dim),
        # return_sequences=False: only the last step's output reaches the head.
        layers.LSTM(hidden_dim, return_sequences=False),
        layers.Dense(1, activation='sigmoid')
    ])
    model.compile(
        loss='binary_crossentropy',
        optimizer='adam',
        metrics=['accuracy']
    )
    return model

def create_tf_gru_model(vocab_size, embed_dim=128, hidden_dim=128):
    """Build and compile a Keras Embedding -> GRU -> sigmoid Dense classifier.

    Args:
        vocab_size: input dimension of the embedding layer.
        embed_dim: output dimension of the embedding layer.
        hidden_dim: number of units in the GRU layer.

    Returns:
        A ``keras.Sequential`` model compiled with binary cross-entropy loss,
        the Adam optimizer and the accuracy metric.
    """
    model = keras.Sequential([
        # No `input_length`: the argument is deprecated in current Keras and
        # made this builder silently depend on the notebook-global `maxlen`.
        # The sequence length is taken from the data on the first call.
        layers.Embedding(vocab_size, embed_dim),
        # return_sequences=False: only the last step's output reaches the head.
        layers.GRU(hidden_dim, return_sequences=False),
        layers.Dense(1, activation='sigmoid')
    ])
    model.compile(
        loss='binary_crossentropy',
        optimizer='adam',
        metrics=['accuracy']
    )
    return model

# Cell-level confirmation that the Keras builders were defined.
print('TensorFlow (Keras) model builders defined.')

TensorFlow (Keras) model builders defined.


In [7]:
# Each Keras model is built and then fitted with the same schedule; the test
# split is passed as validation data, which is what the later plotting cell
# reads back as `val_loss`.
# NOTE(review): the saved output of this cell ends with
# "FailedPreconditionError ... DNN library initialization failed" on the first
# fit call. The cause is not visible in this code; possibly the GPU/cuDNN state
# left by the package reinstall in the first cell, or GPU memory already held
# by PyTorch. TODO confirm before relying on the cells below.

# RNN
tf_rnn = create_tf_rnn_model(vocab_size=num_words)
history_rnn = tf_rnn.fit(
    x=x_train, y=y_train,
    batch_size=64, epochs=3,
    validation_data=(x_test, y_test),
)

# LSTM
tf_lstm = create_tf_lstm_model(vocab_size=num_words)
history_lstm = tf_lstm.fit(
    x=x_train, y=y_train,
    batch_size=64, epochs=3,
    validation_data=(x_test, y_test),
)

# GRU
tf_gru = create_tf_gru_model(vocab_size=num_words)
history_gru = tf_gru.fit(
    x=x_train, y=y_train,
    batch_size=64, epochs=3,
    validation_data=(x_test, y_test),
)



Epoch 1/3


FailedPreconditionError: Graph execution error:

Detected at node StatefulPartitionedCall defined at (most recent call last):
  File "<frozen runpy>", line 198, in _run_module_as_main

  File "<frozen runpy>", line 88, in _run_code

  File "/usr/local/lib/python3.11/dist-packages/colab_kernel_launcher.py", line 37, in <module>

  File "/usr/local/lib/python3.11/dist-packages/traitlets/config/application.py", line 992, in launch_instance

  File "/usr/local/lib/python3.11/dist-packages/ipykernel/kernelapp.py", line 712, in start

  File "/usr/local/lib/python3.11/dist-packages/tornado/platform/asyncio.py", line 205, in start

  File "/usr/lib/python3.11/asyncio/base_events.py", line 608, in run_forever

  File "/usr/lib/python3.11/asyncio/base_events.py", line 1936, in _run_once

  File "/usr/lib/python3.11/asyncio/events.py", line 84, in _run

  File "/usr/local/lib/python3.11/dist-packages/ipykernel/kernelbase.py", line 510, in dispatch_queue

  File "/usr/local/lib/python3.11/dist-packages/ipykernel/kernelbase.py", line 499, in process_one

  File "/usr/local/lib/python3.11/dist-packages/ipykernel/kernelbase.py", line 406, in dispatch_shell

  File "/usr/local/lib/python3.11/dist-packages/ipykernel/kernelbase.py", line 730, in execute_request

  File "/usr/local/lib/python3.11/dist-packages/ipykernel/ipkernel.py", line 383, in do_execute

  File "/usr/local/lib/python3.11/dist-packages/ipykernel/zmqshell.py", line 528, in run_cell

  File "/usr/local/lib/python3.11/dist-packages/IPython/core/interactiveshell.py", line 2975, in run_cell

  File "/usr/local/lib/python3.11/dist-packages/IPython/core/interactiveshell.py", line 3030, in _run_cell

  File "/usr/local/lib/python3.11/dist-packages/IPython/core/async_helpers.py", line 78, in _pseudo_sync_runner

  File "/usr/local/lib/python3.11/dist-packages/IPython/core/interactiveshell.py", line 3257, in run_cell_async

  File "/usr/local/lib/python3.11/dist-packages/IPython/core/interactiveshell.py", line 3473, in run_ast_nodes

  File "/usr/local/lib/python3.11/dist-packages/IPython/core/interactiveshell.py", line 3553, in run_code

  File "<ipython-input-7-0829199000e2>", line 2, in <cell line: 0>

  File "/usr/local/lib/python3.11/dist-packages/keras/src/utils/traceback_utils.py", line 117, in error_handler

  File "/usr/local/lib/python3.11/dist-packages/keras/src/backend/tensorflow/trainer.py", line 371, in fit

  File "/usr/local/lib/python3.11/dist-packages/keras/src/backend/tensorflow/trainer.py", line 219, in function

  File "/usr/local/lib/python3.11/dist-packages/keras/src/backend/tensorflow/trainer.py", line 132, in multi_step_on_iterator

DNN library initialization failed. Look at the errors above for more details.
	 [[{{node StatefulPartitionedCall}}]] [Op:__inference_multi_step_on_iterator_2321]

In [None]:
# PyTorch evaluation helper
def evaluate_pytorch(model, data_loader, device='cpu'):
    """Collect a model's predicted probabilities and the matching labels.

    Args:
        model: ``nn.Module`` called on each feature batch; its output is
            flattened to one value per sample.
        data_loader: iterable of ``(X_batch, y_batch)`` pairs.
        device: device the model and the feature batches are moved to.

    Returns:
        ``(probs, labels)``: two 1-D NumPy arrays in loader order.
    """
    # Keep model and inputs on the same device even if the caller has not
    # trained (and therefore moved) the model beforehand.
    model.to(device)
    model.eval()
    all_preds = []
    all_labels = []
    with torch.no_grad():
        for X_batch, y_batch in data_loader:
            X_batch = X_batch.to(device)
            # reshape(-1) rather than squeeze(): a single-sample batch would
            # otherwise become a 0-d array that list.extend cannot iterate.
            preds = model(X_batch).reshape(-1).cpu().numpy()
            all_preds.extend(preds)
            all_labels.extend(y_batch.reshape(-1).cpu().numpy())
    return np.array(all_preds), np.array(all_labels)

# TensorFlow evaluation helper
def evaluate_tf(model, X, y):
    """Return the model's flattened predictions for ``X`` together with ``y``.

    Args:
        model: object exposing ``predict(X)`` whose result supports ``ravel()``.
        X: inputs forwarded unchanged to ``model.predict``.
        y: labels, handed back untouched as the second element.

    Returns:
        ``(probs, y)`` where ``probs`` is the 1-D view of ``model.predict(X)``.
    """
    raw_output = model.predict(X)
    flat_probs = raw_output.ravel()
    return flat_probs, y

# Metric computation helper
def compute_metrics(probs, labels, threshold=0.5):
    """Score predicted probabilities against the true labels.

    Probabilities are turned into hard 0/1 predictions with ``probs >= threshold``
    for the threshold-based scores; the ROC curve and its area are computed
    from the raw probabilities.

    Args:
        probs: array of predicted probabilities (must support ``>=`` and ``astype``).
        labels: true labels aligned with ``probs``.
        threshold: cut-off at or above which a probability counts as class 1.

    Returns:
        ``(metrics, (fpr, tpr))``: a dict with the keys ``accuracy``,
        ``precision``, ``recall``, ``f1_score`` and ``auc``, plus the ROC
        curve coordinates.
    """
    hard_preds = (probs >= threshold).astype(int)

    # Threshold-dependent scores, evaluated in the order they are reported.
    threshold_scorers = (
        ('accuracy', accuracy_score),
        ('precision', precision_score),
        ('recall', recall_score),
        ('f1_score', f1_score),
    )
    metrics = {key: scorer(labels, hard_preds) for key, scorer in threshold_scorers}

    # Threshold-free part: ROC curve and the area underneath it.
    fpr, tpr, _ = roc_curve(labels, probs)
    metrics['auc'] = auc(fpr, tpr)
    return metrics, (fpr, tpr)

# Cell-level confirmation that the evaluation helpers were defined.
print('Evaluation functions ready.')

### Evaluasi PyTorch Models

In [None]:
# Each trained PyTorch model is scored on the test loader: first collect the
# probabilities and labels, then derive the metric dict and ROC coordinates.

# RNN PyTorch
rnn_probs, rnn_labels = evaluate_pytorch(model=pytorch_rnn, data_loader=test_loader, device=device)
rnn_metrics, (rnn_fpr, rnn_tpr) = compute_metrics(probs=rnn_probs, labels=rnn_labels)
print('PyTorch RNN:', rnn_metrics)

# LSTM PyTorch
lstm_probs, lstm_labels = evaluate_pytorch(model=pytorch_lstm, data_loader=test_loader, device=device)
lstm_metrics, (lstm_fpr, lstm_tpr) = compute_metrics(probs=lstm_probs, labels=lstm_labels)
print('PyTorch LSTM:', lstm_metrics)

# GRU PyTorch
gru_probs, gru_labels = evaluate_pytorch(model=pytorch_gru, data_loader=test_loader, device=device)
gru_metrics, (gru_fpr, gru_tpr) = compute_metrics(probs=gru_probs, labels=gru_labels)
print('PyTorch GRU:', gru_metrics)

### Evaluasi TensorFlow Models

In [None]:
# Each Keras model is scored on the padded test arrays: first collect the
# probabilities and labels, then derive the metric dict and ROC coordinates.

# RNN TF
tf_rnn_probs, tf_rnn_labels = evaluate_tf(model=tf_rnn, X=x_test, y=y_test)
tf_rnn_metrics, (tf_rnn_fpr, tf_rnn_tpr) = compute_metrics(probs=tf_rnn_probs, labels=tf_rnn_labels)
print('TF RNN:', tf_rnn_metrics)

# LSTM TF
tf_lstm_probs, tf_lstm_labels = evaluate_tf(model=tf_lstm, X=x_test, y=y_test)
tf_lstm_metrics, (tf_lstm_fpr, tf_lstm_tpr) = compute_metrics(probs=tf_lstm_probs, labels=tf_lstm_labels)
print('TF LSTM:', tf_lstm_metrics)

# GRU TF
tf_gru_probs, tf_gru_labels = evaluate_tf(model=tf_gru, X=x_test, y=y_test)
tf_gru_metrics, (tf_gru_fpr, tf_gru_tpr) = compute_metrics(probs=tf_gru_probs, labels=tf_gru_labels)
print('TF GRU:', tf_gru_metrics)


### Visualisasi Loss (Contoh PyTorch RNN dan TensorFlow RNN)

In [None]:
# PyTorch RNN loss curves (per-epoch lists returned by train_pytorch_model)
torch_fig, torch_ax = plt.subplots(figsize=(10, 4))
torch_ax.plot(rnn_train_losses, label='Train Loss')
torch_ax.plot(rnn_test_losses, label='Test Loss')
torch_ax.set_title('PyTorch RNN Loss')
torch_ax.set_xlabel('Epoch')
torch_ax.set_ylabel('Loss')
torch_ax.legend()
plt.show()

# TensorFlow RNN loss curves (read from the Keras History object)
keras_fig, keras_ax = plt.subplots(figsize=(10, 4))
keras_ax.plot(history_rnn.history['loss'], label='Train Loss')
keras_ax.plot(history_rnn.history['val_loss'], label='Val Loss')
keras_ax.set_title('TensorFlow RNN Loss')
keras_ax.set_xlabel('Epoch')
keras_ax.set_ylabel('Loss')
keras_ax.legend()
plt.show()

Persamaan Metrik Evaluasi

Definisi Dasar Klasifikasi

True Positive (TP): Jumlah prediksi positif yang benar

True Negative (TN): Jumlah prediksi negatif yang benar

False Positive (FP): Jumlah prediksi positif yang salah (kesalahan tipe I)

False Negative (FN): Jumlah prediksi negatif yang salah (kesalahan tipe II)

Accuracy (Akurasi)


$$\text{Accuracy} = \frac{TP + TN}{TP + TN + FP + FN}$$


Proporsi prediksi yang benar dari seluruh prediksi. Baik untuk dataset seimbang.

Accuracy untuk multi-kelas

$$\text{Accuracy} = \frac{\text{Jumlah prediksi yang benar}}{\text{Total jumlah prediksi}}$$


Generalisasi akurasi untuk kasus multi-kelas.

Precision (Presisi)

$$\text{Precision} = \frac{TP}{TP + FP}$$

Proporsi prediksi positif yang benar dari seluruh prediksi positif. Berguna ketika biaya FP tinggi.

Macro-Precision

$$\text{Macro-Precision} = \frac{1}{C}\sum_{i=1}^{C} \text{Precision}_i$$


Rata-rata precision dari semua kelas, memberikan bobot yang sama untuk setiap kelas.

Recall (Sensitivity)

$$\text{Recall} = \frac{TP}{TP + FN}$$

Proporsi kasus positif yang teridentifikasi dari seluruh kasus positif sebenarnya. Berguna ketika biaya FN tinggi.

Macro-Recall

$$\text{Macro-Recall} = \frac{1}{C}\sum_{i=1}^{C} \text{Recall}_i$$

Rata-rata recall dari semua kelas, memberikan bobot yang sama untuk setiap kelas.

F1 Score

$$\text{F1} = 2 \times \frac{\text{Precision} \times \text{Recall}}{\text{Precision} + \text{Recall}}$$

Rata-rata harmonik dari precision dan recall. Memberikan keseimbangan antara kedua metrik tersebut.

Macro-F1

$$\text{Macro-F1} = \frac{1}{C}\sum_{i=1}^{C} \text{F1}_i$$

Rata-rata F1 score dari semua kelas.

Specificity (True Negative Rate)

$$\text{Specificity} = \frac{TN}{TN + FP}$$

Proporsi kasus negatif yang teridentifikasi dengan benar dari seluruh kasus negatif.

True Positive Rate (untuk ROC)

$$\text{TPR} = \frac{TP}{TP + FN} = \text{Recall}$$

Sama dengan Recall, mengukur kemampuan model menemukan semua kasus positif.

False Positive Rate (untuk ROC)

$$\text{FPR} = \frac{FP}{FP + TN} = 1 - \text{Specificity}$$

Proporsi kasus negatif yang salah diklasifikasikan sebagai positif.

AUC (Area Under Curve)

$$\text{AUC} = \int_{0}^{1} \text{TPR}(\text{FPR}^{-1}(t)) dt$$

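Area di bawah kurva ROC, mengukur kemampuan model untuk membedakan antara kelas. Nilai berkisar dari 0 hingga 1: 0.5 setara dengan tebakan acak, 1 berarti pemisahan sempurna, dan nilai di bawah 0.5 menandakan prediksi yang cenderung terbalik.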

Macro-AUC

$$\text{Macro-AUC} = \frac{1}{C}\sum_{i=1}^{C} \text{AUC}_i$$

Rata-rata AUC dari semua kelas dalam kasus multi-kelas.

Log Loss (Cross-Entropy Loss)

$$\text{Log Loss} = -\frac{1}{N} \sum_{i=1}^{N} \sum_{j=1}^{C} y_{ij} \log(p_{ij})$$

Mengukur performa model probabilistik. Menghukum keras prediksi yang salah dengan keyakinan tinggi. Di mana:

$N$ adalah jumlah sampel

$C$ adalah jumlah kelas

$y_{ij}$ adalah indikator biner (0 atau 1) yang bernilai 1 jika sampel $i$ termasuk kelas $j$

$p_{ij}$ adalah probabilitas prediksi bahwa sampel $i$ termasuk kelas $j$

Cohen's Kappa

$$\kappa = \frac{p_o - p_e}{1 - p_e}$$

Mengukur kecocokan yang memperhitungkan kebetulan. Nilai berkisar dari -1 hingga 1: 1 berarti kecocokan sempurna, 0 berarti kecocokan setara dengan kebetulan, dan nilai negatif berarti lebih buruk daripada kebetulan. Di mana:

$p_o$ adalah kecocokan relatif yang diamati (akurasi)

$p_e$ adalah kecocokan yang diharapkan secara kebetulan

Hubungan antar Metrik

Metrik-metrik ini saling melengkapi, dan pemilihannya bergantung pada karakteristik masalah:

Untuk dataset tidak seimbang, Precision, Recall, F1, dan AUC lebih informatif daripada Accuracy

Jika FP lebih bermasalah, fokus pada Precision

Jika FN lebih bermasalah, fokus pada Recall

F1 Score memberikan keseimbangan antara Precision dan Recall

AUC mengukur kemampuan membedakan kelas secara keseluruhan tanpa dipengaruhi threshold

Untuk klasifikasi multi-kelas, macro-average (rata-rata sederhana dari semua kelas) sering digunakan untuk memberikan bobot yang sama pada semua kelas, sementara weighted-average mempertimbangkan frekuensi kelas.