<a href="https://colab.research.google.com/github/mdzikrim/DeepLearning/blob/main/IMDB_GRU.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import tensorflow as tf
from tensorflow.keras.datasets import imdb
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, GRU, Dense, Dropout
from sklearn.metrics import classification_report, roc_auc_score, roc_curve
import matplotlib.pyplot as plt
import numpy as np

In [None]:
# Konfigurasi
num_words = 40000  # Bisa juga pakai 30000 - 50000
maxlen = 400       # Bisa juga 300 - 500

# Load IMDB
(x_train, y_train), (x_test, y_test) = imdb.load_data(num_words=num_words)

# Padding sequence
x_train = pad_sequences(x_train, maxlen=maxlen)
x_test = pad_sequences(x_test, maxlen=maxlen)

print(f"Train shape: {x_train.shape}, Test shape: {x_test.shape}")


Train shape: (25000, 400), Test shape: (25000, 400)


In [None]:
model = Sequential([
    Embedding(num_words, 128, input_length=maxlen),
    GRU(128, return_sequences=True),
    Dropout(0.3),
    GRU(64),
    Dense(64, activation='relu'),
    Dropout(0.5),
    Dense(1, activation='sigmoid')
])

model.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy']
)

model.summary()




In [None]:
history = model.fit(
    x_train, y_train,
    epochs=5,
    batch_size=128,
    validation_split=0.2
)


Epoch 1/5
[1m157/157[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m500s[0m 3s/step - accuracy: 0.6362 - loss: 0.6075 - val_accuracy: 0.8556 - val_loss: 0.3555
Epoch 2/5
[1m157/157[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m457s[0m 3s/step - accuracy: 0.9016 - loss: 0.2600 - val_accuracy: 0.7714 - val_loss: 0.4945
Epoch 3/5
[1m157/157[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m498s[0m 3s/step - accuracy: 0.8990 - loss: 0.2643 - val_accuracy: 0.8718 - val_loss: 0.3523
Epoch 4/5
[1m157/157[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m507s[0m 3s/step - accuracy: 0.9742 - loss: 0.0825 - val_accuracy: 0.8440 - val_loss: 0.4145
Epoch 5/5
[1m157/157[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m493s[0m 3s/step - accuracy: 0.9820 - loss: 0.0624 - val_accuracy: 0.8592 - val_loss: 0.5162


In [None]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score

# Prediksi probabilitas dan konversi ke label (threshold 0.5)
y_pred_prob = model.predict(x_test)
y_pred = (y_pred_prob > 0.5).astype("int32")

# Hitung metrik
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)
auc = roc_auc_score(y_test, y_pred_prob)

# Cetak hasil evaluasi
print(f"Akurasi   : {accuracy:.4f}")
print(f"Presisi   : {precision:.4f}")
print(f"Recall    : {recall:.4f}")
print(f"F1-Score  : {f1:.4f}")
print(f"AUC (ROC) : {auc:.4f}")


[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m133s[0m 169ms/step
Akurasi   : 0.8464
Presisi   : 0.8249
Recall    : 0.8795
F1-Score  : 0.8513
AUC (ROC) : 0.9214


In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset

from tensorflow.keras.datasets import imdb
from tensorflow.keras.preprocessing.sequence import pad_sequences

from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score

import numpy as np


In [2]:
# Konfigurasi
num_words = 40000
maxlen = 400
batch_size = 128

# Load IMDB
(x_train, y_train), (x_test, y_test) = imdb.load_data(num_words=num_words)

# Padding
x_train = pad_sequences(x_train, maxlen=maxlen)
x_test = pad_sequences(x_test, maxlen=maxlen)

# Konversi ke tensor
X_train_tensor = torch.tensor(x_train, dtype=torch.long)
y_train_tensor = torch.tensor(y_train, dtype=torch.float32)
X_test_tensor = torch.tensor(x_test, dtype=torch.long)
y_test_tensor = torch.tensor(y_test, dtype=torch.float32)

# DataLoader
train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
test_dataset = TensorDataset(X_test_tensor, y_test_tensor)

train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size)


Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/imdb.npz
[1m17464789/17464789[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 0us/step


In [3]:
class GRUClassifier(nn.Module):
    def __init__(self, vocab_size, embedding_dim, hidden_dim):
        super(GRUClassifier, self).__init__()
        self.embedding = nn.Embedding(vocab_size, embedding_dim)
        self.gru1 = nn.GRU(embedding_dim, hidden_dim, batch_first=True, bidirectional=False)
        self.dropout1 = nn.Dropout(0.3)
        self.gru2 = nn.GRU(hidden_dim, hidden_dim//2, batch_first=True)
        self.dropout2 = nn.Dropout(0.5)
        self.fc = nn.Linear(hidden_dim//2, 1)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        x = self.embedding(x)
        x, _ = self.gru1(x)
        x = self.dropout1(x)
        x, _ = self.gru2(x)
        x = self.dropout2(x)
        x = x[:, -1, :]
        x = self.fc(x)
        return self.sigmoid(x)

# Inisialisasi model
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = GRUClassifier(vocab_size=num_words, embedding_dim=128, hidden_dim=128).to(device)
criterion = nn.BCELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)


In [4]:
epochs = 5

model.train()
for epoch in range(epochs):
    epoch_loss = 0
    for X_batch, y_batch in train_loader:
        X_batch, y_batch = X_batch.to(device), y_batch.to(device)

        optimizer.zero_grad()
        output = model(X_batch).squeeze()
        loss = criterion(output, y_batch)
        loss.backward()
        optimizer.step()
        epoch_loss += loss.item()

    print(f"Epoch {epoch+1}/{epochs} - Loss: {epoch_loss/len(train_loader):.4f}")


Epoch 1/5 - Loss: 0.6032
Epoch 2/5 - Loss: 0.4251
Epoch 3/5 - Loss: 0.3170
Epoch 4/5 - Loss: 0.2218
Epoch 5/5 - Loss: 0.1756


In [5]:
model.eval()
all_preds = []
all_probs = []
all_labels = []

with torch.no_grad():
    for X_batch, y_batch in test_loader:
        X_batch = X_batch.to(device)
        outputs = model(X_batch).squeeze().cpu().numpy()
        preds = (outputs > 0.5).astype(int)
        all_preds.extend(preds)
        all_probs.extend(outputs)
        all_labels.extend(y_batch.numpy())

# Konversi ke numpy array
y_true = np.array(all_labels)
y_pred = np.array(all_preds)
y_prob = np.array(all_probs)

# Hitung metrik
accuracy = accuracy_score(y_true, y_pred)
precision = precision_score(y_true, y_pred)
recall = recall_score(y_true, y_pred)
f1 = f1_score(y_true, y_pred)
auc = roc_auc_score(y_true, y_prob)

# Tampilkan
print(f"Akurasi   : {accuracy:.4f}")
print(f"Presisi   : {precision:.4f}")
print(f"Recall    : {recall:.4f}")
print(f"F1-Score  : {f1:.4f}")
print(f"AUC (ROC) : {auc:.4f}")


Akurasi   : 0.8746
Presisi   : 0.8961
Recall    : 0.8474
F1-Score  : 0.8711
AUC (ROC) : 0.9458
