<a href="https://colab.research.google.com/github/mdzikrim/DeepLearning/blob/main/IMDB_LSTM.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [13]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score
from tensorflow.keras.datasets import imdb
from tensorflow.keras.preprocessing.sequence import pad_sequences
import numpy as np

In [14]:
# Load IMDB dataset dari Keras
num_words = 40000
maxlen = 400

(x_train, y_train), (x_test, y_test) = imdb.load_data(num_words=num_words)
x_train = pad_sequences(x_train, maxlen=maxlen)
x_test = pad_sequences(x_test, maxlen=maxlen)

# Konversi ke tensor PyTorch
x_train = torch.tensor(x_train, dtype=torch.long)
y_train = torch.tensor(y_train, dtype=torch.float32)
x_test = torch.tensor(x_test, dtype=torch.long)
y_test = torch.tensor(y_test, dtype=torch.float32)

# Dataset & DataLoader
class IMDBDataset(Dataset):
    def __init__(self, data, labels):
        self.data = data
        self.labels = labels

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        return self.data[idx], self.labels[idx]

train_dataset = IMDBDataset(x_train, y_train)
test_dataset = IMDBDataset(x_test, y_test)

train_loader = DataLoader(train_dataset, batch_size=128, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=128)


In [15]:
class LSTMModel(nn.Module):
    def __init__(self, vocab_size, embed_dim, hidden_dim):
        super(LSTMModel, self).__init__()
        self.embedding = nn.Embedding(vocab_size, embed_dim)
        self.lstm1 = nn.LSTM(embed_dim, hidden_dim, batch_first=True)
        self.lstm2 = nn.LSTM(hidden_dim, hidden_dim // 2, batch_first=True)
        self.dropout = nn.Dropout(0.3)
        self.fc = nn.Linear(hidden_dim // 2, 1)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        x = self.embedding(x)
        x, _ = self.lstm1(x)
        x, _ = self.lstm2(x)
        x = x[:, -1, :]  # Ambil output dari timestep terakhir
        x = self.dropout(x)
        x = self.fc(x)
        return self.sigmoid(x)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

model = LSTMModel(vocab_size=num_words, embed_dim=128, hidden_dim=128).to(device)
criterion = nn.BCELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)


In [16]:
epochs = 5

for epoch in range(epochs):
    model.train()
    total_loss = 0
    for batch_x, batch_y in train_loader:
        batch_x, batch_y = batch_x.to(device), batch_y.to(device).unsqueeze(1)

        optimizer.zero_grad()
        outputs = model(batch_x)
        loss = criterion(outputs, batch_y)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()

    print(f"Epoch {epoch+1}/{epochs}, Loss: {total_loss:.4f}")


Epoch 1/5, Loss: 116.2108
Epoch 2/5, Loss: 111.4064
Epoch 3/5, Loss: 93.8419
Epoch 4/5, Loss: 86.8551
Epoch 5/5, Loss: 78.1458


In [17]:
model.eval()
all_preds = []
all_labels = []

with torch.no_grad():
    for batch_x, batch_y in test_loader:
        batch_x = batch_x.to(device)
        outputs = model(batch_x).cpu().numpy().ravel()
        all_preds.extend(outputs)
        all_labels.extend(batch_y.numpy())

all_preds = np.array(all_preds)
all_classes = (all_preds > 0.5).astype(int)
all_labels = np.array(all_labels)

# Hitung metrik
accuracy = accuracy_score(all_labels, all_classes)
precision = precision_score(all_labels, all_classes)
recall = recall_score(all_labels, all_classes)
f1 = f1_score(all_labels, all_classes)
auc = roc_auc_score(all_labels, all_preds)

# Cetak hasil
print("📊 Evaluasi Model:")
print(f"Accuracy  : {accuracy:.4f}")
print(f"Precision : {precision:.4f}")
print(f"Recall    : {recall:.4f}")
print(f"F1 Score  : {f1:.4f}")
print(f"AUC ROC   : {auc:.4f}")


📊 Evaluasi Model:
Accuracy  : 0.7695
Precision : 0.9039
Recall    : 0.6030
F1 Score  : 0.7235
AUC ROC   : 0.8850


In [None]:
import tensorflow as tf
from tensorflow.keras.datasets import imdb
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, Bidirectional, LSTM, Dense, Dropout
import numpy as np
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score, roc_curve

In [None]:
# Load dataset
num_words = 30000
maxlen = 500
(x_train, y_train), (x_test, y_test) = imdb.load_data(num_words=num_words)
x_train = pad_sequences(x_train, maxlen=maxlen)
x_test = pad_sequences(x_test, maxlen=maxlen)

In [None]:
model = Sequential([
    Embedding(input_dim=num_words, output_dim=128, input_length=maxlen),
    Bidirectional(LSTM(128, return_sequences=True)),
    Dropout(0.5),
    Bidirectional(LSTM(64)),
    Dropout(0.5),
    Dense(64, activation='relu'),
    Dropout(0.5),
    Dense(1, activation='sigmoid')
])

model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy']
)




In [None]:
history = model.fit(
    x_train, y_train,
    epochs=5,
    batch_size=128,
    validation_data=(x_test, y_test)
)

# Print summary of last epoch
print("\nLast epoch metrics:")
print(f"Loss: {history.history['loss'][-1]:.4f}")
print(f"Accuracy: {history.history['accuracy'][-1]:.4f}")


Epoch 1/5
[1m196/196[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1664s[0m 8s/step - accuracy: 0.6001 - loss: 0.6379 - val_accuracy: 0.8478 - val_loss: 0.3607
Epoch 2/5
[1m196/196[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1801s[0m 9s/step - accuracy: 0.9014 - loss: 0.2641 - val_accuracy: 0.8594 - val_loss: 0.3646
Epoch 3/5
[1m196/196[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1658s[0m 8s/step - accuracy: 0.9491 - loss: 0.1454 - val_accuracy: 0.8554 - val_loss: 0.4344
Epoch 4/5
[1m196/196[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1686s[0m 8s/step - accuracy: 0.9617 - loss: 0.1091 - val_accuracy: 0.8685 - val_loss: 0.4225
Epoch 5/5
[1m196/196[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1624s[0m 8s/step - accuracy: 0.9766 - loss: 0.0752 - val_accuracy: 0.8646 - val_loss: 0.5260

Last epoch metrics:
Loss: 0.0765
Accuracy: 0.9762


In [None]:
# Predict probabilities and binary output
y_pred_prob = model.predict(x_test).flatten()
y_pred = (y_pred_prob >= 0.5).astype(int)

# Hitung metrik evaluasi
acc = accuracy_score(y_test, y_pred)
prec = precision_score(y_test, y_pred)
rec = recall_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)
auc = roc_auc_score(y_test, y_pred_prob)

print("\nEvaluation Metrics:")
print(f"Accuracy  : {acc:.4f}")
print(f"Precision : {prec:.4f}")
print(f"Recall    : {rec:.4f}")
print(f"F1 Score  : {f1:.4f}")
print(f"AUC Score : {auc:.4f}")


[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m633s[0m 809ms/step

Evaluation Metrics:
Accuracy  : 0.8646
Precision : 0.8635
Recall    : 0.8662
F1 Score  : 0.8648
AUC Score : 0.9337
