In [25]:
import os
import json
import six
import torch
import matplotlib.pyplot as plt  
from rnn import RNNLanguageModel, train, validate, SentenceDataset
from transformers import AutoTokenizer

In [4]:
# Pick the compute device: use a CUDA GPU when one is available, else the CPU
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
print(f"Using device: {device}")

Using device: cpu


In [5]:
# File locations: train/validation story data (JSON) and the tokenizer to load
train_data_path, val_data_path = (
    "data/tiny_train_stories.json",
    "data/tiny_valid_stories.json",
)
tokenizer_path = "my_tokenizer"

In [6]:
# Instantiate the tokenizer found at `tokenizer_path`; its vocabulary size is
# what the models below receive as `vocab_size`.
tokenizer = AutoTokenizer.from_pretrained(tokenizer_path)
# NOTE(review): `vocab_size` may not count tokens added to the tokenizer after
# it was built, whereas `len(tokenizer)` would -- confirm which one the
# embedding layer should be sized with.
vocab_size = tokenizer.vocab_size

In [21]:
# Hyperparameter sweep: each run uses the same width for the embedding and
# the hidden state.
configs = [
    {"embed_dim": width, "hidden_dim": width}
    for width in (64, 128, 256, 512)
]

# Datasets and loaders: both splits are served one sequence at a time, in
# file order (no shuffling).
_loader_kwargs = {"batch_size": 1, "shuffle": False}

train_data = SentenceDataset(train_data_path)
train_dataloader = torch.utils.data.DataLoader(train_data, **_loader_kwargs)

val_data = SentenceDataset(val_data_path)
val_dataloader = torch.utils.data.DataLoader(val_data, **_loader_kwargs)

In [28]:
# Loss histories per configuration, keyed by "<embed_dim>_<hidden_dim>";
# the training loop fills these in.
train_losses, val_losses = {}, {}

AttributeError: module 'torch' has no attribute '_six'

In [29]:
# Train one model per configuration and record its loss curves in
# `train_losses` / `val_losses` under the key "<embed_dim>_<hidden_dim>".

# `loss_fn` is passed to `train` below but is not defined in any visible cell,
# so a fresh "Restart & Run All" would stop with a NameError. Provide a
# fallback only when the name is missing, leaving an existing definition alone.
# NOTE(review): cross-entropy is assumed here as the usual language-modelling
# loss -- confirm it matches what `rnn.train` expects from `loss_fn`.
if "loss_fn" not in globals():
    loss_fn = torch.nn.CrossEntropyLoss()

# Seed used before building each model so every run in the sweep is
# reproducible across kernel restarts.
SWEEP_SEED = 0

for config in configs:
    embed_dim, hidden_dim = config["embed_dim"], config["hidden_dim"]
    print(f"Training with embed_dim={embed_dim} and hidden_dim={hidden_dim}...")

    # Re-seed per configuration so a run does not depend on how much
    # randomness the previous runs consumed.
    torch.manual_seed(SWEEP_SEED)
    lm = RNNLanguageModel(
        embed_dim=embed_dim,
        hidden_dim=hidden_dim,
        vocab_size=vocab_size,
        key_dim=32,
        value_dim=32,
    ).to(device)

    optimizer = torch.optim.Adam(lm.parameters(), lr=1e-3)
    train_loss, val_loss = train(
        lm,
        train_dataloader,
        val_dataloader,
        loss_fn,
        optimizer,
        num_sequences=128,
        batch_size=1,
    )

    # Build the result key once so both dictionaries always agree on it.
    run_key = f"{embed_dim}_{hidden_dim}"
    train_losses[run_key] = train_loss
    val_losses[run_key] = val_loss

# Plotting
def _plot_loss_curves(loss_history, split_name):
    """Draw one loss curve per configuration for a single data split.

    Args:
        loss_history: dict mapping "<embed_dim>_<hidden_dim>" to the sequence
            of loss values recorded for that configuration.
        split_name: human-readable split name ("Training" or "Validation");
            used in the y-axis label and the figure title.
    """
    plt.figure(figsize=(10, 6))
    for key, loss in loss_history.items():
        # Derive the x-axis from this curve's own length: curves of different
        # lengths (e.g. validation vs. training) would otherwise make
        # `plt.plot` fail on mismatched x/y sizes.
        steps = list(range(1, len(loss) + 1))
        plt.plot(steps, loss, label=f"Embed/Hidden Dim = {key.split('_')[0]}")
    plt.xlabel("Number of Sequences")
    plt.ylabel(f"{split_name} Loss")
    plt.title(f"{split_name} Loss vs. Number of Sequences")
    # Only add a legend when at least one curve was drawn.
    if loss_history:
        plt.legend()
    plt.grid()
    plt.show()


# Training loss plot
_plot_loss_curves(train_losses, "Training")

# Validation loss plot
_plot_loss_curves(val_losses, "Validation")

Training with embed_dim=64 and hidden_dim=64...


AttributeError: module 'torch' has no attribute '_six'