#### 1. Set Up the Environment and Import Libraries

In [17]:
pip uninstall torch torchtext -y

Found existing installation: torch 2.5.1+cu121
Uninstalling torch-2.5.1+cu121:
  Successfully uninstalled torch-2.5.1+cu121
Found existing installation: torchtext 0.18.0
Uninstalling torchtext-0.18.0:
  Successfully uninstalled torchtext-0.18.0


In [1]:
pip install torch torchtext --index-url https://download.pytorch.org/whl/cu117

Looking in indexes: https://download.pytorch.org/whl/cu117


In [2]:
# Mount Google Drive at /content/drive/ so the project folder stored there
# is reachable from the Colab filesystem.
from google.colab import drive
drive.mount('/content/drive/')

Drive already mounted at /content/drive/; to attempt to forcibly remount, call drive.mount("/content/drive/", force_remount=True).


In [3]:
# Root directory of the TransformerModel project on the mounted Drive.
# Later cells list this directory and add it to sys.path for imports.
path = '/content/drive/My Drive/TransformerModel/'

In [4]:
# Sanity check: list the project directory to confirm the Drive mount worked.
# The quotes are required because the path contains a space ("My Drive").
!ls "{path}"

data  datasets	main.py  models  plotting  results  run.ipynb  scripts	tests  training


In [5]:
import sys

# Put the project directory at the FRONT of the module search path so the
# project's own top-level packages (datasets, models, training) take priority
# over any identically named package installed in the environment.
# The membership guard keeps the cell idempotent when it is re-run.
if path not in sys.path:
    sys.path.insert(0, path)

In [6]:
import os

import torch
from torch.utils.data import DataLoader

from models.transformer import TransformerModel
from datasets import WMT14Dataset
from training.train_model import train, evaluate
from training.utils import create_optimizer, setup_tensorboard, save_checkpoint, load_checkpoint

#### 2. Define Configuration Parameters

In [7]:
# Optimisation settings used by the training and evaluation cells.
epochs = 10            # number of passes over the training set
batch_size = 64        # samples per DataLoader batch
learning_rate = 0.005  # step size handed to create_optimizer

# Architecture settings, passed to TransformerModel in this order.
ntokens = 1000   # vocabulary size
ninp = 512       # embedding dimension
nhead = 8        # attention heads per multi-head attention layer
nhid = 2048      # width of the feedforward sub-network
nlayers = 6      # number of stacked encoder layers
dropout = 0.2    # dropout probability

#### 3. Initialize the Model

In [9]:
# Build the model from the configuration values defined in the previous section.
model = TransformerModel(
    ntokens,
    ninp,
    nhead,
    nhid,
    nlayers,
    dropout,
)

#### 4. Define Training Logic

In [10]:
# Training setup
# Resolve the CSV against the project directory on Drive (`path`). The notebook
# never changes the working directory, so a bare relative path would be looked
# up relative to wherever the kernel started rather than inside the project.
train_data_path = os.path.join(path, 'data/train/wmt14_translate_fr-en_train.csv')
train_dataset = WMT14Dataset(filepath=train_data_path)
train_loader = DataLoader(
    train_dataset,
    batch_size=batch_size,
    shuffle=True,
    collate_fn=train_dataset.collate_fn,
)

optimizer = create_optimizer(model, lr=learning_rate)
criterion = torch.nn.CrossEntropyLoss()
writer = setup_tensorboard('runs/Transformer')

# Training loop: one call to train() per epoch; the returned loss is printed
# and logged to TensorBoard under the 'Training Loss' tag.
try:
    for epoch in range(1, epochs + 1):
        train_loss = train(model, train_loader, optimizer, criterion, epoch, writer)
        print(f'Epoch {epoch}: Training Loss {train_loss:.4f}')
        writer.add_scalar('Training Loss', train_loss, epoch)
finally:
    # Close the writer even if an epoch raises, so it is never left open.
    writer.close()

# Save the trained model together with the optimizer state.
save_checkpoint({'state_dict': model.state_dict(), 'optimizer': optimizer.state_dict()}, filename='final_model.pth')

OSError: [E050] Can't find model 'fr_core_news_sm'. It doesn't seem to be a Python package or a valid path to a data directory.

#### 5. Define Evaluation Logic

In [None]:
# Evaluation setup
# Resolve the CSV against the project directory on Drive (`path`). The notebook
# never changes the working directory, so a bare relative path would be looked
# up relative to wherever the kernel started rather than inside the project.
test_data_path = os.path.join(path, 'data/test/wmt14_translate_fr-en_test.csv')
test_dataset = WMT14Dataset(filepath=test_data_path)
test_loader = DataLoader(
    test_dataset,
    batch_size=batch_size,
    shuffle=False,  # keep a fixed sample order for evaluation
    collate_fn=test_dataset.collate_fn,
)
criterion = torch.nn.CrossEntropyLoss()

# Load model from checkpoint. No optimizer is passed (None); only the first
# element of the returned 3-tuple (the model) is kept.
checkpoint_path = 'final_model.pth'
model, _, _ = load_checkpoint(model, None, checkpoint_path)
model.eval()

# Evaluation loop
test_loss = evaluate(model, test_loader, criterion)
print(f'Final Evaluation Loss: {test_loss:.4f}')

#### 6. Choose Mode (Train or Evaluate)

In [None]:
# Mode selection
# This cell only records which dataset path goes with the chosen mode; it does
# not run anything itself. Training and evaluation are executed by their own
# cells ("Define Training Logic" / "Define Evaluation Logic").
mode = 'train'  # set to 'evaluate' to select the evaluation path instead

if mode == 'evaluate':
    # Path consumed by the evaluation cell.
    test_data_path = 'data/test/wmt14_translate_fr-en_test.csv'
elif mode == 'train':
    # Path consumed by the training cell.
    train_data_path = 'data/train/wmt14_translate_fr-en_train.csv'