In [1]:
import torch
from torch.utils.data import DataLoader, random_split
from bert.dataset import BERTDataset, collate_batch
from turkish_tokenizer.turkish_tokenizer import TurkishTokenizer

# --- Run configuration -------------------------------------------------------
# Tunable settings gathered in one place so nothing below relies on a buried
# magic number.
SEED = 42                # fixes torch's global RNG and the train/test split
CORPUS_PATH = "combined_reviews.csv"
SEQ_LEN = 512            # keep in sync with the seq_len the model is built with
BATCH_SIZE = 32
TRAIN_FRACTION = 0.9     # remaining samples form the test split

# Seed torch's global RNG so anything drawing from it (e.g. the shuffling
# order of the training DataLoader) repeats across "Restart & Run All".
# NOTE(review): if BERTDataset draws from Python's `random` or NumPy (not
# visible from this notebook), seed those as well.
torch.manual_seed(SEED)

# --- Device selection --------------------------------------------------------
# Preference order: CUDA, then Apple MPS, then CPU.
if torch.cuda.is_available():
    device = 'cuda'
    num_workers = 4
elif torch.backends.mps.is_available():
    device = 'mps'
    num_workers = 0  # Use 0 workers with MPS
else:
    device = 'cpu'
    num_workers = 4

print(f"Using device: {device}")

# --- Dataset -----------------------------------------------------------------
tokenizer = TurkishTokenizer()

# NOTE(review): `device` is handed to the dataset; if BERTDataset creates
# tensors on that device inside __getitem__, CUDA combined with
# num_workers > 0 may need revisiting -- confirm against BERTDataset.
dataset = BERTDataset(
    corpus_path=CORPUS_PATH,
    tokenizer=tokenizer,
    seq_len=SEQ_LEN,
    device=device
)

# --- Train/test split --------------------------------------------------------
# An explicitly seeded generator makes the split identical on every run, so
# train and test metrics are comparable between sessions.
train_size = int(TRAIN_FRACTION * len(dataset))
test_size = len(dataset) - train_size
split_generator = torch.Generator().manual_seed(SEED)
train_dataset, test_dataset = random_split(
    dataset,
    [train_size, test_size],
    generator=split_generator
)

# --- DataLoaders -------------------------------------------------------------
# Both loaders share everything except the dataset and whether to shuffle.
loader_kwargs = dict(
    batch_size=BATCH_SIZE,
    num_workers=num_workers,
    collate_fn=collate_batch
)
train_loader = DataLoader(train_dataset, shuffle=True, **loader_kwargs)
test_loader = DataLoader(test_dataset, shuffle=False, **loader_kwargs)

# --- Summary -----------------------------------------------------------------
print(f"Total samples: {len(dataset)}")
print(f"Training samples: {len(train_dataset)}")
print(f"Testing samples: {len(test_dataset)}")

Using device: mps
Total samples: 2001
Training samples: 1800
Testing samples: 201


In [2]:
from bert import BERT, BERTLM, BERTTrainer

# Architecture settings for the encoder and its language-model head.
# NOTE(review): vocab_size presumably has to cover the tokenizer's vocabulary
# -- confirm against TurkishTokenizer.
vocab_size, d_model = 32768, 768
n_layers, heads = 12, 12
dropout, seq_length = 0.1, 512

# Encoder arguments collected once so the constructor call stays short.
encoder_config = {
    "vocab_size": vocab_size,
    "d_model": d_model,
    "n_layers": n_layers,
    "heads": heads,
    "dropout": dropout,
    "seq_len": seq_length,
}
bert = BERT(**encoder_config)

# Wrap the encoder in the language-model module that the trainer optimizes.
bert_lm = BERTLM(bert=bert, vocab_size=vocab_size)

# The trainer is told explicitly which device to use (chosen when the data
# loaders were built).
# NOTE(review): the recorded run shows 57 iterations per epoch, so a
# 10000-step warmup would not finish within the configured training --
# confirm how BERTTrainer applies warmup_steps.
trainer = BERTTrainer(
    model=bert_lm,
    train_dataloader=train_loader,
    test_dataloader=test_loader,
    lr=1e-4,
    warmup_steps=10000,
    device=device,
)

# Bare last expression: shows the trainer's repr as the cell output.
trainer

Using device: mps
Total Parameters: 135404290


<bert.trainer.BERTTrainer at 0x1108b8880>

In [3]:
# Training: every epoch runs the training pass and then the evaluation pass,
# in that order, each receiving the epoch index.
num_epochs = 1
for epoch in range(num_epochs):
    for run_phase in (trainer.train, trainer.test):
        run_phase(epoch)

EP_train:0:   2%|| 1/57 [00:10<09:28, 10.14s/it]

{'epoch': 0, 'iter': 0, 'avg_loss': 1.927762746810913, 'avg_acc': 53.125, 'loss': 1.927762746810913}


EP_train:0:  19%|| 11/57 [00:46<01:59,  2.59s/it]

{'epoch': 0, 'iter': 10, 'avg_loss': 1.8806931539015337, 'avg_acc': 47.72727272727273, 'loss': 1.831728458404541}


EP_train:0:  37%|| 21/57 [01:09<01:21,  2.26s/it]

{'epoch': 0, 'iter': 20, 'avg_loss': 1.763727653594244, 'avg_acc': 48.80952380952381, 'loss': 1.61106276512146}


EP_train:0:  54%|| 31/57 [01:32<00:59,  2.29s/it]

{'epoch': 0, 'iter': 30, 'avg_loss': 1.6815672382231681, 'avg_acc': 49.395161290322584, 'loss': 1.508908748626709}


EP_train:0:  72%|| 41/57 [01:54<00:36,  2.26s/it]

{'epoch': 0, 'iter': 40, 'avg_loss': 1.6300582420535203, 'avg_acc': 49.84756097560975, 'loss': 1.5275278091430664}


EP_train:0:  89%|| 51/57 [02:18<00:13,  2.31s/it]

{'epoch': 0, 'iter': 50, 'avg_loss': 1.5857696626700608, 'avg_acc': 50.0, 'loss': 1.3152116537094116}


EP_train:0: 100%|| 57/57 [02:30<00:00,  2.64s/it]


EP0, train:             avg_loss=1.5580961871565433,             total_acc=50.27777777777778


EP_test:0:  14%|| 1/7 [00:01<00:07,  1.26s/it]

{'epoch': 0, 'iter': 0, 'avg_loss': 1.3927658796310425, 'avg_acc': 50.0, 'loss': 1.3927658796310425}


EP_test:0: 100%|| 7/7 [03:08<00:00, 26.91s/it]

EP0, test:             avg_loss=1.3189232349395752,             total_acc=45.27363184079602



