# Import Required Modules

In [1]:
import torch
import os
from model.LLMModel import LLMModel
from model.LLMLayer import LLMLayer
from utils.tokenizer import get_tokenizer
from utils.training_data_generator import TrainingDataGen
from utils.trainer import Trainer
from utils.read_config import get_config

In [2]:
# Load the project configuration. Later cells index it for
# 'checkpoint_path', 'data_path' and 'data_genrerator_config_name'.
config = get_config()

# Create Tokenizer object

In [3]:
# Build the tokenizer. Its get_vocab_size() result is used as the model's
# number_of_tokens, and the tokenizer itself is passed to the Trainer.
tokenizer = get_tokenizer()

# Initialize Model

In [4]:

# Model and batch hyperparameters.
embedding_dimension = 768
max_sequence_length = 2048
number_of_tokens = tokenizer.get_vocab_size()
batch_size = 2

# The checkpoint directory doubles as the location of the data-generator
# config (the data-generator cell looks for its config file in here).
data_gen_config_path = config['checkpoint_path']

# List candidate checkpoint files. A falsy checkpoint path is tolerated by the
# data-generator cell, so tolerate it here too instead of listing the current
# directory (os.listdir(None)) or failing on string concatenation.
# A configured-but-missing directory still raises, so a mistyped path is not
# silently treated as "no checkpoint".
if data_gen_config_path:
    # os.listdir returns entries in arbitrary order; sort so the checkpoint
    # that is picked is the same on every run and every platform.
    checkpoint_files = sorted(os.listdir(data_gen_config_path))
else:
    checkpoint_files = []

# Keep the last (lexicographically greatest) PyTorch checkpoint file, or None
# when there is none. Match on the file suffix rather than a substring so
# names such as "model.pt.bak" are not mistaken for checkpoints.
model_checkpoint_path = None
for checkpoint_name in checkpoint_files:
    if checkpoint_name.endswith((".pt", ".pth")):
        # os.path.join works whether or not the configured directory ends
        # with a path separator.
        model_checkpoint_path = os.path.join(data_gen_config_path, checkpoint_name)

# Create the model
model = LLMModel(LLMLayer(
    embedding_dimension=embedding_dimension,
    number_of_tokens=number_of_tokens,
    number_of_heads=4,
    number_of_layers=8,
    decoder_stacks=4,
    dropout_rate=0.1,
    max_sequence_length=max_sequence_length,
    feed_forward_dimension=4 * embedding_dimension,
    training=True,
))

# Adam over every model parameter with a fixed learning rate.
optimizer = torch.optim.Adam(model.parameters(), lr=0.0001)


# Initialize the Data Generator

In [5]:
# Decide how to construct the data generator:
#   * if a checkpoint directory is configured AND it contains the generator
#     config file, build the generator from that directory (config_path);
#   * otherwise build it from the data directory (base_path).
# The config key spelling 'data_genrerator_config_name' is kept as-is because
# it must match the key present in the loaded configuration.
has_saved_generator_config = bool(data_gen_config_path) and os.path.exists(
    os.path.join(data_gen_config_path, config['data_genrerator_config_name'])
)

if has_saved_generator_config:
    training_data_generator = TrainingDataGen(config_path=data_gen_config_path)
else:
    training_data_generator = TrainingDataGen(base_path=config['data_path'])

# Calculate Model Parameters

In [6]:
# Total the element counts of every parameter that requires gradients,
# then print the total expressed in billions.
trainable_params = 0
for parameter in model.parameters():
    if parameter.requires_grad:
        trainable_params += parameter.numel()

print(trainable_params / 10**9)

0.83445752


# Start Training

In [7]:
# Wire the model, tokenizer and optimizer into the Trainer, along with the
# maximum sequence length, the checkpoint path located earlier (None when no
# checkpoint file was found) and the batch size.
trainer = Trainer(
    model,
    tokenizer,
    optimizer,
    max_sequence_length,
    model_checkpoint_path,
    batch_size,
)

# Run training for 100 epochs over the data generator.
trainer.train(
    training_data_generator,
    epochs=100,
    batch_size=batch_size,
)

  0%|          | 0/100 [12:48<?, ?it/s]

Current File:  d:/Data/books/0.txt
Step 16 Input Tensor: [' blue sweater—maybe it was black, the bad lighting made it hard to tell—seated in front of a blank beige wall. On the table in front of him rested three wine bottles and a small, dark bucket that looked like it might have once held a potted plant. His skin was sallow in the sickly fluorescent light that barely illuminated his face, but he had a wide, optimistic smile. Looking straight into the Flip cam, he announced himself to his nonexistent audience in a subdued, serious, but friendly voice: "Hello, everybody, and welcome to the very first episode of _ Wine Library TV_ . I\'m Gary Vaynerchuk." The echo was so intense he might have been filming inside a cave.\n\n Over time the show got more dynamic and exciting. The host started to let his huge personality shine. He wore Jets shirts. He paired wines with Lucky Charms and described flavor profiles in colorful, colloquial language, like grape- flavored Nerd candy and dead deer m