# Import Required Modules

In [5]:
import torch
import os
from model.LLMModel import LLMModel
from model.LLMLayer import LLMLayer
from utils.tokenizer import get_tokenizer
from utils.training_data_generator import TrainingDataGen
from utils.trainer import Trainer
from utils.read_config import get_config

In [6]:
# Load the project configuration once; later cells index it by key
# (e.g. 'checkpoint_path', 'data_path') to locate checkpoints and training data.
config = get_config()

# Create Tokenizer object

In [7]:
# Shared tokenizer instance: its vocabulary size sizes the model, and the same
# object is handed to the Trainer further down.
tokenizer = get_tokenizer()

# Initialize Model

In [8]:

# --- Hyperparameters ---------------------------------------------------------
embedding_dimension = 512
max_sequence_length = 2048
# Take the vocabulary size from the tokenizer so the model always matches it.
number_of_tokens = tokenizer.get_vocab_size()
batch_size = 1

# --- Checkpoint discovery ----------------------------------------------------
# The same directory is used for model checkpoints and for the data-generator
# config lookup performed in the data-generator cell.
data_gen_config_path = config['checkpoint_path']

# A missing (or unset) checkpoint directory just means there is nothing to
# resume from; treat it as empty instead of failing on a fresh run.
if data_gen_config_path and os.path.isdir(data_gen_config_path):
    checkpoint_files = os.listdir(data_gen_config_path)
else:
    checkpoint_files = []

# Pick a saved model to resume from, if any.
# - endswith(".pt") only accepts real ".pt" files; a substring search would
#   also match names such as "x.pt.tmp" or "model.pth".
# - os.path.join is correct whether or not the configured path ends with a
#   separator (plain string concatenation is not).
# - os.listdir order is arbitrary, so sort to make the choice reproducible:
#   the lexicographically last ".pt" file wins.
#   NOTE(review): assumes checkpoint file names sort in the desired order --
#   confirm against how Trainer names the files it saves.
model_checkpoint_path = None
for file in sorted(checkpoint_files):
    if file.endswith(".pt"):
        model_checkpoint_path = os.path.join(data_gen_config_path, file)

# Collect the layer settings in one place, then build the layer and wrap it
# in the top-level model.
layer_settings = dict(
    embedding_dimension=embedding_dimension,
    number_of_tokens=number_of_tokens,
    number_of_heads=4,
    number_of_layers=64,
    decoder_stacks=2,
    dropout_rate=0.1,
    max_sequence_length=max_sequence_length,
    # Feed-forward width is tied to the embedding width (4x).
    feed_forward_dimension=embedding_dimension * 4,
    training=True,
)
model = LLMModel(LLMLayer(**layer_settings))

# Adam over every model parameter with a fixed learning rate.
optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)


# Initialize the Data Generator

In [9]:
# Build the generator from a saved generator config when one is present in the
# checkpoint directory; otherwise build it from the data directory.
# The `and` short-circuits, so the config-name key is only read when a
# checkpoint directory is actually configured.
has_saved_generator_config = bool(data_gen_config_path) and os.path.exists(
    os.path.join(data_gen_config_path, config['data_genrerator_config_name'])
)

if has_saved_generator_config:
    training_data_generator = TrainingDataGen(config_path=data_gen_config_path)
else:
    training_data_generator = TrainingDataGen(base_path=config['data_path'])

# Calculate Model Parameters

In [10]:
# Count only the parameters that will receive gradient updates.
trainable_params = 0
for param in model.parameters():
    if param.requires_grad:
        trainable_params += param.numel()

# Report the model size in billions of parameters.
print(trainable_params / 10**9)

0.489420208


# Start Training

In [11]:
# Number of passes requested from the training loop.
num_epochs = 100

# Arguments stay positional: the Trainer signature is defined outside this notebook.
trainer = Trainer(
    model,
    tokenizer,
    optimizer,
    max_sequence_length,
    model_checkpoint_path,  # None when no saved checkpoint was selected
    batch_size,
)
trainer.train(
    training_data_generator,
    epochs=num_epochs,
    batch_size=batch_size,
)

  0%|          | 0/100 [00:00<?, ?it/s]

Current File:  d:/Data/books/1.txt
Step 3 Input Tensor: [' # BeccaToldMeTo\n\n\n ## PREFACE\n\n Thank you for reading this. I wanted the book to start with Darren\'s Foreword, but thought that I should give you some insight into how all this came together before you jump into it. Like the book itself, I find it\'s an interesting story in building human connection. I\'ll try to be concise. I\'m only one paragraph in and I already feel like a speaker at an award ceremony who is getting rushed off stage and the microphone is disappearing into it.\n\n As a first- time author, the path to get published was anything but linear. I have been fascinated with the concept of self- publishing for 2 years but never published a book of my own. After reading countless books on the subject, listening to podcasts, buying courses and Skyping one- on- one with 2 bestselling authors - I gave myself a wicked case of analysis paralysis.\n\n When I started this audacious goal of writing books, my wife told m