# Import Required Modules

In [1]:
import torch
import os
from model.LLMModel import LLMModel
from model.LLMLayer import LLMLayer
from utils.tokenizer import get_tokenizer
from utils.training_data_generator import TrainingDataGen
from utils.trainer import Trainer
from utils.read_config import get_config

In [2]:
# Load the project configuration once; later cells read config['checkpoint_path'],
# config['data_path'] and config['data_genrerator_config_name'] from it.
config = get_config()

# Create Tokenizer object

In [3]:
# Build the tokenizer; its get_vocab_size() value is used as the model's
# number_of_tokens, and the tokenizer itself is passed to the Trainer.
tokenizer = get_tokenizer()

# Initialize Model

In [4]:

# Model and batching hyperparameters shared by the cells below.
embedding_dimension = 1024
max_sequence_length = 2048
# Vocabulary size is read from the tokenizer rather than hard-coded.
number_of_tokens = tokenizer.get_vocab_size()
batch_size = 2

# The checkpoint directory doubles as the location where the data-generator
# config file is looked up in the data-generator cell.
data_gen_config_path = config['checkpoint_path']

# List the checkpoint directory only when it is configured and actually exists:
# os.listdir(None) would silently list the current working directory, and a
# missing directory would raise before a fresh training run could start.
# os.listdir() returns entries in arbitrary order, so sort them to make the
# checkpoint selection below reproducible across runs and platforms.
if data_gen_config_path and os.path.isdir(data_gen_config_path):
    checkpoint_files = sorted(os.listdir(data_gen_config_path))
else:
    checkpoint_files = []

# Pick a model checkpoint to hand to the Trainer. When several entries match,
# the last one in sorted order wins; stays None when nothing matches.
model_checkpoint_path = None
for file in checkpoint_files:
    # Substring match kept from the original (also matches e.g. ".pth").
    if ".pt" in file:
        # os.path.join inserts the separator if the configured path lacks one.
        model_checkpoint_path = os.path.join(data_gen_config_path, file)

# Build the layer definition first, then wrap it in the model.
llm_layer = LLMLayer(
    embedding_dimension=embedding_dimension,
    number_of_tokens=number_of_tokens,
    number_of_heads=4,
    number_of_layers=8,
    decoder_stacks=4,
    dropout_rate=0.1,
    max_sequence_length=max_sequence_length,
    # Feed-forward width is four times the embedding width.
    feed_forward_dimension=embedding_dimension * 4,
    training=True,
)
model = LLMModel(llm_layer)

# Adam over every parameter the model exposes, with a fixed learning rate.
optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)


# Initialize the Data Generator

In [5]:
# Restore the data generator from its saved config when that file is present in
# the checkpoint directory; otherwise start a new generator over the raw data.
# The `and` short-circuits, so the config-name key is only read when a
# checkpoint directory is configured.
saved_generator_config_found = bool(data_gen_config_path) and os.path.exists(
    os.path.join(data_gen_config_path, config['data_genrerator_config_name'])
)

if saved_generator_config_found:
    training_data_generator = TrainingDataGen(config_path=data_gen_config_path)
else:
    training_data_generator = TrainingDataGen(base_path=config['data_path'])

# Count Trainable Model Parameters (in Billions)

In [6]:
# Tally the element count of every parameter that receives gradients.
trainable_params = 0
for parameter in model.parameters():
    if parameter.requires_grad:
        trainable_params += parameter.numel()

# Report the total in billions of parameters.
print(trainable_params / 10 ** 9)

0.787113392


# Start Training

In [7]:
# Wire the model, tokenizer and optimizer into the Trainer together with the
# sequence length, the checkpoint path found earlier (None when no ".pt" file
# was found) and the batch size.
# NOTE(review): presumably the Trainer resumes from model_checkpoint_path when it
# is not None -- confirm in utils.trainer.
trainer = Trainer(model, tokenizer, optimizer, max_sequence_length, model_checkpoint_path, batch_size)
# Run training for 100 epochs over the data generator. batch_size is passed here
# as well as to the constructor above.
trainer.train(training_data_generator, epochs=100, batch_size=batch_size)

  0%|          | 0/100 [1:48:07<?, ?it/s]

Current File:  d:/Data/books/0.txt
Step 144 Input Tensor: ['. That lasted eighty- nine episodes. And then he realized he really was done. He loved wine but he was an entrepreneur first, and there were just too many other things to do.\n\n That character was me, of course. At the time I really did think I was done with daily video blogging forever. I could imagine the occasional interview or one- off video ( have you ever seen my "Monday Morning Motivational" spot?) , but a full show was too much. There was only one thing I didn\'t count on: you, Vayner Nation. I missed you! I missed talking to you every day. I mean, sure I could talk to you anytime I wanted to on Twitter, Instagram, or Facebook, but video elicits a different energy and encourages a spontaneity and vibrancy that can\'t be replicated on any other platform. I should have known that something was missing in my world when I realized that during every forty- five- minute speaking engagement the part I most looked forward to 