# Import Required Modules

In [1]:
import torch
import os
from model.LLMModel import LLMModel
from model.LLMLayer import LLMLayer
from utils.tokenizer import get_tokenizer
from utils.training_data_generator import TrainingDataGen
from utils.trainer import Trainer
from utils.read_config import get_config



In [2]:
# Load the project configuration. Later cells index it by key
# (e.g. 'checkpoint_path', 'data_path', 'data_genrerator_config_name').
config = get_config()

# Create Tokenizer object

In [3]:
# Build the tokenizer. Its vocabulary size (get_vocab_size()) is used below
# as the model's number_of_tokens, and the object itself is handed to the Trainer.
tokenizer = get_tokenizer()

# Initialize Model

In [4]:

# --- Hyperparameters ---------------------------------------------------------
embedding_dimension = 512
max_sequence_length = 2048
# Take the vocabulary size from the tokenizer so the model always matches it.
number_of_tokens = tokenizer.get_vocab_size()
batch_size = 3

# --- Checkpoint discovery ----------------------------------------------------
# The checkpoint directory doubles as the location where the data-generator
# config file is looked up (see the data-generator cell).
checkpoint_dir = config['checkpoint_path']
data_gen_config_path = checkpoint_dir

# A fresh run may have no checkpoint directory yet: treat that as "no
# checkpoints" instead of raising. The listing is sorted because os.listdir
# returns entries in arbitrary order, which would make the chosen checkpoint
# non-deterministic.
if checkpoint_dir and os.path.isdir(checkpoint_dir):
    checkpoint_files = sorted(os.listdir(checkpoint_dir))
else:
    checkpoint_files = []

# Match on the file extension rather than a ".pt" substring anywhere in the
# name, so files such as "model.pt.tmp" are not mistaken for checkpoints.
# ".pth" is still accepted because the previous substring test matched it too.
model_checkpoints = [
    file_name for file_name in checkpoint_files
    if file_name.endswith((".pt", ".pth"))
]

# When several checkpoints exist, the lexicographically last one is used
# (None when there is none). os.path.join works whether or not the configured
# directory ends with a path separator.
model_checkpoint_path = (
    os.path.join(checkpoint_dir, model_checkpoints[-1])
    if model_checkpoints
    else None
)

# --- Model and optimizer -----------------------------------------------------
# Create the model
model = LLMModel(LLMLayer(
    embedding_dimension=embedding_dimension,
    number_of_tokens=number_of_tokens,
    number_of_heads=4,
    number_of_layers=64,
    decoder_stacks=4,
    dropout_rate=0.1,
    max_sequence_length=max_sequence_length,
    # Feed-forward width is tied to the embedding width (4x).
    feed_forward_dimension=4 * embedding_dimension,
    training=True,
))

# Plain SGD over every model parameter.
optimizer = torch.optim.SGD(model.parameters(), lr=0.2)


# Initialize the Data Generator

In [5]:
# Decide how to construct the data generator:
#   * if a checkpoint directory is configured AND the generator config file
#     exists inside it, build the generator from that directory (config_path);
#   * otherwise build it from the raw data location (base_path).
# NOTE: the config key 'data_genrerator_config_name' is misspelled, but it must
# match the key used in the project configuration, so it is kept as-is.
has_saved_generator_config = bool(data_gen_config_path) and os.path.exists(
    os.path.join(data_gen_config_path, config['data_genrerator_config_name'])
)

if has_saved_generator_config:
    training_data_generator = TrainingDataGen(config_path=data_gen_config_path)
else:
    training_data_generator = TrainingDataGen(base_path=config['data_path'])

# Count Trainable Model Parameters (in Billions)

In [6]:
# Add up the element counts of every parameter that requires gradients,
# then print the total expressed in billions (divided by 10**9).
trainable_params = 0
for parameter in model.parameters():
    if parameter.requires_grad:
        trainable_params += parameter.numel()

print(trainable_params / 10**9)

1.403365296


# Start Training

In [7]:
# Hand everything the training loop needs to the Trainer. Arguments are passed
# positionally, in the same order as before, because Trainer's parameter names
# are not visible from this notebook.
trainer = Trainer(
    model,
    training_data_generator,
    tokenizer,
    optimizer,
    max_sequence_length,
    model_checkpoint_path,  # None when no checkpoint file was found
    batch_size,
)

# Launch training; batch_size is passed here as well as to the constructor.
trainer.train(epochs=100, batch_size=batch_size)

  0%|          | 0/100 [40:30<?, ?it/s]

Current File:  d:/Data/books/0.txt
Step 46 Input Tensor: [' link. What matters is how many of them BUY your stuff. One could argue that this moment was the seed behind what everything my life, my companies, and this book is about. This was when I realized it wasn\'t about width, it was all about depth. It\'s about how many people care, not how many people you have. And let\'s call it what it is: When I say "care," what I really mean is "who will convert for me in a transaction."\n So how do you judge an influencer? By a ton of things. Who is following them? If you\'re going after some fifteen- year- old kid with a fan base of screaming girls, but you sell Pampers to middle- aged moms, you\'re going to miss the mark. So you need to know who they\'re reaching. Once you know that, you need to look past that top- line number of followers and look at the actual engagement happening on each individual post. First off, on a quantitative level, you\'re looking for what that engagement is as a 