# Import Required Modules

In [1]:
import torch
import os
from model.LLMModel import LLMModel
from model.LLMLayer import LLMLayer
from utils.tokenizer import get_tokenizer
from utils.training_data_generator import TrainingDataGen
from utils.trainer import Trainer
from utils.read_config import get_config

In [2]:
# Load the project configuration. Later cells read config['checkpoint_path'],
# config['data_genrerator_config_name'] and config['data_path'] from it.
config = get_config()

# Create Tokenizer object

In [3]:
# Build the tokenizer. Its get_vocab_size() fixes the model's number_of_tokens,
# and the same object is later handed to the Trainer.
tokenizer = get_tokenizer()

# Initialize Model

In [4]:

# Hyperparameters shared by the model, optimizer and trainer cells.
embedding_dimension = 512
max_sequence_length = 2048
number_of_tokens = tokenizer.get_vocab_size()
batch_size = 1

# The checkpoint directory doubles as the place where the data-generator
# config file is looked up by the data-generator cell.
data_gen_config_path = config['checkpoint_path']

# os.listdir raises on a missing directory and returns entries in arbitrary
# order. Guard the call and sort the listing so a fresh run (no checkpoints
# yet) still works and the selected checkpoint is reproducible.
if data_gen_config_path and os.path.isdir(data_gen_config_path):
    checkpoint_files = sorted(os.listdir(data_gen_config_path))
else:
    checkpoint_files = []

# Keep the last file whose name contains ".pt"; stays None when there is none.
# os.path.join inserts the separator when the configured directory does not
# end with one (plain string concatenation produced a broken path then).
model_checkpoint_path = None
for file in checkpoint_files:
    if ".pt" in file:
        model_checkpoint_path = os.path.join(data_gen_config_path, file)

# Collect the layer settings in one place, then build the layer and wrap it
# in the model.
layer_settings = dict(
    embedding_dimension=embedding_dimension,
    number_of_tokens=number_of_tokens,
    number_of_heads=4,
    number_of_layers=64,
    decoder_stacks=2,
    dropout_rate=0.1,
    max_sequence_length=max_sequence_length,
    feed_forward_dimension=embedding_dimension * 4,
    training=True,
)
model = LLMModel(LLMLayer(**layer_settings))

# Adam over every parameter the model exposes, with a fixed learning rate.
optimizer = torch.optim.Adam(params=model.parameters(), lr=1e-4)


# Initialize the Data Generator

In [5]:
# Build the generator from config_path when the file named by
# config['data_genrerator_config_name'] exists inside data_gen_config_path;
# in every other case build it from base_path=config['data_path'].
saved_gen_config_found = bool(data_gen_config_path) and os.path.exists(
    os.path.join(data_gen_config_path, config['data_genrerator_config_name'])
)
training_data_generator = (
    TrainingDataGen(config_path=data_gen_config_path)
    if saved_gen_config_found
    else TrainingDataGen(base_path=config['data_path'])
)

# Count Trainable Model Parameters (in billions)

In [6]:
# Total element count over the parameters that require gradients,
# printed in units of 10^9 (billions).
trainable_params = sum(
    param.numel()
    for param in model.parameters()
    if param.requires_grad
)
print(trainable_params / 10 ** 9)

0.455855536


# Start Training

In [7]:
# Wire everything built above into the Trainer. The arguments are positional
# and Trainer's parameter names are not visible in this notebook, so the order
# must be kept as is. model_checkpoint_path is None when no ".pt" file was
# found in the checkpoint directory.
# NOTE(review): presumably Trainer resumes from that checkpoint when it is set -- confirm in utils/trainer.py.
trainer = Trainer(model,training_data_generator, tokenizer, optimizer, max_sequence_length, model_checkpoint_path, batch_size)
# Run 100 epochs; batch_size is passed again here in addition to the constructor.
trainer.train(epochs=100, batch_size=batch_size)

  0%|          | 0/100 [18:39<?, ?it/s]

Current File:  d:/Data/books/0.txt
Step 2781 Input Tensor: [' the market.\n** 3.** My last few books have been narrowly focused on sharing with you the marketing strategies and tactics that work, and documenting the growth and development of social media. This book will also cover all the most up- to- date information on platforms and tech, and how to create native content that gets people\'s attention. But it will go broader and deeper as well, revealing what I\'ve learned not just in my role as entrepreneur and marketer, but as a leader, manager, and family man. I hope this book will offer a perfect blend of motivation, inspiration, data, strategy, and executable information.\n** 4.** We\'ve also included many brand- new questions and answers pulled from fans across our channels as well as from our own employees. So you\'ll find some familiar stuff in this book, and a lot that is new. Some questions have been consolidated or rephrased for clarity. Some are short and simple and silly,