# Import Required Modules

In [1]:
import torch
import os
from model.LLMModel import LLMModel
from model.LLMLayer import LLMLayer
from utils.tokenizer import get_tokenizer
from utils.training_data_generator import TrainingDataGen
from utils.trainer import Trainer
from utils.read_config import get_config

In [2]:
# Load the project configuration. Later cells index it by string key:
# 'checkpoint_path', 'data_path' and 'data_genrerator_config_name'.
config = get_config()

# Create Tokenizer object

In [3]:
# Build the tokenizer. Its get_vocab_size() sets the model's token count below,
# and the same object is handed to the Trainer.
tokenizer = get_tokenizer()

# Initialize Model

In [4]:

# Hyperparameters shared by the model definition and the trainer.
embedding_dimension = 512
max_sequence_length = 2048
number_of_tokens = tokenizer.get_vocab_size()
batch_size = 1

# The checkpoint directory doubles as the place where the data-generator
# config is looked up by a later cell, hence the second name for the same path.
checkpoint_dir = config['checkpoint_path']
data_gen_config_path = checkpoint_dir
checkpoint_files = os.listdir(checkpoint_dir)

# Collect model checkpoints by file extension. A plain substring test on ".pt"
# would also match names such as "notes.pt.txt"; ".pth" is accepted as well
# because the substring test used to match it.
# os.path.join is used instead of string concatenation so the path is valid
# whether or not the configured directory ends with a separator.
checkpoint_candidates = [
    os.path.join(checkpoint_dir, name)
    for name in checkpoint_files
    if name.endswith(('.pt', '.pth'))
]

# os.listdir returns entries in arbitrary order, so "whichever came last" is
# not reproducible. Pick the most recently modified checkpoint instead;
# None means no checkpoint was found.
model_checkpoint_path = (
    max(checkpoint_candidates, key=os.path.getmtime)
    if checkpoint_candidates
    else None
)

# Gather the layer settings in one place, then build the model around the layer.
layer_options = dict(
    embedding_dimension=embedding_dimension,
    number_of_tokens=number_of_tokens,
    number_of_heads=4,
    number_of_layers=64,
    decoder_stacks=2,
    dropout_rate=0.1,
    max_sequence_length=max_sequence_length,
    # Feed-forward width is four times the embedding width.
    feed_forward_dimension=4 * embedding_dimension,
    training=True,
)
model = LLMModel(LLMLayer(**layer_options))

# Adam over all model parameters with a fixed learning rate of 1e-4.
optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)


# Initialize the Data Generator

In [5]:
# Use the saved generator config only when a config directory is set AND the
# config file is present inside it; in every other case build the generator
# from the raw data path. The config key is spelled exactly as the notebook
# reads it (presumably matching the config file -- verify before renaming).
saved_config_found = bool(data_gen_config_path) and os.path.exists(
    os.path.join(data_gen_config_path, config['data_genrerator_config_name'])
)

if saved_config_found:
    training_data_generator = TrainingDataGen(config_path=data_gen_config_path)
else:
    training_data_generator = TrainingDataGen(base_path=config['data_path'])

# Calculate Model Parameters

In [6]:
# Count the elements of every parameter that requires gradients.
trainable_params = 0
for parameter in model.parameters():
    if parameter.requires_grad:
        trainable_params += parameter.numel()

# Report the count in billions of parameters.
print(trainable_params / 10**9)

0.455855536


# Start Training

In [7]:
# Wire the model, data source, tokenizer and optimizer into the trainer.
# Arguments stay positional and in their original order.
trainer = Trainer(
    model,
    training_data_generator,
    tokenizer,
    optimizer,
    max_sequence_length,
    model_checkpoint_path,  # None when no checkpoint file was found
    batch_size,
)

# batch_size is passed to both the constructor and train(), as before.
trainer.train(epochs=100, batch_size=batch_size)

  0%|          | 0/100 [10:00:19<?, ?it/s]

Current File:  d:/Data/books/0.txt
Step 950 Input Tensor: [' in this book, and it\'s a good one—a classic example of how to get yourself to the next professional level. He followed me around for a day and produced a gorgeous short film that perfectly encapsulates my entire philosophy about business and entrepreneurship. It\'s called "Clouds and Dirt." I enjoyed working with DRock so much I hired him to create videos for me full- time.\n\n Then I looked around and saw that by bringing him on board I had accidentally formed the perfect content creation team—David for video, Steve Unwin for copy ( a job eventually shared with India Kieser) , and Zak Moy for design. On a whim, I pulled them aside and announced that I wanted to film a video, and they were going to help me.\n\n And that\'s how on July 31, 2014, on a YouTube channel with 30,000 subscribers, one of the world\'s first business Q& A video blogs was launched. It opened on a guy in a blue- striped golf shirt smiling into the camer