# Import Libraries

In [3]:
import torch
import torch.nn as nn
import matplotlib.pyplot as plt
from model import GPTLanguageModel
import argparse
from rouge import Rouge
from utils import *
import string

ImportError: libcusparseLt.so.0: cannot open shared object file: No such file or directory

# Load the Parsing Parameter Utilities

In [3]:
def parse_option():
    parser = argparse.ArgumentParser('argument for training')

    parser.add_argument('--batch_size', type=int, default=256,
                        help='batch_size')
    parser.add_argument('--block_size', type=int, default=256,
                        help='Size of blocks to process vocabulary')

    parser.add_argument('--max_iters', type=int, default=256,
                        help='Max Iterations of the Training Process')

    parser.add_argument('--eval_interval', type=int, default=256,
                        help='Max Iterations of the Training Process')


    # optimization
    parser.add_argument('--learning_rate', type=float, default=3e-4,
                        help='learning rate')
    parser.add_argument('--dropout', type=float, default=0.2,
                        help='dropout')

    parser.add_argument('--momentum', type=float, default=0.9,
                        help='momentum')

    # model dataset
    parser.add_argument('--model', type=str, default='basic')
    parser.add_argument('--save_file', type=str, default='./models/test.pth')
    parser.add_argument('--ckpt', type=str, default='')
    parser.add_argument('--optimizer', type=str, choices=['SGD', 'Adam'], default='SGD')
    parser.add_argument('--n_heads', type=int, default=6, help='Number of Heads in Attention Block')
    parser.add_argument('--n_layer', type=int, default=6, help='Number of Layers in Attention Block')
    parser.add_argument('--n_embd', type=int, default=384, help='Embedding dimension')
    parser.add_argument('--loss', type=str, default='NLL')
    parser.add_argument('--training_file', type=str, default='./train_data/shakespeare.txt')
    parser.add_argument('--device', type=str, default='cuda:0')
    parser.add_argument('--dataset', type=str, default='shakespeare',choices=['shakespeare'], help='dataset')
    parser.add_argument('--testing_file_prompt', type=str, default='./test_data/test_prompt_1.txt')
    parser.add_argument('--testing_file_answer', type=str, default='./test_data/test_response_1.txt')


    opt = parser.parse_args([])


    return opt
opt = parse_option()

NameError: name 'argparse' is not defined

# Load the Dataset

In [16]:
#### Load a Text File with the Data of Interest ####
with open('./train_data/shakespeare.txt','r', encoding='utf-8') as f:
    text = f.read()

#### Get the Length of the text ####
print(len(text))

#### Get the text itself ####
print(text[0], text[1], text[2])

#### Get a List of all Potential Characters ####
ascii = string.printable
print(ascii)

# Convert the string to a list
chars = list(ascii)
print(chars)

vocab_size = len(chars)

#### create a mapping from characters to integers ####
stoi = {ch: i for i, ch in enumerate(chars)}
itos = {i: ch for i, ch in enumerate(chars)}


#### Create a function that will convert between integer character encodings and the original characters ####
encode = lambda s: [stoi[c] for c in s]
decode = lambda l: ''.join([itos[i] for i in l])

#### Create a Training and Validation Set ####

data = torch.tensor(encode(text), dtype=torch.long)
n = int(0.9 * len(data))
train_data = data[:n]
val_data = data[n:]

1115394
F i r
0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ!"#$%&'()*+,-./:;<=>?@[\]^_`{|}~ 	

['0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', '!', '"', '#', '$', '%', '&', "'", '(', ')', '*', '+', ',', '-', '.', '/', ':', ';', '<', '=', '>', '?', '@', '[', '\\', ']', '^', '_', '`', '{', '|', '}', '~', ' ', '\t', '\n', '\r', '\x0b', '\x0c']


# Load the GPT Model

In [19]:
#### Initialize the Language Model ####
# Vocab Size: Number of Different Characters that are possible within the language corpus of interest (Upper Case + Lower Case + Numbers + Punctuation)
# n_embd: Size of the vectors used to represent the text within the model (Generated with Neural Networks)
# block_size: Number of characters to use as context when predicting the next probable character
# dropout: number of weights to drop from specific layer (used to help generalization of model)
# device: The gpu that the model will be loaded onto.

model = GPTLanguageModel(vocab_size, n_embd=384, block_size=256, dropout=0.2, device= 'cuda:0')
model = model.to('cuda:0')

RuntimeError: Found no NVIDIA driver on your system. Please check that you have an NVIDIA GPU and installed a driver from http://www.nvidia.com/Download/index.aspx

# Perform Model Training

In [None]:
optimizer = torch.optim.AdamW(model.parameters(), lr=3e-4)

for iter in range(opt.max_iters):

    # sample a batch of data
    xb, yb = get_batch('train',train_data,val_data,opt)

    # evaluate the loss
    logits, loss = model(xb, yb)
    optimizer.zero_grad(set_to_none=True)
    loss.backward()
    optimizer.step()

# Load Prompt and the Associated Answer to test the Model

In [None]:
with open(opt.testing_file_prompt, 'r', encoding='utf-8') as f:
    text_test_prompt = f.read()

with open(opt.testing_file_answer, 'r', encoding='utf-8') as f:
    text_test_answer= f.read()

#### Encode the Prompt into a numerical form that can be input into your model ####
context = torch.tensor(encode(text_test_prompt), device='cuda:0').unsqueeze(dim=-1)

# Generate the Response from your LLM

In [None]:
#### You want your model to generate the same number of characters as values in your response ####


#### Note that the testing files will not contain any characters that are not in the vocabulary of the LLM! ####
### This is a common problem with LLM systems. How can we overcome it? ####
number_gen = len(text_test_answer)
response = decode(model.generate(context, max_new_tokens=number_gen,block_size=opt.block_size)[0].tolist())

# Compute a Metric between the True Answer and your Response

In [None]:
#### We use the Rouge Metric in this Example ####
rouge = Rouge()
scores = rouge.get_scores(response, text_test_answer)
print(scores)