In [6]:
import torch
import torch.nn as nn
import  torch.nn.functional as F
from torch.nn.utils.rnn import pad_sequence

# Now to make a GPT model
class GPTLanguageModel(nn.Module):
    def __init__(self, vocab_size):
        super().__init__()
        # Create a self vocab_size variable to save it into the class
        self.vocab_size = vocab_size
        # Make an embedding table
        self.token_embedding_table = nn.Embedding(self.vocab_size, n_embd)#.to(device)
        # Adding a positional embedding table as well
        self.positional_embedding_table = nn.Embedding(block_size, n_embd)#.to(device)  # added new parameter, n_embd
        # Adding 4 decoder layers
        self.blocks = nn.Sequential(*(Block(n_embd, n_head=n_head) for _ in range(n_layer)))
        # final layer normalization
        self.lm_f = nn.LayerNorm(n_embd)#.to(device)
        # unsure what this is below
        self.lm_head = nn.Linear(n_embd, self.vocab_size)#.to(device)

        # std variables to help training converge better
        self.apply(self._init_weights)

    def _init_weights(self, module):
        if isinstance(module, nn.Linear):
            torch.nn.init.normal_(module.weight, mean=0.0, std=0.02)
            if module.bias is not None:
                torch.nn.init.zeros_(module.bias)
        elif isinstance(module, nn.Embedding):
            torch.nn.init.normal_(module.weight, mean=0.0, std=0.02)
    # Linear layers are initialized with normal distribution, and embedding layers are initialized with normal distribution as well.

    def forward(self, index, targets=None):
        B, T = index.shape
        # index represents the sequence of tokens 

        # Add in token and positional embeddings
        token_embd = self.token_embedding_table(index)  # (B, T, C)
        # This layer is an embedding table for token embeddings. Given an input index (representing a token), it retrieves the corresponding embedding vector from the table.
        pos_embd = self.positional_embedding_table(torch.arange(T))#, device=device))  # (T, C)
        # Embedding table for positional embeddings. It helps the model take into account the order or position of tokens in the sequence
        x = token_embd + pos_embd  # (B, T, C)
        x = self.blocks(x)  # (B, T, C)
        x = self.lm_f(x)  # (B, T, C)
        logits = self.lm_head(x)  # (B, T, vocab_size)

        if targets is None:
            loss = None
        else:
            B, T, C = logits.shape
            logits = logits.view(B*T, C)
            targets = targets.view(B*T)
            loss = F.cross_entropy(logits, targets)
        return logits, loss

    def generate(self, index, max_new_tokens):
        # model is iteratively called to predict the next token, and the predicted token is concatenated to the input sequence
        for _ in range(max_new_tokens):
            logits, loss = self.forward(index)
            logits = logits[:, -1, :]
            # print(f"Logits shape: {logits.shape}")
            # These are the raw scores produced by the model before applying the softmax function. Each entry in the logits tensor represents 
            # the model's prediction for the likelihood of a particular token in the vocabulary. The dimensions of logits are (B, T, vocab_size),
            #  where B is the batch size, T is the sequence length, and vocab_size is the size of the vocabulary.
            probabilities = F.softmax(logits, dim=-1)

            # Ensure generated index is within the vocabulary size
            valid_indices = torch.arange(self.vocab_size)#.to(device)
            # print('vocab size', vocab_size)
            index_next = torch.multinomial(probabilities[:, valid_indices], num_samples=1)
            index_next = valid_indices[index_next]  # Map back to the original indices
            
            index = torch.cat((index, index_next), dim=1)

        return index

In [8]:
# use the model already created (model-01.pkl) and fine tune it by creating a QA model
import pickle

with open('model-05.pkl', 'rb') as f:
    model = torch.load(f)

RuntimeError: Attempting to deserialize object on a CUDA device but torch.cuda.is_available() is False. If you are running on a CPU-only machine, please use torch.load with map_location=torch.device('cpu') to map your storages to the CPU.

In [5]:
from langchain.llms import Ollama

llm = Ollama(model="llama2:13b")
llm("The first man on the moon was ... think step by step")

In [4]:
# Example fine-tuning loop using GPT-like model
