# Embeddings

Assume that you have the correct embeddings and create the structure around it.

Need to check what is the structure of the embeddings with Nanogpt.

In [21]:
import torch
import torch.nn.functional as F

def tensor_distance(tensor1, tensor2, distance_type="L2"):
    """
    Compute the L2 (Euclidean) distance between two tensors of the same shape.
    
    Args:
    - tensor1 (torch.Tensor): The first tensor.
    - tensor2 (torch.Tensor): The second tensor.
    - distance_type (str): The type of distance to compute. Currently only
    
    Returns:
    - float: The distance / similarity between the two tensors.
    """
    
    if tensor1.shape != tensor2.shape:
        raise ValueError("Both tensors must have the same shape.")

    if distance_type == "L2":
        distance = torch.norm(tensor1 - tensor2)
    elif distance_type == "Manhattan":
        distance = torch.sum(torch.abs(tensor1 - tensor2))
    elif distance_type == "Cosine":
        similarity = F.cosine_similarity(tensor1, tensor2)
        distance = 1 - similarity
    elif distance_type == "Minkowski":
        distance = torch.norm(tensor1 - tensor2, p=3)
    
    return distance


In [23]:
# Example usage:
tensor_a = torch.rand((1, 3, 15))
tensor_b = torch.rand((1, 3, 15)) 

distance_types = ["L2", "Manhattan", "Cosine", "Minkowski"]

for dist in distance_types:
    print(f"{dist} distance between tensor_a and tensor_b:", tensor_distance(tensor_a, tensor_b, dist))

L2 distance between tensor_a and tensor_b: tensor(2.3165)
Manhattan distance between tensor_a and tensor_b: tensor(12.9438)
Cosine distance between tensor_a and tensor_b: tensor([[0.0208, 0.0554, 0.0341, 0.0544, 0.2788, 0.4471, 0.2120, 0.0249, 0.3111,
         0.1120, 0.0279, 0.2833, 0.4034, 0.1088, 0.0700]])
Minkowski distance between tensor_a and tensor_b: tensor(1.3923)


Old pipeline

In [2]:
# imports

from transformers import pipeline
import torch

import os
import torch
from model import GPT, GPTConfig

from transformers import GPT2Tokenizer, GPT2LMHeadModel

**Loading model**

In [3]:
generator = pipeline("text-generation")
res = generator("dommage")

No model was supplied, defaulted to gpt2 and revision 6c0e608 (https://huggingface.co/gpt2).
Using a pipeline without specifying a model name and revision in production is not recommended.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


In [4]:
%run '/home/etien/Documents/EPFLcourses/MA3/Meditron/nanoGPT/model.py'

In [6]:
def load_model(checkpoint_path=None, config=None):
    """
    Load the GPT model. If a checkpoint path is provided, load weights from the checkpoint.
    
    Args:
    - checkpoint_path (str, optional): Path to the model checkpoint.
    - config (GPTConfig, optional): Configuration for the model. If not provided, uses the default configuration.

    Returns:
    - model (GPT): The instantiated model.
    """
    
    # Use the provided config or create a default one
    if config is None:
        config = GPTConfig()

    # Instantiate the model
    model = GPT(config)

    # If a saved checkpoint is provided, load it
    if checkpoint_path and os.path.exists(checkpoint_path):
        checkpoint = torch.load(checkpoint_path)
        model.load_state_dict(checkpoint['model'])
    
    return model

# Usage
model = load_model("/home/etien/Documents/EPFLcourses/MA3/Meditron/nanoGPT/checkpoint/checkpoint.ckpt")

number of parameters: 123.69M


KeyError: 'model'

In [None]:
torch.save(model.state_dict(), "/home/etien/Documents/EPFLcourses/MA3/Meditron/nanoGPT/checkpoint/checkpoint.ckpt")

In [None]:
model.eval()

GPT(
  (transformer): ModuleDict(
    (wte): Embedding(50304, 768)
    (wpe): Embedding(1024, 768)
    (drop): Dropout(p=0.0, inplace=False)
    (h): ModuleList(
      (0-11): 12 x Block(
        (ln_1): LayerNorm()
        (attn): CausalSelfAttention(
          (c_attn): Linear(in_features=768, out_features=2304, bias=True)
          (c_proj): Linear(in_features=768, out_features=768, bias=True)
          (attn_dropout): Dropout(p=0.0, inplace=False)
          (resid_dropout): Dropout(p=0.0, inplace=False)
        )
        (ln_2): LayerNorm()
        (mlp): MLP(
          (c_fc): Linear(in_features=768, out_features=3072, bias=True)
          (gelu): GELU(approximate='none')
          (c_proj): Linear(in_features=3072, out_features=768, bias=True)
          (dropout): Dropout(p=0.0, inplace=False)
        )
      )
    )
    (ln_f): LayerNorm()
  )
  (lm_head): Linear(in_features=768, out_features=50304, bias=False)
)

In [None]:
model.generate("Bonjour", max_length=100, do_sample=True, temperature=0.9)

NameError: name 'model' is not defined