In [1]:
# from google.colab import drive
# drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
import pandas as pd
import torch
import numpy as np
import math
import json
import os

# Load Data

In [3]:
# val_data = pd.read_csv('/content/drive/MyDrive/NLP Project/DRAMA LAMA-Data/Validation/validation.csv')
# train_data = pd.read_csv('/content/drive/MyDrive/NLP Project/DRAMA LAMA-Data/Train/train.csv')
# test_data = pd.read_csv('/content/drive/MyDrive/NLP Project/DRAMA LAMA-Data/Test/test.csv')

In [5]:
# Read the three dataset splits from CSV files located relative to the
# notebook's working directory.
val_data = pd.read_csv('Validation/validation.csv')
train_data = pd.read_csv('Train/train.csv')
test_data = pd.read_csv('Test/test.csv')

In [6]:
# Number of rows in the training split.
len(train_data)

20899

In [7]:
# Number of rows in the test split.
len(test_data)

4479

In [8]:
# Number of rows in the validation split.
len(val_data)

4479

# Compute Perplexity

In [9]:
def compute_token_probability(model, tokenizer, sentence):
    """Return the probability the model assigns to each actual token of ``sentence``.

    The sentence is tokenized, passed through ``model`` once with gradient
    tracking disabled, and every token after the first is paired with the
    probability the model gave it at the preceding position.

    Args:
        model: Callable language model exposing ``.device`` whose output has a
            ``.logits`` tensor indexed as ``(batch, position, vocabulary)``.
        tokenizer: Callable tokenizer accepting ``return_tensors='pt'`` whose
            result supports ``.to(device)`` and ``['input_ids']``; it must also
            provide ``convert_ids_to_tokens``.
        sentence: Text to score.

    Returns:
        list[tuple[str, float]]: ``(token, probability)`` pairs for every token
        except the first one (which has no preceding context to predict it).
        The list is empty when the sentence tokenizes to a single token.
    """
    # Tokenize the input sentence and move the tensors to the model's device
    inputs = tokenizer(sentence, return_tensors='pt').to(model.device)
    input_ids = inputs['input_ids']

    # Forward pass without building the autograd graph
    with torch.no_grad():
        logits = model(**inputs).logits

    # Position i predicts token i + 1, so drop the last logit row and the first label
    shifted_logits = logits[:, :-1, :]
    shifted_labels = input_ids[:, 1:]

    # Normalize over the vocabulary, then pick the probability of each actual token
    probs = torch.softmax(shifted_logits, dim=-1)
    token_probs = torch.gather(probs, 2, shifted_labels.unsqueeze(-1)).squeeze(-1)

    tokens = tokenizer.convert_ids_to_tokens(input_ids[0].tolist())

    # Select the single batch row explicitly. A bare ``.squeeze()`` would also
    # remove the sequence dimension when only one token is scored, turning
    # ``.tolist()`` into a plain float and making ``zip`` raise ``TypeError``.
    return list(zip(tokens[1:], token_probs[0].tolist()))

In [10]:
def process_val_data(model, tokenizer, val_data):
    """Score the 'Instruction' text of every row in ``val_data``.

    Args:
        model: Language model forwarded to ``compute_token_probability``.
        tokenizer: Tokenizer forwarded to ``compute_token_probability``.
        val_data: Object providing ``iterrows()`` whose rows can be indexed
            with ``'Instruction'`` (e.g. a pandas DataFrame).

    Returns:
        dict: Maps each row index to the list returned by
        ``compute_token_probability`` for that row's instruction text.
    """
    return {
        row_index: compute_token_probability(model, tokenizer, row['Instruction'])
        for row_index, row in val_data.iterrows()
    }

In [11]:
def calculate_sentence_probability(token_probs):
    """Return the natural-log probability of a sentence.

    Despite the function's name, the value returned is the *sum of the natural
    logarithms* of the per-token probabilities, i.e. the log of the sentence
    probability, not the probability itself.

    Args:
        token_probs: Iterable of per-token probabilities.

    Returns:
        The summed log-probability. An empty input yields ``0``. If any token
        probability is exactly zero the result is ``-inf``.

    Raises:
        ValueError: If a probability is negative (``math.log`` domain error).
    """
    # math.log(0) raises ValueError; a zero probability (e.g. after floating
    # point underflow in the softmax) legitimately means log-probability -inf.
    log_probs = [
        math.log(prob) if prob != 0 else float('-inf')
        for prob in token_probs
    ]
    return sum(log_probs)

In [12]:
def calculate_perplexity(sentence_log_prob, num_tokens):
    """Convert a sentence log-probability into perplexity.

    Args:
        sentence_log_prob: Natural-log probability of the whole sentence.
        num_tokens: Number of tokens the log-probability was summed over.

    Returns:
        float: ``exp`` of the negated per-token average log-probability.
    """
    mean_negative_log_prob = -sentence_log_prob / num_tokens
    return math.exp(mean_negative_log_prob)

In [11]:
# def batch_process(data, batch_size=32):
#     for i in range(0, len(data), batch_size):
#         yield data[i:i + batch_size]

# def process_val_data_in_batches(model, tokenizer, val_data, batch_size=32):
#     results = {}
#     for batch_idx, batch_data in enumerate(batch_process(val_data, batch_size)):
#         batch_results = process_val_data(model, tokenizer, batch_data)
#         results.update(batch_results)

#     return results

# def calculate_perplexity_overall(model, tokenizer, val_data, batch_size=32):
#     results = process_val_data_in_batches(model, tokenizer, val_data, batch_size)
#     overall_perplexity = []

#     for idx, tokens in results.items():
#         token_probabilities = [prob for _, prob in tokens]
#         sentence_prob = calculate_sentence_probability(token_probabilities)
#         perplexity_value = calculate_perplexity(sentence_prob, len(token_probabilities))
#         overall_perplexity.append(perplexity_value)
#     return overall_perplexity


In [13]:
# Function to save checkpoint after each batch
def save_checkpoint(perplexity_values, batch_idx, checkpoint_file):
    """Persist the perplexities computed so far and the last finished batch index.

    Args:
        perplexity_values: JSON-serializable list of perplexity values.
        batch_idx: Zero-based index of the last fully processed batch.
        checkpoint_file: Destination path of the JSON checkpoint. When ``None``
            checkpointing is disabled and nothing is written.

    The data is first written to ``<checkpoint_file>.tmp`` and then moved over
    the destination with ``os.replace`` so that an interruption during the
    write cannot leave a truncated checkpoint behind.
    """
    if checkpoint_file is None:
        return

    checkpoint_data = {
        "perplexity_values": perplexity_values,
        "last_processed_batch": batch_idx
    }
    tmp_file = f"{checkpoint_file}.tmp"
    with open(tmp_file, 'w') as f:
        json.dump(checkpoint_data, f)
    os.replace(tmp_file, checkpoint_file)

# Function to load from the checkpoint
def load_checkpoint(checkpoint_file):
    """Load previously saved perplexity values and the last finished batch index.

    Args:
        checkpoint_file: Path of the JSON checkpoint, or ``None`` when
            checkpointing is disabled.

    Returns:
        tuple: ``(perplexity_values, last_processed_batch)``. When
        ``checkpoint_file`` is ``None`` or does not exist, ``([], -1)`` is
        returned so that processing starts from the first batch.
    """
    # os.path.exists(None) raises TypeError on POSIX, so test for None first.
    if checkpoint_file is None or not os.path.exists(checkpoint_file):
        return [], -1  # No checkpoint found, start from scratch

    with open(checkpoint_file, 'r') as f:
        checkpoint_data = json.load(f)
    return checkpoint_data['perplexity_values'], checkpoint_data['last_processed_batch']

def batch_process(data, batch_size=32):
    """Yield consecutive slices of ``data`` holding at most ``batch_size`` items.

    Args:
        data: Sliceable object supporting ``len()``.
        batch_size: Length of each slice; the final slice may be shorter.

    Yields:
        ``data[start:start + batch_size]`` for ``start`` stepping through
        ``range(0, len(data), batch_size)``.
    """
    yield from (
        data[start:start + batch_size]
        for start in range(0, len(data), batch_size)
    )

def process_val_data_in_batches(model, tokenizer, val_data, batch_size=32):
    """Run ``process_val_data`` over ``val_data`` one batch at a time.

    Args:
        model: Language model forwarded to ``process_val_data``.
        tokenizer: Tokenizer forwarded to ``process_val_data``.
        val_data: Data split by ``batch_process`` into batches.
        batch_size: Number of rows per batch.

    Returns:
        dict: The per-batch result dictionaries merged into a single one.
    """
    merged = {}
    for chunk in batch_process(val_data, batch_size):
        merged.update(process_val_data(model, tokenizer, chunk))
    return merged

def calculate_perplexity_overall(model, tokenizer, val_data, batch_size=32, checkpoint_file=None):
    """Compute one perplexity value per row of ``val_data``, batch by batch.

    Args:
        model: Language model forwarded to ``process_val_data``.
        tokenizer: Tokenizer forwarded to ``process_val_data``.
        val_data: Data to score; split into batches by ``batch_process``.
        batch_size: Number of rows per batch.
        checkpoint_file: Optional path of a JSON checkpoint. When given, results
            saved there are loaded first, batches up to the stored index are
            skipped, and the file is rewritten after every batch. When ``None``
            (the default) no checkpoint is read or written.

    Returns:
        list: Per-row perplexity values, including any restored from the
        checkpoint.

    Note:
        Resuming only skips batches by index, so a checkpoint must be reused
        with the same data and ``batch_size`` that produced it.
    """
    # Restore earlier progress only when checkpointing was requested
    if checkpoint_file is None:
        overall_perplexity, last_processed_batch = [], -1
    else:
        overall_perplexity, last_processed_batch = load_checkpoint(checkpoint_file)

    for batch_idx, batch_data in enumerate(batch_process(val_data, batch_size)):
        if batch_idx <= last_processed_batch:
            continue  # Skip batches that have already been processed

        # Process current batch
        batch_results = process_val_data(model, tokenizer, batch_data)
        for tokens in batch_results.values():
            token_probabilities = [prob for _, prob in tokens]
            sentence_prob = calculate_sentence_probability(token_probabilities)
            perplexity_value = calculate_perplexity(sentence_prob, len(token_probabilities))
            overall_perplexity.append(perplexity_value)

        if checkpoint_file is None:
            print(f"Batch {batch_idx + 1} processed.")
        else:
            # Save checkpoint after each batch. Report the path that was passed
            # in, not a global variable that may be undefined or point elsewhere.
            save_checkpoint(overall_perplexity, batch_idx, checkpoint_file)
            print(f"Batch {batch_idx + 1} processed, perplexity checkpoint saved at {checkpoint_file}.")

    return overall_perplexity


# Llama-3.2-1B

In [14]:
# ! huggingface-cli login
# NOTE: Hugging Face access tokens were previously pasted here in plain text.
# They have been redacted; any token that was exposed this way should be revoked.
# ! huggingface-cli login --token "$HF_TOKEN"

zsh:1: command not found: huggingface-cli


In [16]:
# Load the tokenizer and causal language model for the
# "meta-llama/Llama-3.2-1B" checkpoint.
from transformers import AutoTokenizer, AutoModelForCausalLM

tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-3.2-1B")
model = AutoModelForCausalLM.from_pretrained("meta-llama/Llama-3.2-1B")

# Put the model in evaluation mode; as the cell's last expression this also
# displays the module structure.
model.eval()

  from .autonotebook import tqdm as notebook_tqdm


LlamaForCausalLM(
  (model): LlamaModel(
    (embed_tokens): Embedding(128256, 2048)
    (layers): ModuleList(
      (0-15): 16 x LlamaDecoderLayer(
        (self_attn): LlamaSdpaAttention(
          (q_proj): Linear(in_features=2048, out_features=2048, bias=False)
          (k_proj): Linear(in_features=2048, out_features=512, bias=False)
          (v_proj): Linear(in_features=2048, out_features=512, bias=False)
          (o_proj): Linear(in_features=2048, out_features=2048, bias=False)
          (rotary_emb): LlamaRotaryEmbedding()
        )
        (mlp): LlamaMLP(
          (gate_proj): Linear(in_features=2048, out_features=8192, bias=False)
          (up_proj): Linear(in_features=2048, out_features=8192, bias=False)
          (down_proj): Linear(in_features=8192, out_features=2048, bias=False)
          (act_fn): SiLU()
        )
        (input_layernorm): LlamaRMSNorm((2048,), eps=1e-05)
        (post_attention_layernorm): LlamaRMSNorm((2048,), eps=1e-05)
      )
    )
    (norm):

In [19]:
# Report whether PyTorch's MPS backend is available on this machine.
print(torch.backends.mps.is_available())

True


In [21]:
# Use the MPS backend when PyTorch reports it as available, otherwise the CPU
device = torch.device("mps" if torch.backends.mps.is_available() else "cpu")
print("Using MPS (Apple Silicon GPU)" if device.type == "mps" else "Using CPU")

# Move the model onto the selected device
model.to(device)

Using MPS (Apple Silicon GPU)


LlamaForCausalLM(
  (model): LlamaModel(
    (embed_tokens): Embedding(128256, 2048)
    (layers): ModuleList(
      (0-15): 16 x LlamaDecoderLayer(
        (self_attn): LlamaSdpaAttention(
          (q_proj): Linear(in_features=2048, out_features=2048, bias=False)
          (k_proj): Linear(in_features=2048, out_features=512, bias=False)
          (v_proj): Linear(in_features=2048, out_features=512, bias=False)
          (o_proj): Linear(in_features=2048, out_features=2048, bias=False)
          (rotary_emb): LlamaRotaryEmbedding()
        )
        (mlp): LlamaMLP(
          (gate_proj): Linear(in_features=2048, out_features=8192, bias=False)
          (up_proj): Linear(in_features=2048, out_features=8192, bias=False)
          (down_proj): Linear(in_features=8192, out_features=2048, bias=False)
          (act_fn): SiLU()
        )
        (input_layernorm): LlamaRMSNorm((2048,), eps=1e-05)
        (post_attention_layernorm): LlamaRMSNorm((2048,), eps=1e-05)
      )
    )
    (norm):

In [None]:
# Average Perplexity for Val Data
# Scores every validation row with the currently loaded `model`/`tokenizer`;
# no checkpoint_file is passed, so the function's default (None) is used.
overall_perplexity_val = calculate_perplexity_overall(model, tokenizer, val_data, batch_size=32)
# Arithmetic mean of the per-row perplexities.
avg_perplexity_val = sum(overall_perplexity_val)/len(overall_perplexity_val)
avg_perplexity_val

5.399934840751501

In [None]:
# Store the per-row validation perplexities as the text form of the Python list.
# NOTE(review): the datasets are read from relative local paths earlier in this
# notebook, while this writes to a Colab Drive mount path; confirm the path
# exists in the environment the notebook is run in.
with open('/content/drive/MyDrive/NLP Project/DRAMA LAMA-Data/Validation/overall_perplexity.txt', 'w') as f:
  f.write(str(overall_perplexity_val))

In [50]:
# Average Perplexity for Test Data
# Scores every test row with the currently loaded `model`/`tokenizer`;
# no checkpoint_file is passed, so the function's default (None) is used.
overall_perplexity_test = calculate_perplexity_overall(model, tokenizer, test_data, batch_size=32)
# Arithmetic mean of the per-row perplexities.
avg_perplexity_test = sum(overall_perplexity_test)/len(overall_perplexity_test)
avg_perplexity_test

5.3134342561858965

In [51]:
# Store the per-row test perplexities as the text form of the Python list.
with open('/content/drive/MyDrive/NLP Project/DRAMA LAMA-Data/Test/overall_perplexity.txt', 'w') as f:
  f.write(str(overall_perplexity_test))

In [53]:
# Average Perplexity for Train Data
# Scores every training row with the currently loaded `model`/`tokenizer`;
# no checkpoint_file is passed, so the function's default (None) is used.
overall_perplexity_train = calculate_perplexity_overall(model, tokenizer, train_data, batch_size=32)
# Arithmetic mean of the per-row perplexities.
avg_perplexity_train = sum(overall_perplexity_train)/len(overall_perplexity_train)
avg_perplexity_train

5.299837264929394

In [54]:
# Store the per-row training perplexities as the text form of the Python list.
with open('/content/drive/MyDrive/NLP Project/DRAMA LAMA-Data/Train/overall_perplexity.txt', 'w') as f:
  f.write(str(overall_perplexity_train))

# Llama 3.2-3B Instruct

In [13]:
! huggingface-cli login --token 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

The token has not been saved to the git credentials helper. Pass `add_to_git_credential=True` in this function directly or `--add-to-git-credential` if using via `huggingface-cli` if you want to set the git credential as well.
Token is valid (permission: fineGrained).
Your token has been saved to /root/.cache/huggingface/token
Login successful


In [14]:
# Load the tokenizer and causal language model for the
# "meta-llama/Llama-3.2-3B-Instruct" checkpoint. This rebinds the `tokenizer`
# and `model` names used by the earlier model section.
# Note : Set to High RAM and GPU
from transformers import AutoTokenizer, AutoModelForCausalLM

tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-3.2-3B-Instruct")
model = AutoModelForCausalLM.from_pretrained("meta-llama/Llama-3.2-3B-Instruct")

# Put the model in evaluation mode; as the cell's last expression this also
# displays the module structure.
model.eval()

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/54.5k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/9.09M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/296 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/878 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/20.9k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/2 [00:00<?, ?it/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/4.97G [00:00<?, ?B/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/1.46G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/189 [00:00<?, ?B/s]

LlamaForCausalLM(
  (model): LlamaModel(
    (embed_tokens): Embedding(128256, 3072)
    (layers): ModuleList(
      (0-27): 28 x LlamaDecoderLayer(
        (self_attn): LlamaSdpaAttention(
          (q_proj): Linear(in_features=3072, out_features=3072, bias=False)
          (k_proj): Linear(in_features=3072, out_features=1024, bias=False)
          (v_proj): Linear(in_features=3072, out_features=1024, bias=False)
          (o_proj): Linear(in_features=3072, out_features=3072, bias=False)
          (rotary_emb): LlamaRotaryEmbedding()
        )
        (mlp): LlamaMLP(
          (gate_proj): Linear(in_features=3072, out_features=8192, bias=False)
          (up_proj): Linear(in_features=3072, out_features=8192, bias=False)
          (down_proj): Linear(in_features=8192, out_features=3072, bias=False)
          (act_fn): SiLU()
        )
        (input_layernorm): LlamaRMSNorm((3072,), eps=1e-05)
        (post_attention_layernorm): LlamaRMSNorm((3072,), eps=1e-05)
      )
    )
    (norm

In [15]:
# Use CUDA when PyTorch reports it as available, otherwise the CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Using GPU" if device.type == "cuda" else "Using CPU")

# Move the model onto the selected device
model.to(device)

Using GPU


LlamaForCausalLM(
  (model): LlamaModel(
    (embed_tokens): Embedding(128256, 3072)
    (layers): ModuleList(
      (0-27): 28 x LlamaDecoderLayer(
        (self_attn): LlamaSdpaAttention(
          (q_proj): Linear(in_features=3072, out_features=3072, bias=False)
          (k_proj): Linear(in_features=3072, out_features=1024, bias=False)
          (v_proj): Linear(in_features=3072, out_features=1024, bias=False)
          (o_proj): Linear(in_features=3072, out_features=3072, bias=False)
          (rotary_emb): LlamaRotaryEmbedding()
        )
        (mlp): LlamaMLP(
          (gate_proj): Linear(in_features=3072, out_features=8192, bias=False)
          (up_proj): Linear(in_features=3072, out_features=8192, bias=False)
          (down_proj): Linear(in_features=8192, out_features=3072, bias=False)
          (act_fn): SiLU()
        )
        (input_layernorm): LlamaRMSNorm((3072,), eps=1e-05)
        (post_attention_layernorm): LlamaRMSNorm((3072,), eps=1e-05)
      )
    )
    (norm

In [15]:
# Average Perplexity for Val Data
# Scores every validation row with the currently loaded `model`/`tokenizer`;
# no checkpoint_file is passed, so the function's default (None) is used.
overall_perplexity_val_llama_3b = calculate_perplexity_overall(model, tokenizer, val_data, batch_size=32)
# Arithmetic mean of the per-row perplexities.
avg_perplexity_val_3b = sum(overall_perplexity_val_llama_3b)/len(overall_perplexity_val_llama_3b)
avg_perplexity_val_3b

We detected that you are passing `past_key_values` as a tuple and this is deprecated and will be removed in v4.43. Please use an appropriate `Cache` class (https://huggingface.co/docs/transformers/v4.41.3/en/internal/generation_utils#transformers.Cache)


5.198684905275907

In [16]:
# Store the per-row validation perplexities as the text form of the Python list.
with open('/content/drive/MyDrive/NLP Project/DRAMA LAMA-Data/Validation/overall_perplexity_llama-3b.txt', 'w') as f:
  f.write(str(overall_perplexity_val_llama_3b))

In [24]:
# Average Perplexity for Test Data
# NOTE(review): the checkpoint for the *test* split is stored under the
# Validation/ folder; confirm this location is intended. A checkpoint is only
# meaningful for the same data and batch size it was created with.
checkpoint_file_path = "/content/drive/MyDrive/NLP Project/DRAMA LAMA-Data/Validation/perplexity_checkpoint.json"
# Progress is saved to the checkpoint after every batch, so re-running this
# cell continues after the last saved batch.
overall_perplexity_test_llama_3b = calculate_perplexity_overall(model, tokenizer, test_data, batch_size=32,checkpoint_file=checkpoint_file_path)
# Arithmetic mean of the per-row perplexities.
avg_perplexity_test_llama_3b = sum(overall_perplexity_test_llama_3b)/len(overall_perplexity_test_llama_3b)
avg_perplexity_test_llama_3b

OutOfMemoryError: CUDA out of memory. Tried to allocate 12.00 MiB. GPU 0 has a total capacity of 14.75 GiB of which 7.06 MiB is free. Process 53834 has 14.74 GiB memory in use. Of the allocated memory 14.57 GiB is allocated by PyTorch, and 46.91 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)

In [None]:
# Store the per-row test perplexities as the text form of the Python list.
# The file name uses a single ".txt" extension, matching the validation output
# file written for the same model.
with open('/content/drive/MyDrive/NLP Project/DRAMA LAMA-Data/Test/overall_perplexity_llama-3b.txt', 'w') as f:
  f.write(str(overall_perplexity_test_llama_3b))