<a href="https://colab.research.google.com/github/shubha07m/LLM_Dialogue_Generation/blob/main/tuned_llm_benchmarking.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Testing the performance of new tuned LLM

In [None]:
!pip install datasets



In [None]:
# Importing library and drive

from google.colab import drive
from transformers import BlenderbotForConditionalGeneration, BlenderbotTokenizer
import torch
import numpy as np
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
# Define the path to the model and tokenizer in Google Drive
model_path = '/content/drive/MyDrive/blenderbot_llm'

# Load the fine-tuned model and tokenizer
model = BlenderbotForConditionalGeneration.from_pretrained(model_path)
tokenizer = BlenderbotTokenizer.from_pretrained(model_path)

# Ensure decoder_start_token_id is set in the model
model.config.decoder_start_token_id = tokenizer.convert_tokens_to_ids('<s>')


In [None]:
# Encode input
inputs = tokenizer("Lex Fridman: What is the future of AI?\nLee Cronin:", return_tensors='pt')

# Generate output with proper handling of special tokens
outputs = model.generate(
    inputs['input_ids'],
    max_length=60,
    num_beams=10,  # Increase number of beams for diversity
    length_penalty=1.0,
    do_sample=True,
    top_p=0.9,  # Increase top-p for diversity
    top_k=50,  # Use top-k sampling
    temperature=1.5,  # Increase temperature for more randomness
    early_stopping=True,
    decoder_start_token_id=model.config.decoder_start_token_id,
    pad_token_id=tokenizer.pad_token_id,
    eos_token_id=model.config.eos_token_id
)

# Decode the output
decoded_output = tokenizer.decode(outputs[0], skip_special_tokens=True)
print("Generated Output:", decoded_output)

Generated Output:  lee cronin:  what do you do for a living?  i work as a graphic designer


## Using advanced prompt engineering for conversation generation

In [None]:
def generate_conversation(prompt, model, tokenizer, max_turns=12, max_length=60, max_input_length=128):
    conversation = prompt
    current_speaker = "Lee Cronin"
    for _ in range(max_turns):
        # Encode input with truncation
        inputs = tokenizer(conversation, return_tensors='pt', truncation=True, max_length=max_input_length)

        # Generate output
        outputs = model.generate(
            inputs['input_ids'],
            max_length=max_length,
            num_beams=10,
            length_penalty=1.0,
            do_sample=True,
            top_p=0.9,
            top_k=50,
            temperature=1.5,
            early_stopping=True,
            decoder_start_token_id=model.config.decoder_start_token_id,
            pad_token_id=tokenizer.pad_token_id,
            eos_token_id=model.config.eos_token_id
        )

        # Decode the output and clean up the response
        generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
        generated_text = generated_text.replace(current_speaker.lower(), "").replace(current_speaker, "").strip().split('. ')[0] + '.'

        # Update conversation with new speaker
        conversation += f"\n{current_speaker}: {generated_text}"

        # Alternate speaker
        current_speaker = "Lex Fridman" if current_speaker == "Lee Cronin" else "Lee Cronin"

        # Ensure conversation does not exceed max length
        if tokenizer(conversation, return_tensors='pt', truncation=True, max_length=max_input_length)['input_ids'].shape[1] > max_input_length:
            conversation = tokenizer.decode(tokenizer(conversation, return_tensors='pt', truncation=True, max_length=max_input_length)['input_ids'][0, -max_input_length:])

    return conversation

In [None]:
# Load model and tokenizer
tokenizer = BlenderbotTokenizer.from_pretrained('/content/drive/MyDrive/blenderbot_llm')
model = BlenderbotForConditionalGeneration.from_pretrained('/content/drive/MyDrive/blenderbot_llm')

# Define initial prompt with one sentence from each speaker
initial_prompt = """Lex Fridman: What is the future of AI?
Lee Cronin: It’s a fascinating question, and it touches on many aspects of science and technology."""

# Generate conversation
conversation = generate_conversation(initial_prompt, model, tokenizer)
print("Generated Conversation:\n", conversation)

Generated Conversation:
 Lex Fridman: What is the future of AI?
Lee Cronin: It’s a fascinating question, and it touches on many aspects of science and technology.
Lee Cronin: :  i think it is fascinating that we have the ability to look at the past and think about the future.
Lex Fridman: I think the future is going to look very different than the past because in the future we will have to look for the future in the present and then we will look back in the past to see what it will look like.
Lee Cronin: That is a very interesting way of looking at it.
Lex Fridman: Listening to the soundtrack on my smartphone right now.
Lee Cronin: Listening to some of my favorite musicians right now, one of my favorites is Led Zeppelin, what about you?.
Lex Fridman: Listening to some of my favorite music from the past right now is the Beatles.
Lee Cronin: Lets see what the future has in store for us.
Lex Fridman: Lets look forward to the future and look at how the future will look and what will it loo

## Testing the performance of EWC tuned LLM on new data

In [None]:
# Define the path to the model and tokenizer in Google Drive
model_path = '/content/drive/MyDrive/ewc_trained_llm'

# Load the fine-tuned model and tokenizer
model = BlenderbotForConditionalGeneration.from_pretrained(model_path)
tokenizer = BlenderbotTokenizer.from_pretrained(model_path)

# Ensure decoder_start_token_id is set in the model
model.config.decoder_start_token_id = tokenizer.convert_tokens_to_ids('<s>')


In [None]:
def generate_conversation(prompt, model, tokenizer, max_turns=12, max_length=60, max_input_length=128):
    conversation = prompt
    current_speaker = "Lisa Randall"

    for _ in range(max_turns):
        # Encode input with truncation
        inputs = tokenizer(conversation, return_tensors='pt', truncation=True, max_length=max_input_length)

        # Generate output
        outputs = model.generate(
            inputs['input_ids'].to(model.device),
            max_length=max_length,
            num_beams=10,
            length_penalty=1.0,
            do_sample=True,
            top_p=0.9,
            top_k=50,
            temperature=1.5,
            early_stopping=True,
            decoder_start_token_id=model.config.decoder_start_token_id,
            pad_token_id=tokenizer.pad_token_id,
            eos_token_id=model.config.eos_token_id
        )

        # Decode the output and clean up the response
        generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
        generated_text = generated_text.replace(current_speaker.lower(), "").replace(current_speaker, "").strip().split('. ')[0] + '.'

        # Update conversation with new speaker
        conversation += f"\n{current_speaker}: {generated_text}"

        # Alternate speaker
        current_speaker = "Lex Fridman" if current_speaker == "Lisa Randall" else "Lisa Randall"

        # Ensure conversation does not exceed max length
        if tokenizer(conversation, return_tensors='pt', truncation=True, max_length=max_input_length)['input_ids'].shape[1] > max_input_length:
            conversation = tokenizer.decode(tokenizer(conversation, return_tensors='pt', truncation=True, max_length=max_input_length)['input_ids'][0, -max_input_length:])

    return conversation

In [None]:
# Define the initial prompt
initial_prompt = """Lex Fridman: How do you envision the future of our understanding of the universe?
Lisa Randall: The future of our understanding lies in the mysteries we have yet to uncover, particularly with dark matter and other cosmic phenomena."""

# Generate conversation
generated_conversation = generate_conversation(initial_prompt, model, tokenizer)
print(generated_conversation)

Lex Fridman: How do you envision the future of our understanding of the universe?
Lisa Randall: The future of our understanding lies in the mysteries we have yet to uncover, particularly with dark matter and other cosmic phenomena.
Lisa Randall: the to is you and that.
Lex Fridman: the is that of a it to.
Lisa Randall: is the to i you so that.
Lex Fridman: the are you is i of.
Lisa Randall: is that the to and it.
Lex Fridman: the of and a have is.
Lisa Randall: that the is in and life i.
Lex Fridman: and is the that i it of.
Lisa Randall: the a and that is in.
Lex Fridman: the you i to is that and.
Lisa Randall: it is the to i and.
Lex Fridman: that is you the in so.


# Comparing forgetfulness with perplexity score

In [None]:
# Define paths
old_model_path = '/content/drive/MyDrive/blenderbot_llm'
new_model_path = '/content/drive/MyDrive/ewc_trained_llm'

# Load the tokenizers
tokenizer_old = BlenderbotTokenizer.from_pretrained(old_model_path)
tokenizer_new = BlenderbotTokenizer.from_pretrained(new_model_path)

# Load the models
model_old = BlenderbotForConditionalGeneration.from_pretrained(old_model_path)
model_new = BlenderbotForConditionalGeneration.from_pretrained(new_model_path)

# Ensure the models are on the correct device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model_old.to(device)
model_new.to(device)

BlenderbotForConditionalGeneration(
  (model): BlenderbotModel(
    (shared): BlenderbotScaledWordEmbedding(8008, 1280, padding_idx=0)
    (encoder): BlenderbotEncoder(
      (embed_tokens): BlenderbotScaledWordEmbedding(8008, 1280, padding_idx=0)
      (embed_positions): BlenderbotLearnedPositionalEmbedding(128, 1280)
      (layers): ModuleList(
        (0-1): 2 x BlenderbotEncoderLayer(
          (self_attn): BlenderbotAttention(
            (k_proj): Linear(in_features=1280, out_features=1280, bias=True)
            (v_proj): Linear(in_features=1280, out_features=1280, bias=True)
            (q_proj): Linear(in_features=1280, out_features=1280, bias=True)
            (out_proj): Linear(in_features=1280, out_features=1280, bias=True)
          )
          (self_attn_layer_norm): LayerNorm((1280,), eps=1e-05, elementwise_affine=True)
          (activation_fn): GELUActivation()
          (fc1): Linear(in_features=1280, out_features=5120, bias=True)
          (fc2): Linear(in_features=5

In [None]:
def calculate_perplexity(model, tokenizer, dataloader, device):
    model.eval()  # Set model to evaluation mode
    total_loss = 0
    total_words = 0

    with torch.no_grad():
        for batch in dataloader:
            input_ids = torch.stack(batch['input_ids']).to(device)
            attention_mask = torch.stack(batch['attention_mask']).to(device)
            labels = torch.stack(batch['labels']).to(device)

            # Forward pass
            outputs = model(input_ids=input_ids, attention_mask=attention_mask, labels=labels)
            loss = outputs.loss

            total_loss += loss.item() * input_ids.size(0)  # Multiply loss by batch size
            total_words += input_ids.size(0) * input_ids.size(1)  # Number of tokens

    # Average loss
    average_loss = total_loss / total_words

    # Perplexity is the exponentiation of the average loss
    perplexity = np.exp(average_loss)
    return perplexity

In [None]:
import pickle
from google.colab import drive

# Mount Google Drive
drive.mount('/content/drive')

# Load the dataloaders
with open('/content/drive/MyDrive/saved_dataloader_data/old_dataloader.pkl', 'rb') as f:
    old_dataloader = pickle.load(f)

with open('/content/drive/MyDrive/saved_dataloader_data/new_dataloader.pkl', 'rb') as f:
    new_dataloader = pickle.load(f)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
# Assuming old_dataloader is prepared similarly to new_dataloader

# Evaluate on old data
perplexity_old_model_on_old_data = calculate_perplexity(model_old, tokenizer_old, old_dataloader, device)
print(f"Perplexity of Old Model on Old Data: {perplexity_old_model_on_old_data:.4f}")

perplexity_new_model_on_old_data = calculate_perplexity(model_new, tokenizer_new, old_dataloader, device)
print(f"Perplexity of New Model on Old Data: {perplexity_new_model_on_old_data:.4f}")

# Evaluate on new data
perplexity_old_model_on_new_data = calculate_perplexity(model_old, tokenizer_old, new_dataloader, device)
print(f"Perplexity of Old Model on New Data: {perplexity_old_model_on_new_data:.4f}")

perplexity_new_model_on_new_data = calculate_perplexity(model_new, tokenizer_new, new_dataloader, device)
print(f"Perplexity of New Model on New Data: {perplexity_new_model_on_new_data:.4f}")

Perplexity of Old Model on Old Data: 5.9073
Perplexity of New Model on Old Data: 1.3986
Perplexity of Old Model on New Data: 5.7196
Perplexity of New Model on New Data: 1.4309
