<a href="https://colab.research.google.com/github/shubha07m/LLM_Dialogue_Generation/blob/main/model_testing.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Importing libraries and API keys
from transformers import (
    GPT2LMHeadModel, GPT2Tokenizer,
    AutoModelForCausalLM, AutoTokenizer,
    BlenderbotTokenizer, BlenderbotForConditionalGeneration
)
import os
import gc
import torch
from huggingface_hub import login

# Authenticate with the Hugging Face Hub.
# The token is read from the HF_TOKEN environment variable; it is None when
# the variable is not set.
hf_token = os.environ.get('HF_TOKEN')
login(token=hf_token)

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [None]:
# Loading a specific LLM
def load_model(model_name):
    """Load the tokenizer and model for one of the supported LLMs.

    Args:
        model_name: One of 'GPT-2', 'DialoGPT', 'BlenderBot' or 'GPT-Neo'.

    Returns:
        A ``(tokenizer, model, max_length)`` tuple. The model is moved to
        'cuda' when CUDA is available and to 'cpu' otherwise. ``max_length``
        is the per-model length value hard-coded in this function.

    Raises:
        ValueError: If ``model_name`` is not one of the supported names.
    """
    supported = ('GPT-2', 'DialoGPT', 'BlenderBot', 'GPT-Neo')
    if model_name not in supported:
        # Fail early with a clear message; without this check an unknown name
        # falls through every branch and the return statement raises
        # UnboundLocalError.
        raise ValueError(
            f"Unknown model name {model_name!r}; expected one of {supported}"
        )

    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    if model_name == 'GPT-2':
        checkpoint = 'gpt2'
        tokenizer = GPT2Tokenizer.from_pretrained(checkpoint)
        model = GPT2LMHeadModel.from_pretrained(checkpoint).to(device)
        max_length = 1024
    elif model_name == 'DialoGPT':
        checkpoint = 'microsoft/DialoGPT-large'
        tokenizer = AutoTokenizer.from_pretrained(checkpoint)
        model = AutoModelForCausalLM.from_pretrained(checkpoint).to(device)
        max_length = 128
    elif model_name == 'BlenderBot':
        checkpoint = 'facebook/blenderbot-400M-distill'
        tokenizer = BlenderbotTokenizer.from_pretrained(checkpoint)
        model = BlenderbotForConditionalGeneration.from_pretrained(checkpoint).to(device)
        max_length = 128
    else:  # 'GPT-Neo' (the only remaining supported name)
        checkpoint = 'EleutherAI/gpt-neo-1.3B'
        tokenizer = AutoTokenizer.from_pretrained(checkpoint)
        model = AutoModelForCausalLM.from_pretrained(checkpoint).to(device)
        max_length = 2048
    return tokenizer, model, max_length

In [None]:
# Generating responses from a pretrained LLM
def generate_response(model_name, model, tokenizer, prompt, max_new_tokens=50):
    """Generate the first sentence of the model's continuation of ``prompt``.

    Args:
        model_name: Name of the model being used. Kept for backward
            compatibility; every model goes through the same code path.
        model: Loaded model exposing ``config`` and ``generate``.
        tokenizer: Tokenizer matching ``model``. Its ``pad_token_id`` is set
            to ``eos_token_id`` as a side effect when it is missing.
        prompt: The dialogue text to continue.
        max_new_tokens: Upper bound on the number of newly generated tokens.

    Returns:
        The newly generated text only (the prompt is never echoed back),
        stripped of surrounding whitespace and cut after its first '.'.
        If the text contains no '.', all of it is returned.
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    input_ids = tokenizer.encode(prompt, return_tensors='pt').to(device)

    is_encoder_decoder = getattr(model.config, 'is_encoder_decoder', False)
    max_positions = model.config.max_position_embeddings
    if is_encoder_decoder:
        # The encoder input has the whole window to itself.
        max_prompt_tokens = max_positions
    else:
        # Decoder-only models share one window between prompt and new tokens,
        # so leave room for the tokens that are about to be generated.
        max_prompt_tokens = max(1, max_positions - max_new_tokens)
    # Keep the most recent tokens when the prompt is too long.
    input_ids = input_ids[:, -max_prompt_tokens:]

    # Ensure pad_token_id is set
    if getattr(tokenizer, 'pad_token_id', None) is None:
        tokenizer.pad_token_id = tokenizer.eos_token_id

    # A single prompt is never padded, so every position is attended to.
    # Comparing against pad_token_id would wrongly mask real EOS tokens once
    # the pad token has been aliased to EOS above.
    attention_mask = torch.ones_like(input_ids)

    outputs = model.generate(
        input_ids,
        attention_mask=attention_mask,
        max_new_tokens=max_new_tokens,
        num_return_sequences=1,
        pad_token_id=tokenizer.eos_token_id,
        top_p=0.9,
        top_k=0,
        temperature=0.7,
        repetition_penalty=1.2,
        do_sample=True
    )

    generated_ids = outputs[0]
    if not is_encoder_decoder:
        # Decoder-only output starts with the prompt tokens; drop them so the
        # first-sentence extraction below works on the new text rather than
        # on the prompt's own first sentence.
        generated_ids = generated_ids[input_ids.shape[-1]:]
    generated_text = tokenizer.decode(generated_ids, skip_special_tokens=True).strip()

    # Keep everything up to and including the first period, if there is one.
    head, dot, _ = generated_text.partition('.')
    return head + dot if dot else generated_text

In [None]:
# function for testing model and memory management
def test_model(model_name, initial_prompt, speakers, max_new_tokens=50, num_exchanges=3):
    """Run a short generated dialogue with one model and print the result.

    The model is loaded, asked to extend the dialogue up to ``num_exchanges``
    times, and then released again so the next model starts with as much free
    memory as possible.

    Args:
        model_name: Name understood by ``load_model``.
        initial_prompt: Dialogue text used as the starting point.
        speakers: Two speaker labels (e.g. ``["A:", "B:"]``). After each
            generated sentence the label for the next turn is appended.
        max_new_tokens: Passed through to ``generate_response``.
        num_exchanges: Maximum number of generated turns.

    Returns:
        None. The final dialogue is printed.
    """
    # Collect garbage first so that objects left over from a previous model
    # are released before the CUDA cache is emptied.
    gc.collect()
    torch.cuda.empty_cache()

    print(f"Testing model: {model_name}")

    tokenizer, model, _max_length = load_model(model_name)

    try:
        response = initial_prompt
        for _ in range(num_exchanges):
            # Label to append after this turn: switch to speakers[0] when the
            # text currently ends with speakers[1]'s label, else speakers[1].
            next_label_idx = 0 if response.endswith(speakers[1]) else 1
            generated_sentence = generate_response(model_name, model, tokenizer, response, max_new_tokens)

            # Check if the generated sentence is too similar to previous ones
            if generated_sentence.strip() in response:
                print(f"Skipping repetitive response for {model_name}")
                break

            response += " " + generated_sentence
            # Always separate the label with a space and use the computed
            # index. The previous conditional expression resolved to
            # speakers[1] in both arms, so the labels never alternated.
            response += " " + speakers[next_label_idx]

        print(f"\n{model_name} Response:\n{response}")
    finally:
        # Release the model and tokenizer even when generation fails, then
        # collect garbage before emptying the CUDA cache.
        del model, tokenizer
        gc.collect()
        torch.cuda.empty_cache()

In [None]:
# Checking the response of every model on the same starting dialogue
initial_prompt = ("Lex Fridman: What do you think about the origins of life? "
                  "Lee Cronin: The concept of chemical assembly is fascinating. "
                  "Lex Fridman: Can you explain how this assembly works in simple terms? "
                  "Lee Cronin: Sure, it's about molecules coming together to form complex structures.")
speakers = ["Lex Fridman:", "Lee Cronin:"]

model_names = ['GPT-2', 'DialoGPT', 'BlenderBot', 'GPT-Neo']
SEED = 42  # fixed seed so sampled generations can be repeated
for model_name in model_names:
    # generate_response samples (do_sample=True); re-seed before each model so
    # a model's output does not depend on which models ran before it.
    # NOTE(review): exact repeatability across different hardware or library
    # versions is not guaranteed.
    torch.manual_seed(SEED)
    test_model(model_name, initial_prompt, speakers)


Testing model: GPT-2
Skipping repetitive response for GPT-2

GPT-2 Response:
Lex Fridman: What do you think about the origins of life? Lee Cronin: The concept of chemical assembly is fascinating. Lex Fridman: Can you explain how this assembly works in simple terms? Lee Cronin: Sure, it's about molecules coming together to form complex structures.
Testing model: DialoGPT
Skipping repetitive response for DialoGPT

DialoGPT Response:
Lex Fridman: What do you think about the origins of life? Lee Cronin: The concept of chemical assembly is fascinating. Lex Fridman: Can you explain how this assembly works in simple terms? Lee Cronin: Sure, it's about molecules coming together to form complex structures.
Testing model: BlenderBot

BlenderBot Response:
Lex Fridman: What do you think about the origins of life? Lee Cronin: The concept of chemical assembly is fascinating. Lex Fridman: Can you explain how this assembly works in simple terms? Lee Cronin: Sure, it's about molecules coming together t