# Installation

In [1]:
# pip install -r requirements.txt

In [2]:
# pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu121

In [3]:
import torch

# Show the installed PyTorch build and whether a CUDA device can be used.
print("torch + cuda:", torch.__version__)  # PyTorch version string
print("Is cuda available:", torch.cuda.is_available())

torch + cuda: 2.7.1+cu126
Is cuda avialable: True


# LLM Loading

Auth Token Setting:

- Hugging Face Token
- OpenAI Token

In [4]:
# pip install flash-attn protobuf einops pytest

In [5]:
import os
from dotenv import load_dotenv

# Pull the variables declared in the .env file into the process environment.
load_dotenv()

# Hugging Face access token; None when HF_TOKEN is not set.
hf_token = os.environ.get("HF_TOKEN")

In [6]:
# Registry of the LLMs used in this notebook, keyed by a short display name.
# Each entry provides:
#   remote_model_name           - repository id on the Hugging Face Hub
#   model_path / tokenizer_path - local folders holding the saved copies
#   hf_token (optional)         - access token used when downloading
#   additional_config           - loading options for the model
# NOTE(review): the "device" and "rope_scaling" options are not read by
# load_model in this notebook - confirm whether they are still needed.
llms_info = {
    "Gemma-7B-Instruct": {
        "remote_model_name": "google/gemma-7b-it",
        "model_path": "./LLMs/Gemma-7B-Instruct",
        "tokenizer_path": "./Tokenizers/Gemma-7B-Instruct",
        "hf_token": hf_token,
        "additional_config": {
            "torch_dtype": "auto",
            "device": "auto"
        }
    },
    "Mistral-7B-Instruct": {
        "remote_model_name": "mistralai/Mistral-7B-Instruct-v0.2",
        "model_path": "./LLMs/Mistral-7B-Instruct",
        "tokenizer_path": "./Tokenizers/Mistral-7B-Instruct",
        "hf_token": hf_token,
        "additional_config": {
            "torch_dtype": "auto",
            "device": "auto"
        }
    },
    "Phi3-Small-7B-Instruct": {
        "remote_model_name": "microsoft/Phi-3-small-128k-instruct",
        "model_path": "./LLMs/Phi3-Small-7B-Instruct",
        "tokenizer_path": "./Tokenizers/Phi3-Small-7B-Instruct",
        "additional_config": {
            "torch_dtype": "auto",
            "trust_remote_code": True,
            "device": "auto",
            "attn_implementation": "flash_attention_2"
        }
    },
    "Qwen2-7B-Instruct": {
        "remote_model_name": "Qwen/Qwen2-7B-Instruct",
        "model_path": "./LLMs/Qwen2-7B-Instruct",
        "tokenizer_path": "./Tokenizers/Qwen2-7B-Instruct",
        "additional_config": {
            "torch_dtype": "auto",
            "trust_remote_code": True,
            "device": "auto"
        }
    },
    "Llama3-1-8B": {  # Size: - , system RAM: 20 GB (Windows)
        "remote_model_name": "meta-llama/Meta-Llama-3.1-8B",
        "model_path": "./LLMs/Llama3-1-8B",
        "tokenizer_path": "./Tokenizers/Llama3-1-8B",
        # meta-llama repositories are gated on the Hub; pass the token for download.
        "hf_token": hf_token,
        "additional_config": {
            "rope_scaling": {
                "type": "linear",
                "factor": 8.0
            }
        }
    },
    "Llama3-1-8B-Instruct": {  # Size: - , system RAM: 20 GB (Windows)
        "remote_model_name": "meta-llama/Meta-Llama-3.1-8B-Instruct",
        "model_path": "./LLMs/Llama3-1-8B-Instruct",
        "tokenizer_path": "./Tokenizers/Llama3-1-8B-Instruct",
        # meta-llama repositories are gated on the Hub; pass the token for download.
        "hf_token": hf_token,
        "additional_config": {
            "rope_scaling": {
                "type": "linear",
                "factor": 8.0
            }
        }
    },
    "Vicuna-33B": {
        "remote_model_name": "lmsys/vicuna-33b-v1.3",
        "model_path": "./LLMs/Vicuna-33B",
        "tokenizer_path": "./Tokenizers/Vicuna-33B",
        "hf_token": hf_token,
        "additional_config": {
            "torch_dtype": "auto",
            "device": "auto"
        }
    }
}

In [7]:
import os
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

def load_model(model_key):
    """Return the ``(model, tokenizer)`` pair registered under ``model_key``.

    The entry is looked up in ``llms_info``. When both local folders already
    hold a saved copy, the model and tokenizer are loaded from disk. Otherwise
    they are downloaded from the Hugging Face Hub (with the entry's
    ``hf_token`` when present), saved to ``model_path`` / ``tokenizer_path``
    and returned without a second load.

    Loading options come from the entry's ``additional_config`` and are applied
    on both paths: ``torch_dtype`` (``"auto"`` or the name of a ``torch`` dtype
    such as ``"bfloat16"``), ``trust_remote_code`` and ``attn_implementation``.
    Any other key in ``additional_config`` is ignored.

    Args:
        model_key: Key of the model in ``llms_info``.

    Returns:
        tuple: ``(model, tokenizer)``.

    Raises:
        KeyError: If ``model_key`` is not a key of ``llms_info``.
        AttributeError: If ``torch_dtype`` names something that is not in ``torch``.
    """
    model_info = llms_info[model_key]
    # Use .get() to provide a default empty dictionary if "additional_config" is not present
    config = model_info.get("additional_config", {})
    model_path = model_info["model_path"]
    tokenizer_path = model_info["tokenizer_path"]

    # Forward only the options from_pretrained is given here; "auto" is passed
    # through as a string, any other dtype name is resolved on the torch module.
    model_kwargs = {}
    dtype_name = config.get("torch_dtype")
    if dtype_name is not None:
        model_kwargs["torch_dtype"] = (
            dtype_name if dtype_name == "auto" else getattr(torch, dtype_name)
        )
    for option in ("trust_remote_code", "attn_implementation"):
        if option in config:
            model_kwargs[option] = config[option]

    tokenizer_kwargs = {}
    if "trust_remote_code" in config:
        tokenizer_kwargs["trust_remote_code"] = config["trust_remote_code"]

    # A bare directory is not proof of a finished save: cache_dir creates it as
    # soon as a download starts. Look for the files save_pretrained writes.
    model_saved = os.path.isfile(os.path.join(model_path, "config.json"))
    tokenizer_saved = os.path.isfile(
        os.path.join(tokenizer_path, "tokenizer_config.json")
    )

    if model_saved and tokenizer_saved:
        print(f"{model_key} model and tokenizer are already present.")
        # Load model and tokenizer from local storage
        model = AutoModelForCausalLM.from_pretrained(model_path, **model_kwargs)
        tokenizer = AutoTokenizer.from_pretrained(tokenizer_path, **tokenizer_kwargs)
        return model, tokenizer

    print(f"Downloading and saving model and tokenizer for {model_key}.")
    # Include the token in the download process if applicable
    hf_token = model_info.get("hf_token")
    model = AutoModelForCausalLM.from_pretrained(
        model_info["remote_model_name"],
        cache_dir=model_path,
        token=hf_token,  # `use_auth_token` is the deprecated spelling
        **model_kwargs,
    )
    tokenizer = AutoTokenizer.from_pretrained(
        model_info["remote_model_name"],
        cache_dir=tokenizer_path,
        token=hf_token,
        **tokenizer_kwargs,
    )

    # Save them locally (save_pretrained creates the target folder if needed)
    model.save_pretrained(model_path)
    tokenizer.save_pretrained(tokenizer_path)
    return model, tokenizer


In [8]:
# load_model("Gemma-7B-Instruct")

In [9]:
import warnings
import logging
import torch

# Mute Python warnings and let only ERROR-level records through from transformers.
warnings.simplefilter("ignore")
transformers_logger = logging.getLogger('transformers')
transformers_logger.setLevel(logging.ERROR)

# Prefer the GPU when CUDA is usable, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print(f"Using device: {device}")

Using device: cuda


# Benchmarking

In [10]:
from transformers import pipeline
from tqdm import tqdm
import pandas as pd
import torch

# Pick the compute device: CUDA when it is usable, CPU otherwise.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print(f"Using device: {device}")

Using device: cuda


In [11]:
# Dataset to benchmark; the name is used to build the ./datasets/<Dataset>/ and
# ./Prompts/<Dataset>.csv paths in the cells below.
# Options listed by the author: Synthetic-PersonaChat, Blended Skill Talk, PEC, ConvAI2, FoCus, IT-ConvAI2
Dataset = "FoCus"

## Response Generation

In [12]:
import pandas as pd
from transformers import pipeline
from tqdm import tqdm

# Load the cleaned data frame of the selected dataset
df = pd.read_csv(f'./datasets/{Dataset}/ds_cleaned.csv')

# Report the number of missing values per column before dropping incomplete rows
print(df.isnull().sum())

# Reassign instead of mutating in place so re-running the cell stays predictable
df = df.dropna()

# Save the prompts. to_csv does not create missing parent folders, so make
# sure ./Prompts exists first.
os.makedirs('./Prompts', exist_ok=True)
df.to_csv('./Prompts/' + Dataset + '.csv', index=False)

df.head(2)

personas        0
context         0
act_response    0
dtype: int64


Unnamed: 0,personas,context,act_response
0,I would like to visit the Nazareth House again...,User1: I think Ive been there before but I don...,User2: The history of the house you are intere...
1,I have been to Vermont a few times to go skiin...,"User1: Wow, this is amazing! What is this?\nUs...",User2: This house was use as a stop for slaves...


In [13]:
import pandas as pd
import warnings
import logging
import torch
import torch

# Ask PyTorch to release the unused GPU memory it is caching.
torch.cuda.empty_cache()

# Mute Python warnings and let only ERROR-level records through from transformers.
warnings.simplefilter("ignore")
logging.getLogger('transformers').setLevel(logging.ERROR)

# Read back the prompt table saved for the selected dataset.
prompts_csv = f'./Prompts/{Dataset}.csv'
df = pd.read_csv(prompts_csv)

df.head(10)

Unnamed: 0,personas,context,act_response
0,I would like to visit the Nazareth House again...,User1: I think Ive been there before but I don...,User2: The history of the house you are intere...
1,I have been to Vermont a few times to go skiin...,"User1: Wow, this is amazing! What is this?\nUs...",User2: This house was use as a stop for slaves...
2,I am fascinated by the Spanish Colonial Reviva...,"User1: Wow, this is amazing! What is this?\nUs...","User2: Sure, you will like to know that this p..."
3,I want to become a college student.I want to s...,User1: Where is this place?\nUser2: Hello! Wel...,User2: Technische Universität Darmstadt in the...
4,I like to visit england.I love church.I would ...,User1: Where is this place?\nUser2: This place...,"User2: I suggest a place, for your wish of see..."
5,I would like to go to University.I live in Mic...,User1: I think Ive been there before but I don...,User2: They offer 132 bachelors degree program...
6,I love nice hotels.I would like to go to Calif...,User1: I think Ive been there before but I don...,User2: Its current owner is Anbang Insurance G...
7,I have the fantasy about valley.I like lakes.I...,"User1: I know this place, but I dont remember ...",User2: You can view Hat Creek valley and the T...
8,I am willing to start a seminary in New Brunsw...,User1: I think Ive been there before but I don...,User2: It was closely connected with Rutgers U...
9,I like the National War Memorial.I hope to mov...,"User1: I know this place, but I dont remember ...",User2: You have interest in history and will l...


In [14]:
# Function to generate a prompt
def create_prompt(personas, context, include_cot=False):
    """Build the instruction prompt for personalized response generation.

    Args:
        personas: Persona description(s) of the participants, inserted verbatim.
        context: Conversation history, inserted verbatim.
        include_cot: When True, add the chain-of-thought instruction and a
            ``"reasoning"`` field to the requested JSON output.

    Returns:
        str: The prompt text, ending with the JSON template the model is asked
        to fill in.
    """
    prompt = (
        "I will provide you with a conversation context and the personas of the participants, that can be annotated with speaker information.\n"
        "As a participant in this conversation, your task is to generate a personalized response, considering the conversation context and personas.\n\n"
        "Participant Personas:\n"
        f"{personas}\n\n"
        "Conversation Context:\n"
        f"{context}\n\n"
        "Task Instruction:\n"
        "* Provide an unannotated response.\n"
        "* If only one persona is available, personalize the response accordingly.\n"
        "* If the conversation context is a single query, respond appropriately to the query.\n"
    )

    if include_cot:
        prompt += (
            "* Apply Chain of Thought reasoning to reflect on the alignment of your response with the personas.\n"
        )

    prompt += (
        "\nOutput Format: only give a JSON of the following format:\n"
        "{\n"
    )

    if include_cot:
        # The trailing comma keeps the two-field template valid JSON.
        prompt += (
            '  "reasoning": "briefly describe your personalization process (in 110 words or less).",\n'
        )

    prompt += (
        '  "response": "provide the personalized natural language response here (in 110 words or less)."\n'
        "}\n"
    )

    return prompt


### Open-Source LLMs

In [15]:
# Model to benchmark.
# Options: Mistral-7B-Instruct, Llama3-1-8B-Instruct, Qwen2-7B-Instruct, Gemma-7B-Instruct
LLM_name = "Gemma-7B-Instruct"

# Chain Of Thoughts Configuration
COT_SETUP = False

# Token budget based on max response length, doubled when reasoning is requested.
if COT_SETUP:
    MAX_NEW_TOKEN = 220
else:
    MAX_NEW_TOKEN = 110

# Greedy decoding setup (based on the FELM paper, per the author's note).
generation_params = dict(
    max_new_tokens=MAX_NEW_TOKEN,
    temperature=0,
    do_sample=False,
)

# Fetch the model and tokenizer through load_model (defined in an earlier cell).
model, tokenizer = load_model(LLM_name)


Gemma-7B-Instruct model and tokenizer are already present.


Loading checkpoint shards:   0%|          | 0/8 [00:00<?, ?it/s]

In [16]:
# Preview the prompt built for a single sample (positional row 120 of the prompt table).
first_row = df.iloc[120]
personas, context = first_row['personas'], first_row['context']

prompt = create_prompt(personas=personas, context=context, include_cot=COT_SETUP)
print(prompt)


I will provide you with a conversation context and the personas of the participants, that can be annotated with speaker information.
As a participant in this conversation, your task is to generate a personalized response, considering the conversation context and personas.

Participant Personas:
I like football.I have not been to South Africa.I am interested in the World Cup.I hope to become an architect.I would like to attend a rugby match.

Conversation Context:
User1: Where is this place?
User2: The Mbombela Stadium is located in South Africa; a country you have yet to visit.
User1: Why was the stadium built?
User2: The stadium was built to host matches from the 2010 FIFA World Cup. This may excite you since you are interested in the world cup.
User1: Are any other sports played here?
User2: Yes the stadium is also used to host rugby matches. Maybe you should attend one here since I know you would like to attend a rugby match.
User1: What is the capacity of the stadium?
User2: The st

In [None]:
from transformers import pipeline

# Build the text-generation pipeline around the loaded model and tokenizer.
generator = pipeline(
    'text-generation',
    model=model,
    tokenizer=tokenizer,
    device=device,
)

# Run the sample prompt and drop the leading prompt-length prefix of the returned text.
output = generator(prompt, **generation_params)
generated_text = output[0]['generated_text']
response = generated_text[len(prompt):]

# Show what the model produced.
print(response)


In [None]:
import time
from transformers import pipeline
from tqdm import tqdm
import pandas as pd

# Prepare the output location up front: to_csv does not create missing folders,
# and failing only after the whole generation run would waste the run.
COT_ = "-COT" if COT_SETUP else ""
output_dir = f'./Raw Responses/{Dataset}'
os.makedirs(output_dir, exist_ok=True)

# Iterate through the DataFrame and generate responses
gen_responses = []
response_times = []

for _, row in tqdm(df.iterrows(), total=len(df), desc="Generating responses"):
    personas = row['personas']
    context = row['context']

    # Create the prompt
    prompt = create_prompt(personas, context, COT_SETUP)

    # Time only the generation call; perf_counter is a monotonic clock, so the
    # measured duration is not affected by system clock adjustments.
    start_time = time.perf_counter()
    output = generator(prompt, **generation_params)[0]['generated_text']
    response_time = time.perf_counter() - start_time

    # Keep only the text that follows the prompt
    response = output[len(prompt):]

    # Store the generated response and response time
    gen_responses.append(response)
    response_times.append(response_time)

# Create a DataFrame with the responses and response times
response_df = pd.DataFrame({
    'gen_response': gen_responses,
    'response_time': response_times
})

# Save the response DataFrame to a CSV file
response_df.to_csv(f'{output_dir}/{LLM_name}{COT_}.csv', index=False)

Generating responses: 100%|██████████| 1183/1183 [1:42:45<00:00,  5.21s/it]


In [None]:
# Preview the first 20 generated responses together with their generation times.
response_df.head(20)

Unnamed: 0,gen_response,response_time
0,"```\n\n**Example:**\n\n```\n{\n ""response"": ""...",5.293113
1,"```\n\n**Example:**\n\n```\n{\n ""response"": ""...",5.285029
2,"```\n\n**Example:**\n\n```\n{\n ""response"": ""...",5.283818
3,"```\n\n**Example:**\n\n```\n{\n ""response"": ""...",5.083229
4,"```\n\n**Example:**\n\n```\n{\n ""response"": ""...",5.305366
5,"```\n\n**Example:**\n\n```\n{\n ""response"": ""...",4.18084
6,"```\n\n**Example:**\n\n```\n{\n ""response"": ""...",5.327561
7,"```\n\n**Example:**\n\n```\n{\n ""response"": ""...",5.306831
8,"```\n\n**Example:**\n\n```\n{\n ""response"": ""...",5.300904
9,"```\n\n**Example:**\n\n```\n{\n ""response"": ""...",5.29653
