# LLM Loading

Auth Token Setting:

- Hugging Face Token
- OpenAI Token

In [1]:
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
from dotenv import load_dotenv
from tqdm import tqdm
import pandas as pd
import warnings
import logging
import torch
import os

# Silence every Python warning for the rest of the session.
# NOTE(review): this is a blanket filter, so deprecation notices are hidden as well.
warnings.filterwarnings("ignore")
# Only show error-level log records from the `transformers` logger.
logging.getLogger('transformers').setLevel(logging.ERROR)

# Load environment variables via python-dotenv (presumably from a local .env file).
load_dotenv()

# Hugging Face access token read from the environment; None when HF_TOKEN is not set.
hf_token = os.getenv("HF_TOKEN")

In [2]:
# Registry of the LLMs that can be benchmarked, keyed by the short name used when
# calling `load_model`. Each entry holds:
#   remote_model_name - Hugging Face Hub repository id to download from
#   model_path        - local directory for the saved model weights
#   tokenizer_path    - local directory for the saved tokenizer
#   hf_token          - access token used for the download
#   additional_config - optional loading options (all entries now use the
#                       `device_map` spelling, which is the `from_pretrained` argument name)
llms_info = {
    "Mistral-7B-Instruct": {
        "remote_model_name": "mistralai/Mistral-7B-Instruct-v0.2",
        "model_path": "./LLMs/Mistral-7B-Instruct",
        "tokenizer_path": "./Tokenizers/Mistral-7B-Instruct",
        "hf_token": hf_token,
        "additional_config": {
            "torch_dtype": "auto",
            "device_map": "auto"
        }
    },
    "Qwen2-7B-Instruct": {
        "remote_model_name": "Qwen/Qwen2-7B-Instruct",
        "model_path": "./LLMs/Qwen2-7B-Instruct",
        "tokenizer_path": "./Tokenizers/Qwen2-7B-Instruct",
        "hf_token": hf_token,
        "additional_config": {
            "torch_dtype": "auto",
            "trust_remote_code": True,
            "device_map": "auto"
        }
    },
    # NOTE: the key and local paths say "5B", but the remote checkpoint is the 0.5B model.
    # The key is kept as-is because other cells select the model by this name.
    "Qwen2-5B-Instruct": {
        "remote_model_name": "Qwen/Qwen2-0.5B-Instruct",
        "model_path": "./LLMs/Qwen2-5B-Instruct",
        "tokenizer_path": "./Tokenizers/Qwen2-5B-Instruct",
        "hf_token": hf_token,
        "additional_config": {
            "trust_remote_code": True,
        }
    },
    "Llama3-1-8B-Instruct": {
        "remote_model_name": "meta-llama/Meta-Llama-3.1-8B-Instruct",
        "model_path": "./LLMs/Llama3-1-8B-Instruct",
        "tokenizer_path": "./Tokenizers/Llama3-1-8B-Instruct",
        "hf_token": hf_token,
        "additional_config": {
            "torch_dtype": "auto",
            "device_map": "auto",
            # NOTE(review): this override differs from what the checkpoint presumably
            # ships in its own config - confirm before forwarding it to from_pretrained.
            "rope_scaling": {
                "type": "linear",  # or "dynamic" — depending on your use case
                "factor": 8.0
            }
        }
    },
}

In [3]:
def load_model(model_key):
    """Return the ``(model, tokenizer)`` pair for ``model_key``, downloading on first use.

    The model and tokenizer are looked up in ``llms_info``. If a saved copy is not
    found in the entry's ``model_path`` / ``tokenizer_path``, both are downloaded
    from ``remote_model_name`` and saved there. The pair is then always loaded from
    the local directories.

    Args:
        model_key: Key of the entry in ``llms_info``.

    Returns:
        tuple: ``(model, tokenizer)`` loaded from local storage.

    Raises:
        KeyError: If ``model_key`` (or a required path entry) is missing.
    """
    model_info = llms_info[model_key]
    config = model_info.get("additional_config", {})
    model_path = model_info["model_path"]
    tokenizer_path = model_info["tokenizer_path"]

    # "auto" (or no entry) resolves to None, i.e. the library's default dtype;
    # any other value names a torch dtype attribute such as "float16".
    dtype_name = config.get("torch_dtype", "auto")
    torch_dtype = None if dtype_name == "auto" else getattr(torch, dtype_name)
    trust_remote_code = config.get("trust_remote_code", False)

    # Look for the files written by save_pretrained rather than for the directory:
    # `cache_dir` below creates the directory as soon as a download starts, so an
    # interrupted download would otherwise be mistaken for a finished one.
    model_saved = os.path.isfile(os.path.join(model_path, "config.json"))
    tokenizer_saved = os.path.isfile(os.path.join(tokenizer_path, "tokenizer_config.json"))

    if model_saved and tokenizer_saved:
        print(f"{model_key} model and tokenizer are already present.")
    else:
        print(f"Downloading and saving model and tokenizer for {model_key}.")
        # Include the token in the download process if applicable
        hf_token = model_info.get("hf_token", None)
        model = AutoModelForCausalLM.from_pretrained(
            model_info["remote_model_name"],
            cache_dir=model_path,
            torch_dtype=torch_dtype,
            trust_remote_code=trust_remote_code,
            token=hf_token,  # replaces the deprecated `use_auth_token`
        )
        tokenizer = AutoTokenizer.from_pretrained(
            model_info["remote_model_name"],
            cache_dir=tokenizer_path,
            trust_remote_code=trust_remote_code,
            token=hf_token,
        )
        # Make sure the target directories exist, then save both locally
        os.makedirs(model_path, exist_ok=True)
        os.makedirs(tokenizer_path, exist_ok=True)
        model.save_pretrained(model_path)
        tokenizer.save_pretrained(tokenizer_path)

    # Load model and tokenizer from local storage with the same options as the download
    model = AutoModelForCausalLM.from_pretrained(
        model_path,
        torch_dtype=torch_dtype,
        trust_remote_code=trust_remote_code,
    )
    tokenizer = AutoTokenizer.from_pretrained(
        tokenizer_path,
        trust_remote_code=trust_remote_code,
    )
    return model, tokenizer


In [4]:
# load_model("Qwen2-5B-Instruct")

In [5]:
# Run on the GPU when torch can see one; otherwise stay on the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print(f"Using device: {device}")

Using device: cuda


# Benchmarking

In [6]:
import torch
import pandas as pd

# Same device selection as in the loading section, repeated here so the
# benchmarking cells also work when run on their own.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print(f"Using device: {device}")

Using device: cuda


In [7]:
# Dataset name; used to build the prompt file name and the output folder names.
DATASET = "FoCus"
# Dataset split to run on: "train" or "valid".
SET = "train"

In [8]:
# Key into `llms_info` selecting the model to benchmark.
# Options: Mistral-7B-Instruct, Llama3-1-8B-Instruct, Qwen2-7B-Instruct, Qwen2-5B-Instruct
LLM = "Qwen2-5B-Instruct"

# When True, the prompt also asks for a "reasoning" field and the token budget is 220 instead of 110.
COT_SETUP = False

## Response Generation

In [9]:
# Release GPU memory cached by torch before starting the run
torch.cuda.empty_cache()

# Load the prompt table for the selected dataset and split
df = pd.read_csv(f'./Prompts/{DATASET}-{SET}.csv')

# Preview the first three rows
df.head(3)

Unnamed: 0,personas,context,act_response
0,Id like to visit a historic place.I want to vi...,"User1: Wow, this is amazing! What is this?\nUs...",User2: You can access Descent of the Ganges vi...
1,I hope to see some rock in Little Rock.I like ...,"User1: I know this place, but I dont remember ...",User2: It was Sherman School.
2,I love cool lakes.I would like to visit the Hi...,"User1: I know this place, but I dont remember ...",User2: It formed from being a tributary of the...


In [10]:
def create_benchmarking_prompt(personas, context, include_cot=False):
    """Build the benchmarking prompt for one conversation.

    Args:
        personas: Persona text inserted under "Participant Personas".
        context: Conversation text inserted under "Conversation Context".
        include_cot: When True, add the Chain-of-Thought instruction and a
            "reasoning" field to the requested JSON output.

    Returns:
        str: The full prompt, ending with the JSON template the model must fill in.
    """
    prompt = (
        "I will provide you with a conversation context and the personas of the participants, that can be annotated with speaker information.\n"
        "As a participant in this conversation, your task is to generate a personalized response, considering the conversation context and personas.\n\n"
        "Participant Personas:\n"
        f"{personas}\n\n"
        "Conversation Context:\n"
        f"{context}\n\n"
        "Task Instruction:\n"
        "* Provide an unannotated response.\n"
        "* If only one persona is available, personalize the response accordingly.\n"
        "* If the conversation context is a single query, respond appropriately to the query.\n"
    )

    if include_cot:
        prompt += (
            "* Apply Chain of Thought reasoning to reflect on the alignment of your response with the personas.\n"
        )

    prompt += (
        "\nOutput Format: only give a JSON of the following format:\n"
        "{\n"
    )

    if include_cot:
        # The trailing comma separates this field from "response"; without it the
        # template shown to the model is not valid JSON.
        prompt += (
            '  "reasoning": "briefly describe your personalization process (in 110 words or less).",\n'
        )

    prompt += (
        '  "response": "provide the personalized natural language response here (in 110 words or less)."\n'
        "}\n"
    )

    return prompt


In [11]:
# Load the model and tokenizer selected by LLM (load_model must already be defined)
model, tokenizer = load_model(LLM)

# Token budget: the CoT setup gets twice as many new tokens as the plain setup
if COT_SETUP:
    MAX_NEW_TOKEN = 220
else:
    MAX_NEW_TOKEN = 110

# Keyword arguments passed on every generation call
generation_params = dict(
    max_new_tokens=MAX_NEW_TOKEN,  # Based on max response length + reasoning
    temperature=0,                 # Based on FELM paper (Greedy Setup)
    do_sample=False,
)

Downloading and saving model and tokenizer for Qwen2-5B-Instruct.


config.json:   0%|          | 0.00/659 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/988M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/242 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/1.29k [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/2.78M [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/1.67M [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/7.03M [00:00<?, ?B/s]

In [12]:
# Pick one sample row to preview the prompt.
# NOTE: despite its name, `first_row` is the row at position 120, not the first row.
first_row = df.iloc[120]
personas = first_row['personas']
context = first_row['context']

# Build and print the prompt for the sample row
prompt = create_benchmarking_prompt(personas, context, include_cot=COT_SETUP)
print(prompt)


I will provide you with a conversation context and the personas of the participants, that can be annotated with speaker information.
As a participant in this conversation, your task is to generate a personalized response, considering the conversation context and personas.

Participant Personas:
I like university.I love old libraries.I would like to visit Iceland.I have never been to Reykjavik.I love books.

Conversation Context:
User1: I think Ive been there before but I dont remember the name of this place.
User2: Youve never been here before, actually. This is The National University Library of Iceland, located in Reykjavik.
User1: How old is it?
User2: Youll love this old library, which dates back to 1818.
User1: Does it have a lot of books?
User2: Yes it does. Youll love the selection of books here, as this is the largest library in the country with millions of items in various collections.
User1: What is all in their collection?
User2: Their collection includes books, of course, b

In [14]:
# Initialize the text generation pipeline with the loaded model and tokenizer on `device`
generator = pipeline('text-generation', model=model, tokenizer=tokenizer, device=device)

# Generate a response for the current `prompt` using the shared generation settings
output = generator(prompt, **generation_params)
# Drop the first len(prompt) characters of the generated text
# (assumes the generated text starts with the prompt verbatim — TODO confirm)
response = output[0]['generated_text'][len(prompt):]

# Print the response
print(response)

Note: Please use real responses instead of generated ones.

{
  "response": "The National University Library of Iceland is a beautiful and historic building that houses over 3 million books. It has a wide range of collections including history, literature, science, and more. The library is open to the public and offers free library cards for those who want to explore its vast collection."
}


In [16]:
import time

# Generate one response per row of `df` and record how long each generation takes.
gen_responses = []
response_times = []

for index, row in tqdm(df.iterrows(), total=len(df), desc="Generating responses"):
    personas = row['personas']
    context = row['context']

    # Create the prompt
    prompt = create_benchmarking_prompt(personas, context, COT_SETUP)

    # Time only the generation call. perf_counter() is a monotonic, high-resolution
    # clock meant for measuring intervals; time.time() follows the wall clock and
    # can jump if the system time is adjusted.
    start_time = time.perf_counter()

    # Generate a response
    output = generator(prompt, **generation_params)[0]['generated_text']

    # Measure the end time and calculate the duration (seconds)
    end_time = time.perf_counter()
    response_time = end_time - start_time

    # Drop the first len(prompt) characters so only the text after the prompt is kept
    response = output[len(prompt):]

    # Store the generated response and response time
    gen_responses.append(response)
    response_times.append(response_time)

# Create a DataFrame with the responses and response times
response_df = pd.DataFrame({
    'gen_response': gen_responses,
    'response_time': response_times
})

# File-name suffix that marks Chain-of-Thought runs; used when saving results
COT_ = "-COT" if COT_SETUP else ""

response_df.head(20)

Generating responses: 100%|██████████| 4000/4000 [51:25<00:00,  1.30it/s]


Unnamed: 0,gen_response,response_time
0,Note: The response should not contain any pers...,1.002536
1,Note: You may need to add additional details i...,0.959511
2,Note: The response should be personal and tail...,1.06996
3,Note: The response should not contain any pers...,0.751485
4,Note: The response should not contain any pers...,0.904021
5,Note: You may need to break down the response ...,0.451411
6,Note: Please include all necessary elements in...,0.894746
7,Note: The response should not contain any pers...,0.708804
8,Note: The response should be personal and tail...,0.902463
9,Note: Please use real responses instead of gen...,0.603102


In [17]:
# Save the raw responses to a CSV file.
# Create the dataset folder first: to_csv does not create missing directories.
os.makedirs(f'./Raw Responses/{DATASET}', exist_ok=True)
response_df.to_csv(f'./Raw Responses/{DATASET}/{LLM}-benchmark{COT_}.csv', index = False)

# Post Processing

In [19]:
# Work on an independent copy so that later post-processing of `ds` cannot
# alter the raw generations held in `response_df` (a plain assignment would
# only create a second name for the same frame).
ds = response_df.copy()
print("Shape:", ds.shape)

print("\nMissing Values:\n", ds.isnull().sum())

Shape: (4000, 2)

Missing Values:
 gen_response     0
response_time    0
dtype: int64


In [20]:
import re
import json
import pandas as pd

# Define functions
def find_first_valid_json(text):
    """Return the first JSON object in ``text`` that has a "response" key.

    Every "{" in ``text`` is tried as the start of a JSON value, in order. Parsing
    with the JSON decoder (instead of cutting at the first "}") means objects that
    contain braces inside strings, or nested objects, are still recognised.

    Args:
        text: Raw model output. Anything that is not a non-blank string yields None.

    Returns:
        dict | None: The parsed object, or None when no such object is found.
    """
    if not isinstance(text, str) or text.strip() == "":
        return None  # skip if not a string or empty string

    decoder = json.JSONDecoder()
    start = text.find("{")
    while start != -1:
        try:
            # raw_decode parses one JSON value beginning at `start` and ignores
            # whatever text follows it.
            candidate, _ = decoder.raw_decode(text, start)
        except json.JSONDecodeError:
            candidate = None
        if isinstance(candidate, dict) and "response" in candidate:  # Only check for "response"
            return candidate
        start = text.find("{", start + 1)
    return None

def get_response(text):
    """Return ``text['response']``, or None when it cannot be read.

    Args:
        text: Usually the dict returned by ``find_first_valid_json``, or None.

    Returns:
        The value stored under "response"; None when ``text`` is None, has no
        "response" key, or does not support lookup by that key (e.g. a float
        or a plain string).
    """
    if text is None:
        return None
    try:
        return text['response']
    except (KeyError, TypeError, ValueError):
        # KeyError: key missing; TypeError: object cannot be indexed by a string key
        return None



# Turn each raw generation into the text of its "response" field.
# Rows with no parsable JSON object containing "response" (including empty
# generations) become None.
# The result is built as a new frame from `response_df`, so the raw generations
# are left untouched and re-running this cell gives the same result.
ds = response_df.assign(
    gen_response=response_df['gen_response'].apply(
        lambda raw: get_response(find_first_valid_json(raw))
    )
)

# Define the new column order ('response_time' is carried over unchanged)
new_column_order = ['gen_response', 'response_time']

# Reorder the columns
ds = ds[new_column_order]

print("\nMissing Values:\n", ds.isnull().sum())
print(ds.shape)
ds



Missing Values:
 gen_response     1536
response_time       0
dtype: int64
(4000, 2)


Unnamed: 0,gen_response,response_time
0,Hello! Welcome to Descent of the Ganges. It's ...,1.002536
1,The name of the place is Little Rock Central H...,0.959511
2,,1.069960
3,Camp Randall Stadium is an outdoor stadium loc...,0.751485
4,,0.904021
...,...,...
3995,,0.878556
3996,Kõpu Lighthouse is a beautiful historical land...,0.657697
3997,St. Matthews Lutheran Church is a historic chu...,0.820268
3998,The bell has been restored to its original pos...,0.336838


In [21]:
# Save the post-processed responses.
# Create the dataset folder first: to_csv does not create missing directories.
os.makedirs(f'Responses/{DATASET}', exist_ok=True)
ds.to_csv(f'Responses/{DATASET}/{LLM}-benchmark{COT_}.csv', index=False)