# LLM Loading

Auth Token Setting:

- HugginigFace Token
- OpenAI Token

In [1]:
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
from dotenv import load_dotenv
from tqdm import tqdm
import pandas as pd
import warnings
import logging
import torch
import os

warnings.filterwarnings("ignore")
logging.getLogger('transformers').setLevel(logging.ERROR)

load_dotenv()

hf_token = os.getenv("HF_TOKEN")

  from pandas.core import (


In [2]:
llms_info = {
    "Mistral-7B-Instruct": {
        "remote_model_name": "mistralai/Mistral-7B-Instruct-v0.2",
        "model_path": "./LLMs/Mistral-7B-Instruct",
        "tokenizer_path": "./Tokenizers/Mistral-7B-Instruct",
        "additional_config": {
            "torch_dtype": "auto",
            "device": "auto"
        }
    },
    "Qwen2-7B-Instruct": {
        "remote_model_name": "Qwen/Qwen2-7B-Instruct",
        "model_path": "./LLMs/Qwen2-7B-Instruct",
        "tokenizer_path": "./Tokenizers/Qwen2-7B-Instruct",
        "additional_config": {
            "torch_dtype": "auto",
            "trust_remote_code": True,
            "device": "auto"
        }
    },
    "Qwen2-5B-Instruct": {
        "remote_model_name": "Qwen/Qwen2-0.5B-Instruct",
        "model_path": "./LLMs/Qwen2-5B-Instruct",
        "tokenizer_path": "./Tokenizers/Qwen2-5B-Instruct",
        "additional_config":{
            "trust_remote_code": True,
        }
    }, 
    "Llama3-1-8B-Instruct": {
    "remote_model_name": "meta-llama/Llama-3.1-8B-Instruct",
    "model_path": "./LLMs/Llama3-1-8B-Instruct",
    "tokenizer_path": "./Tokenizers/Llama3-1-8B-Instruct",
    # "hf_token": hf_token,
    "additional_config": {
        "torch_dtype": "auto",
        "device_map": "auto",
        "rope_scaling": {
            "type": "linear",  # or "dynamic" — depending on your use case
            "factor": 8.0
        }
    }
},

}

In [3]:
def load_model(model_key):
    model_info = llms_info[model_key]
    config = model_info["additional_config"]

    # Check if the directories for the model and tokenizer exist
    model_dir_exists = os.path.isdir(model_info["model_path"])
    tokenizer_dir_exists = os.path.isdir(model_info["tokenizer_path"])

    if model_dir_exists and tokenizer_dir_exists:
        print(f"{model_key} model and tokenizer are already present.")
    else:
        print(f"Downloading and saving model and tokenizer for {model_key}.")
        # Include the token in the download process if applicable
        hf_token = model_info.get("hf_token", None)
        model = AutoModelForCausalLM.from_pretrained(
            model_info["remote_model_name"],
            cache_dir=model_info["model_path"],
            torch_dtype=getattr(torch, config.get("torch_dtype", "auto")) if config.get("torch_dtype", "auto") != "auto" else None,
            use_auth_token=hf_token
        )
        tokenizer = AutoTokenizer.from_pretrained(
            model_info["remote_model_name"],
            cache_dir=model_info["tokenizer_path"],
            use_auth_token=hf_token
        )
        # Ensure directories are created during download
        if not model_dir_exists:
            os.makedirs(model_info["model_path"], exist_ok=True)
        if not tokenizer_dir_exists:
            os.makedirs(model_info["tokenizer_path"], exist_ok=True)
        # Save them locally
        model.save_pretrained(model_info["model_path"])
        tokenizer.save_pretrained(model_info["tokenizer_path"])

    # Load model and tokenizer from local storage
    model = AutoModelForCausalLM.from_pretrained(model_info["model_path"])
    tokenizer = AutoTokenizer.from_pretrained(model_info["tokenizer_path"])
    return model, tokenizer


In [4]:
# load_model("Llama3-1-8B-Instruct")

In [5]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

Using device: cuda


# Benchmarking

In [6]:
import torch
import pandas as pd

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

Using device: cuda


In [7]:
DATASET = "FoCus"
SET = "train"           #train,   valid       

In [8]:
LLM = "Mistral-7B-Instruct"                  # Mistral-7B-Instruct, Llama3-1-8B-Instruct, Qwen2-7B-Instruct, Qwen2-5B-Instruct

COT_SETUP = False                               

## Response Generation

In [9]:
torch.cuda.empty_cache()

# Loading the prompt
df = pd.read_csv(f'./Prompts/{DATASET}-{SET}.csv')


print(df.shape)
df.head(3)

(1500, 3)


Unnamed: 0,personas,context,act_response
0,I want to learn more about the Royal Navy.I ha...,"User1: I know this place, but I dont remember ...",User2: The shit was controversial he scrapped ...
1,I collect lighthouses.Ive never been to to Eng...,"User1: Wow, this is amazing! What is this?\nUs...",User2: Yes! An episode from ChuckleVision and ...
2,I am interested in region.I would like to visi...,User1: Where is this place?\nUser2: This is Ac...,User2: Waterways are essential to the commerci...


In [10]:
# Function to generate a prompt
def create_benchmarking_prompt(personas, context, include_cot=False):

    prompt = (
        "I will provide you with a conversation context and the personas of the participants, that can be annotated with speaker information.\n"
        "As a participant in this conversation, your task is to generate a personalized response, considering the conversation context and personas.\n\n"
        "Participant Personas:\n"
        f"{personas}\n\n"
        "Conversation Context:\n"
        f"{context}\n\n"
        "Task Instruction:\n"
        "* Provide an unannotated response.\n"
        "* If only one persona is available, personalize the response accordingly.\n"
        "* If the conversation context is a single query, respond appropriately to the query.\n"
    )
    
    if include_cot:
        prompt += (
            "* Apply Chain of Thought reasoning to reflect on the alignment of your response with the personas.\n"
        )

    prompt += (
        "\nOutput Format: only give a JSON of the following format:\n"
        "{\n"
    )
    
    if include_cot:
        prompt += (
            '  "reasoning": "briefly describe your personalization process (in 110 words or less)."\n'
        )
        
    prompt += (
        '  "response": "provide the personalized natural language response here (in 110 words or less)."\n'
        "}\n"
    )

    return prompt


In [11]:
# Assuming load_model is defined and works as expected
model, tokenizer = load_model(LLM)

MAX_NEW_TOKEN = 220 if COT_SETUP else 110

generation_params = {
    
    "max_new_tokens": MAX_NEW_TOKEN,      # Based on max response length + reasoning
    "temperature": 0,                     # Based on FELM paper (Greedy Setup)
    "do_sample": False
}

Downloading and saving model and tokenizer for Mistral-7B-Instruct.


config.json:   0%|          | 0.00/596 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/25.1k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/3 [00:00<?, ?it/s]

model-00001-of-00003.safetensors:   0%|          | 0.00/4.94G [00:00<?, ?B/s]

model-00002-of-00003.safetensors:   0%|          | 0.00/5.00G [00:00<?, ?B/s]

model-00003-of-00003.safetensors:   0%|          | 0.00/4.54G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/111 [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/6 [00:00<?, ?it/s]

In [12]:
first_row = df.iloc[120]
personas = first_row['personas']
context = first_row['context']

# Example usage
prompt = create_benchmarking_prompt(personas, context, include_cot=COT_SETUP)
print(prompt)

I will provide you with a conversation context and the personas of the participants, that can be annotated with speaker information.
As a participant in this conversation, your task is to generate a personalized response, considering the conversation context and personas.

Participant Personas:
I have the fantasy about mountain.I like hill fort.I have heard about Celtic centres.I would like to go to Germany.I like rivers.

Conversation Context:
User1: Wow, this is amazing! What is this?
User2: This will be your favorite place. The Heuneburg is a prehistoric hillfort in Germany, the country you want to visit.
User1: Nice! Where is exactly this place located?
User2: Its located in Hundersingen near Herbertingen, between Ulm and Sigmaringen, Baden-Württemberg, in the south of Germany.
User1: Tell me more about the surroundings of the site.
User2: Its on the side of the river Danube. Also, it is near to the modern borders with Switzerland and Austria.
User1: What is the significance of the

In [13]:
# Initialize the text generation pipeline
generator = pipeline('text-generation', model=model, tokenizer=tokenizer, device=device)

# Generate a response
output = generator(prompt, **generation_params)
response = output[0]['generated_text'][len(prompt):]

# Print the response
print(response)


Unannotated Response:
{
  "response": "The Heuneburg hillfort is located in Hundersingen, Germany, near the Danube river. It's considered one of the most important early Celtic centres in Central Europe, abandoned in the 5th century BC. The site offers a stunning view of the surrounding landscape and is near the modern borders with Switzerland and Austria."
}

Annotated Response (for User1):
{
  "response": "The Heuneb


In [14]:
import time

# Iterate through the DataFrame and generate responses
gen_responses = []
response_times = []

for index, row in tqdm(df.iterrows(), total=len(df), desc="Generating responses"):
    personas = row['personas']
    context = row['context']
    
    # Create the prompt
    prompt = create_benchmarking_prompt(personas, context, COT_SETUP)
    
    # Measure the start time
    start_time = time.time()
    
    # Generate a response
    output = generator(prompt, **generation_params)[0]['generated_text']
    
    # Measure the end time and calculate the duration
    end_time = time.time()
    response_time = end_time - start_time
    
    response = output[len(prompt):]

    # Store the generated response and response time
    gen_responses.append(response)
    response_times.append(response_time)

# Create a DataFrame with the responses and response times
response_df = pd.DataFrame({
    'gen_response': gen_responses,
    'response_time': response_times
})  

COT_ = "-COT" if COT_SETUP else ""

response_df.head(20)

Generating responses: 100%|██████████| 1500/1500 [1:51:04<00:00,  4.44s/it]


Unnamed: 0,gen_response,response_time
0,"\nResponse:\n{\n ""response"": ""The HMS Plymout...",4.395349
1,"\nUnannotated Response:\n{\n ""response"": ""The...",4.356761
2,"\nUnannotated Response:\n{\n ""response"": ""Aca...",4.65339
3,"\nResponse:\n{\n ""response"": ""Vail Ski Resort...",4.440063
4,"\n{\n ""response"": ""Mesa Verde National Park, ...",4.460657
5,"\nUnannotated Response:\n{\n ""response"": ""Gre...",4.370527
6,"\nUnannotated Response:\n{\n ""response"": ""The...",4.379969
7,"\nOutput:\n{\n ""response"": ""Harlaxton Manor i...",4.46367
8,"\nUnannotated Response:\n{\n ""response"": ""The...",4.526228
9,"\nResponse:\n{\n ""response"": ""Wat Traphang Th...",4.131498


In [15]:
print("\nMissing Values:\n", response_df.isnull().sum())


Missing Values:
 gen_response     0
response_time    0
dtype: int64


In [16]:
## Save the response DataFrame to a CSV and Excel file
response_df.to_csv(f'./Raw Responses/{DATASET}/{LLM}-train{COT_}.csv', index = False)

# Post Processing

In [17]:
ds = response_df
print("Shape:", ds.shape)

print("\nMissing Values:\n", ds.isnull().sum())

Shape: (1500, 2)

Missing Values:
 gen_response     0
response_time    0
dtype: int64


In [18]:
import re
import json
import pandas as pd

# Define functions
def find_first_valid_json(text):
    if not isinstance(text, str) or text.strip() == "":
        return None  # skip if not a string or empty string
    
    json_objects = re.findall(r'\{.*?\}', text, re.DOTALL)
    for obj in json_objects:
        try:
            json_obj = json.loads(obj)
            if "response" in json_obj:  # Only check for "response"
                return json_obj
        except json.JSONDecodeError:
            continue
    return None

def get_response(text):
    if text is not None:  # Check if text is not None
        try:
            return text['response']
        except (ValueError, SyntaxError, KeyError):
            return None
    return None


# Replace empty strings in 'gen_response' with None
ds.loc[ds['gen_response'] == '', 'gen_response'] = None

# Apply the find_first_valid_json function
ds['gen_response'] = ds['gen_response'].apply(lambda x: find_first_valid_json(x))

# Convert gen_response to None if it's not a valid string
ds['gen_response'] = ds['gen_response'].apply(lambda x: None if pd.isna(x) or x == 'nan' or isinstance(x, float) else x)

# Extract 'response' from the JSON objects
ds['gen_response'] = ds['gen_response'].apply(lambda x: get_response(x))

# Keep the 'response_time' column unchanged
ds['response_time'] = ds['response_time']

# Define the new column order
new_column_order = ['gen_response', 'response_time']

# Reorder the columns
ds = ds[new_column_order]

print("\nMissing Values:\n", ds.isnull().sum())
print(ds.shape)
ds



Missing Values:
 gen_response     928
response_time      0
dtype: int64
(1500, 2)


Unnamed: 0,gen_response,response_time
0,,4.395349
1,,4.356761
2,,4.653390
3,Vail Ski Resort in Colorado is the perfect pla...,4.440063
4,,4.460657
...,...,...
1495,,4.504809
1496,,4.504092
1497,,4.736598
1498,,5.180323


In [19]:
ds.to_csv(f'Responses/{DATASET}/{LLM}-train{COT_}.csv', index=False)