In [2]:
!pip install transformers peft datasets scikit-learn tqdm huggingface_hub rouge evaluate

Collecting peft
  Using cached peft-0.14.0-py3-none-any.whl.metadata (13 kB)
Collecting huggingface_hub
  Using cached huggingface_hub-0.27.0-py3-none-any.whl.metadata (13 kB)
Collecting rouge
  Using cached rouge-1.0.1-py3-none-any.whl.metadata (4.1 kB)
Collecting tokenizers<0.22,>=0.21 (from transformers)
  Using cached tokenizers-0.21.0-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (6.7 kB)
Collecting accelerate>=0.21.0 (from peft)
  Using cached accelerate-1.2.1-py3-none-any.whl.metadata (19 kB)
Using cached peft-0.14.0-py3-none-any.whl (374 kB)
Using cached huggingface_hub-0.27.0-py3-none-any.whl (450 kB)
Using cached rouge-1.0.1-py3-none-any.whl (13 kB)
Using cached accelerate-1.2.1-py3-none-any.whl (336 kB)
Using cached tokenizers-0.21.0-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.0 MB)
Installing collected packages: rouge, huggingface_hub, tokenizers, accelerate, peft
[31mERROR: pip's dependency resolver does not currently take into account 

In [3]:
!pip install pyro-ppl

Collecting pyro-ppl
  Using cached pyro_ppl-1.9.1-py3-none-any.whl.metadata (7.8 kB)
Collecting opt-einsum>=2.3.2 (from pyro-ppl)
  Using cached opt_einsum-3.4.0-py3-none-any.whl.metadata (6.3 kB)
Collecting pyro-api>=0.1.1 (from pyro-ppl)
  Using cached pyro_api-0.1.2-py3-none-any.whl.metadata (2.5 kB)
Using cached pyro_ppl-1.9.1-py3-none-any.whl (755 kB)
Using cached opt_einsum-3.4.0-py3-none-any.whl (71 kB)
Using cached pyro_api-0.1.2-py3-none-any.whl (11 kB)
Installing collected packages: pyro-api, opt-einsum, pyro-ppl
Successfully installed opt-einsum-3.4.0 pyro-api-0.1.2 pyro-ppl-1.9.1


In [None]:
import torch

torch.cuda.set_device(2)  # Set to GPU 2
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")


if torch.cuda.is_available():
    current_device = torch.cuda.current_device()  # Get the current CUDA device index
    device_name = torch.cuda.get_device_name(current_device)  # Get the device name
    print(f"CUDA is currently using device {current_device}: {device_name}")
else:
    print("CUDA is not available.")

### QA-SA EVCL (SA)

In [15]:
import json
from sklearn.model_selection import train_test_split
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
from peft import PeftModel
from datasets import Dataset
from torch.utils.data import DataLoader
import torch
import pyro
import pyro.distributions as dist
from tqdm import tqdm
import os
from huggingface_hub import login

# Load the JSON file


login("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")
# Extract input-output pairs from JSON
file_path = "task1312_amazonreview_polarity_classification.json"
with open(file_path, "r") as f:
    data = json.load(f)

instruct1=(
    "\nInstruction: You will be given a sentence describing a review. "
    "Classify its sentiment as either 'positive' or 'negative'. "
    "Respond only with the sentiment label ('positive' or 'negative') without any additional information. "
    "Ensure the output is a one word answer "
    "\nReview: "
)
instruct2="\nSentiment: "

# Extract input-output pairs from JSON
instances = data["Instances"][4500:5000]
test_inputs = [instruct1+instance["input"]+instruct2 for instance in instances]
test_outputs = [instance["output"][0] for instance in instances]

# Split the data into train and test sets

# Convert data to Hugging Face Dataset format
test_ds = Dataset.from_dict({"input": test_inputs, "output": test_outputs})

# Tokenizer setup
base_model_path = "meta-llama/Meta-Llama-3-8B"  # Replace with actual model path
tokenizer = AutoTokenizer.from_pretrained(base_model_path)

# Check if tokenizer has a padding token, if not, set the eos_token as padding token
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

# Tokenization function
def tokenize_function(examples):
    model_inputs = tokenizer(
        examples["input"],
        truncation=True,
        padding="max_length",
        max_length=512,
    )
    labels = tokenizer(
        examples["output"],
        truncation=True,
        padding="max_length",
        max_length=512,
    )["input_ids"]
    model_inputs["labels"] = labels
    return model_inputs


# Create DataLoaders
batch_size = 16  # Adjust as needed

# Define the model and load weights
bnb_config = BitsAndBytesConfig(
    load_in_8bit=True,
    llm_int8_enable_fp32_cpu_offload=True,
)
fine_tuned_weights_path = "finetuned_weights"

base_model = AutoModelForCausalLM.from_pretrained(
    base_model_path,
    quantization_config=bnb_config,
    device_map="auto"
)
model = PeftModel.from_pretrained(base_model, fine_tuned_weights_path)
pyro.get_param_store().load('pyro_param_store_task2_evcl_best.pt')
DEVICE = model.device

# Generate predictions
predictions = []
references = []

output_file_path = "predictions.json"

if os.path.exists(output_file_path):
    with open(output_file_path, "r") as f:
        saved_data = json.load(f)
else:
    saved_data = {"predictions": [], "references": []}

print("Generating predictions incrementally:")

print("Generating predictions:")

for i in tqdm(range(0, len(test_inputs), batch_size)):  # Loop in batches
    batch_inputs = test_inputs[i:i + batch_size]
    batch_references = test_outputs[i:i + batch_size]

    # Tokenize the inputs in a batch
    inputs_tokenized = tokenizer(batch_inputs, padding=True, truncation=True, return_tensors="pt").to(DEVICE)

    with torch.no_grad():
        with torch.cuda.amp.autocast():
            # Apply Pyro parameters to LoRA layers
            for name, module in model.named_modules():
                if hasattr(module, "lora_A"):
                    for key in module.lora_A:
                        loc = pyro.param(f"{name}.lora_A.{key}_loc")
                        scale = pyro.param(f"{name}.lora_A.{key}_scale")
                        sampled_weight = pyro.sample(
                            f"{name}.lora_A.{key}",
                            dist.Normal(loc, scale).to_event(loc.dim())
                        )
            
                        module.lora_A[key].weight.data.copy_(sampled_weight)

                if hasattr(module, "lora_B"):
                    for key in module.lora_B:
                        loc = pyro.param(f"{name}.lora_B.{key}_loc")
                        scale = pyro.param(f"{name}.lora_B.{key}_scale")
                        sampled_weight = pyro.sample(
                            f"{name}.lora_B.{key}",
                            dist.Normal(loc, scale).to_event(loc.dim())
                        )
                        
                        module.lora_B[key].weight.data.copy_(sampled_weight)

            # Generate predictions using the tokenized inputs
            generated_ids = model.generate(
                input_ids=inputs_tokenized["input_ids"],
                attention_mask=inputs_tokenized["attention_mask"],
                max_new_tokens=10,  
                min_length=10,  
                no_repeat_ngram_size=2,  
                num_return_sequences=1,
                top_p=0.9,  
                temperature=0.7  
            )

        # Decode generated IDs
        batch_predictions = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
        predictions.extend(batch_predictions)
        references.extend(batch_references)

    saved_data["predictions"].extend(batch_predictions)
    saved_data["references"].extend(batch_references)

    # Save incrementally to JSON
    with open(output_file_path, "w") as json_file:
        json.dump(saved_data, json_file, indent=4)

Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

Generating predictions incrementally:
Generating predictions:


  0%|          | 0/32 [00:00<?, ?it/s]Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.
  with torch.cuda.amp.autocast():
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.
  3%|▎         | 1/32 [00:02<01:14,  2.41s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.
  6%|▋         | 2/32 [00:04<01:12,  2.42s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_sid

In [16]:
import pandas as pd
from evaluate import load

# Load the CSV file
csv_file_path = 'sentiment_extraction.csv'
df = pd.read_csv(csv_file_path)

# Ensure necessary columns exist
assert "Sentiment" in df.columns, "Sentiment column is missing in the CSV."
assert "Reference" in df.columns, "Reference column is missing in the CSV."

# Load ROUGE metric from evaluate package
rouge = load("rouge")

# Calculate ROUGE scores
results = rouge.compute(predictions=df["Sentiment"].tolist(), references=df["Reference"].tolist())

print("ROUGE Scores:", results)


ROUGE Scores: {'rouge1': 0.5907422799422799, 'rouge2': 0.0, 'rougeL': 0.5908138528138529, 'rougeLsum': 0.5892117604617604}


### LORA only QA-SA (SA)

In [18]:
import json
from sklearn.model_selection import train_test_split
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
from peft import PeftModel
from datasets import Dataset
from torch.utils.data import DataLoader
import torch
from tqdm import tqdm
import os
from huggingface_hub import login

# Load the JSON file

login("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")
# Extract input-output pairs from JSON
file_path = "task1312_amazonreview_polarity_classification.json"
with open(file_path, "r") as f:
    data = json.load(f)

instruct1=(
    "\nInstruction: You will be given a sentence describing a review. "
    "Classify its sentiment as either 'positive' or 'negative'. "
    "Respond only with the sentiment label ('positive' or 'negative') without any additional information. "
    "Ensure the output is a one word answer "
    "\nReview: "
)
instruct2="\nSentiment: "

# Extract input-output pairs from JSON
instances = data["Instances"][4500:5000]
test_inputs = [instruct1+instance["input"]+instruct2 for instance in instances]
test_outputs = [instance["output"][0] for instance in instances]

# Split the data into train and test sets

# Convert data to Hugging Face Dataset format
test_ds = Dataset.from_dict({"input": test_inputs, "output": test_outputs})

# Tokenizer setup
base_model_path = "meta-llama/Meta-Llama-3-8B"  # Replace with actual model path
tokenizer = AutoTokenizer.from_pretrained(base_model_path)

# Check if tokenizer has a padding token, if not, set the eos_token as padding token
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

# Tokenization function
def tokenize_function(examples):
    model_inputs = tokenizer(
        examples["input"],
        truncation=True,
        padding="max_length",
        max_length=512,
    )
    labels = tokenizer(
        examples["output"],
        truncation=True,
        padding="max_length",
        max_length=512,
    )["input_ids"]
    model_inputs["labels"] = labels
    return model_inputs

# Create DataLoaders
batch_size = 16  # Adjust as needed

# Define the model and load weights
bnb_config = BitsAndBytesConfig(
    load_in_8bit=True,
    llm_int8_enable_fp32_cpu_offload=True,
)
fine_tuned_weights_path = "finetuned_weights_lora"

base_model = AutoModelForCausalLM.from_pretrained(
    base_model_path,
    quantization_config=bnb_config,
    device_map="auto"
)
model = PeftModel.from_pretrained(base_model, fine_tuned_weights_path)
DEVICE = model.device

# Generate predictions
predictions = []
references = []

output_file_path = "predictions_SA.json"

if os.path.exists(output_file_path):
    with open(output_file_path, "r") as f:
        saved_data = json.load(f)
else:
    saved_data = {"predictions": [], "references": []}

print("Generating predictions incrementally:")

print("Generating predictions:")

for i in tqdm(range(0, len(test_inputs), batch_size)):  # Loop in batches
    batch_inputs = test_inputs[i:i + batch_size]
    batch_references = test_outputs[i:i + batch_size]

    # Tokenize the inputs in a batch
    inputs_tokenized = tokenizer(batch_inputs, padding=True, truncation=True, return_tensors="pt").to(DEVICE)

    with torch.no_grad():  # Standard torch.no_grad() without pyro
        # Generate predictions using the tokenized inputs
        generated_ids = model.generate(
            input_ids=inputs_tokenized["input_ids"],
            attention_mask=inputs_tokenized["attention_mask"],
            max_new_tokens=10,  
            min_length=10,  
            no_repeat_ngram_size=2,  
            num_return_sequences=1,
            top_p=0.9,  
            temperature=0.7  
        )

        # Decode generated IDs
        batch_predictions = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
        predictions.extend(batch_predictions)
        references.extend(batch_references)

    saved_data["predictions"].extend(batch_predictions)
    saved_data["references"].extend(batch_references)

    # Save incrementally to JSON
    with open(output_file_path, "w") as json_file:
        json.dump(saved_data, json_file, indent=4)


Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]



Generating predictions incrementally:
Generating predictions:


  0%|          | 0/32 [00:00<?, ?it/s]Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.
  3%|▎         | 1/32 [00:02<01:12,  2.33s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.
  6%|▋         | 2/32 [00:05<01:29,  2.98s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the to

In [19]:
import pandas as pd
from evaluate import load

# Load the CSV file
csv_file_path = 'sentiment_references_extraction.csv'
df = pd.read_csv(csv_file_path)

# Ensure necessary columns exist
assert "Sentiment" in df.columns, "Sentiment column is missing in the CSV."
assert "Reference" in df.columns, "Reference column is missing in the CSV."

# Load ROUGE metric from evaluate package
rouge = load("rouge")

# Calculate ROUGE scores
results = rouge.compute(predictions=df["Sentiment"].tolist(), references=df["Reference"].tolist())

print("ROUGE Scores:", results)


ROUGE Scores: {'rouge1': 0.34111904761904766, 'rouge2': 0.0, 'rougeL': 0.34017142857142846, 'rougeLsum': 0.3394904761904761}


### EVCL QA-SA (QA) 

In [32]:
import json
from sklearn.model_selection import train_test_split
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
from peft import PeftModel
from datasets import Dataset
from torch.utils.data import DataLoader
import torch
import pyro
import pyro.distributions as dist
from tqdm import tqdm
import os
from huggingface_hub import login

# Load the JSON file


login("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")
# Extract input-output pairs from JSON
file_path = "task024_cosmosqa_answer_generation.json"
with open(file_path, "r") as f:
    data = json.load(f)

instruct1=(
    "Craft one correct answer to the question given in input."
    "To make it more interesting, try to use non-stereotypical language if possible. "
    "Make sure your correct answer is reasonably long, consistent with the context, and requires common sense (instead of explicit extraction from the context) "
    "Use a response that is uncommon/non-stereotypical, so that it is less predictable"
    "To be less repetitive, please vary your language for each question."
    "\nReview: "
)
instruct2="\nGenerate: "

# Extract input-output pairs from JSON
instances = data["Instances"][4500:5000]
test_inputs = [instruct1+instance["input"]+instruct2 for instance in instances]
test_outputs = [instance["output"][0] for instance in instances]

# Split the data into train and test sets

# Convert data to Hugging Face Dataset format
test_ds = Dataset.from_dict({"input": test_inputs, "output": test_outputs})

# Tokenizer setup
base_model_path = "meta-llama/Meta-Llama-3-8B"  # Replace with actual model path
tokenizer = AutoTokenizer.from_pretrained(base_model_path)

# Check if tokenizer has a padding token, if not, set the eos_token as padding token
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

# Tokenization function
def tokenize_function(examples):
    model_inputs = tokenizer(
        examples["input"],
        truncation=True,
        padding="max_length",
        max_length=512,
    )
    labels = tokenizer(
        examples["output"],
        truncation=True,
        padding="max_length",
        max_length=512,
    )["input_ids"]
    model_inputs["labels"] = labels
    return model_inputs


# Create DataLoaders
batch_size = 16  # Adjust as needed

# Define the model and load weights
bnb_config = BitsAndBytesConfig(
    load_in_8bit=True,
    llm_int8_enable_fp32_cpu_offload=True,
)
fine_tuned_weights_path = "finetuned_weights"

base_model = AutoModelForCausalLM.from_pretrained(
    base_model_path,
    quantization_config=bnb_config,
    device_map="auto"
)
model = PeftModel.from_pretrained(base_model, fine_tuned_weights_path)
pyro.get_param_store().load('pyro_param_store_task2_evcl_best.pt')
DEVICE = model.device

# Generate predictions
predictions = []
references = []

output_file_path = "predictions_QA.json"

if os.path.exists(output_file_path):
    with open(output_file_path, "r") as f:
        saved_data = json.load(f)
else:
    saved_data = {"predictions": [], "references": []}

print("Generating predictions incrementally:")

print("Generating predictions:")

for i in tqdm(range(0, len(test_inputs), batch_size)):  # Loop in batches
    batch_inputs = test_inputs[i:i + batch_size]
    batch_references = test_outputs[i:i + batch_size]

    # Tokenize the inputs in a batch
    inputs_tokenized = tokenizer(batch_inputs, padding=True, truncation=True, return_tensors="pt").to(DEVICE)

    with torch.no_grad():
        with torch.cuda.amp.autocast():
            # Apply Pyro parameters to LoRA layers
            for name, module in model.named_modules():
                if hasattr(module, "lora_A"):
                    for key in module.lora_A:
                        loc = pyro.param(f"{name}.lora_A.{key}_loc")
                        scale = pyro.param(f"{name}.lora_A.{key}_scale")
                        sampled_weight = pyro.sample(
                            f"{name}.lora_A.{key}",
                            dist.Normal(loc, scale).to_event(loc.dim())
                        )
            
                        module.lora_A[key].weight.data.copy_(sampled_weight)

                if hasattr(module, "lora_B"):
                    for key in module.lora_B:
                        loc = pyro.param(f"{name}.lora_B.{key}_loc")
                        scale = pyro.param(f"{name}.lora_B.{key}_scale")
                        sampled_weight = pyro.sample(
                            f"{name}.lora_B.{key}",
                            dist.Normal(loc, scale).to_event(loc.dim())
                        )
                        
                        module.lora_B[key].weight.data.copy_(sampled_weight)

            # Generate predictions using the tokenized inputs
            generated_ids = model.generate(
                input_ids=inputs_tokenized["input_ids"],
                attention_mask=inputs_tokenized["attention_mask"],
                max_new_tokens=15,  
                min_length=10,  
                no_repeat_ngram_size=2,  
                num_return_sequences=1,
                top_p=0.9,  
                temperature=0.7  
            )

        # Decode generated IDs
        batch_predictions = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
        predictions.extend(batch_predictions)
        references.extend(batch_references)

    saved_data["predictions"].extend(batch_predictions)
    saved_data["references"].extend(batch_references)

    # Save incrementally to JSON
    with open(output_file_path, "w") as json_file:
        json.dump(saved_data, json_file, indent=4)

Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

  state = torch.load(input_file, map_location)


Generating predictions incrementally:
Generating predictions:


  0%|          | 0/32 [00:00<?, ?it/s]Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.
  with torch.cuda.amp.autocast():
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.
  3%|▎         | 1/32 [00:03<01:48,  3.49s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.
  6%|▋         | 2/32 [00:07<01:45,  3.53s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_sid

In [35]:
import pandas as pd
from evaluate import load

# Load the CSV file
csv_file_path = 'extracted_answers.csv'
df = pd.read_csv(csv_file_path)

# Ensure necessary columns exist
assert "Answers" in df.columns, "Sentiment column is missing in the CSV."
assert "References" in df.columns, "Reference column is missing in the CSV."

# Load ROUGE metric from evaluate package
rouge = load("rouge")

# Calculate ROUGE scores
results = rouge.compute(predictions=df["Answers"].tolist(), references=df["References"].tolist())

print("ROUGE Scores:", results)


ROUGE Scores: {'rouge1': 0.15205543057270776, 'rouge2': 0.027119491820603664, 'rougeL': 0.1360292092279667, 'rougeLsum': 0.1366083126514292}


### Lora Only QA-SA (QA)

In [4]:
import json
from sklearn.model_selection import train_test_split
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
from peft import PeftModel
from datasets import Dataset
from torch.utils.data import DataLoader
import torch
from tqdm import tqdm
import os
from huggingface_hub import login

# Load the JSON file

login("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")
# Extract input-output pairs from JSON
file_path = "task024_cosmosqa_answer_generation.json"
with open(file_path, "r") as f:
    data = json.load(f)

instruct1=(
    "Craft one correct answer to the question given in input."
    "To make it more interesting, try to use non-stereotypical language if possible. "
    "Make sure your correct answer is reasonably long, consistent with the context, and requires common sense (instead of explicit extraction from the context) "
    "Use a response that is uncommon/non-stereotypical, so that it is less predictable"
    "To be less repetitive, please vary your language for each question."
    "\nReview: "
)
instruct2="\nGenerate: "

# Extract input-output pairs from JSON
instances = data["Instances"][4500:5000]
test_inputs = [instruct1+instance["input"]+instruct2 for instance in instances]
test_outputs = [instance["output"][0] for instance in instances]

# Split the data into train and test sets

# Convert data to Hugging Face Dataset format
test_ds = Dataset.from_dict({"input": test_inputs, "output": test_outputs})

# Tokenizer setup
base_model_path = "meta-llama/Meta-Llama-3-8B"  # Replace with actual model path
tokenizer = AutoTokenizer.from_pretrained(base_model_path)

# Check if tokenizer has a padding token, if not, set the eos_token as padding token
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

# Tokenization function
def tokenize_function(examples):
    model_inputs = tokenizer(
        examples["input"],
        truncation=True,
        padding="max_length",
        max_length=512,
    )
    labels = tokenizer(
        examples["output"],
        truncation=True,
        padding="max_length",
        max_length=512,
    )["input_ids"]
    model_inputs["labels"] = labels
    return model_inputs

# Create DataLoaders
batch_size = 16  # Adjust as needed

# Define the model and load weights
bnb_config = BitsAndBytesConfig(
    load_in_8bit=True,
    llm_int8_enable_fp32_cpu_offload=True,
)
fine_tuned_weights_path = "finetuned_weights_lora"

base_model = AutoModelForCausalLM.from_pretrained(
    base_model_path,
    quantization_config=bnb_config,
    device_map="auto"
)
model = PeftModel.from_pretrained(base_model, fine_tuned_weights_path)
DEVICE = model.device

# Generate predictions
predictions = []
references = []

output_file_path = "predictions_Lora_QA.json"

if os.path.exists(output_file_path):
    with open(output_file_path, "r") as f:
        saved_data = json.load(f)
else:
    saved_data = {"predictions": [], "references": []}

print("Generating predictions incrementally:")

print("Generating predictions:")

for i in tqdm(range(0, len(test_inputs), batch_size)):  # Loop in batches
    batch_inputs = test_inputs[i:i + batch_size]
    batch_references = test_outputs[i:i + batch_size]

    # Tokenize the inputs in a batch
    inputs_tokenized = tokenizer(batch_inputs, padding=True, truncation=True, return_tensors="pt").to(DEVICE)

    with torch.no_grad():  # Standard torch.no_grad() without pyro
        # Generate predictions using the tokenized inputs
        generated_ids = model.generate(
            input_ids=inputs_tokenized["input_ids"],
            attention_mask=inputs_tokenized["attention_mask"],
            max_new_tokens=15,  
            min_length=10,  
            no_repeat_ngram_size=2,  
            num_return_sequences=1,
            top_p=0.9,  
            temperature=0.7  
        )

        # Decode generated IDs
        batch_predictions = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
        predictions.extend(batch_predictions)
        references.extend(batch_references)

    saved_data["predictions"].extend(batch_predictions)
    saved_data["references"].extend(batch_references)

    # Save incrementally to JSON
    with open(output_file_path, "w") as json_file:
        json.dump(saved_data, json_file, indent=4)


Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]



Generating predictions incrementally:
Generating predictions:


  0%|          | 0/32 [00:00<?, ?it/s]Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.
  3%|▎         | 1/32 [00:05<02:54,  5.64s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.
  6%|▋         | 2/32 [00:08<02:06,  4.22s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the to

In [10]:
import pandas as pd
from evaluate import load

# Load the CSV file
csv_file_path = 'Extracted_Answers_with_References.csv'
df = pd.read_csv(csv_file_path)

# Ensure necessary columns exist
assert "Generate_Answer" in df.columns, "Sentiment column is missing in the CSV."
assert "Reference_Answer" in df.columns, "Reference column is missing in the CSV."

# Load ROUGE metric from evaluate package
rouge = load("rouge")

# Calculate ROUGE scores
results = rouge.compute(predictions=df["Generate_Answer"].tolist(), references=df["Reference_Answer"].tolist())

print("ROUGE Scores:", results)


ROUGE Scores: {'rouge1': 0.10703696677478106, 'rouge2': 0.019555833881274846, 'rougeL': 0.09630928934111785, 'rougeLsum': 0.0965816343461406}
