<a href="https://colab.research.google.com/github/suleiman-odeh/NLP_Project_Team16/blob/main/Gemma_2/zero_shot_indirect_Gemma_2_9B.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install -q -U transformers bitsandbytes accelerate

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m44.0/44.0 kB[0m [31m2.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m12.0/12.0 MB[0m [31m52.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m59.1/59.1 MB[0m [31m11.4 MB/s[0m eta [36m0:00:00[0m
[?25h

In [None]:
"""
This cell loads the model.
I have already run it, but cleared the output since it can't be uploaded to GitHub.
"""
import torch
import pandas as pd
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
from huggingface_hub import login

# Log in to the Hugging Face Hub using a user access token. Called without
# arguments so that no token is stored in the notebook itself.
login()

# 4-bit quantization configuration
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,                     # load the weights in 4-bit
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.float16,  # compute in 16-bit for speed, store in 4-bit
)

# Load tokenizer and model
model_id = "google/gemma-2-9b-it"

print(f"Loading {model_id} in 4-bit...")
tokenizer = AutoTokenizer.from_pretrained(model_id)
# `trust_remote_code` is intentionally left at its default (False): Gemma 2 is
# implemented inside transformers, so there is no reason to allow execution of
# code shipped with the model repository.
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    quantization_config=bnb_config,
    device_map="auto",
)

print("Model loaded successfully!")


In [None]:
import pandas as pd
import json

# Load data
# Every later cell needs `df`, so a missing file stops the cell with an
# actionable message instead of printing and continuing with `df` undefined
# (or left over from an earlier run of the kernel).
try:
    df = pd.read_json('QEvasion_cleaned.jsonl', lines=True)
except FileNotFoundError as exc:
    raise FileNotFoundError("ERROR: Upload 'QEvasion_cleaned.jsonl' first.") from exc
print(f"Data loaded. Total rows: {len(df)}")

# Collapse the 9 evasion labels onto 3 clarity IDs (0, 1, 2).
# Each tuple below lists the evasion labels that share one clarity ID; the
# tuple's position is that ID. Insertion order of the resulting dict follows
# the order in which the labels are written here.
EVASION_TO_CLARITY_ID = {
    evasion_label: clarity_id
    for clarity_id, evasion_labels in enumerate((
        ('Explicit',),
        ('Implicit', 'Dodging', 'Deflection', 'General', 'Partial/half-answer'),
        ('Declining to answer', 'Claims ignorance', 'Clarification'),
    ))
    for evasion_label in evasion_labels
}

# paper prompt
def create_gemma_indirect_prompt(question, answer):
    """Build the zero-shot evasion-classification prompt for one Q/A pair.

    The instruction text (source: Appendix H, page 26 of the paper) is wrapped
    in Gemma 2 chat-turn markers: a single user turn followed by an opened
    model turn.

    Args:
        question: Interviewer question; inserted twice (inside the interview
            excerpt and again under "### Question ###").
        answer: Interviewee answer; inserted once, after the question.

    Returns:
        The complete prompt string, ending with "<start_of_turn>model" and a
        newline.
    """
    taxonomy_entries = (
        "1. Explicit: The information requested is explicitly stated (in the requested form).",
        "2. Implicit: The information requested is given, but without being explicitly stated (not in the expected form).",
        "3. General: The information provided is too general/lacks the requested specificity.",
        "4. Partial/half-answer: Offers only a specific component of the requested information.",
        "5. Dodging: Ignoring the question altogether.",
        "6. Deflection: Starts on topic but shifts the focus and makes a different point than what is asked.",
        "7. Declining to answer: Acknowledges the question but directly or indirectly refusing to answer at the moment.",
        "8. Claims ignorance: The answerer claims/admits not to know the answer themselves.",
        "9. Clarification: Does not provide the requested information and asks for clarification.",
    )

    # The trailing empty item makes the joined instruction end with a newline.
    instruction_block = "\n".join([
        "Based on a segment of the interview in which the interviewer poses a series of questions, "
        "classify the type of response provided by the interviewee for the following question "
        "using the following taxonomy and then provide a chain of thought explanation for your decision:",
        "",
        "<Taxonomy>",
        *taxonomy_entries,
        "",
        "You are required to respond with a single term corresponding to the Taxonomy code and only.",
        "",
        "### Part of the interview ###",
        "",
    ])

    # gemma2 chat template: user turn, then the start of the model turn.
    turn_lines = [
        "<start_of_turn>user",
        instruction_block,
        f'Question: "{question}"',
        f'Answer: "{answer}"',
        "",
        "### Question ###",
        f'"{question}"',
        "",
        "Taxonomy code:<end_of_turn>",
        "<start_of_turn>model",
        "",
    ]
    return "\n".join(turn_lines)

# Printed as the last statement of this cell, after the data load, the label
# map and the prompt builder above.
print("finish")

Data loaded. Total rows: 3756
finish


In [None]:
import torch
import gc
import re
from tqdm import tqdm

# take test data
test_df = df[df['split_type'] == 'test'].copy()
print(f"Processing Test Set: {len(test_df)} samples")

# Generation budget per sample; more than a single label's worth of tokens is
# allowed because the prompt also asks for a chain-of-thought explanation.
MAX_NEW_TOKENS = 100

# One word-bounded, case-insensitive pattern per evasion label, compiled once
# here instead of on every loop iteration.
_LABEL_PATTERNS = [
    (label, re.compile(r'\b' + re.escape(label) + r'\b', re.IGNORECASE))
    for label in EVASION_TO_CLARITY_ID
]


def _detect_evasion_label(text):
    """Return the evasion label that is mentioned first in `text`.

    The model output may be free text, so every known label is searched for as
    a whole word (case-insensitive). When several labels occur, the one whose
    whole-word match starts earliest wins. The position is taken from the regex
    match itself; a plain substring search would, for example, locate
    "general" inside "generally" and so rank a label by a position where it
    does not actually occur as a word.

    Args:
        text: Decoded model output.

    Returns:
        One of the keys of EVASION_TO_CLARITY_ID, or "Error" when no label is
        found.
    """
    hits = []
    for label, pattern in _LABEL_PATTERNS:
        match = pattern.search(text)
        if match:
            hits.append((match.start(), label))
    if not hits:
        return "Error"
    # Compare on position only, so an (unexpected) tie keeps the label that
    # comes first in EVASION_TO_CLARITY_ID.
    return min(hits, key=lambda hit: hit[0])[1]


predictions_evasion = []
predictions_clarity = []
raw_outputs = []

# Inference Loop
print("Starting Zero-Shot Inference ...")
model.eval()

for index, row in tqdm(test_df.iterrows(), total=len(test_df)):
    # Create Prompt
    prompt_text = create_gemma_indirect_prompt(row['question'], row['cleaned_answer'])

    # Tokenize and move the tensors to the device the model was placed on
    # (device_map="auto" decides this at load time), not a hard-coded "cuda".
    inputs = tokenizer(prompt_text, return_tensors="pt").to(model.device)

    # Generate (greedy decoding, so repeated runs give the same text)
    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=MAX_NEW_TOKENS,
            do_sample=False,
            pad_token_id=tokenizer.eos_token_id
        )

    # Decode only the newly generated tokens, i.e. everything after the prompt
    generated_text = tokenizer.decode(outputs[0][inputs.input_ids.shape[1]:], skip_special_tokens=True)
    clean_text = generated_text.strip()
    raw_outputs.append(clean_text)

    # The output may be a longer text, so look for the first evasion label in it
    detected_label = _detect_evasion_label(clean_text)

    # Map to Clarity ID (-1 when no label was detected)
    mapped_id = EVASION_TO_CLARITY_ID.get(detected_label, -1)

    predictions_evasion.append(detected_label)
    predictions_clarity.append(mapped_id)

# Save results in a csv file
test_df['raw_output'] = raw_outputs
test_df['pred_evasion'] = predictions_evasion
test_df['pred_clarity_id'] = predictions_clarity

output_filename = "gemma_indirect_zs_paper.csv"
test_df.to_csv(output_filename, index=False)
print(f"\nInference complete. Results saved to '{output_filename}'.")


Processing Test Set: 308 samples
Starting Zero-Shot Inference ...


  8%|▊         | 24/308 [04:44<56:02, 11.84s/it]


KeyboardInterrupt: 

In [None]:
# --- Step 4: Cleanup ---
print("Cleaning up GPU memory...")
# Drop the notebook's references to the model and tokenizer.
del model
del tokenizer
# Run the garbage collector first so that objects which are no longer reachable
# are actually freed; only afterwards can empty_cache() hand the memory they
# occupied back to the GPU driver.
gc.collect()
torch.cuda.empty_cache()
print("Model deleted.")