In [8]:
!pip install --upgrade pip 
import datetime
!pip install pandas tables
import pandas as pd
!pip install transformers torch torchvision torchaudio peft
!pip -qqq install bitsandbytes accelerate
import torch

print(f"{torch.cuda.is_available()=}\t{torch.cuda.device_count()=}\t{torch.version=}")

torch.cuda.is_available()=True	torch.cuda.device_count()=1	torch.version=<module 'torch.version' from '/home/frank/dev/uni/tumprojectweek/venv/lib/python3.11/site-packages/torch/version.py'>


In [9]:
df = pd.read_pickle('../processed_data.pkl')
training_df = df[df["train"]]
testing_df = df[df["test"]]

In [27]:
from pathlib import Path

PROMT_DIR_PATH = Path("../mistral-prediction") / "prompt-variations"

PROMT_PATHS = sorted(PROMT_DIR_PATH.glob("v*.txt"), key=lambda f: int(f.name.strip("v.txt")))
SYSTEM_PROMPT = [f.read_text(encoding="utf-8") for f in PROMT_PATHS]

In [11]:
from transformers import AutoTokenizer, BitsAndBytesConfig
from peft import AutoPeftModelForCausalLM

nf4_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=torch.bfloat16
)

model_path = "mibressler/tumproject"
token = "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"

tokenizer = AutoTokenizer.from_pretrained(model_path, token=token)
model = AutoPeftModelForCausalLM.from_pretrained(
    model_path,
    quantization_config=nf4_config,
    pad_token_id=tokenizer.eos_token_id,
    device_map="cuda",
    torch_dtype='auto',
    token=token,
).eval()

def generate_response(system_promt: str, text: str) -> str:
    messages = [
        {"role": "user", "content": system_promt + "\nText to evaluate: \"" + text + "\""},
    ]

    input_ids = tokenizer.apply_chat_template(conversation=messages, tokenize=True, add_generation_prompt=True,
                                              return_tensors='pt')
    output_ids = model.generate(input_ids=input_ids.to('cuda'), max_new_tokens=1024)
    return tokenizer.decode(output_ids[0][input_ids.shape[1]:], skip_special_tokens=True)


def run_model(prompt_id: int, model: str):
    results = []
    start_time = datetime.datetime.now()
    for i, (row_index, row) in enumerate(testing_df.iterrows()):
        total = testing_df["text"].count()
        counter = i + 1
        elapsed = datetime.datetime.now() - start_time
        percentage = counter / total
        s_per_gen = elapsed / counter
        print(f'[{elapsed}<{s_per_gen * (total - counter)}, {s_per_gen}s/generations] '
              f'{model} - promt {prompt_id}: {counter}/{total} | {percentage * 100:.2f}%')
        answer = generate_response(SYSTEM_PROMPT[prompt_id], row["text"])
        results.append({
            'prompt_id': prompt_id,
            'model': model,
            'sample_size': total,
            "text": row["text"],
            "answer": answer,
            "labeled_hateful": row["hate"]
        })
    return pd.DataFrame(results)

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

In [12]:
"""
all_runs = pd.DataFrame()
for i in range(len(SYSTEM_PROMPT)):
    run = run_model(i, "fine-tuned-mistral-7b-v0.2-instruct
    all_runs = pd.concat([all_runs, run])
    pd.DataFrame(all_runs).to_pickle("fine-tuned.pkl")
"""

'\nall_runs = pd.DataFrame()\nfor i in range(len(SYSTEM_PROMPT)):\n    run = run_model(i, "fine-tuned-mistral-7b-v0.2-instruct\n    all_runs = pd.concat([all_runs, run])\n    pd.DataFrame(all_runs).to_pickle("fine-tuned.pkl")\n'

In [33]:
import torch
torch.cuda.empty_cache()

## LIME

In [47]:
!pip install lime
import lime
from lime.lime_text import LimeTextExplainer
import re

def predict_fn(texts):
    probabilities = []
    for text in texts:
        print("TEXTT: "+text)
        answer = generate_response(SYSTEM_PROMPT[0], text)
        print("ANSWER: "+answer)
        match = re.search(r'"hate_speech_probability": (\d+\.\d+)', answer)
        if match:
            hate_speech_probability = float(match.group(1))
            print("HATE_P: "+str(hate_speech_probability))
            probabilities.append([1 - hate_speech_probability, hate_speech_probability])
            print("PROBABILITIES: :"+str(probabilities))
    return probabilities


explainer = LimeTextExplainer(class_names=["Not Hate Speech", "Hate Speech"]) 

instance = "A cat makes fun of an ape"

exp = explainer.explain_instance(instance, predict_fn, num_features=5)

exp.show_in_notebook()

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


TEXTT: A cat makes fun of an ape
ANSWER: { 
  "relevant_facts": [],
  "contained_explicit_hate_speech_vocabular": [],
  "argument_for_hate_speech": "There is no hate speech in the given text.",
  "argument_against_hate_speech": "The text is a simple statement and does not contain any derogatory language or hate speech.",
  "likelihood_of_presence_of_implicit_hate_speech": 0.00,
  "likelihood_of_presence_of_explicit_hate_speech": 0.00,
  "likelihood_of_ad_hominem_attack": 0.00,
  "likelihood_of_minority_attack": 0.00,
  "likelyhood_of_takedown_on_social_media": 0.00,
  "hate_speech_probability": 0.00,
}
HATE_P: 0.0
PROBABILITIES: :[[1.0, 0.0]]
TEXTT:  cat makes    
ANSWER: { 
  "relevant_facts": [],
  "contained_explicit_hate_speech_vocabular": [],
  "argument_for_hate_speech": "",
  "argument_against_hate_speech": "",
  "likelihood_of_presence_of_implicit_hate_speech": 0.00,
  "likelihood_of_presence_of_explicit_hate_speech": 0.00,
  "likelihood_of_ad_hominem_attack": 0.00,
  "likeliho

KeyboardInterrupt: 

In [None]:
# inspect attention weights
# self attention layer
# print out the gradients