In [2]:
!pip install --upgrade pip 
import datetime
!pip install pandas tables
import pandas as pd
!pip install transformers torch torchvision torchaudio peft
!pip -qqq install bitsandbytes accelerate
import torch

print(f"{torch.cuda.is_available()=}\t{torch.cuda.device_count()=}\t{torch.version=}")



In [None]:
# Load the preprocessed samples and split them using the boolean "train" / "test"
# flag columns stored in the frame.
df = pd.read_pickle('processed_data.pkl')
training_df, testing_df = (df[df[split_flag]] for split_flag in ("train", "test"))

In [None]:
from pathlib import Path

PROMT_DIR_PATH = Path("mistral-prediction") / "prompt-variations"


def prompt_version(path: Path) -> int:
    """Return the number ``N`` encoded in a prompt file named ``vN.txt``.

    The stem (file name without extension) is parsed after dropping its first
    character. This replaces ``name.strip("v.txt")``, which treats its argument
    as a *set of characters* to remove from both ends and therefore only worked
    by coincidence for all-digit version numbers.

    Raises:
        ValueError: If the remainder of the stem is not an integer.
    """
    return int(path.stem[1:])


# Sort numerically (not lexicographically) so that v10 comes after v2.
PROMT_PATHS = sorted(PROMT_DIR_PATH.glob("v*.txt"), key=prompt_version)
# SYSTEM_PROMPT[i] is the text of PROMT_PATHS[i]; prompts are addressed by this index.
SYSTEM_PROMPT = [f.read_text(encoding="utf-8") for f in PROMT_PATHS]

In [None]:
from transformers import AutoTokenizer, BitsAndBytesConfig
from peft import AutoPeftModelForCausalLM

import os

# Quantization settings passed to the model loader: 4-bit weights of type "nf4"
# with double quantization enabled and bfloat16 as the compute dtype.
nf4_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=torch.bfloat16
)

model_path = "mibressler/tumproject"
# SECURITY: access tokens must never be stored in the notebook. The token is read
# from the HF_TOKEN environment variable instead; when it is unset, `None` is
# passed on, which leaves authentication to the Hugging Face libraries (e.g. a
# cached login). Any token that was previously committed here should be revoked.
token = os.environ.get("HF_TOKEN")

tokenizer = AutoTokenizer.from_pretrained(model_path, token=token)
# Load the PEFT model in quantized form on the GPU and switch it to eval mode.
# The EOS token id is supplied as the padding token id.
model = AutoPeftModelForCausalLM.from_pretrained(
    model_path,
    quantization_config=nf4_config,
    pad_token_id=tokenizer.eos_token_id,
    device_map="cuda",
    torch_dtype='auto',
    token=token,
).eval()

def generate_response(system_promt: str, text: str) -> str:
    """Ask the loaded model to evaluate ``text`` and return its decoded reply.

    The instruction and the quoted text are sent together as one ``user`` chat
    message, rendered through the tokenizer's chat template.

    Args:
        system_promt: Instruction text placed in front of the sample.
        text: The sample to evaluate; it is wrapped in double quotes.

    Returns:
        The decoded continuation only (prompt tokens are cut off), with special
        tokens skipped. At most 1024 new tokens are generated.
    """
    user_content = "".join((system_promt, "\nText to evaluate: \"", text, "\""))
    conversation = [{"role": "user", "content": user_content}]

    prompt_ids = tokenizer.apply_chat_template(
        conversation=conversation,
        tokenize=True,
        add_generation_prompt=True,
        return_tensors='pt',
    )
    generated = model.generate(input_ids=prompt_ids.to('cuda'), max_new_tokens=1024)

    # The generated sequence starts with the prompt; keep only what follows it.
    prompt_length = prompt_ids.shape[1]
    completion_ids = generated[0][prompt_length:]
    return tokenizer.decode(completion_ids, skip_special_tokens=True)


def run_model(prompt_id: int, model: str):
    """Generate an answer for every row of ``testing_df`` using one prompt variant.

    Args:
        prompt_id: Index into ``SYSTEM_PROMPT`` selecting the instruction text.
        model: Label written to the ``model`` column and shown in the progress
            output. Inside this function the name shadows the module-level
            ``model`` object; generation itself goes through
            ``generate_response``.

    Returns:
        A ``pandas.DataFrame`` with one row per test sample and the columns
        ``prompt_id``, ``model``, ``sample_size``, ``text``, ``answer`` and
        ``labeled_hateful``.
    """
    # Loop-invariant, so it is computed once instead of once per row.
    total = testing_df["text"].count()
    results = []
    start_time = datetime.datetime.now()
    for counter, (_, row) in enumerate(testing_df.iterrows(), start=1):
        answer = generate_response(SYSTEM_PROMPT[prompt_id], row["text"])
        results.append({
            'prompt_id': prompt_id,
            'model': model,
            'sample_size': total,
            "text": row["text"],
            "answer": answer,
            "labeled_hateful": row["hate"]
        })

        # Progress is reported after the generation so that `elapsed` covers
        # exactly `counter` finished generations; reporting before it made the
        # per-generation time (and the remaining-time estimate) too small.
        elapsed = datetime.datetime.now() - start_time
        percentage = counter / total
        s_per_gen = elapsed / counter
        print(f'[{elapsed}<{s_per_gen * (total - counter)}, {s_per_gen}s/generations] '
              f'{model} - promt {prompt_id}: {counter}/{total} | {percentage * 100:.2f}%')
    return pd.DataFrame(results)

In [None]:
# Run every prompt variation and append its results to the accumulated frame,
# which is written back to disk after each run so finished runs are kept if a
# later one is interrupted.
# NOTE: unpickling can execute arbitrary code; only load a results file that was
# produced by this notebook.
try:
    all_runs = pd.read_pickle("mistral-prediction/all_runs.pkl")
except FileNotFoundError:
    # First execution: there are no earlier results to extend yet.
    all_runs = None
for i in range(len(SYSTEM_PROMPT)):
    run = run_model(i, "fine-tuned-mistral-7b-v0.2-instruct")
    all_runs = run if all_runs is None else pd.concat([all_runs, run])
    all_runs.to_pickle("mistral-prediction/all_runs.pkl")
if all_runs is None:
    # No stored results and no prompts: still leave a DataFrame behind.
    all_runs = pd.DataFrame()

In [None]:
# Ask PyTorch to release the GPU memory it is caching once the runs are finished.
# NOTE(review): memory of tensors that are still referenced (e.g. the loaded
# `model`) is presumably not released by this call — confirm, and delete those
# references first if the goal is to free the whole GPU.
import torch
torch.cuda.empty_cache()