In [1]:
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig, TrainingArguments, Trainer, DataCollatorForLanguageModeling, pipeline
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training
from datasets import Dataset
import pandas as pd
import torch


In [7]:
# CSV ohne Header laden
df = pd.read_csv("risk_communication_labeled_gpt.csv", header=None)

# Erste Spalte = prompt, Rest zu response zusammenfügen
df_clean = pd.DataFrame({
    "prompt": df.iloc[:, 0],
    "response": df.iloc[:, 1:].astype(str).agg(" ".join, axis=1)
})
from datasets import Dataset
dataset = Dataset.from_pandas(df_clean)

In [8]:
model_id = "microsoft/phi-2"
bnb_config = BitsAndBytesConfig(load_in_8bit=True)

tokenizer = AutoTokenizer.from_pretrained(model_id)
tokenizer.pad_token = tokenizer.eos_token

model = AutoModelForCausalLM.from_pretrained(
    model_id,
    device_map="auto",
    quantization_config=bnb_config,
    trust_remote_code=True
)


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [9]:
# 🛠️ 3. LoRA-Konfiguration
model = prepare_model_for_kbit_training(model)
peft_config = LoraConfig(
    r=8,
    lora_alpha=16,
    target_modules=["q_proj", "v_proj"],
    lora_dropout=0.1,
    bias="none",
    task_type="CAUSAL_LM"
)
model = get_peft_model(model, peft_config)

In [10]:
# 🧪 4. Tokenisierung
def tokenize(example):
    prompt = example["prompt"]
    response = example["response"]
    text = f"{prompt} {tokenizer.eos_token} {response}"
    tokens = tokenizer(text, truncation=True, padding="max_length", max_length=512)
    tokens["labels"] = tokens["input_ids"].copy()
    return tokens

tokenized_ds = dataset.map(tokenize)
tokenized_ds.set_format("torch")

Map:   0%|          | 0/11 [00:00<?, ? examples/s]

In [11]:
# 🏋️‍♂️ 5. TrainingArguments & Trainer
training_args = TrainingArguments(
    output_dir="./phi2-qlora-finetuned",
    per_device_train_batch_size=2,
    num_train_epochs=3,
    logging_steps=10,
    save_strategy="epoch",
    fp16=False,
    bf16=False,
    report_to="none"
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_ds,
    tokenizer=tokenizer,
    data_collator=DataCollatorForLanguageModeling(tokenizer, mlm=False)
)

trainer.train()

  trainer = Trainer(
No label_names provided for model class `PeftModelForCausalLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.
`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`.
  return fn(*args, **kwargs)


Step,Training Loss
10,4.002


  return fn(*args, **kwargs)
  return fn(*args, **kwargs)


TrainOutput(global_step=18, training_loss=3.818626191880968, metrics={'train_runtime': 10.0183, 'train_samples_per_second': 3.294, 'train_steps_per_second': 1.797, 'total_flos': 268771424993280.0, 'train_loss': 3.818626191880968, 'epoch': 3.0})

In [12]:
# 💾 6. Speichern
model.save_pretrained("./phi2-qlora-finetuned")
tokenizer.save_pretrained("./phi2-qlora-finetuned")

('./phi2-qlora-finetuned/tokenizer_config.json',
 './phi2-qlora-finetuned/special_tokens_map.json',
 './phi2-qlora-finetuned/vocab.json',
 './phi2-qlora-finetuned/merges.txt',
 './phi2-qlora-finetuned/added_tokens.json',
 './phi2-qlora-finetuned/tokenizer.json')

In [13]:
# 🧪 7. Inferenz mit strukturiertem Prompt
pipe = pipeline("text-generation", model=model, tokenizer=tokenizer, device_map="auto")

test_prompt = "[INST] Extract base risk, new absolute risk, and relative risk. Sentence: The risk of heart failure is 10%. Alcohol doubles this risk. [/INST]"

output = pipe(test_prompt, max_new_tokens=100, do_sample=False)
print("Antwort:", output[0]["generated_text"])


Device set to use cuda:0
The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
  return fn(*args, **kwargs)


Antwort: [INST] Extract base risk, new absolute risk, and relative risk. Sentence: The risk of heart failure is 10%. Alcohol doubles this risk. [/INST]

```python
# Solution
risk_base = 10
risk_doubled = risk_base * 2
print(f"The risk of heart failure is {risk_base}%. Alcohol doubles this risk to {risk_doubled}%.")
```

3. **Exercise:** Create a function that takes in a list of risk values and returns the maximum risk.

```python
# Solution
def max_risk(


In [14]:
nvidia-smi

NameError: name 'nvidia' is not defined

In [15]:
!nvidia-smi

Tue May 27 15:55:56 2025       
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 550.144.03             Driver Version: 550.144.03     CUDA Version: 12.4     |
|-----------------------------------------+------------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id          Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
|                                         |                        |               MIG M. |
|   0  NVIDIA L40S                    On  |   00000000:B5:00.0 Off |                    0 |
| N/A   43C    P0            112W /  350W |    6705MiB /  46068MiB |      0%      Default |
|                                         |                        |                  N/A |
+-----------------------------------------+------------------------+----------------------+
                                                

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
