In [1]:
# Zelle 1 – Google Drive mounten (nur in Colab nötig)
# Damit später das finegetunte Modell und die Ergebnisse direkt in deinem Drive liegen.
from google.colab import drive
drive.mount('/content/drive', force_remount=True)


Mounted at /content/drive


In [2]:
# Installiere bitsandbytes für 4-Bit / 8-Bit Support
!pip install -q bitsandbytes


In [3]:
# Zelle 2 – Bibliotheken importieren
# os und json für Datei-Operationen
# pandas zum Erstellen und Speichern von DataFrames
# transformers & peft zum Nachladen deines Modells/Adapters und Pipelines
import os
import json
import pandas as pd

from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
from peft         import PeftModel


In [4]:
# Zelle 3 – Pfade definieren
# ADAPTER_PATH: Ordner mit deinem finegetunten Adapter + Tokenizer
# PROMPTS_CSV: (optional) CSV-Datei mit den 100 zu testenden Input-Texten
# OUT_DIR: Zielordner, in dem Excel- und JSON-Dateien abgelegt werden
ADAPTER_PATH = "/content/drive/MyDrive/TrainingMistral/adapter-11.07" # Hier anpassen !!!
EXCEL_PATH  = "/content/drive/MyDrive/TrainingMistral/Testing0707.xlsx"
OUT_DIR      = "/content/drive/MyDrive/TrainingMistral"

# Falls OUT_DIR noch nicht existiert, erstelle ihn
os.makedirs(OUT_DIR, exist_ok=True)


In [5]:
# Zelle 4 – Prompt-Instruction definieren

INSTRUCTION = (
    "### Instruction:\n"
    "From the input text, extract the following fields ONLY.\n"
    "Format your response as newline-separated entries:\n\n"
    "category: value\n\n"
    "Do NOT include any additional text, headings, or explanations.\n\n"
    "Fields to extract:\n"
    "- risk_communication\n"
    "- unrelated_risks\n"
    "- absolute_risk_base\n"
    "- absolute_risk_new\n"
    "- absolute_number_base\n"
    "- absolute_number_new\n"
    "- absolute_risk_difference\n"
    "- relative_risk\n"
    "- absolute_number_difference\n"
    "- verbal_descriptor_base\n"
    "- verbal_descriptor_new\n"
    "- verbal_descriptor_change\n"
    "- reference_class_size_base\n"
    "- reference_class_size_new\n"
    "- reference_class_description_base\n"
    "- reference_class_description_new\n"
    "- source_base\n"
    "- source_new\n"
)

In [None]:
# Hugging Face Connection
from huggingface_hub import login
login()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [8]:
# Zelle 5 – Basis-Modell & QLoRA-Adapter laden (8-Bit-Version)

# Neueste bitsandbytes-Version installieren
!pip install -U bitsandbytes
!pip install -U transformers
!pip install -U accelerate
!pip install -U peft

from transformers import AutoModelForCausalLM, BitsAndBytesConfig
from peft import PeftModel

from transformers import AutoModelForCausalLM, BitsAndBytesConfig
from peft import PeftModel
import torch

bnb_config = BitsAndBytesConfig(
    load_in_8bit=True,
    llm_int8_threshold=6.0,
    llm_int8_has_fp16_weight=False  # <<< Diese Zeile ist wichtig!
)

base = AutoModelForCausalLM.from_pretrained(
    "mistralai/Mistral-7B-v0.1",
    quantization_config=bnb_config,
    device_map="auto"
)
# Schalte Training-Features ab und aktiviere den Cache für schnelle Inference
base.eval()
base.gradient_checkpointing_disable()
base.config.use_cache = True

# 2) Lade den LoRA-Adapter (deine feingetunten Gewichte) aus deinem Drive-Ordner
model = PeftModel.from_pretrained(base, ADAPTER_PATH)
model.eval()  # Setze das Modell in den Inference-Modus






config.json:   0%|          | 0.00/571 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/25.1k [00:00<?, ?B/s]

Fetching 2 files:   0%|          | 0/2 [00:00<?, ?it/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/4.54G [00:00<?, ?B/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/9.94G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

PeftModelForCausalLM(
  (base_model): LoraModel(
    (model): MistralForCausalLM(
      (model): MistralModel(
        (embed_tokens): Embedding(32000, 4096)
        (layers): ModuleList(
          (0-31): 32 x MistralDecoderLayer(
            (self_attn): MistralAttention(
              (q_proj): lora.Linear8bitLt(
                (base_layer): Linear8bitLt(in_features=4096, out_features=4096, bias=False)
                (lora_dropout): ModuleDict(
                  (default): Dropout(p=0.1, inplace=False)
                )
                (lora_A): ModuleDict(
                  (default): Linear(in_features=4096, out_features=16, bias=False)
                )
                (lora_B): ModuleDict(
                  (default): Linear(in_features=16, out_features=4096, bias=False)
                )
                (lora_embedding_A): ParameterDict()
                (lora_embedding_B): ParameterDict()
                (lora_magnitude_vector): ModuleDict()
              )
              (k_

In [9]:
# Zelle 6 – Tokenizer laden -> Modell und Tokenizer gehören zusammen (Tokenizer wurden vielleicht auch geändert)

# Der Tokenizer liegt im gleichen Adapter-Ordner, damit die Token-IDs übereinstimmen
tokenizer = AutoTokenizer.from_pretrained(
    ADAPTER_PATH,
    trust_remote_code=True
)
# Falls kein pad_token definiert ist, setze ihn auf den eos_token
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token


In [10]:
# Zelle 7 – Text-Generation-Pipeline einrichten
# Greedy-Decoding (do_sample=False) für deterministische Ergebnisse
# max_new_tokens limitiert die Länge der Ausgabe
pipe = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    device_map="auto",
    torch_dtype="auto",
    max_new_tokens=256,
    do_sample=False, # deterministisch -> bei gleichem Prompt immer dieselbe Ausgabe
    use_cache=True,
)


Device set to use cuda:0
The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.


In [11]:
# Zelle 7 – Eingabe-Daten laden und vorbereiten
# Lies die Excel mit nur einer Spalte ("input")
df_inputs = pd.read_excel(EXCEL_PATH, usecols=["input"])
print("Eingabe-Spalte sample:", df_inputs["input"].head())


Eingabe-Spalte sample: 0    If you use the Scalable app, your chances of b...
1    Exchange rate risk refers to the risk that a c...
2    Infamous economists continue to see a 1-in-3 c...
3    The average risk of default for US public comp...
4    The probability of a severe credit crunch in t...
Name: input, dtype: object


In [12]:
# Zelle 8 – Pipe-Line über Zeilen x bei .head(x)

# Ermittle dynamisch, wie viele Inputs du wirklich hast
num_inputs = len(df_inputs)

results = []
for idx, text in enumerate(df_inputs["input"].astype(str), start=1):
    prompt = INSTRUCTION + f"### Input:\n{text.strip()}\n\n### Output:\n"
    generated = pipe(prompt)[0]["generated_text"]
    results.append({
        "index":     idx,
        "input":     text,
        "extracted": generated.strip()
    })


The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
The following generation flags are not valid and may be ignore

In [13]:
# Zelle 9 – Ergebnisse in DataFrame bringen und Vorschau
df_out = pd.DataFrame(results)
df_out.head(5)

Unnamed: 0,index,input,extracted
0,1,"If you use the Scalable app, your chances of b...","### Instruction:\nFrom the input text, extract..."
1,2,Exchange rate risk refers to the risk that a c...,"### Instruction:\nFrom the input text, extract..."
2,3,Infamous economists continue to see a 1-in-3 c...,"### Instruction:\nFrom the input text, extract..."
3,4,The average risk of default for US public comp...,"### Instruction:\nFrom the input text, extract..."
4,5,The probability of a severe credit crunch in t...,"### Instruction:\nFrom the input text, extract..."


In [14]:
# Zelle 9.5 – Auf die reinen Spalten reduzieren
# Wir filtern nur 'input' und 'extracted'
df_save = df_out[["input", "extracted"]].copy()

# Optional: Spalten umbenennen für Klarheit
df_save.columns = ["InputText", "ModelOutput"]

# Zeige die ersten 5 Zeilen
df_save.head()


Unnamed: 0,InputText,ModelOutput
0,"If you use the Scalable app, your chances of b...","### Instruction:\nFrom the input text, extract..."
1,Exchange rate risk refers to the risk that a c...,"### Instruction:\nFrom the input text, extract..."
2,Infamous economists continue to see a 1-in-3 c...,"### Instruction:\nFrom the input text, extract..."
3,The average risk of default for US public comp...,"### Instruction:\nFrom the input text, extract..."
4,The probability of a severe credit crunch in t...,"### Instruction:\nFrom the input text, extract..."


In [15]:
# Zelle 10 – Ergebnisse als Excel speichern
excel_path = os.path.join(OUT_DIR, "inference_testing_results_1107.xlsx")
#df_out.to_excel(excel_path, index=False)
df_save.to_excel(excel_path, index=False)

print("✅ Inferenz abgeschlossen. Excel-Datei abgelegt:")
print("  •", excel_path)

✅ Inferenz abgeschlossen. Excel-Datei abgelegt:
  • /content/drive/MyDrive/TrainingMistral/inference_testing_results_1107.xlsx
