In [None]:
import os
import torch
from datasets import load_dataset
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    TrainingArguments,
    pipeline,
    logging,
)
from peft import LoraConfig, PeftModel
from trl import SFTTrainer

# Surface INFO-level messages from transformers while loading and training.
logging.set_verbosity_info()

# Hub checkpoint used for both the tokenizer and the base model.
model_name = "meta-llama/Llama-2-7b-hf"

# ---------------------------------------------------------------------------
# Side-effect-free configuration objects
# ---------------------------------------------------------------------------

# 4-bit NF4 weight quantization; matmuls are computed in float16.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True, bnb_4bit_quant_type="nf4", bnb_4bit_compute_dtype=torch.float16
)

# LoRA adapter hyper-parameters for causal language modelling.
peft_config = LoraConfig(
    r=16, lora_alpha=32, lora_dropout=0.05, bias="none", task_type="CAUSAL_LM"
)

# ---------------------------------------------------------------------------
# Data, tokenizer and quantized base model
# ---------------------------------------------------------------------------

# The CSV loader returns a DatasetDict; later code reads its "train" split.
dataset = load_dataset("csv", data_files="combined.csv")

# NOTE(review): trust_remote_code=True lets code shipped in the Hub repo run
# locally. Presumably unnecessary for a stock Llama-2 checkpoint -- confirm
# before dropping it.
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
# No dedicated pad token is configured, so EOS doubles as the padding token.
tokenizer.pad_token = tokenizer.eos_token

model = AutoModelForCausalLM.from_pretrained(
    model_name, quantization_config=bnb_config, device_map="auto", trust_remote_code=True
)

# ---------------------------------------------------------------------------
# Trainer hyper-parameters
# ---------------------------------------------------------------------------

# 4 sequences per device x 4 accumulation steps = 16 sequences per optimizer
# step on each device.
training_args = TrainingArguments(
    output_dir="./results",
    push_to_hub=False,
    num_train_epochs=3,
    learning_rate=2e-4,
    per_device_train_batch_size=4,
    gradient_accumulation_steps=4,
    fp16=True,
    logging_steps=100,
    save_total_limit=3,
)

def preprocess_function(examples, max_length=128):
    """Convert a batch of CSV rows into causal-LM training features.

    Every row becomes a single sequence: the prompt built from the device id,
    status and timestamp (ending in ``Anomaly:``), followed by the anomaly
    label and an end-of-sequence token. ``labels`` is aligned position by
    position with ``input_ids``; prompt and padding positions are set to
    -100 so that only the label (and the closing EOS) contribute to the loss.

    Args:
        examples: Batched mapping with the columns ``Device_id``, ``Status``,
            ``timestamp`` and ``anomaly`` (lists of equal length).
        max_length: Fixed length of every returned sequence. When a row is
            too long the prompt is truncated, never the label.

    Returns:
        A dict with ``input_ids``, ``attention_mask`` and ``labels``, each a
        list holding one ``max_length``-long list of ints per input row.
    """
    prompts = [
        f"Device: {d}, Status: {s}, Timestamp: {t}\nAnomaly:"
        for d, s, t in zip(examples['Device_id'], examples['Status'], examples['timestamp'])
    ]
    targets = [str(a) for a in examples['anomaly']]

    features = {"input_ids": [], "attention_mask": [], "labels": []}
    if not prompts:
        return features

    # Tokenize the two halves separately so the prompt/label boundary is known
    # exactly. The label gets no special tokens: a BOS in the middle of the
    # sequence would be wrong, and EOS is appended explicitly below.
    prompt_batch = tokenizer(prompts, add_special_tokens=True)["input_ids"]
    target_batch = tokenizer(targets, add_special_tokens=False)["input_ids"]

    ignore_index = -100  # positions with this label are skipped by the loss
    for prompt_ids, target_ids in zip(prompt_batch, target_batch):
        answer_ids = (list(target_ids) + [tokenizer.eos_token_id])[:max_length]
        # Keep the whole answer; trim the prompt if the row does not fit.
        prompt_ids = list(prompt_ids)[: max_length - len(answer_ids)]

        input_ids = prompt_ids + answer_ids
        labels = [ignore_index] * len(prompt_ids) + answer_ids

        # Pad on the right by hand so the result does not depend on the
        # tokenizer's configured padding side. Padding is masked by position
        # (not by token id) because the pad token is the same as EOS here.
        pad = max_length - len(input_ids)
        features["input_ids"].append(input_ids + [tokenizer.pad_token_id] * pad)
        features["attention_mask"].append([1] * len(input_ids) + [0] * pad)
        features["labels"].append(labels + [ignore_index] * pad)

    return features

# Tokenize every split. The raw CSV columns are dropped so that only the
# features produced by preprocess_function remain.
tokenized_dataset = dataset.map(
    preprocess_function,
    batched=True,
    remove_columns=dataset["train"].column_names,
)

# NOTE(review): the mapped dataset has no "text" column (all original columns
# are removed above). Whether SFTTrainer ignores `dataset_text_field` for an
# already-tokenized dataset, requires it, or rejects it depends on the
# installed trl version -- confirm against the pinned version.
trainer = SFTTrainer(
    model=model,
    train_dataset=tokenized_dataset["train"],
    peft_config=peft_config,
    dataset_text_field="text",
    args=training_args,
)

trainer.train()

# Persist the trained adapter so it can be re-attached to a base model below.
trainer.model.save_pretrained("./fine_tuned_model")

# Merge the adapter into a freshly loaded half-precision copy of the base
# model. `model` must not be reused for this: it is 4-bit quantized and the
# trainer has already wrapped it with the LoRA layers, so loading the saved
# adapter onto it again would stack a second adapter on top of the first, and
# merging into quantized weights is lossy (or rejected by older peft).
# Llama-2 is natively supported by transformers, so trust_remote_code is not
# passed for this load.
base_model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.float16,
    device_map="auto",
)
fine_tuned_model = PeftModel.from_pretrained(base_model, "./fine_tuned_model")
fine_tuned_model = fine_tuned_model.merge_and_unload()

# A label is not guaranteed to be a single token (the tokenizer may emit a
# separate leading-space piece before the value), so allow a few new tokens
# and keep only the first generated line when parsing the output.
pipe = pipeline(
    "text-generation",
    model=fine_tuned_model,
    tokenizer=tokenizer,
    max_new_tokens=4,
)

input_text = "Device: f66f1a4a-e7db-41d4-aec8-f7392ce5ab11, Status: armAway, Timestamp: 2023-01-11 00:00:00\nAnomaly:"
result = pipe(input_text)

# The pipeline returns prompt + continuation; everything after the last
# "Anomaly:" marker is the model's answer.
generated = result[0]['generated_text'].split('Anomaly:')[-1].strip()
prediction = generated.splitlines()[0].strip() if generated else ""

print(f"Input: {input_text}")
print(f"Predicted Anomaly: {prediction}")