# UK Cyber Fraud Assistant - Fine-Tuning with Mistral-7B (Updated)

This notebook fine-tunes Mistral-7B-Instruct-v0.3 on UK cyber fraud guidance data, using Unsloth for optimized training on a Google Colab Pro A100 GPU.

**Updates in v2:**
- Increased dataset size from 111 to 278 QA pairs
- Added early stopping to prevent overfitting

## Setup and Installation

In [None]:
# Install Unsloth and dependencies
!pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu121
!pip install transformers
!pip install unsloth trl peft accelerate bitsandbytes

In [None]:
# Verify GPU setup
import torch

# Report whether CUDA is visible, which device is attached, and the CUDA
# version this PyTorch build targets.
print(f"CUDA available: {torch.cuda.is_available()}")
print(f"GPU: {torch.cuda.get_device_name(0) if torch.cuda.is_available() else 'None'}")
print(f"CUDA version: {torch.version.cuda}")

# Device properties can only be queried when a CUDA device exists, so guard
# this lookup the same way as the device-name lookup above. `total_memory`
# is the device's total memory in bytes; it is printed here in GiB.
if torch.cuda.is_available():
    print(f"Available VRAM: {torch.cuda.get_device_properties(0).total_memory / 1024**3:.1f} GB")
else:
    print("Available VRAM: None")

## Load and Prepare Dataset

In [None]:
from google.colab import drive
import json
from datasets import Dataset
import pandas as pd

# Mount Google Drive
drive.mount('/content/drive')

dataset_path = '/content/drive/MyDrive/Dissertation/cyber-fraud-chatbot/model_training/master_fraud_qa_dataset.json'

# Load the fraud Q&A dataset.
# The file is read explicitly as UTF-8 so that any non-ASCII text (for example
# a "£" sign) decodes the same way regardless of the runtime's default locale.
with open(dataset_path, 'r', encoding='utf-8') as f:
    fraud_data = json.load(f)

print(f"Total samples: {len(fraud_data)}")

# Fail early with a clear message rather than an IndexError on fraud_data[0].
if not fraud_data:
    raise ValueError(f"No samples found in dataset: {dataset_path}")

print(f"Sample keys: {list(fraud_data[0].keys())}")

# Preview a sample (each record is expected to carry 'instruction' and 'output')
sample = fraud_data[0]
print(f"\nSample instruction: {sample['instruction']}")
print(f"\nSample output (first 200 chars): {sample['output'][:200]}...")

In [None]:
# Format data for instruction tuning with Mistral chat template
def format_fraud_prompt(sample, *, system_message=None):
    """Render one Q&A record as a single Mistral-style training string.

    Args:
        sample: Mapping with an 'instruction' key (the user's question) and an
            'output' key (the reference answer).
        system_message: Optional text placed ahead of the instruction inside
            the [INST] block. When omitted, the notebook's standard UK fraud
            assistant message is used, so existing callers are unaffected.

    Returns:
        The string "<s>[INST] {system}\\n\\n{instruction} [/INST] {output}</s>".

    Raises:
        KeyError: If 'instruction' or 'output' is missing from ``sample``.
    """
    if system_message is None:
        system_message = "You are a helpful UK cyber fraud assistant providing empathetic support to fraud victims. Provide accurate, UK-specific guidance with proper contact numbers and procedures."

    # Mistral chat format: the system text shares the user turn with the question.
    formatted_text = f"<s>[INST] {system_message}\n\n{sample['instruction']} [/INST] {sample['output']}</s>"

    return formatted_text

# Render every record into its training string
formatted_data = list(map(format_fraud_prompt, fraud_data))

# Positional 80/20 split: the first 80% of records train, the remainder validate.
# NOTE(review): nothing is shuffled before the cut. If the source file is
# grouped by topic or origin, the validation set may not be representative.
# Confirm the ordering of the dataset before relying on eval_loss.
split_idx = int(len(formatted_data) * 0.8)
train_data, val_data = formatted_data[:split_idx], formatted_data[split_idx:]

print(f"Training samples: {len(train_data)}")
print(f"Validation samples: {len(val_data)}")

# Wrap both splits as Hugging Face datasets with a single "text" column
train_dataset = Dataset.from_dict({"text": train_data})
val_dataset = Dataset.from_dict({"text": val_data})

# Show the start of the first formatted record as a sanity check
print(f"\nFormatted sample (first 300 chars):\n{formatted_data[0][:300]}...")

## Load Model and Configure LoRA

In [None]:
from unsloth import FastLanguageModel
import torch

# Unquantized Mistral base model, requested in 16-bit bfloat16
model_name = "mistralai/Mistral-7B-Instruct-v0.3"  # Original unquantized checkpoint
max_seq_length = 2048  # Also passed to the trainer later in the notebook
dtype = torch.bfloat16  # 16-bit bfloat16 (unquantized, but not 32-bit full precision)

# Load model and tokenizer without 4-bit quantization
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name=model_name,
    max_seq_length=max_seq_length,
    dtype=dtype,
    load_in_4bit=False,  # No quantization
    device_map={"": 0},  # presumably places the whole model on device 0 - confirm
)

# NOTE: the message below says "full precision", but the dtype requested above
# is bfloat16, i.e. unquantized 16-bit weights.
print("Model loaded in full precision for LoRA training")
print(f"Model device: {next(model.parameters()).device}")

In [None]:
def print_gpu_memory():
    """Print current and peak CUDA memory allocated through PyTorch, in GB.

    The first figure is `torch.cuda.memory_allocated()` and the second is
    `torch.cuda.max_memory_allocated()`; neither is the device's total
    capacity. Nothing is printed when CUDA is unavailable.
    """
    if not torch.cuda.is_available():
        return

    bytes_per_gb = 1024**3
    current_gb = torch.cuda.memory_allocated() / bytes_per_gb
    peak_gb = torch.cuda.max_memory_allocated() / bytes_per_gb
    print(f"GPU Memory: {current_gb:.2f}GB / {peak_gb:.2f}GB")

print_gpu_memory()

In [None]:
# Configure LoRA for optimal fraud assistant training.
# Wraps the loaded base model with LoRA adapters on the attention and MLP
# projection layers listed in target_modules, then reports how many
# parameters are trainable.
model = FastLanguageModel.get_peft_model(
    model,
    r=32,  # LoRA rank; reduced rank for more stable training
    target_modules=[
        "q_proj", "k_proj", "v_proj", "o_proj",
        "gate_proj", "up_proj", "down_proj"
    ],
    lora_alpha=64,  # 2x rank for stable training
    lora_dropout=0,  # No dropout applied on the adapter path
    bias="none",  # Bias terms are not trained
    use_gradient_checkpointing="unsloth",  # Unsloth optimized checkpointing
    random_state=3407,  # Same seed value as TrainingArguments.seed below
    use_rslora=False,
    loftq_config=None,
)

print("LoRA configuration applied")
model.print_trainable_parameters()

In [None]:
def print_gpu_memory():
    """Report PyTorch's current and peak allocated CUDA memory in GB.

    Output format is "GPU Memory: <current>GB / <peak>GB". The call is a
    no-op when no CUDA device is available.
    """
    if torch.cuda.is_available():
        allocated_gb, peak_allocated_gb = (
            torch.cuda.memory_allocated() / 1024**3,
            torch.cuda.max_memory_allocated() / 1024**3,
        )
        print(f"GPU Memory: {allocated_gb:.2f}GB / {peak_allocated_gb:.2f}GB")

print_gpu_memory()

## Configure Training Parameters

In [None]:
from trl import SFTTrainer
from transformers import TrainingArguments, EarlyStoppingCallback

# Training arguments
# Evaluation and checkpointing both run once per epoch so that
# load_best_model_at_end can restore the checkpoint with the lowest eval_loss.
training_args = TrainingArguments(
    per_device_train_batch_size=2,
    per_device_eval_batch_size=2,
    gradient_accumulation_steps=8,  # 2 x 8 = 16 samples per optimizer step
    warmup_steps=20,  # Increased for more gradual warmup
    num_train_epochs=5,
    learning_rate=5e-5,  # Reduced learning rate for stability
    # Use bf16 when the GPU supports it, otherwise fall back to fp16.
    bf16=torch.cuda.is_bf16_supported(),
    fp16=not torch.cuda.is_bf16_supported(),
    logging_steps=5,
    optim="adamw_torch",                # Standard (non-8-bit) AdamW optimizer
    weight_decay=0.05,  # Increased weight decay for regularization
    lr_scheduler_type="cosine",
    seed=3407,
    output_dir="/content/drive/MyDrive/Dissertation/cyber-fraud-chatbot/trained_models",
    save_strategy="epoch",
    save_total_limit=2,
    # Only `eval_strategy` is passed. `evaluation_strategy` is the deprecated
    # name for the same option, and supplying both makes TrainingArguments
    # reject the call on releases that no longer accept the old keyword.
    eval_strategy="epoch",
    load_best_model_at_end=True,
    metric_for_best_model="eval_loss",
    greater_is_better=False,  # Lower eval_loss is better
    dataloader_pin_memory=True,
    remove_unused_columns=False,
    report_to="none",
)

print("Training arguments configured")
print(f"Effective batch size: {training_args.per_device_train_batch_size * training_args.gradient_accumulation_steps}")
# Estimate only: floor(samples / effective batch) * epochs. Early stopping can end the run sooner.
print(f"Total training steps: {len(train_dataset) // (training_args.per_device_train_batch_size * training_args.gradient_accumulation_steps) * training_args.num_train_epochs}")

In [None]:
# Move all model parameters to GPU before creating trainer
model = model.to("cuda")

# Verify all parameters are on GPU.
# Parameters on the 'meta' device are only reported with a warning; any other
# non-CUDA parameter has its data tensor moved to CUDA in place.
print("Checking model device placement...")
for name, param in model.named_parameters():
    if param.device.type == 'meta':
        print(f"Warning: {name} still on meta device")
    elif param.device.type != 'cuda':
        print(f"Moving {name} from {param.device} to cuda")
        param.data = param.data.to("cuda")

# NOTE: this message is printed unconditionally, including when the loop
# above warned about parameters still on the meta device.
print("All parameters moved to GPU")

# Initialize trainer with early stopping.
# The callback monitors the metric configured in training_args
# (metric_for_best_model) and stops after 3 evaluations without improvement.
trainer = SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    train_dataset=train_dataset,
    eval_dataset=val_dataset,
    dataset_text_field="text",  # Column holding the formatted prompt strings
    max_seq_length=max_seq_length,
    dataset_num_proc=2,
    args=training_args,
    callbacks=[EarlyStoppingCallback(early_stopping_patience=3)]  # Increased patience
)

print("Trainer initialized successfully with early stopping!")

## Start Training

In [None]:
# Start training
print("Starting training...")
# trainer.train() returns a result object; its training_loss and metrics
# fields are read below for the summary.
trainer_stats = trainer.train()

print("Training completed!")
# training_loss is the loss reported by the trainer for the whole run
print(f"Final training loss: {trainer_stats.training_loss:.4f}")
print(f"Training time: {trainer_stats.metrics['train_runtime']:.1f} seconds")

## Test the Fine-Tuned Model

In [None]:
# Enable fast inference (switches the Unsloth model into its inference mode)
FastLanguageModel.for_inference(model)

# Test scenarios for fraud assistant.
# Hand-written smoke-test questions covering text, phone, advance-fee,
# romance and investment fraud; they are passed one by one to
# test_fraud_assistant.
test_scenarios = [
    "I received a text saying my bank account is frozen and I need to pay £50 to unlock it. Is this legitimate?",
    "Someone called claiming to be from HMRC saying I owe tax money. What should I do?",
    "I paid for a loan arrangement fee but haven't received the loan. How can I get help?",
    "How do I report a romance scam to the authorities?",
    "Is there a way to check if an investment opportunity is legitimate?"
]

def test_fraud_assistant(question):
    """Generate the fine-tuned model's answer to one fraud-related question.

    The prompt mirrors the training format: the system message and the
    question are sent together as a single user turn through the tokenizer's
    chat template.

    Args:
        question: The user's question as plain text.

    Returns:
        The generated answer only (no prompt text), with special tokens
        removed and surrounding whitespace stripped.
    """
    system_message = "You are a helpful UK cyber fraud assistant providing empathetic support to fraud victims. Provide accurate, UK-specific guidance with proper contact numbers and procedures."

    # Format input using Mistral chat template
    messages = [
        {"role": "user", "content": f"{system_message}\n\n{question}"}
    ]

    inputs = tokenizer.apply_chat_template(
        messages,
        tokenize=True,
        add_generation_prompt=True,
        return_tensors="pt",
    ).to("cuda")

    # A single unpadded prompt: every position should be attended to.
    attention_mask = torch.ones_like(inputs)

    # Generate response
    outputs = model.generate(
        input_ids=inputs,
        attention_mask=attention_mask,
        max_new_tokens=512,
        use_cache=True,
        temperature=0.1,
        do_sample=True,
        top_p=0.9,
        pad_token_id=tokenizer.eos_token_id
    )

    # generate() returns the prompt tokens followed by the continuation. Slice
    # the prompt off by length and decode only the new tokens, instead of
    # decoding everything and splitting on "[/INST]" or the question text.
    # Those string splits break when the markers are dropped as special tokens
    # and can truncate answers that happen to repeat the question.
    prompt_length = inputs.shape[-1]
    generated_tokens = outputs[0][prompt_length:]
    response = tokenizer.decode(generated_tokens, skip_special_tokens=True).strip()

    return response

print("Testing fine-tuned fraud assistant:\n")
print("=" * 80)

In [None]:
# Run every scenario through the assistant and print question and answer
for test_number, question in enumerate(test_scenarios, start=1):
    print(f"\nTest {test_number}: {question}\n")
    answer = test_fraud_assistant(question)
    print(f"Assistant: {answer}\n")
    print("-" * 80)

## Save Model for Local Deployment

In [None]:
# Save the trained adapter and tokenizer to Google Drive
import os

save_path = "/content/drive/MyDrive/Dissertation/cyber-fraud-chatbot/trained_models/uk-fraud-assistant-adapter"

# Model and tokenizer files are written side by side into the same folder
model.save_pretrained(save_path)
tokenizer.save_pretrained(save_path)

print(f"Model adapter saved successfully to: {save_path}")

# Confirm the folder exists on disk and list what was written
if not os.path.exists(save_path):
    print("ERROR: Save path doesn't exist!")
else:
    print(f"Files saved: {os.listdir(save_path)}")

In [None]:
# Export to GGUF format for local deployment.
# The output folder is the same one the Ollama Modelfile is written into later.
gguf_save_path = "/content/drive/MyDrive/Dissertation/cyber-fraud-chatbot/trained_models/uk-fraud-assistant-gguf"

model.save_pretrained_gguf(
    gguf_save_path,
    tokenizer,
    quantization_method="q4_k_m"  # Quantize only for deployment
)

print(f"Model exported to GGUF format for local deployment at: {gguf_save_path}")

## Create Ollama Modelfile

In [None]:
# Create Ollama Modelfile for easy deployment
gguf_save_path = "/content/drive/MyDrive/Dissertation/cyber-fraud-chatbot/trained_models/uk-fraud-assistant-gguf"

# The TEMPLATE reproduces the training prompt exactly: the system sentence sits
# on ONE line, followed by a blank line and the user prompt. Hard-wrapping the
# sentence would put newlines where the fine-tuning data had spaces.
# NOTE(review): TEMPLATE does not reference {{ .System }}, so the SYSTEM block
# below is presumably not injected into the prompt by Ollama - confirm against
# the Modelfile reference before relying on it.
# NOTE(review): the FROM file name must match what the GGUF export actually
# wrote into this folder - verify against the directory listing.
modelfile_content = '''FROM ./model-unsloth.Q4_K_M.gguf

TEMPLATE """<s>[INST] You are a helpful UK cyber fraud assistant providing empathetic support to fraud victims. Provide accurate, UK-specific guidance with proper contact numbers and procedures.

{{ .Prompt }} [/INST] """

PARAMETER temperature 0.1
PARAMETER top_p 0.9
PARAMETER stop "</s>"
PARAMETER stop "[INST]"
PARAMETER stop "[/INST]"

SYSTEM """You are a specialized UK cyber fraud assistant. Your role is to:
- Provide empathetic support to fraud victims
- Offer accurate UK-specific guidance and procedures
- Include proper UK contact numbers (Action Fraud: 0300 123 2040)
- Maintain a supportive, non-judgmental tone
- Help victims understand their next steps
"""
'''

# Written next to the GGUF file so the relative FROM path resolves
with open(f'{gguf_save_path}/Modelfile', 'w', encoding='utf-8') as f:
    f.write(modelfile_content)

print(f"Ollama Modelfile created at: {gguf_save_path}/Modelfile")
print("\nTo deploy locally with Ollama:")
print("1. Download the uk-fraud-assistant-gguf folder from Google Drive")
print("2. cd uk-fraud-assistant-gguf")
print("3. ollama create uk-fraud-assistant -f Modelfile")
print("4. ollama run uk-fraud-assistant")

# Check if model files exist and find their actual location
import os

def check_model_files():
    """Print whether the GGUF and adapter folders exist and what they contain.

    If either expected folder is missing, Google Drive is walked and every
    .gguf/.safetensors/.bin file whose directory path contains "fraud"
    (case-insensitive) is printed. Returns None.
    """
    # Expected paths
    gguf_path = "/content/drive/MyDrive/Dissertation/cyber-fraud-chatbot/trained_models/uk-fraud-assistant-gguf"
    adapter_path = "/content/drive/MyDrive/Dissertation/cyber-fraud-chatbot/trained_models/uk-fraud-assistant-adapter"

    has_gguf = os.path.exists(gguf_path)
    has_adapter = os.path.exists(adapter_path)

    print("Checking expected paths...")
    print(f"GGUF path exists: {has_gguf}")
    print(f"Adapter path exists: {has_adapter}")

    if has_gguf:
        print(f"GGUF files: {os.listdir(gguf_path)}")
    if has_adapter:
        print(f"Adapter files: {os.listdir(adapter_path)}")

    # Both folders are in place: nothing more to search for
    if has_gguf and has_adapter:
        return

    # Otherwise search the broader Drive area
    print("\nSearching for model files in Drive...")
    base_path = "/content/drive/MyDrive"
    if not os.path.exists(base_path):
        return

    weight_suffixes = ('.gguf', '.safetensors', '.bin')
    for folder, _subfolders, filenames in os.walk(base_path):
        if 'fraud' not in folder.lower():
            continue
        for filename in filenames:
            if filename.endswith(weight_suffixes):
                print(f"Found model file: {os.path.join(folder, filename)}")

check_model_files()

In [None]:
# Create deployment zip with correct paths
import zipfile
import os

def create_deployment_zip():
    """Bundle the GGUF and adapter folders into uk-fraud-assistant-deployment.zip.

    Only regular files directly inside each folder are added (no recursion);
    each is stored under a top-level folder named after its source. The
    archive is created in the current working directory.

    Returns:
        True if at least one file was added, otherwise False.
    """
    archive_name = 'uk-fraud-assistant-deployment.zip'
    # (source folder on Drive, folder name inside the archive); GGUF first, then adapter
    sources = (
        (
            "/content/drive/MyDrive/Dissertation/cyber-fraud-chatbot/trained_models/uk-fraud-assistant-gguf",
            "uk-fraud-assistant-gguf",
        ),
        (
            "/content/drive/MyDrive/Dissertation/cyber-fraud-chatbot/trained_models/uk-fraud-assistant-adapter",
            "uk-fraud-assistant-adapter",
        ),
    )

    files_added = 0
    with zipfile.ZipFile(archive_name, 'w', zipfile.ZIP_DEFLATED) as zipf:
        for source_dir, archive_folder in sources:
            if not os.path.exists(source_dir):
                continue
            for entry in os.listdir(source_dir):
                entry_path = os.path.join(source_dir, entry)
                if not os.path.isfile(entry_path):
                    continue
                archive_member = f"{archive_folder}/{entry}"
                zipf.write(entry_path, archive_member)
                print(f"Added: {archive_member}")
                files_added += 1

    if files_added == 0:
        print("No files found!")
        return False

    print(f"Zip created with {files_added} files")
    size = os.path.getsize(archive_name) / 1024 / 1024
    print(f"Size: {size:.1f} MB")
    return True

# Build the archive, then hand it to the browser for download
if not create_deployment_zip():
    print("Please check if the model files exist in the specified paths")
else:
    # Imported only when there is something to download
    from google.colab import files
    files.download('uk-fraud-assistant-deployment.zip')

## Training Summary and Next Steps

### Key Improvements in v2:
- **Dataset Size**: Increased from 111 to 278 QA pairs (2.5x increase)
- **Early Stopping**: Added with patience=3 to prevent overfitting
- **Conservative Parameters**: Reduced learning rate (5e-5), increased weight decay (0.05) and a 5-epoch limit to prevent overfitting