In [1]:
# Cell 1: Install packages
!pip install -q transformers datasets peft accelerate trl wandb gradio
print("✅ Packages installed!")

✅ Packages installed!


In [2]:
# Cell 2: Imports and GPU check
import os
import torch
import wandb
import math
from datasets import load_dataset
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import LoraConfig, get_peft_model
from trl import SFTTrainer, SFTConfig
import gradio as gr

print("✅ Gradio installed and imported!")

# Environment report: framework version and every CUDA device that is visible.
for header_line in ("="*80, "SYSTEM CHECK", "="*80):
    print(header_line)
print(f"PyTorch version: {torch.__version__}")
print(f"CUDA available: {torch.cuda.is_available()}")
if torch.cuda.is_available():
    num_gpus = torch.cuda.device_count()
    print(f"Number of GPUs: {num_gpus}")
    gpu_index = 0
    while gpu_index < num_gpus:
        print(f"  GPU {gpu_index}: {torch.cuda.get_device_name(gpu_index)}")
        gpu_index += 1
print("="*80)

2025-10-19 11:31:37.139917: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1760873497.162622     566 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1760873497.169785     566 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


✅ Gradio installed and imported!
SYSTEM CHECK
PyTorch version: 2.6.0+cu124
CUDA available: True
Number of GPUs: 2
  GPU 0: Tesla T4
  GPU 1: Tesla T4


In [3]:
# Cell 3: Configuration
import os

MODEL_NAME = "Qwen/Qwen2.5-0.5B"
DATASET_NAME = "rzeraat/law"
OUTPUT_DIR = "./pactoria-v1-simple"
HUGGINGFACE_MODEL_NAME = "rzeraat/pactoria-v2"


def load_secret(name, kaggle_client):
    """Look up one API key, preferring Kaggle Secrets over the environment.

    Args:
        name: Secret / environment-variable name.
        kaggle_client: Object exposing ``get_secret(name)``, or None when
            Kaggle Secrets could not be initialised.

    Returns:
        ``(value, from_kaggle)``. ``value`` is None when the key is found in
        neither place; ``from_kaggle`` is True only when Kaggle supplied it.
    """
    if kaggle_client is not None:
        try:
            value = kaggle_client.get_secret(name)
        except Exception as exc:
            # Best-effort: a missing/unattached secret must not discard the
            # other keys, so fall through to the environment for this one only.
            print(f"⚠️  {name} not available in Kaggle Secrets ({type(exc).__name__}), using environment variable")
        else:
            if value:
                return value, True
    return os.getenv(name), False


# API Keys - Load from Kaggle Secrets for security
try:
    from kaggle_secrets import UserSecretsClient
    user_secrets = UserSecretsClient()
except Exception:
    # Fallback to environment variables (for local development)
    user_secrets = None
    print("⚠️  Kaggle Secrets not available, using environment variables")

WANDB_API_KEY, _wandb_from_kaggle = load_secret("WANDB_API_KEY", user_secrets)
HUGGINGFACE_TOKEN, _hf_from_kaggle = load_secret("HUGGINGFACE_TOKEN", user_secrets)
if _wandb_from_kaggle and _hf_from_kaggle:
    print("✅ API keys loaded from Kaggle Secrets")

# W&B Configuration
WANDB_PROJECT = "legal-training"
WANDB_ENABLED = bool(WANDB_API_KEY)

# LoRA config
LORA_R = 32
LORA_ALPHA = 64
LORA_DROPOUT = 0.05
TARGET_MODULES = ["q_proj", "k_proj", "v_proj", "o_proj", "gate_proj", "up_proj", "down_proj"]

# Training config
BATCH_SIZE = 2
GRADIENT_ACCUMULATION_STEPS = 1
LEARNING_RATE = 2e-4
NUM_EPOCHS = 5
MAX_SEQ_LENGTH = 1000
EVAL_STEPS = 1000

# W&B run name (encodes the main hyper-parameters so runs are distinguishable)
WANDB_RUN_NAME = f"qwen-0.5b-law-r{LORA_R}-seq{MAX_SEQ_LENGTH}-bs{BATCH_SIZE}x{GRADIENT_ACCUMULATION_STEPS}-ep{NUM_EPOCHS}"

print("✅ Configuration loaded")
print(f"   Model: {MODEL_NAME}")
print(f"   Dataset: {DATASET_NAME}")
print(f"   Output: {OUTPUT_DIR}")
if WANDB_ENABLED:
    print(f"   W&B: {WANDB_PROJECT}/{WANDB_RUN_NAME}")
else:
    print("   ⚠️  W&B disabled (WANDB_API_KEY not found)")
if HUGGINGFACE_TOKEN:
    print("   ✅ HuggingFace token configured")
else:
    print("   ⚠️  HuggingFace token not found (model push will fail)")

✅ API keys loaded from Kaggle Secrets
✅ Configuration loaded
   Model: Qwen/Qwen2.5-0.5B
   Dataset: rzeraat/law
   Output: ./pactoria-v1-simple
   W&B: legal-training/qwen-0.5b-law-r32-seq1000-bs2x1-ep5
   ✅ HuggingFace token configured


In [4]:
# Cell 4: Load tokenizer
print("Loading tokenizer...")
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, trust_remote_code=True)
# Padding is appended on the right and reuses the end-of-sequence token.
tokenizer.padding_side = "right"
tokenizer.pad_token = tokenizer.eos_token
print("✅ Tokenizer loaded")

Loading tokenizer...
✅ Tokenizer loaded


In [5]:
# Cell 5: Load model - NO DEVICE_MAP!
print("Loading model...")
# Half-precision weights with PyTorch SDPA attention; no device_map is passed.
model_load_kwargs = dict(
    torch_dtype=torch.float16,
    trust_remote_code=True,
    attn_implementation="sdpa",
)
model = AutoModelForCausalLM.from_pretrained(MODEL_NAME, **model_load_kwargs)

model.gradient_checkpointing_enable()

total_params = model.num_parameters()
first_param_device = next(model.parameters()).device
print(f"✅ Model loaded (params: {total_params:,})")
print(f"   Device: {first_param_device}")

`torch_dtype` is deprecated! Use `dtype` instead!


Loading model...
✅ Model loaded (params: 494,032,768)
   Device: cpu


In [6]:
# Cell 6: Apply LoRA
print("Applying LoRA...")
# Adapter hyper-parameters all come from the configuration cell.
lora_config = LoraConfig(
    task_type="CAUSAL_LM",
    target_modules=TARGET_MODULES,
    r=LORA_R,
    lora_alpha=LORA_ALPHA,
    lora_dropout=LORA_DROPOUT,
    bias="none",
)
# Wrap the base model with the adapters, then report the trainable share.
model = get_peft_model(model, lora_config)
model.print_trainable_parameters()

Applying LoRA...
trainable params: 17,596,416 || all params: 511,629,184 || trainable%: 3.4393


In [7]:
# Cell 7: Load and format dataset - MODEL LEARNS TO IDENTIFY FORMAT FROM QUESTION
def format_sample(sample):
    """
    Format one legal sample into a single training string.

    The prompt contains only the question (no sample-type hint), so the model
    has to infer the appropriate answer format from the question itself.

    Layout of the produced text:
        ### Instruction: <question>
        ### Response:
        ### Topic / ### Difficulty   (always present)
        ### Reasoning                (only when the sample has reasoning)
        ### Answer
        ### Case Citation            (only when the sample has a citation)

    Args:
        sample: Mapping with required keys 'question' and 'answer', and
            optional keys 'topic', 'difficulty', 'reasoning', 'case_citation'.

    Returns:
        dict with a single key "text" holding the formatted sample.

    Raises:
        KeyError: if 'question' or 'answer' is missing.
    """
    # `or` (not a .get default) so that a key that is present but None/empty
    # also falls back - otherwise the literal text "None" ends up in the
    # training data.
    topic = sample.get('topic') or 'Corporate Law'
    difficulty = sample.get('difficulty') or 'intermediate'

    # Instruction (question only - NO sample type hint!)
    parts = [
        "### Instruction:\n",
        f"{sample['question']}\n",
        "\n",
        "### Response:",
        # Response starts with metadata (NO sample type - model must infer it!)
        "\n",
        f"### Topic: {topic}\n",
        f"### Difficulty: {difficulty}\n",
    ]

    # Optional reasoning section (chain of thought)
    reasoning = sample.get('reasoning')
    if reasoning:
        parts.append(f"\n### Reasoning:\n{reasoning}\n")

    # Answer WITHOUT type hints
    parts.append(f"\n### Answer:\n{sample['answer']}")

    # Optional case citations
    citation = sample.get('case_citation')
    if citation:
        parts.append(f"\n\n### Case Citation:\n{citation}")

    return {"text": "".join(parts)}

print("Loading and formatting dataset...")
print("📊 Model will learn to identify answer format from question patterns")
print()
dataset = load_dataset(DATASET_NAME)
formatted_dataset = dataset.map(format_sample)

# 80/10/10 split, done in two stages with a fixed seed:
#   1) hold back 20% of the data,
#   2) halve that 20% into validation and test.
# Train is used for fitting, Val for monitoring during training, and Test is
# HELD OUT for the final evaluation after training completes.
train_val_split = formatted_dataset['train'].train_test_split(test_size=0.2, seed=42)
val_test_split = train_val_split['test'].train_test_split(test_size=0.5, seed=42)

train_dataset = train_val_split['train']
val_dataset, test_dataset = val_test_split['train'], val_test_split['test']

print("✅ Dataset split:")
split_report = (
    ("Train:", train_dataset, "80%", "Used for training"),
    ("Val:", val_dataset, "10%", "Used during training for checkpoint selection"),
    ("Test:", test_dataset, "10%", "HELD OUT for final evaluation"),
)
for split_label, split_data, split_share, split_note in split_report:
    print(f"   {split_label:<6} {len(split_data)} samples ({split_share}) - {split_note}")
print(f"   Total: {len(formatted_dataset['train'])} samples")
print()
print("Training Examples:")
for example_line in (
    "  'What are the duties of directors?' → Model learns educational format",
    "  'My client is facing insolvency...' → Model learns client interaction format",
    "  'Analyze the case of...' → Model learns case analysis format",
    "  'Explain Section 172 of the Act' → Model learns statutory interpretation format",
):
    print(example_line)

Loading and formatting dataset...
📊 Model will learn to identify answer format from question patterns

✅ Dataset split:
   Train: 8014 samples (80%) - Used for training
   Val:   1002 samples (10%) - Used during training for checkpoint selection
   Test:  1002 samples (10%) - HELD OUT for final evaluation
   Total: 10018 samples

Training Examples:
  'What are the duties of directors?' → Model learns educational format
  'My client is facing insolvency...' → Model learns client interaction format
  'Analyze the case of...' → Model learns case analysis format
  'Explain Section 172 of the Act' → Model learns statutory interpretation format


In [8]:
# Cell 8: Training arguments with validation and W&B

# Initialize Weights & Biases (best-effort: on any failure, training continues
# with W&B reporting switched off).
if WANDB_ENABLED:
    try:
        wandb.login(key=WANDB_API_KEY, relogin=True)
        wandb.init(
            project=WANDB_PROJECT,
            name=WANDB_RUN_NAME,
            mode="online",
            config={
                "model": MODEL_NAME,
                "dataset": DATASET_NAME,
                "lora_r": LORA_R,
                "lora_alpha": LORA_ALPHA,
                "lora_dropout": LORA_DROPOUT,
                "batch_size": BATCH_SIZE,
                "gradient_accumulation_steps": GRADIENT_ACCUMULATION_STEPS,
                "learning_rate": LEARNING_RATE,
                "num_epochs": NUM_EPOCHS,
                "max_seq_length": MAX_SEQ_LENGTH,
                "effective_batch_size": BATCH_SIZE * GRADIENT_ACCUMULATION_STEPS,
                "num_gpus": torch.cuda.device_count(),
            }
        )
        wandb.config.update({
            "gpu_ids": list(range(torch.cuda.device_count())),
            "gpu_names": [torch.cuda.get_device_name(i) for i in range(torch.cuda.device_count())],
        })
        print(f"✅ W&B initialized: {WANDB_PROJECT}/{WANDB_RUN_NAME}")
        print(f"📊 Track at: https://wandb.ai/{wandb.run.entity}/{WANDB_PROJECT}/runs/{wandb.run.id}")
    except Exception as e:
        print(f"⚠️  W&B initialization failed: {e}")
        WANDB_ENABLED = False

training_args = SFTConfig(
    output_dir=OUTPUT_DIR,
    num_train_epochs=NUM_EPOCHS,
    per_device_train_batch_size=BATCH_SIZE,
    gradient_accumulation_steps=GRADIENT_ACCUMULATION_STEPS,
    learning_rate=LEARNING_RATE,
    fp16=True,
    logging_steps=3,
    optim="adamw_torch",
    lr_scheduler_type="cosine",
    warmup_ratio=0.03,
    max_grad_norm=0.3,
    weight_decay=0.01,
    report_to="wandb" if WANDB_ENABLED else "none",
    run_name=WANDB_RUN_NAME if WANDB_ENABLED else None,

    # Evaluation settings (note: eval_strategy not evaluation_strategy)
    eval_strategy="steps",
    eval_steps=EVAL_STEPS,
    per_device_eval_batch_size=BATCH_SIZE,

    # Checkpointing / best-model selection.
    # metric_for_best_model alone does nothing: load_best_model_at_end must be
    # on for the best checkpoint to be restored, and that in turn requires
    # checkpoints to be written on the same schedule as evaluation.
    save_strategy="steps",
    save_steps=EVAL_STEPS,
    load_best_model_at_end=True,
    metric_for_best_model="eval_loss",
    greater_is_better=False,
    save_total_limit=1,

    # SFT-specific
    max_length=MAX_SEQ_LENGTH,
    dataset_text_field="text",
    packing=True,
)

print("✅ Training configuration created with validation")
print(f"   Evaluation every {training_args.eval_steps} steps")
print(f"   Logging every {training_args.logging_steps} steps")

[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc
[34m[1mwandb[0m: Currently logged in as: [33mrzeraat-tur[0m ([33mrzeraat-tur-elyoni[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


✅ W&B initialized: legal-training/qwen-0.5b-law-r32-seq1000-bs2x1-ep5
📊 Track at: https://wandb.ai/rzeraat-tur-elyoni/legal-training/runs/qona7x89
✅ Training configuration created with validation
   Evaluation every 1000 steps
   Logging every 3 steps


In [9]:
# Cell 9: Create Trainer with validation
print("Creating trainer...")
trainer = SFTTrainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=val_dataset,
    # Hand over the tokenizer configured in Cell 4 so its pad-token and
    # padding-side settings are the ones used for training, instead of
    # letting the trainer load a fresh tokenizer on its own.
    processing_class=tokenizer,
)

print("✅ Trainer created")
print(f"   Training on {torch.cuda.device_count()} GPU(s)")
print(f"   Train: {len(train_dataset)} samples | Val: {len(val_dataset)} samples")

Creating trainer...
✅ Trainer created
   Training on 2 GPU(s)
   Train: 8014 samples | Val: 1002 samples


In [None]:
# Cell 10: Train!

# device_count() is 0 without CUDA; count at least one worker so the reported
# effective batch size is not 0 on a CPU-only run.
batch_multiplier = max(1, torch.cuda.device_count())

print("="*80)
print("STARTING TRAINING")
print("="*80)
print(f"Train: {len(train_dataset)} samples | Val: {len(val_dataset)} samples")
print(f"Epochs: {NUM_EPOCHS}")
print(f"Batch per GPU: {BATCH_SIZE} | Grad Accum: {GRADIENT_ACCUMULATION_STEPS}")
print(f"Effective Batch: {BATCH_SIZE * GRADIENT_ACCUMULATION_STEPS * batch_multiplier}")
print(f"GPUs: {torch.cuda.device_count()}")
print("="*80)

trainer.train()

# Show final metrics: pull the logged training / validation losses out of the
# trainer's log history (each entry is a dict of whatever was logged that step).
train_loss = [x['loss'] for x in trainer.state.log_history if 'loss' in x]
eval_loss = [x['eval_loss'] for x in trainer.state.log_history if 'eval_loss' in x]

print("\n" + "="*80)
print("TRAINING COMPLETED")
print("="*80)
if train_loss:
    print(f"Train Loss: {train_loss[0]:.4f} → {train_loss[-1]:.4f}")
if eval_loss:
    # Perplexity is exp(loss) of the last validation evaluation.
    print(f"Val Loss: {eval_loss[-1]:.4f} | Perplexity: {math.exp(eval_loss[-1]):.2f}")
print("="*80)

# Finish W&B
if WANDB_ENABLED:
    wandb.finish()
    print("✅ W&B run finished")

The tokenizer has new PAD/BOS/EOS tokens that differ from the model config and generation config. The model config and generation config were aligned accordingly, being updated with the tokenizer's values. Updated tokens: {'bos_token_id': None, 'pad_token_id': 151643}.


STARTING TRAINING
Train: 8014 samples | Val: 1002 samples
Epochs: 5
Batch per GPU: 2 | Grad Accum: 1
Effective Batch: 4
GPUs: 2


Step,Training Loss,Validation Loss


In [None]:
# Cell 11: Save model
print("Saving model...")
# Trainer output first, then the tokenizer files, both into OUTPUT_DIR.
for write_to_dir in (trainer.save_model, tokenizer.save_pretrained):
    write_to_dir(OUTPUT_DIR)
print(f"✅ Model saved to {OUTPUT_DIR}")

In [None]:
from peft import PeftModel

print("Merging LoRA weights with base model...")

if torch.cuda.is_available():
    torch.cuda.empty_cache()

# Fresh copy of the base checkpoint to merge the saved adapter into.
base_model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    trust_remote_code=True,
    device_map="auto",
    low_cpu_mem_usage=True,
    torch_dtype=torch.float16,
)

# Attach the adapter saved in OUTPUT_DIR, then merge and unload it so that a
# plain (non-PEFT) model is what gets saved below.
merged_model = PeftModel.from_pretrained(base_model, OUTPUT_DIR).merge_and_unload()

merged_output_dir = "./qwen-law-merged"
for artifact in (merged_model, tokenizer):
    artifact.save_pretrained(merged_output_dir)
print(f"✅ Merged model saved to {merged_output_dir}")

In [None]:
# Cell 11b: Evaluate on Test Set (HELD OUT - Never Seen During Training)

print("="*80)
print("TEST SET EVALUATION")
print("="*80)
print("Evaluating model on held-out test set...")
print(f"Test samples: {len(test_dataset)}")
print()

# Validation losses recorded during training, read straight from the trainer
# so this cell does not depend on a variable left behind by the training cell.
val_loss_history = [
    entry['eval_loss'] for entry in trainer.state.log_history if 'eval_loss' in entry
]

# The training cell closes the W&B run once training ends. evaluate() would
# still try to log through the W&B callback and fail on the closed run, so
# detach the callback when there is no active run.
if WANDB_ENABLED and wandb.run is None:
    from transformers.integrations import WandbCallback
    trainer.remove_callback(WandbCallback)

# Evaluate on test set. The "test" prefix keeps these metrics (test_loss, ...)
# separate from the validation "eval_loss" entries in the log history.
test_results = trainer.evaluate(eval_dataset=test_dataset, metric_key_prefix="test")

test_loss = test_results.get('test_loss', 0)
test_perplexity = math.exp(test_loss) if test_loss > 0 else 0

print("="*80)
print("TEST SET RESULTS")
print("="*80)
print(f"Test Loss: {test_loss:.4f}")
print(f"Test Perplexity: {test_perplexity:.2f}")
print()
print("Comparison with Validation Set:")
if val_loss_history:
    val_loss_final = val_loss_history[-1]
    val_perplexity = math.exp(val_loss_final)
    print(f"  Val Loss:  {val_loss_final:.4f} | Perplexity: {val_perplexity:.2f}")
    print(f"  Test Loss: {test_loss:.4f} | Perplexity: {test_perplexity:.2f}")
    print()
    # Thresholds: within 0.1 counts as "similar"; more than 0.2 worse on test
    # is flagged as possible overfitting.
    if abs(test_loss - val_loss_final) < 0.1:
        print("✅ Test and validation losses are similar - good generalization!")
    elif test_loss > val_loss_final + 0.2:
        print("⚠️  Test loss is higher than validation - possible overfitting")
    else:
        print("✅ Test performance looks good!")
print("="*80)

In [None]:
from huggingface_hub import login


print(f"Pushing to HuggingFace Hub: {HUGGINGFACE_MODEL_NAME}")

if not HUGGINGFACE_TOKEN:
    # Without a token nothing is attempted; just tell the user how to enable it.
    print("❌ No HuggingFace token configured")
    print("   Set HUGGINGFACE_TOKEN in Kaggle Secrets to enable model push")
else:
    try:
        login(token=HUGGINGFACE_TOKEN, add_to_git_credential=False)
        # Merged weights first, then the tokenizer, into the same repository.
        for artifact in (merged_model, tokenizer):
            artifact.push_to_hub(HUGGINGFACE_MODEL_NAME)
        print(f"✅ Model pushed: https://huggingface.co/{HUGGINGFACE_MODEL_NAME}")
    except Exception as push_error:
        print(f"❌ Failed to push to HuggingFace Hub: {push_error}")

In [None]:
# Cell 15: Load Merged Model for Inference
print("Loading merged model for inference...")

# Both the weights and the tokenizer come from the merged-model directory.
inference_model = AutoModelForCausalLM.from_pretrained(
    merged_output_dir,
    trust_remote_code=True,
    device_map="auto",
    torch_dtype=torch.float16,
)
inference_tokenizer = AutoTokenizer.from_pretrained(merged_output_dir, trust_remote_code=True)
# Pad with the end-of-sequence token.
inference_tokenizer.pad_token = inference_tokenizer.eos_token

inference_device = next(inference_model.parameters()).device
print(f"✅ Inference model loaded from {merged_output_dir}")
print(f"   Device: {inference_device}")

In [None]:
# Cell 16: Create Gradio UI with Streaming - MODEL AUTO-DETECTS FORMAT

from transformers import TextIteratorStreamer
from threading import Thread

def generate_legal_answer_stream(
    question,
    temperature=0.7,
    max_new_tokens=512,
    top_p=0.9,
    repetition_penalty=1.1
):
    """
    Stream an answer to a legal question from the fine-tuned model.

    The prompt carries only the question, in the same
    "### Instruction: / ### Response:" layout used for training, so no manual
    format selection is needed.

    Args:
        question: The user's question. None, empty or whitespace-only input
            is answered with a short hint instead of being sent to the model.
        temperature: Sampling temperature.
        max_new_tokens: Upper bound on generated tokens (coerced to int,
            because UI sliders may deliver floats).
        top_p: Nucleus-sampling threshold.
        repetition_penalty: Penalty applied to repeated tokens.

    Yields:
        The answer text accumulated so far, once per streamed chunk.

    Raises:
        Exception: whatever `generate` raised in the worker thread, re-raised
            here after the stream has been drained.
    """
    # Nothing to answer: return a hint without touching the model.
    if question is None or not str(question).strip():
        yield "Please enter a legal question."
        return
    question = str(question).strip()

    # Simple prompt - model decides format based on question
    prompt = f"### Instruction:\n{question}\n\n### Response:"

    # Tokenize
    inputs = inference_tokenizer(
        prompt,
        return_tensors="pt",
        truncation=True,
        max_length=MAX_SEQ_LENGTH
    ).to(inference_model.device)

    # Create streamer
    streamer = TextIteratorStreamer(
        inference_tokenizer,
        skip_prompt=True,
        skip_special_tokens=True
    )

    # Generation kwargs
    generation_kwargs = dict(
        **inputs,
        max_new_tokens=int(max_new_tokens),
        temperature=float(temperature),
        top_p=float(top_p),
        repetition_penalty=float(repetition_penalty),
        do_sample=True,
        pad_token_id=inference_tokenizer.eos_token_id,
        eos_token_id=inference_tokenizer.eos_token_id,
        streamer=streamer,
    )

    generation_errors = []

    def _run_generation():
        # If generate() fails, the streamer never receives its end-of-stream
        # signal and the consumer loop below would block forever; close the
        # stream explicitly and keep the error for the caller.
        try:
            inference_model.generate(**generation_kwargs)
        except Exception as exc:
            generation_errors.append(exc)
            streamer.end()

    # Run generation in separate thread so chunks can be yielded as they arrive
    thread = Thread(target=_run_generation)
    thread.start()

    # Stream the output
    partial_text = ""
    for new_text in streamer:
        partial_text += new_text
        yield partial_text

    thread.join()

    # Surface a generation failure instead of silently returning a truncated answer.
    if generation_errors:
        raise generation_errors[0]


# Sample legal questions demonstrating different formats the model learned
sample_questions = [
    "What are the key duties of company directors under UK law?",  # Educational
    "My client's company is facing insolvency. What should they do?",  # Client interaction
    "Analyze the case of Salomon v Salomon & Co Ltd and its implications.",  # Case analysis
    "Explain how Section 172 of the Companies Act 2006 defines directors' duties.",  # Statutory
    "What is the difference between negligence and breach of statutory duty?",  # Educational
    "A director wants to enter into a contract with their company. What are the legal requirements?",  # Client interaction
]

# Create Gradio Interface - NO SAMPLE TYPE SELECTOR
# Base model, dataset and LoRA values shown in the UI are taken from the
# configuration constants so the page cannot drift from what was trained.
# NOTE(review): the UI calls the model "Pactoria v1" / `rzeraat/pactoria-v1`,
# while HUGGINGFACE_MODEL_NAME pushes to "rzeraat/pactoria-v2" - confirm which
# name is intended.
with gr.Blocks(theme=gr.themes.Soft(), title="UK Legal AI Assistant - Pactoria v1") as demo:
    gr.Markdown(f"""
    # 🏛️ UK Legal AI Assistant - Pactoria v1
    ### Powered by Fine-tuned {MODEL_NAME.split('/')[-1]}
    
    **🤖 Intelligent Format Detection**
    
    The model automatically adapts its answer style based on your question:
    - **Educational questions** ("What is...", "Explain...") → Teaching format with definitions and examples
    - **Client scenarios** ("My client...", "What should...") → Practical advice and recommendations
    - **Case analysis** ("Analyze the case...", legal problems) → IRAC methodology
    - **Statutory questions** ("Section X says...", "The Act provides...") → Legislative interpretation
    
    **No need to select a format - the model knows!** ✨
    """)
    
    with gr.Row():
        with gr.Column(scale=2):
            question_input = gr.Textbox(
                label="Legal Question",
                placeholder="Ask any UK law question - the model will adapt its answer format automatically...",
                lines=4
            )
            
            with gr.Accordion("⚙️ Generation Settings", open=False):
                temperature = gr.Slider(
                    minimum=0.1,
                    maximum=2.0,
                    value=0.7,
                    step=0.1,
                    label="Temperature (creativity)",
                    info="Lower = more focused, Higher = more creative"
                )
                
                max_tokens = gr.Slider(
                    minimum=128,
                    maximum=1024,
                    value=512,
                    step=64,
                    label="Max Tokens",
                    info="Maximum length of generated response"
                )
                
                top_p = gr.Slider(
                    minimum=0.1,
                    maximum=1.0,
                    value=0.9,
                    step=0.05,
                    label="Top P (nucleus sampling)",
                    info="Controls diversity of output"
                )
                
                repetition_penalty = gr.Slider(
                    minimum=1.0,
                    maximum=2.0,
                    value=1.1,
                    step=0.1,
                    label="Repetition Penalty",
                    info="Penalize repeated tokens"
                )
            
            generate_btn = gr.Button("🔍 Generate Answer (Streaming)", variant="primary", size="lg")
            
            gr.Markdown("### 📝 Sample Questions")
            gr.Markdown("*Click any question to try it - notice how the model adapts its format!*")
            # Keep (button, question) pairs so the click handlers can be wired
            # up after the layout is complete.
            sample_btns = []
            for sq in sample_questions:
                btn = gr.Button(sq, size="sm")
                sample_btns.append((btn, sq))
        
        with gr.Column(scale=3):
            answer_output = gr.Textbox(
                label="AI Response (Streaming)",
                lines=22,
                show_copy_button=True
            )
            
            gr.Markdown(f"""
            ---
            **Model Info:**
            - **Name**: Pactoria v1 (`rzeraat/pactoria-v1`)
            - **Base**: {MODEL_NAME} (494M params)
            - **Dataset**: {DATASET_NAME} (10,018 samples)
            - **LoRA**: Rank {LORA_R}, Alpha {LORA_ALPHA}
            - **Training**: Multi-GPU (2× T4)
            
            **🎯 Intelligent Features:**
            - Automatic format detection from question patterns
            - Multi-format training (4 answer styles learned)
            - Step-by-step legal reasoning
            - Real case citations and statutory references
            
            **💡 The model learned different answer structures from question patterns!**
            """)
    
    # Connect the generate button
    generate_btn.click(
        fn=generate_legal_answer_stream,
        inputs=[question_input, temperature, max_tokens, top_p, repetition_penalty],
        outputs=answer_output
    )
    
    # Connect sample question buttons. The default argument binds each
    # question at definition time; a plain closure would see only the last one.
    for btn, question in sample_btns:
        btn.click(
            fn=lambda q=question: q,
            outputs=question_input
        )
    
    # Examples section
    gr.Examples(
        examples=[
            ["What are the requirements for a valid contract under English law?"],
            ["My client's company is facing insolvency. What legal options are available?"],
            ["Analyze the doctrine of piercing the corporate veil with relevant case examples."],
            ["How does Section 172 of the Companies Act 2006 define directors' duties?"],
            ["What is the difference between wrongful dismissal and unfair dismissal?"],
            ["A director wants to use company property for personal use. Is this allowed?"],
        ],
        inputs=[question_input],
        label="💡 Example Questions (Different Formats)"
    )

print("✅ Gradio UI created!")
print("🤖 Model will automatically detect and use the appropriate answer format")
print("🚀 Launching interface...")

# Launch the interface
demo.launch(
    share=True,  # Creates public link
    debug=True,
    show_error=True
)