# Legal Assistant Chatbot - Pactoria v1
A fine-tuned Qwen2-0.5B model (LoRA adapter merged into the base weights) for UK legal Q&A.

In [10]:
# Quietly (-q) install the third-party packages for this notebook.
# NOTE(review): no versions are pinned, so each run picks up whatever releases
# are current — confirm the cells below still match the installed Gradio /
# transformers APIs.
!pip install -q transformers accelerate huggingface_hub bitsandbytes gradio

In [None]:
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch

# Hub repository holding the merged weights, so no separate adapter is attached.
# NOTE(review): the banners below say "V1" while this id ends in "v3" —
# confirm which version is intended.
model_id = "rzeraat/pactoria-v3"

# Token cap applied to prompts at inference time (same value as training).
MAX_SEQ_LENGTH = 1000

_RULE = "=" * 80
_has_cuda = torch.cuda.is_available()

print(_RULE)
print("LOADING PACTORIA V1 MODEL")
print(_RULE)

# --- Tokenizer -------------------------------------------------------------
print("\n📖 Loading tokenizer...")
inference_tokenizer = AutoTokenizer.from_pretrained(model_id, use_fast=True)

# Reuse EOS as the padding token when the tokenizer defines none.
_pad_missing = inference_tokenizer.pad_token_id is None
if _pad_missing and inference_tokenizer.eos_token_id is not None:
    inference_tokenizer.pad_token = inference_tokenizer.eos_token

print("   ✅ Tokenizer loaded")

# --- Model -----------------------------------------------------------------
print("\n🤖 Loading merged model...")

# Half precision with automatic device placement on GPU; full precision and
# default placement otherwise.
if _has_cuda:
    dtype = torch.float16
    _device_map = "auto"
else:
    dtype = torch.float32
    _device_map = None

inference_model = AutoModelForCausalLM.from_pretrained(
    model_id,
    torch_dtype=dtype,
    device_map=_device_map,
)

# Inference only: switch off training-mode layers such as dropout.
inference_model.eval()

# --- Summary ---------------------------------------------------------------
_device_label = "CUDA" if _has_cuda else "CPU"
_summary_lines = (
    _RULE,
    "✅ MODEL LOADED SUCCESSFULLY",
    _RULE,
    f"Model: {model_id}",
    f"Device: {_device_label}",
    f"Dtype: {dtype}",
    f"Max Sequence Length: {MAX_SEQ_LENGTH}",
    "Parameters: 494M (Qwen2-0.5B base)",
    _RULE,
)
for _summary_line in _summary_lines:
    print(_summary_line)

In [None]:
import gradio as gr
import torch
from transformers import TextIteratorStreamer
from threading import Thread

# Define generation function for Gradio
def chat_with_model(message, history, temperature=0.7, max_tokens=300, top_p=0.9):
    """
    Stream a response to ``message`` from the fine-tuned merged model.

    The question is wrapped in the ``### Instruction:`` / ``### Response:``
    template and passed to ``inference_model.generate`` on a worker thread.
    The text accumulated so far is yielded after every streamed chunk, which
    is the shape ``gr.ChatInterface`` expects from a streaming callback.

    The model picks the answer format from the question pattern itself, so no
    format selector is part of the prompt.

    Args:
        message: The user's question.
        history: Prior chat turns supplied by Gradio. Unused: every question
            is answered as an independent single-turn prompt.
        temperature: Sampling temperature.
        max_tokens: Upper bound on newly generated tokens (coerced to int).
        top_p: Nucleus sampling threshold.

    Yields:
        The cumulative response text generated so far.

    Raises:
        Exception: Any error raised by ``generate`` on the worker thread is
            re-raised here instead of leaving the stream blocked forever.
    """
    if not message or not message.strip():
        yield "Please enter a question."
        return

    def render_prompt(instruction):
        # Simple prompt - model decides format based on question
        return f"### Instruction:\n{instruction}\n\n### Response:"

    # Tokenize. Plain right-truncation would cut off the trailing
    # "### Response:" marker for over-long questions, so trim the question
    # itself and keep the template intact.
    encoded = inference_tokenizer(render_prompt(message), return_tensors="pt")
    overflow = encoded["input_ids"].shape[-1] - MAX_SEQ_LENGTH
    if overflow > 0:
        question_ids = inference_tokenizer(message, add_special_tokens=False)["input_ids"]
        kept_ids = question_ids[: max(len(question_ids) - overflow, 0)]
        trimmed = inference_tokenizer.decode(kept_ids, skip_special_tokens=True)
        encoded = inference_tokenizer(
            render_prompt(trimmed),
            return_tensors="pt",
            truncation=True,  # safety net if re-tokenizing shifts the count
            max_length=MAX_SEQ_LENGTH,
        )

    # Follow the model's own device rather than assuming "cuda".
    target_device = inference_model.device
    inputs = {name: tensor.to(target_device) for name, tensor in encoded.items()}

    # Setup streamer for real-time output
    streamer = TextIteratorStreamer(
        inference_tokenizer, skip_prompt=True, skip_special_tokens=True
    )

    # `or` would wrongly discard a legitimate pad id of 0.
    pad_token_id = inference_tokenizer.pad_token_id
    if pad_token_id is None:
        pad_token_id = inference_tokenizer.eos_token_id

    # Generation parameters
    generation_kwargs = {
        **inputs,
        "max_new_tokens": int(max_tokens),  # UI sliders may deliver floats
        "temperature": float(temperature),
        "top_p": float(top_p),
        "do_sample": True,
        "pad_token_id": pad_token_id,
        "eos_token_id": inference_tokenizer.eos_token_id,
        "streamer": streamer,
    }

    worker_errors = []

    def run_generation():
        try:
            inference_model.generate(**generation_kwargs)
        except Exception as exc:  # thread boundary: re-raised by the consumer
            worker_errors.append(exc)
            # Unblock the consumer loop, which otherwise waits forever for
            # the end-of-stream signal.
            streamer.end()

    # Run generation in separate thread
    thread = Thread(target=run_generation)
    thread.start()

    # Stream the output
    partial_response = ""
    try:
        for new_text in streamer:
            partial_response += new_text
            yield partial_response
    finally:
        # Also reached when the caller abandons the generator early.
        thread.join()

    if worker_errors:
        raise worker_errors[0]

# Create Gradio interface with ChatInterface
import inspect  # cell-local import: used only for the compatibility check below

# Custom button labels. Newer Gradio releases dropped these ChatInterface
# parameters, so pass each one only when the installed version still accepts
# it; otherwise the constructor raises TypeError on the unknown keyword.
_legacy_button_labels = {
    "retry_btn": "🔄 Retry",
    "undo_btn": "↩️ Undo",
    "clear_btn": "🗑️ Clear",
}
_chat_interface_params = inspect.signature(gr.ChatInterface.__init__).parameters
_button_kwargs = {
    name: label
    for name, label in _legacy_button_labels.items()
    if name in _chat_interface_params
}

# NOTE(review): the description links to rzeraat/pactoria-v1, whereas the
# loading cell sets model_id to "rzeraat/pactoria-v3" — confirm which is right.
demo = gr.ChatInterface(
    fn=chat_with_model,
    title="🏛️ Pactoria v1 - UK Legal Assistant",
    description="""
    **Pactoria v1: Fine-tuned Qwen2-0.5B for UK Legal Q&A**
    
    **🤖 Intelligent Format Detection**
    
    The model automatically adapts its answer style based on your question:
    - **Educational questions** ("What is...", "Explain...") → Teaching format with definitions and examples
    - **Client scenarios** ("My client...", "What should...") → Practical advice and recommendations
    - **Case analysis** ("Analyze the case...", legal problems) → IRAC methodology
    - **Statutory questions** ("Section X says...", "The Act provides...") → Legislative interpretation
    
    **No need to select a format - the model knows!** ✨
    
    **Training Details:**
    - Dataset: 10,018+ UK legal Q&A samples
    - Training: Multi-GPU DDP (2× T4 GPUs)
    - LoRA Rank: 32, Alpha: 64
    - Format: Model learns answer structure from question patterns
    
    *Model: [`rzeraat/pactoria-v1`](https://huggingface.co/rzeraat/pactoria-v1)*
    """,
    # One-element rows: each example fills only the message box.
    examples=[
        ["What are the requirements for a valid contract under English law?"],
        ["My client's company is facing insolvency. What legal options are available?"],
        ["Analyze the doctrine of piercing the corporate veil with case examples."],
        ["How does Section 172 of the Companies Act 2006 define directors' duties?"],
        ["What is the difference between wrongful dismissal and unfair dismissal?"],
        ["A director wants to use company property for personal use. Is this allowed?"],
        ["Explain the concept of consideration in contract law."],
        ["What are the fiduciary duties of trustees under UK law?"],
    ],
    # Order matters: these map positionally onto chat_with_model's
    # (temperature, max_tokens, top_p) parameters after (message, history).
    additional_inputs=[
        gr.Slider(
            minimum=0.1,
            maximum=1.0,
            value=0.7,
            step=0.1,
            label="Temperature",
            info="Higher = more creative, Lower = more focused"
        ),
        gr.Slider(
            minimum=128,
            maximum=512,
            value=300,
            step=64,
            label="Max Tokens",
            info="Maximum length of generated response"
        ),
        gr.Slider(
            minimum=0.1,
            maximum=1.0,
            value=0.9,
            step=0.05,
            label="Top P",
            info="Nucleus sampling threshold"
        ),
    ],
    theme=gr.themes.Soft(
        primary_hue="blue",
        secondary_hue="slate",
    ),
    **_button_kwargs,
)

# Print a start-up banner, then launch the interface.
_banner_rule = "=" * 80
_startup_banner = (
    "\n" + _banner_rule,
    "🚀 LAUNCHING PACTORIA V1 WEB INTERFACE",
    _banner_rule,
    "📊 Features:",
    "   ✅ Automatic format detection from question patterns",
    "   ✅ Streaming responses for real-time output",
    "   ✅ 4 answer styles (educational, client, case, statutory)",
    "   ✅ Step-by-step legal reasoning",
    "   ✅ Real UK case citations",
    "",
    "📍 Interface will be available at:",
    "   - Local: http://127.0.0.1:7860",
    "   - Public: Will be shown below (valid 72 hours)",
    _banner_rule + "\n",
)
for _banner_line in _startup_banner:
    print(_banner_line)

# Launch settings:
#   share        - request a public URL (per the original note: 72 hours)
#   server_name  - listen on all interfaces
#   server_port  - default Gradio port
#   show_error   - show errors in the UI
_launch_options = {
    "share": True,
    "server_name": "0.0.0.0",
    "server_port": 7860,
    "show_error": True,
}
demo.launch(**_launch_options)