# TMF921 Intent Translation - Interactive Notebook

This notebook provides an interactive environment for experimenting with TMF921 intent translation using lightweight LLMs.

## Features
- Test individual scenarios
- Compare different approaches (few-shot, RAG)
- Visualize results
- Experiment with prompts

## Setup

First, let's import all necessary modules and initialize components.

In [None]:
# Standard-library modules used throughout the notebook.
import json
import sys
from pathlib import Path

# Put <current working directory>/src at the front of the import path.
# This has to run before the ``tmf921`` import below.
sys.path.insert(0, str(Path.cwd().joinpath("src")))

# Project components used by the cells that follow.
from tmf921 import (
    CharacteristicNameMapper,
    EXAMPLE_SCENARIOS,
    GSTRetriever,
    GSTSpecification,
    OllamaClient,
    ScenarioDataset,
    TMF921PromptBuilder,
    TMF921Validator,
)

print("✓ All modules imported successfully")

In [None]:
# Create the shared objects that the later cells rely on:
# the GST specification, the validation scenarios, and three spec-based helpers.
print("Loading components...")

# GST specification, read from "gst.json" (path relative to the working directory).
gst = GSTSpecification("gst.json")
n_characteristics = len(gst.spec['serviceSpecCharacteristic'])
print(f"✓ Loaded GST with {n_characteristics} characteristics")

# Validation scenarios, read from "data/val.json".
scenarios = ScenarioDataset("data/val.json")
n_scenarios = len(scenarios.scenarios)
print(f"✓ Loaded {n_scenarios} validation scenarios")

# All three helpers are constructed from the same GST spec object.
validator = TMF921Validator(gst.spec)
prompt_builder = TMF921PromptBuilder(gst.spec)
name_mapper = CharacteristicNameMapper(gst.spec)

print("✓ All components initialized")

## 1. Test Single Scenario Translation

Let's test translating a single scenario with different approaches.

In [None]:
# Natural-language request used as the running example for the
# single-scenario cells in this section.
test_scenario = (
    "Deploy IoT sensor network: 1 Mbps per device, "
    "500ms latency tolerance, 99.9% availability, 10,000 sensors."
)

print(f"Test Scenario: {test_scenario}\n")

### Initialize LLM Client

In [None]:
# Model name handed to the Ollama client.
MODEL = "gpt-oss:20b-cloud"  # or "llama3:latest" for local

client = OllamaClient(model=MODEL)

# Report connectivity. A failed check is only printed, not raised, so the
# cells below will still be attempted.
# NOTE(review): `_check_connection` is private by naming convention — confirm
# whether OllamaClient exposes a public equivalent.
print(
    f"✓ Connected to Ollama with model: {MODEL}"
    if client._check_connection()
    else "✗ Cannot connect to Ollama. Make sure it's running."
)

### Approach 1: Few-Shot Learning

In [None]:
# The system prompt is reused by every generation call later in the notebook.
system_prompt = prompt_builder.build_system_prompt()

# Few-shot user prompt built from the test scenario and the first three
# entries of EXAMPLE_SCENARIOS.
user_prompt = prompt_builder.build_few_shot_prompt(
    test_scenario, EXAMPLE_SCENARIOS[:3], max_examples=3
)

# Show the size and a short preview instead of dumping the whole prompt.
print(f"Prompt length: {len(user_prompt)} characters\n")
print("First 500 characters:", user_prompt[:500] + "...", sep="\n")

In [None]:
# Generate the few-shot translation and try to parse the reply as JSON.
# `import time` stays in this cell because the RAG cell further down also
# uses the `time` module and relies on this import having run.
import time

print("Generating translation...")

# perf_counter() is a monotonic, high-resolution clock meant for measuring
# durations; time.time() follows the wall clock and can jump if the system
# time is adjusted while the request is in flight.
start = time.perf_counter()

response = client.generate(
    prompt=user_prompt,
    system_prompt=system_prompt,
    temperature=0.1
)

elapsed = time.perf_counter() - start

print(f"✓ Generated in {elapsed:.1f}s")
print(f"Tokens: {response['tokens']}")

# Extract the JSON intent from the raw model reply. A falsy result is
# treated as "nothing could be parsed" here and by the validation cell.
intent_json = client.extract_json(response['response'])

if intent_json:
    print("\n✓ Successfully extracted JSON")
    print(json.dumps(intent_json, indent=2))
else:
    # Show the raw reply so the reason for the parse failure can be inspected.
    print("\n✗ Failed to extract JSON")
    print(response['response'])

In [None]:
# Post-process and validate the few-shot result.
# The whole cell is skipped when no JSON could be extracted.
if intent_json:
    # Correct characteristic names first, then validate the corrected intent.
    corrected_intent, corrections = name_mapper.correct_intent(intent_json)
    validation = validator.validate_all(corrected_intent)

    print("Validation Results:")
    for label, key in (
        ("Format valid", 'format_valid'),
        ("Characteristics valid", 'characteristics_valid'),
        ("Overall valid", 'overall_valid'),
    ):
        print(f"  {label}: {validation[key]}")
    print(f"  Name corrections: {len(corrections)}")

    # List each correction that the name mapper reported.
    if corrections:
        print("\nCorrections applied:")
        for applied in corrections:
            print(f"  - {applied}")

    # Validator errors are only shown for an invalid intent.
    if not validation['overall_valid']:
        print("\nErrors:")
        for problem in validation['errors']:
            print(f"  - {problem}")

### Approach 2: RAG-Enhanced

In [None]:
# Initialize the RAG retriever and fetch characteristics for the test scenario.
#
# Reset the result first: if this cell fails on a re-run, the next cell must
# not pick up stale hits left over from an earlier successful execution.
# An empty list makes its `'retrieved' in locals() and retrieved` guard false.
retrieved = []

try:
    retriever = GSTRetriever()
    print("✓ RAG retriever initialized")

    # Retrieve relevant characteristics
    retrieved = retriever.retrieve_for_scenario(test_scenario, n_results=8)

    print(f"\nRetrieved {len(retrieved)} characteristics:")
    for i, char in enumerate(retrieved, 1):
        print(f"  {i}. {char['name']} (similarity: {char['similarity']:.3f})")

except Exception as e:
    # Deliberately broad: RAG is optional, so any failure is reported with a
    # setup hint instead of stopping the notebook.
    print(f"✗ RAG not available: {e}")
    print("  Run: python scripts/setup_rag.py")

In [None]:
# Build the RAG prompt, generate, then post-process the result the same way
# as the few-shot path (name correction before validation) so the two
# approaches are compared on equal terms.
# Relies on `time` having been imported by the few-shot generation cell.
if 'retrieved' in locals() and retrieved:
    rag_prompt = prompt_builder.build_rag_prompt(
        test_scenario,
        retrieved,
        include_examples=True
    )

    print(f"RAG prompt length: {len(rag_prompt)} characters")

    # Generate with RAG. perf_counter() is monotonic, so the measured
    # duration is not affected by wall-clock adjustments.
    print("\nGenerating with RAG...")
    start = time.perf_counter()

    response_rag = client.generate(
        prompt=rag_prompt,
        system_prompt=system_prompt,
        temperature=0.1
    )

    elapsed = time.perf_counter() - start
    print(f"✓ Generated in {elapsed:.1f}s")

    # Extract the JSON intent from the raw reply.
    intent_rag = client.extract_json(response_rag['response'])

    if intent_rag:
        # Apply the same name correction as the few-shot and batch cells
        # before validating.
        corrected_rag, corrections_rag = name_mapper.correct_intent(intent_rag)
        validation_rag = validator.validate_all(corrected_rag)

        print(f"\nRAG Result - Valid: {validation_rag['overall_valid']}")
        print(f"  Name corrections: {len(corrections_rag)}")

        # Preview the validated (corrected) intent; only add an ellipsis
        # when the text was actually cut.
        rag_json_text = json.dumps(corrected_rag, indent=2)
        if len(rag_json_text) > 500:
            rag_json_text = rag_json_text[:500] + "..."
        print(rag_json_text)
    else:
        # Same failure reporting as the few-shot cell: show the raw reply.
        print("\n✗ Failed to extract JSON")
        print(response_rag['response'])
else:
    print("Skipping RAG generation: no retrieved characteristics available.")

## 2. Batch Testing

Test multiple scenarios and analyze results.

In [None]:
# Run the few-shot pipeline over a small batch of validation scenarios.
#
# Loop-local names carry a `batch_` prefix so this cell does not overwrite the
# notebook-level `user_prompt`, `response` and `validation` that the
# single-scenario cells above produce and reuse when re-run.
test_scenarios = scenarios.scenarios[:5]  # First 5 scenarios

# One record per scenario for which the model returned a response. Replies
# whose JSON could not be extracted are recorded with valid=False so they
# count against accuracy in the summary cell instead of silently vanishing.
# Scenarios that raise an exception have no timing data and are not recorded.
results = []
extracted_count = 0  # scenarios whose reply yielded parseable JSON

print(f"Testing {len(test_scenarios)} scenarios...\n")

for i, scenario in enumerate(test_scenarios, 1):
    print(f"[{i}/{len(test_scenarios)}] {scenario[:60]}...")

    try:
        # Build prompt (few-shot, two examples)
        batch_prompt = prompt_builder.build_few_shot_prompt(
            scenario, EXAMPLE_SCENARIOS[:2], max_examples=2
        )

        # Generate
        batch_response = client.generate(
            prompt=batch_prompt,
            system_prompt=system_prompt,
            temperature=0.1
        )

        # Extract and validate
        intent = client.extract_json(batch_response['response'])

        if intent:
            corrected, _corrections = name_mapper.correct_intent(intent)
            batch_validation = validator.validate_all(corrected)
            is_valid = batch_validation['overall_valid']
            extracted_count += 1
        else:
            is_valid = False

        results.append({
            'scenario': scenario,
            'valid': is_valid,
            'time': batch_response['time_seconds'],
            'tokens': batch_response['tokens']
        })

        if intent:
            status = "✓" if is_valid else "✗"
            print(f"  {status} Valid: {is_valid} ({batch_response['time_seconds']:.1f}s)")
        else:
            print("  ✗ Failed to extract JSON")

    except Exception as e:
        # Best-effort batch: report a shortened message and move on.
        print(f"  ✗ Error: {str(e)[:50]}")

print(f"\nCompleted: {extracted_count}/{len(test_scenarios)} successful")

In [None]:
# Summarise the batch run; nothing is printed when `results` is empty.
if results:
    n_records = len(results)

    # Number of records flagged valid, plus simple means over all records.
    valid_count = len([record for record in results if record['valid']])
    avg_time = sum(record['time'] for record in results) / n_records
    avg_tokens = sum(record['tokens'] for record in results) / n_records

    print("Batch Results Summary:")
    print(f"  Accuracy: {valid_count}/{n_records} ({valid_count/n_records*100:.0f}%)")
    print(f"  Avg Time: {avg_time:.1f}s per scenario")
    print(f"  Avg Tokens: {avg_tokens:.0f}")

## 3. Prompt Experimentation

Test different prompt variations.

In [None]:
# Free-form scenario used by the prompt experiments below; edit to try your own.
custom_scenario = (
    "Create a smart city traffic management system: "
    "100 Mbps bandwidth, 10ms latency, 99.99% uptime."
)

print(f"Custom Scenario: {custom_scenario}")

In [None]:
# Compare prompt sizes for zero-, one- and three-example prompts.
# Only the prompt is built here; no model call is made unless the lines at
# the bottom of the loop are uncommented.
for num_examples in (0, 1, 3):
    print(f"\n--- Testing with {num_examples} examples ---")

    if num_examples:
        # Few-shot: use the first `num_examples` example scenarios.
        prompt = prompt_builder.build_few_shot_prompt(
            custom_scenario,
            EXAMPLE_SCENARIOS[:num_examples],
            max_examples=num_examples,
        )
    else:
        # Zero-shot: no examples at all.
        prompt = prompt_builder.build_zero_shot_prompt(custom_scenario)

    print(f"Prompt length: {len(prompt)} chars")

    # You can uncomment to actually run
    # response = client.generate(prompt=prompt, system_prompt=system_prompt)
    # print(f"Result: {response['response'][:100]}...")

## 4. Visualize Results

Create visualizations of experiment results.

In [None]:
# Load saved experiment metrics and plot accuracy and speed side by side.
import matplotlib.pyplot as plt

# Names of sub-directories under results/ that hold a metrics_summary.json.
experiments_to_compare = [
    "validation_50",
    "rag_cloud_50_scenarios"
]

experiment_results = {}

for exp in experiments_to_compare:
    try:
        # JSON is UTF-8 by specification; do not depend on the platform default.
        with open(f"results/{exp}/metrics_summary.json", encoding="utf-8") as f:
            experiment_results[exp] = json.load(f)
    except (OSError, ValueError) as err:
        # OSError: file missing or unreadable. ValueError: malformed JSON
        # (json.JSONDecodeError) or undecodable bytes (UnicodeDecodeError).
        # A bare `except:` would also swallow KeyboardInterrupt and hide
        # programming errors, so only these expected failures are caught.
        print(f"Could not load {exp}: {err}")

if experiment_results:
    # Create comparison plots
    fig, axes = plt.subplots(1, 2, figsize=(12, 4))

    # Accuracy comparison
    names = list(experiment_results.keys())
    accuracies = [experiment_results[n]['feaci']['accuracy'] for n in names]

    axes[0].bar(names, accuracies)
    axes[0].set_ylabel('Accuracy (%)')
    axes[0].set_title('Accuracy Comparison')
    # NOTE(review): the fixed 0-100 range assumes accuracy is stored as a
    # percentage — confirm against the metrics file format.
    axes[0].set_ylim([0, 100])

    # Speed comparison
    speeds = [experiment_results[n]['feaci']['inference_time_avg_seconds'] for n in names]

    axes[1].bar(names, speeds)
    axes[1].set_ylabel('Time (seconds)')
    axes[1].set_title('Speed Comparison')

    plt.tight_layout()
    plt.show()
else:
    print("No experiment results to visualize")

## 5. Export Results

Save your experimental results.

In [None]:
# Persist the batch results as pretty-printed JSON in the working directory.
# Nothing is written when `results` is empty.
if results:
    output_file = "notebook_results.json"

    with open(output_file, mode='w') as f:
        json.dump(results, f, indent=2)

    print(f"✓ Results saved to {output_file}")

## Next Steps

- Run full experiments: `python scripts/run_experiment.py --list`
- Analyze results: `python scripts/analyze_results.py --list`
- See README.md for more information