In [9]:
print("=" * 60)
print("INSTALLING VISION MODEL DEPENDENCIES")
print("=" * 60)

print("\nInstalling required packages...")
print("This may take 2-3 minutes...\n")

# Install qwen-vl-utils and other dependencies
!pip install -q qwen-vl-utils
!pip install -q timm
!pip install -q torchvision
!pip install -q Pillow

print("\n‚úì Vision model dependencies installed")
print("=" * 60)

INSTALLING VISION MODEL DEPENDENCIES

Installing required packages...
This may take 2-3 minutes...


‚úì Vision model dependencies installed


In [10]:
# Section banner for the import cell.
print("\n" + "=" * 60)
print("IMPORTING LIBRARIES")
print("=" * 60)

# Core dependencies used by the cells below.
import torch
import json
import pandas as pd  # pd.Timestamp is used when the prototype metadata is saved
from pathlib import Path
from datetime import datetime
from PIL import Image
import requests
from io import BytesIO

print("‚úì Core libraries imported")

# Optional vision stack: a missing package only prints a warning here instead
# of raising, so the names below stay undefined if the import fails.
try:
    from transformers import Qwen2VLForConditionalGeneration, AutoTokenizer, AutoProcessor
    from qwen_vl_utils import process_vision_info
    print("‚úì Vision model libraries imported")
except ImportError as e:
    print(f"‚ö†Ô∏è Warning: {e}")
    print("Run Cell 1 to install dependencies, then restart kernel")

print("=" * 60)


IMPORTING LIBRARIES
‚úì Core libraries imported
‚úì Vision model libraries imported


In [11]:
# Section banner (blank line, rule, title, rule).
print("\n" + "=" * 60, "LOADING VISION MODEL FOR PROTOTYPE ANALYSIS", "=" * 60, sep="\n")

# No vision model is loaded in this cell: the prototype takes a simpler route
# that does not require Qwen and relies on a parsing system that works with
# the models that are already available.
print(
    "\nüìå Using alternative approach compatible with your environment",
    "This approach uses rule-based parsing + your existing Mistral model",
    "\n‚úì Ready to parse prototypes",
    "=" * 60,
    sep="\n",
)


LOADING VISION MODEL FOR PROTOTYPE ANALYSIS

üìå Using alternative approach compatible with your environment
This approach uses rule-based parsing + your existing Mistral model

‚úì Ready to parse prototypes


In [12]:
# Section banner for the parsing-function definitions.
print("\n" + "=" * 60, "PROTOTYPE PARSING FUNCTIONS", "=" * 60, sep="\n")

def parse_figma_url(figma_url, access_token=None):
    """
    Parse Figma prototype from URL

    Recognises ``/file/``, ``/design/`` and ``/proto/`` links and extracts the
    file key, i.e. the path segment that directly follows that marker. Query
    strings and fragments are ignored while looking for the key.

    Args:
        figma_url: Figma file, design or prototype URL
        access_token: Figma personal access token (optional). It is accepted
            for interface compatibility but not used: no Figma API call is
            made here.

    Returns:
        dict: on success, the keys ``figma_file_key``, ``figma_url`` (the
        value passed in), ``status`` and ``note``; on failure, a dict with a
        single ``error`` key.
    """
    # Reject None / non-string / blank input up front instead of crashing on
    # the slice used in the log line below.
    if not isinstance(figma_url, str) or not figma_url.strip():
        return {"error": "Invalid Figma URL format"}

    print(f"Parsing Figma URL: {figma_url[:50]}...")

    link_kinds = ("file", "design", "proto")

    # Drop the query string and fragment so that a key immediately followed by
    # "?..." (e.g. ".../file/KEY?node-id=1-2") is not polluted by parameters.
    path_only = figma_url.strip().split("?", 1)[0].split("#", 1)[0]

    if not any(f"figma.com/{kind}/" in path_only for kind in link_kinds):
        return {"error": "Invalid Figma URL format"}

    # The file key is the segment right after the first file/design/proto marker.
    parts = path_only.split("/")
    file_key = None
    for i, part in enumerate(parts[:-1]):
        if part in link_kinds:
            file_key = parts[i + 1]
            break

    if not file_key:
        return {"error": "Could not extract file ID from URL"}

    print(f"‚úì Extracted file key: {file_key}")

    # Note: Full Figma API implementation would go here
    # For now, return a structured template
    return {
        "figma_file_key": file_key,
        "figma_url": figma_url,
        "status": "ready_for_analysis",
        "note": "Figma API integration requires access token. Using visual analysis approach."
    }


def parse_local_screenshots(image_paths):
    """
    Parse local UI screenshots

    Each path is opened with PIL only to read the image dimensions. Paths that
    cannot be opened are reported on stdout and skipped, so the returned list
    may be shorter than the input.

    Args:
        image_paths: Iterable of paths (``str`` or ``pathlib.Path``) to UI
            screenshot images

    Returns:
        List of dicts, one per successfully opened image, with the keys
        ``screen_id``, ``source``, ``dimensions``, ``parsed`` and
        ``timestamp``. ``screen_id`` is numbered by the 1-based position in
        ``image_paths``, so ids of skipped images are not reused.
    """
    parsed_screens = []

    for i, img_path in enumerate(image_paths):
        try:
            # The context manager closes the underlying file as soon as the
            # size has been read, instead of leaving the handle open.
            with Image.open(img_path) as img:
                dimensions = {"width": img.width, "height": img.height}
        except Exception as e:
            print(f"‚úó Error parsing {img_path}: {e}")
            continue

        parsed_screens.append({
            "screen_id": f"screen_{i+1}",
            "source": str(img_path),
            "dimensions": dimensions,
            "parsed": True,
            "timestamp": datetime.now().isoformat()
        })
        # Go through Path so plain-string paths work too; reading .name off a
        # str used to raise after the screen had already been recorded.
        print(f"‚úì Parsed: {Path(str(img_path)).name}")

    return parsed_screens


def analyze_ui_structure(screen_data, model=None, tokenizer=None):
    """
    Analyze UI structure using LLM

    Only the screen's ``screen_id`` is interpolated into the prompt; no image
    data is passed to the model. Generation uses sampling, so the returned
    text is not deterministic.

    Args:
        screen_data: Screen data dictionary (``screen_id`` is read via ``.get``)
        model: Language model for analysis; falls back to a module-level
            ``model`` if omitted
        tokenizer: Tokenizer for model; falls back to a module-level
            ``tokenizer`` if omitted

    Returns:
        str: the generated analysis text, or a fixed placeholder string when
        no model/tokenizer is available or when generation fails.
    """
    # Use global model if not provided
    if model is None:
        model = globals().get('model')
    if tokenizer is None:
        tokenizer = globals().get('tokenizer')

    # Create analysis prompt
    prompt = f"""Analyze this UI screen and identify:
1. Interactive elements (buttons, links, forms, inputs)
2. Visual hierarchy and layout
3. Navigation patterns
4. Potential usability issues

Screen: {screen_data.get('screen_id', 'unknown')}
Context: This is a screen from a user interface prototype.

Provide a structured analysis."""

    try:
        if not (model and tokenizer):
            # Fallback structure
            return "AI analysis unavailable - using template structure"

        inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=512)

        # Send the inputs to wherever the model lives. Forcing 'cuda' whenever
        # a GPU exists breaks models that were loaded on the CPU. When the
        # model does not expose a device, keep the previous behavior.
        device = getattr(model, "device", None)
        if device is None and torch.cuda.is_available():
            device = "cuda"
        if device is not None:
            inputs = {k: v.to(device) for k, v in inputs.items()}

        with torch.no_grad():
            outputs = model.generate(
                **inputs,
                max_new_tokens=200,
                temperature=0.7,
                do_sample=True,
                pad_token_id=tokenizer.eos_token_id
            )

        generated = outputs[0]

        # Strip the echoed prompt at token level when the output really starts
        # with the input ids. This does not depend on the decoded text
        # reproducing the prompt character-for-character.
        prompt_ids = inputs["input_ids"] if "input_ids" in inputs else None
        if prompt_ids is not None:
            prompt_len = prompt_ids.shape[-1]
            if (
                generated.shape[-1] >= prompt_len
                and generated[:prompt_len].tolist() == prompt_ids[0].tolist()
            ):
                generated = generated[prompt_len:]

        analysis = tokenizer.decode(generated, skip_special_tokens=True)

        # Textual fallback for outputs that still contain the prompt.
        if prompt in analysis:
            analysis = analysis.split(prompt)[-1]

        return analysis.strip()

    except Exception as e:
        # Best effort by design: a failed analysis must not abort parsing.
        print(f"Warning: Analysis error - {e}")
        return "Analysis error - using default structure"


def create_prototype_structure(source_type, source_data, analyze_with_llm=True):
    """
    Create complete prototype structure

    Args:
        source_type: 'figma_url' or 'local_screenshots'. Any other value
            produces a prototype with no screens and prints a warning.
        source_data: Figma URL or list of image paths
        analyze_with_llm: Whether to use LLM for analysis (only consulted for
            'local_screenshots')

    Returns:
        dict with ``prototype_id``, ``source_type``, ``created_at`` and
        ``screens``; for 'figma_url' it also carries ``figma_data`` (the URL
        parser's result, which may itself contain an ``error`` key).
    """
    # Take the clock reading once so the id and created_at always agree.
    created = datetime.now()
    prototype = {
        "prototype_id": f"proto_{created.strftime('%Y%m%d_%H%M%S')}",
        "source_type": source_type,
        "created_at": created.isoformat(),
        "screens": []
    }

    if source_type == "figma_url":
        figma_data = parse_figma_url(source_data)
        prototype["figma_data"] = figma_data

        # Surface a rejected URL instead of hiding it behind the placeholder
        # screen; the returned structure is unchanged.
        if isinstance(figma_data, dict) and "error" in figma_data:
            print(f"Warning: Figma URL could not be parsed - {figma_data['error']}")

        # Create placeholder screens for Figma
        # In production, this would fetch actual frames from Figma API
        prototype["screens"] = [
            {
                "screen_id": "figma_screen_1",
                "source": "figma",
                "figma_url": source_data,
                "interactive_elements": [
                    {"type": "button", "label": "Primary CTA", "location": "center"},
                    {"type": "navigation", "label": "Menu", "location": "top"}
                ],
                "analysis": "Figma screen ready for testing"
            }
        ]

    elif source_type == "local_screenshots":
        screens = parse_local_screenshots(source_data)

        for screen in screens:
            if analyze_with_llm:
                screen["llm_analysis"] = analyze_ui_structure(screen)

            # Add default interactive elements structure (template values,
            # not detected from the image)
            screen["interactive_elements"] = [
                {"type": "button", "label": "Action Button", "prominence": "high"},
                {"type": "navigation", "label": "Menu/Nav", "prominence": "medium"}
            ]

        prototype["screens"] = screens

    else:
        # Still return the empty prototype for compatibility, but say why it
        # has no screens.
        print(
            f"Warning: Unknown source_type {source_type!r} - "
            "expected 'figma_url' or 'local_screenshots'"
        )

    return prototype


# Confirmation line followed by the closing rule.
print("‚úì Prototype parsing functions loaded", "=" * 60, sep="\n")


PROTOTYPE PARSING FUNCTIONS
‚úì Prototype parsing functions loaded


In [17]:
print("\n" + "=" * 60)
print("EXAMPLE: PARSE FIGMA PROTOTYPE")
print("=" * 60)

print("\nüìã To use a real Figma prototype:")
print("1. Get your Figma file URL")
print("2. (Optional) Get personal access token from:")
print("   https://www.figma.com/settings/access-tokens")
print("3. Replace the URL below with your Figma URL\n")

# Example Figma URL (defined after the instructions so that "the URL below"
# in step 3 is accurate)
figma_url = "https://www.figma.com/proto/dM7nIdi4rcgRP7gigDs1Az/UI?page-id=380%3A989&node-id=420-1449&p=f&viewport=275%2C442%2C0.02&t=xcnjGcrIZkuavBiK-1&scaling=scale-down&content-scaling=fixed&starting-point-node-id=420%3A1449&show-proto-sidebar=1"

# Parse Figma prototype
figma_prototype = create_prototype_structure(
    source_type="figma_url",
    source_data=figma_url,
    analyze_with_llm=False  # Set True to use LLM analysis
)

# create_prototype_structure stores the URL parser's result under
# "figma_data"; an "error" key there means the link was rejected, so do not
# report success in that case.
figma_error = figma_prototype.get("figma_data", {}).get("error")
if figma_error:
    print(f"\nWarning: Figma URL was not recognised - {figma_error}")
    print("Only placeholder screen data is available")
else:
    print("\n‚úì Figma prototype structure created")
print(f"Prototype ID: {figma_prototype['prototype_id']}")
print(f"Screens: {len(figma_prototype['screens'])}")


EXAMPLE: PARSE FIGMA PROTOTYPE

üìã To use a real Figma prototype:
1. Get your Figma file URL
2. (Optional) Get personal access token from:
   https://www.figma.com/settings/access-tokens
3. Replace the URL below with your Figma URL

Parsing Figma URL: https://www.figma.com/proto/dM7nIdi4rcgRP7gigDs1Az...

‚úì Figma prototype structure created
Prototype ID: proto_20251112_075022
Screens: 1


In [18]:
print("\n" + "=" * 60)
print("EXAMPLE: PARSE LOCAL SCREENSHOTS")
print("=" * 60)

# Create prototypes directory
prototypes_dir = Path("./prototypes")
prototypes_dir.mkdir(exist_ok=True)

print(f"\nüìÅ Looking for screenshots in: {prototypes_dir}/")
print("\nTo add your screenshots:")
print("1. Save UI screenshots as PNG or JPG")
print("2. Place them in ./prototypes/ directory")
print("3. Run this cell again\n")

# Find all images in prototypes directory.
# glob() yields entries in no guaranteed order; sorting keeps the screen_N ids
# assigned downstream stable across runs and machines.
image_extensions = ['.png', '.jpg', '.jpeg']
image_files = sorted(
    f for f in prototypes_dir.glob('*')
    if f.suffix.lower() in image_extensions
)

if image_files:
    print(f"Found {len(image_files)} screenshot(s):")
    for img in image_files:
        print(f"  ‚Ä¢ {img.name}")

    # Parse screenshots
    local_prototype = create_prototype_structure(
        source_type="local_screenshots",
        source_data=image_files,
        analyze_with_llm=True  # Use LLM if model is loaded
    )

    print(f"\n‚úì Parsed {len(local_prototype['screens'])} screens")

else:
    print("‚ö†Ô∏è  No screenshots found in ./prototypes/")
    print("Creating demo prototype structure instead...\n")

    # Create demo prototype: a hand-written single-screen example that lets
    # the later cells run without any image files.
    local_prototype = {
        "prototype_id": f"demo_{datetime.now().strftime('%Y%m%d_%H%M%S')}",
        "source_type": "demo",
        "created_at": datetime.now().isoformat(),
        "screens": [
            {
                "screen_id": "home_screen",
                "interactive_elements": [
                    {
                        "type": "button",
                        "label": "Get Started",
                        "location": "center",
                        "prominence": "high",
                        "accessibility": "easy - large, centered, high contrast"
                    },
                    {
                        "type": "link",
                        "label": "Learn More",
                        "location": "bottom-center",
                        "prominence": "medium",
                        "accessibility": "moderate - smaller text"
                    }
                ],
                "visual_hierarchy": "Clear primary CTA with secondary action",
                "navigation_pattern": "Single primary action",
                "usability_concerns": [
                    "No visible back/home navigation",
                    "Unclear destination for 'Get Started'"
                ]
            }
        ]
    }

    print("‚úì Demo prototype created")

print(f"\nPrototype ID: {local_prototype['prototype_id']}")
print(f"Screens: {len(local_prototype['screens'])}")


EXAMPLE: PARSE LOCAL SCREENSHOTS

üìÅ Looking for screenshots in: prototypes/

To add your screenshots:
1. Save UI screenshots as PNG or JPG
2. Place them in ./prototypes/ directory
3. Run this cell again

‚ö†Ô∏è  No screenshots found in ./prototypes/
Creating demo prototype structure instead...

‚úì Demo prototype created

Prototype ID: demo_20251112_075229
Screens: 1


In [7]:
# Section banner for the batch-processing helper.
print("\n" + "="*60, "BATCH PROCESSING WORKFLOW", "="*60, sep="\n")

def batch_parse_prototype(images, save_path="parsed_prototype.json",
                          workflow_path="workflow_analysis.json"):
    """
    Parse all screens in a prototype and save results

    Relies on ``parse_ui_elements`` and ``analyze_workflow_flow`` being
    defined in the notebook namespace; neither is defined in the cells shown
    alongside this function (TODO confirm where they come from).

    Args:
        images: Sized sequence of PIL Images (``len()`` is taken of it)
        save_path: Path to save parsed results (JSON)
        workflow_path: Path to save the workflow analysis (JSON); only
            written when there are at least two images and the analysis
            succeeds

    Returns:
        List of parsed UI dictionaries, one per input image. A screen that
        failed to parse is represented by
        ``{"screen_number": i, "error": "<message>"}``.
    """
    total = len(images)
    parsed_screens = []

    print(f"Processing {total} screens...")

    for i, image in enumerate(images, 1):
        print(f"\nParsing screen {i}/{total}...")

        try:
            parsed_ui = parse_ui_elements(image)
            parsed_ui['screen_number'] = i
            parsed_screens.append(parsed_ui)

            print(f"  ‚úì Found {len(parsed_ui.get('interactive_elements', []))} interactive elements")

        except Exception as e:
            # One bad screen must not abort the batch; record the failure.
            print(f"  ‚ùå Error parsing screen {i}: {e}")
            parsed_screens.append({
                "screen_number": i,
                "error": str(e)
            })

        # Clear GPU cache between screens
        if torch.cuda.is_available():
            torch.cuda.empty_cache()

    # Save results. Serialize first so a non-serializable value cannot leave a
    # truncated file behind.
    parsed_payload = json.dumps(parsed_screens, indent=2)
    with open(save_path, 'w') as f:
        f.write(parsed_payload)

    print(f"\n‚úì Parsed prototype saved to {save_path}")

    # Analyze workflow if multiple screens
    if total >= 2:
        print("\nAnalyzing workflow progression...")
        try:
            workflow_analysis = analyze_workflow_flow(images)
            workflow_payload = json.dumps(workflow_analysis, indent=2)
            with open(workflow_path, 'w') as f:
                f.write(workflow_payload)

            print(f"‚úì Workflow analysis saved to {workflow_path}")
        except Exception as e:
            # The per-screen results are already saved; a failure in this
            # auxiliary step should not prevent them from being returned.
            print(f"  ‚ùå Error analyzing workflow: {e}")

    return parsed_screens

# Uncomment when you have actual images:
# parsed_results = batch_parse_prototype(images)

# Confirmation line followed by the closing rule.
print("\n‚úì Batch processing function ready", "="*60, sep="\n")


BATCH PROCESSING WORKFLOW

‚úì Batch processing function ready


In [19]:
print("\n" + "=" * 60)
print("SAVING PROTOTYPE DATA")
print("=" * 60)

# Save prototype metadata.
# Depends on `local_prototype` from the local-screenshots cell.
prototype_metadata = {
    "prototype_name": "Sample E-commerce Checkout",
    "screen_count": len(local_prototype['screens']),
    "date_parsed": str(pd.Timestamp.now()),  # needs the pandas import from the import cell
    "screens": local_prototype['screens']
}

output_path = "prototype_data.json"

with open(output_path, 'w') as f:
    json.dump(prototype_metadata, f, indent=2)

print(f"‚úì Prototype data saved to: {output_path}")
print(f"‚úì Screens: {prototype_metadata['screen_count']}")

# Display sample
print("\n" + "-" * 60)
print("SAMPLE PARSED UI:")
print("-" * 60)
# The screen list is empty when every screenshot failed to open; indexing [0]
# would then raise IndexError, so guard it.
if local_prototype['screens']:
    print(json.dumps(local_prototype['screens'][0], indent=2))
else:
    print("(no screens were parsed)")

print("\n" + "=" * 60)
print("PROTOTYPE PARSING COMPLETE")
print("=" * 60)
print("\nNext steps:")
print("1. Review parsed prototype structure")
print("2. Proceed to Notebook 3: Generate test scenarios")
print("3. Use personas from Notebook 1 to test this prototype")


SAVING PROTOTYPE DATA
‚úì Prototype data saved to: prototype_data.json
‚úì Screens: 1

------------------------------------------------------------
SAMPLE PARSED UI:
------------------------------------------------------------
{
  "screen_id": "home_screen",
  "interactive_elements": [
    {
      "type": "button",
      "label": "Get Started",
      "location": "center",
      "prominence": "high",
      "accessibility": "easy - large, centered, high contrast"
    },
    {
      "type": "link",
      "label": "Learn More",
      "location": "bottom-center",
      "prominence": "medium",
      "accessibility": "moderate - smaller text"
    }
  ],
  "visual_hierarchy": "Clear primary CTA with secondary action",
  "navigation_pattern": "Single primary action",
  "usability_concerns": [
    "No visible back/home navigation",
    "Unclear destination for 'Get Started'"
  ]
}

PROTOTYPE PARSING COMPLETE

Next steps:
1. Review parsed prototype structure
2. Proceed to Notebook 3: Generate te