# Lab 3.8: Capstone - Multi-Modal Content Analyzer

**Objective**: Build a unified system that analyzes text and images using Hugging Face Hub models

**Duration**: 55 minutes

## Learning Outcomes
- Search the Hub programmatically for models
- Load and explore datasets
- Build inference pipelines for multiple modalities
- Create a unified analysis function

In [None]:
import sys
# Put the relative source directory first on the import path so the
# hf_ecosystem package can be imported from this notebook.
sys.path.insert(0, "../../../src")
from hf_ecosystem import __version__
# Print the package version to confirm the import resolved.
print(f"hf-ecosystem version: {__version__}")

In [None]:
from huggingface_hub import HfApi
from datasets import load_dataset
from transformers import pipeline
from hf_ecosystem.hub import search_models
import requests
from PIL import Image
from io import BytesIO

## Part 1: Search the Hub Programmatically

In [None]:
# For each task, ask the search helper for three models and list them
tasks = ["text-classification", "image-classification"]

for task_name in tasks:
    top_models = search_models(task=task_name, limit=3)
    print(f"\n{task_name}:")
    for entry in top_models:
        model_id, download_count = entry.id, entry.downloads
        print(f"  {model_id} ({download_count:,} downloads)")

In [None]:
# Fetch the Hub metadata record for one specific model
api = HfApi()
model_info = api.model_info("distilbert-base-uncased-finetuned-sst-2-english")

print(f"Model: {model_info.id}")
print(f"Downloads: {model_info.downloads:,}")
# Fall back to 'N/A' when the record carries no card data
if model_info.card_data:
    license_label = model_info.card_data.license
else:
    license_label = 'N/A'
print(f"License: {license_label}")
# Only the first five tags are shown
first_tags = model_info.tags[:5]
print(f"Tags: {first_tags}")

## Part 2: Load Sample Datasets

In [None]:
# Take the first five rows of the rotten_tomatoes test split (movie reviews)
text_data = load_dataset("rotten_tomatoes", split="test[:5]")
print("Text samples:")
for position, row in enumerate(text_data, start=1):
    # Show only the first 60 characters of each review
    preview = row["text"][:60]
    print(f"  {position}. {preview}...")

In [None]:
# Take the first three rows of the beans test split
image_data = load_dataset("beans", split="test[:3]")
print(f"Image dataset columns: {image_data.column_names}")
# Class names come from the dataset's 'labels' feature
label_names = image_data.features["labels"].names
print(f"Labels: {label_names}")

## Part 3: Build Inference Pipelines

In [None]:
# Build the text-classification pipeline from a fixed checkpoint, on CPU
text_classifier = pipeline(
    task="text-classification",
    model="distilbert-base-uncased-finetuned-sst-2-english",
    device="cpu",
)
text_model_type = text_classifier.model.config.model_type
print(f"Text classifier loaded: {text_model_type}")

In [None]:
# Build the image-classification pipeline from a fixed checkpoint, on CPU
image_classifier = pipeline(
    task="image-classification",
    model="google/vit-base-patch16-224",
    device="cpu",
)
image_model_type = image_classifier.model.config.model_type
print(f"Image classifier loaded: {image_model_type}")

## Part 4: Run Analysis

In [None]:
# Classify each loaded review and report the first result the pipeline returns
print("Text Sentiment Analysis:")
print("-" * 50)
for row in text_data:
    review = row["text"]
    first = text_classifier(review)[0]
    # Show only the first 50 characters of the review
    print(f"Text: {review[:50]}...")
    label, score = first["label"], first["score"]
    print(f"Result: {label} ({score:.2%})\n")

In [None]:
# Classify each loaded dataset image and report the first result per image
print("Image Classification:")
print("-" * 50)
for number, sample in enumerate(image_data, start=1):
    first = image_classifier(sample["image"])[0]
    print(f"Image {number}: {first['label']} ({first['score']:.2%})")

In [None]:
# Load and analyze a web image
url = "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/pipeline-cat-chonk.jpeg"
# Bound the wait so a stalled connection cannot hang the notebook, and fail
# on HTTP errors so an error page is never handed to PIL as image bytes.
response = requests.get(url, timeout=30)
response.raise_for_status()
image = Image.open(BytesIO(response.content))

# Classify and keep the first three predictions
classification = image_classifier(image)[:3]
print("Classification:")
for r in classification:
    print(f"  {r['label']}: {r['score']:.2%}")

## Part 5: Unified Multi-Modal Analyzer

In [None]:
def analyze_content(text=None, image=None, timeout=30):
    """
    Multi-modal content analyzer.

    Runs the module-level ``text_classifier`` and/or ``image_classifier``
    pipelines on whichever inputs are supplied. Falsy inputs (``None`` or an
    empty string) are skipped, so calling with no arguments returns an empty
    dictionary.

    Args:
        text: String to analyze for sentiment.
        image: PIL Image, or a URL string that is downloaded and opened as
            an image before classification.
        timeout: Seconds to wait for the download when ``image`` is a URL.

    Returns:
        Dictionary with analysis results. Contains a "text" entry (input
        truncated to 100 characters, sentiment label, confidence) when text
        was analyzed and an "image" entry (category, confidence) when an
        image was classified.

    Raises:
        requests.RequestException: If ``image`` is a URL and the download
            times out, fails, or returns an HTTP error status.
    """
    results = {}

    if text:
        sentiment = text_classifier(text)[0]
        results["text"] = {
            "input": text[:100],
            "sentiment": sentiment["label"],
            "confidence": sentiment["score"],
        }

    if image:
        # Handle URL input
        if isinstance(image, str):
            # Without a timeout the request can block indefinitely.
            response = requests.get(image, timeout=timeout)
            # Surface HTTP errors here rather than letting PIL fail on an
            # error page that is not image data.
            response.raise_for_status()
            image = Image.open(BytesIO(response.content))

        classification = image_classifier(image)[0]
        results["image"] = {
            "category": classification["label"],
            "confidence": classification["score"],
        }

    return results

In [None]:
# Demo: analyze a single sentence with no image supplied
result = analyze_content(text="This product exceeded all my expectations!")
print("Text Analysis:")
text_part = result["text"]
print(f"  Sentiment: {text_part['sentiment']}")
print(f"  Confidence: {text_part['confidence']:.2%}")

In [None]:
# Demo: analyze a sentence and an image URL in one call
result = analyze_content(
    text="What a cute cat!",
    image="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/pipeline-cat-chonk.jpeg",
)
print("Multi-Modal Analysis:")
text_part = result["text"]
print(f"  Text sentiment: {text_part['sentiment']} ({text_part['confidence']:.2%})")
image_part = result["image"]
print(f"  Image category: {image_part['category']} ({image_part['confidence']:.2%})")

## Verification

In [None]:
def verify_lab():
    """Check that both pipelines exist and that text analysis yields a sentiment."""
    # Both pipeline objects must have been created by the earlier cells
    assert text_classifier is not None, "Text classifier not loaded"
    assert image_classifier is not None, "Image classifier not loaded"

    # Smoke-test the analyzer on a minimal text-only input
    outcome = analyze_content(text="test")
    assert "text" in outcome, "Text analysis failed"
    text_section = outcome["text"]
    assert "sentiment" in text_section, "Sentiment missing"

    print("All verifications passed!")


verify_lab()