# GerMed ChatBot (Gemedusa) - Embedding Utilities

This notebook provides utilities for generating and testing text and image embeddings using the modernized GerMed-Chatbot-FastAPI infrastructure.

## 🎓 Key Features:
- Uses the same `TextEmbeddingModel` and `ImageEmbeddingModel` singletons as the FastAPI app.
- Directly interacts with the models for vector generation.
- Optimized for testing without intermediate file storage.

## 1. Setup Environment

In [None]:
import os
import sys
import torch
import numpy as np
from pathlib import Path
from PIL import Image
from io import BytesIO
import httpx

# Make the project root importable so the `src.app` package resolves.
# NOTE: this assumes the notebook's working directory sits two levels below
# the project root; adjust the `.parent` chain if the notebook is moved.
project_root = Path.cwd().parent.parent
if str(project_root) not in sys.path:
    sys.path.append(str(project_root))

# These imports must come after the sys.path adjustment above.
from src.app.utils.embedding_model import TextEmbeddingModel, ImageEmbeddingModel
from src.app.config.settings import settings

# The status glyph was stored as mis-decoded bytes; restored to the intended
# check-mark emoji.
print(f"✅ Environment Ready. Project Root: {project_root}")

## 2. Text Embedding Laboratory

In [None]:
# Obtain the shared text embedding model through the project's accessor
# (this notebook describes it as a SentenceTransformer).
text_model = TextEmbeddingModel.get_instance()


def get_text_vector(text: str):
    """Embed ``text`` with the shared text model.

    Args:
        text: The string to embed.

    Returns:
        The result of ``text_model.encode(text)`` converted with
        ``.tolist()`` -- presumably a flat list of floats; confirm against
        the model wrapper.
    """
    embedding = text_model.encode(text)
    return embedding.tolist()

# Smoke test: embed one representative product query, then report the size of
# the returned vector and its first five components.
sample_query = "Veterinary surgical forceps for small cats"
vector = get_text_vector(sample_query)
print(f"Generated vector for '{sample_query}' (Size: {len(vector)})")
print(f"Preview (first 5): {vector[:5]}")

## 3. Vision Embedding Laboratory (CLIP)

In [None]:
# Fetch the shared image-embedding bundle (CLIP) and unpack the three entries
# the rest of this notebook uses: the model, its input processor, and the
# device the inputs must be moved to.
clip_data = ImageEmbeddingModel.get_instance()
model, processor, device = (
    clip_data[key] for key in ("model", "processor", "device")
)

def _load_rgb_image(image_input):
    """Open ``image_input`` and return it as a fully loaded RGB PIL image.

    A string starting with ``http://`` or ``https://`` is downloaded with
    httpx; anything else is handed to ``Image.open`` unchanged.
    """
    if isinstance(image_input, str) and image_input.startswith(("http://", "https://")):
        # httpx does not follow redirects unless asked to, and does not raise
        # on 4xx/5xx by itself; without these an error page would reach
        # Image.open and fail with a misleading "cannot identify image" error.
        response = httpx.get(image_input, follow_redirects=True)
        response.raise_for_status()
        source = BytesIO(response.content)
    else:
        source = image_input

    # Image.open is lazy; convert() forces the pixel data to load and returns
    # a new image, so the underlying file can be closed on leaving the block.
    with Image.open(source) as opened:
        return opened.convert("RGB")


def get_image_vector(image_input):
    """Embed an image with the shared CLIP model and return a unit-norm vector.

    Args:
        image_input: An ``http(s)://`` URL string, or anything ``Image.open``
            accepts.

    Returns:
        The image features flattened into a plain Python list, scaled to unit
        Euclidean norm.

    Raises:
        httpx.HTTPStatusError: If a URL download returns an error status.
        Other exceptions from httpx, PIL or the model propagate unchanged.
    """
    img = _load_rgb_image(image_input)

    with torch.no_grad():
        inputs = processor(images=img, return_tensors="pt", padding=True)
        inputs = {k: v.to(device) for k, v in inputs.items()}
        outputs = model.get_image_features(**inputs)

    # get_image_features may hand back either a bare tensor or a structured
    # output object; pick the embedding tensor in both cases.
    if hasattr(outputs, "image_embeds"):
        features = outputs.image_embeds
    elif hasattr(outputs, "pooler_output"):
        features = outputs.pooler_output
    else:
        features = outputs

    features = features.detach().cpu().numpy()
    # The small epsilon keeps the division finite for an all-zero output.
    norm = np.linalg.norm(features)
    features = features / (norm + 1e-12)

    return features.flatten().tolist()

# Smoke test against a public product image. Any failure (network, decoding,
# model) is reported rather than raised so the rest of the notebook still runs.
sample_image_url = "https://www.gervetusa.com/up_data/products/images/medium/1-110-14.jpg"
try:
    img_vector = get_image_vector(sample_image_url)
    print(f"Generated image vector (Size: {len(img_vector)})")
    print(f"Preview (first 5): {img_vector[:5]}")
except Exception as e:
    # The failure glyph was stored as mis-decoded bytes; restored to the
    # intended cross-mark emoji.
    print(f"❌ Could not process image: {e}")

## 4. Similarity Testing

In [None]:
def cosine_similarity(v1, v2):
    """Return the cosine similarity of two vectors as a Python float.

    Args:
        v1: First vector (list, tuple or array). Flattened before use, so a
            ``(1, d)`` embedding is accepted as well as a ``(d,)`` one.
        v2: Second vector, with the same number of elements as ``v1``.

    Returns:
        A value in ``[-1.0, 1.0]``. If either vector has zero norm the
        similarity is undefined and ``0.0`` is returned instead of NaN.

    Raises:
        ValueError: If the two vectors differ in length (raised by ``np.dot``).
    """
    a = np.asarray(v1, dtype=np.float64).ravel()
    b = np.asarray(v2, dtype=np.float64).ravel()
    denominator = np.linalg.norm(a) * np.linalg.norm(b)
    if denominator == 0.0:
        # Avoid 0/0 -> NaN (and the accompanying RuntimeWarning).
        return 0.0
    return float(np.dot(a, b) / denominator)


print("Utility loaded: cosine_similarity(v1, v2)")