# Embeddings Benchmark

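This notebook provides a simple way to compare two texts: it fetches an embedding for each text from the DeepInfra API and reports the cosine similarity and Euclidean distance between the two embeddings.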

In [None]:
# Import necessary libraries
import os
import numpy as np
import httpx
import matplotlib.pyplot as plt
from typing import List, Optional
from dotenv import load_dotenv
import asyncio

# Load environment variables if you have a .env file
load_dotenv()

In [None]:
# Simple implementation of embeddings client without batching
class SimpleDeepInfraEmbedder:
    """
    A simplified client for obtaining embeddings from the DeepInfra API.

    This version doesn't include batching or advanced features: each call to
    ``get_embedding`` sends exactly one text in one HTTP request.

    The instance owns an ``httpx.AsyncClient``. Release it with
    ``await embedder.close()`` or by using the embedder as an async context
    manager (``async with SimpleDeepInfraEmbedder(...) as embedder:``).
    """

    def __init__(
        self,
        api_key: str,
        base_url: str = "https://api.deepinfra.com/v1/inference/BAAI/bge-en-icl",
        *,
        verify: bool = True,
        timeout: float = 60,
    ):
        """
        Create the client.

        Args:
            api_key: Token sent as ``Authorization: Bearer <api_key>`` on
                every request.
            base_url: Full URL of the inference endpoint to POST to.
            verify: Whether to verify the server's TLS certificate. Defaults
                to True so the bearer token is never sent to an
                unauthenticated host; pass False only for local debugging
                (e.g. behind an intercepting proxy).
            timeout: Timeout handed to ``httpx.AsyncClient``.
        """
        self.base_url = base_url
        self.api_key = api_key
        self.client = httpx.AsyncClient(timeout=timeout, verify=verify)

    async def __aenter__(self) -> "SimpleDeepInfraEmbedder":
        """Enter ``async with``; returns the embedder itself."""
        return self

    async def __aexit__(self, exc_type, exc, tb) -> None:
        """Leave ``async with``; always closes the underlying HTTP client."""
        await self.close()

    async def get_embedding(self, text: str) -> List[float]:
        """
        Get embeddings for a single text.

        Args:
            text: The text to embed.

        Returns:
            The first entry of the ``"embeddings"`` list in the JSON response.

        Raises:
            httpx.HTTPStatusError: If the API answers with a 4xx/5xx status.
            KeyError, IndexError: If the response JSON has no ``"embeddings"``
                entry or the list is empty.
        """
        # The endpoint takes a list of inputs; we always send exactly one.
        # NOTE(review): "normalize" presumably requests unit-length vectors;
        # confirm against the DeepInfra API docs.
        payload = {
            "inputs": [text],
            "normalize": True,
        }

        response = await self.client.post(
            self.base_url,
            json=payload,
            headers={
                "Content-Type": "application/json",
                "Authorization": f"Bearer {self.api_key}",
            },
        )
        # Surface HTTP errors before trying to read the body as a result.
        response.raise_for_status()

        json_response = response.json()
        # One input was sent, so the single embedding is at index 0.
        return json_response["embeddings"][0]

    async def close(self):
        """Close the underlying HTTP client and its connections."""
        await self.client.aclose()

In [None]:
# Helper functions for comparing embeddings
def cosine_similarity(vec1, vec2):
    """
    Calculate cosine similarity between two vectors.

    Args:
        vec1: First vector (any sequence or array accepted by NumPy).
        vec2: Second vector, same length as ``vec1``.

    Returns:
        ``dot(vec1, vec2) / (norm(vec1) * norm(vec2))``. If either vector has
        zero norm the cosine is undefined; 0.0 is returned in that case
        instead of NaN.
    """
    dot_product = np.dot(vec1, vec2)
    norm_product = np.linalg.norm(vec1) * np.linalg.norm(vec2)
    if norm_product == 0:
        # A zero vector has no direction; dividing would give 0/0 = NaN plus
        # a RuntimeWarning, which then poisons any downstream comparison.
        return np.float64(0.0)
    return dot_product / norm_product

def euclidean_distance(vec1, vec2):
    """
    Return the Euclidean (L2) distance between ``vec1`` and ``vec2``.

    Both arguments are converted to NumPy arrays, subtracted element-wise,
    and the norm of the difference is returned.
    """
    first = np.asarray(vec1)
    second = np.asarray(vec2)
    difference = first - second
    return np.linalg.norm(difference)

async def compare_texts(embedder, text1, text2):
    """
    Compare two texts by computing their embeddings and similarity metrics.

    Args:
        embedder: Object exposing ``async get_embedding(text)``.
        text1: First text to embed.
        text2: Second text to embed.

    Returns:
        A dict with keys ``"embedding1"``, ``"embedding2"``,
        ``"cosine_similarity"`` and ``"euclidean_distance"``, or ``None`` if
        anything fails (the error is printed, not raised), so callers must
        check for ``None`` before indexing the result.
    """
    try:
        # The two requests are independent, so send them concurrently rather
        # than waiting for the first round-trip before starting the second.
        embedding1, embedding2 = await asyncio.gather(
            embedder.get_embedding(text1),
            embedder.get_embedding(text2),
        )

        # Calculate similarity metrics
        similarity = cosine_similarity(embedding1, embedding2)
        distance = euclidean_distance(embedding1, embedding2)

        return {
            "embedding1": embedding1,
            "embedding2": embedding2,
            "cosine_similarity": similarity,
            "euclidean_distance": distance,
        }
    except Exception as e:
        # Best-effort by design: report the problem and signal failure with
        # None instead of propagating the exception to the caller.
        print(f"Error comparing texts: {e}")
        return None

In [None]:
# Initialize the embedder with your API key
from getpass import getpass

# Prefer the key from the environment (including any .env file loaded earlier).
api_key = os.environ.get("DEEPINFRA_API_KEY")
if not api_key:
    # getpass hides what is typed; input() would echo the secret into the
    # cell output, where it gets saved with the notebook.
    api_key = getpass("Enter your DeepInfra API key: ")

embedder = SimpleDeepInfraEmbedder(api_key=api_key)

In [None]:
# Example usage: Compare two texts
text1 = "cat"
text2 = "tiger"

# For Jupyter, we need to run async functions with asyncio
results = await compare_texts(embedder, text1, text2)

print(f"Cosine Similarity: {results['cosine_similarity']:.4f}")
print(f"Euclidean Distance: {results['euclidean_distance']:.4f}")