#### Fireworks AI Embedding Techniques

Converting text into vectors using the Fireworks AI API.


In [None]:
import os
from dotenv import load_dotenv
import requests
import json

# Load environment variables via python-dotenv before any os.getenv lookups
# (presumably from a local .env file that defines FIREWORKS_API_KEY — TODO confirm)
load_dotenv()

In [None]:
# Fireworks API configuration: fixed base URL, key read from the environment
FIREWORKS_API_BASE = "https://api.fireworks.ai/inference/v1"
FIREWORKS_API_KEY = os.environ.get("FIREWORKS_API_KEY")

# Report only whether a key was found; the key itself is never printed
print("API Key loaded:", "Yes" if FIREWORKS_API_KEY else "No")
print("API Base URL:", FIREWORKS_API_BASE)

In [None]:
def get_fireworks_embedding(text, model="nomic-ai/nomic-embed-text-v1.5", timeout=30):
    """
    Get embeddings from Fireworks AI API

    Sends one POST request to the ``/embeddings`` endpoint under
    ``FIREWORKS_API_BASE`` and returns the first embedding in the response.
    Failures are reported with ``print`` and signalled by returning ``None``
    instead of raising.

    Args:
        text (str): Text to embed
        model (str): Model to use for embeddings
        timeout (float | tuple): Timeout in seconds handed to ``requests.post``.
            Without one, ``requests`` waits indefinitely on an unresponsive
            server.

    Returns:
        list | None: Embedding vector, or None if the request failed or the
        response did not have the expected shape
    """
    url = f"{FIREWORKS_API_BASE}/embeddings"

    headers = {
        "Authorization": f"Bearer {FIREWORKS_API_KEY}",
        "Content-Type": "application/json"
    }

    payload = {
        "model": model,
        "input": text
    }

    # Transport/HTTP failures (connection errors, timeouts, 4xx/5xx statuses)
    try:
        response = requests.post(url, headers=headers, json=payload, timeout=timeout)
        response.raise_for_status()
    except requests.exceptions.RequestException as e:
        print(f"Error making request: {e}")
        return None

    # Parsing is handled separately so a malformed body is reported as a
    # parsing problem: ValueError covers a non-JSON body, KeyError a missing
    # field, IndexError an empty "data" list, TypeError a non-dict payload.
    try:
        result = response.json()
        return result["data"][0]["embedding"]
    except (ValueError, KeyError, IndexError, TypeError) as e:
        print(f"Error parsing response: {e}")
        print(f"Response: {response.text}")
        return None

In [None]:
# Try the helper on a single short phrase
text = "hello world"
print(f"Generating embedding for: '{text}'")

embedding = get_fireworks_embedding(text)

# The helper returns None on failure; any falsy result is reported as a failure
if not embedding:
    print("Failed to generate embedding")
else:
    print("\nEmbedding generated successfully!")
    print(f"Embedding dimensions: {len(embedding)}")
    # Show both ends of the vector as a quick sanity check
    print(f"First 10 values: {embedding[:10]}")
    print(f"Last 10 values: {embedding[-10:]}")

In [None]:
# Optional second route: request the same embedding through LangChain.
# The import sits inside the try so that a missing FireworksEmbeddings class
# only prints a notice instead of stopping the notebook.

try:
    from langchain_community.embeddings import FireworksEmbeddings

    # Build the LangChain client with the key loaded earlier
    fireworks_embeddings = FireworksEmbeddings(
        model="nomic-ai/nomic-embed-text-v1.5",
        api_key=FIREWORKS_API_KEY,
    )

    # Embed a single query string
    langchain_embedding = fireworks_embeddings.embed_query("hello world")

    print("\nLangChain Fireworks Embedding:")
    print("Dimensions:", len(langchain_embedding))
    print("First 10 values:", langchain_embedding[:10])

except ImportError:
    # The LangChain integration could not be imported
    print("\nLangChain Fireworks embeddings not available. Using direct API approach.")
except Exception as e:
    # Anything else raised while constructing the client or embedding the query
    print(f"\nError with LangChain approach: {e}")

In [None]:
# Embed several phrases, keeping only the ones that succeed
texts = [
    "hello world",
    "goodbye world",
    "machine learning",
    "artificial intelligence",
]

print("Generating embeddings for multiple texts...\n")

# Maps each phrase to its vector; phrases with a falsy result are left out
embeddings_dict = {}
for text in texts:
    embedding = get_fireworks_embedding(text)
    if not embedding:
        print(f"✗ Failed to get embedding for '{text}'")
        continue
    embeddings_dict[text] = embedding
    print(f"✓ '{text}': {len(embedding)} dimensions")

print(f"\nSuccessfully generated {len(embeddings_dict)} embeddings")

In [None]:
# Pairwise cosine similarity between the embeddings collected so far
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity

if len(embeddings_dict) < 2:
    print("\nNeed at least 2 embeddings to calculate similarity")
else:
    print("\nCalculating cosine similarities:")

    # Keys and values are listed in the same dict order, so row k of the
    # matrix belongs to text_list[k]
    text_list = list(embeddings_dict)
    embedding_matrix = np.array(list(embeddings_dict.values()))

    # Square matrix of similarities between every pair of rows
    similarity_matrix = cosine_similarity(embedding_matrix)

    # Walk only the pairs with i < j so each pair is printed once and
    # self-comparisons are skipped
    for i, text1 in enumerate(text_list):
        for j, text2 in enumerate(text_list[i + 1:], start=i + 1):
            similarity = similarity_matrix[i][j]
            print(f"'{text1}' vs '{text2}': {similarity:.4f}")

In [None]:
# Persist the collected embeddings to disk in two formats (optional)
import pickle

if not embeddings_dict:
    print("\nNo embeddings to save")
else:
    # Binary copy via pickle
    with open('fireworks_embeddings.pkl', 'wb') as pkl_file:
        pickle.dump(embeddings_dict, pkl_file)

    # Human-readable copy as indented JSON
    with open('fireworks_embeddings.json', 'w') as json_file:
        json.dump(embeddings_dict, json_file, indent=2)

    print("\nEmbeddings saved to:")
    print("- fireworks_embeddings.pkl")
    print("- fireworks_embeddings.json")