In [None]:
# Python Example - OpenAI text-embedding-3-large

import os
from dotenv import load_dotenv

# Let python-dotenv populate the process environment before the key is read.
load_dotenv()

# Falls back to an empty string when the variable is not set.
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY", "")
import openai
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity

# Initialize OpenAI client
# One module-level client is shared by get_embedding() and
# get_embeddings_batch().
# NOTE(review): OPENAI_API_KEY is "" when the environment variable is unset;
# presumably a missing key then only surfaces on the first API request rather
# than here -- confirm against the SDK.
client = openai.OpenAI(api_key=OPENAI_API_KEY)

def get_embedding(text, model="text-embedding-3-large"):
    """Return the embedding vector the API produces for one piece of text.

    Args:
        text: The input to embed; forwarded unchanged as the request input.
        model: Name of the embedding model to request.

    Returns:
        The ``embedding`` attribute of the first item in the response data.
    """
    api_result = client.embeddings.create(model=model, input=text)
    # A single input is sent, so only the first returned item is read.
    first_item = api_result.data[0]
    return first_item.embedding

def get_embeddings_batch(texts: list[str], model="text-embedding-3-large"):
    """Return embeddings for several texts using a single API request.

    Args:
        texts: The strings to embed, sent together as one request input.
        model: Name of the embedding model to request.

    Returns:
        A list with the ``embedding`` of every item in the response data,
        in the order the response lists them. An empty list when ``texts``
        is empty.
    """
    # An empty batch has nothing to embed and the API does not accept empty
    # input, so answer it locally instead of issuing a failing request.
    if not texts:
        return []

    response = client.embeddings.create(
        input=texts,
        model=model
    )
    return [item.embedding for item in response.data]

def calculate_similarity(text1, text2):
    """Return the cosine similarity between the embeddings of two texts.

    Each text is embedded with ``get_embedding`` (``text1`` first), and the
    two vectors are compared with scikit-learn's ``cosine_similarity``.
    """
    # cosine_similarity works on 2-D arrays, so every embedding is turned
    # into a single-row matrix.
    row_a, row_b = (
        np.array(get_embedding(passage)).reshape(1, -1)
        for passage in (text1, text2)
    )

    # The result is a 1x1 matrix; unwrap its only entry.
    pairwise = cosine_similarity(row_a, row_b)
    return pairwise[0][0]

# Example 1: Basic embedding
# Embed one sentence and show the vector's length and first few components.
text = "The weather is beautiful today"
embedding = get_embedding(text)
print(f"Embedding dimension: {len(embedding)}")
print(f"First 5 values: {embedding[:5]}")

# Example 2: Batch processing
documents = [
    "Python is a programming language",
    "JavaScript is used for web development",
    "Machine learning helps computers learn patterns",
    "Natural language processing analyzes text"
]

# One request covers the whole list; the code below pairs embeddings[i]
# with documents[i].
embeddings = get_embeddings_batch(documents)
print(f"Generated {len(embeddings)} embeddings")

# Example 3: Semantic similarity search
query = "What programming languages are popular?"
query_embedding = get_embedding(query)

# Calculate similarities with documents
# cosine_similarity takes 2-D inputs, so the query becomes a single-row
# matrix (built once, not per document) and the document embeddings are
# stacked into one matrix. A single call then scores every document.
query_emb = np.array(query_embedding).reshape(1, -1)
doc_matrix = np.array(embeddings)
scores = cosine_similarity(query_emb, doc_matrix)[0]
similarities = [(i, score, documents[i]) for i, score in enumerate(scores)]

# Sort by similarity (highest first)
similarities.sort(key=lambda x: x[1], reverse=True)

print(f"\nQuery: {query}")
print("Most similar documents:")
for rank, (_, sim, doc) in enumerate(similarities, 1):
    print(f"{rank}. {doc} (similarity: {sim:.4f})")

# Example 4: Simple RAG-like document retrieval
class SimpleDocumentRetriever:
    """In-memory semantic search over a fixed collection of documents.

    All documents are embedded once when the retriever is built. Each
    ``search`` call embeds the query and ranks the stored documents by
    cosine similarity to it.
    """

    def __init__(self, documents):
        """Store ``documents`` and compute their embeddings.

        Args:
            documents: The texts to index. May be empty, in which case no
                embedding request is made and every search returns ``[]``.
        """
        # Keep a snapshot so later mutation of the caller's list cannot put
        # self.documents out of step with self.embeddings.
        self.documents = list(documents)
        # Nothing to embed for an empty corpus, so skip the API request.
        self.embeddings = (
            get_embeddings_batch(self.documents) if self.documents else []
        )

    def search(self, query, top_k=3):
        """Return the stored documents most similar to ``query``.

        Args:
            query: Text to search for.
            top_k: Maximum number of results. ``None`` returns every
                document; zero or a negative value returns no results.

        Returns:
            A list of dicts with keys ``'document'``, ``'similarity'`` and
            ``'index'``, ordered from most to least similar.
        """
        # Cases that cannot produce a result are answered before the query
        # is embedded, which avoids a pointless API request.
        if top_k is not None and top_k <= 0:
            return []
        if len(self.embeddings) == 0:
            return []

        # cosine_similarity takes 2-D inputs: the query is a single-row
        # matrix and the stored embeddings form one row per document, so a
        # single call yields the score of every document.
        query_vector = np.asarray(get_embedding(query)).reshape(1, -1)
        doc_matrix = np.asarray(self.embeddings)
        scores = cosine_similarity(query_vector, doc_matrix)[0]

        # Get top-k most similar documents. Sorting (score, index) pairs in
        # descending order ranks by score, with ties going to the higher index.
        ranked = sorted(zip(scores, range(len(scores))), reverse=True)
        return [
            {
                'document': self.documents[idx],
                'similarity': sim,
                'index': idx
            }
            for sim, idx in ranked[:top_k]
        ]

# Usage example
# Small corpus covering several unrelated topics.
knowledge_base = [
    "Paris is the capital of France and known for the Eiffel Tower",
    "Tokyo is Japan's capital and largest city",
    "Python is a versatile programming language used for data science",
    "Machine learning algorithms can predict future trends",
    "The Great Wall of China is a historic fortification"
]

# Building the retriever embeds every knowledge_base entry up front.
retriever = SimpleDocumentRetriever(knowledge_base)
# top_k is left at its default of 3, so at most three hits are returned.
results = retriever.search("What is the capital of France?")

print("\nDocument retrieval results:")
# Each result is a dict with 'document', 'similarity' and 'index' keys.
for result in results:
    print(f"Similarity: {result['similarity']:.4f} - {result['document']}")

# JavaScript/Node.js Example
# The JavaScript below is kept as plain text and only printed by this
# script; it is never executed here. It mirrors the Python examples above:
# single and batch embedding helpers, a cosine similarity function and a
# small semantic search over the same four documents.
js_code = '''
// JavaScript/Node.js Example
const OpenAI = require('openai');

// Read the key from the environment instead of hard-coding it in source.
const openai = new OpenAI({
  apiKey: process.env.OPENAI_API_KEY
});

async function getEmbedding(text, model = 'text-embedding-3-large') {
  try {
    const response = await openai.embeddings.create({
      model: model,
      input: text,
    });
    return response.data[0].embedding;
  } catch (error) {
    console.error('Error getting embedding:', error);
    throw error;
  }
}

async function getEmbeddingsBatch(texts, model = 'text-embedding-3-large') {
  try {
    const response = await openai.embeddings.create({
      model: model,
      input: texts,
    });
    return response.data.map(item => item.embedding);
  } catch (error) {
    console.error('Error getting embeddings:', error);
    throw error;
  }
}

function cosineSimilarity(vecA, vecB) {
  const dotProduct = vecA.reduce((sum, a, i) => sum + a * vecB[i], 0);
  const magnitudeA = Math.sqrt(vecA.reduce((sum, a) => sum + a * a, 0));
  const magnitudeB = Math.sqrt(vecB.reduce((sum, b) => sum + b * b, 0));
  // A zero-magnitude vector has no direction; report 0 instead of NaN.
  if (magnitudeA === 0 || magnitudeB === 0) {
    return 0;
  }
  return dotProduct / (magnitudeA * magnitudeB);
}

async function semanticSearch() {
  const documents = [
    "Python is a programming language",
    "JavaScript is used for web development",
    "Machine learning helps computers learn patterns",
    "Natural language processing analyzes text"
  ];
  
  const query = "What programming languages are popular?";
  
  // Get embeddings
  const docEmbeddings = await getEmbeddingsBatch(documents);
  const queryEmbedding = await getEmbedding(query);
  
  // Calculate similarities
  const similarities = documents.map((doc, i) => ({
    document: doc,
    similarity: cosineSimilarity(queryEmbedding, docEmbeddings[i])
  }));
  
  // Sort by similarity
  similarities.sort((a, b) => b.similarity - a.similarity);
  
  console.log('Search Results:');
  similarities.forEach((result, i) => {
    console.log(`${i + 1}. ${result.document} (${result.similarity.toFixed(4)})`);
  });
}

// Usage
semanticSearch().catch(console.error);
'''

# Print the snippet under a banner so it stands apart from the Python output.
print("\n" + "="*50)
print("JavaScript/Node.js Code:")
print("="*50)
print(js_code)

Embedding dimension: 3072
First 5 values: [0.030305640771985054, -0.006016154307872057, -0.0005967720644548535, 0.0020290249958634377, 0.01048935018479824]
Generated 4 embeddings

Query: What programming languages are popular?
Most similar documents:
1. Python is a programming language (similarity: 0.4071)
2. JavaScript is used for web development (similarity: 0.3633)
3. Natural language processing analyzes text (similarity: 0.2094)
4. Machine learning helps computers learn patterns (similarity: 0.2084)

Document retrieval results:
Similarity: 0.6177 - Paris is the capital of France and known for the Eiffel Tower
Similarity: 0.3176 - Tokyo is Japan's capital and largest city
Similarity: 0.1162 - Python is a versatile programming language used for data science

JavaScript/Node.js Code:

// JavaScript/Node.js Example
const OpenAI = require('openai');

const openai = new OpenAI({
  apiKey: 'your-api-key-here'
});

async function getEmbedding(text, model = 'text-embedding-3-large') {
  try