# Testing services

## `_01_embeddings`

In [3]:
import requests
import json

# Base URL that the test function below prefixes to the endpoint path it requests.
BASE_URL = "http://localhost:8000"

def test_single_text_embedding(timeout=60):
    """Exercise ``POST /embeddings`` with a single text string.

    Sends one string under ``"texts"`` along with an explicit model name, a
    null ``dimensions`` value and ``validate=True``. Prints the status code
    and, on HTTP 200, the reported model, the length of ``data`` and its first
    five entries; on any other status the raw response text is printed.

    Note: the next code cell defines a function with the same name and body;
    whichever cell is executed last determines which definition is in effect.

    Args:
        timeout: Seconds passed to ``requests.post``. Without a timeout the
            call can block indefinitely if the server never answers.

    Returns:
        The ``requests.Response`` object, so the caller can inspect it further.

    Raises:
        requests.exceptions.Timeout: If the server does not respond in time.
    """
    url = f"{BASE_URL}/embeddings"
    payload = {
        "texts": "This is a sample text for embedding.",
        "model": "text-embedding-ada-002",  # Use default model if this doesn't work
        "dimensions": None,  # Optional: specify a smaller dimension if needed
        "validate": True
    }

    # Bound the wait so a stalled service fails the cell instead of hanging it.
    response = requests.post(url, json=payload, timeout=timeout)

    print(f"Status Code: {response.status_code}")
    if response.status_code == 200:
        result = response.json()
        print("Model used:", result["model"])
        print("Embedding dimensions:", len(result["data"]))
        print("First few values:", result["data"][:5])
    else:
        print("Error:", response.text)

    return response

# Run the check now; the returned Response is the cell's last expression and is displayed.
test_single_text_embedding()

Status Code: 200
Model used: text-embedding-ada-002
Embedding dimensions: 1536
First few values: [-0.023076197132468224, 0.0004667885659728199, -0.0064190831035375595, -0.0013034531148150563, 0.000989439431577921]


<Response [200]>

In [1]:
"""Test script for the Embeddings API using Python requests."""

import requests
import json

# Base URL that every test function in this cell prefixes to its endpoint path.
BASE_URL = "http://localhost:8000"

def test_single_text_embedding(timeout=60):
    """Exercise ``POST /embeddings`` with a single text string.

    Sends one string under ``"texts"`` along with an explicit model name, a
    null ``dimensions`` value and ``validate=True``. Prints the status code
    and, on HTTP 200, the reported model, the length of ``data`` and its first
    five entries; on any other status the raw response text is printed.

    Args:
        timeout: Seconds passed to ``requests.post``. Without a timeout the
            call can block indefinitely if the server never answers.

    Returns:
        The ``requests.Response`` object, so the caller can inspect it further.

    Raises:
        requests.exceptions.Timeout: If the server does not respond in time.
    """
    url = f"{BASE_URL}/embeddings"
    payload = {
        "texts": "This is a sample text for embedding.",
        "model": "text-embedding-ada-002",  # Use default model if this doesn't work
        "dimensions": None,  # Optional: specify a smaller dimension if needed
        "validate": True
    }

    # Bound the wait so a stalled service fails the cell instead of hanging it.
    response = requests.post(url, json=payload, timeout=timeout)

    print(f"Status Code: {response.status_code}")
    if response.status_code == 200:
        result = response.json()
        print("Model used:", result["model"])
        print("Embedding dimensions:", len(result["data"]))
        print("First few values:", result["data"][:5])
    else:
        print("Error:", response.text)

    return response


def test_multiple_texts_embedding(timeout=60):
    """Exercise ``POST /embeddings`` with a list of three text strings.

    The payload carries the list under ``"texts"`` and a ``batch_size`` of 10;
    no model is named in the request. Prints the status code and, on HTTP 200,
    the reported model, the number of entries in ``data`` and the length of
    the first entry; on any other status the raw response text is printed.

    Args:
        timeout: Seconds passed to ``requests.post``. Without a timeout the
            call can block indefinitely if the server never answers.

    Returns:
        The ``requests.Response`` object.

    Raises:
        requests.exceptions.Timeout: If the server does not respond in time.
    """
    url = f"{BASE_URL}/embeddings"
    payload = {
        "texts": [
            "First sample text for embedding.",
            "Second sample with different content.",
            "Third example with unique words."
        ],
        "batch_size": 10  # Small batch size for testing
    }

    # Bound the wait so a stalled service fails the cell instead of hanging it.
    response = requests.post(url, json=payload, timeout=timeout)

    print(f"\nStatus Code: {response.status_code}")
    if response.status_code == 200:
        result = response.json()
        print("Model used:", result["model"])
        print("Number of embeddings:", len(result["data"]))
        print("Dimensions of first embedding:", len(result["data"][0]))
    else:
        print("Error:", response.text)

    return response


def test_dict_texts_embedding(timeout=60):
    """Exercise ``POST /embeddings`` with a dictionary of text strings.

    The payload maps three keys (``doc1``..``doc3``) to strings under
    ``"texts"``. Prints the status code and, on HTTP 200, the reported model,
    the number of keys in ``data`` and the keys themselves; on any other
    status the raw response text is printed.

    Args:
        timeout: Seconds passed to ``requests.post``. Without a timeout the
            call can block indefinitely if the server never answers.

    Returns:
        The ``requests.Response`` object.

    Raises:
        requests.exceptions.Timeout: If the server does not respond in time.
    """
    url = f"{BASE_URL}/embeddings"
    payload = {
        "texts": {
            "doc1": "First document with some content.",
            "doc2": "Second document with different content.",
            "doc3": "Third document with more variety."
        }
    }

    # Bound the wait so a stalled service fails the cell instead of hanging it.
    response = requests.post(url, json=payload, timeout=timeout)

    print(f"\nStatus Code: {response.status_code}")
    if response.status_code == 200:
        result = response.json()
        print("Model used:", result["model"])
        print("Number of keys in response:", len(result["data"]))
        print("Keys in response:", list(result["data"].keys()))
    else:
        print("Error:", response.text)

    return response


def test_list_models(timeout=60):
    """Exercise ``GET /models`` and print every entry it returns.

    Prints the status code and, on HTTP 200, the number of items in the JSON
    body followed by one line per item; on any other status the raw response
    text is printed.

    Args:
        timeout: Seconds passed to ``requests.get``. Without a timeout the
            call can block indefinitely if the server never answers.

    Returns:
        The ``requests.Response`` object.

    Raises:
        requests.exceptions.Timeout: If the server does not respond in time.
    """
    url = f"{BASE_URL}/models"

    # Bound the wait so a stalled service fails the cell instead of hanging it.
    response = requests.get(url, timeout=timeout)

    print(f"\nStatus Code: {response.status_code}")
    if response.status_code == 200:
        models = response.json()
        print(f"Available models ({len(models)}):")
        for model in models:
            print(f"  - {model}")
    else:
        print("Error:", response.text)

    return response


def test_model_info(model_name="text-embedding-ada-002", timeout=60):
    """Exercise ``GET /model-info/{model_name}`` and print the returned fields.

    Prints the status code and, on HTTP 200, one ``key: value`` line per item
    of the JSON body; on any other status the raw response text is printed.

    Args:
        model_name: Model to look up. Defaults to the name that was previously
            hard-coded, so calling with no arguments behaves as before.
        timeout: Seconds passed to ``requests.get``. Without a timeout the
            call can block indefinitely if the server never answers.

    Returns:
        The ``requests.Response`` object.

    Raises:
        requests.exceptions.Timeout: If the server does not respond in time.
    """
    from urllib.parse import quote

    # Percent-encode the name so characters such as "/" or spaces cannot
    # change the shape of the URL path. The default name is left untouched.
    encoded_name = quote(model_name, safe="")
    url = f"{BASE_URL}/model-info/{encoded_name}"

    # Bound the wait so a stalled service fails the cell instead of hanging it.
    response = requests.get(url, timeout=timeout)

    print(f"\nStatus Code: {response.status_code}")
    if response.status_code == 200:
        info = response.json()
        print(f"Information for model '{model_name}':")
        for key, value in info.items():
            print(f"  {key}: {value}")
    else:
        print("Error:", response.text)

    return response


if __name__ == "__main__":
    # Two-line banner emitted before any request is sent.
    print("Testing Embeddings API...", "=========================", sep="\n")

    # Run every check in order. Each one prints its own report, and each
    # response is bound to its own module-level name.
    single_resp = test_single_text_embedding()
    multiple_resp = test_multiple_texts_embedding()
    dict_resp = test_dict_texts_embedding()
    models_resp = test_list_models()
    model_info_resp = test_model_info()

    # Closing line, preceded by a blank line.
    print("\nAll tests completed!")

Testing Embeddings API...
Status Code: 200
Model used: text-embedding-ada-002
Embedding dimensions: 1536
First few values: [-0.02304857224225998, 0.0004295369435567409, -0.006391848437488079, -0.001323735574260354, 0.0010258103720843792]

Status Code: 200
Model used: text-embedding-3-small
Number of embeddings: 3
Dimensions of first embedding: 1536

Status Code: 200
Model used: text-embedding-3-small
Number of keys in response: 3
Keys in response: ['doc1', 'doc2', 'doc3']

Status Code: 200
Available models (54):
  - gpt-4.5-preview
  - gpt-4.5-preview-2025-02-27
  - gpt-4o-mini-2024-07-18
  - gpt-4-0125-preview
  - gpt-4o-mini-audio-preview-2024-12-17
  - dall-e-3
  - dall-e-2
  - gpt-4-turbo-preview
  - gpt-4o-audio-preview-2024-10-01
  - gpt-4o-audio-preview
  - gpt-4o-mini-realtime-preview-2024-12-17
  - gpt-4o-mini-realtime-preview
  - o1-mini-2024-09-12
  - o1-mini
  - omni-moderation-latest
  - gpt-4o-mini-audio-preview
  - omni-moderation-2024-09-26
  - whisper-1
  - gpt-4o-real

# Composite store

In [None]:
from typing import Mapping, MutableMapping
from dataclasses import dataclass

# Two separate, initially empty module-level dicts.
metas, blobs = {}, {}

@dataclass
class Contents:
    """Dataclass with a single field, ``metas``, annotated as a ``MutableMapping``."""

    # Required field: no default is declared, so it must be supplied on construction.
    metas: MutableMapping
    


In [None]:
import oa

# Bare expression: the cell displays the function's repr, which includes its full signature.
oa.embeddings




<function oa.base.embeddings(texts: Union[str, Iterable[str], Mapping[~KT, str]], *, input: 'Union[str, List[str], Iterable[int], Iterable[Iterable[int]]]', batch_size: Optional[int] = 2048, egress: Optional[str] = None, batch_callback: Optional[Callable[[int, List[list]], Any]] = None, validate: Union[bool, Callable, NoneType] = True, valid_text_getter=<function _raise_if_any_invalid at 0x1171e3490>, model='text-embedding-3-small', client=None, dimensions: Optional[int] = NOT_GIVEN, encoding_format: "Literal['float', 'base64'] | NotGiven" = NOT_GIVEN, user: 'str | NotGiven' = NOT_GIVEN, extra_headers: 'Headers | None' = None, extra_query: 'Query | None' = None, extra_body: 'Body | None' = None, timeout: 'float | httpx.Timeout | None | NotGiven' = NOT_GIVEN)>

# Scrap: Mongodol work

In [1]:
import mongodol 

# List the names available on the imported mongodol package object.
dir(mongodol)

['MongoClientReader',
 'MongoCollectionFirstDocPersister',
 'MongoCollectionFirstDocReader',
 'MongoCollectionMultipleDocsPersister',
 'MongoCollectionMultipleDocsReader',
 'MongoCollectionPersister',
 'MongoCollectionReader',
 'MongoCollectionUniqueDocPersister',
 'MongoCollectionUniqueDocReader',
 'MongoDbReader',
 '__builtins__',
 '__cached__',
 '__doc__',
 '__file__',
 '__loader__',
 '__name__',
 '__package__',
 '__path__',
 '__spec__',
 'base',
 'constants',
 'get_mongo_collection_pymongo_obj',
 'get_test_collection_persister',
 'mk_dflt_mgc',
 'normalize_projection',
 'stores',
 'tests',
 'trans',
 'util']