In [None]:
# default_exp llm

# LLM Embedding Support

> Provides mocks of LLM embedding APIs for use in tests.

This module provides text embedding capabilities for Large Language Models, specifically supporting Chinese text processing.

## Overview

The LLM module offers:
- **Text Embeddings**: Generate text embeddings using SentenceTransformer models
- **Chinese Language Support**: Optimized for Chinese text with GanymedeNil/text2vec-large-chinese
- **OpenAI API Mocking**: Mock OpenAI embedding APIs for testing purposes
- **PyTest Integration**: Fixtures for embedding-related testing

## Key Features

- High-quality Chinese text embeddings
- OpenAI Embedding API compatibility
- Vector similarity operations
- Test fixture support for embeddings

In [None]:
# exporti
from sentence_transformers import SentenceTransformer
# Module-level model instance, created once when this cell/module runs and
# reused by mock_embed_create for every encode call.
embeddings_model = SentenceTransformer('GanymedeNil/text2vec-large-chinese')

## Embedding Model Setup

Initialize the Chinese text embedding model for vector representations.

In [None]:
# export
import ipymock.browser

def mock_embed_create(**kwargs):
    """Stand-in for ``openai.Embedding.create`` backed by the local model.

    Reads the ``input`` keyword argument, encodes it with the module-level
    ``embeddings_model`` and wraps the vectors in a dict laid out like an
    OpenAI embeddings list response.

    Args:
        **kwargs: Must contain ``input``. A bare ``str`` is treated as a
            one-element batch; any other value is handed to
            ``embeddings_model.encode`` unchanged. All other keyword
            arguments are ignored.

    Returns:
        The result of ``ipymock.browser.attributize`` applied to a dict with
        keys ``object`` (``'list'``), ``data`` (one entry per encoded vector,
        carrying its position under ``index``) and ``usage`` (token counts
        hard-coded to 0, since no tokenization is performed here).

    Raises:
        KeyError: If ``input`` is not supplied.
    """
    payload = kwargs['input']
    # A single string becomes a batch of one so encode() yields one row per text.
    batch = [payload] if isinstance(payload, str) else payload

    vectors = embeddings_model.encode(batch)
    # NOTE(review): each vector is stored exactly as encode() yields it
    # (presumably a NumPy array, not a plain list) — confirm whether callers
    # need JSON-serializable lists.
    items = [
        {'object': 'embedding', 'embedding': vector, 'index': position}
        for position, vector in enumerate(vectors)
    ]

    response = {
        'object': 'list',
        'data': items,
        'usage': {'prompt_tokens': 0, 'total_tokens': 0},
    }
    return ipymock.browser.attributize(response)

## OpenAI Embedding API Mock

Mock implementation of OpenAI's embedding API using the Chinese text model.

In [None]:
# export
import openai, pytest

@pytest.fixture
def mock_openai_embed(monkeypatch):
    """Pytest fixture that swaps ``openai.Embedding.create`` for the local mock.

    Uses pytest's ``monkeypatch`` fixture to set the ``create`` attribute of
    ``openai.Embedding`` to ``mock_embed_create``. The fixture itself returns
    nothing.
    """
    patched_class = openai.Embedding
    replacement = mock_embed_create
    monkeypatch.setattr(patched_class, 'create', replacement)