# Lexy client

In [1]:
from lexy_py.client import LexyClient

# Build the client with no arguments; later cells make all their API calls through `lexy`.
lexy = LexyClient()

In [2]:
import inspect
import pytest

# Collections

### List collections

In [3]:
# List the existing collections; the bare expression is rendered as the cell output.
lexy.collection.list_collections()

[<Collection('default', description=Default collection)>,
 <Collection('code', description=Github code repos)>,
 <Collection('texts', description=Text messages)>]

### Get collection

In [4]:
# Look up a single collection by its ID and display it.
default_collection = lexy.collection.get_collection('default')
default_collection

<Collection('default', description=Default collection)>

In [5]:
# List documents through the Collection object itself, then preview only the first five.
default_docs = default_collection.list_documents()
default_docs[:5]

[<Document("This is my first document! It's great!", ddbaef54-abee-48b2-b1d6-45d1d0f65812)>,
 <Document("Meh. This one is just ok.", 732492b3-3c1e-4692-9693-ce4904eb5691)>,
 <Document("Starlink is a satellite internet constellation operated by American aerospace company SpaceX,...", 4d605d42-94df-423b-bb96-ee41c521760f)>,
 <Document("A latent space, also known as a latent feature space or embedding space, is an embedding of a set...", fa15ee1a-a546-4cb4-9699-4457cf0a9f6f)>,
 <Document("ray is an open source framework", 0375fa39-fa05-4547-a05e-c5cab8980580)>]

In [6]:
# Pick the first listed document and display its repr.
doc = default_docs[0]
doc

<Document("This is my first document! It's great!", ddbaef54-abee-48b2-b1d6-45d1d0f65812)>

### Create new collection

In [7]:
from lexy_py.collection.models import Collection

In [8]:
# Print the constructor signature of the Collection model (all parameters are keyword-only).
print(inspect.signature(Collection))

(*, collection_id: lexy_py.collection.models.ConstrainedStrValue, description: Optional[str] = None, created_at: Optional[datetime.datetime] = None, updated_at: Optional[datetime.datetime] = None) -> None


In [9]:
# Instantiate the model directly instead of going through `lexy`: no API client is attached.
# NOTE(review): presumably nothing is created server-side here (the ID never shows up in
# the later `list_collections()` output) - confirm.
collection_without_a_client = Collection(collection_id='notbeingcreated')
collection_without_a_client

<Collection('notbeingcreated', description=None)>

In [10]:
# A Collection built by hand has no API client, so an API-backed method must fail with
# a ValueError. The exception is the demonstration here, so it is caught and displayed.
with pytest.raises(ValueError) as exc_info:
    collection_without_a_client.list_documents()
print(exc_info)
# `pytest.raises(ValueError)` already enforces the exception type (the block fails if no
# ValueError is raised), so only the message needs an explicit check.
assert str(exc_info.value) == "API client has not been set."

<ExceptionInfo ValueError('API client has not been set.') tblen=3>


In [11]:
# Create a throwaway collection through the client (it is deleted again a few cells below).
new_collection = lexy.collection.add_collection(
    collection_id='junk',
    description='just testing for now',
)
new_collection

<Collection('junk', description=just testing for now)>

In [12]:
# List collections again to confirm that 'junk' now appears.
lexy.collection.list_collections()

[<Collection('default', description=Default collection)>,
 <Collection('code', description=Github code repos)>,
 <Collection('texts', description=Text messages)>,
 <Collection('junk', description=just testing for now)>]

In [13]:
# A freshly created collection has no documents yet, so this displays an empty list.
new_collection.list_documents()

[]

In [14]:
# Clean up the throwaway collection. The ID is read from `new_collection` instead of being
# repeated as a literal, so this cell always deletes exactly the collection created above
# (the same pattern the document cleanup uses with `new_doc.document_id`).
lexy.collection.delete_collection(new_collection.collection_id)

{'Say': 'Collection deleted!'}

In [15]:
# Verify the cleanup: 'junk' should no longer be listed.
lexy.collection.list_collections()

[<Collection('default', description=Default collection)>,
 <Collection('code', description=Github code repos)>,
 <Collection('texts', description=Text messages)>]

# Documents

### List documents

In [16]:
# List documents via the client and preview the first five.
# NOTE(review): with no argument this appears to use the 'default' collection - the output
# matches `default_collection.list_documents()` above; confirm.
lexy.document.list_documents()[:5]

[<Document("This is my first document! It's great!", ddbaef54-abee-48b2-b1d6-45d1d0f65812)>,
 <Document("Meh. This one is just ok.", 732492b3-3c1e-4692-9693-ce4904eb5691)>,
 <Document("Starlink is a satellite internet constellation operated by American aerospace company SpaceX,...", 4d605d42-94df-423b-bb96-ee41c521760f)>,
 <Document("A latent space, also known as a latent feature space or embedding space, is an embedding of a set...", fa15ee1a-a546-4cb4-9699-4457cf0a9f6f)>,
 <Document("ray is an open source framework", 0375fa39-fa05-4547-a05e-c5cab8980580)>]

In [17]:
# Pass a collection ID to list the documents of that collection instead.
code_docs = lexy.document.list_documents('code')
code_docs

[<Document("import this", 20984c80-2a3c-475d-af59-45864762fc73)>,
 <Document("def multiply(a, b): return a * bif __name__ == '__main__': print(multiply(2, 3))", 1a9317e5-0d1f-4c7f-b731-42bddf0f4c98)>]

In [18]:
# Keep the first 'code' document for the client check in the next cell.
code_doc = code_docs[0]
code_doc

<Document("import this", 20984c80-2a3c-475d-af59-45864762fc73)>

In [19]:
# A document returned by `list_documents` carries a LexyClient in its `client` attribute.
assert isinstance(code_doc.client, LexyClient)

### Get document

In [20]:
# Fetch a single document by ID. The ID comes from `code_doc` (listed above) rather than
# from a pasted literal UUID, so the cell does not depend on the IDs of one particular
# database and still works after the data is re-seeded.
sample_doc = lexy.document.get_document(code_doc.document_id)
sample_doc

<Document("import this", 20984c80-2a3c-475d-af59-45864762fc73)>

In [21]:
# Dump the document's fields (ID, content, meta, timestamps, collection ID) as a dict.
sample_doc.dict()

{'document_id': '20984c80-2a3c-475d-af59-45864762fc73',
 'content': 'import this',
 'meta': {'filename': 'main.py', 'language': 'python', 'file_extension': 'py'},
 'created_at': datetime.datetime(2023, 11, 3, 21, 40, 23, 948372, tzinfo=datetime.timezone.utc),
 'updated_at': datetime.datetime(2023, 11, 3, 21, 40, 23, 948372, tzinfo=datetime.timezone.utc),
 'collection_id': 'code'}

In [22]:
# A document fetched with `get_document` also carries a LexyClient in `client`.
assert isinstance(sample_doc.client, LexyClient)

### Create new document

In [23]:
from lexy_py.document.models import Document

In [24]:
# Print the Document constructor signature: `content` is the only positional parameter,
# every other field is keyword-only.
print(inspect.signature(Document))

(content: str, *, document_id: Optional[str] = None, meta: Optional[dict[Any, Any]] = {}, created_at: Optional[datetime.datetime] = None, updated_at: Optional[datetime.datetime] = None, collection_id: Optional[str] = None) -> None


In [25]:
# A Document constructed directly has no document_id (shown as None in its repr).
Document("this is a new doc")

<Document("this is a new doc", None)>

In [26]:
# Add one document, given as a plain dict, through the client. The raw response (the stored
# document plus its `tasks` entries) is kept for the next cell and displayed.
new_doc_response = lexy.document.add_documents(
    [dict(content='This is my shiny new document!')]
)
new_doc_response

[{'document': {'updated_at': '2023-11-08T01:02:01.015371+00:00',
   'created_at': '2023-11-08T01:02:01.015371+00:00',
   'document_id': 'afe30284-6d0e-4a39-af91-e12a22d97626',
   'meta': {},
   'content': 'This is my shiny new document!',
   'collection_id': 'default'},
  'tasks': [{'task_id': 'b09250d0-9390-4b9d-a5dd-92e6be0cc33a',
    'document_id': 'afe30284-6d0e-4a39-af91-e12a22d97626'},
   {'task_id': '20b854b0-1e70-489a-bc6c-ff9516450599',
    'document_id': 'afe30284-6d0e-4a39-af91-e12a22d97626'}]}]

In [27]:
# Wrap the fields returned by `add_documents` in a Document model, attaching the client by hand.
new_doc = Document(client=lexy, **new_doc_response[0]['document'])
new_doc

<Document("This is my shiny new document!", afe30284-6d0e-4a39-af91-e12a22d97626)>

In [28]:
# Clean up: delete the document that was just added, using the ID from the model.
lexy.document.delete_document(document_id=new_doc.document_id)

{'Say': 'Document deleted!'}

# Indexes

### List indexes

In [29]:
# List the existing indexes.
lexy.index.list_indexes()

[<Index('default_text_embeddings', description='Text embeddings for default collection')>,
 <Index('word_counts', description='Word counts')>,
 <Index('ex_index', description='Text and metadata')>]

### Get index

In [30]:
# Look up a single index by its ID and display it.
idx = lexy.index.get_index('default_text_embeddings')
idx

<Index('default_text_embeddings', description='Text embeddings for default collection')>

In [31]:
# Query the index with free text. Each result is a dict holding the matched document's ID
# and content plus a `distance` value; results appear ordered by increasing distance.
idx.query('hello world')

[{'document_id': '732492b3-3c1e-4692-9693-ce4904eb5691',
  'custom_id': None,
  'meta': {},
  'index_record_id': 'bf5d156b-7b1f-43ed-841f-725ea6cd9e8e',
  'document_content': 'Meh. This one is just ok.',
  'abs_distance': 1.2701034545898438,
  'distance': 1.2701034545898438,
  'text': 'Meh. This one is just ok.'},
 {'document_id': 'ddbaef54-abee-48b2-b1d6-45d1d0f65812',
  'custom_id': None,
  'meta': {},
  'index_record_id': '36b9a917-9cb7-479f-a7bd-84a8f12d3b94',
  'document_content': "This is my first document! It's great!",
  'abs_distance': 1.2820980548858643,
  'distance': 1.2820980548858643,
  'text': "This is my first document! It's great!"},
 {'document_id': '0375fa39-fa05-4547-a05e-c5cab8980580',
  'custom_id': None,
  'meta': {},
  'index_record_id': 'fc0d3d54-2717-4f1c-ade9-4bd0e7976a78',
  'document_content': 'ray is an open source framework',
  'abs_distance': 1.3046300411224365,
  'distance': 1.3046300411224365,
  'text': 'ray is an open source framework'},
 {'document_id

### Create new index

In [32]:
from lexy_py.index.models import Index

In [33]:
# Print the constructor signature of the Index model (all parameters are keyword-only).
print(inspect.signature(Index))

(*, index_id: lexy_py.index.models.ConstrainedStrValue, description: Optional[str] = None, index_table_schema: Optional[dict[str, Any]] = {}, index_fields: Optional[dict[str, Any]] = {}, created_at: Optional[datetime.datetime] = None, updated_at: Optional[datetime.datetime] = None, index_table_name: Optional[str] = None) -> None


In [34]:
# TODO: add an index-creation example here once a mock server is set up (do not run it against a live server)

# Bindings

### List bindings

In [35]:
# List the existing bindings; each repr shows the collection, transformer and index IDs it ties together.
lexy.binding.list_bindings()

[<TransformerIndexBinding id=1, status=ON, collection_id=default, transformer_id=text.embeddings.minilm, index_id=default_text_embeddings>,
 <TransformerIndexBinding id=2, status=ON, collection_id=default, transformer_id=text.counter.word_counter, index_id=word_counts>,
 <TransformerIndexBinding id=3, status=ON, collection_id=texts, transformer_id=text.counter.word_counter, index_id=ex_index>]

### Get binding

In [36]:
# Look up a single binding by its integer ID and display it.
binding = lexy.binding.get_binding(1)
binding

<TransformerIndexBinding id=1, status=ON, collection_id=default, transformer_id=text.embeddings.minilm, index_id=default_text_embeddings>

In [37]:
# The binding exposes its collection as a Collection object, not just an ID.
binding.collection

<Collection('default', description=Default collection)>

In [38]:
# Attributes of the related collection can be read straight through the binding.
binding.collection.description

'Default collection'

In [39]:
# The binding exposes its transformer as a Transformer object.
binding.transformer

<Transformer('text.embeddings.minilm', description='Text embeddings using Hugging Face model 'sentence-transformers/all-MiniLM-L6-v2'')>

In [40]:
# The binding exposes its index as an Index object.
binding.index

<Index('default_text_embeddings', description='Text embeddings for default collection')>

In [41]:
# Attributes of the related index can be read straight through the binding.
binding.index.description

'Text embeddings for default collection'

In [42]:
# The Index reached through the binding can be queried like one returned by `get_index`.
binding.index.query('hi')

[{'document_id': 'ddbaef54-abee-48b2-b1d6-45d1d0f65812',
  'custom_id': None,
  'meta': {},
  'index_record_id': '36b9a917-9cb7-479f-a7bd-84a8f12d3b94',
  'document_content': "This is my first document! It's great!",
  'abs_distance': 1.2729943990707397,
  'distance': 1.2729943990707397,
  'text': "This is my first document! It's great!"},
 {'document_id': '0375fa39-fa05-4547-a05e-c5cab8980580',
  'custom_id': None,
  'meta': {},
  'index_record_id': 'fc0d3d54-2717-4f1c-ade9-4bd0e7976a78',
  'document_content': 'ray is an open source framework',
  'abs_distance': 1.2912185192108154,
  'distance': 1.2912185192108154,
  'text': 'ray is an open source framework'},
 {'document_id': '732492b3-3c1e-4692-9693-ce4904eb5691',
  'custom_id': None,
  'meta': {},
  'index_record_id': 'bf5d156b-7b1f-43ed-841f-725ea6cd9e8e',
  'document_content': 'Meh. This one is just ok.',
  'abs_distance': 1.3080474138259888,
  'distance': 1.3080474138259888,
  'text': 'Meh. This one is just ok.'},
 {'document_id

In [43]:
# The Collection reached through the binding can list its documents; preview the first five.
binding.collection.list_documents()[:5]

[<Document("This is my first document! It's great!", ddbaef54-abee-48b2-b1d6-45d1d0f65812)>,
 <Document("Meh. This one is just ok.", 732492b3-3c1e-4692-9693-ce4904eb5691)>,
 <Document("Starlink is a satellite internet constellation operated by American aerospace company SpaceX,...", 4d605d42-94df-423b-bb96-ee41c521760f)>,
 <Document("A latent space, also known as a latent feature space or embedding space, is an embedding of a set...", fa15ee1a-a546-4cb4-9699-4457cf0a9f6f)>,
 <Document("ray is an open source framework", 0375fa39-fa05-4547-a05e-c5cab8980580)>]

In [44]:
# The collection reached through the binding carries a LexyClient in its `client` attribute.
assert isinstance(binding.collection.client, LexyClient)

### Create new binding

In [45]:
from lexy_py.binding.models import TransformerIndexBinding

In [46]:
# Print the constructor signature of the TransformerIndexBinding model (all parameters are keyword-only).
print(inspect.signature(TransformerIndexBinding))

(*, binding_id: Optional[int] = None, collection_id: Optional[str] = None, transformer_id: Optional[str] = None, index_id: Optional[str] = None, created_at: Optional[datetime.datetime] = None, updated_at: Optional[datetime.datetime] = None, description: Optional[str] = None, execution_params: Optional[dict[str, Any]] = {}, transformer_params: Optional[dict[str, Any]] = {}, filters: Optional[dict] = {}, status: Optional[lexy_py.binding.models.BindingStatus] = <BindingStatus.PENDING: 'pending'>, collection: Optional[lexy_py.collection.models.CollectionModel] = None, transformer: Optional[lexy_py.transformer.models.TransformerModel] = None, index: Optional[lexy_py.index.models.IndexModel] = None) -> None


In [47]:
# TODO: add a binding-creation example here once a mock server is set up (do not run it against a live server)