In [7]:
import json
from typing import List

from langchain_community.embeddings.fastembed import FastEmbedEmbeddings
from langchain_community.vectorstores import Weaviate
from weaviate import Client

In [8]:
# Embedding model used to turn query text into vectors (see `embeddings.embed_query` below).
# The model name is spelled out for clarity; it is also the default.
embeddings = FastEmbedEmbeddings(model_name="BAAI/bge-small-en-v1.5")

Fetching 9 files: 100%|██████████| 9/9 [00:00<00:00, 71358.67it/s]


In [30]:
# Connect to the Weaviate instance listening on localhost:8080.
client = Client(url="http://localhost:8080")

# Configure the client's batcher with batch_size=25.
# The trailing `;` keeps the returned `Batch` object's repr out of the cell output.
client.batch.configure(batch_size=25);

            Consider upgrading to the new and improved v4 client instead!
            See here for usage: https://weaviate.io/developers/weaviate/client-libraries/python
            


<weaviate.batch.crud_batch.Batch at 0x12f06ad10>

In [10]:
# Inspect the schema currently stored in Weaviate (classes and their properties).
# Left as a bare last expression so the notebook renders the returned dict.
client.schema.get()

{'classes': [{'class': 'Document',
   'description': 'This is a class to store Document chunks',
   'invertedIndexConfig': {'bm25': {'b': 0.75, 'k1': 1.2},
    'cleanupIntervalSeconds': 60,
    'stopwords': {'additions': None, 'preset': 'en', 'removals': None}},
   'multiTenancyConfig': {'enabled': False},
   'properties': [{'dataType': ['text'],
     'description': 'Chunk of document content',
     'indexFilterable': False,
     'indexSearchable': False,
     'name': 'text',
     'tokenization': 'word'},
    {'dataType': ['text'],
     'description': 'topic of this document',
     'indexFilterable': True,
     'indexSearchable': False,
     'name': 'topic',
     'tokenization': 'word'},
    {'dataType': ['text'],
     'description': 'title of this document',
     'indexFilterable': False,
     'indexSearchable': True,
     'name': 'doc_name',
     'tokenization': 'word'},
    {'dataType': ['text'],
     'description': 'location of this document',
     'indexFilterable': False,
     'i

In [157]:
def semantic_search_with_filter(
    client,
    query_embedding: List[float],
    filter_value: str,
    top_k: int = 3,
    *,
    class_name: str = "Document",
    properties=("text", "topic", "source"),
    filter_path=("topic",),
) -> dict:
    """Run a near-vector search restricted by an equality filter on a text property.

    The query vector is supplied by the caller (via ``with_near_vector``), so the
    embedding model is chosen on the client side rather than by a Weaviate
    vectorizer module.

    Args:
        client: Object exposing the ``client.query.get(...)`` query builder
            (the ``weaviate.Client`` created earlier in this notebook).
        query_embedding: Embedding vector of the user query.
        filter_value: Value the filtered property must equal (``valueText``).
        top_k: Maximum number of results to return.
        class_name: Weaviate class to query. Defaults to ``"Document"``.
        properties: Property names to return for each hit. A single string is
            treated as one property name.
        filter_path: Path of the property the ``Equal`` filter applies to.
            A single string is treated as a one-element path.

    Returns:
        The raw response returned by the query builder's ``do()`` call.
    """
    # A bare string would otherwise be split into characters by `list(...)`.
    if isinstance(properties, str):
        properties = [properties]
    if isinstance(filter_path, str):
        filter_path = [filter_path]

    # Equality filter on the chosen property.
    where_filter = {
        "path": list(filter_path),
        "operator": "Equal",
        "valueText": filter_value,
    }

    return (
        client.query
        .get(class_name, list(properties))
        # Caller-provided vector: required when the embedding is computed outside Weaviate.
        .with_near_vector({"vector": query_embedding})
        .with_where(where_filter)
        # NOTE(review): in the outputs recorded in this notebook, `score` and `explainScore`
        # come back as "0" / "" for this near-vector query; only `distance` is populated.
        .with_additional(["score", "explainScore", "distance"])
        .with_limit(top_k)
        .do()
    )

In [158]:
# Embed the question and search only the chunks whose topic equals "characters".
response = semantic_search_with_filter(
    client=client,
    query_embedding=embeddings.embed_query("Who is Furina"),
    filter_value="characters",
)

In [159]:
# Pretty-print the raw response as indented JSON so the nested result structure is readable.
# `json` is imported in the notebook's top import cell.
print(json.dumps(response, indent=4))

{
    "data": {
        "Get": {
            "Document": [
                {
                    "_additional": {
                        "distance": 0.24088764,
                        "explainScore": "",
                        "score": "0"
                    },
                    "source": "../data/characters/Furina.txt",
                    "text": "Furina Furina de Fontaine[1][2] is a playable Hydro character in Genshin Impact who can alternate between Pneuma and Ousia alignments.",
                    "topic": "characters"
                },
                {
                    "_additional": {
                        "distance": 0.24376959,
                        "explainScore": "",
                        "score": "0"
                    },
                    "source": "../data/characters/Furina.txt",
                    "text": "Appearance See also: Coronated Prima Donna Furina is a fair-skinned girl who uses the medium female model. Her eyes are heterochromatic in shades

In [160]:
# Same question, but restricted to chunks whose topic equals "region".
response = semantic_search_with_filter(
    client=client,
    query_embedding=embeddings.embed_query("Who is Furina"),
    filter_value="region",
)

In [161]:
# Pretty-print the latest search response as indented JSON
# (relies on `json` having been imported in an earlier cell).
print(json.dumps(response, indent=4))

{
    "data": {
        "Get": {
            "Document": [
                {
                    "_additional": {
                        "distance": 0.3319096,
                        "explainScore": "",
                        "score": "0"
                    },
                    "source": "../data/region/Fontaine.txt",
                    "text": "Furina's birthday is a public holiday in Fontaine; although no longer the ruling Hydro Archon, her birthday remains a holiday. [22]",
                    "topic": "region"
                },
                {
                    "_additional": {
                        "distance": 0.41404307,
                        "explainScore": "",
                        "score": "0"
                    },
                    "source": "../data/region/Fontaine.txt",
                    "text": "~499 years later, she was sentenced to death via the Oratrice, and has willingly given up her full authority as the Hydro Archon to the Hydro Dragon Sovereig

In [165]:
# Drop the notebook's `client` name; cells run after this can no longer use it
# until the client-initialisation cell is executed again.
del client