In [None]:
from langchain_core.embeddings import Embeddings
from langchain.embeddings import HuggingFaceEmbeddings

# Hugging Face checkpoint used to embed sentences.
# NOTE(review): the vector schema in this notebook declares dimension=384;
# presumably that must match this model's output size — confirm before
# swapping in a different checkpoint.
model_name = "sentence-transformers/all-MiniLM-L12-v2"

# LangChain embedding object built from the checkpoint above; it is handed
# to the vector store as its `embedding` argument.
hf = HuggingFaceEmbeddings(model_name=model_name)


In [None]:
# Creating a langchain_core.VectorStore
# Vectors go to the `sentence_demo_cache` cache as `sentence_demo_vec`
# entities, as set explicitly in `configuration` below.
from infinispan_vector import Infinispan, InfinispanVS
ispn = Infinispan()
# The store starts empty (texts is an empty list); sentences are added later
# through `ispn_vs.add_texts`.
ispn_vs = InfinispanVS.from_texts(
    texts=[],
    embedding=hf,
    ispn=ispn,
    configuration={
        # presumably builds each result's page_content from the stored item;
        # here it yields the item's "_key" — TODO confirm against InfinispanVS
        "lambda.content": lambda item: item["_key"],
        "cache_name": "sentence_demo_cache",
        "entity_name": "sentence_demo_vec",
    },
)

In [None]:
# Create the protobuf schema for vector
import json
schema_vector = '''
/**
 * @Indexed
 */
message sentence_demo_vec {
/**
 * @Vector(dimension=384)
 */
repeated float floatVector = 1;
optional int32 _key = 2;
}
'''
# Remove any schema with this name before registering it again. The response
# is deliberately not checked (presumably the schema may not exist yet).
ispn.req_schema_delete("sentence_demo.proto")
output = ispn.req_schema_post("sentence_demo.proto", schema_vector)
print(output.text)
# Fail fast: the request must succeed and the server must report no error.
assert output.status_code == 200, f"schema upload failed: HTTP {output.status_code}"
assert json.loads(output.text)["error"] is None, "server reported a schema error"

In [None]:
# Create the protobuf schema for content
schema = '''
message sentence {
optional string title = 1;
optional string description = 2;
}
'''
# Remove any schema with this name before registering it again, mirroring the
# vector-schema cell. The response is deliberately not checked (presumably the
# schema may not exist yet).
ispn.req_schema_delete("sentence.proto")
output = ispn.req_schema_post("sentence.proto", schema)
print(output.text)
# Fail fast: the request must succeed and the server must report no error.
assert output.status_code == 200, f"schema upload failed: HTTP {output.status_code}"
assert json.loads(output.text)["error"] is None, "server reported a schema error"

In [None]:
# Creating an Infinispan cache to store vectors

# JSON cache definition: a distributed, synchronous cache with 2 owners,
# protostream encoding, and indexing enabled for the `sentence_demo_vec`
# entity (the message declared in the vector protobuf schema).
cache_def = '''
{
  "distributed-cache": {
    "owners": "2",
    "mode": "SYNC",
    "statistics": true,
    "encoding": {
      "media-type": "application/x-protostream"
    },
    "indexing": {
      "enabled": true,
      "storage": "filesystem",
      "startup-mode": "AUTO",
      "indexing-mode": "AUTO",
      "indexed-entities": [
        "sentence_demo_vec"
      ]
    }
  }
}
'''
# The response of the create call is not checked; the cache is then cleared.
# NOTE(review): presumably this lets a re-run reuse an existing cache and
# start from an empty one — confirm how req_cache_post reports "already exists".
ispn.req_cache_post("sentence_demo_cache",cache_def)
ispn.req_cache_clear("sentence_demo_cache")

In [None]:
# Creating an Infinispan cache to store content

# JSON cache definition: a distributed, synchronous cache with 2 owners and
# protostream encoding. No indexing section: this cache only holds the
# sentence documents, which are fetched by key.
cache_def = '''
{
  "distributed-cache": {
    "owners": "2",
    "mode": "SYNC",
    "statistics": true,
    "encoding": {
      "media-type": "application/x-protostream"
    }
  }
}
'''
# The response of the create call is not checked; the cache is then cleared.
# NOTE(review): presumably this lets a re-run reuse an existing cache and
# start from an empty one — confirm how req_cache_post reports "already exists".
ispn.req_cache_post("sentence",cache_def)
ispn.req_cache_clear("sentence")

In [None]:
# Adding some data from rnd_sentences.txt
import csv, time, gzip

# Change this to change the number of sentences you want to load
MAX_SENTENCES = 5000

texts = []  # sentence text, one entry per loaded line
metas = []  # metadata per sentence; "_key" is the key of its content entry

with gzip.open('rnd_sentences.txt.gz', 'rt', newline='') as f:
    for i, line in enumerate(f):
        # Stop once exactly MAX_SENTENCES lines have been stored.
        if i >= MAX_SENTENCES:
            break
        # Storing content
        # NOTE: `line` keeps its trailing newline, exactly as read from the file.
        doc = {"_type": "sentence", "title": str(i), "description": line}
        ispn.req_put(str(i), json.dumps(doc), cache_name="sentence")
        texts.append(line)
        # Storing meta
        metas.append({"_key": str(i)})

In [None]:
# add texts and fill vector db
# `texts` and `metas` are the lists built while loading the sentences; each
# has one entry per sentence, in the same order.
# NOTE(review): the return value is presumably the keys of the stored
# vectors — confirm against InfinispanVS.add_texts.
keys = ispn_vs.add_texts(texts, metas)

In [None]:
# Some demo queries
# Run the similarity search (second argument 2, presumably the number of
# results), then print the entry each match points to in the "sentence" cache.
matches = ispn_vs.similarity_search("I want to have fun this night", 2)
for match in matches:
    stored = ispn.req_get(match.page_content, "sentence")
    print(stored.text)

In [None]:
# Same lookup with second argument 5 (presumably the number of results):
# search, then print the "sentence" cache entry for each match.
matches = ispn_vs.similarity_search("Leafs are falling from the trees in the park", 5)
for match in matches:
    stored = ispn.req_get(match.page_content, "sentence")
    print(stored.text)

In [None]:
# Search for the query, then print the "sentence" cache entry for each match.
matches = ispn_vs.similarity_search("I'm getting hungry", 2)
for match in matches:
    stored = ispn.req_get(match.page_content, "sentence")
    print(stored.text)

In [None]:
# Search for the query, then print the "sentence" cache entry for each match.
matches = ispn_vs.similarity_search("I feel like a bird in a cage", 2)
for match in matches:
    stored = ispn.req_get(match.page_content, "sentence")
    print(stored.text)

In [None]:
# Search for the query, then print the "sentence" cache entry for each match.
matches = ispn_vs.similarity_search("People are strange, when you are stranger", 2)
for match in matches:
    stored = ispn.req_get(match.page_content, "sentence")
    print(stored.text)

In [None]:
# Search for the query, then print the "sentence" cache entry for each match.
matches = ispn_vs.similarity_search("As we know, time is relative", 2)
for match in matches:
    stored = ispn.req_get(match.page_content, "sentence")
    print(stored.text)

In [None]:
# Clean up
# Delete the vector cache and the protobuf schema registered for it.
# Responses are not checked.
ispn.req_cache_delete("sentence_demo_cache")
ispn.req_schema_delete("sentence_demo.proto")

In [None]:
# Delete the content cache and the protobuf schema registered for it.
# Responses are not checked.
ispn.req_cache_delete("sentence")
ispn.req_schema_delete("sentence.proto")