In [None]:
from langchain_core.embeddings import Embeddings
from infinispan_vector import Infinispan

# Demo with a real embedding
from langchain.embeddings import HuggingFaceEmbeddings

# Hugging Face identifier of the sentence-transformers model used to embed the texts
model_name = "sentence-transformers/all-MiniLM-L12-v2"

# Embedding object that is handed to the vector store when it is created.
# NOTE(review): the vector schema in this notebook declares @Vector(dimension=384);
# presumably that matches this model's output size — confirm if the model is changed.
hf = HuggingFaceEmbeddings(model_name=model_name)


In [None]:
# Creating a langchain_core.VectorStore.
# No texts are loaded at this point (empty list); the cache name `allminilm`
# and the entity name `allmini_vec` are passed explicitly in the configuration.
# "lambda.content" selects the stored item's `_key` field; presumably this becomes
# the `page_content` of search results (TODO confirm in infinispan_vector).

ispn_allminilm = Infinispan.from_texts(
    texts=[],  # was `{}`: from_texts takes a list of strings, not a dict
    embedding=hf,
    configuration={
        "lambda.content": lambda item: item["_key"],
        "cache_name": "allminilm",
        "entity_name": "allmini_vec",
    },
)

In [None]:
# Creating an Infinispan cache to store vectors.
# The JSON below describes a distributed cache (2 owners, SYNC mode, statistics on)
# encoded as application/x-protostream, with indexing enabled for the
# `allmini_vec` entity and filesystem index storage.
# The definition is a string payload, so it cannot carry comments of its own.

cache_def = '''
{
  "distributed-cache": {
    "owners": "2",
    "mode": "SYNC",
    "statistics": true,
    "encoding": {
      "media-type": "application/x-protostream"
    },
    "indexing": {
      "enabled": true,
      "storage": "filesystem",
      "startup-mode": "AUTO",
      "indexing-mode": "AUTO",
      "indexed-entities": [
        "allmini_vec"
      ]
    }
  }
}
'''
# Submit the definition under the cache name `allminilm`.
# NOTE(review): the return value is not inspected, unlike the schema cells which
# assert on the response — consider checking it the same way.
ispn_allminilm.req_cache_post("allminilm",cache_def)

In [None]:
# Creating an Infinispan cache to store news.
# Same distributed-cache layout as the vector cache (2 owners, SYNC mode,
# statistics on, application/x-protostream encoding) but without an "indexing"
# section.
# `cache_def` is reused here and overwrites the vector-cache definition.

cache_def = '''
{
  "distributed-cache": {
    "owners": "2",
    "mode": "SYNC",
    "statistics": true,
    "encoding": {
      "media-type": "application/x-protostream"
    }
  }
}
'''
# Submit the definition under the cache name `news`.
# NOTE(review): the return value is not inspected — consider checking it.
ispn_allminilm.req_cache_post("news",cache_def)

In [None]:
import json
# Protobuf schema of the indexed vector entity `allmini_vec`:
#   floatVector - the embedding, declared as a 384-dimension @Vector
#   _key        - declared as int32 (NOTE(review): the loading cell fills it with
#                 str(i); confirm the server accepts/coerces string values)
#   title       - the news title
schema_vector = '''
/**
 * @Indexed
 */
message allmini_vec {
/**
 * @Vector(dimension=384)
 */
repeated float floatVector = 1;
optional int32 _key = 2;
optional string title = 3;
}
'''
# Register the schema as `allmini_vec.proto` and fail fast if the server rejects it.
output = ispn_allminilm.req_schema_post("allmini_vec.proto", schema_vector)
print(output.text)
assert output.status_code == 200, "unexpected HTTP status while registering allmini_vec.proto"
# `is None` is the correct identity check for the JSON null decoded by json.loads
assert json.loads(output.text)["error"] is None, "server reported an error for allmini_vec.proto"

In [None]:
# Protobuf schema of the `news` entity: a title and a description, neither indexed.
schema_news = '''
message news {
optional string title = 1;
optional string description = 2;
}
'''
# Register the schema as `news.proto` and fail fast if the server rejects it.
# `json` is imported by an earlier cell of this notebook.
output = ispn_allminilm.req_schema_post("news.proto", schema_news)
print(output.text)
assert output.status_code == 200, "unexpected HTTP status while registering news.proto"
# `is None` is the correct identity check for the JSON null decoded by json.loads
assert json.loads(output.text)["error"] is None, "server reported an error for news.proto"

In [None]:
# Adding some data from bbc_news.csv: each row is stored in the "news" cache and its text/metadata are collected for embedding
import csv, time

# Read bbc_news.csv row by row. Every row is written to the "news" cache under
# the running counter (as a string key); its text and metadata are collected so
# the vector store can be filled afterwards.
with open('bbc_news.csv', newline='') as news_file:
    news_rows = csv.reader(news_file, delimiter=',', quotechar='"')
    texts, metas, embeds = [], [], []
    i = 0
    for row in news_rows:
        # Content to embed: column 0 (title) and column 4 joined by a dot
        text = f"{row[0]}.{row[4]}"

        # Store the readable entry; "_type" names the protobuf message it maps to
        doc = {"_type": "news", "title": row[0], "description": text}
        res = ispn_allminilm.req_put(str(i), json.dumps(doc), cache_name="news")
        texts.append(text)

        # Metadata carries the same key used for the "news" entry
        # NOTE(review): "_key" is a string here while the allmini_vec schema
        # declares it as int32 — confirm this is accepted.
        meta = {"_key": str(i), "title": row[0]}
        metas.append(meta)

        # The counter is checked after the increment with ">", so at most
        # 1001 rows are loaded.
        i += 1
        if i > 1000:
            break

In [None]:
    # Add the collected texts, with their metadata, to the vector store; the value
    # returned by add_texts is kept in `keys`.
    # NOTE(review): this top-level cell is indented by four spaces. IPython
    # tolerates a uniformly indented cell, but the code raises IndentationError
    # when exported to a plain .py script — consider dedenting.
    keys = ispn_allminilm.add_texts(texts, metas)

In [None]:
# Ask the vector store for the 2 entries closest to the query, then print the
# "news" cache entry stored under each hit's page_content (used as the key).
query_res = ispn_allminilm.similarity_search("North Sea is getting hot", 2)
for res in query_res:
    news_entry = ispn_allminilm.req_get(res.page_content, "news")
    print(news_entry.text)

In [None]:
# Ask the vector store for the 2 entries closest to the query, then print the
# "news" cache entry stored under each hit's page_content (used as the key).
query_res = ispn_allminilm.similarity_search("Milan fashion week begins", 2)
for res in query_res:
    news_entry = ispn_allminilm.req_get(res.page_content, "news")
    print(news_entry.text)

In [None]:
# Ask the vector store for the 2 entries closest to the query, then print the
# "news" cache entry stored under each hit's page_content (used as the key).
query_res = ispn_allminilm.similarity_search("I'm getting hungry", 2)
for res in query_res:
    news_entry = ispn_allminilm.req_get(res.page_content, "news")
    print(news_entry.text)

In [None]:
# Ask the vector store for the 2 entries closest to the query, then print the
# "news" cache entry stored under each hit's page_content (used as the key).
query_res = ispn_allminilm.similarity_search("Stock market is rising today", 2)
for res in query_res:
    news_entry = ispn_allminilm.req_get(res.page_content, "news")
    print(news_entry.text)

In [None]:
# Ask the vector store for the 2 entries closest to the query, then print the
# "news" cache entry stored under each hit's page_content (used as the key).
query_res = ispn_allminilm.similarity_search("Why cats are so viral?", 2)
for res in query_res:
    news_entry = ispn_allminilm.req_get(res.page_content, "news")
    print(news_entry.text)

In [None]:
# Ask the vector store for the 2 entries closest to the query, then print the
# "news" cache entry stored under each hit's page_content (used as the key).
query_res = ispn_allminilm.similarity_search("How to stay young", 2)
for res in query_res:
    news_entry = ispn_allminilm.req_get(res.page_content, "news")
    print(news_entry.text)

In [None]:
# Clean up
# Presumably removes the `allminilm` vector cache and then the
# `allmini_vec.proto` schema registered earlier in this notebook.
# NOTE(review): the return values are not checked, so a failed clean-up goes unnoticed.
ispn_allminilm.req_cache_delete("allminilm")
ispn_allminilm.req_schema_delete("allmini_vec.proto")

In [None]:
# Clean up the readable content: presumably removes the `news` cache and then
# the `news.proto` schema registered earlier in this notebook.
# NOTE(review): the return values are not checked, so a failed clean-up goes unnoticed.
ispn_allminilm.req_cache_delete("news")
ispn_allminilm.req_schema_delete("news.proto")