In [None]:
from langchain_core.embeddings import Embeddings
from infinispan_vector import InfinispanVS

# Demo with a real embedding
from langchain.embeddings import HuggingFaceEmbeddings

# Hugging Face sentence-transformers model used to build the embedding function
# that is handed to the vector store created in the next cell.
# NOTE(review): the vector schema declared later in this notebook uses
# @Vector(dimension=384); the model's embedding size presumably has to match.
model_name = "sentence-transformers/all-MiniLM-L12-v2"

hf = HuggingFaceEmbeddings(model_name=model_name)


In [None]:
# Creating a langchain_core.VectorStore backed by Infinispan.
# The cache name ("news_demo_cache") and the indexed entity name
# ("news_demo_vec") are set explicitly in the configuration, together with the
# names of the text field ("_key") and of the vector field ("floatVector").
# No text is loaded at this point, so an empty list is passed; the news are
# added later with `add_texts`.

ispnvs = InfinispanVS.from_texts(texts=[], embedding=hf,
                                 configuration={"textfield": "_key",
                                                "vectorfield" : "floatVector",
                                                "cache_name": "news_demo_cache",
                                                "entity_name" : "news_demo_vec"})
# Underlying Infinispan client object exposed by the store; the following cells
# use it for the raw cache / schema requests on the "news" cache.
ispn = ispnvs.ispn

In [None]:
# Creating an Infinispan cache to store vectors
# The JSON definition below is passed unchanged to `ispnvs.cache_create`.
# It declares a distributed, synchronous cache with protostream encoding and
# with indexing enabled for the `news_demo_vec` entity, i.e. the same entity
# name given to the vector store configuration.

cache_def = '''
{
  "distributed-cache": {
    "owners": "2",
    "mode": "SYNC",
    "statistics": true,
    "encoding": {
      "media-type": "application/x-protostream"
    },
    "indexing": {
      "enabled": true,
      "storage": "filesystem",
      "startup-mode": "AUTO",
      "indexing-mode": "AUTO",
      "indexed-entities": [
        "news_demo_vec"
      ]
    }
  }
}
'''
ispnvs.cache_create(cache_def)

In [None]:
# Creating an Infinispan cache to store news
# This second cache ("news") holds the full news documents; unlike the vector
# cache definition it has no "indexing" section.

cache_def = '''
{
  "distributed-cache": {
    "owners": "2",
    "mode": "SYNC",
    "statistics": true,
    "encoding": {
      "media-type": "application/x-protostream"
    }
  }
}
'''
# Create the cache, then clear it (presumably so that a re-run of the notebook
# starts from an empty cache if it already existed -- confirm against the client).
ispn.req_cache_post("news",cache_def)
ispn.req_cache_clear("news")

In [None]:
import json

# Protobuf schema of the indexed entity stored in the vector cache.
# `floatVector` is the embedding field (declared with dimension 384) and
# `_key` / `title` are the fields filled from the text and metadata.
# NOTE(review): `_key` is declared as int32 here while the loading cell stores
# it as a string (`str(i)`) -- confirm the server-side conversion is intended.
schema_vector = '''
/**
 * @Indexed
 */
message news_demo_vec {
/**
 * @Vector(dimension=384)
 */
repeated float floatVector = 1;
optional int32 _key = 2;
optional string title = 3;
}
'''
# Delete any previously registered schema before creating it again, so the
# cell can be re-run.
ispnvs.schema_delete()
output = ispnvs.schema_create(schema_vector)
print(output.text)
# The request must succeed at HTTP level and report no schema error.
assert output.status_code == 200, "schema creation request failed"
# `is None` is the idiomatic singleton comparison (PEP 8), replacing `== None`.
assert json.loads(output.text)["error"] is None, "schema was rejected by the server"
# Clear the index and trigger a reindex after the schema has been (re)created.
output = ispnvs.cache_index_clear()
output = ispnvs.cache_index_reindex()

In [None]:
# Protobuf schema of the documents stored in the "news" cache: each entry has
# a title and a description.
schema_news = '''
message news {
optional string title = 1;
optional string description = 2;
}
'''
# Delete any previously registered "news.proto" before posting it again, so
# the cell can be re-run.
ispn.req_schema_delete("news.proto")
output = ispn.req_schema_post("news.proto",schema_news)
print(output.text)
# The request must succeed at HTTP level and report no schema error.
assert output.status_code == 200, "schema creation request failed"
# `is None` is the idiomatic singleton comparison (PEP 8), replacing `== None`.
assert json.loads(output.text)["error"] is None, "schema was rejected by the server"

In [None]:
# Loading the news from the gzipped CSV file `bbc_news.csv.gz`.
# Every row is stored as a document in the "news" cache (key = row index) and
# its text / metadata are collected so they can be embedded afterwards.
import csv
import gzip
import time
from itertools import islice

# Change this to change the number of news you want to load
MAX_NEWS = 5000

texts = []   # one "<title>.<description>" string per news, to be embedded
metas = []   # parallel list of metadata dicts ("_key" and "title")

with gzip.open('bbc_news.csv.gz', 'rt', newline='') as csvfile:
    news_reader = csv.reader(csvfile, delimiter=',', quotechar='"')
    # NOTE(review): no header row is skipped; if the CSV has one it is loaded
    # as the first news item -- confirm against the data file.
    # islice stops after MAX_NEWS rows without reading the rest of the file.
    for idx, row in enumerate(islice(news_reader, MAX_NEWS)):
        key = str(idx)
        title = row[0]
        # Column 0 is used as the title, column 4 as the body of the news.
        text = title + "." + row[4]
        # Storing content
        doc = {"_type": "news", "title": title, "description": text}
        ispn.req_put(key, json.dumps(doc), cache_name="news")
        texts.append(text)
        # Storing meta: "_key" links the vector entry back to the "news" cache key
        metas.append({"_key": key, "title": title})

In [None]:
    # add texts and fill vector db
    # `texts` and `metas` are the parallel lists built by the loading cell;
    # whatever `add_texts` returns is kept in `keys`.
    keys = ispnvs.add_texts(texts, metas)

In [None]:
query_res = ispnvs.similarity_search("North Sea is getting hot",2)
for res in query_res:
    print(ispn.req_get(res.page_content, "news").text)

In [None]:
query_res = ispnvs.similarity_search("Milan fashion week begins",2)
for res in query_res:
    print(ispn.req_get(res.page_content, "news").text)

In [None]:
query_res = ispnvs.similarity_search("I'm getting hungry",2)
for res in query_res:
    print(ispn.req_get(res.page_content, "news").text)

In [None]:
query_res = ispnvs.similarity_search("Stock market is rising today",2)
for res in query_res:
    print(ispn.req_get(res.page_content, "news").text)

In [None]:
query_res = ispnvs.similarity_search("Why cats are so viral?",2)
for res in query_res:
    print(ispn.req_get(res.page_content, "news").text)

In [None]:
query_res = ispnvs.similarity_search("How to stay young",2)
for res in query_res:
    print(ispn.req_get(res.page_content, "news").text)

In [None]:
# Clean up
# Vector side: delete the schema registered through `ispnvs.schema_create`
# and then the store's cache (presumably the one created by
# `ispnvs.cache_create` -- confirm against the store implementation).
ispnvs.schema_delete()
ispnvs.cache_delete()


In [None]:
# News side: delete the "news" cache and the "news.proto" schema that were
# created earlier in this notebook through the raw client.
ispn.req_cache_delete("news")
ispn.req_schema_delete("news.proto")