# Infinispan VectorStore: Similarity search demo 2

This demo shows how to run a similarity search on a set of random sentences. To show how a vector database and
a cache can coexist in Infinispan, the content is stored in a separate cache and referenced from the vector
cache via its key id.

In [None]:
# Start Infinispan in a container
# The first command force-removes any existing container named infinispanvs-demo,
# so the `docker run --name infinispanvs-demo` below does not clash with a leftover one.
# The second command runs infinispan/server:15.0.0.Dev09 detached, mounts the current
# directory at /user-config, publishes port 11222 and passes
# /user-config/infinispan-noauth.yaml to the server via -c
# (so infinispan-noauth.yaml is expected in the current directory).

!docker rm --force infinispanvs-demo
!docker run -d --name infinispanvs-demo -v $(pwd):/user-config  -p 11222:11222 infinispan/server:15.0.0.Dev09 -c /user-config/infinispan-noauth.yaml 

In [None]:
# Ensure that all we need is installed

%pip install sentence-transformers
%pip install langchain
%pip install langchain_core
%pip install langchain_community

In [None]:
# Import HuggingFace language model
# `hf` is the embedding object handed to the vector store later in the notebook.

from langchain_core.embeddings import Embeddings
# Import from langchain_community (installed above): the `langchain.embeddings`
# re-export of HuggingFaceEmbeddings is deprecated and missing from newer releases.
from langchain_community.embeddings import HuggingFaceEmbeddings

model_name = "sentence-transformers/all-MiniLM-L12-v2"

hf = HuggingFaceEmbeddings(model_name=model_name)

In [None]:
# Create the protobuf schema for content. This is application design
# so configuration is needed. The Infinispan helper class is used for this.
# `ispn` created here is reused by later cells to talk to the server.

import json

from infinispan_vector import Infinispan

schema = '''
message sentence {
optional string title = 1;
optional string description = 2;
}
'''
ispn = Infinispan()

# Delete the schema before posting it again; the result of the delete is not checked.
ispn.schema_delete("sentence.proto")
output = ispn.schema_post("sentence.proto", schema)
print(output.text)

# The post must return HTTP 200 and a JSON body whose "error" field is null.
assert output.status_code == 200
assert json.loads(output.text)["error"] is None

In [None]:
# Create the `sentence` Infinispan cache that stores the content,
# again through the Infinispan helper instance (`ispn`).
# `cache_def` is the JSON cache configuration sent to the server: a distributed,
# synchronous cache with statistics enabled and application/x-protostream encoding.

cache_def = '''
{
  "distributed-cache": {
    "owners": "2",
    "mode": "SYNC",
    "statistics": true,
    "encoding": {
      "media-type": "application/x-protostream"
    }
  }
}
'''
ispn.cache_post("sentence",cache_def)
# Clear the cache right after posting it (presumably so a rerun starts from an empty cache).
ispn.cache_clear("sentence")

In [None]:
# Adding some data from rnd_sentences.txt.gz
# Vector and _key (content id) are stored in the vector db,
# actual content is stored in the `sentence` cache.
# This cell only fills the `sentence` cache and collects `texts` / `metas`;
# the vector db itself is built from them in a later cell.

import csv
import gzip
import itertools
import json
import time

# Change this to change the number of sentences you want to load
max_sentences = 5000

texts = []
metas = []
embeds = []  # not populated in this cell
with gzip.open('rnd_sentences.txt.gz', 'rt', newline='') as f:
    # islice caps the load at exactly `max_sentences` lines.
    for i, line in enumerate(itertools.islice(f, max_sentences)):
        key = str(i)
        # Storing content
        doc = {"_type": "sentence", "title": key, "description": line}
        # Populating cache with the whole document in json format.
        # The `ispn` helper is used because `ispnvs` is only created after this cell.
        ispn.put(key, json.dumps(doc), cache_name="sentence")
        # Append text to be embedded
        texts.append(line)
        # Append related meta: the "text" entry carries the cache key of the content
        metas.append({"text": key})

In [None]:
# Create the vector db from the collected texts and metas, embedding them with `hf`

# InfinispanVS is not imported by any other cell of this notebook, so import it here;
# langchain_community is installed by the pip cell at the top.
from langchain_community.vectorstores import InfinispanVS

ispnvs = InfinispanVS.from_texts(texts=texts, metadatas=metas, embedding=hf)

In [None]:
# Demo queries
# similarity_search returns a list of Document; each page_content holds the key
# under which the actual content is stored in the `sentence` cache,
# so every hit is resolved with a get on that cache before printing.

query = "I want to have fun this night"
query_res = ispnvs.similarity_search(query, 2)
for res in query_res:
    entry = ispnvs.ispn.get(res.page_content, "sentence")
    print(entry.text)

In [None]:
# Ask for the five closest sentences and print the content stored for each key
query = "Leafs are falling from the trees in the park"
query_res = ispnvs.similarity_search(query, 5)
for res in query_res:
    entry = ispnvs.ispn.get(res.page_content, "sentence")
    print(entry.text)

In [None]:
# Ask for the two closest sentences and print the content stored for each key
query = "I'm getting hungry"
query_res = ispnvs.similarity_search(query, 2)
for res in query_res:
    entry = ispnvs.ispn.get(res.page_content, "sentence")
    print(entry.text)

In [None]:
# Ask for the two closest sentences and print the content stored for each key
query = "I feel like a bird in a cage"
query_res = ispnvs.similarity_search(query, 2)
for res in query_res:
    entry = ispnvs.ispn.get(res.page_content, "sentence")
    print(entry.text)

In [None]:
# Ask for the two closest sentences and print the content stored for each key
query = "People are strange, when you are stranger"
query_res = ispnvs.similarity_search(query, 2)
for res in query_res:
    entry = ispnvs.ispn.get(res.page_content, "sentence")
    print(entry.text)

In [None]:
# Ask for the two closest sentences and print the content stored for each key
query = "As we know, time is relative"
query_res = ispnvs.similarity_search(query, 2)
for res in query_res:
    entry = ispnvs.ispn.get(res.page_content, "sentence")
    print(entry.text)

In [None]:
# Remove the Infinispan resources used by this demo:
# first the `sentence` cache, then the sentence.proto schema
ispn_client = ispnvs.ispn
ispn_client.cache_delete("sentence")
ispn_client.schema_delete("sentence.proto")

In [None]:
# Force-remove the infinispanvs-demo container that was started at the top of the notebook
!docker rm --force infinispanvs-demo