# Infinispan VectorStore: question answer demo

This demo shows how to set up a chain for a question-answering application.
InfinispanVS is used as the retriever.

*This demo needs an OpenAI API key: it must be stored in a `.env` file in the form
`OPENAI_API_KEY=<your-key-here>`*

In [None]:
# Start Infinispan in a container
# First force-remove any existing container named infinispanvs-demo
# (presumably a leftover from a previous run of this notebook).
!docker rm --force infinispanvs-demo
# Run the server detached (-d). The current directory is mounted at /user-config
# so the infinispan-noauth.yaml file passed with -c is visible inside the
# container; port 11222 is published on the host.
!docker run -d --name infinispanvs-demo -v $(pwd):/user-config  -p 11222:11222 infinispan/server:15.0.0.Dev09 -c /user-config/infinispan-noauth.yaml 

In [None]:
# Install the Python packages this notebook relies on
%pip install langchain
%pip install langchain_community
%pip install tqdm
%pip install pypdf
%pip install sentence_transformers
%pip install openai
%pip install python-dotenv

In [None]:
from langchain.document_loaders import PyPDFLoader
from langchain_community.embeddings.huggingface import HuggingFaceEmbeddings
from tqdm import tqdm
import pickle
import os
import json

In [None]:
# Load PDF documentation

# NOTE: machine-specific path; point it at a folder that contains your PDF files.
pdf_folder_path = '/home/rigazilla/ai/data/ai-pdf'
# One loader per directory entry found in the folder.
loaders = [PyPDFLoader(os.path.join(pdf_folder_path, fn)) for fn in os.listdir(pdf_folder_path)]
documents = []
for loader in tqdm(loaders):
    try:
        documents.extend(loader.load())
    except Exception as exc:
        # Best effort: a file that cannot be loaded is skipped, but the failure
        # is reported instead of being silently dropped. Catching Exception
        # (not a bare except) keeps the cell interruptible with Ctrl-C.
        # tqdm.write prints without corrupting the progress bar.
        tqdm.write(f"Skipping {getattr(loader, 'file_path', loader)}: {exc!r}")

In [None]:
# Cut the loaded documents into chunks (chunk_size=1000, no overlap)

from langchain.text_splitter import CharacterTextSplitter
text_splitter = CharacterTextSplitter(
    chunk_overlap=0,
    chunk_size=1000,
)
texts = text_splitter.split_documents(documents)

# Infinispan vector store classes used by the following cells
from infinispan_vector.infinispanvs import Infinispan, InfinispanVS


In [None]:
# Copy each chunk's content into its own metadata under the "text" key
for text in texts:
    text.metadata["text"] = text.page_content

In [None]:
# A HuggingFace model replaces the OpenAI embeddings because of the
# vector dimension limit
#embeddings = OpenAIEmbeddings()

# Name of the pre-trained sentence-transformers model to use
modelPath = "sentence-transformers/all-MiniLM-l6-v2"

# Model configuration options: run the computations on the CPU
model_kwargs = dict(device='cpu')

# Encoding options: 'normalize_embeddings' is set to False
encode_kwargs = dict(normalize_embeddings=False)

# Build the HuggingFaceEmbeddings instance from the settings above
embeddings = HuggingFaceEmbeddings(
    encode_kwargs=encode_kwargs,  # encoding options
    model_kwargs=model_kwargs,    # model configuration options
    model_name=modelPath,         # pre-trained model to use
)

In [None]:
# Build the Infinispan vector store from the chunks and the embedding model
vector_store = InfinispanVS.from_documents(
    embedding=embeddings,
    documents=texts,
)

from langchain.indexes import VectorstoreIndexCreator
# Expose the store as a similarity-search retriever with k=2
retriever = vector_store.as_retriever(
    search_kwargs={"k": 2},
    search_type="similarity",
)
from langchain.llms import OpenAI
from langchain.chains import RetrievalQA
from dotenv import load_dotenv
# Read the variables defined in the .env file
load_dotenv()

In [None]:
# Build the question-answering chain on top of the retriever;
# the retrieved source documents are returned along with the answer
qa = RetrievalQA.from_chain_type(
    llm=OpenAI(),
    chain_type="stuff",
    retriever=retriever,
    return_source_documents=True,
)

In [None]:
# Some demo questions
# invoke() is used because calling the chain object directly is deprecated
# since LangChain 0.1.
result = qa.invoke({"query": "What is naive bayes?"})
print(result["result"])

In [None]:
# Second demo question. invoke() is used because calling the chain object
# directly is deprecated since LangChain 0.1.
result = qa.invoke({"query": "Can I use AI in manufacturing?"})
print(result["result"])

In [None]:
# This is out of scope, no answer should be produced
# invoke() is used because calling the chain object directly is deprecated
# since LangChain 0.1.
result = qa.invoke({"query": "What is a pizza?"})
print(result["result"])

In [None]:
# Try with your questions
# Keep prompting until the user types "bye". Input is stripped so that
# "bye " still quits and blank or whitespace-only lines are never sent to
# the LLM.
while True:
    q = input("Question (or bye to quit)> ").strip()
    if q == "bye":
        break
    if q:
        # invoke() replaces the deprecated direct call on the chain object
        result = qa.invoke({"query": q})
        print(result["result"])
print("bye")

In [None]:
# Clean up: force-remove the infinispanvs-demo container started at the top of the notebook
!docker rm --force infinispanvs-demo