In [None]:
import os  # used for os.getenv; imported explicitly rather than relying on the star import below
import warnings

from pinecone import Pinecone, ServerlessSpec
from langchain_pinecone import PineconeVectorStore
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_huggingface import ChatHuggingFace, HuggingFaceEndpoint
from langchain_core.runnables import RunnableParallel, RunnablePassthrough, RunnableLambda
from langchain_core.output_parsers import StrOutputParser
from langchain_openai import ChatOpenAI
from dotenv import load_dotenv

from app.utils import *
from app.prompt import prompt

warnings.filterwarnings("ignore")

In [2]:
# Load variables from a local .env file into the process environment
# (e.g. the PINECONE_API_KEY that is read later through os.getenv).
load_dotenv()

True

In [None]:
# Load the source documents from the local "data/" directory.
# NOTE(review): load_documents is not defined in this notebook; presumably it is
# provided by the `from app.utils import *` star import — confirm.
data = load_documents("data/")

In [15]:
# Sanity check: report how many documents were loaded and show one of them.
# Index 15 is an arbitrary sample; it raises IndexError if `data` holds fewer
# than 16 items (assuming `data` is a list).
print(f"Loaded {len(data)} documents.")
print(f"\nSample document: \n{data[15]}...")

Loaded 911 documents.

Sample document: 
page_content='2 
s h a p i n g  f e a r
For English Protestants the monster was the many- headed Hydra of the 
Catholic Church, for Catholics the diabolic reforms of Martin Luther, John 
Calvin, and others. There are monsters in the ages before us and monsters 
right in front of us, and often those new monsters wear the shapes of the old.
So my question about the nature of fear must be expanded to include 
time: What is the relation between what scares us now and what scared people 
in the past? Is being afraid something that is hardwired into human nature 
and never varies over the centuries? Do fears always take the same shapes, or 
do nations and cultures have distinctive fears that may metamorphose through 
the centuries? In other words, what is universal about fear, and what is cultur-
ally specific in its images and stories? What belongs to a certain era and what 
transcends time? What is the process by which nameless dreads are named?
Fea

In [None]:
# Split the loaded documents into smaller chunks for embedding and retrieval.
# NOTE(review): split_chunks is not defined in this notebook; presumably it is
# provided by the `from app.utils import *` star import, which is also where the
# chunk size / overlap would be configured — confirm.
chunks = split_chunks(data)

In [16]:
# Sanity check: report how many chunks were produced and show one of them.
# Index 15 is an arbitrary sample; it raises IndexError if `chunks` holds fewer
# than 16 items (assuming `chunks` is a list).
print(f"Split into {len(chunks)} chunks.")
print(f"\nSample chunk: \n{chunks[15]}...")

Split into 5035 chunks.

Sample chunk: 
page_content='p r e f a c e  
xi
the scattered fears of the past coalesce into more pervasive terrors, when the 
old stories meet new circumstances that expand and intensify their meaning.
Many times, in the course of my discussion, I have touched on issues that 
take up several library shelves, whether in religion, law, literature, science, and 
elsewhere. Many of these areas of study are controversial, and their authors are 
often a confusing but heady mixture of the academic, the fan, and the believer. 
As much as I could within the compass of my argument, I have tried to absorb 
the insights of scholarship as well as enthusiasm without delving excessively 
into often fascinating disputes that in themselves are well worth exploring. 
In its discussion of the relation of horror to religion, for example, Haunted 
is not so much about the intricate historical investigations that have taken 
place into the nature of belief in demons versus the rar

In [17]:
# Sentence-embedding model shared by the rest of the notebook: it is probed to
# size the Pinecone index and is handed to the vector store.
embedding = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")

In [None]:
# Pinecone client; the API key is taken from the PINECONE_API_KEY environment variable.
pinecone = Pinecone(api_key=os.getenv("PINECONE_API_KEY"))
index_name = "spirichat"

# Create the serverless index only when it does not exist yet, so re-running
# this cell leaves an existing index alone.
if not pinecone.has_index(index_name):
    pinecone.create_index(
        name=index_name,
        # Embed a throwaway string to read the vector size from the model itself
        # instead of hard-coding it.
        dimension=len(embedding.embed_query("Hello, world!")),
        metric="cosine",
        spec=ServerlessSpec(cloud="aws", region="us-east-1"),
    )

In [19]:
# Open a handle to the index, then wrap it in a LangChain vector store that is
# configured with the `embedding` model defined above.
index = pinecone.Index(index_name)
vector_store = PineconeVectorStore(index=index, embedding=embedding)

In [48]:
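# Add the chunks to the vector store, giving each one a stable "chunk_<i>" id.
# NOTE(review): the call below is commented out, presumably because the index was
# already populated in an earlier run — uncomment it for first-time ingestion.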
# vector_store.add_documents(
#     documents=chunks,
#     ids=[f"chunk_{i}" for i in range(len(chunks))]
# ) 

In [20]:
def retrieve_query(query, top_k=5, get_k=3):
    """Return the ``get_k`` highest-scoring documents for ``query``.

    Asks the notebook-level ``vector_store`` for ``top_k`` scored candidates,
    orders them from highest to lowest score and keeps the first ``get_k``.

    Args:
        query: Text to search for.
        top_k: Number of candidates requested from the vector store.
        get_k: Number of best-scoring candidates to keep. Must not be negative;
            ``None`` keeps every fetched candidate. Only ``top_k`` candidates
            are requested, so asking for more than ``top_k`` does not fetch
            additional documents.

    Returns:
        A list of ``(document, score)`` tuples, highest score first.

    Raises:
        ValueError: If ``get_k`` is negative.
    """
    # A negative value would be interpreted by the slice below as "drop the last
    # N items" and silently return the wrong documents, so reject it up front,
    # before any request is sent to the vector store.
    if get_k is not None and get_k < 0:
        raise ValueError(f"get_k must be non-negative, got {get_k}")

    scored_docs = vector_store.similarity_search_with_score(
        query=query,
        k=top_k,
    )

    # Highest score first.
    # NOTE(review): assumes a larger score means a closer match, which fits the
    # cosine metric this notebook creates the index with — confirm if the index
    # was created elsewhere with a different metric.
    ranked = sorted(scored_docs, key=lambda pair: pair[1], reverse=True)
    return ranked[:get_k]

In [21]:
# Run a sample query with the default top_k / get_k settings.
# `search` holds (document, score) pairs, as returned by retrieve_query.
search = retrieve_query("Why Spirits are so scary?")

In [28]:
# Dump every hit: its similarity score, the chunk text and the chunk metadata.
print("Retrived Documents:")
for doc, score in search:
    print(
        f"\nScore: {score}"
        f"\nDocument: {doc.page_content} "
        f"\n\nMetadata: {doc.metadata}"
    )

Retrived Documents:

Score: 0.644313693
Document: held by tribal cultures around the world. The appearance 
of spirits of the dead plays a role in rituals and beliefs 
among native North and South Americans. In some South 
American tribes, the dead appear as guardian spirits to 
medicine men and shamans.
The ancient Hebrews, Egyptians, Greeks, and Romans 
believed that the souls of the dead could return to haunt 
the living. The Roman scholar Pliny the Younger recorded 
the case of a Greek philosopher who moved into a 
haunted house. An apparition appeared wearing chains 
and led the philosopher to a spot where an excavation 
later revealed a skeleton in chains (see ATHENODORUS, 
HAUNTING OF).
During the Dark Ages, people believed in all manner 
of apparitions, usually frightful: DEMONS, VAMPIRES, and 
spectral creatures such as BLACK DOGS (see also BLACK 
SHUCK; WHISHT HOUNDS) and wild huntsmen (see HERNE 
THE HUNTER; WILD HUNT). By the Middle Ages, beliefs in 
ghosts were manipulated

In [29]:
# Turn the retrieved (document, score) pairs into one text block for the prompt.
# NOTE(review): format_docs_for_prompt is not defined in this notebook; presumably
# it is provided by the `from app.utils import *` star import — confirm.
context = format_docs_for_prompt(search)

In [31]:
# Inspect the exact context text that will be injected into the prompt.
print(f"Formatted Context: \n{context}")

Formatted Context: 
### Document: chunk_1406
Source: The encyclopedia of ghosts and spirits.pdf
Page: 33.0

Content: held by tribal cultures around the world. The appearance of spirits of the dead plays a role in rituals and beliefs among native North and South Americans. In some South American tribes, the dead appear as guardian spirits to medicine men and shamans. The ancient Hebrews, Egyptians, Greeks, and Romans believed that the souls of the dead could return to haunt the living. The Roman scholar Pliny the Younger recorded the case of a Greek philosopher who moved into a haunted house. An apparition appeared wearing chains and led the philosopher to a spot where an excavation later revealed a skeleton in chains (see ATHENODORUS, HAUNTING OF). During the Dark Ages, people believed in all manner of apparitions, usually frightful: DEMONS, VAMPIRES, and spectral creatures such as BLACK DOGS (see also BLACK SHUCK; WHISHT HOUNDS) and wild huntsmen (see HERNE THE HUNTER; WILD HUNT). By 

In [32]:
# Hugging Face endpoint for Meta-Llama-3-8B-Instruct, configured for text
# generation with a low temperature and a cap on newly generated tokens.
llm = HuggingFaceEndpoint(
    repo_id="meta-llama/Meta-Llama-3-8B-Instruct",
    task="text-generation",
    max_new_tokens=100,
    temperature=0.1,
)

# Chat wrapper around the endpoint; this is the object the rest of the notebook invokes.
model = ChatHuggingFace(llm=llm)

In [35]:
# Manual, step-by-step version of the pipeline: pick a question ...
question = "Tell me 5 things about Ghosts."

# ... and fill the prompt template with the previously formatted context and that question.
query = prompt.format(context=context, question=question)

In [36]:
# Inspect the fully rendered prompt (instructions + context + question) before sending it.
print(f"Final Query: \n{query}")

Final Query: 

    You are a helpful assistant for answering questions about Supernatural stuffs such as Ghost, Demon, Vampire, Werewolf, etc.

    Use the following context:
    ---------------
    ### Document: chunk_1406
Source: The encyclopedia of ghosts and spirits.pdf
Page: 33.0

Content: held by tribal cultures around the world. The appearance of spirits of the dead plays a role in rituals and beliefs among native North and South Americans. In some South American tribes, the dead appear as guardian spirits to medicine men and shamans. The ancient Hebrews, Egyptians, Greeks, and Romans believed that the souls of the dead could return to haunt the living. The Roman scholar Pliny the Younger recorded the case of a Greek philosopher who moved into a haunted house. An apparition appeared wearing chains and led the philosopher to a spot where an excavation later revealed a skeleton in chains (see ATHENODORUS, HAUNTING OF). During the Dark Ages, people believed in all manner of apparit

In [37]:
# Send the rendered prompt to the chat model.
# The result is a message object; its text is read from `.content` in the next cell.
answer = model.invoke(query)

In [39]:
# Print only the text of the model's reply.
print(f"Answer: \n{answer.content}")

Answer: 
Based on the provided context, here are 5 things about ghosts:

1. **Appearance of spirits**: In some South American tribes, spirits of the dead appear as guardian spirits to medicine men and shamans, whereas in ancient cultures such as the Greeks and Romans, the souls of the dead could return to haunt the living.
2. **Manifestation**: Ghosts may manifest as annoying habits, such as knocking or running faucets, or as lighted balls or mists. They can also become apparitions if facial features are recognized by witnesses.
3. **Connection to the past**: Ghosts allow the living to retain a connection with the dead and the past. They can also involve intensely personal experiences, such as visits from departed loved ones.
4. **Types of ghosts**: There are various types of ghosts, including those attached to places, such as elementals and fairies, and nonhuman entities like demons and angels.
5. **Influencing human lives**: According to the Warrens, humans invite malevolence into th

In [42]:
# Output parser for the chain: converts the chat model's output into a plain
# string, so the chain result can be printed directly.
parser = StrOutputParser()

In [43]:
# Fan the incoming question string out into the two variables the prompt is
# formatted with:
#   context  -> documents retrieved for the question, formatted as prompt text
#   question -> the question itself, passed through unchanged
parallel_chain = RunnableParallel(
    context=RunnableLambda(retrieve_query) | RunnableLambda(format_docs_for_prompt),
    question=RunnablePassthrough(),
)

# End-to-end pipeline: build the prompt inputs, render the prompt, call the chat
# model, then parse the model's output into a string.
chain = parallel_chain | prompt | model | parser

In [44]:
# Run the whole pipeline on an in-domain question; the chain input is the bare question string.
answer = chain.invoke("Write 5 point about Spirits")

In [45]:
# The chain ends with the string output parser, so the result is printed as-is (no `.content`).
print(answer)

Based on the provided context, here are 5 points about Spirits:

1. **Definition**: A spirit is a discarnate being, essence, or supernatural force of nature that exists in an invisible realm.
2. **Existence**: Spirits are believed to intervene regularly in the affairs of humanity, for better or worse, and can be seen under certain circumstances or by persons with clairvoyance.
3. **Variety**: Spirits come in a multitude of guises, such as fairies, elves, monsters, demons, and angels.
4. **Role in Religions**: Spirits are recognized, worshipped, and propitiated in animistic societies, and are often associated with primal qualities, characteristics, and elemental forces.
5. **Reverence**: In many societies, including animistic ones, ancestral spirits of the dead are particularly revered and honored, with households having special altars or spirit houses to feed offerings and seek advice and protection.


In [46]:
# Ask an out-of-domain question to check how the pipeline responds when the
# retrieved context does not cover the topic.
answer = chain.invoke("What is Artificial Intelligence?")

In [47]:
# Show the reply to the out-of-domain question.
print(answer)

I don't know the information about this. The provided context is related to supernatural creatures and doesn't mention Artificial Intelligence.
