In [None]:
! pip install langchain faiss-cpu sentence-transformers openai tiktoken rouge-score nltk  python-dotenv langchain-community langchain_openai rouge nltk

In [None]:
from dotenv import load_dotenv
# Load environment variables from a .env file, if one is found.
# NOTE(review): presumably this is where the OpenAI API key comes from — confirm.
load_dotenv()

In [None]:
from pathlib import Path
from langchain.schema import Document
from langchain.embeddings import SentenceTransformerEmbeddings
from langchain.vectorstores import FAISS
import re

def load_documents_from_folder(folder_path):
    """Load each non-empty .txt file in the folder as a separate Document.

    Files are visited in sorted path order so the returned list (and any
    index built from it) is the same on every run, independent of the order
    in which the filesystem lists directory entries.

    Args:
        folder_path: Directory (str or Path) searched non-recursively for
            ``*.txt`` files.

    Returns:
        A list of Documents, one per file that contains non-whitespace text.
        ``page_content`` is the file text with leading/trailing whitespace
        removed and every internal whitespace run collapsed to one space;
        ``metadata["source"]`` is the file's name. The list is empty when no
        matching files are found.

    Raises:
        UnicodeDecodeError: If a file is not valid UTF-8.
    """
    documents = []

    for file in sorted(Path(folder_path).glob("*.txt")):
        text = file.read_text(encoding="utf-8")
        clean_text = re.sub(r'\s+', ' ', text.strip())  # Clean and normalize
        if not clean_text:
            # A blank file carries no content worth embedding or retrieving.
            continue
        documents.append(
            Document(page_content=clean_text, metadata={"source": file.name})
        )

    return documents

# Load documents from folder
folder_path = "Dataset/ancient_greece_data"
documents = load_documents_from_folder(folder_path)

# Fail early with an actionable message: the path is relative to the current
# working directory, so a wrong cwd would otherwise only surface later as an
# error from inside the index construction.
if not documents:
    raise FileNotFoundError(
        f"No .txt documents found in {str(Path(folder_path).resolve())!r}; "
        "check the dataset path and the notebook's working directory."
    )

# Create embedding model
embedding_model = SentenceTransformerEmbeddings(model_name="all-MiniLM-L6-v2")

# Build FAISS index
vectorstore = FAISS.from_documents(documents, embedding_model)

# SAVE TO LOCAL STORE
vectorstore.save_local("faiss_index_ancient_greece")

In [None]:
# Reload the index from the "faiss_index_ancient_greece" folder instead of rebuilding it.
# The same embedding_model is passed so that queries are embedded by the model used here.
# NOTE(review): allow_dangerous_deserialization=True opts in to loading pickled data
# (per the LangChain FAISS docs) — only load index folders you created yourself.
vectorstore = FAISS.load_local("faiss_index_ancient_greece", embedding_model, allow_dangerous_deserialization=True)

In [None]:
# Wrap the vector store as a retriever; no arguments are passed, so the
# library's default search settings apply.
retriever = vectorstore.as_retriever()

In [None]:
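# Optional sanity check: uncomment to inspect the raw retrieval results and their relevance scores.
# vectorstore.similarity_search_with_relevance_scores(query="who was socrates", k=4)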

In [None]:
from langchain_openai import ChatOpenAI
# Chat model used both to rewrite follow-up questions and to generate answers.
# NOTE(review): presumably authenticates via an OPENAI_API_KEY environment variable — confirm.
llm = ChatOpenAI(model="gpt-4o-mini")

In [None]:
from langchain.chains import create_history_aware_retriever
from langchain_core.prompts import MessagesPlaceholder

In [None]:
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain


# Prompt for the question-rewriting step: the prior conversation is injected
# through the "chat_history" placeholder and the newest user message through
# {input}; the model is asked to return a standalone question, not an answer.
contextualize_q_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", "You are a helpful assistant that rewrites follow-up questions into standalone questions using chat history."),
        MessagesPlaceholder("chat_history"),
        ("human", """Given the above chat history and the latest user question below,
reformulate it into a standalone question. Do not answer the question.
If it's already standalone, return it as is.

Latest user question:
{input}"""),
    ]
)


# Retriever that uses the LLM and contextualize_q_prompt to turn the latest
# question into a standalone query before searching the underlying retriever.
history_aware_retriever = create_history_aware_retriever(
    llm, retriever, contextualize_q_prompt
)

# Answering prompt: system instructions followed by the retrieved {context},
# then the prior conversation, then the user's question.
# The literal reply 'I dont know' is kept exactly as written in case code
# elsewhere compares answers against it.
prompt = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            "You are an assistant for question-answering tasks.\n"
            "Answer this question using the provided context only.\n"
            "If you don't know the answer, just say 'I dont know'\n"
            "{context}",
        ),
        MessagesPlaceholder("chat_history"),
        ("human", "{input}"),
    ]
)


# Chain that feeds the retrieved documents into `prompt` and calls the LLM.
question_answer_chain = create_stuff_documents_chain(llm, prompt)

# Full pipeline: history-aware retrieval followed by answer generation.
contextual_rag_chain = create_retrieval_chain(history_aware_retriever, question_answer_chain)


In [None]:
from langchain_community.chat_message_histories import ChatMessageHistory
from langchain_core.chat_history import BaseChatMessageHistory
from langchain_core.runnables.history import RunnableWithMessageHistory

# In-memory registry of chat histories, keyed by session id.
store = {}


def get_session_history(session_id: str) -> BaseChatMessageHistory:
    """Return the chat history stored for ``session_id``, creating an empty one on first use."""
    try:
        return store[session_id]
    except KeyError:
        history = ChatMessageHistory()
        store[session_id] = history
        return history


# Wrap the RAG chain so that each call is tied to a per-session message history
# obtained from get_session_history.
conversational_rag_chain = RunnableWithMessageHistory(
    contextual_rag_chain,
    get_session_history,
    input_messages_key="input",  # key of the user's question in the invoke() payload
    history_messages_key="chat_history",  # name of the MessagesPlaceholder in both prompts
    output_messages_key="answer",  # key of the generated answer in the result
)

In [None]:
# Display the assembled chain's repr to inspect its structure.
conversational_rag_chain

In [None]:
# First turn of session "abc1235".
response=conversational_rag_chain.invoke(
    {"input": "who is socrates"},
    config={
        "configurable": {"session_id": "abc1235"}
    },  # get_session_history creates the "abc1235" entry in `store` on first use.
)
print(response["answer"])

In [None]:
# Follow-up turn in the same session; the question only makes sense together
# with the earlier turn, so it exercises the history-aware rewriting step.
response=conversational_rag_chain.invoke(
    {"input": "where did he lived"},
    config={
        "configurable": {"session_id": "abc1235"}
    },  # same session id as the previous call, so the "abc1235" history in `store` is reused.
)
print(response["answer"])