In [42]:
# Absolute location of the cleaned corpus text file.
corpus_path = "/Users/kumarpersonal/Downloads/Inquiry-Assistant/Context/cleaned_text.txt"

# Load the entire file into memory as a single UTF-8 decoded string.
with open(corpus_path, mode="r", encoding="utf-8") as corpus_file:
    full_text = corpus_file.read()

In [43]:
import os
from dotenv import load_dotenv

# Load key/value pairs from the project's .env file into the process environment.
env_path = "/Users/kumarpersonal/Downloads/Inquiry-Assistant/venv/.env"
load_dotenv(dotenv_path=env_path)


def _require_env(name):
    """Return the value of environment variable `name`, failing loudly if unset.

    Args:
        name: environment variable to look up.

    Returns:
        The non-empty string value of the variable.

    Raises:
        RuntimeError: if the variable is missing or empty after the .env load.
    """
    value = os.getenv(name)
    if not value:
        raise RuntimeError(
            f"Environment variable {name!r} is not set; "
            f"define it in {env_path} or export it before starting the kernel."
        )
    return value


# Assigning None into os.environ raises an opaque
# "TypeError: str expected, not NoneType", so validate each key first.
os.environ['LANGCHAIN_API_KEY'] = _require_env('LANGCHAIN_API_KEY')
os.environ['LANGCHAIN_TRACING_V2'] = 'true'
os.environ['LANGCHAIN_PROJECT'] = 'inquiry-assistant'

# Keep the Groq key both in the environment and in a variable for later cells.
groq_api_key = _require_env('GROQ_API_KEY')
os.environ['GROQ_API_KEY'] = groq_api_key

In [44]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Splitter settings gathered in one place; separators are listed from the
# coarsest (blank line) to the finest (single space).
splitter_settings = {
    "chunk_size": 500,
    "chunk_overlap": 50,
    "separators": ["\n\n", "\n", ".", " "],
}
splitter = RecursiveCharacterTextSplitter(**splitter_settings)

# Pass the whole corpus as a single input text and split it into chunk documents.
documents = splitter.create_documents([full_text])

In [45]:
from langchain_huggingface import HuggingFaceEmbeddings
# Embedding model that is passed to the Chroma vector store.
embeddings = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-MiniLM-L6-v2",
)

In [46]:
from langchain.vectorstores import Chroma

persist_directory = "/Users/kumarpersonal/Downloads/Inquiry-Assistant/VectorStore/chroma_db"

# Open (or create) the persisted collection first. Calling
# Chroma.from_documents on every run would add the same chunks again, so
# re-running this cell would keep duplicating the index.
vectorstore = Chroma(
    persist_directory=persist_directory,
    embedding_function=embeddings,
)

# Only embed and write the corpus when the collection is still empty.
if not vectorstore.get(limit=1)["ids"]:
    vectorstore = Chroma.from_documents(
        documents,
        embedding=embeddings,
        persist_directory=persist_directory,
    )
    # Older Chroma releases need an explicit flush to disk; newer ones persist
    # automatically and may not expose persist() at all.
    if hasattr(vectorstore, "persist"):
        vectorstore.persist()

# NOTE(review): "attempt to write a readonly database" with SQLite code 1032
# (SQLITE_READONLY_DBMOVED) presumably means the database file was moved or
# deleted while this kernel still had it open. Restart the kernel instead of
# removing the chroma_db directory mid-session; to rebuild after the corpus
# changes, clear the collection and re-run from a fresh kernel.

InternalError: Query error: Database error: error returned from database: (code: 1032) attempt to write a readonly database

In [34]:
# Similarity-search retriever over the vector store, requesting k=5 results per query.
retriever = vectorstore.as_retriever(
    search_type="similarity",
    search_kwargs={"k": 5},
)

In [35]:
from langchain_groq import ChatGroq
# Groq chat model client, authenticated with the key loaded from the .env file.
llm = ChatGroq(
    groq_api_key=groq_api_key,
    model="gemma2-9b-it",
)

In [36]:
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder

# System turn: restrict answers to the supplied context and give a fixed
# fallback sentence for questions the context does not cover.
system_instructions = (
    "You are an intelligent assistant for Scaler Academy, trained on internal documents, placement records, program curricula, and student feedback. "
    "You should answer questions only based on the context provided. "
    "If the answer is not found in the context, reply with: "
    "“I'm sorry, I couldn't find that information in the available documents.” "
    "Be precise, concise, and maintain a professional and helpful tone."
)

# Human turn: the retrieved context followed by the user's question.
human_template = "Context:\n{context}\n\nQuestion:\n{question}"

chat_prompt = ChatPromptTemplate.from_messages([
    ("system", system_instructions),
    ("human", human_template),
])

In [37]:
from langchain_core.output_parsers import StrOutputParser

# Final stage of the RAG chain; presumably reduces the model reply to a plain
# string (confirm against the StrOutputParser documentation).
output_parser = StrOutputParser()

In [38]:
from langchain_core.runnables import RunnableLambda, RunnableMap

# Runnable wrapper: reads "question" from the chain input and asks the
# retriever for matching documents.
retriever_runnable = RunnableLambda(
    lambda payload: retriever.invoke(payload["question"])
)

def format_inputs(inputs):
    """Build the prompt variables from the retrieval step's output.

    Args:
        inputs: mapping with "documents" (an iterable of objects exposing a
            `page_content` string) and "question".

    Returns:
        dict with "context" (the page contents joined by blank lines) and the
        unchanged "question".
    """
    page_texts = (doc.page_content for doc in inputs["documents"])
    context = "\n\n".join(page_texts)
    return {"context": context, "question": inputs["question"]}

In [39]:
# First stage: fetch documents and carry the question through unchanged.
retrieve_and_passthrough = RunnableMap({
    "documents": retriever_runnable,
    "question": lambda payload: payload["question"],
})

# Full pipeline: retrieve -> build prompt variables -> prompt -> LLM -> parse output.
rag_chain = retrieve_and_passthrough | format_inputs | chat_prompt | llm | output_parser

In [41]:
# Ask for a question interactively and echo it back.
query = input("Enter your question: ")
print("\nQuestion:", query)

# Run the question through the RAG chain and show the generated answer.
chain_input = {"question": query}
response = rag_chain.invoke(chain_input)
print("\nAnswer:", response)


Question: Scaler's fee structure

Answer: I'm sorry, I couldn't find that information in the available documents. 

