In [None]:
import os
from langchain.text_splitter import RecursiveCharacterTextSplitter

def load_cleaned_texts(folder_path, splitter=None):
    """Read every ``.txt`` file in a folder and split it into text chunks.

    Args:
        folder_path: Directory containing the cleaned ``.txt`` files. Only
            direct children are read; entries with other extensions are
            skipped.
        splitter: Optional object exposing ``split_text(str) -> list[str]``.
            When omitted, a ``RecursiveCharacterTextSplitter`` with
            ``chunk_size=500`` and ``chunk_overlap=50`` is used.

    Returns:
        A ``(texts, metadatas)`` tuple of equal-length lists. ``texts[i]`` is
        one chunk and ``metadatas[i]`` is ``{"source": <file name>}`` naming
        the file that chunk came from.
    """
    if splitter is None:
        # Built once: the splitter configuration does not depend on the file.
        splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)

    texts, metadatas = [], []
    # os.listdir() returns entries in arbitrary order; sorting keeps the chunk
    # order identical from run to run.
    for filename in sorted(os.listdir(folder_path)):
        if not filename.endswith(".txt"):
            continue
        with open(os.path.join(folder_path, filename), "r", encoding="utf-8") as f:
            raw_text = f.read()
        chunks = splitter.split_text(raw_text)
        texts.extend(chunks)
        # One dict per chunk: `[d] * n` would alias a single dict n times, so
        # editing one chunk's metadata would silently change all of them.
        metadatas.extend({"source": filename} for _ in chunks)
    return texts, metadatas


In [None]:
from langchain.vectorstores import FAISS
from langchain.embeddings import HuggingFaceEmbeddings

# Directory holding the cleaned .txt files. Set the VERZTEC_CLEANED_DIR
# environment variable to point somewhere else; the fallback is the path this
# notebook was originally written against.
cleaned_dir = os.environ.get(
    "VERZTEC_CLEANED_DIR",
    "C:\\Users\\ethan\\OneDrive\\Desktop\\Bot_reponse_data\\cleaned",
)

texts, metadatas = load_cleaned_texts(cleaned_dir)

# An index cannot be built from an empty corpus; fail early with a message
# that names the folder instead of an error from deep inside the vector store.
if not texts:
    raise ValueError(f"No text chunks were loaded from {cleaned_dir!r}")

embedding_model = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")

# Embed every chunk, keep its source file as metadata, and persist the index
# to the local "verztec_vector_store" folder for the query cell to load.
vector_store = FAISS.from_texts(texts=texts, embedding=embedding_model, metadatas=metadatas)
vector_store.save_local("verztec_vector_store")


In [None]:
import os
from getpass import getpass

from langchain.chains import LLMChain
from langchain.chains import RetrievalQA
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.prompts import PromptTemplate
from langchain.vectorstores import FAISS
from langchain_community.chat_models import ChatOpenAI


# ✅ Set OpenRouter API details
# The key is read from the environment and only prompted for when missing, so
# it is never stored in the notebook. SECURITY: a key was previously hard-coded
# on this line; treat it as leaked and revoke it.
if not os.environ.get("OPENAI_API_KEY"):
    os.environ["OPENAI_API_KEY"] = getpass("OpenRouter API key: ")
os.environ["OPENAI_API_BASE"] = "https://openrouter.ai/api/v1"

# ✅ Load embedding model (must be the same model the index was built with)
embedding_model = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")

# ✅ Load FAISS vector store
# NOTE: load_local unpickles data from disk, which can execute arbitrary code.
# Only keep allow_dangerous_deserialization=True for an index folder that was
# produced locally by this notebook; never load one from an untrusted source.
vector_store = FAISS.load_local(
    "verztec_vector_store",
    embedding_model,
    allow_dangerous_deserialization=True
)

# ✅ Create retriever
# The number of documents to fetch has to be passed through search_kwargs;
# a bare `k=3` keyword is not a search parameter of the retriever.
retriever = vector_store.as_retriever(search_type="similarity", search_kwargs={"k": 3})

# ✅ Define the prompt template for structured response generation
# {context} is filled with the retrieved documents and {question} with the
# user's query by the "stuff" chain configured below.
prompt_template = """
You are a helpful assistant for Verztec employees. Answer the following question based on the provided context from Verztec's internal guidelines.

Context:
{context}

Question:
{question}

Answer:
"""

# ✅ Apply the prompt template
template = PromptTemplate.from_template(prompt_template)

# ✅ Load OpenRouter DeepSeek model into LangChain
llm = ChatOpenAI(
    model_name="tngtech/deepseek-r1t-chimera:free",
    temperature=0,
    openai_api_key=os.environ["OPENAI_API_KEY"],
    openai_api_base=os.environ["OPENAI_API_BASE"]
)

# ✅ Build RetrievalQA chain
# `llm` must be the chat model itself. Wrapping it in an LLMChain and passing
# that as `llm` is what produced "Missing some input keys: {'context'}".
# The custom prompt is supplied through chain_type_kwargs instead.
qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=retriever,
    chain_type_kwargs={"prompt": template},
    return_source_documents=True
)

# ✅ Query and response
query = "what if i have a question during a digital meeting"

# RetrievalQA runs the retriever and fills {context} on its own, so the only
# input it needs is the query.
response = qa_chain.invoke({"query": query})

# ✅ Output the response and sources
print("Answer:", response["result"])
print("\nSources:")
for doc in response["source_documents"]:
    print("-", doc.metadata["source"])