In [None]:
# llm initialization (not secured with env file )
from langchain_core.tools import tool
from langchain_community.tools.tavily_search import TavilySearchResults
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import Runnable
from langchain_aws import ChatBedrock
from typing import Annotated
from langchain.docstore.document import Document
from typing_extensions import TypedDict
from langgraph.graph.message import AnyMessage, add_messages
from langchain_core.messages import ToolMessage
from langchain_core.runnables import RunnableLambda
from langgraph.prebuilt import ToolNode
from langgraph.prebuilt import tools_condition
from langchain_groq import ChatGroq

class GraphState(TypedDict):
    """Typed dictionary describing the state keys used by this notebook."""

    # Message history; annotated with LangGraph's `add_messages` so updates
    # to this key are merged into the existing list.
    messages: Annotated[list[AnyMessage], add_messages]

    # Declared as a list of documents.
    # NOTE(review): `rag_tool` assigns a retriever object to this key rather
    # than a list of Document instances — confirm which shape is intended.
    documents: list[Document]

    # Filesystem path of the PDF that `rag_tool` reads and indexes.
    pdf_path: str

# Read the Groq API key from the environment instead of committing it to the
# notebook. NOTE: the key that used to be hard-coded on this line has been
# exposed in source control and should be revoked and rotated.
import os

_groq_api_key = os.environ.get("GROQ_API_KEY")
if not _groq_api_key:
    raise RuntimeError(
        "GROQ_API_KEY is not set; export it (or load it from a .env file) "
        "before initialising the LLM."
    )

# Shared chat model used by the rest of the notebook.
llm = ChatGroq(model="llama3-70b-8192", api_key=_groq_api_key)

In [None]:
# rag tool 
import spacy,pickle,os,boto3
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
from pdfminer.high_level import extract_text
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.chains import ConversationalRetrievalChain
from langchain.memory import ConversationBufferMemory

@tool
def rag_tool(state: dict) -> dict:
    """Index the PDF at state["pdf_path"] and attach a retriever to the state.

    The PDF text is extracted, split into overlapping chunks, stripped of
    stop words and punctuation, lemmatized, embedded with the
    "all-MiniLM-L6-v2" model and stored in a FAISS index that is also saved
    to disk.

    Args:
        state: State dictionary; must contain the key "pdf_path".

    Returns:
        The same ``state`` dictionary with ``state["documents"]`` set to a
        retriever over the new index (``k=5`` results per query).

    Raises:
        KeyError: If ``state`` has no "pdf_path" entry.
        ValueError: If no text chunks could be produced from the PDF.
    """
    # NOTE: the docstring above is required — the `@tool` decorator uses it as
    # the tool description and refuses to build a tool without one.

    # One definition of the on-disk location instead of repeated literals.
    storage_dir = "/Users/pmanthan/Desktop/tomo.ai/faiss_storage"
    embeddings_file = os.path.join(storage_dir, "faiss_storage.pkl")

    path = state["pdf_path"]
    nlp = spacy.load('en_core_web_sm')

    def text_chunk(text):
        """Split raw text into chunks (chunk_size=1000, chunk_overlap=200)."""
        splitter = RecursiveCharacterTextSplitter(
            chunk_size=1000,
            chunk_overlap=200,
            separators=["\n\n", "\n", ".", " ", ""],
        )
        return splitter.split_text(text)

    def text_cleaning(chunks):
        """Remove stop-word and punctuation tokens from every chunk."""
        # nlp.pipe processes the chunks as a batch instead of one call each.
        return [
            " ".join(
                token.text
                for token in doc
                if not token.is_stop and not token.is_punct
            )
            for doc in nlp.pipe(chunks)
        ]

    def lemmatize_text(cleaned_chunks):
        """Replace every token of every chunk with its lemma."""
        return [
            " ".join(token.lemma_ for token in doc)
            for doc in nlp.pipe(cleaned_chunks)
        ]

    def embed_store(lemma_chunks):
        """Embed the chunks, build the FAISS index and persist it to disk."""
        docs = [Document(page_content=chunk) for chunk in lemma_chunks]
        embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
        # FAISS - Facebook AI Similarity Search
        db = FAISS.from_documents(docs, embeddings)

        os.makedirs(storage_dir, exist_ok=True)
        db.save_local(storage_dir)

        # The embeddings object is still pickled next to the index, as before.
        # NOTE(review): unpickling executes arbitrary code; only ever load
        # this file from a location you fully trust.
        with open(embeddings_file, "wb") as f:
            pickle.dump(embeddings, f)

        return db

    chunks = text_chunk(extract_text(path))
    if not chunks:
        # Fail early with a clear message instead of an obscure error from
        # building an index over zero documents.
        raise ValueError(f"No text could be extracted from {path!r}")

    # Use the freshly built store directly. The previous code saved it and
    # then immediately reloaded it via pickle.load and
    # allow_dangerous_deserialization=True, which was redundant and added an
    # unnecessary unsafe-deserialisation step.
    vector_store = embed_store(lemmatize_text(text_cleaning(chunks)))

    # NOTE(review): GraphState declares "documents" as list[Document], but a
    # retriever is stored here (unchanged from the original) — confirm intent.
    state["documents"] = vector_store.as_retriever(search_kwargs={"k": 5})

    # Return the updated state so the tool call produces a result instead of
    # None.
    return state


In [None]:
# state management and error handling 

def handle_too_error(state:dict):
    # NOTE(review): this function appears unfinished. As written it binds the
    # `state.get` method itself (it is not called) to a local name and then
    # implicitly returns None.
    # TODO confirm: presumably a key such as an error entry was meant to be
    # looked up here, and the name may be a typo for "handle_tool_error".
    error = state.get
