In [1]:
from dotenv import load_dotenv
load_dotenv()
from langchain_google_genai import ChatGoogleGenerativeAI    
from pathlib import Path  

In [2]:
import json
import re

In [3]:
# Directory (as a plain string) that is later handed to Chroma as its
# persist_directory: <current working directory>/db/main_db.
current_dir = Path.cwd()
persist_directory = str(current_dir.joinpath('db', 'main_db'))

In [4]:
# Chat model used by the `llm.invoke(...)` calls and the structured router
# further down in this notebook.
llm = ChatGoogleGenerativeAI(
    model="gemini-1.5-flash",
    # Sampling / output settings.
    temperature=0,
    max_tokens=None,
    # Request handling: no explicit timeout value is passed, two retries.
    timeout=None,
    max_retries=2,
)

In [5]:
from langchain_google_vertexai import VertexAIEmbeddings

# Embedding function passed to the Chroma vector store below.
embeddings = VertexAIEmbeddings(
    model="text-embedding-004",
)

In [6]:
from langchain_chroma import Chroma
# Chroma store backed by `persist_directory`, embedding with `embeddings`.
vector_store = Chroma(
    embedding_function=embeddings,
    persist_directory=persist_directory,
)

In [7]:
# Retriever over the vector store: similarity_score_threshold search with
# k=3 and score_threshold=0.1.
retriever = vector_store.as_retriever(
    search_kwargs={"score_threshold": 0.1, "k": 3},
    search_type="similarity_score_threshold",
)

In [8]:
from langchain_core.tools import tool
from pydantic import EmailStr

In [9]:
# LangGraph application
from langchain_core.prompts import ChatPromptTemplate
# from langchain_core.pydantic_v1 import Field
from pydantic import BaseModel, Field
from typing import Literal

In [10]:
from typing import List
from typing_extensions import TypedDict

In [11]:
## Data model
class RouteQuery(BaseModel):
    """Route a user query to the most relevant position in graph."""

    # NOTE(review): this model is passed to `llm.with_structured_output`, so
    # the class docstring and the field description are presumably forwarded
    # to the model as schema text - keep their wording stable unless a prompt
    # change is intended.
    datasource: Literal["vectorstore", "embedder"] | None = Field(
        default=None,
        description="Given a todo list  choose to route it to vectorstore(vector store has all information related to email,personal ingo ,phone number,ect), or embedder",
    )

In [12]:
# Wrap the chat model so its replies are returned as RouteQuery objects.
structured_llm_router = llm.with_structured_output(RouteQuery)

In [13]:

# Routing prompt: a fixed system instruction followed by a human message
# filled from the `summer_and_todolist` template variable.
system = """You are an expert at routing a user todo list choose to route it to vectorstore(vector store has all information related to email,personal ingo ,phone number,ect), or Non"""
route_prompt = ChatPromptTemplate.from_messages(
    [("system", system), ("human", "{summer_and_todolist}")]
)

In [14]:
# Chain: render the routing prompt, then call the structured-output router.
# Invoked with {"summer_and_todolist": ...} by route_question.
question_router = route_prompt | structured_llm_router

In [15]:
from typing import List
from typing_extensions import TypedDict

In [16]:
class GraphState(TypedDict):
    """
    Represents the state of our graph.

    Attributes:
        question: question asked by the user.
        generation: LLM generation.
        documents: list of documents.
        textToSpeech: text the model returned for the google meet recording
            (written by ``sst_text_to_speech``).
        summer_and_todolist: summary and todo list of the google meet
            recording (written by ``summer_and_todolist_maker``).
        embedder_status: status of the embedder.
        input: original text that ``generator`` adds details to.
        all_documents: context documents that ``generator`` reads.
        query_list: list of query strings.
        output: text written by ``generator`` and read by ``extractor``.
        extraction: list of dicts parsed by ``extractor`` from the LLM reply.
        scheduler: status string written by the ``scheduler`` node.
    """
    question: str
    generation: str
    documents: List[str]
    textToSpeech: str
    summer_and_todolist: str
    embedder_status: str
    input: str
    all_documents: List[str]
    query_list: List[str]
    output: str
    extraction: List[dict]
    # Declared so the key the `scheduler` node writes is part of the schema.
    scheduler: str

In [17]:
from langchain.schema import Document

In [18]:
import os

from google import genai

# Read the Gemini API key from the environment (load_dotenv() in the first
# cell loads a local .env file) instead of committing it to the notebook.
# NOTE(review): the key that used to be hardcoded here must be treated as
# leaked - revoke it and issue a new one.
_gemini_api_key = os.environ.get("GEMINI_API_KEY") or os.environ.get("GOOGLE_API_KEY")
if not _gemini_api_key:
    raise RuntimeError(
        "No Gemini API key found: set GEMINI_API_KEY or GOOGLE_API_KEY "
        "in the environment or in the .env file."
    )
client = genai.Client(api_key=_gemini_api_key)


In [19]:
def sst_text_to_speech(state:GraphState):
    """
    Upload the meeting recording and store the model's text reply.

    The audio file is uploaded through the module-level ``client`` and sent,
    together with a short instruction, to ``gemini-2.0-flash``.

    Args:
        state (dict): The current graph state

    Returns:
        state (dict): The same state object with ``textToSpeech`` set to the
        text of the model response
    """
    print("---TEXT TO SPEECH---")

    # NOTE(review): the recording path is machine-specific and hardcoded.
    uploaded_audio = client.files.upload(
        file=r"C://Users//DELL//Desktop//CloneFlow//production//myrecording.m4a"
    )
    response = client.models.generate_content(
        model="gemini-2.0-flash",
        contents=[" Text to speech of  this audio clip  ", uploaded_audio],
    )

    state["textToSpeech"] = response.text
    return state

In [20]:
def summer_and_todolist_maker(state:GraphState):
    """
    Ask the LLM for a todo list and summary of ``state["textToSpeech"]``.

    Args:
        state (dict): The current graph state; ``textToSpeech`` must be set

    Returns:
        state (dict): The same state object with ``summer_and_todolist`` set
        to the content of the LLM reply
    """
    print("---TODO LIST---")

    transcript = state["textToSpeech"]
    request = f"Create a todo list and summery  from this text: {transcript}"
    reply = llm.invoke(request)

    state["summer_and_todolist"] = reply.content
    return state

In [21]:
def scheduler(state:GraphState):
    """
    Mark the todo list as scheduled.

    No real scheduling happens here: the node only records a fixed status
    string in the state.

    Args:
        state (dict): The current graph state; ``summer_and_todolist`` must be set

    Returns:
        state (dict): The same state object with ``scheduler`` set to the status string
    """
    print("---SCHEDULER---")

    # The value is not used, but the lookup raises KeyError when the
    # summary/todo list is missing from the state.
    _pending_items = state["summer_and_todolist"]

    state.update(scheduler="shaduled")
    return state

In [22]:
def route_question(state:GraphState):
    """
    Route the summary/todo list to the vector store or the scheduler.

    The routing decision comes from ``question_router``. Only an explicit
    "vectorstore" decision goes to the vector store; every other outcome
    ("embedder", no decision, or no structured reply at all) falls back to
    the scheduler so that this function always returns a node name.

    Args:
        state (dict): The current graph state; ``summer_and_todolist`` is read

    Returns:
        str: Next node to call, either "vectorstore" or "scheduler"
    """

    print("---ROUTE QUESTION---")
    summer_and_todolist = state["summer_and_todolist"]
    source = question_router.invoke({"summer_and_todolist": summer_and_todolist})

    # `source` may be None when the model produced no structured reply, and
    # `datasource` itself defaults to None in RouteQuery.
    datasource = getattr(source, "datasource", None)
    if datasource == "vectorstore":
        print("---ROUTE QUESTION TO VECTOR STORE---")
        return "vectorstore"

    # The previous check compared against "scheduler", a value the router
    # schema cannot produce, so this branch never ran and None was returned.
    print("---ROUTE QUESTION TO SCHEDULER---")
    return "scheduler"

In [23]:
def generator(state:GraphState):
    """
    Produce the final text by enriching ``state["input"]`` with the
    documents in ``state["all_documents"]``.

    Args:
        state (dict): The current graph state; ``input`` and ``all_documents`` are read

    Returns:
        state (dict): The same state object with ``output`` set to the content of the LLM reply
    """
    print("---generator---")

    # Local names chosen so the builtin `input` is not shadowed.
    original_text = state["input"]
    context_docs = state["all_documents"]

    reply = llm.invoke(f"your task is to add details into the original text,Do not mention were you got the information formDo not mention were you got the information form.only provide details about the things menstioned,text{original_text},context:{context_docs}")
    enriched = reply.content
    print("output", enriched)

    state["output"] = enriched
    return state
    

In [24]:
def _parse_json_array(raw_output):
    """Return the JSON array embedded in ``raw_output`` as a list, or [] when none can be decoded."""
    # Greedy match from the first "[" to the last "]" drops any wrapper text
    # (for example a leading "json" tag or code fences) around the array.
    match = re.search(r'\[.*\]', raw_output, re.DOTALL)
    if not match:
        print("No valid JSON array found in the output.")
        return []
    try:
        queries = json.loads(match.group(0))
    except json.JSONDecodeError as exc:
        # Model output is not guaranteed to be valid JSON; report it and
        # continue with an empty result instead of crashing the graph node.
        print("Could not decode JSON array from the output:", exc)
        return []
    print("--->", queries)
    return queries


def extractor(state:GraphState):
    """
    Extract email information from ``state["output"]`` as a list of dicts.

    The LLM is asked to answer with a JSON array; the array is located in the
    reply and decoded. When the reply contains no array, or the array is not
    valid JSON, the extraction is an empty list.

    Args:
        state (dict): The current graph state; ``output`` is read

    Returns:
        state (dict): The same state object with ``extraction`` set to the decoded list
    """
    print("---extractor---")
    output = state["output"]
    print("output", output)
    promt = 'You are an email information extractor, will be given a text, you have to extract the information in format: [{"email":"examplename@exaple.com","body":"body of mail","subject":"subject of the mail"},{"email":"examplename@exaple.com","body":"body of mail","subject":"subject of the mail"}],do not give nay other thing other than the json string,if not found any email in the text, return json[{"nothing found":"no email found"}],text'
    result = llm.invoke(f"{promt}, text{output}")
    raw_output = result.content
    print("raw_output", raw_output)

    queries = _parse_json_array(raw_output)

    print("extraction", queries)
    state['extraction'] = queries

    return state

In [25]:
# creating flow

from langgraph.graph import END, StateGraph, START

In [26]:
# Graph builder whose state schema is GraphState; nodes are added below.
workflow = StateGraph(GraphState)

In [27]:
# Register four node callables on the workflow.
# NOTE(review): `generating_qureys` and `multi_query_retrival` are not defined
# in any cell shown above, and the recorded output of this cell is
# "NameError: name 'generating_qureys' is not defined". Their definitions
# must be run before this cell for it to succeed on a fresh kernel.
workflow.add_node("generating_qureys",generating_qureys)
workflow.add_node("multi_query_retrival",multi_query_retrival)
workflow.add_node("generator",generator)
workflow.add_node("extractor",extractor)

NameError: name 'generating_qureys' is not defined