In [None]:
# Install the LangChain / LangGraph packages this notebook relies on.
# NOTE(review): no versions are pinned here, so a fresh install pulls whatever is
# current — consider pinning once a known-good set of versions is confirmed.
%pip install langchain langchain-community langchain-google-genai langgraph langchain-groq tavily-python 

In [None]:
import os
from datetime import datetime
from typing import List, Dict, Any, Optional

from google.colab import userdata
from langchain.chains import LLMChain
from langchain.prompts import ChatPromptTemplate
from langchain_community.utilities.tavily_search import TavilySearchAPIWrapper
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_groq import ChatGroq
from langgraph.graph import StateGraph, END
from pydantic import BaseModel, Field



In [None]:

class MovieQueryState(BaseModel):
    """Shared state carried through the movie-query graph.

    Each graph node receives this object, fills in the fields it is
    responsible for, and returns it to the next node.
    """

    # The question the pipeline is answering.
    original_query: str = Field(description="Original user query")

    # Search hits as stored by the web-search node.
    raw_search_results: List[Dict] = Field(
        description="Raw search results",
        default_factory=list,
    )

    # Non-empty text lines kept by the content-extraction node.
    cleaned_content: List[str] = Field(
        description="Cleaned relevant paragraphs",
        default_factory=list,
    )

    # Answer text written by the response-synthesis node.
    final_response: str = Field(
        description="Final synthesized response",
        default="",
    )

    # One url/title/published_date record per search hit.
    sources: List[Dict] = Field(
        description="Sources used",
        default_factory=list,
    )

    # Earlier conversation turns supplied by the caller.
    chat_history: List[str] = Field(
        description="Chat conversation history",
        default_factory=list,
    )

In [None]:
# Groq-hosted Llama 3 chat model; used by the response-synthesis and
# follow-up-rewriting chains. The API key comes from Colab's `userdata` secrets.
llama3_llm = ChatGroq(
    model_name="llama3-70b-8192",
    api_key=userdata.get("GROQ_API_KEY"),
)

# Gemini chat model; used by the content-extraction and query-validation chains.
gemini_llm = ChatGoogleGenerativeAI(
    model="gemini-2.0-flash",
    google_api_key=userdata.get("GOOGLE_API_KEY"),
)
# Export the Tavily key before the wrapper below is built: the wrapper is given
# no key explicitly, so presumably it reads TAVILY_API_KEY from the environment.
os.environ["TAVILY_API_KEY"] = userdata.get("TAVILY_API_KEY") 

# Search client used by web_search_agent.
tavily_search = TavilySearchAPIWrapper()


In [None]:
def web_search_agent(state: MovieQueryState) -> MovieQueryState:
    """Run a web search for the state's query and store the tagged hits.

    The query is sent to the search client as-is (no query analysis), asking
    for at most 5 results. Every hit is stamped with the query that produced
    it and the time it was retrieved, then the list is saved on
    ``state.raw_search_results``.

    Args:
        state: Current graph state; ``original_query`` is read.

    Returns:
        The same state object with ``raw_search_results`` replaced.
    """
    query = state.original_query

    def _tag(hit):
        # Annotate the hit in place so downstream nodes know its provenance.
        hit["query"] = query
        hit["timestamp"] = datetime.now().isoformat()
        return hit

    state.raw_search_results = [
        _tag(hit) for hit in tavily_search.results(query, max_results=5)
    ]
    return state

In [None]:
# Prompt for the extraction step. It receives the user's question together with
# the raw search hits so that "most relevant" is judged against the question
# being asked; previously the query was supplied by the agent but never
# referenced by the template.
content_extraction_prompt = ChatPromptTemplate.from_template("""
You are a Content Extraction Specialist. Your job is to extract the most relevant information from search results. extract the content on movies and tv series.

User query:
{original_query}

Search results:
{raw_search_results}

Extract the most relevant 1-2 paragraphs per result, focusing on clarity and directness.

Response:
""")

In [None]:
def content_extraction_agent(state: MovieQueryState) -> MovieQueryState:
    """Condense the raw search hits into a list of non-empty text lines.

    The extraction prompt is run through the Gemini model with the user's
    query and the raw search results. The model's reply is split into lines;
    each line is trimmed and blank lines are discarded before the result is
    stored on ``state.cleaned_content``.

    Args:
        state: Current graph state; ``original_query`` and
            ``raw_search_results`` are read.

    Returns:
        The same state object with ``cleaned_content`` replaced.
    """
    extraction_chain = LLMChain(llm=gemini_llm, prompt=content_extraction_prompt)
    payload = {
        "original_query": state.original_query,
        "raw_search_results": state.raw_search_results,
    }
    reply_text = extraction_chain.invoke(payload)['text']

    kept_lines = []
    for raw_line in reply_text.strip().splitlines():
        trimmed = raw_line.strip()
        if trimmed:
            kept_lines.append(trimmed)

    state.cleaned_content = kept_lines
    return state


In [None]:
# Prompt for the final answer. Template variables:
#   {chat_history}    - earlier conversation turns
#   {original_query}  - the question being answered
#   {cleaned_content} - text produced by the extraction step
response_synthesis_prompt = ChatPromptTemplate.from_template("""
You are a Movie Information Specialist. Your job is to synthesize a helpful, accurate response.

Based on the content below, write a comprehensive, helpful response to the original query.
The response should be in a paragraph do not use points.
Chat History:
{chat_history}
Original Query: {original_query}

Content:
{cleaned_content}

Guidelines:
- For factual: Be direct and precise.
- For plot: Narrate like a storyteller.
- For opinion: Present balanced views.
- For analytical: Provide deep insights.
 

Response:
""")

In [None]:
def response_synthesis_agent(state: MovieQueryState) -> MovieQueryState:
    """Write the final answer and record which sources backed it.

    Runs the synthesis prompt through the Llama 3 model using the query, the
    extracted content (joined with newlines) and the chat history (joined with
    newlines, or the literal ``"None"`` when there is no history). The
    stripped reply is stored on ``state.final_response`` and one source record
    per raw search hit is stored on ``state.sources``.

    Args:
        state: Current graph state; ``original_query``, ``cleaned_content``,
            ``chat_history`` and ``raw_search_results`` are read.

    Returns:
        The same state object with ``final_response`` and ``sources`` set.

    Raises:
        KeyError: If a raw search hit has no ``"url"`` entry.
    """
    chain = LLMChain(llm=llama3_llm, prompt=response_synthesis_prompt)
    response = chain.invoke({
        "original_query": state.original_query,
        # The comma after this entry was missing, which made the whole cell a
        # SyntaxError and left this function undefined.
        "cleaned_content": "\n".join(state.cleaned_content),
        "chat_history": "\n".join(state.chat_history) if state.chat_history else "None",
    })

    state.final_response = response['text'].strip()

    # NOTE(review): the date is looked up under "published"; confirm that this
    # matches the field name the search results actually carry.
    state.sources = [
        {
            "url": item["url"],
            "title": item.get("title", "Unknown"),
            "published_date": item.get("published", "Unknown"),
        }
        for item in state.raw_search_results
    ]
    return state

In [None]:
# Build the three-stage pipeline: web_search -> content_extraction -> response_synthesis.
movibot = StateGraph(MovieQueryState)

# Register each stage under the name the edges below refer to.
movibot.add_node("web_search", web_search_agent)
movibot.add_node("content_extraction", content_extraction_agent)
movibot.add_node("response_synthesis", response_synthesis_agent)

# Every run starts at the search stage...
movibot.set_entry_point("web_search")

# ...then flows straight through the remaining stages to the end of the graph.
movibot.add_edge("web_search", "content_extraction")
movibot.add_edge("content_extraction", "response_synthesis")
movibot.add_edge("response_synthesis", END)

# Compiled, invokable form of the graph used by get_movie_information.
movie_companion_agent = movibot.compile()

In [None]:
def rewrite_followup_query(query: str, chat_history: List[str]) -> str:
    """Turn a follow-up question into a self-contained one.

    The conversation history (joined with newlines) and the new query are
    handed to the Llama 3 model with instructions to restate the query so it
    carries all the context needed to answer it.

    Args:
        query: The follow-up question as typed by the user.
        chat_history: Earlier conversation turns, oldest first.

    Returns:
        The model's rewritten query with surrounding whitespace removed.
    """
    # Spelled out line by line so that the whitespace sent to the model
    # (including the trailing space on the first sentence) is explicit.
    template_text = (
        "\n"
        "You are a helpful query rewriting agent. Given the conversation history and the new followup query, \n"
        "reformulate the query so that it is self-contained and includes all necessary context to be answered properly.\n"
        "\n"
        "Conversation History:\n"
        "{chat_history}\n"
        "\n"
        "Followup Query:\n"
        "{query}\n"
        "\n"
        "Self-Contained Query:"
    )
    rewriter = LLMChain(
        llm=llama3_llm,
        prompt=ChatPromptTemplate.from_template(template_text),
    )
    history_text = "\n".join(chat_history)
    reply = rewriter.invoke({"chat_history": history_text, "query": query})
    return reply["text"].strip()

In [None]:
def get_movie_information(query: str, chat_history: Optional[List[str]] = None) -> Dict[str, Any]:
    """Answer a movie/TV question by running it through the compiled graph.

    When earlier conversation turns are supplied, the query is first rewritten
    into a self-contained question; the (possibly rewritten) query and the
    history are then fed to the graph.

    Args:
        query: The user's question.
        chat_history: Earlier conversation turns, oldest first. ``None`` or an
            empty list means there is no prior context and the query is used
            unchanged.

    Returns:
        A dict with ``"response"`` (the final answer text) and ``"sources"``
        (the source records collected by the graph).
    """
    # A None default replaces the former shared mutable `[]` default, and the
    # copy keeps the caller's list isolated from anything done downstream.
    history = list(chat_history) if chat_history else []

    # If there is prior chat history, rewrite the query to include context.
    if history:
        query = rewrite_followup_query(query, history)

    initial_state = MovieQueryState(original_query=query, chat_history=history)
    result_values = movie_companion_agent.invoke(initial_state)

    return {
        "response": result_values["final_response"],
        "sources": result_values["sources"],
    }

In [None]:
def validate_query_with_llm(query: str) -> bool:
    """Uses an LLM to check if the query is related to movies or TV series.

    The Gemini model is asked to answer "yes" or "no". The reply is trimmed
    and lower-cased, and any leading quote or markup characters are ignored,
    so replies such as ``"Yes"`` or ``**yes**`` still count as a yes.

    Args:
        query: The user's question.

    Returns:
        True when the model's reply starts with "yes", False otherwise.
    """
    validation_prompt = ChatPromptTemplate.from_template(
        "\n"
        "You are a Movie Query Validator. Determine if the following query is related to movies or TV series. \n"
        "It can be a question, a request for information, or a general inquiry.\n"
        "It can contain titles, actors, directors, or any other relevant details.\n"
        "Return \"yes\" if it is, or \"no\" if it is not, with no extra commentary.\n"
        "\n"
        "Query: {query}\n"
        "\n"
        "Answer:"
    )
    chain = LLMChain(llm=gemini_llm, prompt=validation_prompt)
    response = chain.invoke({"query": query})
    answer = response['text'].strip().lower()
    # The prompt shows the expected answers in quotes, so the model may echo
    # them back quoted or emphasised; skip such leading decoration before
    # testing the verdict.
    verdict = answer.lstrip("\"'`*_([ ")
    return verdict.startswith("yes")

In [None]:
if __name__ == "__main__":
    # Simple CLI interface: read a question, validate it, answer it, and keep
    # a short rolling window of the conversation as context for follow-ups.
    chat_count = 3  # number of most recent history entries kept as context
    print("Movie Companion System")
    print("-----------------------")
    chat_memory = []
    while True:
        query = input("\nEnter your movie question (or 'q' to quit, 'clear' to reset conversation): ").strip()
        command = query.lower()

        if command == 'q':
            break

        if command == 'clear':
            chat_memory = []
            print("Chat history cleared.")
            continue

        # Nothing typed: ask again instead of spending an LLM call on it.
        if not query:
            continue

        if not validate_query_with_llm(query):
            print("The query does not appear to be related to movies or TV series. Please ask a relevant question.")
            continue

        print("\nProcessing your query...")

        try:
            # Pass only the turns that came BEFORE this query. Appending the
            # current query first made the history non-empty on every call, so
            # even the very first question was sent through the follow-up
            # rewriter and each query appeared in its own history.
            result = get_movie_information(query, chat_history=list(chat_memory))
            bot_response = result['response']
        except Exception as e:
            # A failed request is not recorded, so the history never holds a
            # user turn without its matching bot turn.
            print(f"Error processing query: {e}")
            continue

        # Record the completed exchange, then trim to the rolling window.
        chat_memory.append(f"User: {query}")
        chat_memory.append(f"Bot: {bot_response}")
        chat_memory = chat_memory[-chat_count:]

        print("\nRESPONSE:")
        print(f"A: {bot_response}")