In [1]:
from dotenv import load_dotenv
import os
import google.generativeai as genai

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
load_dotenv()

True

In [121]:
from typing_extensions import TypedDict

class SubQuery(TypedDict):
        query: str
        status: str
        reference: str
        attempts: int
        result: dict
        
class AggregatedResults(TypedDict):
        summary: str
        references: str

class State(TypedDict):
        original_query: str
        subqueries: list[SubQuery]
        aggregated_results: AggregatedResults
        output: str

In [77]:
from langchain_google_genai import ChatGoogleGenerativeAI
llm = ChatGoogleGenerativeAI(model="gemini-1.5-flash-latest")


In [78]:
import re
from langchain_core.prompts import ChatPromptTemplate

In [79]:
QUERY_RECTIFY_TEMPLATE= """
        You are an assistant specialized in rewriting and correcting the input queries by user.
        You review the query for spelling, grammar and reconstruct it, ensuring it is correct and the original message is preserved.
        However, you let the query remain unchanged if nothing is to be corrected.

        User Query: "{query}"
        """

rectify_prompt = ChatPromptTemplate.from_template(QUERY_RECTIFY_TEMPLATE)

def rectify_query(state: State)-> State:
    out = llm.invoke(rectify_prompt.format(query=state["original_query"]))
    original_query = out.content
    return {"original_query": original_query}

In [80]:

QUERY_PARSING_TEMPLATE= """
        You are an assistant specialized in breaking user queries into smaller, logical subqueries.
        Decompose the following query into distinct, manageable subqueries:

        User Query: "{query}"

        Provide the subqueries as a numbered list, one per line.
        Example input: What is X and how does it affect Y?
        Example output:
        1. What is X?
        2. How does X affect Y?"""
        

parsing_prompt = ChatPromptTemplate.from_template(QUERY_PARSING_TEMPLATE)

def parse_query(state: State)-> State:
    out = llm.invoke(parsing_prompt.format(query=state["original_query"]))
    subqueries = []
    for subquery in out.content.split("\n"):
            if subquery.strip():
                    s = SubQuery()
                    s["query"]=re.sub(r"^\d+\.\s+", "", subquery)
                    s["attempts"]=0
                    s["status"]="pending"
                    subqueries.append(s)
    return {"subqueries": subqueries}

In [None]:
from duckduckgo_search import DDGS
from bs4 import BeautifulSoup
import requests


SUMMARIZE_TEMPLATE = """
You are an AI assistant tasked with summarizing lengthy articles.
Summarize the following article content in a concise and clear way:

Article Content:
{content}
"""

summarize_prompt = ChatPromptTemplate.from_template(SUMMARIZE_TEMPLATE)

def search_duckduck_go(query):
    result = DDGS().text(
        keywords=query,
        region="wt-wt",
        safesearch="off",
        timelimit="7d",
        max_results=10
    )
    
    return result

def extract_content(url: str):
    response=requests.get(url, timeout=10)
    if response.status_code!=200:
        print(f"❌ Failed to fetch {url}, status code: {response.status_code}")
        return ""

    soup = BeautifulSoup(response.content, 'html.parser')
    
    for script in soup(["script", "style", "footer", "nav", "aside"]):
            script.extract()
            
    text = soup.get_text(separator='\n')
    text = re.sub(r'\n+', '\n', text).strip()
    
    return text[:5000] 

def retrieve_content(state: State)-> State:
    for subquery in state["subqueries"]:
        if subquery.get("status") == "completed":
            continue
        subquery["status"] = "in_progress"
        subquery["attempts"] += 1
        
        try:
            print(f"🔍 Searching for subquery: '{subquery['query']}' (Attempt {subquery['attempts']})")
            search_results = search_duckduck_go(subquery['query'])
            
            result = dict()
            for search in search_results:
                if "content" not in result:
                    if "content" not in result:
                        result["content"] = extract_content(search["href"])
                    if result.get("content"):
                        out = llm.invoke(summarize_prompt.format(content=result["content"]))
                        result["summary"] = out.content
                        result["reference"] = search["href"]
                        break
                    else:
                        result["summary"] = "Content extraction failed."
            
            subquery["result"] = result
            subquery["status"] = "completed"
            
        except Exception as e:
            subquery["status"] = "failed"
            print(f"❌ Failed to retrieve results for subquery: '{subquery['query']}' | Error: {e}")
        return state

In [119]:
AGGREGATE_TEMPLATE = """
You are an AI assistant tasked with aggregating and summarizing multiple content pieces into a single, concise, and coherent summary.
Ensure that the summary maintains clarity and relevance.

Individual Summaries:
{summaries}

Generate a final cohesive summary:
"""

aggregate_prompt = ChatPromptTemplate.from_template(AGGREGATE_TEMPLATE)

def aggregate_results(state: State)-> State:
    summaries = []
    references = []
    result = AggregatedResults()
    
    for subquery in state["subqueries"]:
        if subquery["status"] == "completed" and "result" in subquery:
            summary = subquery["result"].get("summary", "")
            reference = subquery["result"].get("reference", "")
            
            if summary:
                summaries.append(f"- {summary}")
            if reference:
                references.append(reference)
    
    try:
        if summaries:
            combined_summaries = "\n".join(summaries)
            out = llm.invoke(aggregate_prompt.format(summaries=combined_summaries))
            result["summary"] = out.content
        else:
            result["summary"] = "No valid summaries were generated from the subqueries."
        
        result["references"] = ";".join(references)
    
    except Exception as e:
        state["final_summary"] = f"❌ Failed to generate final summary: {e}"
        print(f"❌ Error during final summarization: {e}")
    
    return {"aggregated_results": result}

In [122]:
FINAL_OUTPUT_TEMPLATE = """
You are an expert AI assistant tasked with presenting a final, polished answer based on aggregated summaries from multiple reliable sources.

### Original Query:  
"{query}"

### Aggregated Summaries:  
{aggregated_summaries}

### Task:  
1. Craft a **clear, concise, and informative response** directly addressing the original query.  
2. Ensure the response is **well-structured**, **coherent**, and captures the **key insights** from the aggregated summaries.  
3. Provide a **list of references** for further reading, formatted as hyperlinks.

### Format:  
**Answer:**  
[Your response here]

**References:**  
1. [Reference 1](Link 1)  
2. [Reference 2](Link 2)  
3. [Reference 3](Link 3)
"""

final_output_prompt = ChatPromptTemplate.from_template(FINAL_OUTPUT_TEMPLATE)

def generate_final_output(state: State) -> State:
    output=""
    try:
        out = llm.invoke(final_output_prompt.format(
            query=state["original_query"],
            aggregated_summaries=state["aggregated_results"]
        ))
        
        output = out.content
    
    except Exception as e:
        state["final_output"] = f"❌ Failed to generate final output: {e}"
        print(f"❌ Error during final output generation: {e}")
    
    return {"output": output}


In [124]:
from langgraph.graph import StateGraph, START, END

graph_builder = StateGraph(State)
graph_builder.add_node("rectify", rectify_query)
graph_builder.add_node("parse", parse_query)
graph_builder.add_node("retrieve", retrieve_content)
graph_builder.add_node("aggregate", aggregate_results)
graph_builder.add_node("final_output", generate_final_output)

graph_builder.add_edge(START, "rectify")
graph_builder.add_edge("rectify", "parse")
graph_builder.add_edge("parse", "retrieve")
graph_builder.add_edge("retrieve", "aggregate")
graph_builder.add_edge("aggregate", "final_output")

graph = graph_builder.compile()

In [125]:
state = graph.invoke({"original_query": "What are the laest breakthroughs in AI and what are there ethical implications?"})

print(state)

🔍 Searching for subquery: 'What are the latest breakthroughs in AI?' (Attempt 1)
🔍 Searching for subquery: 'What are the ethical implications of the latest AI breakthroughs?' (Attempt 1)
{'original_query': 'What are the latest breakthroughs in AI, and what are their ethical implications?\n', 'subqueries': [{'query': 'What are the latest breakthroughs in AI?', 'attempts': 1, 'status': 'completed', 'result': {'content': '6 Game-Changing AI Breakthroughs That Defined 2024\nSubscribe To Newsletters\nBETA\nTHIS IS A BETA EXPERIENCE. OPT-OUT\xa0\nHERE\nMore From Forbes\nDec 20, 2024,\n11:43pm EST\nSpace-Age Mobile Is Here To Connect Billions And Unlock Global Markets\nDec 20, 2024,\n01:47am EST\nThe Future Of Energy: How Shell Is Harnessing AI To Transform The Energy Sector\nDec 19, 2024,\n08:30am EST\nRetailers: How To Enhance User Confidence In Your Digital Experiences\nDec 19, 2024,\n02:39am EST\n6 Revolutionary Tech Developments That Defined 2024\nDec 18, 2024,\n08:30am EST\nProtect Your

In [126]:
state

{'original_query': 'What are the latest breakthroughs in AI, and what are their ethical implications?\n',
 'subqueries': [{'query': 'What are the latest breakthroughs in AI?',
   'attempts': 1,
   'status': 'completed',
   'result': {'content': '6 Game-Changing AI Breakthroughs That Defined 2024\nSubscribe To Newsletters\nBETA\nTHIS IS A BETA EXPERIENCE. OPT-OUT\xa0\nHERE\nMore From Forbes\nDec 20, 2024,\n11:43pm EST\nSpace-Age Mobile Is Here To Connect Billions And Unlock Global Markets\nDec 20, 2024,\n01:47am EST\nThe Future Of Energy: How Shell Is Harnessing AI To Transform The Energy Sector\nDec 19, 2024,\n08:30am EST\nRetailers: How To Enhance User Confidence In Your Digital Experiences\nDec 19, 2024,\n02:39am EST\n6 Revolutionary Tech Developments That Defined 2024\nDec 18, 2024,\n08:30am EST\nProtect Your Customers From Phishing This Holiday Season And Beyond\nDec 18, 2024,\n04:42am EST\nIEEE Trips To Singapore, Japan, New Jersey, San Francisco And Italy\nDec 18, 2024,\n01:30am 

In [72]:
%pip install duckduckgo_search pandas bs4

Collecting bs4
  Downloading bs4-0.0.2-py2.py3-none-any.whl.metadata (411 bytes)
Downloading bs4-0.0.2-py2.py3-none-any.whl (1.2 kB)
Installing collected packages: bs4
Successfully installed bs4-0.0.2
Note: you may need to restart the kernel to use updated packages.


In [70]:
from duckduckgo_search import DDGS
import pandas as pd

def search_duckduck_go(query):
    result = DDGS().text(
        keywords=query,
        region="wt-wt",
        safesearch="off",
        timelimit="7d",
        max_results=10
    )
    
    return pd.DataFrame(result)

Unnamed: 0,title,href,body
0,Trailer Park Boys - Wikipedia,https://en.wikipedia.org/wiki/Trailer_Park_Boys,Trailer Park Boys is a Canadian mockumentary t...
1,Trailer Park Boys (TV Series 2001-2018) - IMDb,https://www.imdb.com/title/tt0290988/,Trailer Park Boys: Created by Mike Clattenburg...
2,SwearNet.com is Home of the Trailer Park Boys!,https://www.swearnet.com/,SWEARNET.COM is the streaming comedy network f...
3,Standing on the Shoulders of Kitties Trailer S...,https://www.yahoo.com/entertainment/standing-s...,Blue Fox Entertainment has shared the Standing...
4,Trailer Park Boys,https://www.netflix.com/title/70153385,The boys cause a panic in the trailer park whe...
5,Trailer Park Boys - YouTube,https://www.youtube.com/user/tpbSwearNet,The official Trailer Park Boys YouTube channel...
6,Trailer Park Boys - SwearNet,https://www.swearnet.com/shows/trailer-park-boys,Watch the legendary Canadian comedy about the ...
7,Trailer Park Boys (TV Series 2001-2018) - IMDb,https://www.imdb.com/title/tt0290988/fullcredits/,Trailer Park Boys (TV Series 2001-2018) cast a...
8,Trailer Park Boys - streaming tv show online -...,https://www.justwatch.com/us/tv-show/trailer-p...,"Trailer Park Boys - watch online: streaming, b..."
9,List of Trailer Park Boys episodes - Wikipedia,https://en.wikipedia.org/wiki/List_of_Trailer_...,Trailer Park Boys is a Canadian mockumentary t...


In [None]:
from bs4 import BeautifulSoup
import requests

def extract_content(url: str):
    response=requests.get(url, timeout=10)
    if response.status_code!=200:
        print(f"❌ Failed to fetch {url}, status code: {response.status_code}")
        return ""

    soup = BeautifulSoup(response.content, 'html.parser')
    
    for script in soup(["script", "style", "footer", "nav", "aside"]):
            script.extract()
            
    text = soup.get_text(separator='\n')
    text = re.sub(r'\n+', '\n', text).strip()
    
    return text[:5000] 

In [74]:
extract_content("https://en.wikipedia.org/wiki/Trailer_Park_Boys")

'Trailer Park Boys - Wikipedia\nJump to content\nSearch\nSearch\nTrailer Park Boys\n20 languages\nAlemannisch\nDansk\nDeitsch\nDeutsch\nΕλληνικά\nEspañol\nFrançais\n한국어\nItaliano\nMagyar\nNederlands\n日本語\nNorsk bokmål\nPolski\nPortuguês\nРусский\nSimple English\nSrpskohrvatski / српскохрватски\nSuomi\nSvenska\nEdit links\nFrom Wikipedia, the free encyclopedia\nCanadian mockumentary television series\nThis article \nneeds additional citations for \nverification\n.\n Please help \nimprove this article\n by \nadding citations to reliable sources\n. Unsourced material may be challenged and removed.\nFind sources:\n\xa0\n"Trailer Park Boys"\n\xa0–\xa0\nnews\n\xa0\n·\n \nnewspapers\n\xa0\n·\n \nbooks\n\xa0\n·\n \nscholar\n\xa0\n·\n \nJSTOR\n \n(\nJune 2024\n)\n (\nLearn how and when to remove this message\n)\nTrailer Park Boys\nGenre\nMockumentary\nSitcom\nBlack comedy\nCrime comedy\nCreated by\nMike Clattenburg\nStarring\nJohn Paul Tremblay\nRobb Wells\nMike Smith\nJohn Dunsworth\nPatrick R