In [2]:
import os
from dotenv import load_dotenv
from langchain_core.prompts import PromptTemplate
from langchain_openai import ChatOpenAI
from langchain_community.document_loaders import WebBaseLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from typing import List, Dict, Any, Tuple

USER_AGENT environment variable not set, consider setting it to identify your requests.


In [2]:
load_dotenv()

class NewsSummarizer:
    """Core class for loading and summarizing news articles"""
    
    def __init__(self, model_name="gpt-3.5-turbo-16k", temperature=0.5):
        """Initialize the news summarizer with specified model parameters"""
        self.llm = ChatOpenAI(
            model=model_name,
            temperature=temperature
        )
        self.text_splitter = RecursiveCharacterTextSplitter(
            chunk_size=1000,
            chunk_overlap=200
        )
    
    def load_articles(self, urls: List[str]) -> Tuple[List[Dict], List[str]]:
        """
        Load and process content from multiple URLs
        
        Args:
            urls: List of URLs to process
            
        Returns:
            Tuple containing list of document splits and list of error messages
        """
        all_docs = []
        errors = []
        
        for url in urls:
            if url.strip():  # Skip empty URLs
                try:
                    loader = WebBaseLoader(url)
                    docs = loader.load()
                    # Add source metadata
                    for doc in docs:
                        doc.metadata["source"] = url
                    all_docs.extend(docs)
                except Exception as e:
                    errors.append(f"Error loading {url}: {str(e)}")
        
        # Split documents into chunks if documents were loaded
        if all_docs:
            splits = self.text_splitter.split_documents(all_docs)
            return splits, errors
        else:
            return [], errors
    
    def create_summary_chain(self):
        """Create the LangChain chain for summarizing news articles"""
        # Create the summary template
        summary_template = """
        You are a professional news editor and journalist. Your task is to create a consolidated news article based on the following sources.
        
        SOURCES:
        {context}
        
        Follow these guidelines to create a proper news article:
        1. Write in the inverted pyramid structure with the most important information at the beginning
        2. Include a compelling headline
        3. Add a byline as "AI News Summarizer"
        4. Start with a strong lead paragraph covering the 5W1H (who, what, when, where, why, how)
        5. Follow with supporting details and background information
        6. End with a conclusion that provides context or future implications
        7. Maintain an objective, journalistic tone
        8. Include any significant quotes from the original articles
        9. Synthesize information from all sources to provide a comprehensive view
        10. Format the output in markdown
        
        CONSOLIDATED NEWS ARTICLE:
        """
        
        # Create prompt from template
        prompt = PromptTemplate.from_template(summary_template)
        
        # Create chain
        chain = (
            {"context": lambda docs: "\n\n".join([doc.page_content for doc in docs])}
            | prompt
            | self.llm
            | StrOutputParser()
        )
        
        return chain
    
    def summarize(self, urls: List[str]) -> Dict[str, Any]:
        """
        Generate a news summary from the provided URLs
        
        Args:
            urls: List of URLs to news articles
            
        Returns:
            Dictionary with summary and errors
        """
        result = {
            "summary": None,
            "errors": []
        }
        
        # Load and process documents
        docs, errors = self.load_articles(urls)
        result["errors"] = errors
        
        if docs:
            try:
                # Create and run the chain
                chain = self.create_summary_chain()
                summary = chain.invoke(docs)
                result["summary"] = summary
            except Exception as e:
                result["errors"].append(f"Summarization error: {str(e)}")
        
        return result

In [7]:
news = ["https://www.bbc.com/news/articles/clyv789ejgxo", "https://www.bbc.com/news/articles/c04ezlyq4y0o","https://www.bbc.com/news/articles/c70nw7v5ykyo"]

summarizer = NewsSummarizer()
print(summarizer.summarize(news)["summary"])

# Sixteen Dead After Tornadoes Tear Through US States

**AI News Summarizer**

At least 16 people have been reported dead after tornadoes tore through parts of Kentucky and Missouri. The tornado in Kentucky struck in Laurel County, with officials expecting the death toll to rise. In Missouri, 5,000 buildings were damaged, roofs destroyed, and power lines knocked down as a tornado hit St Louis. The National Weather Service reported tornadoes in neighbouring Illinois as well, with severe weather conditions stretching to the Atlantic coast.

St Louis Mayor Cara Spencer described the loss of life and destruction as "truly horrendous," emphasizing the need for community support and safety measures. A curfew was imposed in the most affected areas to prevent injuries and looting.

The US National Weather Service highlighted the widespread impact of the severe weather, with additional tornadoes hitting Illinois and weather conditions extending to the Atlantic coast.

---

# Trump's Frantic Wee

In [4]:
loader = WebBaseLoader("https://www.bbc.com/travel/article/20250516-monacos-new-neighbourhood-rising-out-of-the-sea")
loader.load()

[Document(metadata={'source': 'https://www.bbc.com/travel/article/20250516-monacos-new-neighbourhood-rising-out-of-the-sea', 'title': "Monaco's new €2bn neighbourhood rising out of the sea", 'description': "Built on reclaimed land, Monaco's new Mareterra district blends cutting-edge sustainability with scenic sea views, offering a fresh way to explore the principality.", 'language': 'en-GB'}, page_content='Monaco\'s new €2bn neighbourhood rising out of the seaSkip to contentBritish Broadcasting CorporationHomeNewsSportBusinessInnovationCultureArtsTravelEarthAudioVideoLiveHomeNewsIsrael-Gaza WarWar in UkraineUS & CanadaUKUK PoliticsEnglandN. IrelandN. Ireland PoliticsScotlandScotland PoliticsWalesWales PoliticsAfricaAsiaChinaIndiaAustraliaEuropeLatin AmericaMiddle EastIn PicturesBBC InDepthBBC VerifySportBusinessExecutive LoungeTechnology of BusinessFuture of BusinessInnovationTechnologyScience & HealthArtificial IntelligenceAI v the MindCultureFilm & TVMusicArt & DesignStyleBooksEntert