In [1]:

#Import Libraries
import requests
from bs4 import BeautifulSoup
import nltk
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
from gtts import gTTS
import gradio as gr
import pandas as pd

# Fetch the NLTK resources used later for tokenization and stopword filtering
for resource in ("punkt", "stopwords"):
    nltk.download(resource)

# Reaching this line means every import above resolved
print("Libraries imported successfully!")

Libraries imported successfully!


[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\DELL\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\DELL\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


In [2]:
import nltk

# Fetch NLTK data: sentence tokenizer models, the stopword list, and the
# `punkt_tab` tables that newer NLTK releases might need
for resource in ("punkt", "stopwords", "punkt_tab"):
    nltk.download(resource)

# Sanity check: the tokenizer should split the sample into two sentences
sample_text = "This is a test. It should split into sentences."
sentences = nltk.sent_tokenize(sample_text)
print("NLTK punkt test:", sentences)

NLTK punkt test: ['This is a test.', 'It should split into sentences.']


[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\DELL\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\DELL\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package punkt_tab to
[nltk_data]     C:\Users\DELL\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt_tab is already up-to-date!


In [3]:
import nltk
from nltk.tokenize import sent_tokenize, word_tokenize
from nltk.corpus import stopwords
from collections import defaultdict

# Make sure the tokenizer models and the stopword list are available locally
for resource in ("punkt", "stopwords"):
    nltk.download(resource)

def summarize_text(text, num_sentences=2):
    """Build an extractive summary from the highest-scoring sentences.

    Each sentence is scored by summing the document-wide frequency of every
    distinct non-stopword, alphanumeric token it contains. The best
    ``num_sentences`` sentences are joined with single spaces, highest score
    first (ties keep document order).

    Args:
        text: Raw text to summarize. ``None`` or whitespace-only input is
            treated as empty.
        num_sentences: Maximum number of sentences to keep.

    Returns:
        ``"No content to summarize."`` for empty input; the stripped input
        when it already has ``num_sentences`` sentences or fewer; otherwise
        the joined top-scoring sentences.
    """
    if not text or not text.strip():
        return "No content to summarize."

    sentences = sent_tokenize(text)
    if len(sentences) <= num_sentences:
        return text.strip()

    # The stopword corpus may be missing on a fresh machine; fetch it once.
    try:
        stop_words = set(stopwords.words("english"))
    except LookupError:
        nltk.download('stopwords')
        stop_words = set(stopwords.words("english"))

    def content_tokens(fragment):
        """Lower-cased alphanumeric tokens of ``fragment`` minus stopwords."""
        return [
            token.lower()
            for token in word_tokenize(fragment)
            if token.isalnum() and token.lower() not in stop_words
        ]

    # Document-wide word frequencies
    word_freq = defaultdict(int)
    for word in content_tokens(text):
        word_freq[word] += 1

    # Score on whole tokens: a substring test would credit "art" to a
    # sentence that merely contains "start".
    sentence_scores = {}
    for sentence in sentences:
        score = sum(word_freq.get(word, 0) for word in set(content_tokens(sentence)))
        if score:
            # Assign rather than accumulate so a repeated sentence is not
            # rewarded twice.
            sentence_scores[sentence] = score

    # sorted() is stable, so equal scores stay in document order
    summary_sentences = sorted(sentence_scores, key=sentence_scores.get, reverse=True)[:num_sentences]

    return " ".join(summary_sentences).strip()


[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\DELL\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\DELL\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


In [4]:
import requests
from bs4 import BeautifulSoup
from nltk.sentiment.vader import SentimentIntensityAnalyzer
from nltk.tokenize import sent_tokenize
import nltk

# Make sure the VADER lexicon and the sentence tokenizer models are available locally
for resource in ("vader_lexicon", "punkt"):
    nltk.download(resource)

def fetch_news_articles(company_name):
    """Fetch up to ten Google News RSS items matching ``company_name``.

    Args:
        company_name: Search query; it is URL-encoded by ``requests``.

    Returns:
        On success, a list of dicts with the keys ``"title"``, ``"summary"``,
        ``"link"`` and ``"pub_date"``. On a non-200 response or a network
        failure, a dict of the form ``{"error": message}``.
    """
    search_url = "https://news.google.com/rss/search"
    headers = {"User-Agent": "Mozilla/5.0"}

    try:
        # Passing the query through `params` lets requests percent-encode
        # spaces, '&' and similar characters in multi-word company names.
        response = requests.get(
            search_url, params={"q": company_name}, headers=headers, timeout=5
        )
        if response.status_code != 200:
            return {"error": f"Failed to fetch news (Status: {response.status_code})"}

        soup = BeautifulSoup(response.content, "xml")
        items = soup.find_all("item")[:10]  # Fetch top 10 articles

        articles = []
        for item in items:
            # Guard every child tag: a malformed item must not abort the batch
            title = item.title.text.strip() if item.title else ""
            link = item.link.text if item.link else ""
            pub_date = item.pubDate.text if item.pubDate else "N/A"
            description = item.description.text if item.description else ""

            # The RSS description is an HTML fragment; reduce it to plain text
            description = BeautifulSoup(description, "html.parser").get_text().strip()

            if not description or description == title:
                # Nothing useful in the feed: fall back to the article page.
                # `or` covers a helper that yields nothing for an empty page.
                summary = (fetch_summary_from_link(link) if link else None) or "No summary available"
            else:
                # Drop a trailing source name (e.g. " - CNN", " | BBC News").
                # Keep the text BEFORE the last separator: taking the part
                # after it would leave only the source name as the summary.
                for separator in (" - ", " | "):
                    if separator in description:
                        headline = description.rsplit(separator, 1)[0].strip()
                        if headline:
                            description = headline

                # Use the first sentence as the summary
                sentences = sent_tokenize(description)
                summary = sentences[0] if sentences else description

            articles.append({
                "title": title,
                "summary": summary,
                "link": link,
                "pub_date": pub_date
            })

        return articles

    except requests.exceptions.RequestException as e:
        return {"error": f"Network error: {str(e)}"}

# Fetch full article content if description is missing
def fetch_summary_from_link(url):
    """Return the first sentence found in the first three ``<p>`` tags of ``url``.

    Args:
        url: Address of the article page to download.

    Returns:
        The first sentence of the page text; ``"No summary available"`` when
        the response is not 200 or the paragraphs hold no text; or an
        ``"Error fetching summary: ..."`` string when any exception occurs.
        The function always returns a string.
    """
    fallback = "No summary available"
    try:
        headers = {"User-Agent": "Mozilla/5.0"}
        response = requests.get(url, headers=headers, timeout=5)

        if response.status_code != 200:
            return fallback

        soup = BeautifulSoup(response.text, "html.parser")
        paragraphs = soup.find_all("p")
        # Strip the joined text so three empty paragraphs count as "no text"
        text = " ".join(p.text.strip() for p in paragraphs[:3]).strip()

        if not text:
            # Previously this path fell off the end and returned None, which
            # broke the sentiment step for pages without paragraph text.
            return fallback

        sentences = sent_tokenize(text)
        return sentences[0] if sentences else text
    except Exception as e:
        # Best effort: one unreachable article must not abort the whole batch
        return f"Error fetching summary: {str(e)}"

# Sentiment Analysis
def process_articles(articles):
    """Label every article with a sentiment and log a short preview.

    Each dict in ``articles`` gets a ``"sentiment"`` key (set in place)
    derived from its ``"summary"``; one line per article is printed showing
    the first 50 characters of the summary and the label. The same list
    object is returned.
    """
    for entry in articles:
        label = get_sentiment(entry["summary"])
        entry["sentiment"] = label
        preview = entry["summary"][:50]
        print(f"Sentiment for '{preview}...': {label}")
    return articles

def get_sentiment(text):
    """Classify ``text`` as "Positive", "Negative" or "Neutral" with VADER.

    The compound polarity score decides the label: ``>= 0.05`` is positive,
    ``<= -0.05`` is negative, anything in between is neutral.

    Args:
        text: Text to score. ``None`` or whitespace-only input is reported
            as ``"Neutral"`` without running the analyzer.

    Returns:
        One of ``"Positive"``, ``"Negative"`` or ``"Neutral"``.
    """
    if not text or not text.strip():
        return "Neutral"

    # Build the analyzer once and keep it on the function object; creating a
    # new one per article repeats the same setup work on every call.
    analyzer = getattr(get_sentiment, "_analyzer", None)
    if analyzer is None:
        analyzer = SentimentIntensityAnalyzer()
        get_sentiment._analyzer = analyzer

    compound = analyzer.polarity_scores(text)["compound"]
    if compound >= 0.05:
        return "Positive"
    if compound <= -0.05:
        return "Negative"
    return "Neutral"

# Smoke test: fetch, score and print the articles for a single company
company = "Tesla"
articles = fetch_news_articles(company)
if "error" in articles:
    # Failure path: the fetcher handed back an {"error": ...} dict
    print(articles["error"])
else:
    processed_articles = process_articles(articles)
    print(f"\nProcessed {len(processed_articles)} articles for {company}:")
    for position, item in enumerate(processed_articles, start=1):
        print(f"{position}. Title: {item['title']}")
        print(f"   Summary: {item['summary']}")
        print(f"   Sentiment: {item['sentiment']}")
        print(f"   Link: {item['link']}")
        print()


[nltk_data] Downloading package vader_lexicon to
[nltk_data]     C:\Users\DELL\AppData\Roaming\nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!
[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\DELL\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!


Sentiment for 'Tesla loses ground as Chinese EVs dominate global ...': Negative
Sentiment for 'Tesla owners’ personal data leaked amid anti-Musk ...': Negative
Sentiment for 'Extreme Domestic Terrorism ..., says Elon Musk as ...': Negative
Sentiment for 'Tesla cars torched,owners data leaked:Anti-Musk pr...': Negative
Sentiment for 'Tesla’s India strategy unveiled: Contract manufact...': Neutral
Sentiment for 'Tesla's China rival Zeekr to roll out advanced dri...': Positive
Sentiment for 'Mint  Mint...': Neutral
Sentiment for 'Tesla's Most Bullish Analyst Says 2 Things Need to...': Neutral
Sentiment for 'Pink paint attack on Tesla dealership: 2 arrested ...': Negative
Sentiment for 'Kim Kardashian poses intimately with Tesla robot—a...': Positive

Processed 10 articles for Tesla:
1. Title: Tesla loses ground as Chinese EVs dominate global markets - Rest of World
   Summary: Tesla loses ground as Chinese EVs dominate global markets  Rest of World
   Sentiment: Negative
   Link: https://

In [6]:
# Comparative Analysis Function
from collections import Counter

def comparative_analysis(articles, company=None):
    """Summarize sentiment counts, frequent words and insights for a batch.

    Args:
        articles: List of article dicts carrying ``"title"``, ``"summary"``
            and ``"sentiment"`` keys. May be empty.
        company: Optional company name to put in the report. When omitted,
            the name is guessed as the first word of the first article's
            title, or ``"Unknown"`` if there are no articles.

    Returns:
        A dict with the keys ``"Company"``, ``"Sentiment Distribution"``
        (label -> count), ``"Common Topics"`` (up to five most frequent
        non-stopword tokens across the summaries) and
        ``"Comparative Insights"`` (list of sentences).
    """
    # Sentiment distribution
    sentiment_dist = Counter(article["sentiment"] for article in articles)

    # Topics (simplified): most frequent non-stopword tokens in the summaries.
    # A missing or None summary counts as empty text.
    all_summaries = " ".join((article.get("summary") or "") for article in articles).lower()
    common_topics = []
    if all_summaries.strip():
        stop_words = set(stopwords.words("english"))
        words = [
            word
            for word in word_tokenize(all_summaries)
            if word.isalnum() and word not in stop_words
        ]
        common_topics = Counter(words).most_common(5)

    # Comparative insights, each anchored on the first matching headline
    insights = []
    pos_articles = [a for a in articles if a["sentiment"] == "Positive"]
    neg_articles = [a for a in articles if a["sentiment"] == "Negative"]

    if pos_articles:
        pos_example = pos_articles[0]["title"]
        insights.append(f"Positive coverage (e.g., '{pos_example}') focuses on achievements or growth.")
    if neg_articles:
        neg_example = neg_articles[0]["title"]
        insights.append(f"Negative coverage (e.g., '{neg_example}') highlights challenges or controversies.")
    if sentiment_dist["Neutral"] > 0:
        insights.append(f"Neutral articles ({sentiment_dist['Neutral']}) provide factual updates.")

    # Prefer the name the caller searched for. The headline guess is only a
    # fallback: it is wrong whenever the first title starts with another word,
    # and indexing articles[0] fails outright on an empty batch.
    if company:
        company_label = company
    elif articles:
        company_label = articles[0]["title"].split(" - ")[0].split(" ")[0]
    else:
        company_label = "Unknown"

    return {
        "Company": company_label,
        "Sentiment Distribution": dict(sentiment_dist),
        "Common Topics": [word for word, _ in common_topics],
        "Comparative Insights": insights
    }

# Full Processing and Output
def process_and_analyze(company):
    """Fetch, score and report on the news for ``company``, printing the result.

    Args:
        company: Company name used as the news search query.

    Returns:
        ``(processed_articles, report)`` on success. When fetching fails or
        no articles are found, a single dict ``{"error": message}`` is
        returned instead, so callers must check before unpacking.
    """
    articles = fetch_news_articles(company)
    if "error" in articles:
        return articles
    if not articles:
        # An empty batch cannot be analyzed; report it like other failures
        return {"error": f"No articles found for '{company}'."}

    processed_articles = process_articles(articles)
    report = comparative_analysis(processed_articles)
    # Report the name that was searched for, not a guess from the first headline
    report["Company"] = company

    # Print Articles
    print(f"Processed {len(processed_articles)} articles for {company}:")
    for i, article in enumerate(processed_articles, 1):
        print(f"{i}. Title: {article['title']}")
        print(f"   Summary: {article['summary']}")
        print(f"   Sentiment: {article['sentiment']}")
        print(f"   Link: {article['link']}")
        print()

    # Print Comparative Analysis
    print("Comparative Analysis:")
    print(f"Company: {report['Company']}")
    print("Sentiment Distribution:")
    for sent, count in report["Sentiment Distribution"].items():
        print(f"  {sent}: {count}")
    print(f"Common Topics: {', '.join(report['Common Topics'])}")
    print("Insights:")
    for insight in report["Comparative Insights"]:
        print(f"  - {insight}")

    return processed_articles, report

# Test Step 4
company = "Tesla"
analysis_result = process_and_analyze(company)
if isinstance(analysis_result, dict):
    # Failure path: the pipeline returns a lone {"error": ...} dict, which
    # cannot be unpacked into two names.
    print(analysis_result.get("error", analysis_result))
else:
    articles, report = analysis_result

Sentiment for 'Tesla loses ground as Chinese EVs dominate global ...': Negative
Sentiment for 'Tesla owners’ personal data leaked amid anti-Musk ...': Negative
Sentiment for 'Extreme Domestic Terrorism ..., says Elon Musk as ...': Negative
Sentiment for 'Tesla cars torched,owners data leaked:Anti-Musk pr...': Negative
Sentiment for 'Tesla’s India strategy unveiled: Contract manufact...': Neutral
Sentiment for 'Tesla's China rival Zeekr to roll out advanced dri...': Positive
Sentiment for 'Mint  Mint...': Neutral
Sentiment for 'Tesla's Most Bullish Analyst Says 2 Things Need to...': Neutral
Sentiment for 'Pink paint attack on Tesla dealership: 2 arrested ...': Negative
Sentiment for 'Kim Kardashian poses intimately with Tesla robot—a...': Positive
Processed 10 articles for Tesla:
1. Title: Tesla loses ground as Chinese EVs dominate global markets - Rest of World
   Summary: Tesla loses ground as Chinese EVs dominate global markets  Rest of World
   Sentiment: Negative
   Link: https://n

In [7]:
# Simplified TTS Function (No Translation Dependency)
from gtts import gTTS
import os

def generate_hindi_tts(report, filename="output.mp3"):
    """Turn the report into a templated Hindi sentence and save it as speech.

    The company name, the three sentiment counts and the space-joined
    insights are dropped into a fixed Hindi template. The insight strings
    are inserted as they are (no translation happens here). The text is
    printed for inspection, synthesized with gTTS using ``lang="hi"`` and
    written to ``filename``.

    Args:
        report: Dict with "Company", "Sentiment Distribution" and
            "Comparative Insights" entries.
        filename: Path of the MP3 file to write.

    Returns:
        ``filename``, unchanged.
    """
    # Missing labels count as zero
    distribution = report["Sentiment Distribution"]
    counts = [distribution.get(label, 0) for label in ("Positive", "Negative", "Neutral")]

    joined_insights = " ".join(report["Comparative Insights"])

    # Slots, in order: company, positive, negative, neutral, insights
    hindi_template = (
        "कंपनी: {}. "
        "भावना: सकारात्मक {}, नकारात्मक {}, तटस्थ {}. "
        "अंतर्दृष्टि: {}"
    )
    hindi_text = hindi_template.format(report["Company"], *counts, joined_insights)
    print(f"Hindi Text: {hindi_text}")  # Debug to confirm

    # Synthesize at normal speed and persist to disk
    speech = gTTS(text=hindi_text, lang="hi", slow=False)
    speech.save(filename)
    return filename

# Updated Process and Analyze with Simplified TTS
def process_and_analyze_with_tts(company):
    """Run the news analysis for ``company`` and produce a Hindi audio summary.

    Args:
        company: Company name used as the news search query.

    Returns:
        ``(processed_articles, report, audio_file)`` on success. When
        fetching fails or no articles are found, a single dict
        ``{"error": message}`` is returned instead, so callers must check
        before unpacking.
    """
    articles = fetch_news_articles(company)
    if "error" in articles:
        return articles
    if not articles:
        # An empty batch cannot be analyzed; report it like other failures
        return {"error": f"No articles found for '{company}'."}

    processed_articles = process_articles(articles)
    report = comparative_analysis(processed_articles)
    # Report (and speak) the name that was searched for, not a guess taken
    # from the first headline
    report["Company"] = company

    # Generate TTS in Hindi
    audio_file = generate_hindi_tts(report, "summary_hindi.mp3")

    # Print Articles
    print(f"Processed {len(processed_articles)} articles for {company}:")
    for i, article in enumerate(processed_articles, 1):
        print(f"{i}. Title: {article['title']}")
        print(f"   Summary: {article['summary']}")
        print(f"   Sentiment: {article['sentiment']}")
        print(f"   Link: {article['link']}")
        print()

    # Print Comparative Analysis
    print("Comparative Analysis:")
    print(f"Company: {report['Company']}")
    print("Sentiment Distribution:")
    for sent, count in report["Sentiment Distribution"].items():
        print(f"  {sent}: {count}")
    print(f"Common Topics: {', '.join(report['Common Topics'])}")
    print("Insights:")
    for insight in report["Comparative Insights"]:
        print(f"  - {insight}")
    print(f"Hindi TTS Audio saved as: {audio_file}")

    # Display audio in Jupyter
    from IPython.display import Audio, display
    display(Audio(audio_file))

    return processed_articles, report, audio_file

#  Test the Simplified TTS
company = "Tesla"
tts_result = process_and_analyze_with_tts(company)
if isinstance(tts_result, dict):
    # Failure path: the pipeline returns a lone {"error": ...} dict, which
    # cannot be unpacked into three names.
    print(tts_result.get("error", tts_result))
else:
    articles, report, audio_file = tts_result

Sentiment for 'Tesla loses ground as Chinese EVs dominate global ...': Negative
Sentiment for 'Tesla owners’ personal data leaked amid anti-Musk ...': Negative
Sentiment for 'Extreme Domestic Terrorism ..., says Elon Musk as ...': Negative
Sentiment for 'Tesla cars torched,owners data leaked:Anti-Musk pr...': Negative
Sentiment for 'Tesla’s India strategy unveiled: Contract manufact...': Neutral
Sentiment for 'Tesla's China rival Zeekr to roll out advanced dri...': Positive
Sentiment for 'Mint  Mint...': Neutral
Sentiment for 'Tesla's Most Bullish Analyst Says 2 Things Need to...': Neutral
Sentiment for 'Pink paint attack on Tesla dealership: 2 arrested ...': Negative
Sentiment for 'Kim Kardashian poses intimately with Tesla robot—a...': Positive
Hindi Text: कंपनी: Tesla. भावना: सकारात्मक 2, नकारात्मक 5, तटस्थ 3. अंतर्दृष्टि: Positive coverage (e.g., 'Tesla's China rival Zeekr to roll out advanced driver assistance-system for free - CNBC') focuses on achievements or growth. Negative co

In [8]:
import gradio as gr

def gradio_process(company, progress=gr.Progress()):
    """Gradio callback: analyze ``company`` and format the result for the UI.

    Args:
        company: Company name typed by the user.
        progress: Gradio progress tracker, injected by Gradio.

    Returns:
        A 3-tuple ``(markdown_text, audio_path, articles)``. On failure the
        first element is the error message and the other two are ``None``.
    """
    progress(0, desc="Starting analysis...")

    company = (company or "").strip()
    if not company:
        progress(1.0, desc="Analysis failed.")
        return "Please enter a company name.", None, None

    progress(0.2, desc="Fetching news articles...")
    result = process_and_analyze_with_tts(company)

    # On failure the pipeline returns a single {"error": ...} dict rather than
    # a 3-tuple, so it has to be checked BEFORE unpacking.
    if isinstance(result, dict):
        progress(1.0, desc="Analysis failed.")
        return result.get("error", "Analysis failed."), None, None

    articles, report, audio_file = result

    progress(0.5, desc="Generating report...")
    # Collect the Markdown pieces and join once at the end
    parts = [f"### Processed {len(articles)} articles for {company}:\n\n"]
    for i, article in enumerate(articles, 1):
        parts.append(
            f"**{i}. Title:** {article['title']}\n"
            f"**Summary:** {article['summary']}\n"
            f"**Sentiment:** {article['sentiment']}\n"
            f"**Link:** [{article['link']}]({article['link']})\n\n"
        )

    parts.append("### Comparative Analysis:\n")
    parts.append(f"**Company:** {report['Company']}\n")
    parts.append("**Sentiment Distribution:**\n")
    for sent, count in report["Sentiment Distribution"].items():
        parts.append(f"  {sent}: {count}\n")
    parts.append(f"**Common Topics:** {', '.join(report['Common Topics'])}\n")
    parts.append("**Insights:**\n")
    for insight in report["Comparative Insights"]:
        parts.append(f"  - {insight}\n")
    output_text = "".join(parts)

    # The audio file already exists at this point; these two updates only
    # advance the progress display.
    progress(0.8, desc="Generating audio summary...")
    progress(1.0, desc="Analysis complete.")

    return output_text, audio_file, articles

# Custom CSS for both light and dark modes.
# Passed to `gr.Blocks(css=...)` below. The `.markdown-text` and `.audio-player`
# selectors match the `elem_classes` given to the report and audio components;
# rules prefixed with `.dark` are the dark-mode overrides.
custom_css = """
/* Import Google Font */
@import url('https://fonts.googleapis.com/css2?family=Roboto:wght@400;500;700&display=swap');

/* Gradient background for light mode */
.gradio-container {
    background: linear-gradient(135deg, #f5f7fa, #c3cfe2);
    font-family: 'Roboto', sans-serif;
    padding: 20px;
    border-radius: 10px;
}

/* Dark mode overrides */
.dark .gradio-container {
    background: linear-gradient(135deg, #2c3e50, #34495e);
    color: #ffffff;
}

/* Title styling */
h1 {
    color: #4CAF50;
    text-align: center;
    font-weight: 700;
}

/* Dark mode title color */
.dark h1 {
    color: #4CAF50;
}

/* Markdown text styling */
.markdown-text {
    background-color: white;
    padding: 15px;
    border-radius: 5px;
    box-shadow: 0 2px 4px rgba(0,0,0,0.1);
    animation: fadeIn 1s ease-in-out;
}

/* Dark mode Markdown text styling */
.dark .markdown-text {
    background-color: #2c3e50;
    color: #ffffff;
}

/* Audio player styling */
.audio-player {
    margin-top: 20px;
    animation: fadeIn 1s ease-in-out;
}

/* Button hover animation */
button {
    transition: transform 0.2s ease, box-shadow 0.2s ease;
}
button:hover {
    transform: scale(1.05);
    box-shadow: 0 4px 8px rgba(0, 0, 0, 0.2);
}

/* Dark mode button styling */
.dark button {
    background-color: #4CAF50;
    color: white;
}

/* Fade-in animation */
@keyframes fadeIn {
    from { opacity: 0; }
    to { opacity: 1; }
}
"""

# Define the Gradio interface: a text input with an "Analyze" button on top,
# then the Markdown report next to the Hindi audio player. All outputs are
# filled by `gradio_process`.
with gr.Blocks(css=custom_css) as demo:
    gr.Markdown("# News Summarization and Sentiment Analysis")
    gr.Markdown("Enter a company name to get a sentiment report and Hindi audio summary.")

    # Input row
    with gr.Row():
        with gr.Column():
            company_input = gr.Textbox(label="Enter Company Name", placeholder="e.g., Tesla", interactive=True)
            submit_button = gr.Button("Analyze", variant="primary")
        with gr.Column():
            # No ProgressBar component, progress is handled within the function
            pass

    # Output row: report on the left, audio (plus a hidden JSON dump) on the right
    with gr.Row():
        with gr.Column():
            output_markdown = gr.Markdown(label="Sentiment Report", elem_classes=["markdown-text"])
        with gr.Column():
            output_audio = gr.Audio(label="Hindi TTS Output", elem_classes=["audio-player"])
            # Created with visible=False; receives the article list returned by the callback
            output_json = gr.JSON(label="Raw Data (for debugging)", visible=False)

    # Add examples
    # NOTE(review): with cache_examples=True the captured output of this cell
    # shows the full analysis running for Tesla, Microsoft and Apple when the
    # cell executes ("Caching examples at: ..."), i.e. three rounds of news
    # fetching and TTS before the app is usable — confirm this is intended.
    examples = gr.Examples(
        examples=["Tesla", "Microsoft", "Apple"],
        inputs=[company_input],
        outputs=[output_markdown, output_audio, output_json],
        fn=gradio_process,
        cache_examples=True
    )

    # Link the button to the function
    submit_button.click(
        gradio_process,
        inputs=[company_input],
        outputs=[output_markdown, output_audio, output_json],
        show_progress=True
    )

# Launch the interface
demo.launch()

* Running on local URL:  http://127.0.0.1:7860
Caching examples at: 'c:\Users\DELL\OneDrive\Desktop\Assignment\.gradio\cached_examples\15'
Sentiment for 'Tesla loses ground as Chinese EVs dominate global ...': Negative
Sentiment for 'Tesla owners’ personal data leaked amid anti-Musk ...': Negative
Sentiment for 'Extreme Domestic Terrorism ..., says Elon Musk as ...': Negative
Sentiment for 'Tesla cars torched,owners data leaked:Anti-Musk pr...': Negative
Sentiment for 'Tesla’s India strategy unveiled: Contract manufact...': Neutral
Sentiment for 'Tesla's China rival Zeekr to roll out advanced dri...': Positive
Sentiment for 'Mint  Mint...': Neutral
Sentiment for 'Tesla's Most Bullish Analyst Says 2 Things Need to...': Neutral
Sentiment for 'Pink paint attack on Tesla dealership: 2 arrested ...': Negative
Sentiment for 'Kim Kardashian poses intimately with Tesla robot—a...': Positive
Hindi Text: कंपनी: Tesla. भावना: सकारात्मक 2, नकारात्मक 5, तटस्थ 3. अंतर्दृष्टि: Positive coverage (e.g.

Sentiment for 'Microsoft is replacing its chief people officer as...': Neutral
Sentiment for 'Microsoft’s Free Windows Upgrade—‘Don’t Wait Until...': Positive
Sentiment for 'Adobe and Microsoft Empower Marketers with AI Agen...': Neutral
Sentiment for 'Microsoft confirms Amy Coleman as new Chief People...': Neutral
Sentiment for 'Taboola & Microsoft expand partnership to Outlook ...': Positive
Sentiment for 'Microsoft Malaysia to launch three data centres in...': Neutral
Sentiment for 'Bill Gates says Satya Nadella was ‘almost’ passed ...': Neutral
Sentiment for 'Microsoft: March Windows updates mistakenly uninst...': Negative
Sentiment for 'Microsoft Ability Summit 2025: Accessibility in th...': Positive
Processed 10 articles for Microsoft:
1. Title: Microsoft is replacing its chief people officer as it rethinks performance reviews. Read CEO Satya Nadella's email. - Business Insider
   Summary: Microsoft is replacing its chief people officer as it rethinks performance reviews.
   Sent

Sentiment for 'Apple iPhone 15 is available for just Rs 28,205 on...': Positive
Sentiment for 'GSMArena.com news  GSMArena.com...': Neutral
Sentiment for 'Apple slams EU order to open up, warns it’s bad fo...': Negative
Sentiment for 'Google hits out at Apple with the ‘affordable’ Pix...': Neutral
Sentiment for 'Google Pixel 9a vs Apple iPhone 16e: Which one off...': Positive
Sentiment for 'Google rivals Apple iPhone 16e with AI-enhanced Pi...': Neutral
Sentiment for 'Apple deepens presence in India; in talks with Wip...': Neutral
Sentiment for 'Apple looks to deepen India presence; in talks wit...': Positive
Sentiment for 'Apple In Talks With Wipro Enterprises, Lakshmi Mac...': Positive
Sentiment for 'Apple's foldable device may cost twice that of iPh...': Neutral
Hindi Text: कंपनी: Apple. भावना: सकारात्मक 4, नकारात्मक 1, तटस्थ 5. अंतर्दृष्टि: Positive coverage (e.g., 'Apple iPhone 15 is available for just Rs 28,205 on Amazon; know how to grab the deal - The Times of India') focuses o


To create a public link, set `share=True` in `launch()`.


