# In [25]:
import streamlit as st
from pymongo import MongoClient
from textblob import TextBlob
from collections import Counter
import nltk
from nltk.corpus import stopwords
import matplotlib.pyplot as plt

# Download the NLTK resources this module relies on, then build the English
# stop-word set once at import time (it is consulted by extract_drivers).
# NOTE(review): 'punkt' is presumably needed for TextBlob tokenization - confirm.
for _resource in ("punkt", "stopwords"):
    nltk.download(_resource)
stop_words = set(stopwords.words("english"))

# ---------- DATABASE CONNECTION ---------- #
def connect_to_mongodb():
    """Open the local MongoDB server and return the ``news_articles`` database.

    Before returning, a text index over ``title`` and ``abstract`` is requested
    on the ``nytimes``, ``newsapi`` and ``guardian`` collections (in that
    order), which is what the ``$text`` queries elsewhere in this file use.

    Returns:
        The ``news_articles`` database handle.
    """
    database = MongoClient("mongodb://localhost:27017/")["news_articles"]

    # Ensure text indexes exist on every source collection.
    for collection_name in ("nytimes", "newsapi", "guardian"):
        index_keys = [(field, "text") for field in ("title", "abstract")]
        database[collection_name].create_index(index_keys)

    return database

# ---------- HELPER FUNCTIONS ---------- #
def fetch_articles_with_keyword(db, collection_name, keyword):
    """Return every document in a collection that matches a text search.

    Args:
        db: Mapping-like database handle; indexed by ``collection_name``.
        collection_name: Name of the collection to query.
        keyword: Search string placed in the ``$text``/``$search`` filter.

    Returns:
        A list holding all documents yielded by the collection's ``find``.
    """
    text_filter = {"$text": {"$search": keyword}}
    matches = db[collection_name].find(text_filter)
    return list(matches)

def analyze_sentiment(text):
    """Classify the sentiment of *text* from its TextBlob polarity score.

    Args:
        text: The text to score. ``None`` and blank strings are treated as
            carrying no sentiment and are not passed to TextBlob.

    Returns:
        A ``(label, polarity)`` tuple. ``label`` is ``"Positive"`` when the
        polarity is above 0.1, ``"Negative"`` when it is below -0.1 and
        ``"Neutral"`` otherwise.
    """
    # A document field can be stored as null; TextBlob rejects non-string
    # input, so score missing/blank text as neutral instead of crashing.
    if text is None or (isinstance(text, str) and not text.strip()):
        return "Neutral", 0.0

    polarity = TextBlob(text).sentiment.polarity
    if polarity > 0.1:
        return "Positive", polarity
    if polarity < -0.1:
        return "Negative", polarity
    return "Neutral", polarity

def extract_drivers(texts, top_n=10):
    """Return the most frequent non-stop-word alphabetic words across *texts*.

    Args:
        texts: Iterable of strings to tokenize. ``None`` and blank entries
            are skipped.
        top_n: Maximum number of words to return (defaults to 10).

    Returns:
        A list of lower-cased words ordered from most to least frequent.
        Empty when no usable words are found.
    """
    counts = Counter()
    for text in texts:
        # Missing/blank fields contribute no words; skipping them also avoids
        # handing None to TextBlob, which only accepts strings.
        if text is None or (isinstance(text, str) and not text.strip()):
            continue
        for word in TextBlob(text).words:
            lowered = word.lower()
            if word.isalpha() and lowered not in stop_words:
                counts[lowered] += 1
    return [word for word, _ in counts.most_common(top_n)]

def sentiment_scatter(title_scores, content_scores):
    """Render a scatter plot of title polarity against content polarity.

    A dashed gray diagonal from (-1, -1) to (1, 1) is drawn as a reference
    line where title and content scores would be equal.

    Args:
        title_scores: Sequence of polarity scores for article titles.
        content_scores: Sequence of polarity scores for article contents,
            paired position-by-position with ``title_scores``.
    """
    # Use an explicit Figure/Axes instead of the global pyplot state so each
    # call draws on its own figure.
    fig, ax = plt.subplots(figsize=(6, 4))
    ax.scatter(title_scores, content_scores, color='blue')
    ax.plot([-1, 1], [-1, 1], linestyle='--', color='gray')
    ax.set_xlabel("Title Sentiment")
    ax.set_ylabel("Content Sentiment")
    ax.set_title("Title vs. Content Sentiment")
    try:
        st.pyplot(fig)
    finally:
        # Release the figure; otherwise one figure per call stays registered
        # with pyplot for the lifetime of the process.
        plt.close(fig)

# ---------- MAIN APP ---------- #
def run_sentiment_dashboard():
    """Build the Streamlit sentiment dashboard page.

    Connects to MongoDB, injects the page CSS, and asks for a keyword. When a
    keyword is entered, one tab per source is rendered showing the most common
    content sentiment label, the most frequent words, the most frequent words
    in non-neutral articles, and a title-vs-content sentiment scatter plot.
    """
    db = connect_to_mongodb()
    # Add background and white box CSS
    st.markdown("""
    <style>
    [data-testid="stAppViewContainer"] > .main {
        background-image: url("https://www.shutterstock.com/image-vector/newspaper-background-torn-paper-style-600nw-2261765635.jpg");
        background-size: cover;
        background-position: center;
        background-repeat: no-repeat;
        background-attachment: fixed;
    }

    [data-testid="stHeader"] {
        background: rgba(255, 255, 255, 0.0);
    }

    .white-box {
        background-color: rgba(255, 255, 255, 0.96);
        padding: 2rem;
        border-radius: 18px;
        box-shadow: 3px 3px 15px rgba(0,0,0,0.15);
        max-width: 1100px;
        margin: 2rem auto;
    }

    h1, h2, h3 {
        text-align: center;
        color: #1f2937;
    }
    </style>
    """, unsafe_allow_html=True)

    # Open white box
    # NOTE(review): each st.markdown call may be rendered as its own element,
    # so this opening tag might not actually wrap the widgets below - confirm.
    st.markdown("<div class='white-box'>", unsafe_allow_html=True)

    # Page title
    st.title("Sentiment Analyzer for News Articles")
    keyword = st.text_input("Enter a keyword to analyze sentiment across media:")

    if keyword:
        # Tab labels and backing collections are paired by position; the
        # "FOXNEWS" tab reads from the "newsapi" collection.
        tabs = st.tabs(["NYTIMES", "FOXNEWS", "GUARDIAN"])
        sources = ["nytimes", "newsapi", "guardian"]

        for tab, source in zip(tabs, sources):
            with tab:
                articles = fetch_articles_with_keyword(db, source, keyword)

                if not articles:
                    st.warning("No articles found for this keyword.")
                    continue

                title_sentiments, content_sentiments, emotion_labels = [], [], []
                contents = []

                for article in articles:
                    # dict.get only applies its default when the key is absent;
                    # "or" also covers fields stored as null so the text
                    # helpers always receive a string.
                    title = article.get("title") or ""
                    content = article.get("abstract") or ""
                    contents.append(content)

                    _, t_score = analyze_sentiment(title)
                    c_label, c_score = analyze_sentiment(content)

                    title_sentiments.append(t_score)
                    content_sentiments.append(c_score)
                    emotion_labels.append(c_label)

                # 1. Common Emotion (based on content labels only)
                common_emotion = Counter(emotion_labels).most_common(1)[0][0]
                st.success(f"Most common emotion: {common_emotion}", icon="✅")

                # 2. Key Drivers
                with st.expander(f"Key Drivers of {common_emotion} Sentiment"):
                    drivers = extract_drivers(contents)
                    st.write(drivers)

                # 3. Words that contribute to Polarized Tone
                with st.expander("Words that Contribute to a Polarized Tone"):
                    # Only articles whose content was labelled non-neutral.
                    polarized = extract_drivers(
                        [c for c, l in zip(contents, emotion_labels) if l != "Neutral"]
                    )
                    st.write(polarized)

                # 4. Title vs Content Sentiment Plot
                st.markdown("**Title Sentiment vs Content Sentiment**")
                sentiment_scatter(title_sentiments, content_sentiments)

    # Close white box
    st.markdown("</div>", unsafe_allow_html=True)

# ---------- STREAMLIT ENTRY POINT ---------- #
# Build the dashboard only when this file is executed as the main module,
# not when it is imported.
if __name__ == '__main__':
    run_sentiment_dashboard()

# Captured notebook output:
# [nltk_data] Downloading package punkt to
# [nltk_data]     /Users/suhasinisingh/nltk_data...
# [nltk_data]   Package punkt is already up-to-date!
# [nltk_data] Downloading package stopwords to
# [nltk_data]     /Users/suhasinisingh/nltk_data...
# [nltk_data]   Package stopwords is already up-to-date!
