<a href="https://colab.research.google.com/github/ramindersinghusd/aai-520-in3-project/blob/main/StockSentimentAgent_MultiStock_Member1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Member 1: Data + Sentiment Agent (Multi-Stock Version)

This notebook implements my task as member of Group2:
- Build Data Retrieval Agent (Yahoo Finance + NewsAPI)
- Implement Sentiment Analysis Agent (Ingest → Preprocess → Classify → Summarize)
- Provide sentiment vs stock price plots
- Extend to multiple stocks (Tech & Fintech)


## Step 1: Install Dependencies
We install required packages for data retrieval, sentiment analysis, and visualization.

In [25]:
!pip install yfinance newsapi-python transformers torch scikit-learn matplotlib seaborn nltk --quiet

## Step 2: Import Libraries

In [26]:
# Install required packages (run once in Colab or local)
!pip install yfinance newsapi-python transformers plotly --quiet

# Imports
import os
import yfinance as yf
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from newsapi import NewsApiClient
from transformers import pipeline
import plotly.express as px


# Step 3: Parameters/ Input

In [44]:
# -------------------------
# USER PARAMETERS
# -------------------------
tickers = ['AAPL','MSFT','GOOGL','AMZN','TSLA','PYPL']  # Add or remove companies dynamically
start_date = '2024-01-01'
end_date = '2025-09-30'
news_per_company = 20
sentiment_model_name = "distilbert-base-uncased-finetuned-sst-2-english"

# Initialize NewsAPI
newsapi = NewsApiClient(api_key='64262421a79e487b846b704dd1ee3dfd')

# Initialize sentiment pipeline
sentiment_pipeline = pipeline("sentiment-analysis", model=sentiment_model_name)


Device set to use cpu


## Step 4: Define the Agent Class


In [70]:
import os
import pandas as pd
import yfinance as yf
from newsapi import NewsApiClient
from transformers import pipeline

class StockSentimentAgent:
    def __init__(self, tickers, start, end, news_api, sentiment_pipeline):
        """
        tickers: list of stock tickers to analyze
        start, end: date strings (yyyy-mm-dd)
        news_api: initialized NewsAPI object
        sentiment_pipeline: HuggingFace pipeline for sentiment analysis
        """
        self.tickers = tickers
        self.start = start
        self.end = end
        self.news_api = news_api
        self.sentiment_pipeline = sentiment_pipeline
        self.results = {}

    # -----------------------------
    # Fetch stock data
    # -----------------------------
    def fetch_stock(self, ticker):
        # Ensure we keep standard column names
        df = yf.download(ticker, start=self.start, end=self.end, auto_adjust=False)
        df = df.reset_index()
        df.columns = ['date', 'Open', 'High', 'Low', 'Close', 'Adj Close', 'Volume']
        return df

    # -----------------------------
    # Fetch news data
    # -----------------------------
    def fetch_news(self, query, page_size=20):
        try:
            articles = self.news_api.get_everything(
                q=query,
                language='en',
                sort_by='relevancy',
                page_size=page_size
            )
            df = pd.DataFrame([{
                'title': art['title'],
                'description': art['description'],
                'publishedAt': art['publishedAt']
            } for art in articles['articles']])
            return df
        except Exception as e:
            print(f"NewsAPI error for {query}: {e}")
            return pd.DataFrame(columns=['title','description','publishedAt'])

    # -----------------------------
    # Sentiment analysis
    # -----------------------------
    def analyze_sentiment(self, text):
        if text:
            result = self.sentiment_pipeline(text[:512])[0]
            return result['label'].upper(), result['score']
        return "NEUTRAL", 0

    # -----------------------------
    # Run analysis for all tickers
    # -----------------------------
    def run(self, news_per_company=20):
        import os
        os.makedirs("data", exist_ok=True)

        for ticker in self.tickers:
            print(f"\nProcessing {ticker}...")

            # -----------------------------
            # Fetch stock data
            # -----------------------------
            stock_df = self.fetch_stock(ticker)

            # Detect Close, Volume, and MarketCap columns
            stock_cols = stock_df.columns
            if 'Close' not in stock_cols:
                close_candidates = [c for c in stock_cols if 'Close' in c]
                if close_candidates:
                    stock_df = stock_df.rename(columns={close_candidates[0]: 'Close'})
                else:
                    print(f"No Close column found for {ticker}, skipping...")
                    continue

            if 'Volume' not in stock_cols:
                vol_candidates = [c for c in stock_cols if 'Volume' in c]
                if vol_candidates:
                    stock_df = stock_df.rename(columns={vol_candidates[0]: 'Volume'})
                else:
                    stock_df['Volume'] = 0

            # Add Market Cap from Yahoo Finance
            try:
                ticker_info = yf.Ticker(ticker).info
                market_cap = ticker_info.get('marketCap', 0)
                stock_df['MarketCap'] = market_cap
            except:
                stock_df['MarketCap'] = 0

            stock_df['date'] = pd.to_datetime(stock_df['date']).dt.date

            # -----------------------------
            # Fetch news data
            # -----------------------------
            news_df = self.fetch_news(ticker, page_size=news_per_company)
            print(f"{ticker} news count before filtering:", len(news_df))

            if news_df.empty:
                print(f"No news returned for {ticker}. Filling zeros.")
                combined = stock_df.copy()
                for col in ['POSITIVE','NEGATIVE','NEUTRAL','sentiment_score','POSITIVE_rolling','NEGATIVE_rolling']:
                    combined[col] = 0
            else:
                # Convert publishedAt to date
                news_df['publishedAt'] = pd.to_datetime(news_df['publishedAt']).dt.date

                # Sentiment analysis
                news_df[['sentiment','confidence']] = news_df['description'].apply(
                    lambda x: pd.Series(self.analyze_sentiment(x))
                )
                news_df['sentiment'] = news_df['sentiment'].str.upper()

                # -----------------------------
                # Daily sentiment aggregation
                # -----------------------------
                daily_sentiment = (
                    news_df.groupby(['publishedAt','sentiment']).size()
                    .unstack(fill_value=0)
                )

                # Convert counts to proportions
                daily_sentiment = daily_sentiment.div(daily_sentiment.sum(axis=1), axis=0)

                # Flatten columns to strings
                daily_sentiment.columns = [str(col) for col in daily_sentiment.columns]
                daily_sentiment = daily_sentiment.reset_index()

                # Ensure all expected columns exist
                for col in ['POSITIVE','NEGATIVE','NEUTRAL']:
                    if col not in daily_sentiment.columns:
                        daily_sentiment[col] = 0

                # Merge with full date range to avoid missing dates
                full_dates = pd.DataFrame({'publishedAt': pd.date_range(
                    stock_df['date'].min(), stock_df['date'].max()
                ).date})
                daily_sentiment = full_dates.merge(daily_sentiment, on='publishedAt', how='left')
                daily_sentiment[['POSITIVE','NEGATIVE','NEUTRAL']] = daily_sentiment[['POSITIVE','NEGATIVE','NEUTRAL']].fillna(0)

                # -----------------------------
                # Merge stock + sentiment safely
                # -----------------------------
                combined = pd.merge(
                    stock_df,
                    daily_sentiment,
                    left_on='date',
                    right_on='publishedAt',
                    how='left'
                )

                # -----------------------------
                # Sentiment score & rolling averages
                # -----------------------------
                combined['sentiment_score'] = combined['POSITIVE'] - combined['NEGATIVE']
                combined['POSITIVE_rolling'] = combined['POSITIVE'].rolling(5, min_periods=1).mean()
                combined['NEGATIVE_rolling'] = combined['NEGATIVE'].rolling(5, min_periods=1).mean()

            # -----------------------------
            # Save CSV & store result
            # -----------------------------
            combined.to_csv(f"data/{ticker}_sentiment.csv", index=False)
            self.results[ticker] = combined

        print("\nAnalysis complete for all tickers.")
        return self.results



# Step 5: Trigger the Agent

In [71]:
agent = StockSentimentAgent(
    tickers=tickers,
    start=start_date,
    end=end_date,
    news_api=newsapi,
    sentiment_pipeline=sentiment_pipeline
)

results = agent.run()


[*********************100%***********************]  1 of 1 completed


Processing AAPL...





AAPL news count before filtering: 20


[*********************100%***********************]  1 of 1 completed


Processing MSFT...





MSFT news count before filtering: 20


[*********************100%***********************]  1 of 1 completed


Processing GOOGL...





GOOGL news count before filtering: 20


[*********************100%***********************]  1 of 1 completed


Processing AMZN...





AMZN news count before filtering: 20


[*********************100%***********************]  1 of 1 completed


Processing TSLA...





TSLA news count before filtering: 20


[*********************100%***********************]  1 of 1 completed


Processing PYPL...





PYPL news count before filtering: 20

Analysis complete for all tickers.


## Step 6: Dynamic Plotting (Plotly)


In [72]:
import plotly.graph_objects as go

for ticker, combined in agent.results.items():
    # Detect Close column
    close_col_candidates = [c for c in combined.columns if 'Close' in c]
    if close_col_candidates:
        close_col = close_col_candidates[0]
    else:
        continue

    fig = go.Figure()

    # Stock Price line
    fig.add_trace(go.Scatter(
        x=combined['date'], y=combined[close_col],
        name='Stock Price', line=dict(color='blue', width=2), yaxis='y1'
    ))

    # Positive Sentiment rolling line
    if 'POSITIVE_rolling' in combined.columns:
        fig.add_trace(go.Scatter(
            x=combined['date'], y=combined['POSITIVE_rolling'],
            name='Positive Sentiment', line=dict(color='green', width=2, dash='dot'), yaxis='y2'
        ))

    # Negative Sentiment rolling line
    if 'NEGATIVE_rolling' in combined.columns:
        fig.add_trace(go.Scatter(
            x=combined['date'], y=combined['NEGATIVE_rolling'],
            name='Negative Sentiment', line=dict(color='red', width=2, dash='dot'), yaxis='y2'
        ))

    # Volume bars
    if 'Volume' in combined.columns:
        fig.add_trace(go.Bar(
            x=combined['date'], y=combined['Volume'],
            name='Volume', marker_color='lightgrey', opacity=0.5, yaxis='y3'
        ))

    # Market Cap line
    if 'MarketCap' in combined.columns:
        fig.add_trace(go.Scatter(
            x=combined['date'], y=combined['MarketCap'],
            name='Market Cap', line=dict(color='purple', width=2, dash='dash'), yaxis='y4'
        ))

    # Layout with corrected positions
    fig.update_layout(
        title=f"{ticker} - Price, Sentiment, Volume & Market Cap",
        xaxis=dict(title='Date'),
        yaxis=dict(title='Stock Price', side='left', position=0),
        yaxis2=dict(title='Sentiment Proportion', overlaying='y', side='right', position=0.95),
        yaxis3=dict(title='Volume', overlaying='y', side='right', position=0.9),
        yaxis4=dict(title='Market Cap', overlaying='y', side='right', position=0.85),
        legend=dict(x=0.02, y=0.98),
        template='plotly_white',
        width=1200, height=600,
        bargap=0.2
    )

    fig.show()


## Step 7: Summary Table
We create a summary showing average sentiment vs average returns.

In [73]:
# -----------------------------
# Summary Table
# -----------------------------
summary = []

for ticker, combined in results.items():
    # Detect Close column dynamically
    close_col_candidates = [c for c in combined.columns if 'Close' in c]
    if close_col_candidates:
        close_col = close_col_candidates[0]
    else:
        print(f"No Close column found for {ticker}, skipping summary.")
        continue

    # Avg daily return %
    avg_return = combined[close_col].pct_change().mean() * 100

    # Average sentiment proportions
    pos_sent = combined['POSITIVE'].mean()
    neg_sent = combined['NEGATIVE'].mean()
    neu_sent = combined['NEUTRAL'].mean()
    sentiment_score = combined['sentiment_score'].mean()
    pos_rolling = combined['POSITIVE_rolling'].mean()
    neg_rolling = combined['NEGATIVE_rolling'].mean()

    # Correlations with volume
    vol_corr_pos = combined['Volume'].corr(combined['POSITIVE'])
    vol_corr_neg = combined['Volume'].corr(combined['NEGATIVE'])

    # Correlation with market cap
    if 'MarketCap' in combined.columns:
        mc_corr_pos = combined['MarketCap'].corr(combined['POSITIVE'])
        mc_corr_neg = combined['MarketCap'].corr(combined['NEGATIVE'])
    else:
        mc_corr_pos = mc_corr_neg = None

    summary.append([
        ticker,
        avg_return,
        pos_sent,
        neg_sent,
        neu_sent,
        sentiment_score,
        pos_rolling,
        neg_rolling,
        vol_corr_pos,
        vol_corr_neg,
        mc_corr_pos,
        mc_corr_neg
    ])

# Create DataFrame
summary_df = pd.DataFrame(
    summary,
    columns=[
        'Ticker', 'Avg Daily Return (%)',
        'Avg Positive Sentiment', 'Avg Negative Sentiment', 'Avg Neutral Sentiment',
        'Avg Sentiment Score', 'POSITIVE Rolling Avg', 'NEGATIVE Rolling Avg',
        'Volume vs Positive Corr', 'Volume vs Negative Corr',
        'MarketCap vs Positive Corr', 'MarketCap vs Negative Corr'
    ]
)

summary_df



invalid value encountered in divide


invalid value encountered in divide



Unnamed: 0,Ticker,Avg Daily Return (%),Avg Positive Sentiment,Avg Negative Sentiment,Avg Neutral Sentiment,Avg Sentiment Score,POSITIVE Rolling Avg,NEGATIVE Rolling Avg,Volume vs Positive Corr,Volume vs Negative Corr,MarketCap vs Positive Corr,MarketCap vs Negative Corr
0,AAPL,0.088206,0.010297,0.019451,0.0,-0.009153,0.008924,0.018535,0.078594,0.028639,,
1,MSFT,0.084678,0.014874,0.008009,0.0,0.006865,0.013043,0.008009,-0.055632,-0.016435,,
2,GOOGL,0.149961,0.015637,0.007246,0.0,0.008391,0.012128,0.006636,-0.013723,0.170141,,
3,AMZN,0.10808,0.017925,0.004958,0.0,0.012967,0.013806,0.004958,-0.002131,0.026026,,
4,TSLA,0.203317,0.010297,0.008009,0.0,0.002288,0.010297,0.007094,0.025682,0.025919,,
5,PYPL,0.04769,0.009153,0.016018,0.0,-0.006865,0.007094,0.014874,0.090928,-0.006681,,


## Next Steps for Team Integration
- Member 2 can add Technical Analysis Agent (RSI, MACD, etc.)
- Member 3 can integrate with Portfolio Simulation and Coordinator Agent
- Evaluator–Optimizer loop can adjust classification thresholds
