<a href="https://colab.research.google.com/github/ramindersinghusd/aai-520-in3-project/blob/main/StockSentimentAgent_MultiStock_Member1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Member 1: Data + Sentiment Agent (Multi-Stock Version)

This notebook implements my task as a member of Group 2:
- Build Data Retrieval Agent (Yahoo Finance + NewsAPI)
- Implement Sentiment Analysis Agent (Ingest → Preprocess → Classify → Summarize)
- Provide sentiment vs stock price plots
- Extend to multiple stocks (Tech & Fintech)


## Step 1: Install Dependencies
We install required packages for data retrieval, sentiment analysis, and visualization.

In [None]:
!pip install yfinance newsapi-python transformers torch scikit-learn matplotlib seaborn nltk --quiet

## Step 2: Import Libraries

In [None]:
# Install required packages (run once in Colab or local)
!pip install yfinance newsapi-python transformers plotly --quiet

# Imports
import os
from getpass import getpass

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import yfinance as yf
from newsapi import NewsApiClient
from plotly.subplots import make_subplots
from transformers import pipeline


## Step 3: Parameters / Input

In [None]:
# -------------------------
# USER PARAMETERS
# -------------------------
tickers = ['AAPL','MSFT','GOOGL','AMZN','TSLA','PYPL']  # Add or remove companies dynamically
start_date = '2024-01-01'
end_date = '2024-12-31'
news_per_company = 20  # number of articles requested per ticker
# NOTE(review): this SST-2 checkpoint presumably emits only POSITIVE/NEGATIVE labels,
# in which case the NEUTRAL column downstream stays at 0 - confirm.
sentiment_model_name = "distilbert-base-uncased-finetuned-sst-2-english"

# Initialize NewsAPI.
# The key is never stored in the notebook: it is read from the NEWSAPI_KEY
# environment variable, falling back to an interactive (hidden) prompt.
# A key that was previously committed in this cell should be revoked.
news_api_key = os.environ.get("NEWSAPI_KEY") or getpass("Enter your NewsAPI key: ")
newsapi = NewsApiClient(api_key=news_api_key)

# Initialize sentiment pipeline
sentiment_pipeline = pipeline("sentiment-analysis", model=sentiment_model_name)


Device set to use cpu


## Step 4: Define the Agent Class


In [None]:
def run(self, page_size=None):
    """Run the stock + news-sentiment pipeline for every ticker in ``self.tickers``.

    For each ticker the method:
      1. loads prices via ``self.fetch_stock(ticker)`` (the frame must have a
         ``date`` column);
      2. loads news via ``self.fetch_news(ticker, page_size=...)`` (the frame is
         read through its ``publishedAt`` and ``description`` columns);
      3. labels every description with ``self.analyze_sentiment`` (expected to
         yield a ``(sentiment, confidence)`` pair);
      4. turns the labels into per-day proportions, aligns them with the stock
         dates and derives ``sentiment_score`` plus 5-row rolling means;
      5. writes ``data/<ticker>_sentiment.csv`` and stores the frame in
         ``self.results[ticker]``.

    When no news is returned, the sentiment columns are filled with zeros. The
    derived columns (``sentiment_score``, ``POSITIVE_rolling``,
    ``NEGATIVE_rolling``) are computed in both cases, so every result frame has
    the same columns.

    NOTE(review): this cell defines ``run`` at module level; it presumably has
    to be attached to ``StockSentimentAgent`` (not defined in the visible part
    of the notebook) - confirm.

    Parameters
    ----------
    page_size : int, optional
        Number of articles to request per ticker. Defaults to the notebook-level
        ``news_per_company`` setting.

    Returns
    -------
    dict
        ``self.results``: ticker -> combined stock/sentiment DataFrame.
    """
    sentiment_labels = ['POSITIVE', 'NEGATIVE', 'NEUTRAL']
    if page_size is None:
        # Fall back to the value from the parameters cell.
        page_size = news_per_company

    os.makedirs("data", exist_ok=True)  # ensure folder exists

    for ticker in self.tickers:
        print(f"\nProcessing {ticker}...")

        # -----------------------------
        # Fetch stock data
        # -----------------------------
        stock_df = self.fetch_stock(ticker)
        # Plain `date` objects so the join key matches the news dates below.
        stock_df['date'] = pd.to_datetime(stock_df['date']).dt.date

        # -----------------------------
        # Fetch news data
        # -----------------------------
        news_df = self.fetch_news(ticker, page_size=page_size)

        # --- DEBUG: Check if news returned ---
        print(f"{ticker} news count:", len(news_df))

        if news_df.empty:
            print(f"No news returned for {ticker} in the given date range.")
            # No news: every sentiment proportion is zero.
            combined = stock_df.copy()
            for col in sentiment_labels:
                combined[col] = 0
        else:
            display(news_df.head())

            # Convert publishedAt to plain date
            news_df['publishedAt'] = pd.to_datetime(news_df['publishedAt']).dt.date

            # -----------------------------
            # Sentiment analysis
            # -----------------------------
            news_df[['sentiment','confidence']] = news_df['description'].apply(
                lambda x: pd.Series(self.analyze_sentiment(x))
            )

            # --- DEBUG: Check sentiment results ---
            print(news_df[['description','sentiment','confidence']].head())

            # Normalize labels
            news_df['sentiment'] = news_df['sentiment'].str.upper()

            # -----------------------------
            # Daily sentiment proportions
            # -----------------------------
            # One row per day, one column per label, values are the share of
            # that day's articles carrying the label.
            daily_sentiment = (
                news_df.groupby('publishedAt')['sentiment']
                .value_counts(normalize=True)
                .unstack(fill_value=0)
                .reset_index()
            )

            # --- DEBUG: Check daily sentiment ---
            print(daily_sentiment.head())
            print("Columns:", daily_sentiment.columns)

            # Ensure sentiment columns exist even if a label never occurred
            for col in sentiment_labels:
                if col not in daily_sentiment.columns:
                    daily_sentiment[col] = 0

            # Fill missing dates in sentiment to match stock range
            full_dates = pd.DataFrame({'publishedAt': pd.date_range(stock_df['date'].min(),
                                                                    stock_df['date'].max()).date})
            daily_sentiment = full_dates.merge(daily_sentiment, on='publishedAt', how='left')
            daily_sentiment[sentiment_labels] = daily_sentiment[sentiment_labels].fillna(0)

            # -----------------------------
            # Merge stock + sentiment
            # -----------------------------
            combined = pd.merge(
                stock_df,
                daily_sentiment,
                left_on='date',
                right_on='publishedAt',
                how='left'
            )

            # --- DEBUG: Check merged sentiment values ---
            print(combined[['date','POSITIVE','NEGATIVE','NEUTRAL']].head(10))

        # -----------------------------
        # Compute sentiment score & rolling averages
        # -----------------------------
        # Done for both branches so downstream cells can always rely on the
        # `*_rolling` columns, including for tickers without any news.
        combined['sentiment_score'] = combined['POSITIVE'] - combined['NEGATIVE']
        combined['POSITIVE_rolling'] = combined['POSITIVE'].rolling(5).mean()
        combined['NEGATIVE_rolling'] = combined['NEGATIVE'].rolling(5).mean()

        # -----------------------------
        # Save CSV & store result
        # -----------------------------
        combined.to_csv(f"data/{ticker}_sentiment.csv", index=False)
        self.results[ticker] = combined

    print("\nAnalysis complete for all tickers.")
    return self.results


In [None]:
# NOTE(review): this cell is identical to the one under "Step 5: Initialize & Run Agent";
# running both executes the whole pipeline (including the news requests) twice.
# NOTE(review): `StockSentimentAgent` is not defined in the visible part of the
# notebook - confirm it exists before this cell on a fresh kernel.

# Build the agent from the notebook-level parameters and service clients.
agent = StockSentimentAgent(
    tickers=tickers,
    start=start_date,
    end=end_date,
    news_api=newsapi,
    sentiment_pipeline=sentiment_pipeline
)

# Run the pipeline; `results` is iterated with `.items()` by the plotting and summary cells.
results = agent.run()




YF.download() has changed argument auto_adjust default to True

[*********************100%***********************]  1 of 1 completed

Processing AAPL...




Downcasting object dtype arrays on .fillna, .ffill, .bfill is deprecated and will change in a future version. Call result.infer_objects(copy=False) instead. To opt-in to the future behavior, set `pd.set_option('future.no_silent_downcasting', True)`


YF.download() has changed argument auto_adjust default to True

[*********************100%***********************]  1 of 1 completed


Saved data for AAPL to data/AAPL_sentiment.csv
Processing MSFT...



Downcasting object dtype arrays on .fillna, .ffill, .bfill is deprecated and will change in a future version. Call result.infer_objects(copy=False) instead. To opt-in to the future behavior, set `pd.set_option('future.no_silent_downcasting', True)`


YF.download() has changed argument auto_adjust default to True

[*********************100%***********************]  1 of 1 completed


Saved data for MSFT to data/MSFT_sentiment.csv
Processing GOOGL...



Downcasting object dtype arrays on .fillna, .ffill, .bfill is deprecated and will change in a future version. Call result.infer_objects(copy=False) instead. To opt-in to the future behavior, set `pd.set_option('future.no_silent_downcasting', True)`


YF.download() has changed argument auto_adjust default to True

[*********************100%***********************]  1 of 1 completed


Saved data for GOOGL to data/GOOGL_sentiment.csv
Processing AMZN...



Downcasting object dtype arrays on .fillna, .ffill, .bfill is deprecated and will change in a future version. Call result.infer_objects(copy=False) instead. To opt-in to the future behavior, set `pd.set_option('future.no_silent_downcasting', True)`


YF.download() has changed argument auto_adjust default to True

[*********************100%***********************]  1 of 1 completed


Saved data for AMZN to data/AMZN_sentiment.csv
Processing TSLA...



Downcasting object dtype arrays on .fillna, .ffill, .bfill is deprecated and will change in a future version. Call result.infer_objects(copy=False) instead. To opt-in to the future behavior, set `pd.set_option('future.no_silent_downcasting', True)`


YF.download() has changed argument auto_adjust default to True

[*********************100%***********************]  1 of 1 completed


Saved data for TSLA to data/TSLA_sentiment.csv
Processing PYPL...
Saved data for PYPL to data/PYPL_sentiment.csv



Downcasting object dtype arrays on .fillna, .ffill, .bfill is deprecated and will change in a future version. Call result.infer_objects(copy=False) instead. To opt-in to the future behavior, set `pd.set_option('future.no_silent_downcasting', True)`



## Step 5: Initialize & Run Agent

In [None]:
# Build the agent from the notebook-level parameters (`tickers`, `start_date`,
# `end_date`) and the NewsAPI client / sentiment pipeline created earlier.
# NOTE(review): `StockSentimentAgent` is not defined in the visible part of the
# notebook - confirm it exists before this cell on a fresh kernel.
agent = StockSentimentAgent(
    tickers=tickers,
    start=start_date,
    end=end_date,
    news_api=newsapi,
    sentiment_pipeline=sentiment_pipeline
)

# Run the pipeline; `results` is iterated with `.items()` by the plotting and summary cells.
results = agent.run()



YF.download() has changed argument auto_adjust default to True

[*********************100%***********************]  1 of 1 completed

Processing AAPL...




Downcasting object dtype arrays on .fillna, .ffill, .bfill is deprecated and will change in a future version. Call result.infer_objects(copy=False) instead. To opt-in to the future behavior, set `pd.set_option('future.no_silent_downcasting', True)`


YF.download() has changed argument auto_adjust default to True

[*********************100%***********************]  1 of 1 completed


Saved data for AAPL to data/AAPL_sentiment.csv
Processing MSFT...



Downcasting object dtype arrays on .fillna, .ffill, .bfill is deprecated and will change in a future version. Call result.infer_objects(copy=False) instead. To opt-in to the future behavior, set `pd.set_option('future.no_silent_downcasting', True)`


YF.download() has changed argument auto_adjust default to True

[*********************100%***********************]  1 of 1 completed


Saved data for MSFT to data/MSFT_sentiment.csv
Processing GOOGL...



Downcasting object dtype arrays on .fillna, .ffill, .bfill is deprecated and will change in a future version. Call result.infer_objects(copy=False) instead. To opt-in to the future behavior, set `pd.set_option('future.no_silent_downcasting', True)`


YF.download() has changed argument auto_adjust default to True

[*********************100%***********************]  1 of 1 completed


Saved data for GOOGL to data/GOOGL_sentiment.csv
Processing AMZN...



Downcasting object dtype arrays on .fillna, .ffill, .bfill is deprecated and will change in a future version. Call result.infer_objects(copy=False) instead. To opt-in to the future behavior, set `pd.set_option('future.no_silent_downcasting', True)`


YF.download() has changed argument auto_adjust default to True

[*********************100%***********************]  1 of 1 completed


Saved data for AMZN to data/AMZN_sentiment.csv
Processing TSLA...



Downcasting object dtype arrays on .fillna, .ffill, .bfill is deprecated and will change in a future version. Call result.infer_objects(copy=False) instead. To opt-in to the future behavior, set `pd.set_option('future.no_silent_downcasting', True)`


YF.download() has changed argument auto_adjust default to True

[*********************100%***********************]  1 of 1 completed


Saved data for TSLA to data/TSLA_sentiment.csv
Processing PYPL...
Saved data for PYPL to data/PYPL_sentiment.csv



Downcasting object dtype arrays on .fillna, .ffill, .bfill is deprecated and will change in a future version. Call result.infer_objects(copy=False) instead. To opt-in to the future behavior, set `pd.set_option('future.no_silent_downcasting', True)`



## Step 6: Dynamic Plotting (Plotly)


In [None]:
# Plot closing price against the daily POSITIVE / NEGATIVE sentiment shares.
# Prices and sentiment shares live on very different scales (sentiment is a
# proportion between 0 and 1), so sentiment gets its own secondary y-axis;
# on a shared axis the sentiment lines would be flattened against zero.
for ticker, combined in results.items():
    close_col_candidates = [c for c in combined.columns if 'Close' in c]
    if not close_col_candidates:
        # Report the skip instead of silently dropping the ticker.
        print(f"Skipping {ticker}: no closing-price column found.")
        continue
    close_col = close_col_candidates[0]

    fig = make_subplots(specs=[[{"secondary_y": True}]])

    # Price on the primary (left) axis.
    fig.add_trace(
        go.Scatter(x=combined['date'], y=combined[close_col], mode='lines', name=str(close_col)),
        secondary_y=False,
    )

    # Sentiment shares on the secondary (right) axis.
    for label in ('POSITIVE', 'NEGATIVE'):
        fig.add_trace(
            go.Scatter(x=combined['date'], y=combined[label], mode='lines', name=label),
            secondary_y=True,
        )

    fig.update_layout(title_text=f"{ticker} - Stock Price vs Sentiment")
    fig.update_xaxes(title_text="Date")
    fig.update_yaxes(title_text="Price", secondary_y=False)
    fig.update_yaxes(title_text="Sentiment (daily share of articles)", secondary_y=True)
    fig.show()


## Step 7: Summary Table
We create a summary showing average sentiment vs average returns.

In [None]:
def _close_series(frame):
    """Return the closing-price series of `frame`, or None if no such column exists.

    The column is not always literally named 'Close' (looking it up directly
    raised ``KeyError: 'Close'``), so an exact match is preferred and any
    column whose name contains 'Close' is used as a fallback.
    """
    matches = [c for c in frame.columns if c == 'Close']
    if not matches:
        matches = [c for c in frame.columns if 'Close' in str(c)]
    if not matches:
        return None
    prices = frame[matches[0]]
    if isinstance(prices, pd.DataFrame):
        # Multi-level columns can return a frame; use its first column.
        prices = prices.iloc[:, 0]
    return prices


def _column_mean(frame, column):
    """Return the mean of `column`, or NaN when the column is absent."""
    if column in frame.columns:
        return frame[column].mean()
    return float('nan')


# NOTE(review): `companies` (ticker -> company name) is not defined in the
# visible part of the notebook; fall back to an empty mapping so the ticker
# itself is shown when no name is available - confirm where it is defined.
company_names = globals().get('companies', {})

summary = []

for ticker, combined in results.items():
    # Average daily stock return in %
    close_prices = _close_series(combined)
    if close_prices is None:
        avg_return = float('nan')
    else:
        avg_return = close_prices.pct_change().mean() * 100

    # Average sentiment
    avg_sentiment_score = _column_mean(combined, 'sentiment_score')
    avg_positive_rolling = _column_mean(combined, 'POSITIVE_rolling')
    avg_negative_rolling = _column_mean(combined, 'NEGATIVE_rolling')

    summary.append([
        ticker,
        company_names.get(ticker, ticker),
        round(avg_return, 3),
        round(avg_sentiment_score, 3),
        round(avg_positive_rolling, 3),
        round(avg_negative_rolling, 3)
    ])

# Create summary DataFrame
summary_df = pd.DataFrame(
    summary,
    columns=[
        'Ticker',
        'Company',
        'Avg Daily Return (%)',
        'Avg Sentiment Score',
        'Avg Positive Rolling (5d)',
        'Avg Negative Rolling (5d)'
    ]
)

summary_df


KeyError: 'Close'

## Next Steps for Team Integration
- Member 2 can add Technical Analysis Agent (RSI, MACD, etc.)
- Member 3 can integrate with Portfolio Simulation and Coordinator Agent
- Evaluator–Optimizer loop can adjust classification thresholds
