In [1]:
import pandas as pd
from textblob import TextBlob
import matplotlib.pyplot as plt
import seaborn as sns
import os

In [4]:
# ========== PATHS ==========
news_path = "../data/raw_analyst_ratings.csv"
stock_folder = "./"
plot_folder = "plots/task3"
output_folder = "outputs/task3"

os.makedirs(plot_folder, exist_ok=True)
os.makedirs(output_folder, exist_ok=True)

In [5]:
# ========== SENTIMENT ANALYSIS ==========
news_df = pd.read_csv(news_path, parse_dates=["date"])
news_df['date'] = pd.to_datetime(
    news_df['date'], format='mixed', errors='coerce', utc=True)
news_df['date'] = news_df['date'].dt.date
news_df.dropna(subset=["headline"], inplace=True)


def get_sentiment(text):
    return TextBlob(str(text)).sentiment.polarity


news_df['sentiment'] = news_df['headline'].apply(get_sentiment)
daily_sentiment = news_df.groupby("date")["sentiment"].mean().reset_index()
daily_sentiment.columns = ["date", "avg_sentiment"]

In [6]:
# ========== TICKERS TO PROCESS ==========
ticker_files = {
    "AAPL": "../data/AAPL_historical_data.csv",
    "AMZN": "../data/AMZN_historical_data.csv",
    "GOOG": "../data/GOOG_historical_data.csv",
    "META": "../data/META_historical_data.csv",
    "MSFT": "../data/MSFT_historical_data.csv",
    "NVDA": "../data/NVDA_historical_data.csv",
    "TSLA": "../data/TSLA_historical_data.csv"
}

results = []

In [7]:
# ========== PROCESS EACH TICKER ==========
for ticker, file in ticker_files.items():
    print(f"📈 Processing {ticker}...")

    df = pd.read_csv(os.path.join(stock_folder, file), parse_dates=["Date"])
    df["date"] = df["Date"].dt.date
    df.sort_values("date", inplace=True)
    df["daily_return"] = df["Close"].pct_change()

    # Merge with sentiment
    merged = pd.merge(
        daily_sentiment, df[["date", "daily_return"]], on="date", how="inner").dropna()
    merged.to_csv(
        f"{output_folder}/{ticker}_sentiment_vs_return.csv", index=False)

    if not merged.empty:
        corr = merged["avg_sentiment"].corr(merged["daily_return"])
        results.append({"Ticker": ticker, "Correlation": corr})

        # Plot: Sentiment vs Return
        fig, ax1 = plt.subplots(figsize=(12, 5))
        ax1.plot(merged["date"], merged["daily_return"],
                 color="tab:blue", label="Daily Return")
        ax1.set_ylabel("Daily Return", color="tab:blue")
        ax1.tick_params(axis='y', labelcolor='tab:blue')

        ax2 = ax1.twinx()
        ax2.plot(merged["date"], merged["avg_sentiment"],
                 color="tab:orange", label="Sentiment", alpha=0.6)
        ax2.set_ylabel("Avg Sentiment", color="tab:orange")
        ax2.tick_params(axis='y', labelcolor='tab:orange')

        plt.title(f"{ticker} – Sentiment vs Daily Return")
        fig.tight_layout()
        plt.savefig(f"{plot_folder}/{ticker}_sentiment_vs_return.png")
        plt.close()

📈 Processing AAPL...
📈 Processing AMZN...
📈 Processing GOOG...
📈 Processing META...
📈 Processing MSFT...
📈 Processing NVDA...
📈 Processing TSLA...


In [8]:
# ========== SUMMARY CHART ==========
summary_df = pd.DataFrame(results)
summary_df.to_csv(
    f"{output_folder}/sentiment_return_correlation_summary.csv", index=False)

In [9]:
# Barplot
plt.figure(figsize=(10, 6))
sns.barplot(data=summary_df, x="Ticker", y="Correlation", palette="coolwarm")
plt.axhline(0, color="black", linestyle="--")
plt.title("Correlation Between Daily Sentiment & Stock Return")
plt.ylabel("Pearson Correlation")
plt.tight_layout()
plt.savefig(f"{plot_folder}/correlation_barplot.png")
plt.close()

print("✅ Task 3 completed. Plots saved to 'plots/task3/' and results to 'outputs/task3/'")


Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `x` variable to `hue` and set `legend=False` for the same effect.

  sns.barplot(data=summary_df, x="Ticker", y="Correlation", palette="coolwarm")


✅ Task 3 completed. Plots saved to 'plots/task3/' and results to 'outputs/task3/'
