In [None]:
import pandas as pd
from textblob import TextBlob

In [None]:
# Read the raw analyst-ratings news CSV into the working DataFrame `df`.
# The path is relative to the notebook's working directory.
df = pd.read_csv(
    filepath_or_buffer="../data/raw_analyst_ratings.csv",
)

In [None]:
# Preview the first five rows to sanity-check the loaded columns.
df.head(n=5)

In [None]:
# Data preparation
# Parse the "date" column into pandas datetimes using a strict YYYY-MM-DD
# format. Values that cannot be parsed are turned into NaT (errors="coerce")
# rather than raising.
# NOTE(review): if the raw values also carry a time of day or UTC offset, this
# strict format would presumably coerce them to NaT as well — confirm against
# the CSV before relying on the row counts downstream.
df["date"] = pd.to_datetime(
    df["date"],
    errors="coerce",
    format="%Y-%m-%d",
)

In [None]:
# Keep only the rows whose "date" is a valid timestamp (i.e. not NaT).
df = df[df["date"].notna()]

In [None]:
# Sentiment Analysis
def analyze_sentiment(text):
    """Return the TextBlob polarity score for a piece of text.

    Args:
        text: The text to score (here: a news headline).

    Returns:
        float: ``TextBlob(text).sentiment.polarity`` for string input.
        ``NaN`` when ``text`` is not a string (for example a missing headline
        that pandas read as NaN), so one bad row does not abort a whole
        ``Series.apply`` and does not bias later means, which skip NaN.
    """
    # TextBlob raises TypeError for non-string input; treat such rows as
    # "no sentiment available" instead of crashing.
    if not isinstance(text, str):
        return float("nan")
    return TextBlob(text).sentiment.polarity

In [None]:
# Score every headline and store the polarity in a new "Sentiment" column.
df["Sentiment"] = df["headline"].map(analyze_sentiment)

In [None]:
# Count how many rows (headlines) reference each stock symbol.
# The result is indexed by symbol and ordered from most to least frequent;
# the sort options below are spelled out but are the pandas defaults.
stock_counts = df["stock"].value_counts(sort=True, ascending=False)

In [None]:
# Correlation Analysis
# Aggregate sentiment: mean headline polarity per date, across all stocks.
daily_sentiment = df.groupby("date")["Sentiment"].agg("mean")

In [None]:
# Correlation Analysis
# For every stock symbol, correlate (a) how many headlines mention it on a
# given date with (b) the mean headline sentiment for it on that date.
#
# A per-symbol total from `stock_counts` is a single number and cannot be
# correlated with a series, so the per-date counts are derived here instead.
correlation_results = {}
# Group once up front rather than re-filtering the full frame for each symbol.
headlines_by_stock = df.groupby("stock")
for stock_symbol in stock_counts.index:
    # All rows for this stock symbol
    stock_df = headlines_by_stock.get_group(stock_symbol)
    # Per-date mean sentiment and headline count for this stock. A separate
    # name is used so the overall `daily_sentiment` series is not overwritten.
    stock_daily = stock_df.groupby("date")["Sentiment"].agg(["mean", "count"])
    # Pearson correlation is undefined when either series is constant (this
    # includes symbols seen on fewer than two dates); record NaN explicitly.
    if stock_daily["count"].nunique() < 2 or stock_daily["mean"].nunique() < 2:
        correlation = float("nan")
    else:
        correlation = stock_daily["count"].corr(stock_daily["mean"])
    correlation_results[stock_symbol] = correlation

print("Correlation between daily news sentiment and stock symbols:")
for stock_symbol, correlation in correlation_results.items():
    print(f"{stock_symbol}: {correlation}")

In [None]:
# Overall summary, computed directly from `df` so this cell does not depend on
# a loop variable left over from another cell.
# NOTE(review): no price/return data is loaded in this notebook, so the figure
# below relates sentiment to headline volume, not to stock price movements —
# confirm this is the intended metric.
overall_daily = df.groupby("date")["Sentiment"].agg(["mean", "count"])
overall_correlation = overall_daily["count"].corr(overall_daily["mean"])
print("Correlation between daily news sentiment and daily headline count:", overall_correlation)