In [None]:
# Import required libraries
import pandas as pd
import matplotlib.pyplot as plt
from src.data.loader import load_news_csv, load_stock_csv
from src.analysis.correlation import daily_average_sentiment, compute_sentiment_return_correlation
from src.indicators import compute_basic_indicators, compute_returns

## Load news dataset and preview

In [None]:
# Read the news dataset through the project loader, then preview it.
news = load_news_csv()
# head() defaults to the first five rows.
news.head()

## Basic EDA: Headline lengths and publisher counts

In [None]:
# Character count of every headline, stored as a new column on `news`.
# NOTE(review): astype(str) also stringifies missing values, so a null headline
# may be measured by the length of its text form — confirm there are no nulls.
news['headline_len'] = (
    news['headline']
    .astype(str)
    .str.len()
)

# Summary statistics of the headline-length distribution.
news['headline_len'].describe()

In [None]:
# Ten publishers with the most articles (value_counts sorts most frequent first).
publisher_counts = news['publisher'].value_counts().head(10)

# Keep the Axes returned by the plot call so both axes can be labelled;
# the figure should be readable on its own when the notebook is skimmed.
ax = publisher_counts.plot(kind='bar', title='Top 10 publishers')
ax.set_xlabel('Publisher')
ax.set_ylabel('Number of articles');

## Time-series: Publication frequency over time
Plot daily counts of articles to find spikes.

In [None]:
# Truncate each publication timestamp to midnight so articles group by calendar day.
# NOTE(review): assumes every value in news['date'] parses to one consistent
# datetime dtype (the .dt accessor requires that) — confirm against the raw data.
news['date_dt'] = pd.to_datetime(news['date']).dt.floor('D')

# Number of articles per day. Days with no articles are absent from this
# series rather than recorded as zero.
daily_counts = news.groupby('date_dt').size()

# Label both axes through the captured Axes so the figure stands alone.
ax = daily_counts.plot(title='Daily article counts', figsize=(12, 4))
ax.set_xlabel('Publication date')
ax.set_ylabel('Articles per day');

## Indicators & Sentiment Correlation (AAPL example)

In [None]:
# Ticker used for the worked example in this section.
symbol = 'AAPL'

# Load the stock data for that ticker and run the project's indicator helper on it.
stock = load_stock_csv(symbol)
stock_ind = compute_basic_indicators(stock)

# Show the last rows of the close column next to the two 20-suffixed indicator
# columns (presumably 20-period simple/exponential moving averages — confirm
# against compute_basic_indicators).
stock_ind.loc[:, ['Close', 'SMA_20', 'EMA_20']].tail()

In [None]:
# The helper's result is unpacked into two values: `r`, reported below as the
# Pearson correlation, and `n`, reported as the number of aligned days.
# NOTE(review): `news` is passed without any filtering visible here — confirm
# whether the helper restricts it to `symbol` or whether it should be filtered first.
r, n = compute_sentiment_return_correlation(news, stock)

print(
    f'Pearson correlation (sentiment vs returns) for {symbol}: {r:.4f} (aligned days: {n})'
)