In [81]:
# -----------------------------
# Cell 1: Setup paths
# -----------------------------
import sys
from pathlib import Path

# Notebook location
notebook_path = Path().resolve()  # current notebook folder
project_root = notebook_path.parent  # Notebook/ -> project root
src_path = project_root / "src"

# Add to Python path
sys.path.insert(0, str(src_path))
sys.path.insert(0, str(project_root))

print("Notebook path:", notebook_path)
print("Project root:", project_root)
print("SRC path:", src_path)

Notebook path: D:\10Acadamy\Week 1\Task\Predicting-Price-Moves-with-News-Sentiment\notebooks
Project root: D:\10Acadamy\Week 1\Task\Predicting-Price-Moves-with-News-Sentiment
SRC path: D:\10Acadamy\Week 1\Task\Predicting-Price-Moves-with-News-Sentiment\src


In [82]:
# -----------------------------
# Cell 2: Imports
# -----------------------------
import logging
import pandas as pd

from fns_project.config import ConfigLoader
from fns_project.logging_config import setup_logger

# Data handling
from fns_project.data.loader import load_news_csv, load_price_csv
from fns_project.data.preprocess import clean_text, preprocess_headlines, add_headline_metrics
from fns_project.utils.notebook_bootstrap import cfg, logger, RAW_DIR, PROCESSED_DIR, FEATURE_DIR, PLOTS_DIR, ASSETS, START_DATE, END_DATE
from fns_project.data.align_dates import align_news_to_trading_days
# Feature engineering
from fns_project.features.sentiment_features import (
    aggregate_daily_sentiment,
    add_rolling_sentiment,
    add_lagged_sentiment,
)


# Analysis
from fns_project.analysis.returns import compute_daily_returns
from fns_project.analysis.correlation import correlation_with_returns
from fns_project.analysis.sentiment import add_sentiment_columns


# Visualization
from fns_project.viz.plots import (
    plot_daily_sentiment,
    plot_price_with_indicators,
    plot_lagged_correlation,
    plot_returns_vs_sentiment
)

# Utilities
from fns_project.utils.dates import ensure_tz_naive, normalize_dates

print("All modules imported successfully")

All modules imported successfully


In [61]:
# -----------------------------
# Cell 3: Load input data
# -----------------------------
RAW_DIR = (project_root / "data/raw").resolve()
news_file = RAW_DIR / "news" / "raw_analyst_ratings.csv"
price_file = RAW_DIR / "price" / f"{ASSETS[0]}.csv"

logger.info(f"Using news file: {news_file}")
logger.info(f"Using price file: {price_file}")

news_df = load_news_csv(news_file)
price_df = load_price_csv(price_file)

logger.info(f"Loaded news: {len(news_df)} rows")
logger.info(f"Loaded prices: {len(price_df)} rows")

2025-11-25 22:05:34,784 INFO Using news file: D:\10Acadamy\Week 1\Task\Predicting-Price-Moves-with-News-Sentiment\data\raw\news\raw_analyst_ratings.csv
2025-11-25 22:05:34,784 [INFO] notebook: Using news file: D:\10Acadamy\Week 1\Task\Predicting-Price-Moves-with-News-Sentiment\data\raw\news\raw_analyst_ratings.csv
2025-11-25 22:05:34,788 INFO Using price file: D:\10Acadamy\Week 1\Task\Predicting-Price-Moves-with-News-Sentiment\data\raw\price\AAPL.csv
2025-11-25 22:05:34,788 [INFO] notebook: Using price file: D:\10Acadamy\Week 1\Task\Predicting-Price-Moves-with-News-Sentiment\data\raw\price\AAPL.csv
2025-11-25 22:05:52,944 INFO Loaded news: 55987 rows from D:\10Acadamy\Week 1\Task\Predicting-Price-Moves-with-News-Sentiment\data\raw\news\raw_analyst_ratings.csv
2025-11-25 22:05:52,944 [INFO] fns_project.data.loader: Loaded news: 55987 rows from D:\10Acadamy\Week 1\Task\Predicting-Price-Moves-with-News-Sentiment\data\raw\news\raw_analyst_ratings.csv
2025-11-25 22:05:53,495 INFO Loaded pri

In [62]:
# Cell 4: Preprocess news
# -----------------------------
# 1) Clean headlines
news_clean = preprocess_headlines(news_df, text_col="headline")

# 2) Add metrics
news_clean = add_headline_metrics(news_clean)

logger.info("News preprocessing complete")
news_clean.head()

2025-11-25 22:06:08,099 INFO Added headline metrics to DataFrame (rows=55987)
2025-11-25 22:06:08,099 [INFO] fns_project.data.preprocess: Added headline metrics to DataFrame (rows=55987)
2025-11-25 22:06:08,109 INFO News preprocessing complete
2025-11-25 22:06:08,109 [INFO] notebook: News preprocessing complete


Unnamed: 0.1,Unnamed: 0,headline,url,publisher,date,stock,headline_len_chars,headline_word_count,headline_char_per_word
0,357064,etfs watch april 28 2011 dgp ieo prn idx,https://www.benzinga.com/etfs/bond-etfs/11/04/...,ETF Professor,2011-04-27 21:01:48-04:00,DGP,40,9,4.444444
1,437774,new regional emerging markets etf,https://www.benzinga.com/etfs/emerging-market-...,Sam Subramanian,2011-04-28 13:49:29-04:00,ESR,33,5,6.6
2,350150,dejour extends credit facility october 31st fa...,https://www.benzinga.com/news/11/04/1041764/de...,Theo Kratz,2011-04-28 15:00:36-04:00,DEJ,64,11,5.818182
3,49280,etf showdown crouching tigers hidden etfs,https://www.benzinga.com/etfs/emerging-market-...,ETF Professor,2011-04-29 13:47:06-04:00,AIA,41,6,6.833333
4,539428,gdl fund redeem outstanding 8 50 series cumula...,https://www.benzinga.com/news/11/04/1045464/th...,Benzinga Staff,2011-04-29 16:11:05-04:00,GDL,76,11,6.909091


In [80]:
# -----------------------------
# Cell 5: Align news & compute sentiment
# -----------------------------

# 1️⃣ Make date tz-naive
news_clean = ensure_tz_naive(news_clean, date_col="date")

# 2️⃣ Preprocess headlines if needed
news_clean = preprocess_headlines(news_clean, text_col="headline")

# 3️⃣ Add sentiment columns (this creates 'sentiment_ensemble' or similar)
news_clean = add_sentiment_columns(news_clean, text_col="headline")

# 4️⃣ Align news timestamps to trading days
news_aligned = align_news_to_trading_days(news_clean, price_df)

# 5️⃣ Aggregate daily sentiment
daily_sentiment = aggregate_daily_sentiment(
    news_aligned,
    date_col="date",
    sentiment_col="sentiment_ensemble"
)

# 6️⃣ Add rolling sentiment features
daily_sentiment = add_rolling_sentiment(
    daily_sentiment, window_sizes=[3, 5, 7])

# 7️⃣ Add lagged sentiment features
daily_sentiment = add_lagged_sentiment(daily_sentiment, lags=[1, 2, 3])

logger.info("Daily sentiment features computed")
daily_sentiment.head()

TypeError: add_sentiment_columns() got an unexpected keyword argument 'text_col'

In [None]:
# -----------------------------
# Daily stock returns
# -----------------------------
daily_returns = compute_daily_returns(price_df)

logger.info("Daily returns computed.")
daily_returns.head()

In [None]:
# -----------------------------
# Sentiment ↔ Returns correlation
# -----------------------------
corr_df = correlation_with_returns(
    daily_sentiment=daily_sentiment,
    daily_returns=daily_returns,
    max_lag=5,
    sentiment_col="sentiment_mean",
    returns_col="daily_return"
)

logger.info("Correlation table computed.")
corr_df

In [None]:
# -----------------------------
# Visualization: Lagged correlation
# -----------------------------
fig = plot_correlation_bars(corr_df)
fig.savefig(PLOTS_DIR / "correlation_lags.png", dpi=150)

fig

In [None]:
# -----------------------------
# Visualization: Price + sentiment
# -----------------------------
fig = plot_sentiment_vs_price(
    price_df=price_df,
    sentiment_df=daily_sentiment,
    price_col="close",
    sentiment_col="sentiment_mean"
)

fig.savefig(PLOTS_DIR / "sentiment_vs_price.png", dpi=150)
fig

In [None]:
fig = plot_rolling_sentiment(
    daily_sentiment,
    base_col="sentiment_mean",
    windows=[3, 7, 14]
)

fig.savefig(PLOTS_DIR / "rolling_sentiment.png", dpi=150)
fig