In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

import sys
sys.path.append('../scripts') # adjust the path based on actual location
from quantitative_analysis import StockAnalyzer


In [None]:
# Read Apple's historical price CSV into a DataFrame.
apple_price_data = pd.read_csv(
    '../src/data/yfinance_data/AAPL_historical_data.csv'
)

# Wrap the raw prices in a StockAnalyzer, then run its preparation step.
# NOTE(review): prepare_data() is defined in quantitative_analysis (not
# visible here); later cells read the result through `appl_analyzer.df`.
appl_analyzer = StockAnalyzer(apple_price_data)
appl_analyzer.prepare_data()


In [None]:
# Ask the analyzer to compute its technical indicators; the plotting cells
# that follow are run after this step.
# NOTE(review): the method lives in quantitative_analysis (not shown here);
# presumably it adds indicator columns to the analyzer's data - confirm there.
appl_analyzer.add_technical_indicators()

In [None]:
# Draw the analyzer's indicator chart and hand it a path to save the figure.
# NOTE(review): the file name suggests a 14-period RSI plot - confirm against
# StockAnalyzer.plot_indicators.
indicator_plot_path = "../images/quantitative/apple/rsi_14_plot.png"
appl_analyzer.plot_indicators(save_path=indicator_plot_path)


In [None]:
# Draw the candlestick chart and hand it a path to save the figure.
# NOTE(review): the file name suggests a 50-period SMA overlay - confirm
# against StockAnalyzer.plot_candlestick.
candlestick_plot_path = "../images/quantitative/apple/apple_stock_price_sma50.png"
appl_analyzer.plot_candlestick(save_path=candlestick_plot_path)


In [None]:
# Draw the trading-volume chart and hand it a path to save the figure.
volume_plot_path = "../images/quantitative/apple/apple_volume.png"
appl_analyzer.plot_volume(save_path=volume_plot_path)



Correlation analysis: AAPL news sentiment vs. daily stock return

In [None]:
# Read the sentiment CSV; the filter below relies on its 'stock' column.
sentiment_df = pd.read_csv("../src/sentiment_with_polarity.csv")

# Keep only the rows whose ticker is AAPL.
is_aapl_row = sentiment_df['stock'] == 'AAPL'
sentiment_df = sentiment_df[is_aapl_row]

# Report how many rows survived the ticker filter.
remaining_rows = len(sentiment_df)
print(f"✅ Sentiment data cleaned. Rows remaining: {remaining_rows}")


In [None]:
# Sanity check: are all price dates set to midnight (i.e. date-only)?
# Work on a copy so nothing here touches the analyzer's own frame.
stock_df = appl_analyzer.df.copy()

# The dates may live in a 'Date' column or in the index. Report the dtype and
# pick up the dates from wherever they are, so the check below works in both
# layouts (reading `.index.time` fails when the index is not datetime-like).
if 'Date' in stock_df.columns:
    print("📌 'Date' column dtype:", stock_df['Date'].dtype)
    date_values = pd.DatetimeIndex(pd.to_datetime(stock_df['Date']))
else:
    print("🕓 'Date' is not a column. Checking index...")
    print("📌 Index dtype:", stock_df.index.dtype)
    date_values = pd.to_datetime(stock_df.index)

# A timestamp is normalized when truncating it to midnight leaves it unchanged.
all_normalized = date_values == date_values.normalize()

if all_normalized.all():
    print("✅ All dates are normalized (00:00:00).")
else:
    print("⚠️ Some dates are not normalized. Example non-normalized rows:")
    print(stock_df[~all_normalized].head())



In [None]:
# 1. Take the AAPL rows of the sentiment frame. `sentiment_df` is a plain
#    DataFrame (it has no `.df` attribute), so filter and copy it directly;
#    the copy lets us rewrite the 'date' column without touching the original.
sentiment_aapl = sentiment_df[sentiment_df['stock'] == 'AAPL'].copy()

# The CSV was read without date parsing, so 'date' is text. Convert it to
# tz-naive, midnight-normalized timestamps so it can align with the price
# index below; rows whose date cannot be parsed are dropped.
# NOTE(review): timestamps are bucketed by UTC calendar date - confirm this
# matches how the price dates were recorded.
sentiment_aapl['date'] = (
    pd.to_datetime(sentiment_aapl['date'], errors='coerce', utc=True)
    .dt.tz_localize(None)
    .dt.normalize()
)
sentiment_aapl = sentiment_aapl.dropna(subset=['date'])

# 2. Group by date to get average sentiment polarity
daily_sentiment = sentiment_aapl.groupby('date')['polarity'].mean().to_frame()

# 3. Load stock price data (from the analyzer)
stock_df = appl_analyzer.df.copy()

# Ensure the index is a tz-naive, midnight-normalized DatetimeIndex so both
# sides of the merge use the same kind of key.
stock_df.index = pd.to_datetime(stock_df.index)
if stock_df.index.tz is not None:
    stock_df.index = stock_df.index.tz_localize(None)
stock_df.index = stock_df.index.normalize()

# 4. Calculate daily stock return (percent change of the close)
stock_df['return'] = stock_df['Close'].pct_change() * 100
daily_returns = stock_df[['return']]

# 5. Merge sentiment and returns on date; the inner join keeps only days
#    that have both news sentiment and a price row.
merged_df = pd.merge(daily_sentiment, daily_returns, left_index=True, right_index=True, how='inner')

# 6. Calculate correlation
correlation = merged_df['polarity'].corr(merged_df['return'])

# 7. Display results
print(f"🧮 Merged rows: {len(merged_df)}")
print("\n📄 Sample merged data:\n", merged_df.head())
print(f"\n📊 Correlation between AAPL news sentiment and stock return: {correlation:.4f}")

AttributeError: 'DataFrame' object has no attribute 'df'

In [None]:
# Pairwise correlations between every column of the merged frame.
corr_matrix = merged_df.corr()

# Render the matrix as an annotated heatmap on an explicitly created axes.
fig, ax = plt.subplots(figsize=(6, 4))
sns.heatmap(corr_matrix, annot=True, cmap="coolwarm", fmt=".2f", ax=ax)
ax.set_title("Correlation Heatmap: Sentiment vs Return")
fig.tight_layout()
plt.show()