In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

import sys
sys.path.append('../scripts') # adjust the path based on actual location
from quantitative_analysis import StockAnalyzer


In [None]:
# Read the MSFT price history from disk, wrap it in a StockAnalyzer, and let
# the analyzer run its own preparation step (prepare_data is defined in
# ../scripts/quantitative_analysis, not in this notebook).
msf_price_data = pd.read_csv('../src/data/yfinance_data/MSFT_historical_data.csv')
msf_analyzer = StockAnalyzer(msf_price_data)
msf_analyzer.prepare_data()


In [None]:
# Ask the analyzer to compute its technical indicators on the prepared prices.
# NOTE(review): which indicators are produced, and where they are stored, is
# defined by StockAnalyzer.add_technical_indicators (not visible here) — confirm
# before relying on specific column names downstream.
msf_analyzer.add_technical_indicators()

In [None]:
# Render the analyzer's indicator plot(s).
# NOTE(review): presumably requires add_technical_indicators() to have run
# first — verify against StockAnalyzer.
msf_analyzer.plot_indicators()

In [None]:
# Draw the candlestick chart using the analyzer's default arguments.
msf_analyzer.plot_candlestick()

In [None]:
# Render the analyzer's volume plot (implementation lives in StockAnalyzer,
# imported from ../scripts/quantitative_analysis).
msf_analyzer.plot_volume()

## Correlation analysis: news sentiment vs. daily stock returns

In [None]:
# Read the scored news file and keep only the rows whose 'stock' value is 'MSF'.
sentiment_df = (
    pd.read_csv("../src/sentiment_with_polarity.csv")
    .loc[lambda frame: frame['stock'] == 'MSF']
)

# Report how many rows survived the ticker filter.
print(f"✅ Sentiment data cleaned. Rows remaining: {len(sentiment_df)}")


In [None]:
# Sanity-check how the analyzer stores its dates before joining them against
# the sentiment data: report the dtype and verify every timestamp is at
# midnight (i.e. already "normalized" to a calendar day).
stock_df = msf_analyzer.df.copy()

# The dates may live in a 'Date' column or in the index. Pick whichever is
# present so the midnight check below works for both layouts; reading
# `.index.time` unconditionally fails when the index is not datetime-like.
if 'Date' in stock_df.columns:
    print("📌 'Date' column dtype:", stock_df['Date'].dtype)
    raw_dates = stock_df['Date']
else:
    print("🕓 'Date' is not a column. Checking index...")
    print("📌 Index dtype:", stock_df.index.dtype)
    raw_dates = stock_df.index

# Parse to a DatetimeIndex (also covers dates stored as strings) and flag
# every entry that differs from its own midnight-truncated value.
# The result is a positional boolean array aligned with stock_df's rows.
stock_dates = pd.DatetimeIndex(pd.to_datetime(raw_dates))
all_normalized = stock_dates == stock_dates.normalize()

if all_normalized.all():
    print("✅ All dates are normalized (00:00:00).")
else:
    print("⚠️ Some dates are not normalized. Example non-normalized rows:")
    print(stock_df[~all_normalized].head())



In [None]:
# Correlate the average daily news-sentiment polarity with the stock's daily
# percentage return.
#
# Produces (for later cells): daily_sentiment, daily_returns, merged_df,
# correlation, start_date, end_date.
ticker = 'MSF'  # symbol as it appears in the sentiment file's 'stock' column


def _as_calendar_days(values):
    """Parse date-like values into a tz-naive DatetimeIndex truncated to midnight."""
    days = pd.DatetimeIndex(pd.to_datetime(values))
    if days.tz is not None:
        # Drop the offset but keep the local wall-clock date so these keys can
        # be matched against tz-naive dates from the other dataset.
        days = days.tz_localize(None)
    return days.normalize()


# 1. Filter sentiment and average polarity per calendar day.
#    The dates are truncated to the day BEFORE grouping; grouping on raw
#    timestamps and normalizing afterwards would leave several rows per day.
msf_sentiment = sentiment_df[sentiment_df['stock'] == ticker]
daily_sentiment = (
    msf_sentiment
    .assign(date=_as_calendar_days(msf_sentiment['date']))
    .groupby('date')['polarity']
    .mean()
    .to_frame()
)

# 2. Daily percentage returns from closing prices.
#    NOTE(review): pct_change assumes rows are already in chronological
#    order — confirm that StockAnalyzer.prepare_data() guarantees this.
stock_df = msf_analyzer.df.copy()
stock_df['return'] = stock_df['Close'].pct_change() * 100

# 3. Index the returns by calendar day, taking the dates from the 'Date'
#    column when present and from the index otherwise.
stock_dates = stock_df['Date'] if 'Date' in stock_df.columns else stock_df.index
daily_returns = stock_df[['return']].copy()
daily_returns.index = _as_calendar_days(stock_dates)

# 4. Keep only returns inside the sentiment date range. A boolean mask is
#    used instead of label slicing so this works on an unsorted index and
#    yields an empty frame (not an error) when there is no sentiment data.
start_date = daily_sentiment.index.min()
end_date = daily_sentiment.index.max()
in_sentiment_range = (daily_returns.index >= start_date) & (daily_returns.index <= end_date)
daily_returns = daily_returns[in_sentiment_range]

# 5. Merge on the shared calendar-day index and correlate
merged_df = pd.merge(daily_sentiment, daily_returns, left_index=True, right_index=True, how='inner')
correlation = merged_df['polarity'].corr(merged_df['return'])

# 6. Output
print("📅 Sentiment dates:", start_date, "→", end_date)
print(f"🧮 Merged rows: {len(merged_df)}")
if len(merged_df) < 2:
    print("⚠️ Fewer than 2 overlapping days — the correlation below is not meaningful.")
print("\n📄 Sample merged data:\n", merged_df.head())
print(f"\n📊 Correlation between {ticker} sentiment and return: {correlation:.4f}")


In [None]:
# Pairwise correlations between the columns of merged_df.
corr_matrix = merged_df.corr()

# Draw the matrix as an annotated heatmap on an explicitly created axes.
fig, ax = plt.subplots(figsize=(6, 4))
sns.heatmap(corr_matrix, annot=True, cmap="coolwarm", fmt=".2f", ax=ax)
ax.set_title("Correlation Heatmap: Sentiment vs Return")
fig.tight_layout()
plt.show()

In [None]:
# Scatter of daily mean sentiment polarity against the daily % return held in
# merged_df. The title names MSF (the ticker this notebook filters on); it
# previously said AAPL, a copy-paste leftover that mislabelled the figure.
fig, ax = plt.subplots(figsize=(10, 6))
ax.scatter(merged_df['polarity'], merged_df['return'], alpha=0.7)
ax.set_title('Sentiment Polarity vs Stock Return (MSF)', fontsize=14)
ax.set_xlabel('Sentiment Polarity')
ax.set_ylabel('Daily Stock Return (%)')
ax.grid(True)
fig.tight_layout()
plt.show()