In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
# Configure seaborn's global plotting theme with the "whitegrid" style.
sns.set(style="whitegrid")

In [2]:
# Cell 2: Load the earnings dataset and derive calendar columns
# The CSV path is resolved relative to the working directory; adjust if needed.
df = (
    pd.read_csv("multi_ticker_earnings_dataset.csv", parse_dates=["Date"])
    # Calendar helper columns derived from the parsed Date column
    .assign(
        Year=lambda frame: frame["Date"].dt.year,
        Quarter=lambda frame: frame["Date"].dt.to_period("Q"),
        YearMonth=lambda frame: frame["Date"].dt.to_period("M"),
    )
    # Date becomes the index so the frame can be resampled by time
    .set_index("Date")
)

In [None]:
# Cell 3: Monthly mean time series for key features
# `features` is also read by the correlation cell, so the name is kept as-is.
features = ["Return", "Volatility", "RSI", "Surprise_%"]

# Use the MonthEnd offset object rather than the "M" string alias: the alias
# has been deprecated since pandas 2.2 (it emits a FutureWarning and was
# renamed to "ME"), whereas the offset object produces the same month-end
# bins on both older and newer pandas releases.
monthly_means = df[features].resample(pd.offsets.MonthEnd()).mean()

# Draw every feature on one shared axis; seaborn labels each line for the legend.
fig, ax = plt.subplots(figsize=(12, 8))
for feat in features:
    sns.lineplot(data=monthly_means, x=monthly_means.index, y=feat, label=feat, ax=ax)
ax.set_title("Monthly Mean of Features Over Time")
ax.set_xlabel("Date")
ax.set_ylabel("Mean Value")
ax.legend()
plt.show()

In [None]:
# Cell 4: Yearly spread of Volatility, one box per calendar year
# NOTE(review): newer seaborn releases warn when `palette` is passed without
# `hue` — confirm against the installed seaborn version.
fig, ax = plt.subplots(figsize=(12, 8))
sns.boxplot(
    x='Year',
    y='Volatility',
    data=df.reset_index(),  # Date index moved back into a regular column
    palette='Blues',
    ax=ax,
)
ax.set_title("Distribution of Volatility by Year")
ax.set_xlabel("Year")
ax.set_ylabel("Volatility")
# Tilt the tick labels on the x-axis
plt.setp(ax.get_xticklabels(), rotation=45)
plt.show()

In [None]:
# Cell 5: Distribution of RSI by Quarter
# `Quarter` holds pandas Period values (built with dt.to_period('Q')).
# seaborn treats those as plain categories and lays them out in order of first
# appearance, so the x-axis would otherwise follow the row order of the file
# (e.g. ticker by ticker) instead of time. Sorting by quarter before plotting
# guarantees a chronological axis regardless of how the rows are ordered.
rsi_by_quarter = df.reset_index().sort_values('Quarter', kind='stable')

fig, ax = plt.subplots(figsize=(12, 8))
sns.violinplot(data=rsi_by_quarter, x='Quarter', y='RSI', palette='Greens', ax=ax)
ax.set_title("Distribution of RSI by Quarter")
ax.set_xlabel("Quarter")
ax.set_ylabel("RSI")
# Tilt the tick labels on the x-axis
plt.setp(ax.get_xticklabels(), rotation=45)
plt.show()

In [None]:

# Cell 6: Yearly spread of the earnings surprise percentage, one box per year
# NOTE(review): newer seaborn releases warn when `palette` is passed without
# `hue` — confirm against the installed seaborn version.
fig, ax = plt.subplots(figsize=(12, 8))
sns.boxplot(
    x='Year',
    y='Surprise_%',
    data=df.reset_index(),  # Date index moved back into a regular column
    palette='Oranges',
    ax=ax,
)
ax.set_title("Earnings Surprise % Distribution by Year")
ax.set_xlabel("Year")
ax.set_ylabel("Surprise %")
# Tilt the tick labels on the x-axis
plt.setp(ax.get_xticklabels(), rotation=45)
plt.show()

In [None]:
# Cell 7: Pairwise feature correlations computed over the whole dataset
# `features` is the list defined in the monthly-mean cell; two more columns
# are appended for the correlation matrix.
corr_columns = [*features, 'Momentum3', 'ATR14']
corr = df[corr_columns].corr()

fig, ax = plt.subplots(figsize=(10, 8))
# Annotate every cell with its coefficient, rounded to two decimals
sns.heatmap(corr, cmap='coolwarm', annot=True, fmt='.2f', ax=ax)
ax.set_title("Feature Correlation Matrix")
plt.show()