# FOMC Sentiment Analysis and Market Impact

This notebook analyzes the relationship between Federal Open Market Committee (FOMC) communication tone and U.S. financial market movements.

## Research Question
How does the hawkish or dovish tone of FOMC communications affect major U.S. financial markets?

In [None]:
# --- Standard library ---
import os
import sys
import warnings
from datetime import datetime

# --- Third-party ---
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

# Suppress every warning category for the remainder of the session.
warnings.filterwarnings('ignore')

# Plot defaults: seaborn "whitegrid" theme and a 12x6 default figure size.
sns.set_style('whitegrid')
plt.rcParams.update({'figure.figsize': (12, 6)})

# Prepend <cwd>/../src to the module search path.
sys.path.insert(0, os.path.join(os.getcwd(), os.pardir, 'src'))

## 1. Load Data

In [None]:
# Read the processed sentiment table and convert the `date` column to datetimes
# in a single chained expression.
sentiment_df = (
    pd.read_csv('../data/processed/fomc_sentiment.csv')
    .assign(date=lambda frame: pd.to_datetime(frame['date']))
)

# Quick sanity summary: row count and covered date span.
n_documents = len(sentiment_df)
first_date, last_date = sentiment_df['date'].min(), sentiment_df['date'].max()
print(f"Loaded {n_documents} FOMC documents")
print(f"Date range: {first_date} to {last_date}")

# Last expression of the cell: rich preview of the first rows.
sentiment_df.head()

## 2. Sentiment Analysis Overview

In [None]:
# Sentiment distribution: tone counts (left) and weighted-score histogram (right).

# Tone -> colour lookup. The same hex codes are used for the tone-coloured
# scatter in the "Sentiment Over Time" section, so a tone keeps one colour
# throughout the notebook.
tone_palette = {'hawkish': '#d62728', 'dovish': '#1f77b4', 'neutral': '#7f7f7f'}

fig, axes = plt.subplots(1, 2, figsize=(14, 5))

# Tone distribution.
# value_counts() orders its result by frequency, so a fixed positional colour
# list would paint whichever tone happens to be most common in the first
# colour. Look the colour up per label instead; unknown labels fall back to gray.
tone_counts = sentiment_df['tone'].value_counts()
bar_colors = [tone_palette.get(tone, 'gray') for tone in tone_counts.index]
tone_counts.plot(kind='bar', ax=axes[0], color=bar_colors)
axes[0].set_title('FOMC Communication Tone Distribution', fontsize=14, fontweight='bold')
axes[0].set_xlabel('Tone')
axes[0].set_ylabel('Count')
axes[0].tick_params(axis='x', rotation=0)

# Sentiment score distribution.
# Missing scores are dropped before binning: NaN values make the histogram's
# automatic range detection fail.
weighted_scores = sentiment_df['weighted_sentiment']
axes[1].hist(weighted_scores.dropna(), bins=20, edgecolor='black', alpha=0.7)
axes[1].axvline(0, color='red', linestyle='--', linewidth=2, label='Neutral')
axes[1].set_title('Weighted Sentiment Score Distribution', fontsize=14, fontweight='bold')
axes[1].set_xlabel('Weighted Sentiment (Hawkish > 0, Dovish < 0)')
axes[1].set_ylabel('Frequency')
axes[1].legend()

plt.tight_layout()
plt.show()

# Headline statistics of the weighted score (pandas skips NaN by default).
print("\nSentiment Statistics:")
print(f"Mean sentiment: {weighted_scores.mean():.3f}")
print(f"Std deviation: {weighted_scores.std():.3f}")
print(f"Min: {weighted_scores.min():.3f}")
print(f"Max: {weighted_scores.max():.3f}")

## 3. Sentiment Over Time

In [None]:
# Scatter of weighted sentiment against document date, one colour per tone,
# with a straight-line (degree-1) least-squares trend overlaid.
fig, ax = plt.subplots(figsize=(14, 6))

# One scatter series per tone so each gets its own legend entry.
colors = {'hawkish': '#d62728', 'dovish': '#1f77b4', 'neutral': '#7f7f7f'}
for tone in sentiment_df['tone'].unique():
    is_tone = sentiment_df['tone'] == tone
    ax.scatter(
        sentiment_df.loc[is_tone, 'date'],
        sentiment_df.loc[is_tone, 'weighted_sentiment'],
        c=colors.get(tone, 'gray'),
        label=tone.capitalize(),
        s=100,
        alpha=0.6,
    )

# Linear trend fitted on the dates expressed as whole seconds
# (int64 nanosecond values floor-divided by 10**9).
epoch_seconds = sentiment_df['date'].astype(np.int64) // 10**9
trend_coeffs = np.polyfit(epoch_seconds, sentiment_df['weighted_sentiment'], 1)
trend_fn = np.poly1d(trend_coeffs)
ax.plot(
    sentiment_df['date'],
    trend_fn(epoch_seconds),
    "--",
    color='black',
    linewidth=2,
    label='Trend',
)

# Zero line marks the hawkish/dovish boundary.
ax.axhline(y=0, color='gray', linestyle='-', linewidth=1, alpha=0.5)
ax.set_xlabel('Date', fontsize=12)
ax.set_ylabel('Weighted Sentiment Score', fontsize=12)
ax.set_title('FOMC Communication Sentiment Over Time', fontsize=14, fontweight='bold')
ax.legend(loc='best')
ax.grid(alpha=0.3)
fig.tight_layout()
plt.show()

## 4. Sentiment by Document Type

In [None]:
# Distribution of the weighted sentiment score for each document type.
fig, ax = plt.subplots(figsize=(10, 6))
sentiment_df.boxplot(column='weighted_sentiment', by='type', ax=ax)

# pandas adds its own figure-level title for grouped box plots; blank it out
# and set our own axes title instead.
fig.suptitle('')
ax.set_title('Sentiment Distribution by Document Type', fontsize=14, fontweight='bold')
ax.set_xlabel('Document Type')
ax.set_ylabel('Weighted Sentiment Score')
ax.axhline(y=0, color='red', linestyle='--', linewidth=1)
fig.tight_layout()
plt.show()

# Per-type mean, standard deviation and document count.
type_summary = sentiment_df.groupby('type')['weighted_sentiment'].agg(['mean', 'std', 'count'])
print("\nAverage Sentiment by Document Type:")
print(type_summary)

## 5. Load Correlation Results

In [None]:
# Read the correlation outputs written by the analysis pipeline, if present.
# Only a missing file is handled here; any other read error propagates.
try:
    corr_results = pd.read_csv('../results/correlation_results.csv')
    summary_stats = pd.read_csv('../results/summary_statistics.csv')
except FileNotFoundError:
    # Tell the reader how to produce the missing files.
    for hint in (
        "Correlation results not found. Run the main analysis pipeline first.",
        "Command: python ../src/main.py",
    ):
        print(hint)
else:
    # Both files loaded: show the summary (transposed) and a preview of the
    # correlation table.
    print("Summary Statistics:")
    print(summary_stats.T)

    print(f"\n{'=' * 80}")
    print("\nCorrelation Results Preview:")
    print(corr_results.head(10))

## 6. Visualize Correlations

In [None]:
# Horizontal bar chart of the Pearson correlation between FOMC sentiment and
# 5-day post-event returns, one bar per asset, with '*' marking p < 0.05.
#
# Needs `corr_results` from the "Load Correlation Results" section. The cell is
# deliberately best-effort so the notebook keeps running when that data is
# missing, but the failure reason is now reported instead of being swallowed,
# and KeyboardInterrupt / SystemExit are no longer caught.
try:
    # Rows for the 5-day window, weakest (most negative) correlation first.
    corr_5d = (
        corr_results[corr_results['window_days'] == 5]
        .copy()
        .sort_values('post_pearson_r', ascending=True)
    )

    if corr_5d.empty:
        # Avoid rendering a blank chart when the window is absent from the results.
        print("No correlation results available for the 5-day window.")
    else:
        fig, ax = plt.subplots(figsize=(12, 8))

        # Red for positive correlations, blue for zero/negative ones.
        bar_colors = ['#d62728' if r > 0 else '#1f77b4' for r in corr_5d['post_pearson_r']]
        ax.barh(corr_5d['asset'], corr_5d['post_pearson_r'], color=bar_colors, alpha=0.7)

        ax.axvline(x=0, color='black', linewidth=1)
        ax.set_xlabel('Pearson Correlation Coefficient', fontsize=12)
        ax.set_ylabel('Asset', fontsize=12)
        ax.set_title('Correlation: FOMC Sentiment vs. 5-Day Post-Event Returns',
                     fontsize=14, fontweight='bold')

        # Significance markers. Anchor the star on the outer side of the bar:
        # to the right of positive bars and to the left of negative ones, so it
        # is never drawn on top of the bar itself.
        for position, (r_value, p_value) in enumerate(
            zip(corr_5d['post_pearson_r'], corr_5d['post_pearson_p'])
        ):
            if p_value < 0.05:
                on_right = r_value >= 0
                ax.text(r_value, position,
                        ' *' if on_right else '* ',
                        ha='left' if on_right else 'right',
                        va='center', fontsize=16, fontweight='bold')

        # Legend box explaining the star.
        ax.text(0.02, 0.98, '* p < 0.05', transform=ax.transAxes,
                va='top', fontsize=10,
                bbox=dict(boxstyle='round', facecolor='wheat', alpha=0.5))

        plt.tight_layout()
        plt.show()

        print("\nSignificant Correlations (p < 0.05):")
        sig_corr = corr_5d[corr_5d['post_pearson_p'] < 0.05]
        print(sig_corr[['asset', 'post_pearson_r', 'post_pearson_p', 'n_observations']])

except Exception as exc:
    print("Unable to visualize correlations. Ensure correlation analysis has been run.")
    print(f"Reason: {type(exc).__name__}: {exc}")

## 7. Key Findings and Interpretation

In [None]:
# Plain-text summary of the notebook's findings.
# Uses `sentiment_df` (loaded in section 1) and, when available, `corr_5d`
# (built in the correlation-visualisation section).
separator = "=" * 80
print(separator)
print("KEY FINDINGS")
print(separator)

# Report the coverage start from the data itself rather than a fixed month, so
# the sentence cannot contradict the loaded file.
earliest_date = sentiment_df['date'].min()
coverage_start = earliest_date.strftime('%B %Y') if pd.notna(earliest_date) else 'an unknown date'
n_documents = len(sentiment_df)

print("\n1. FOMC Communication Tone:")
print(f"   - Analysis covers {n_documents} FOMC documents since {coverage_start}")
print(f"   - Document types: {sentiment_df['type'].unique().tolist()}")
tone_counts = sentiment_df['tone'].value_counts()
for tone, count in tone_counts.items():
    print(f"   - {tone.capitalize()}: {count} documents ({count/n_documents*100:.1f}%)")

print("\n2. Sentiment Methodology:")
print("   - Keyword-based analysis using hawkish/dovish dictionaries")
print("   - Weighted by document type (Statements: 1.5x, Minutes: 1.2x, Speeches: 1.0x)")
print("   - Positive scores indicate hawkish tone, negative scores indicate dovish tone")

print("\n3. Market Impact Analysis:")
print("   - Event-window methodology: analyzing returns before, during, and after FOMC events")
print("   - Multiple time windows: 1-day, 2-day, and 5-day")
print("   - Assets tracked: Stock indices, Treasury yields, Dollar index, Volatility, Commodities")

# Correlation findings are optional: `corr_5d` only exists when the correlation
# results were loaded. Stay best-effort, but say why the details are missing
# and do not intercept KeyboardInterrupt / SystemExit.
try:
    print("\n4. Correlation Results:")
    sig_corr = corr_5d[corr_5d['post_pearson_p'] < 0.05]
    if len(sig_corr) > 0:
        print(f"   - {len(sig_corr)} assets show statistically significant correlations (p < 0.05)")
        # Positional argmax + iloc picks the row with the largest |r|.
        strongest = sig_corr.iloc[sig_corr['post_pearson_r'].abs().argmax()]
        print(f"   - Strongest correlation: {strongest['asset']} (r = {strongest['post_pearson_r']:.3f})")
    else:
        print("   - No statistically significant correlations found at p < 0.05 level")
except Exception as exc:
    print("   - Run correlation analysis to see detailed results")
    print(f"     (details unavailable: {type(exc).__name__}: {exc})")

print("\n5. Interpretation:")
print("   - Hawkish communications (positive sentiment) typically signal tighter monetary policy")
print("   - Dovish communications (negative sentiment) typically signal looser monetary policy")
print("   - Market reactions may vary based on expectations and economic context")
print("   - Significant correlations suggest FOMC tone has predictive power for asset returns")

print("\n" + separator)

## 8. Export Results

In [None]:
# Build a plain-text summary report from `sentiment_df`, print it, and save it
# under ../results.
weighted_scores = sentiment_df['weighted_sentiment']
report_tone_counts = sentiment_df['tone'].value_counts()

report = f"""
FOMC SENTIMENT ANALYSIS - SUMMARY REPORT
Generated: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}

DATASET OVERVIEW:
- Total Documents: {len(sentiment_df)}
- Date Range: {sentiment_df['date'].min()} to {sentiment_df['date'].max()}
- Hawkish: {report_tone_counts.get('hawkish', 0)}
- Dovish: {report_tone_counts.get('dovish', 0)}
- Neutral: {report_tone_counts.get('neutral', 0)}

SENTIMENT STATISTICS:
- Mean Weighted Sentiment: {weighted_scores.mean():.3f}
- Standard Deviation: {weighted_scores.std():.3f}
- Range: [{weighted_scores.min():.3f}, {weighted_scores.max():.3f}]

METHODOLOGY:
This analysis uses textual analysis to quantify hawkish/dovish tone in FOMC communications
and tests whether these tone shifts correlate with U.S. financial market movements.
"""

print(report)

# Save report.
# The results directory may not exist yet (e.g. when the analysis pipeline has
# not been run), in which case open() would raise; create it first.
report_path = '../results/analysis_report.txt'
os.makedirs(os.path.dirname(report_path), exist_ok=True)
# Explicit encoding keeps the written file independent of the platform default.
with open(report_path, 'w', encoding='utf-8') as f:
    f.write(report)

print(f"\nReport saved to: {report_path}")