# Financial Sentiment Analysis Demo

This notebook demonstrates the complete workflow of the Financial Sentiment Analysis tool:
1. Data Collection from news and RSS feeds (a Twitter collector is imported, but no cell in this demo calls it)
2. Sentiment Analysis using multiple NLP models
3. Price Data Collection for Bitcoin and USD/NGN
4. Correlation Analysis between sentiment and prices
5. Visualization and Reporting

## Setup and Imports

In [None]:
# Notebook bootstrap: make the project's `src` package importable, silence
# warnings, and load the collectors, analyzers and visualizers used below.
import sys
from pathlib import Path
import warnings

# Put the parent of the current working directory first on sys.path so the
# `src.*` imports below resolve. This must run before those imports.
# NOTE(review): presumably the notebook is launched from a subdirectory of the
# project root — confirm.
sys.path.insert(0, str(Path.cwd().parent))

# Suppress every warning for the rest of the session. This also hides
# deprecation notices raised by third-party libraries.
warnings.filterwarnings('ignore')

# Project modules: configuration, data collectors, preprocessing, sentiment
# scoring, correlation/prediction models and plotting helpers.
# NOTE(review): TwitterCollector, TextPreprocessor and EnsembleAnalyzer are
# imported but not referenced by any cell visible in this file.
from src.config.settings import Config
from src.data.collectors import NewsCollector, RSSFeedCollector, PriceCollector
from src.data.twitter_collector import TwitterCollector
from src.data.preprocessors import DataFramePreprocessor, TextPreprocessor
from src.sentiment.analyzers import EnsembleAnalyzer, ArticleSentimentAnalyzer
from src.models.correlation import CorrelationAnalyzer, PredictiveAnalyzer
from src.utils.visualization import SentimentVisualizer, PriceVisualizer, CorrelationVisualizer

# NOTE(review): `pd` and `np` are likewise not referenced by the visible cells.
import pandas as pd
import numpy as np
from datetime import datetime, timedelta

# Reaching this point means every import above succeeded.
print("✓ Imports successful!")
print(f"Base directory: {Config.BASE_DIR}")

## 1. Data Collection

### 1.1 Collect News Articles

In [None]:
# Fetch news coverage for the demo keywords. `news_articles` is reused by the
# RSS cell (merge) and by the summary report.
news_collector = NewsCollector()

print("Collecting news articles...")
search_terms = ["Bitcoin", "Nigeria", "cryptocurrency"]
# Only ask for articles from three days before "now" onwards.
window_start = datetime.now() - timedelta(days=3)
news_articles = news_collector.collect_news(
    keywords=search_terms,
    from_date=window_start,
)

article_count = len(news_articles)
print(f"✓ Collected {article_count} news articles")

# Preview the first article when the collector returned anything.
if news_articles:
    print("\nSample article:")
    sample = news_articles[0]
    sample_title = sample.get('title')
    print(f"Title: {sample_title}")
    sample_source = sample.get('source')
    print(f"Source: {sample_source}")
    sample_published = sample.get('published_at')
    print(f"Published: {sample_published}")

### 1.2 Collect RSS Feeds

In [None]:
# Gather entries through the RSS collector, requesting at most 10 per feed.
rss_collector = RSSFeedCollector()

print("Collecting from RSS feeds...")
per_feed_limit = 10
rss_articles = rss_collector.collect_all_feeds(max_entries_per_feed=per_feed_limit)

rss_count = len(rss_articles)
print(f"✓ Collected {rss_count} RSS articles")

# Concatenate news and RSS results; later cells work on `all_articles`.
all_articles = news_articles + rss_articles
total_count = len(all_articles)
print(f"\nTotal articles: {total_count}")

### 1.3 Collect Price Data

In [None]:
# Fetch a current price snapshot and a 7-day price history.
price_collector = PriceCollector()

print("Fetching current prices...")
current_prices = price_collector.get_current_prices()
print("\nCurrent Prices:")
for symbol, price in current_prices.items():
    # The 'timestamp' entry is metadata, not a quote.
    if symbol == 'timestamp':
        continue
    # Plain numbers get a dollar sign, thousands separators and two decimals;
    # any other value is printed unchanged.
    if isinstance(price, (int, float)):
        print(f"  {symbol}: ${price:,.2f}")
    else:
        print(f"  {symbol}: {price}")

# `price_df` feeds the price, correlation and prediction cells further down.
print("\nFetching historical prices (7 days)...")
price_df = price_collector.get_historical_prices(period='7d')
record_count = len(price_df)
print(f"✓ Collected {record_count} price records")
frame_shape = price_df.shape
print(f"\nPrice data shape: {frame_shape}")
latest_rows = price_df.tail()
print(latest_rows)

## 2. Data Preprocessing

In [None]:
# Turn the collected article records into a DataFrame. `preprocessor` is
# reused by the sentiment-analysis cell.
preprocessor = DataFramePreprocessor()

print("Preprocessing articles...")
articles_df = preprocessor.articles_to_dataframe(all_articles)

frame_shape = articles_df.shape
print(f"✓ Articles DataFrame shape: {frame_shape}")
column_names = articles_df.columns.tolist()
print(f"\nColumns: {column_names}")
print(f"\nFirst few articles:")

# Keep this expression last so the notebook renders the preview table.
preview_columns = ['title', 'source', 'published_at']
articles_df[preview_columns].head()

## 3. Sentiment Analysis

In [None]:
# Score every collected article, then rebuild the DataFrame from the scored
# records. `sentiment_df` is the input to all later plots and analyses.
sentiment_analyzer = ArticleSentimentAnalyzer()

print("Analyzing sentiment...")
articles_with_sentiment = sentiment_analyzer.analyze_articles(all_articles)
sentiment_df = preprocessor.articles_to_dataframe(articles_with_sentiment)

print(f"✓ Sentiment analysis complete!")
print(f"\nSentiment distribution:")
label_counts = sentiment_df['sentiment_label'].value_counts()
print(label_counts)
mean_score = sentiment_df['sentiment_score'].mean()
print(f"\nAverage sentiment score: {mean_score:.3f}")

# Keep this expression last so the notebook renders the ten-row sample.
print("\nSample sentiment analysis:")
sample_columns = ['title', 'sentiment_score', 'sentiment_label']
sentiment_df[sample_columns].head(10)

## 4. Sentiment Visualizations

In [None]:
# Build the sentiment visualizer; `sent_viz` is reused by the timeline cell
# that follows this one.
sent_viz = SentimentVisualizer()

# Announce the step, then hand the scored-articles frame to the visualizer's
# distribution plot.
print("Creating sentiment distribution plot...")
sent_viz.plot_sentiment_distribution(sentiment_df)

In [None]:
# Plot the sentiment timeline from the scored-articles frame, passing the
# frequency string '6H' to the visualizer (created in the previous cell).
# NOTE(review): if the visualizer forwards `freq` to pandas resampling,
# pandas 2.2+ deprecates the uppercase 'H' alias in favour of 'h' — confirm.
print("Creating sentiment timeline...")
sent_viz.plot_sentiment_timeline(sentiment_df, freq='6H')

## 5. Price Visualizations

In [None]:
# Build the price visualizer used for the price chart below.
price_viz = PriceVisualizer()

# Announce the step, then plot the historical price frame fetched in the
# price-collection cell.
print("Creating price timeline...")
price_viz.plot_price_timeline(price_df)

## 6. Correlation Analysis

In [None]:
# Relate article sentiment to Bitcoin prices and print the results.
corr_analyzer = CorrelationAnalyzer()

# The analysis runs only when the price frame carries a 'BTC-USD' column;
# `btc_corr` is later embedded in the summary report under the same condition.
if 'BTC-USD' in price_df.columns:
    print("Analyzing Bitcoin sentiment-price correlation...")
    btc_corr = corr_analyzer.analyze_sentiment_price_correlation(
        sentiment_df, price_df, asset='BTC-USD'
    )

    print(f"\nBitcoin Correlation Results:")
    # 'correlations' and 'best_lag' are already treated as optional below, so
    # read 'samples' defensively too rather than raising KeyError on a partial
    # result. The 0 default mirrors the predictive-accuracy cell.
    print(f"Samples: {btc_corr.get('samples', 0)}")
    print(f"\nCorrelations by time horizon:")
    for period, corr in btc_corr.get('correlations', {}).items():
        # Each horizon entry is expected to hold a 'pearson' dict with
        # 'correlation' and 'p_value' keys.
        pearson = corr['pearson']
        print(f"  {period}: r={pearson['correlation']:.3f}, p={pearson['p_value']:.4f}")

    # Lag details are printed only when the analyzer reported a best lag.
    if 'best_lag' in btc_corr:
        lag = btc_corr['best_lag']
        print(f"\nBest lag: {lag['lag']} hours")
        print(f"Correlation at best lag: {lag['correlation']:.3f}")
        print(f"P-value: {lag['p_value']:.4f}")

## 7. Correlation Visualizations

In [None]:
# Build the correlation visualizer used for the chart below.
corr_viz = CorrelationVisualizer()

# Plot sentiment against the Bitcoin price, but only when the price frame
# actually has a 'BTC-USD' column; otherwise this cell produces no output.
if 'BTC-USD' in price_df.columns:
    print("Creating sentiment-price correlation plot...")
    corr_viz.plot_sentiment_price_correlation(
        sentiment_df, price_df, price_col='BTC-USD'
    )

## 8. Predictive Analysis

In [None]:
# Check how well sentiment anticipates Bitcoin price direction.
pred_analyzer = PredictiveAnalyzer()

# Runs only when the price frame has a 'BTC-USD' column; `accuracy` is later
# embedded in the summary report under the same condition.
if 'BTC-USD' in price_df.columns:
    print("Analyzing predictive accuracy...")
    accuracy = pred_analyzer.calculate_directional_accuracy(
        sentiment_df,
        price_df,
        asset='BTC-USD',
        lag=1,
    )

    # Every metric falls back to 0 when the analyzer did not report it.
    print(f"\nPredictive Accuracy Results:")
    overall_rate = accuracy.get('overall_accuracy', 0)
    print(f"Overall accuracy: {overall_rate:.2%}")
    positive_rate = accuracy.get('positive_sentiment_accuracy', 0)
    print(f"Positive sentiment accuracy: {positive_rate:.2%}")
    negative_rate = accuracy.get('negative_sentiment_accuracy', 0)
    print(f"Negative sentiment accuracy: {negative_rate:.2%}")
    sample_total = accuracy.get('total_samples', 0)
    print(f"Total samples: {sample_total}")

## 9. Summary Report

In [None]:
# Assemble a summary of the whole run and print it as JSON. The save-results
# cell relies on both `report` and the `json` import made here.
import json
import math


def _nan_to_none(value):
    """Return *value* as a float, or None when it is NaN.

    pandas yields NaN for the mean of zero rows and for the sample standard
    deviation of fewer than two rows; json would emit that as the
    non-standard ``NaN`` token, so it is mapped to None (JSON ``null``).
    """
    number = float(value)
    return None if math.isnan(number) else number


# Evaluate the Bitcoin guard once, with the same None check that
# 'price_records' applies, so a missing price frame yields None entries
# instead of an AttributeError on `.columns`.
has_btc_prices = price_df is not None and 'BTC-USD' in price_df.columns

sentiment_labels = sentiment_df['sentiment_label']
sentiment_scores = sentiment_df['sentiment_score']

report = {
    'timestamp': datetime.now().isoformat(),
    'data_collection': {
        'total_articles': len(all_articles),
        'news_articles': len(news_articles),
        'rss_articles': len(rss_articles),
        'price_records': len(price_df) if price_df is not None else 0,
    },
    'sentiment_analysis': {
        # Cast the pandas counts to plain ints so json can serialize them.
        'positive': int((sentiment_labels == 'positive').sum()),
        'negative': int((sentiment_labels == 'negative').sum()),
        'neutral': int((sentiment_labels == 'neutral').sum()),
        'avg_score': _nan_to_none(sentiment_scores.mean()),
        'std_score': _nan_to_none(sentiment_scores.std()),
    },
    # `btc_corr` and `accuracy` exist only when the Bitcoin cells ran their
    # guarded branch, hence the matching condition here.
    'correlation': btc_corr if has_btc_prices else None,
    'predictive_accuracy': accuracy if has_btc_prices else None,
}

print("\n" + "="*80)
print("SUMMARY REPORT")
print("="*80)
# default=str stringifies any value json cannot encode natively.
print(json.dumps(report, indent=2, default=str))

## 10. Save Results

In [None]:
# Persist the scored articles (CSV) and the summary report (JSON) under the
# processed-data directory. Both file names share one timestamp.
# `report` and `json` come from the summary-report cell.
timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')

# Create the output directory up front: neither DataFrame.to_csv nor open()
# creates missing parent folders. exist_ok makes this a no-op when it exists.
output_dir = Path(Config.PROCESSED_DATA_DIR)
output_dir.mkdir(parents=True, exist_ok=True)

# Save to CSV, leaving out the DataFrame index column.
sentiment_csv = output_dir / f'sentiment_{timestamp}.csv'
sentiment_df.to_csv(sentiment_csv, index=False)
print(f"✓ Sentiment data saved to {sentiment_csv}")

# Save report to JSON with an explicit encoding so the result does not depend
# on the platform default; default=str stringifies non-JSON-native values.
report_json = output_dir / f'report_{timestamp}.json'
with open(report_json, 'w', encoding='utf-8') as f:
    json.dump(report, f, indent=2, default=str)
print(f"✓ Report saved to {report_json}")

print("\n✓ Analysis complete!")