# Bitcoin Sentiment Analysis
This notebook performs real-time Bitcoin sentiment analysis using news data and price information.

## Setup and Dependencies

## Initialize API Clients

In [None]:
from src.common import NEWS_API_KEY, RELEVANT_SOURCES, newsapi, coingecko

In [3]:
from src.fetch_data import fetch_bitcoin_news, fetch_bitcoin_prices
from src.data_saver import save_data, load_data, format_sentiment_score#, get_sentiment_color 



from src.utils_analyzer import analyze_sentiment, aggregate_sentiment, prepare_time_series_data, run_forecast, compare_historical_sentiment

In [4]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime, timedelta

# Run mode: True reads the saved CSV files, False fetches fresh data.
use_cached_data = True  # Set to False to fetch fresh data

# Analysis window: from 29 days before today up to today, kept both as
# date objects and as 'YYYY-MM-DD' strings.
end_date = datetime.now().date()
start_date = end_date - timedelta(days=29)
start_date_str, end_date_str = (
    day.strftime('%Y-%m-%d') for day in (start_date, end_date)
)

In [None]:
# === 1. Fetch and Process News Articles ===
# Cached mode reads the previously saved CSVs. Otherwise fresh articles are
# fetched, scored, merged into the saved history, re-aggregated and saved.
if use_cached_data:
    print("Loading cached news articles and sentiment data...")
    articles = load_data("articles_data.csv")
    aggregated_sentiment = load_data("aggregated_sentiment.csv")
else:
    print("Fetching Bitcoin-related news...")
    new_articles_fetch = fetch_bitcoin_news(start_date_str, end_date_str, refresh=True)
    new_articles = analyze_sentiment(new_articles_fetch)

    # Load old data and merge
    old_articles = load_data("articles_data.csv")
    if old_articles is not None:
        articles = pd.concat([old_articles, new_articles], ignore_index=True)
        # The fetch window can overlap what earlier runs already saved, so
        # the same article may now appear twice and would be counted twice
        # in the aggregate. Drop repeats by URL when that column exists;
        # rows without a URL are kept because they cannot be matched.
        if 'url' in articles.columns:
            repeated_url = articles['url'].notna() & articles.duplicated(subset=['url'])
            articles = articles[~repeated_url].reset_index(drop=True)
    else:
        articles = new_articles

    # Aggregate sentiment
    aggregated_sentiment = aggregate_sentiment(articles)

    # Save updated versions
    save_data(articles, "articles_data.csv")
    save_data(aggregated_sentiment, "aggregated_sentiment.csv")


In [None]:

# === 2. Fetch and Process Bitcoin Prices ===
# Either reuse the saved price CSV, or fetch the current window, fold it into
# the saved history (first row kept for each 'date' value) and save it back.
if not use_cached_data:
    print("Fetching Bitcoin price data...")
    new_prices = fetch_bitcoin_prices(start_date_str, end_date_str, refresh=True)

    # Combine with whatever earlier runs saved, if anything.
    old_prices = load_data("price_data.csv")
    if old_prices is None:
        price_data = new_prices
    else:
        price_data = pd.concat(
            [old_prices, new_prices], ignore_index=True
        ).drop_duplicates(subset=['date'])

    save_data(price_data, "price_data.csv")
else:
    print("Loading cached Bitcoin price data...")
    price_data = load_data("price_data.csv")

In [None]:
# === 3. Prepare Time Series ===
# Build the merged sentiment/price frame; when a frame comes back, strip
# rows that are exact duplicates. A None result is left as None.
print("Merging sentiment and price data...")
merged_data = prepare_time_series_data(aggregated_sentiment, price_data)
merged_data = None if merged_data is None else merged_data.drop_duplicates()

In [None]:






# === 4. Run Forecast ===
# run_forecast returns a pair: the first value is later passed to
# plot_forecast and the second to plot_future_forecast.
print("Running forecast...")
forecast_results, future_forecast = run_forecast(merged_data)

# === 5. Visualization with Matplotlib / Seaborn ===
def plot_sentiment_trends(sentiment_df):
    """Show a line chart of average sentiment polarity by date.

    Args:
        sentiment_df: Data handed to seaborn; it must provide the ``date``
            and ``avg_polarity`` columns.

    The chart is displayed with ``plt.show()``; nothing is returned.
    """
    _, axes = plt.subplots(figsize=(12, 4))
    sns.lineplot(x='date', y='avg_polarity', data=sentiment_df, ax=axes)
    axes.set(
        title="Average Sentiment Polarity Over Time",
        xlabel="Date",
        ylabel="Polarity",
    )
    # Slant the date labels so neighbouring ticks do not overlap.
    plt.xticks(rotation=45)
    plt.tight_layout()
    plt.show()

def plot_price_trends(price_df):
    """Show a line chart of the Bitcoin close price by date.

    Args:
        price_df: Data handed to seaborn; it must provide the ``date`` and
            ``close_price`` columns.

    The chart is displayed with ``plt.show()``; nothing is returned.
    """
    _, axes = plt.subplots(figsize=(12, 4))
    sns.lineplot(x='date', y='close_price', data=price_df, ax=axes)
    axes.set(
        title="Bitcoin Close Price Over Time",
        xlabel="Date",
        ylabel="Price (USD)",
    )
    # Slant the date labels so neighbouring ticks do not overlap.
    plt.xticks(rotation=45)
    plt.tight_layout()
    plt.show()

def plot_price_vs_sentiment(merged_df):
    """Scatter Bitcoin price against sentiment polarity with a fitted line.

    Args:
        merged_df: DataFrame with an ``avg_polarity`` column and a price
            column named either ``price`` or ``close_price``.

    Raises:
        Whatever seaborn raises when neither price column (or
        ``avg_polarity``) is present in ``merged_df``.

    The chart is displayed with ``plt.show()``; nothing is returned.
    """
    # Other cells read the merged frame's price from 'close_price', so fall
    # back to that column when the frame carries no 'price' column.
    price_col = 'price' if 'price' in merged_df.columns else 'close_price'

    fig, ax = plt.subplots(figsize=(8, 5))
    sns.scatterplot(data=merged_df, x='avg_polarity', y=price_col, ax=ax)
    # scatter=False: draw only the regression line on top of the points above.
    sns.regplot(data=merged_df, x='avg_polarity', y=price_col, scatter=False, ax=ax, color='red')
    ax.set_title("Bitcoin Price vs Sentiment Polarity")
    ax.set_ylabel("Price (USD)")
    ax.set_xlabel("Sentiment Polarity")
    plt.tight_layout()
    plt.show()

def plot_forecast(forecast_df):
    """Show actual vs predicted Bitcoin price with a shaded interval band.

    Args:
        forecast_df: Frame providing the ``date``, ``actual_price``,
            ``predicted_price``, ``lower_ci`` and ``upper_ci`` columns.

    The chart is displayed with ``plt.show()``; nothing is returned.
    """
    _, axes = plt.subplots(figsize=(12, 5))
    dates = forecast_df['date']

    axes.plot(dates, forecast_df['actual_price'], color='blue', label='Actual')
    axes.plot(
        dates,
        forecast_df['predicted_price'],
        color='orange',
        linestyle='--',
        label='Forecast',
    )
    # Shade the region between the lower and upper interval bounds.
    axes.fill_between(
        dates,
        forecast_df['lower_ci'],
        forecast_df['upper_ci'],
        alpha=0.2,
        color='orange',
        label='95% CI',
    )

    axes.set(
        title="Forecast vs Actual Bitcoin Price",
        xlabel="Date",
        ylabel="Price (USD)",
    )
    axes.legend()
    plt.xticks(rotation=45)
    plt.tight_layout()
    plt.show()

def plot_future_forecast(future_forecast):
    """Show the predicted Bitcoin prices as a line chart with point markers.

    Args:
        future_forecast: Frame providing the ``date`` and
            ``predicted_price`` columns.

    The chart is displayed with ``plt.show()``; nothing is returned.
    """
    plt.figure(figsize=(10, 5))
    dates = future_forecast['date']
    predicted = future_forecast['predicted_price']
    plt.plot(dates, predicted, label='Predicted Price', marker='o')

    # Labelling and decoration.
    plt.title('7-Day Bitcoin Price Forecast')
    plt.ylabel('Predicted Price (USD)')
    plt.xlabel('Date')
    plt.grid(True)
    plt.xticks(rotation=45)
    plt.legend()

    plt.tight_layout()
    plt.show()

# Run visualizations




In [None]:
# Cell output: the second value unpacked from run_forecast.
future_forecast

In [None]:
# Cell output: the first value unpacked from run_forecast.
forecast_results

In [None]:
# Chart avg_polarity by date from the aggregated sentiment frame.
plot_sentiment_trends(aggregated_sentiment)


In [None]:
# Chart close_price by date from the price frame.
plot_price_trends(price_data)


In [None]:
# Scatter price against avg_polarity using the merged sentiment/price frame.
plot_price_vs_sentiment(merged_data)


In [None]:
# Skip the chart when run_forecast produced no future frame, mirroring the
# None check applied to forecast_results before plot_forecast is called.
if future_forecast is not None:
    plot_future_forecast(future_forecast)


In [None]:
# Draw the forecast-vs-actual chart only when run_forecast produced a
# results frame; a None value skips the plot.
if forecast_results is not None:
    plot_forecast(forecast_results)



In [None]:
# === 6. Historical Comparison (Example) ===
# Compare the first and last dates of the analysis window, then print the
# result under a heading.
comparison = compare_historical_sentiment(
    sentiment_df=aggregated_sentiment,
    price_df=price_data,
    base_date=start_date_str,
    comparison_date=end_date_str,
)
print("\nHistorical Comparison:", comparison, sep="\n")

In [None]:
# Forecast rows: expose predicted_price under a shared 'price' column and
# tag the rows with their origin.
future = future_forecast.assign(
    price=lambda frame: frame['predicted_price'],
    source='Forecast',
)

# Historical rows: same shape, taken from the merged frame's close_price.
historical = merged_data[['date', 'close_price']].assign(
    price=lambda frame: frame['close_price'],
    source='Historical',
)

# Stack historical rows first, then forecast rows, keeping only the columns
# the chart needs.
trend_df = pd.concat(
    [frame[['date', 'price', 'source']] for frame in (historical, future)]
)

# plot it
import seaborn as sns
plt.figure(figsize=(12, 5))
sns.lineplot(x='date', y='price', hue='source', data=trend_df)
plt.title('Bitcoin Historical and Forecasted Prices')
plt.ylabel('Price (USD)')
plt.grid(True)
plt.xticks(rotation=45)
plt.tight_layout()
plt.show()

In [None]:
# Cell output: the stacked historical + forecast frame currently in trend_df.
trend_df

In [None]:
from src.models import run_auto_arima_forecast, run_lstm_forecast

# Run both models with forecast_hours=24*3; the LSTM additionally gets
# sequence_length=24. Each call returns a pair, unpacked here as
# (<model>_future, <model>_forecast).
# An earlier variant used forecast_hours=24*7 (and sequence_length=24*7 for
# the LSTM) and bound a single return value.
auto_arima_future, auto_arima_forecast = run_auto_arima_forecast(
    merged_data,
    forecast_hours=24 * 3,
)
lstm_future, lstm_forecast = run_lstm_forecast(
    merged_data,
    forecast_hours=24 * 3,
    sequence_length=24,
)

In [None]:
# Overlay the predicted prices of the Auto ARIMA and LSTM forecast frames.
plt.figure(figsize=(10, 5))
for model_label, model_frame in (
    ('Auto ARIMA', auto_arima_forecast),
    ('LSTM', lstm_forecast),
):
    plt.plot(model_frame['date'], model_frame['predicted_price'], label=model_label)

plt.title('Bitcoin Price Forecast: Auto ARIMA vs LSTM')
plt.xlabel('Date')
plt.ylabel('Predicted Price (USD)')
plt.legend()
plt.xticks(rotation=45)
plt.tight_layout()
plt.show()

In [None]:
# Overlay the predicted prices from all three models. Each model contributes
# the two frames returned by its run_* call. Frames that are None are
# skipped, matching the None checks made before the plot_forecast calls,
# so one missing frame does not abort the whole chart.
plt.figure(figsize=(10, 5))
forecast_series = [
    ('Auto ARIMA', auto_arima_forecast),
    ('Auto ARIMA (Future)', auto_arima_future),
    ('LSTM', lstm_forecast),
    ('LSTM (Future)', lstm_future),
    ('ARIMA', forecast_results),
    ('ARIMA (Future)', future_forecast),
]
for series_label, series_df in forecast_series:
    if series_df is None:
        continue
    plt.plot(series_df['date'], series_df['predicted_price'], label=series_label)
plt.legend()
# The title names every model drawn above, including the plain ARIMA run.
plt.title('Bitcoin Price Forecast: ARIMA vs Auto ARIMA vs LSTM')
plt.xlabel('Date')
plt.ylabel('Predicted Price (USD)')
plt.xticks(rotation=45)
plt.tight_layout()
plt.show()

In [None]:
# Guard the frame that is actually plotted (auto_arima_future), so a missing
# frame skips the chart instead of failing inside plot_forecast.
if auto_arima_future is not None:
    plot_forecast(auto_arima_future)

In [None]:
# Draw the forecast chart for the first frame returned by run_lstm_forecast,
# skipping the plot when that frame is None.
if lstm_future is not None:
    plot_forecast(lstm_future)

In [None]:
# Render the second frame returned by run_auto_arima_forecast.
display(auto_arima_forecast)

In [None]:
# Auto ARIMA rows: expose predicted_price under a shared 'price' column and
# tag the rows with their origin.
auto_arima_forecast_future = auto_arima_forecast.assign(
    price=lambda frame: frame['predicted_price'],
    source='Forecast',
)

# Historical rows: same shape, taken from the merged frame's close_price.
auto_arima_forecast_historical = merged_data[['date', 'close_price']].assign(
    price=lambda frame: frame['close_price'],
    source='Historical',
)

# Stack historical rows first, then forecast rows, keeping only the columns
# the chart needs.
trend_df = pd.concat(
    [
        frame[['date', 'price', 'source']]
        for frame in (auto_arima_forecast_historical, auto_arima_forecast_future)
    ]
)

# plot it
import seaborn as sns
plt.figure(figsize=(12, 5))
sns.lineplot(x='date', y='price', hue='source', data=trend_df)
plt.title('Bitcoin Historical and Forecasted Prices')
plt.ylabel('Price (USD)')
plt.grid(True)
plt.xticks(rotation=45)
plt.tight_layout()
plt.show()

In [None]:
# Render the second frame returned by run_lstm_forecast.
display(lstm_forecast)

In [None]:
# LSTM rows: expose predicted_price under a shared 'price' column and tag
# the rows with their origin.
lstm_forecast_future = lstm_forecast.assign(
    price=lambda frame: frame['predicted_price'],
    source='Forecast',
)

# Historical rows: same shape, taken from the merged frame's close_price.
lstm_forecast_historical = merged_data[['date', 'close_price']].assign(
    price=lambda frame: frame['close_price'],
    source='Historical',
)

# Stack historical rows first, then forecast rows, keeping only the columns
# the chart needs.
trend_df = pd.concat(
    [
        frame[['date', 'price', 'source']]
        for frame in (lstm_forecast_historical, lstm_forecast_future)
    ]
)

# plot it
plt.figure(figsize=(12, 5))
sns.lineplot(x='date', y='price', hue='source', data=trend_df)
plt.title('Bitcoin Historical and Forecasted Prices')
plt.ylabel('Price (USD)')
plt.grid(True)
plt.xticks(rotation=45)
plt.tight_layout()
plt.show()