<a href="https://colab.research.google.com/github/mittushaji25/crypto-xrp-analysis/blob/main/notebooks/04_forecasting_and_prediction_models.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# 🔮 **XRPUSDT Crypto Market Analysis - Forecasting & Prediction Models**

In [None]:
import pandas as pd
xrp_data = pd.read_csv('/content/crypto-xrp-analysis/data/cleaned/xrpusdt_features.csv')

## 🔁 Create New 3-Day Forecast Target

In [None]:
xrp_data['future_return_3d'] = xrp_data['close'].shift(-3) / xrp_data['close'] - 1
xrp_data['price_direction_3d'] = (xrp_data['future_return_3d'] > 0).astype(int)
xrp_data.dropna(inplace=True)  # Clean rows with missing future data


## ✂️ Use Top Features from Feature Importance

In [None]:
selected_features = [
    'rsi', 'std_14', 'log_return', 'momentum_3',
    'buy_volume_ratio', 'avg_trade_size', 'macd', 'volume_spike'
]

X = xrp_data[selected_features]
y = xrp_data['price_direction_3d']

# Time-based split
split_idx = int(len(xrp_data) * 0.8)
X_train, X_test = X.iloc[:split_idx], X.iloc[split_idx:]
y_train, y_test = y.iloc[:split_idx], y.iloc[split_idx:]

## 🧠 Train Classification Model

In [None]:
from xgboost import XGBClassifier

clf = XGBClassifier(
    n_estimators=150,
    max_depth=5,
    learning_rate=0.05,
    use_label_encoder=False,
    eval_metric='logloss'
)

clf.fit(X_train, y_train)
y_pred = clf.predict(X_test)
y_proba = clf.predict_proba(X_test)[:, 1]

Parameters: { "use_label_encoder" } are not used.



✅ Accuracy: 0.478
🎯 ROC AUC: 0.594


## 📊 Evaluate Model Performance

In [None]:
from sklearn.metrics import accuracy_score, roc_auc_score

print("✅ Accuracy:", round(accuracy_score(y_test, y_pred), 3))
print("🎯 ROC AUC:", round(roc_auc_score(y_test, y_proba), 3))

## 🎯 Quantify Returns from Prediction Hits

In [None]:
xrp_data.loc[X_test.index, 'model_prediction_3d'] = y_pred
xrp_data['prediction_hit'] = (xrp_data['model_prediction_3d'] == xrp_data['price_direction_3d'])

# Mean returns grouped by prediction success
print(xrp_data.groupby('prediction_hit')['future_return_3d'].mean())


prediction_hit
False    0.005762
True     0.035199
Name: future_return_3d, dtype: float64


## 🔍 Analyze Forecast by Confidence Levels

In [14]:
xrp_data.loc[X_test.index, 'prediction_prob'] = y_proba
xrp_data.loc[X_test.index, 'high_confidence'] = (y_proba > 0.7).astype(int)

xrp_data.loc[X_test.index, 'confidence_bin'] = pd.cut(y_proba, bins=[0, 0.5, 0.7, 0.9, 1.0])
print(xrp_data.groupby('confidence_bin')['future_return_3d'].mean())

confidence_bin
(0.0, 0.5]    0.003516
(0.5, 0.7]    0.000951
(0.7, 0.9]   -0.007450
(0.9, 1.0]    0.009243
Name: future_return_3d, dtype: float64


  print(xrp_data.groupby('confidence_bin')['future_return_3d'].mean())


Insight from Confidence Bins

    Confidence Range | AvgFutureReturn(3d) | Implication

    (0.0 – 0.5] | +0.35% | Low-certainty, low-but-positive gain

    (0.5 – 0.7] | +0.09% | Neutral behavior—uncertain signal

    (0.7 – 0.9] |-0.75%  | Overconfident but wrong—model risk

    (0.9 – 1.0] | +0.92% | High certainty, highest reward zone



In [16]:
xrp_data.columns

Index(['open_time', 'open', 'high', 'low', 'close', 'volume', 'close_time',
       'quote_asset_volume', 'number_of_trades', 'taker_buy_base_volume',
       'taker_buy_quote_volume', 'source_filename', 'open_lag_1',
       'close_lag_1', 'high_lag_1', 'low_lag_1', 'volumne_lag_1',
       'volumne_lag_3', 'candle_size', 'body_size', 'upper_shadow',
       'lower_shadow', 'buy_volume_ratio', 'avg_trade_size', 'ma_7', 'ma_21',
       'std_14', 'hl_spread', 'hl_spread_change', 'pct_change', 'log_return',
       'momentum_3', 'momentum_7', 'day_of_week', 'is_weekend', 'month', 'rsi',
       'macd', 'signal_line', 'volume_zscore', 'volume_spike', 'rsi_buy',
       'rsi_sell', 'macd_bullish', 'macd_bearish', 'future_return_1d',
       'price_direction', 'future_return_3d', 'price_direction_3d',
       'model_prediction_3d', 'prediction_hit', 'prediction_prob',
       'high_confidence', 'confidence_bin'],
      dtype='object')

In [17]:
xrp_data.to_csv('/content/crypto-xrp-analysis/data/featured/xrpusdt_master_data.csv', index=False)

# 🔁 Split into Thematic Tables for Tableau Dashbaord


### 📊 1. Price Signals Table

In [21]:
price_signals_cols = [
    'close_time', 'open', 'high', 'low', 'close', 'volume',
    'rsi', 'macd', 'momentum_3', 'volume_spike', 'rsi_buy', 'macd_bullish'
]

xrp_data[price_signals_cols].to_csv(
    'data/dashboard_split/price_signals.csv', index=False
)


### 🔮 2. Predictions Table

In [23]:
predictions_cols = [
    'close_time', 'price_direction_3d', 'model_prediction_3d',
    'future_return_3d', 'prediction_prob', 'confidence_bin', 'high_confidence'
]

xrp_data[predictions_cols].to_csv(
    'data/dashboard_split/predictions.csv', index=False
)


### 🎯 3. Performance KPIs Table

In [25]:
performance_cols = [
    'close_time', 'prediction_hit', 'future_return_3d',
    'price_direction_3d', 'model_prediction_3d'
]

xrp_data[performance_cols].to_csv(
    'data/dashboard_split/performance_kpi.csv', index=False
)


### 📈 4. Confidence Analysis Table

In [26]:
confidence_cols = [
    'close_time', 'prediction_prob', 'confidence_bin', 'future_return_3d'
]

xrp_data[confidence_cols].to_csv(
    'data/dashboard_split/confidence_analysis.csv', index=False
)