# IDX Neural Network Trading System
## End-to-End Demonstration

This notebook demonstrates the complete pipeline for building a research-grade neural network trading system for Indonesian equities (IDX) targeting >15% CAGR.

In [None]:
import sys
import yaml
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Import our modules
sys.path.insert(0, '..')
from idx_trading_system.data_loader import DataLoader
from idx_trading_system.features import FeatureEngine, create_labels
from idx_trading_system.models import create_model
from idx_trading_system.portfolio import PortfolioConstructor
from idx_trading_system.backtest import WalkForwardBacktest

# Plot defaults shared by every figure below: seaborn's white-grid theme
# and a 12x6 inch default figure size.
sns.set_style('whitegrid')
plt.rcParams.update({'figure.figsize': (12, 6)})

# Reaching this line means every import above resolved.
print("Imports successful!")

## 1. Load Configuration

In [None]:
# Load the YAML configuration that the rest of the notebook reads from
# (universe, paths, labels, backtest splits, targets).
# The encoding is pinned so parsing does not depend on the platform's
# default locale encoding (e.g. cp1252 on Windows).
with open('../idx_trading_system/config.yaml', 'r', encoding='utf-8') as f:
    # safe_load restricts parsing to standard YAML tags.
    config = yaml.safe_load(f)

# Echo a few settings as a sanity check; a KeyError here means config.yaml
# is missing a section that later cells depend on.
print("Configuration loaded:")
print(f"Universe: {len(config['universe']['tickers'])} tickers")
print(f"Index: {config['universe']['index']}")
print(f"Backtest splits: {len(config['backtest']['splits'])}")
print(f"Target CAGR: {config['targets']['cagr']:.1%}")

## 2. Load Data

Download and cache OHLCV data from Yahoo Finance for Indonesian equities.

In [None]:
# The loader is constructed with the cache directory named in the config.
cache_dir = config['paths']['cache_dir']
data_loader = DataLoader(cache_dir)

# Fetch the whole universe in one call; the returned mapping is read
# below under the 'equities' and 'index' keys.
print("Loading data (this may take a few minutes on first run)...")
data_dict = data_loader.load_universe(config)
equity_data, index_data = data_dict['equities'], data_dict['index']

# Quick summary of what came back: shapes, tickers, and date coverage.
print(f"\nEquity data shape: {equity_data.shape}")
print(f"Tickers: {equity_data['ticker'].unique()}")
equity_dates = equity_data.index
print(f"Date range: {equity_dates.min()} to {equity_dates.max()}")
print(f"\nIndex data shape: {index_data.shape}")

In [None]:
# Visualize index
plt.figure(figsize=(14, 6))
plt.plot(index_data.index, index_data['adj_close'])
plt.title('Jakarta Composite Index (^JKSE)')
plt.xlabel('Date')
plt.ylabel('Price')
plt.grid(True, alpha=0.3)
plt.show()

## 3. Feature Engineering

Create technical indicators, cross-asset features, and regime features with no lookahead bias.

In [None]:
# Build the feature engine from the notebook configuration.
feature_engine = FeatureEngine(config)

# Stage 1: derive features from the equity and index frames.
# shift_features=True is passed through to the engine (presumably the
# lookahead guard mentioned above -- confirm in features.py).
print("Engineering features...")
features = feature_engine.engineer_features(
    equity_data,
    index_data,
    shift_features=True,
)

# Stage 2: add labels for the forward periods listed in the config.
forward_periods = config['labels']['forward_periods']
features = create_labels(features, forward_periods)

# Report the frame's shape and preview the first 20 column names.
all_columns = features.columns
print(f"\nFeatures shape: {features.shape}")
print(f"\nFeature columns ({len(all_columns)}):")
for column_name in all_columns[:20]:
    print(f"  - {column_name}")
print("  ...")

In [None]:
# Pull out one ticker's rows and show the last 10 values of a handful
# of feature columns.
ticker = 'BBCA.JK'
is_selected_ticker = features['ticker'] == ticker
ticker_features = features[is_selected_ticker].copy()

preview_columns = ['returns', 'momentum_20d', 'volatility_20d', 'rsi', 'volume_zscore']
print(f"Features for {ticker}:")
print(ticker_features[preview_columns].tail(10))

## 4. Model Training

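Train an XGBoost baseline model on a single train/validation split. Neural network models are left as a next step (see the end of the notebook).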

In [None]:
# Prepare a single chronological train/validation split for demonstration.
train_cutoff = pd.to_datetime('2014-12-31')
val_cutoff = pd.to_datetime('2016-12-31')

# Label column used as the regression target in this demo.
target_col = 'target_return_20d'

# Slice by date: training is everything up to train_cutoff, validation is
# the window (train_cutoff, val_cutoff].
train_data = features[features.index <= train_cutoff]
val_data = features[(features.index > train_cutoff) & (features.index <= val_cutoff)]

# Select features: drop identifiers, raw price/volume columns, the raw
# return, and every label column so no target leaks into the inputs.
exclude_cols = ['ticker', 'open', 'high', 'low', 'close', 'adj_close', 'volume', 'returns']
exclude_cols += [col for col in features.columns if 'target' in col]
excluded = set(exclude_cols)
feature_cols = [col for col in features.columns if col not in excluded]

print(f"Train period: {train_data.index.min()} to {train_data.index.max()}")
print(f"Val period: {val_data.index.min()} to {val_data.index.max()}")
print(f"Using {len(feature_cols)} features")

# Rows with a missing target are dropped rather than zero-filled: a NaN
# label means "no label", and replacing it with 0 would train and score
# the model against invented zero returns.
train_labeled = train_data.dropna(subset=[target_col])
val_labeled = val_data.dropna(subset=[target_col])
print(
    f"Dropped unlabeled rows: train={len(train_data) - len(train_labeled)}, "
    f"val={len(val_data) - len(val_labeled)}"
)

# NOTE(review): if the target is a forward-looking return (as the column
# name suggests), the last training rows are labeled with prices from the
# validation window. Consider purging rows near train_cutoff -- confirm
# against create_labels.

# Prepare data. Missing feature values are still zero-filled, as before.
X_train = train_labeled[feature_cols].fillna(0).values
y_train = train_labeled[target_col].values

X_val = val_labeled[feature_cols].fillna(0).values
y_val = val_labeled[target_col].values

print(f"\nTrain set: {X_train.shape}")
print(f"Val set: {X_val.shape}")

In [None]:
# Train an XGBoost regression model on the demonstration split.
print("Training XGBoost model...")
# create_model is the project's model factory; here it is asked for the
# 'xgboost' implementation in regression mode, configured from `config`.
xgb_model = create_model('xgboost', model_type='regression', config=config)
# The validation arrays are passed alongside the training arrays
# (presumably for early stopping / evaluation -- confirm in models.py).
xgb_model.fit(X_train, y_train, X_val, y_val)

print("\nModel training complete!")

In [None]:
# Rank features by the model's importance scores and chart the top ones.
# The whole cell is skipped when the model does not expose
# `get_feature_importance`.
if hasattr(xgb_model, 'get_feature_importance'):
    importance = xgb_model.get_feature_importance()
    # Pair each score with its column name and sort descending.
    # Assumes the scores come back in feature_cols order -- TODO confirm.
    feature_importance_df = pd.DataFrame({
        'feature': feature_cols,
        'importance': importance
    }).sort_values('importance', ascending=False)

    # Show at most 10 rows, but never more than exist: a fixed range(10)
    # makes barh/yticks fail with a length mismatch when the model has
    # fewer than 10 features.
    top_n = min(10, len(feature_importance_df))
    top_features = feature_importance_df.head(top_n)

    print(f"\nTop {top_n} most important features:")
    print(top_features)

    # Horizontal bars, most important feature at the top.
    plt.figure(figsize=(10, 8))
    plt.barh(range(top_n), top_features['importance'])
    plt.yticks(range(top_n), top_features['feature'])
    plt.xlabel('Importance')
    plt.title(f'Top {top_n} Feature Importances')
    plt.gca().invert_yaxis()
    plt.tight_layout()
    plt.show()

## 5. Walk-Forward Backtesting

Run strict time-series walk-forward backtest.

In [None]:
# Build the two config-driven components the backtest needs.
portfolio_constructor = PortfolioConstructor(config)
backtester = WalkForwardBacktest(config)

# A separate model instance from the one fitted in the previous section.
model = create_model('xgboost', model_type='regression', config=config)

# Hand the model, feature engine, portfolio constructor and raw data to
# the walk-forward driver; its return value is analysed in section 6.
print("Running walk-forward backtest...\n")
results = backtester.run_backtest(model, feature_engine, portfolio_constructor, data_dict)

## 6. Performance Analysis

In [None]:
# Headline numbers: the 'average_metrics_after_costs' entry of the
# backtest results, followed by a comparison with the configured target.
banner = "=" * 60
print(banner)
print("BACKTEST RESULTS")
print(banner)

avg_metrics = results['average_metrics_after_costs']

# (label, metrics key, format spec) for each reported line, in display order.
metric_rows = [
    ("CAGR", 'cagr', ".2%"),
    ("Annual Volatility", 'annual_volatility', ".2%"),
    ("Sharpe Ratio", 'sharpe_ratio', ".2f"),
    ("Sortino Ratio", 'sortino_ratio', ".2f"),
    ("Max Drawdown", 'max_drawdown', ".2%"),
    ("Calmar Ratio", 'calmar_ratio', ".2f"),
    ("Hit Rate", 'hit_rate', ".2%"),
]

print("\nAverage Performance Metrics (After Costs):")
for label, key, spec in metric_rows:
    print(f"  {label}: {avg_metrics[key]:{spec}}")

# Compare the averaged CAGR with the target from the config.
target_cagr = config['targets']['cagr']
print(f"\nTarget CAGR: {target_cagr:.2%}")
target_met = avg_metrics['cagr'] >= target_cagr
print("✓ TARGET ACHIEVED!" if target_met else "✗ Target not met. Consider hyperparameter tuning.")

print(banner)

In [None]:
# One stacked panel per walk-forward split, each showing the cumulative
# product of (1 + after-cost return) for that split.
split_results = results['individual_results']
n_splits = len(split_results)

# squeeze=False always returns a 2-D grid of Axes, so a single split needs
# no special casing; column 0 holds the stacked panels.
fig, axes_grid = plt.subplots(n_splits, 1, figsize=(14, 4 * n_splits), squeeze=False)
axes = axes_grid[:, 0]

for split_no, (ax, result) in enumerate(zip(axes, split_results), start=1):
    returns = result['returns_after_costs']
    equity_curve = (1 + returns).cumprod()

    period_start = result['split_period'][0].date()
    period_end = result['split_period'][1].date()

    ax.plot(equity_curve.index, equity_curve.values)
    ax.set(
        title=f"Equity Curve - Split {split_no} ({period_start} to {period_end})",
        xlabel='Date',
        ylabel='Cumulative Return',
    )
    ax.grid(True, alpha=0.3)

    # Annotate the panel's top-left corner with the split's key metrics.
    metrics = result['metrics_after_costs']
    text = " | ".join([
        f"CAGR: {metrics['cagr']:.1%}",
        f"Sharpe: {metrics['sharpe_ratio']:.2f}",
        f"MaxDD: {metrics['max_drawdown']:.1%}",
    ])
    box_style = dict(boxstyle='round', facecolor='wheat', alpha=0.5)
    ax.text(0.02, 0.98, text, transform=ax.transAxes, verticalalignment='top', bbox=box_style)

plt.tight_layout()
plt.show()

In [None]:
# Drawdown chart for the first split: the fractional distance of the
# compounded after-cost equity curve below its running peak.
result = results['individual_results'][0]
returns = result['returns_after_costs']

cumulative = (1 + returns).cumprod()
running_max = cumulative.expanding().max()  # highest equity seen so far
drawdown = (cumulative - running_max) / running_max

fig, ax = plt.subplots(figsize=(14, 6))
ax.fill_between(drawdown.index, drawdown.values, 0, alpha=0.3, color='red')
ax.plot(drawdown.index, drawdown.values, color='red')
ax.set_title('Drawdown - First Split')
ax.set_xlabel('Date')
ax.set_ylabel('Drawdown')
ax.grid(True, alpha=0.3)
# Render y ticks as whole percentages (e.g. -0.25 is shown as -25%).
ax.yaxis.set_major_formatter(plt.FuncFormatter(lambda value, _pos: f'{value:.0%}'))
plt.show()

In [None]:
# Transaction costs through time for the first split. The series plotted
# is result['costs'] -- costs, not turnover.
result = results['individual_results'][0]
costs = result['costs']

fig, ax = plt.subplots(figsize=(14, 6))
ax.plot(costs.index, costs.values)
ax.set(
    title='Transaction Costs Over Time - First Split',
    xlabel='Date',
    ylabel='Cost',
)
ax.grid(True, alpha=0.3)
plt.show()

# Mean and sum of the cost series, formatted as percentages.
mean_cost, total_cost = costs.mean(), costs.sum()
print(f"Average daily cost: {mean_cost:.4%}")
print(f"Total cost impact: {total_cost:.2%}")

## 7. Model Comparison

Compare different models (optional - can be time-consuming).

In [None]:
# Optional model comparison. It sits behind a flag instead of being
# commented out, so the code stays syntax-checked while a plain
# "Run All" still skips it.
RUN_MODEL_COMPARISON = False  # set to True to run one full backtest per model

if RUN_MODEL_COMPARISON:
    models_to_test = ['linear', 'xgboost', 'lightgbm']
    comparison_results = {}

    for model_name in models_to_test:
        print(f"\nTesting {model_name}...")
        # Distinct names so the main backtest's `model` and the plotting
        # cells' `result` are not overwritten.
        candidate_model = create_model(model_name, model_type='regression', config=config)
        candidate_result = backtester.run_backtest(
            candidate_model,
            feature_engine,
            portfolio_constructor,
            data_dict
        )
        comparison_results[model_name] = candidate_result['average_metrics_after_costs']

    # Display comparison: one row per model, one column per metric.
    comparison_df = pd.DataFrame(comparison_results).T
    print("\nModel Comparison:")
    print(comparison_df[['cagr', 'sharpe_ratio', 'max_drawdown']])

## Summary

This notebook demonstrated:
1. Data loading from Yahoo Finance with proper handling of adjusted prices
2. Feature engineering with technical indicators and cross-asset features
3. Model training with XGBoost (and other models available)
4. Walk-forward backtesting with strict time-series splits
5. Performance analysis with equity curves, drawdowns, and metrics

**Next Steps:**
- Run hyperparameter optimization if target CAGR not achieved
- Test neural network models (LSTM, Transformer)
- Perform feature ablation studies
- Stress test across crisis periods (1997-98, 2008, 2020)
- Implement ensemble methods