# 03 — Triple Barrier Labeling
Apply the Dynamic Triple Barrier Method with ATR-scaled barriers.
Generates labels: Short (0), Hold (1), Long (2).

In [None]:
!pip install -q torch xgboost ccxt PyWavelets pandas-ta hmmlearn numba scikit-learn pyyaml tqdm pyarrow

In [None]:
from google.colab import drive
drive.mount('/content/drive')

import sys, os
REPO_DIR = '/content/scalp2'
if not os.path.exists(REPO_DIR):
    !git clone https://github.com/<YOUR_USERNAME>/scalp2.git {REPO_DIR}
sys.path.insert(0, REPO_DIR)

import logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s %(name)s %(levelname)s: %(message)s')

from scalp2.config import load_config
config = load_config(f'{REPO_DIR}/config.yaml')
config.data.processed_dir = '/content/drive/MyDrive/scalp2/data/processed'

In [None]:
import pandas as pd

# Read the feature matrix from the processed-data directory and report its size.
features_path = f'{config.data.processed_dir}/BTC_USDT_features.parquet'
df = pd.read_parquet(features_path)
n_rows, n_cols = df.shape
print(f'Loaded feature matrix: {n_rows} rows x {n_cols} columns')

In [None]:
from scalp2.labeling.triple_barrier import triple_barrier_labels

# Label the feature matrix using the labeling section of the config.
df_labeled = triple_barrier_labels(df, config.labeling)

# Summarise how often each value of the 'tb_label' column occurs, first as raw
# counts and then as percentages rounded to one decimal place.
tb_labels = df_labeled['tb_label']
label_counts = tb_labels.value_counts().sort_index()
label_share_pct = (tb_labels.value_counts(normalize=True) * 100).sort_index().round(1)

print('\nLabel distribution:')
print(label_counts)
print('\nLabel distribution (%):')
print(label_share_pct)

In [None]:
# Visualize label distribution over time
import matplotlib.pyplot as plt

fig, axes = plt.subplots(2, 1, figsize=(16, 8), sharex=True)

# Top panel: close price for context.
axes[0].plot(df_labeled.index, df_labeled['close'], linewidth=0.5, alpha=0.7)
axes[0].set_ylabel('BTC Price')
axes[0].set_title('BTC/USDT 15m with Triple Barrier Labels')

# Resolve which codes mean "short" and "long". The notebook header documents
# the encoding Short (0), Hold (1), Long (2); comparing against -1 / 1 under
# that encoding would plot the Hold share as "Long" and a flat zero as "Short".
# A signed encoding (-1 / 0 / 1) is still honoured when negative labels exist.
# NOTE(review): confirm the encoding against triple_barrier_labels.
tb_labels = df_labeled['tb_label']
if (tb_labels < 0).any():
    short_code, long_code = -1, 1
else:
    short_code, long_code = 0, 2

# Rolling label ratio
window = 960  # ~10 days of 15m bars (96 bars per day)
long_ratio = (tb_labels == long_code).rolling(window).mean()
short_ratio = (tb_labels == short_code).rolling(window).mean()

# Bottom panel: rolling share of long and short labels.
axes[1].plot(df_labeled.index, long_ratio, label='Long %', color='green', alpha=0.7)
axes[1].plot(df_labeled.index, short_ratio, label='Short %', color='red', alpha=0.7)
axes[1].axhline(0.5, color='gray', linestyle='--', alpha=0.3)
axes[1].set_ylabel('Label Ratio (10d rolling)')
axes[1].legend()

plt.tight_layout()
plt.show()

In [None]:
# Persist the labeled frame as Parquet in the processed-data directory and
# report how many rows were written.
output_path = f'{config.data.processed_dir}/BTC_USDT_labeled.parquet'
df_labeled.to_parquet(output_path)
n_saved = len(df_labeled)
print(f'Saved labeled dataset: {n_saved} rows to {output_path}')