# Training a transformer on bitcoin

## Data loading

### Imports

In [22]:
import requests
import pandas as pd
import time
import plotly.graph_objects as go
import plotly.express as px
from plotly.subplots import make_subplots

In [2]:
def get_data(symbol="BTCUSDT", interval="1h", limit=1000, n_calls=3, timeout=10):
    """Download recent candlesticks from the Binance klines endpoint.

    The history is paged backwards from the current time: every request asks
    for up to ``limit`` candles ending at ``endTime``, then ``endTime`` is
    moved to one millisecond before the oldest candle just received.

    Args:
        symbol: Trading pair sent as the ``symbol`` query parameter.
        interval: Candle size sent as the ``interval`` query parameter.
        limit: Candles requested per call (1000 is used here as the
            per-call maximum).
        n_calls: Number of pages to request; with the defaults this gives
            up to 3 * 1000 = 3000 candles.
        timeout: Seconds to wait for each HTTP request.

    Returns:
        DataFrame in chronological order with the twelve kline columns.
        ``open_time`` is converted to datetime and open/high/low/close/volume
        to float; the remaining columns are left as returned by the API.

    Raises:
        requests.HTTPError: If the API answers with an error status.
    """
    url = "https://api.binance.com/api/v3/klines"

    # Current time in milliseconds (the API timestamps are parsed as ms below)
    end_time = int(time.time() * 1000)

    all_data = []
    for _ in range(n_calls):
        params = {
            "symbol": symbol,
            "interval": interval,
            "limit": limit,
            "endTime": end_time
        }
        response = requests.get(url, params=params, timeout=timeout)
        # Fail loudly on an error status instead of trying to concatenate
        # the JSON error object with the list of candles.
        response.raise_for_status()
        data = response.json()
        if not data:
            # No older candles available: stop paging instead of indexing
            # into an empty page.
            break
        all_data = data + all_data  # prepend so the result stays chronological
        end_time = data[0][0] - 1   # step back to just before the oldest candle

    # Build the DataFrame
    df = pd.DataFrame(all_data, columns=[
        "open_time","open","high","low","close","volume",
        "close_time","quote_asset_volume","nb_trades",
        "taker_buy_base","taker_buy_quote","ignore"
    ])

    # Type conversions
    df["open_time"] = pd.to_datetime(df["open_time"], unit="ms")
    numeric_cols = ["open","high","low","close","volume"]
    df[numeric_cols] = df[numeric_cols].astype(float)

    return df

In [3]:
# Download the candles, preview the first rows of the price/volume columns,
# then display the total number of rows.
df = get_data()

print(df.loc[:, ["open_time", "open", "high", "low", "close", "volume"]].head())
len(df)

            open_time      open      high       low     close     volume
0 2025-04-26 15:00:00  94240.01  94394.00  94184.23  94373.24  399.34843
1 2025-04-26 16:00:00  94373.24  94373.24  94041.00  94286.92  267.47055
2 2025-04-26 17:00:00  94286.92  94378.87  94222.33  94316.79  164.43394
3 2025-04-26 18:00:00  94316.79  94372.00  94123.50  94154.71  229.99296
4 2025-04-26 19:00:00  94154.71  94334.81  94095.23  94307.50  170.35998


3000

## Add and transform useful features

In [52]:
def compute_rsi(series, window=14):
    """Relative Strength Index of ``series`` based on simple rolling means.

    Args:
        series: pandas Series of values (closing prices where this file
            calls it).
        window: Number of consecutive observations averaged.

    Returns:
        Series aligned with ``series``. The first ``window - 1`` entries are
        NaN because the rolling mean has no full window yet. A window with
        neither a gain nor a loss evaluates 0 / 0 and is NaN as well.
    """
    change = series.diff()

    # Split each move into an upward and a downward magnitude; the undefined
    # first difference is counted as "no move".
    ups = change.where(change > 0, 0).fillna(0)
    downs = (-change.where(change < 0, 0)).fillna(0)

    def rolling_average(moves):
        return moves.rolling(window=window).mean()

    relative_strength = rolling_average(ups) / rolling_average(downs)
    return 100 - (100 / (1 + relative_strength))

def compute_features(df):
    """Return a copy of ``df`` extended with technical-indicator columns.

    Reads the ``close`` and ``volume`` columns and adds, in this order:
    ``sma_7``, ``sma_30``, ``sma_50``, ``sma_100``, ``return``,
    ``volatility_20``, ``rsi_14``, the columns produced by ``compute_MACD``,
    then ``volume_sma20`` and ``volume_rel``. The input frame is not modified.
    """
    out = df.copy()
    close = out["close"]

    # Simple moving averages of the close (column name -> window length)
    sma_windows = {"sma_7": 7, "sma_30": 30, "sma_50": 50, "sma_100": 100}
    for column, length in sma_windows.items():
        out[column] = close.rolling(window=length).mean()

    # Period-over-period return of the close
    out["return"] = close.pct_change()

    # Volatility: rolling standard deviation of the returns over 20 rows
    out["volatility_20"] = out["return"].rolling(window=20).std()

    # RSI over 14 rows
    out["rsi_14"] = compute_rsi(close, window=14)

    # MACD line, signal line and histogram
    out = compute_MACD(out)

    # Volume relative to its own 20-row moving average
    volume = out["volume"]
    out["volume_sma20"] = volume.rolling(window=20).mean()
    out["volume_rel"] = volume / out["volume_sma20"]

    return out

def compute_MACD(df):
    """Return a copy of ``df`` with MACD columns derived from ``close``.

    Added columns, in order: ``ema_12``, ``ema_26``, ``MACD`` (EMA 12 minus
    EMA 26), ``Signal`` (EMA 9 of ``MACD``) and ``MACD_Hist`` (``MACD`` minus
    ``Signal``). All EMAs use ``adjust=False``. The input frame is not
    modified.
    """
    result = df.copy()
    close = result["close"]

    def ema(values, span):
        # Recursive (non-adjusted) exponential moving average
        return values.ewm(span=span, adjust=False).mean()

    fast = ema(close, 12)
    slow = ema(close, 26)
    macd_line = fast - slow
    signal_line = ema(macd_line, 9)

    result["ema_12"] = fast
    result["ema_26"] = slow
    result["MACD"] = macd_line
    result["Signal"] = signal_line
    result["MACD_Hist"] = macd_line - signal_line

    return result

In [53]:
# Replace df with its feature-augmented copy, then display the resulting
# column index (the bare last expression is echoed as the cell output).
df = compute_features(df)
df.columns

Index(['open_time', 'open', 'high', 'low', 'close', 'volume', 'close_time',
       'quote_asset_volume', 'nb_trades', 'taker_buy_base', 'taker_buy_quote',
       'ignore', 'sma_7', 'sma_30', 'sma_50', 'sma_100', 'return',
       'volatility_20', 'rsi_14', 'ema_12', 'ema_26', 'MACD', 'Signal',
       'MACD_Hist', 'volume_sma20', 'volume_rel'],
      dtype='object')

### Visualization

#### SMA

In [59]:
# Four stacked panels sharing one time axis: price with its moving averages,
# relative volume, MACD with its signal line, and the MACD histogram.
fig = make_subplots(
    rows=4,
    cols=1,
    shared_xaxes=True,
    vertical_spacing=0.1,
    row_heights=[0.4, 0.2, 0.2, 0.2],
)

# Panel 1: closing price, then each SMA (traces are added in list order)
price_panel_traces = [
    ("close", "BTC Close", "white"),
    ("sma_7", "SMA 7", "orange"),
    ("sma_30", "SMA 30", "green"),
    ("sma_50", "SMA 50", "blue"),
    ("sma_100", "SMA 100", "red"),
]
for trace_column, trace_label, trace_color in price_panel_traces:
    fig.add_trace(
        go.Scatter(
            x=df["open_time"],
            y=df[trace_column],
            mode="lines",
            name=trace_label,
            line={"color": trace_color},
        ),
        row=1,
        col=1,
    )

# Panel 2: relative volume (volume_rel column)
fig.add_trace(
    go.Bar(
        x=df["open_time"],
        y=df["volume_rel"],
        name="Volume",
        marker_color="lightblue",
    ),
    row=2,
    col=1,
)

# Panel 3: MACD line followed by its signal line
fig.add_trace(
    go.Scatter(
        x=df["open_time"],
        y=df["MACD"],
        mode="lines",
        name="MACD",
        line={"color": "blue"},
    ),
    row=3,
    col=1,
)
fig.add_trace(
    go.Scatter(
        x=df["open_time"],
        y=df["Signal"],
        mode="lines",
        name="Signal",
        line={"color": "orange"},
    ),
    row=3,
    col=1,
)

# Panel 4: MACD histogram
fig.add_trace(
    go.Bar(
        x=df["open_time"],
        y=df["MACD_Hist"],
        name="Histogram",
        marker_color="cyan",
    ),
    row=4,
    col=1,
)

# Global layout: dark theme, figure height and one axis title per panel
fig.update_layout(
    template="plotly_dark",
    title="Prix BTC avec SMA",
    height=900,
    xaxis4={"title": "Date"},
    yaxis={"title": "Prix ($)"},
    yaxis2={"title": "Relative Volume"},
    yaxis3={"title": "MACD"},
    yaxis4={"title": "Momentum"},
)

fig.show()

#### Volatility / RSI

In [35]:
# Three stacked panels sharing one time axis: closing price, 20-period
# volatility and 14-period RSI.
fig = make_subplots(rows=3, cols=1, shared_xaxes=True,
                    vertical_spacing=0.10,
                    row_heights=[0.5, 0.25, 0.25])

# BTC closing price
fig.add_scatter(
    x=df["open_time"], 
    y=df["close"], 
    mode="lines", 
    name="BTC Close", 
    line=dict(color="white"), 
    row=1, 
    col=1
)

# Volatility 20 (legend label previously misspelled "Volatibility")
fig.add_scatter(
    x=df["open_time"], 
    y=df["volatility_20"], 
    mode="lines", 
    name="Volatility (20)", 
    line=dict(color="red"),
    row=2, 
    col=1
)

# RSI 14
fig.add_scatter(x=df["open_time"], y=df["rsi_14"],
                mode="lines", name="RSI (14)", line=dict(color="orange"), row=3, col=1)

# Overbought (70) and oversold (30) reference lines on the RSI panel
fig.add_hline(y=70, line_dash="dash", line_color="red", row=3, col=1)
fig.add_hline(y=30, line_dash="dash", line_color="green", row=3, col=1)

# Global layout. The title now describes this figure; it used to repeat the
# SMA figure's title although no SMA is plotted here.
fig.update_layout(
    title="Prix BTC, volatilité et RSI",
    xaxis3=dict(title="Date"),
    yaxis=dict(title="Prix ($)"),              # 1st subplot
    yaxis2=dict(title="Volatilité (σ)"),        # 2nd subplot
    yaxis3=dict(title="RSI 14"),               # 3rd subplot
    template="plotly_dark"  # global style
)

fig.show()