# Working with Binance API
- https://python-binance.readthedocs.io/en/latest/overview.html

In [1]:
import datetime as dt
import os
import numpy as np, pandas as pd, matplotlib.pyplot as plt
import dateparser
import math
import matplotlib.pyplot as plt
from statsmodels.tsa.stattools import coint
from binance.client import Client

from trading import utils

In [2]:
# Read the Binance API credentials from the environment so no secrets are stored in the notebook.
# os.environ.get returns None for an unset variable, so a missing key reaches Client as None.
API_PUBLIC = os.environ.get("B_PUBLIC_KEY")
API_SECRET = os.environ.get("B_SECRET_KEY")
# REST client reused by the cells below.
client = Client(API_PUBLIC, API_SECRET)

In [17]:
# Symbol of the ETH/BTC pair; reused by the order-book cells below.
eth_sym = "ETHBTC"
# Most recent trades for the symbol, one row per trade.
trades = pd.DataFrame(client.get_recent_trades(symbol=eth_sym))
# `time` arrives as an epoch timestamp in milliseconds. Convert the whole column in one
# vectorised call instead of stringifying every value and parsing it row by row, and make
# the result explicitly UTC rather than naive local time.
trades["time"] = pd.to_datetime(trades["time"], unit="ms", utc=True)

In [3]:
# NOTE(review): `get_stream_str` is neither defined nor imported in the visible cells; presumably
# it lives in `trading.utils` (imported above as `utils`) — confirm and qualify the call.
# The NameError recorded below is about `eth_sym`: this cell was executed before the cell
# that assigns `eth_sym` (compare the execution counts).
get_stream_str(eth_sym)

NameError: name 'eth_sym' is not defined

In [20]:
# Fetch an order-book snapshot for the symbol; the result is a dict (see its keys below).
lob = client.get_order_book(symbol=eth_sym)

In [22]:
# Top-level fields of the order-book snapshot.
lob.keys()

dict_keys(['lastUpdateId', 'bids', 'asks'])

In [23]:
# The snapshot's `lastUpdateId` field.
lob["lastUpdateId"]

2079154232

In [16]:
# Current bid/ask price and quantity quoted for the symbol (see the output below).
client.get_orderbook_ticker(symbol=eth_sym)

{'symbol': 'ETHBTC',
 'bidPrice': '0.02787000',
 'bidQty': '23.86000000',
 'askPrice': '0.02787300',
 'askQty': '1.23100000'}

In [24]:
from binance.websockets import BinanceSocketManager

In [26]:
def process_message(msg):
    """Print the event type of a socket message (its ``'e'`` field), then the message itself."""
    event_type = msg['e']
    print(f"message type: {event_type}")
    print(msg)

In [27]:
# Socket manager built on top of the REST client.
bm = BinanceSocketManager(client)
# Subscribe to BNBBTC trades with process_message as the callback; the returned key is
# kept so the socket can be stopped later.
conn_key = bm.start_trade_socket("BNBBTC", process_message)

In [28]:
# Start the socket manager; the trade messages printed below come from process_message.
bm.start()

message type: trade
{'e': 'trade', 'E': 1609703440436, 's': 'BNBBTC', 't': 98645283, 'p': '0.00123630', 'q': '1.51000000', 'b': 646330065, 'a': 646330071, 'T': 1609703440435, 'm': True, 'M': True}
message type: trade
{'e': 'trade', 'E': 1609703440436, 's': 'BNBBTC', 't': 98645284, 'p': '0.00123590', 'q': '3.94000000', 'b': 646330060, 'a': 646330071, 'T': 1609703440435, 'm': True, 'M': True}
message type: trade
{'e': 'trade', 'E': 1609703447402, 's': 'BNBBTC', 't': 98645285, 'p': '0.00123620', 'q': '1.00000000', 'b': 646330094, 'a': 646330159, 'T': 1609703447401, 'm': True, 'M': True}
message type: trade
{'e': 'trade', 'E': 1609703447402, 's': 'BNBBTC', 't': 98645286, 'p': '0.00123620', 'q': '6.98000000', 'b': 646330121, 'a': 646330159, 'T': 1609703447401, 'm': True, 'M': True}
message type: trade
{'e': 'trade', 'E': 1609703455145, 's': 'BNBBTC', 't': 98645287, 'p': '0.00123580', 'q': '1.56000000', 'b': 646330271, 'a': 646330192, 'T': 1609703455144, 'm': False, 'M': True}
message type: 

In [30]:
# Stop the BNBBTC trade socket opened above, then close the socket manager.
bm.stop_socket(conn_key)
bm.close()


In [None]:
# Pair symbols and the sampling settings used when the CSV snapshots were downloaded.
ltc_sym = "LTCBTC"
eth_sym = "ETHBTC"
interval = Client.KLINE_INTERVAL_1MINUTE
timedelta = dt.timedelta(days=14)

# One-off download that produced the CSV files read below (kept for provenance):
# eth_df = get_closes(client, eth_sym, interval, timedelta)
# ltc_df = get_closes(client, ltc_sym, interval, timedelta)
# eth_df.to_csv("data/ethbtc_1min.csv")
# ltc_df.to_csv("data/ltcbtc_1min.csv")

# Load the cached snapshots instead of hitting the API again.
eth_df = pd.read_csv("data/ethbtc_1min.csv")
ltc_df = pd.read_csv("data/ltcbtc_1min.csv")

# Raw `c` columns as arrays.
eth_close = eth_df["c"].values
ltc_close = ltc_df["c"].values

# NOTE(review): `get_log_returns` is not defined or imported in the visible cells;
# presumably it lives in `trading.utils` — confirm.
eth_lr = get_log_returns(eth_close)
ltc_lr = get_log_returns(ltc_close)
lr_diff = eth_lr - ltc_lr

# Element-wise ETH/LTC price ratio. The first row is dropped here and for the prices
# below — presumably because get_log_returns yields one element fewer than its input; confirm.
# NOTE(review): the two frames are matched by row position, not by timestamp.
hedge_ratio = (eth_df["c"] / ltc_df["c"]).values[1:]

cols = ["eth", "eth_lr", "ltc", "ltc_lr", "lr_diff", "hedge_ratio"]
# One column per series, in the same order as `cols`.
data = np.stack(
    [eth_close[1:], eth_lr, ltc_close[1:], ltc_lr, lr_diff, hedge_ratio],
    axis=1,
)
# Index the combined frame by the ETH frame's `t1` column.
ix = eth_df["t1"].values[1:]
df = pd.DataFrame(data, index=ix, columns=cols)
# Consecutive integer position of each row, starting at 0.
df["time_idx"] = range(len(df))

In [None]:
# Row-position window of `df` shown in the figure.
t1 = 40
t2 = 60
# Line styling shared by all three panels.
linewidth = 3
alpha = 1

# Slice once and reuse the same rows for every panel.
window = df.iloc[t1:t2]

# Three stacked panels on a shared x axis: eth, ltc, and the lr_diff column.
fig1, (ax1, ax2, ax3) = plt.subplots(nrows=3, ncols=1, sharex=True, figsize=(12, 9))
ax1.plot(window["eth"], linewidth=linewidth, alpha=alpha, label="eth")
ax2.plot(window["ltc"], linewidth=linewidth, alpha=alpha, label="ltc")
ax3.plot(window["lr_diff"], "--g", linewidth=linewidth, alpha=alpha, label="eth - ltc")
fig1.legend()

In [None]:
# Row-position window of `df` shown in the figure.
t1 = 40
t2 = 60
# Line styling shared by all three curves.
linewidth = 3
alpha = 1
line_style = {"linewidth": linewidth, "alpha": alpha}

# Slice once and reuse the same rows for every curve.
window = df.iloc[t1:t2]

# Single panel overlaying the eth_lr, ltc_lr and lr_diff columns.
fig2, ax1 = plt.subplots(nrows=1, ncols=1, sharex=True, figsize=(12, 6))
ax1.plot(window["eth_lr"], label="eth", **line_style)
ax1.plot(window["ltc_lr"], label="ltc", **line_style)
ax1.plot(window["lr_diff"], "--g", label="eth - ltc", **line_style)
ax1.legend(loc="best")

In [None]:
# Rolling standard deviation of the `eth` and `ltc` columns over a 7-row window.
df[["eth", "ltc"]].rolling(7).std()

# Using PyTorch Forecasting

In [None]:
import pytorch_lightning as pl
from pytorch_lightning.callbacks import EarlyStopping
import torch

from pytorch_forecasting import Baseline, NBeats, TimeSeriesDataSet, DeepAR, TemporalFusionTransformer
from pytorch_forecasting.data import NaNLabelEncoder, TorchNormalizer
from pytorch_forecasting.data.examples import generate_ar_data
from pytorch_forecasting.metrics import SMAPE, NormalDistributionLoss, QuantileLoss

# Build dataset

In [None]:
# Modelling frame: `time_idx` first, then the two price columns and the spread, on a
# fresh 0..n-1 index, plus a constant `group` column (every row belongs to group 0).
ddf = (
    df[["time_idx", "eth", "ltc", "lr_diff"]]
    .reset_index(drop=True)
    .assign(group=0)
)
ddf

In [None]:
# Run statsmodels' `coint` test on the eth and ltc columns and unpack its three results.
coint_t, p_value, crit_value = coint(ddf["eth"].values, ddf["ltc"].values)
# Display the unpacked results.
coint_t, p_value, crit_value

In [None]:
# Display the modelling frame again.
ddf

In [None]:
# Scratch TorchNormalizer configured with the "identity" method; fitted in the next cell.
a=TorchNormalizer(method="identity")

In [None]:
# Fit the scratch normalizer on the `eth` column.
a.fit(ddf["eth"])

In [None]:
# start with just eth returns
ddf = df[["eth", "ltc", "lr_diff", "time_idx"]]
ddf = ddf.set_index("time_idx").reset_index()
ddf["group"] = 0

context_length = 45
prediction_length = 3
training_cutoff = ddf["time_idx"].max() - prediction_length

train_tsds = TimeSeriesDataSet(
    ddf[ddf["time_idx"] <= training_cutoff],
    time_idx = "time_idx",
    target = "lr_diff",
    categorical_encoders={"group": NaNLabelEncoder().fit(ddf.group)},
    group_ids = ["group"],
    time_varying_unknown_reals=["eth", "ltc", "lr_diff"],
    max_encoder_length=context_length,
    max_prediction_length=prediction_length,
    target_normalizer=None,
    add_relative_time_idx=True,
    scalers = {
        "eth": None, 
        "ltc": None
    }
)
val_tsds = TimeSeriesDataSet.from_dataset(train_tsds, ddf, min_prediction_idx=training_cutoff + 1)

batch_size = 128
train_dataloader = train_tsds.to_dataloader(train=True, batch_size=batch_size, num_workers=0)
val_dataloader = val_tsds.to_dataloader(train=False, batch_size=batch_size, num_workers=0)

In [None]:
# Inspect the parameters the training dataset was configured with.
train_tsds.get_parameters()

# Start model building

In [None]:
# calculate baseline absolute error
actuals = torch.cat([y[0] for x, y in iter(val_dataloader)])
baseline_predictions = Baseline().predict(val_dataloader)
SMAPE()(baseline_predictions, actuals.view(1,-1))

In [None]:
# Fix the random seeds so the run is repeatable.
pl.seed_everything(42)
# CPU-only trainer with gradient clipping; it is used by the learning-rate finder cell.
trainer = pl.Trainer(gradient_clip_val=0.1, gpus=0)

# Hyper-parameters forwarded to the Temporal Fusion Transformer.
# NOTE(review): output_size=7 presumably matches the number of quantiles of the default
# QuantileLoss — confirm against the library.
model_hyper_params = dict(
    learning_rate=3e-2,
    hidden_size=16,
    lstm_layers=2,
    attention_head_size=4,
    output_size=7,
    loss=QuantileLoss(),
    time_varying_reals_encoder=["eth", "lr_diff", "ltc"],
)

# Build the network from the training dataset's configuration.
net = TemporalFusionTransformer.from_dataset(train_tsds, **model_hyper_params)

In [None]:
# net.summarize("full")
# net.hparams

In [None]:
# find optimal learning rate
res = trainer.tuner.lr_find(net, train_dataloader=train_dataloader, val_dataloaders=val_dataloader, min_lr=1e-5)
print(f"suggested learning rate: {res.suggestion()}")
fig = res.plot(show=True, suggest=True)
fig.show()
net.hparams.learning_rate = res.suggestion()

In [None]:
# Stop training once `val_loss` has not improved by at least 1e-4 for 10 checks.
early_stop_callback = EarlyStopping(
    monitor="val_loss",
    mode="min",
    min_delta=1e-4,
    patience=10,
    verbose=False,
)

# Fresh CPU trainer for the real fit: at most 50 epochs, 30 training batches per epoch,
# gradient clipping, and early stopping.
trainer = pl.Trainer(
    gpus=0,
    max_epochs=50,
    limit_train_batches=30,
    gradient_clip_val=0.1,
    weights_summary="top",
    callbacks=[early_stop_callback],
)

# Fit the network on the training loader, validating on the validation loader.
trainer.fit(net, train_dataloader=train_dataloader, val_dataloaders=val_dataloader)

In [None]:
# Path of the best checkpoint recorded by the trainer's checkpoint callback.
best_model_path = trainer.checkpoint_callback.best_model_path
# Reload the model from that checkpoint; the evaluation cells below use `best_model`.
best_model = TemporalFusionTransformer.load_from_checkpoint(best_model_path)

In [None]:
# Mean absolute error of the reloaded model on the validation loader.
# Each batch is an (x, y) pair; the first element of y is collected as the target.
actuals = torch.cat([targets[0] for _, targets in val_dataloader])
predictions = best_model.predict(val_dataloader)
torch.mean(torch.abs(actuals - predictions))

In [None]:
# Raw-mode predictions on the validation loader, together with the network inputs `x`;
# both are used by the plotting and interpretation cells below.
raw_predictions, x = best_model.predict(val_dataloader, mode="raw", return_x=True)

In [None]:
# Plot the prediction for the first validation sample (idx=0) with the loss in the title;
# the trailing semicolon suppresses the cell's text output.
best_model.plot_prediction(x, raw_predictions, idx=0, add_loss_to_title=True);

In [None]:
# Predictions on the TRAINING loader. This rebinds `predictions` and `x`, which earlier
# cells filled from the validation loader.
predictions, x = best_model.predict(train_dataloader, return_x=True)
# Compare predictions against actuals per input variable, then plot the comparison.
predictions_vs_actuals = best_model.calculate_prediction_actual_by_variable(x, predictions)
best_model.plot_prediction_actual_by_variable(predictions_vs_actuals);

In [None]:
# Interpret the raw predictions with reduction="sum", then plot the result.
# NOTE(review): `raw_predictions` is whatever the earlier raw-mode cell produced (validation
# loader) — make sure that cell ran before this one.
interpretation = best_model.interpret_output(raw_predictions, reduction="sum")
best_model.plot_interpretation(interpretation)

In [None]:
# Predictions on the validation dataset while `eth` is set to each of 30 evenly spaced
# values between 0 and 30; the result is returned as a dataframe.
# NOTE(review): the ETHBTC quotes shown earlier in this notebook are around 0.028, so a
# 0..30 sweep looks far outside the observed range — confirm the intended grid.
dependency = best_model.predict_dependency(
    val_dataloader.dataset, "eth", np.linspace(0, 30, 30), show_progress_bar=True, mode="dataframe"
)

In [None]:
# plotting median and 25% and 75% percentile
agg_dependency = dependency.groupby("eth").normalized_prediction.agg(
    median="median", q25=lambda x: x.quantile(0.25), q75=lambda x: x.quantile(0.75)
)
ax = agg_dependency.plot(y="median")
ax.fill_between(agg_dependency.index, agg_dependency.q25, agg_dependency.q75, alpha=0.3);