
# Loading Binance data into a trading universe

Loading CEX (centralised exchange) data into Trading Strategy lets you work with longer historical periods for your trading pairs. A longer history gives you more data to backtest and train your model on, which can lead to better results. The only downside is that you will have to wait longer for your backtests to complete.

## Creating our assets

In the following code block, we create our ETH and USDC assets, using the real on-chain deployment addresses of these tokens on Polygon.

In [12]:
from dotenv import load_dotenv
import os

from eth_defi.chain import install_chain_middleware
from eth_defi.abi import get_deployed_contract
from web3 import Web3, HTTPProvider

from tradingstrategy.chain import ChainId
from tradeexecutor.state.identifier import AssetIdentifier

# load_dotenv()

# JSON-RPC endpoint for Polygon, read from the environment.
polygon_json_rpc = os.environ["MY_JSON_RPC_POLYGON"]

# HTTP connection to the node; the timeout is passed through as a request kwarg.
web3 = Web3(HTTPProvider(polygon_json_rpc, request_kwargs={"timeout": 5}))
install_chain_middleware(web3)


def _load_token(address: str):
    """Return the deployed token contract at ``address`` using the ERC20MockDecimals ABI."""
    return get_deployed_contract(web3, "ERC20MockDecimals.json", address)


def _asset_from_token(token) -> AssetIdentifier:
    """Build a Polygon asset identifier from the token's on-chain symbol and decimals."""
    symbol = token.functions.symbol().call()
    decimals = token.functions.decimals().call()
    return AssetIdentifier(ChainId.polygon.value, token.address, symbol, decimals)


# usdc with $4B supply
# https://polygonscan.com/address/0x2791bca1f2de4661ed88a30c99a7a9449aa84174
usdc_token = _load_token("0x2791Bca1f2de4661ED88A30C99A7a9449Aa84174")

# eth token.
# https://polygonscan.com/address/0x7ceb23fd6bc0add59e62ac25578270cff1b9f619
# https://tradingstrategy.ai/trading-view/polygon/tokens/0x7ceb23fd6bc0add59e62ac25578270cff1b9f619
eth_token = _load_token("0x7ceB23fD6bC0adD59E62ac25578270cFf1b9f619")

# Asset descriptions are read from chain in the same order as before: USDC first, then ETH.
usdc_asset = _asset_from_token(usdc_token)
eth_asset = _asset_from_token(eth_token)


In [13]:
from tradeexecutor.state.identifier import TradingPairIdentifier

# Trading pair description: ETH is the base asset, USDC is the quote asset,
# and the fee is 0.0005 (the "fee-5" tier named in the info URL).
# NOTE(review): the pool address below looks like the Ethereum mainnet
# USDC/WETH pool rather than a Polygon deployment - confirm it is intended.
pair = TradingPairIdentifier(
    base=eth_asset,
    quote=usdc_asset,
    pool_address="0x88e6a0c2ddd26feeb64f039a2c41296fcb3f5640",
    exchange_address="0x1F98431c8aD98523631AE4a59f267346ea31F984",
    internal_id=2854973,
    internal_exchange_id=3990,
    fee=0.0005,
    info_url="https://tradingstrategy.ai/trading-view/polygon/uniswap-v3/eth-usdc-fee-5",
)

## Examine dataset

In the following code block, we examine the dataset to see what it looks like. The dataset is indexed by `Date` and contains the following fields:

- `Date`: The date of the start of the time interval
- `open`: The price of the asset at the start of the time interval
- `high`: The highest price of the asset during the time interval
- `low`: The lowest price of the asset during the time interval
- `close`: The price of the asset at the end of the time interval
- `volume`: The volume of the asset traded during the time interval

In [14]:
import pandas as pd
from pathlib import Path

# The raw Binance candles are read from one Parquet file; the enriched copy
# created later in this notebook is written to ``parquet_file_path``.
source_parquet_path = Path("./binance-ETHUSDT-1h.parquet")
parquet_file_path = Path("./binance-ETHUSDT-1h-updated.parquet")

df = pd.read_parquet(source_parquet_path)

# The rest of the notebook relies on a timestamp index.
assert isinstance(df.index, pd.DatetimeIndex)

first_rows = df.head()
print(first_rows)

                       open    high    low   close     volume
Date                                                         
2017-08-17 04:00:00  301.13  302.57  298.0  301.61  125.66877
2017-08-17 05:00:00  301.61  303.28  300.0  303.10  377.67246
2017-08-17 06:00:00  302.40  304.44  301.9  302.68  303.86672
2017-08-17 07:00:00  302.68  307.96  302.6  307.96  754.74510
2017-08-17 08:00:00  307.95  309.97  307.0  308.62  150.75029


## Add columns to dataset

We need to add metadata columns describing the trading pair, such as `base_token_symbol` and `quote_token_symbol`.

In [15]:
# Constant metadata columns describing the trading pair.
# Each scalar value is broadcast to every row of the DataFrame.
pair_metadata_columns = {
    "base_token_symbol": eth_asset.token_symbol,
    "quote_token_symbol": usdc_asset.token_symbol,
    "exchange_slug": "uniswap-v3",
    "chain_id": pair.base.chain_id,
    # Fee multiplied by 10,000 (0.0005 -> 5)
    "fee": pair.fee * 10_000,
    "pair_id": pair.internal_id,
    "buy_volume_all_time": 0,
    "address": pair.pool_address,
    "exchange_id": pair.internal_exchange_id,
    "token0_address": eth_asset.address,
    "token1_address": usdc_asset.address,
    "token0_symbol": eth_asset.token_symbol,
    "token1_symbol": usdc_asset.token_symbol,
    "token0_decimals": eth_asset.decimals,
    "token1_decimals": usdc_asset.decimals,
}

for column_name, column_value in pair_metadata_columns.items():
    df[column_name] = column_value

# Persist the enriched DataFrame to the "updated" Parquet file
df.to_parquet(parquet_file_path)

In [16]:
from tradeexecutor.strategy.trading_strategy_universe import Dataset
from tradingstrategy.timebucket import TimeBucket
from tradingstrategy.exchange import ExchangeUniverse, Exchange, ExchangeType
from tradeexecutor.strategy.pandas_trader.alternative_market_data import load_candle_universe_from_parquet

# Load the enriched candles for ``pair`` from the Parquet file.
# With include_as_trigger_signal=True the loader also returns a second
# candle universe meant for stop loss / trigger signals.
candle_universe, stop_loss_candle_universe = load_candle_universe_from_parquet(
    pair=pair,
    file=parquet_file_path,
    include_as_trigger_signal=True,
)

# Describe the exchange the pair is attributed to. The id and address are
# taken from ``pair`` so the two descriptions cannot drift apart.
uniswap_v3 = Exchange(
    chain_id=ChainId.polygon,
    chain_slug="polygon",
    exchange_id=pair.internal_exchange_id,
    exchange_slug="uniswap-v3",
    address=pair.exchange_address,
    exchange_type=ExchangeType.uniswap_v3,
    pair_count=1,
)

exchange_universe = ExchangeUniverse.from_collection([uniswap_v3])

# One row per pair: take the first candle row of each pair group,
# which carries the pair metadata columns added earlier.
pairs_df = candle_universe.pairs.first().reset_index()

# Both price candles and stop loss candles use the same 1h time bucket.
dataset = Dataset(
    time_bucket=TimeBucket.h1,
    exchanges=exchange_universe,
    pairs=pairs_df,
    candles=candle_universe.df,
    backtest_stop_loss_time_bucket=TimeBucket.h1,
    # Use the dedicated stop loss universe returned by the loader
    # instead of silently discarding it.
    backtest_stop_loss_candles=stop_loss_candle_universe.df,
)

In [17]:
from tradeexecutor.strategy.trading_strategy_universe import TradingStrategyUniverse

# Human-readable description of the single pair we trade:
# (chain, exchange slug, base symbol, quote symbol, fee)
# Ether-USD Coin (PoS) https://tradingstrategy.ai/trading-view/polygon/uniswap-v3/eth-usdc-fee-5
single_pair_description = (ChainId.polygon, "uniswap-v3", "WETH", "USDC", 0.0005)

universe = TradingStrategyUniverse.create_single_pair_universe(
    dataset=dataset,
    pair=single_pair_description,
)

print(f"We loaded {universe.universe.candles.get_candle_count():,} candles.")

We loaded 52,106 candles.


## Using the universe in a strategy

In [18]:
import datetime
import pandas as pd

from tradingstrategy.chain import ChainId
from tradingstrategy.timebucket import TimeBucket
from tradeexecutor.strategy.cycle import CycleDuration
from tradeexecutor.strategy.strategy_module import StrategyType, TradeRouting, ReserveCurrency

# Tell what trade execution engine version this strategy needs to use
# NOTE: this setting has currently no effect
TRADING_STRATEGY_TYPE_ENGINE_VERSION = "0.1"

# What kind of strategy we are running.
# NOTE: this setting has currently no effect
TRADING_STRATEGY_TYPE = StrategyType.managed_positions

# How our trades are routed.
# Routing is set to "ignore" for this notebook.
TRADE_ROUTING = TradeRouting.ignore

# How often the strategy performs the decide_trades cycle.
# We do it every hour, matching the 1h candles.
TRADING_STRATEGY_CYCLE = CycleDuration.cycle_1h

# Strategy keeps its cash in USDC
RESERVE_CURRENCY = ReserveCurrency.usdc

# Time bucket for our candles
CANDLE_TIME_BUCKET = TimeBucket.h1

# Which chain we are trading
CHAIN_ID = ChainId.polygon

# Which trading pair we are trading.
# Uses CHAIN_ID so the description matches the Polygon universe built above.
TRADING_PAIR = (CHAIN_ID, "uniswap-v3", "WETH", "USDC", 0.0005) # Ether-USD Coin (PoS) https://tradingstrategy.ai/trading-view/polygon/uniswap-v3/eth-usdc-fee-5

# How much of the cash to put on a single trade
POSITION_SIZE = 0.70

#
# Strategy thinking specific parameter
#

# How many latest candles decide_trades() reads on each cycle
BATCH_SIZE = 90

# EMA lengths, in candles
SLOW_EMA_CANDLE_COUNT = 10
FAST_EMA_CANDLE_COUNT = 3


# Range of backtesting.
# We backtest over the whole period covered by the loaded candle data:
# from the first to the last timestamp in the DataFrame index.

START_AT = df.index[0].to_pydatetime()

END_AT = df.index[-1].to_pydatetime()

# Start with 10,000 USD
INITIAL_DEPOSIT = 10_000

# If the price drops 0.7% we trigger a stop loss
STOP_LOSS_PCT = 0.993

# NOTE(review): the dataset built earlier uses 1h stop loss candles and this
# constant is not referenced in the visible notebook - confirm whether m15 is intended.
STOP_LOSS_TIME_BUCKET = TimeBucket.m15

In [19]:
from typing import List, Dict

from pandas_ta.overlap import ema

from tradeexecutor.state.visualisation import PlotKind, RecordingTime
from tradeexecutor.state.trade import TradeExecution
from tradeexecutor.strategy.pricing_model import PricingModel
from tradeexecutor.strategy.pandas_trader.position_manager import PositionManager
from tradeexecutor.state.state import State
from tradingstrategy.universe import Universe
from tradeexecutor.strategy.pandas_trader.position_manager import PositionManager

def decide_trades(
        timestamp: pd.Timestamp,
        universe: Universe,
        state: State,
        pricing_model: PricingModel,
        cycle_debug_data: Dict) -> List[TradeExecution]:
    """The brain function to decide the trades on each trading strategy cycle.

    - Reads incoming execution state (positions, past trades)

    - Reads the current universe (candles)

    - Decides what trades to do next, if any, at current timestamp.

    - Outputs strategy thinking for visualisation and debug messages

    Entry and exit rules, as implemented below:

    - With no open position, open a long when the close has just crossed
      under the slow EMA, or on the very first cycle if the price is
      already below the slow EMA.

    - With an open position, close it when the close has crossed over the
      slow EMA, or when the close has crossed under the fast EMA while
      the fast EMA is above the slow EMA.

    :param timestamp:
        The Pandas timestamp object for this cycle. Matches
        TRADING_STRATEGY_CYCLE division.
        Always truncated to the zero seconds and minutes, never a real-time clock.

    :param universe:
        Trading universe that was constructed earlier.

    :param state:
        The current trade execution state.
        Contains current open positions and all previously executed trades, plus output
        for statistics, visualisation and diagnostics of the strategy.

    :param pricing_model:
        Pricing model can tell the buy/sell price of the particular asset at a particular moment.

    :param cycle_debug_data:
        Python dictionary for various debug variables you can read or set, specific to this trade cycle.
        This data is discarded at the end of the trade cycle.

    :return:
        List of trade instructions in the form of :py:class:`TradeExecution` instances.
        The trades can be generated using `position_manager` but strategy could also hand craft its trades.
    """

    # The pair we are trading
    pair = universe.pairs.get_single()

    # How much cash we have in the hand
    cash = state.portfolio.get_current_cash()

    # Get OHLCV candles for our trading pair as Pandas Dataframe.
    # We could have candles for multiple trading pairs in a different strategy,
    # but this strategy only operates on single pair candle.
    # We also limit our sample size to N latest candles to speed up calculations.
    candles: pd.DataFrame = universe.candles.get_single_pair_data(timestamp, sample_count=BATCH_SIZE, raise_on_not_enough_data=False)

    # We have data for open, high, close, etc.
    # We only operate using candle close values in this strategy.
    close_prices = candles["close"]

    # Calculate exponential moving averages based on slow and fast sample numbers.
    slow_ema_series = ema(close_prices, length=SLOW_EMA_CANDLE_COUNT)
    fast_ema_series = ema(close_prices, length=FAST_EMA_CANDLE_COUNT)

    if slow_ema_series is None or fast_ema_series is None:
        # Cannot calculate EMA, because
        # not enough samples in backtesting
        return []

    if len(slow_ema_series) < 3 or len(fast_ema_series) < 3:
        # The crossover check below looks three candles back (iloc[-3]),
        # so we need at least three data points before indexing is safe.
        return []

    slow_ema_latest = slow_ema_series.iloc[-1]
    fast_ema_latest = fast_ema_series.iloc[-1]
    price_latest = close_prices.iloc[-1]

    # Compute technical indicators needed for trade decisions.
    # NOTE(review): the crossover compares the close from *two* candles ago
    # (iloc[-3]) with the previous EMA value, while both crossunder checks use
    # the previous close (iloc[-2]). Kept as-is so the backtest results stay
    # unchanged - confirm whether iloc[-2] was intended.
    slow_ema_crossover = (
        close_prices.iloc[-3] < slow_ema_series.iloc[-2]
        and price_latest > slow_ema_latest
    )
    slow_ema_crossunder = (
        close_prices.iloc[-2] > slow_ema_series.iloc[-2]
        and price_latest < slow_ema_latest
    )
    fast_ema_crossunder = (
        close_prices.iloc[-2] > fast_ema_series.iloc[-2]
        and price_latest < fast_ema_latest
    )

    # List of any trades we decide on this cycle.
    trades = []

    # Create a position manager helper class that allows us easily to create
    # opening/closing trades for different positions
    position_manager = PositionManager(timestamp, universe, state, pricing_model)

    if not position_manager.is_any_open():
        # No open positions, decide if BUY in this cycle.
        # We buy if the close just crossed under the slow EMA, or if this is
        # the very first trading cycle and the price is already below the slow EMA.
        if (
            slow_ema_crossunder
            or (price_latest < slow_ema_latest and timestamp == START_AT)
            ):
            buy_amount = cash * POSITION_SIZE
            new_trades = position_manager.open_1x_long(pair, buy_amount, stop_loss_pct=STOP_LOSS_PCT)
            trades.extend(new_trades)
    else:
        # We have an open position, decide if SELL in this cycle.
        # We sell if the close crossed over the slow EMA, or if it crossed
        # under the fast EMA while the fast EMA is above the slow EMA.
        if slow_ema_crossover or (fast_ema_crossunder and fast_ema_latest > slow_ema_latest):
            new_trades = position_manager.close_all()
            assert len(new_trades) == 1
            trades.extend(new_trades)

    # Visualize strategy
    # See available Plotly colours here
    # https://community.plotly.com/t/plotly-colours-list/11730/3?u=miohtama
    visualisation = state.visualisation

    # plot stop loss line; no value is recorded when there is no open position
    if position_manager.is_any_open():
        stop_loss_usd = position_manager.get_current_position().stop_loss
    else:
        stop_loss_usd = None
    visualisation.plot_indicator(timestamp, "stop loss", PlotKind.technical_indicator_on_price, stop_loss_usd, colour="blue", recording_time=RecordingTime.market_time)

    visualisation.plot_indicator(timestamp, "Fast EMA", PlotKind.technical_indicator_detached, fast_ema_latest, colour="red")

    visualisation.plot_indicator(timestamp, "Slow EMA", PlotKind.technical_indicator_overlay_on_detached, slow_ema_latest, colour="green", detached_overlay_name="Fast EMA")

    # another way to plot horizontal lines
    # allows you to plot as many as you want
    visualisation.plot_indicator(timestamp, "h2", PlotKind.technical_indicator_overlay_on_detached, 3300, colour="orange", detached_overlay_name="Fast EMA")

    return trades

In [20]:
from tradingstrategy.client import Client

# Create the Trading Strategy client for the Jupyter notebook environment.
# It is handed to the backtest runner in the next cell.
client = Client.create_jupyter_client()

Started Trading Strategy in Jupyter notebook environment, configuration is stored in /home/datalore/.tradingstrategy


In [21]:
import logging

from tradeexecutor.backtest.backtest_runner import run_backtest_inline

# Run the backtest over the whole candle range using the universe we built
# from the Parquet file. Note that ``universe`` is rebound to the universe
# returned by the runner.
state, universe, debug_dump = run_backtest_inline(
    name="ETH/USDC fast and slow EMA example",
    client=client,
    universe=universe,
    decide_trades=decide_trades,
    start_at=START_AT,
    end_at=END_AT,
    cycle_duration=TRADING_STRATEGY_CYCLE,
    initial_deposit=INITIAL_DEPOSIT,
    reserve_currency=RESERVE_CURRENCY,
    trade_routing=TRADE_ROUTING,
    log_level=logging.WARNING,
)

# Count every trade recorded in the resulting portfolio
all_trades = list(state.portfolio.get_all_trades())
trade_count = len(all_trades)
print(f"Backtesting completed, backtested strategy made {trade_count} trades")

Backtesting completed, backtested strategy made 9460 trades


8038800 [07:08<02:36, 179260.49it/s, trades=8078, cycles=44443, TPs=0, SLs=1527, PnL=-69.66%]Backtesting ETH/USDC fast and slow EMA example, 2017-08-17 - 2023-08-02 at 2022-09-12 (1h):  85%|████████▌ | 159991200/188038800 [07:09<02:36, 179260.49it/s, trades=8078, cycles=44443, TPs=0, SLs=1527, PnL=-69.66%]Backtesting ETH/USDC fast and slow EMA example, 2017-08-17 - 2023-08-02 at 2022-09-12 (1h):  85%|████████▌ | 159991200/188038800 [07:09<02:36, 179260.49it/s, trades=8079, cycles=44449, TPs=0, SLs=1527, PnL=-69.98%]Backtesting ETH/USDC fast and slow EMA example, 2017-08-17 - 2023-08-02 at 2022-09-12 (1h):  85%|████████▌ | 160012800/188038800 [07:09<02:36, 178952.67it/s, trades=8079, cycles=44449, TPs=0, SLs=1527, PnL=-69.98%]Backtesting ETH/USDC fast and slow EMA example, 2017-08-17 - 2023-08-02 at 2022-09-12 (1h):  85%|████████▌ | 160012800/188038800 [07:09<02:36, 178952.67it/s, trades=8079, cycles=44449, TPs=0, SLs=1527, PnL=-69.98%]Backtesting ETH/USDC fast and slow EMA example

In [22]:
# Report how many positions and trades the backtest produced.
# The portfolio getters are materialised with list() before counting.
print(f"Positions taken: {len(list(state.portfolio.get_all_positions()))}")
print(f"Trades made: {len(list(state.portfolio.get_all_trades()))}")

Positions taken: 4730
Trades made: 9460


In [23]:
# from tradeexecutor.visual.single_pair import visualise_single_pair, visualise_single_pair_positions_with_duration_and_slippage
# from tradingstrategy.charting.candle_chart import VolumeBarMode

# figure = visualise_single_pair(
#     state,
#     universe.universe.candles,
#     start_at=START_AT,
#     end_at=END_AT,
#     volume_bar_mode=VolumeBarMode.separate,
#     volume_axis_name="Volume (USD)",
#     height = 1000,
#     relative_sizing=[1, 0.2, 1]
# )

# figure.show()

In [29]:
from tradeexecutor.visual.benchmark import visualise_benchmark

# The single pair in the universe and its close price series are used
# as the buy-and-hold comparison for the strategy.
traded_pair = universe.universe.pairs.get_single()
close_price_series = universe.universe.candles.get_single_pair_data()["close"]

fig = visualise_benchmark(
    state.name,
    portfolio_statistics=state.stats.portfolio,
    all_cash=state.portfolio.get_initial_deposit(),
    buy_and_hold_asset_name=traded_pair.base_token_symbol,
    buy_and_hold_price_series=close_price_series,
    start_at=START_AT,
    end_at=END_AT,
    height=800,
)

fig.show()

In [25]:
from tradeexecutor.analysis.trade_analyser import build_trade_analysis

# Build the trade analysis from the backtested portfolio;
# the summary statistics in the next cell are calculated from it.
analysis = build_trade_analysis(state.portfolio)

In [27]:
from IPython.core.display_functions import display

# Calculate summary statistics for the backtest using the candle time bucket.
summary = analysis.calculate_summary_statistics(state=state, time_bucket=CANDLE_TIME_BUCKET)

# Alternative: show the summary as a single DataFrame without row truncation
# with pd.option_context("display.max_row", None):
#      display(summary.to_dataframe())

# Render the summary tables in the notebook
summary.display()

Returns,Unnamed: 1
Annualised return %,-13.47%
Lifetime return %,-80.28%
Realised PnL,"$-8,028.12"
Trade period,2175 days 20 hours

Holdings,Unnamed: 1
Total assets,"$1,971.88"
Cash left,"$1,971.88"
Open position value,$0.00
Open positions,0

Unnamed: 0_level_0,Winning,Losing,Total
Closed Positions,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Number of positions,2695,2035,4730
% of total,56.98%,43.02%,100.00%
Average PnL %,0.93%,-1.33%,-0.04%
Median PnL %,0.59%,-1.15%,0.14%
Biggest PnL %,14.87%,-13.72%,-
Average duration,2 hours 48 minutes,2 hours 50 minutes,2 hours 49 minutes
Max consecutive streak,14,8,-
Max runup / drawdown,49.47%,-85.25%,-

Unnamed: 0_level_0,Stop losses,Take profits
Position Exits,Unnamed: 1_level_1,Unnamed: 2_level_1
Triggered exits,1697,0
Percent winning,0.00%,-
Percent losing,100.00%,-
Percent of total,35.88%,0.00%

Risk Analysis,Unnamed: 1
Biggest realized risk,0.52%
Average realized risk,-0.93%
Max pullback of capital,-18.03%
Sharpe Ratio,-0.74
Sortino Ratio,-1.07
Profit Factor,0.95
