In [1]:
import numpy as np
import pandas as pd
from datetime import date, datetime
from finta import TA
from sklearn.model_selection import train_test_split

In [2]:
pairs = ['ADA_USDT', 'AVAX_USDT', 'BCH_USDT', 'BNB_USDT', 'BTC_USDT', 'DOGE_USDT', 
         'DOT_USDT', 'ETH_USDT', 'HBAR_USDT', 'LINK_USDT', 'LTC_USDT', 'SHIB_USDT', 
         'SOL_USDT', 'SUI_USDT', 'TON_USDT', 'TRX_USDT', 'XLM_USDT', 'XRP_USDT']
pair_data = []
for pair in pairs:
    df = pd.read_feather(f'../ft_userdata/user_data/data/binance/{pair}-1m.feather')
    df, test_df = train_test_split(df, test_size=0.15, shuffle=False) # не хотим подглядеть в будущее
    print(f"{pair}: start: {df['date'].iloc[0]}, end: {df['date'].iloc[-1]}")
    pair_data.append(df)

ADA_USDT: start: 2018-04-17 04:02:00+00:00, end: 2024-04-02 01:44:00+00:00
AVAX_USDT: start: 2020-09-22 06:30:00+00:00, end: 2024-08-13 03:10:00+00:00
BCH_USDT: start: 2019-11-28 10:00:00+00:00, end: 2024-06-29 09:33:00+00:00
BNB_USDT: start: 2017-11-06 03:54:00+00:00, end: 2024-03-09 04:06:00+00:00
BTC_USDT: start: 2018-06-05 00:00:00+00:00, end: 2024-03-31 01:34:00+00:00
DOGE_USDT: start: 2019-07-05 12:00:00+00:00, end: 2024-06-07 10:43:00+00:00
DOT_USDT: start: 2020-08-18 23:00:00+00:00, end: 2024-08-07 20:56:00+00:00
ETH_USDT: start: 2018-06-05 00:00:00+00:00, end: 2024-03-31 01:44:00+00:00
HBAR_USDT: start: 2019-09-29 04:00:00+00:00, end: 2024-06-20 09:19:00+00:00
LINK_USDT: start: 2019-01-16 10:00:00+00:00, end: 2024-05-13 00:47:00+00:00
LTC_USDT: start: 2017-12-13 03:32:00+00:00, end: 2024-03-14 17:07:00+00:00
SHIB_USDT: start: 2021-05-10 11:00:00+00:00, end: 2024-09-16 13:25:00+00:00
SOL_USDT: start: 2020-08-11 06:00:00+00:00, end: 2024-07-28 10:10:00+00:00
SUI_USDT: start: 202

In [3]:
start_max = pair_data[0]['date'].iloc[0]
end_min = pair_data[0]['date'].iloc[-1]
for df in pair_data:
    start_max = max(df['date'].iloc[0], start_max)
    end_min = min(df['date'].iloc[-1], end_min)
print(f"start: {start_max}, end: {end_min}")

start: 2024-08-08 10:00:00+00:00, end: 2024-03-09 04:06:00+00:00


In [4]:
def generate_target_classification(df, fee = 0.001):
    df['is_big_increase'] = (df['close'].shift(-1) / df['close']) > 1 + 2 * fee
    df['is_drawdown'] = (df['close'].shift(-1) / df['close']) < 1
    return df

In [5]:
for num_bps in [1, 8, 10]:
    fee = num_bps * 0.0001 # 10 процентных пунктов
    print(f'Доля изменения цены на >= {2 * num_bps} п.п. вверх для каждой пары')
    for df, pair in zip(pair_data, pairs):
        df = generate_target_classification(df, 2*fee)
        big_increase_ratio = 100 * df['is_big_increase'].sum() / len(df)
        print(f"Пара: {pair:<9} - {big_increase_ratio:.2f}%")
    print("")

Доля изменения цены на >= 2 п.п. вверх для каждой пары
Пара: ADA_USDT  - 31.09%
Пара: AVAX_USDT - 35.02%
Пара: BCH_USDT  - 30.57%
Пара: BNB_USDT  - 29.14%
Пара: BTC_USDT  - 22.33%
Пара: DOGE_USDT - 27.42%
Пара: DOT_USDT  - 31.53%
Пара: ETH_USDT  - 26.29%
Пара: HBAR_USDT - 28.08%
Пара: LINK_USDT - 32.11%
Пара: LTC_USDT  - 30.55%
Пара: SHIB_USDT - 33.32%
Пара: SOL_USDT  - 35.77%
Пара: SUI_USDT  - 34.53%
Пара: TON_USDT  - 28.14%
Пара: TRX_USDT  - 26.66%
Пара: XLM_USDT  - 30.70%
Пара: XRP_USDT  - 28.19%

Доля изменения цены на >= 20 п.п. вверх для каждой пары
Пара: ADA_USDT  - 1.29%
Пара: AVAX_USDT - 2.17%
Пара: BCH_USDT  - 1.05%
Пара: BNB_USDT  - 1.36%
Пара: BTC_USDT  - 0.37%
Пара: DOGE_USDT - 1.94%
Пара: DOT_USDT  - 1.43%
Пара: ETH_USDT  - 0.67%
Пара: HBAR_USDT - 2.70%
Пара: LINK_USDT - 1.95%
Пара: LTC_USDT  - 1.29%
Пара: SHIB_USDT - 1.84%
Пара: SOL_USDT  - 2.24%
Пара: SUI_USDT  - 1.40%
Пара: TON_USDT  - 0.46%
Пара: TRX_USDT  - 0.93%
Пара: XLM_USDT  - 1.27%
Пара: XRP_USDT  - 1.21%

Доля 