In [1]:
import sys
sys.path.insert(0, sys.path[0].removesuffix('/src/crypto'))

from pycaret.classification import ClassificationExperiment
from src.utils import *
from src.calcEMA import calc_RSI
import plotly.express as px
from sklearn.model_selection import train_test_split
import pandas as pd

In [2]:
# Run configuration: market pair, back-test threshold and model inputs.
crypto, currency = 'BTC', 'USDT'

stop_loss = 2.0  # percent move (either direction) that closes a simulated position
regression_times = 30 * 24  # hours; number of lagged copies built per numeric feature

datadir = f'./data/{crypto}{currency}'  # location handed to read_data
label = 'status'  # name of the label column

# Base numeric inputs. Disabled wider alternative:
#   ['open', 'high', 'low', 'volume', 'close', 'rsi']
numeric_features = ['close']
_calc_rsi = True  # when True, an 'rsi' column is computed and registered as a feature

### Metadata

<code>
Field Name - Description<br>
open_time - Kline Open time in unix time format<br>
open - Open Price<br>
high - High Price<br>
low - Low Price<br>
close - Close Price<br>
volume - Volume<br>
close_time - Kline Close time in unix time format<br>
quote_volume - Quote Asset Volume<br>
count - Number of Trades<br>
taker_buy_volume - Taker buy base asset volume during this period<br>
taker_buy_quote_volume - Taker buy quote asset volume during this period<br>
ignore - Ignore<br>
</code>

In [3]:
# Load only the columns the model needs: the date column(s) listed in
# date_features (presumably provided by the wildcard import from src.utils —
# confirm) plus the currently selected numeric features.
use_cols = date_features + numeric_features
print(use_cols)
all_data = read_data(datadir, all_cols=None, use_cols=use_cols)
# DataFrame.info() writes its summary to stdout itself and returns None, so it
# is called directly; wrapping it in print() emitted a stray "None" line.
all_data.info()

['open_time', 'close']
Start reading file:  ./data/BTCUSDT/BTCUSDT.csv
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 53086 entries, 0 to 53085
Data columns (total 2 columns):
 #   Column     Non-Null Count  Dtype         
---  ------     --------------  -----         
 0   open_time  53086 non-null  datetime64[ns]
 1   close      53086 non-null  float64       
dtypes: datetime64[ns](1), float64(1)
memory usage: 829.6 KB
None


In [4]:
# Optionally enrich the frame with an RSI column and register it as a feature.
if _calc_rsi:
  all_data = calc_RSI(all_data)
  # Guard the append so that re-running this cell does not register 'rsi'
  # twice; a duplicate entry would propagate into every later column
  # selection and into the lag-feature generation.
  if 'rsi' not in numeric_features:
    numeric_features.append('rsi')
  # Drop the rows that contain missing values after the RSI computation.
  all_data = all_data.dropna()
# info() prints its own summary and returns None, so it is not wrapped in print().
all_data.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 53072 entries, 14 to 53085
Data columns (total 3 columns):
 #   Column     Non-Null Count  Dtype         
---  ------     --------------  -----         
 0   open_time  53072 non-null  datetime64[ns]
 1   close      53072 non-null  float64       
 2   rsi        53072 non-null  float64       
dtypes: datetime64[ns](1), float64(2)
memory usage: 1.6 MB
None


In [5]:
# Columns to inspect: the date column(s) plus every numeric feature
# registered so far.
all_cols = date_features + numeric_features
print('All Columns: ', all_cols)
# Bare expression so the notebook displays this column subset.
all_data[all_cols]
# Disabled: would have replaced all_data with a copy restricted to all_cols.
#all_data = all_data[[all_cols]].copy()

All Columns:  ['open_time', 'close', 'rsi']


Unnamed: 0,open_time,close,rsi
14,2017-08-17 18:00:00,4256.97,44.569861
15,2017-08-17 19:00:00,4325.23,51.964570
16,2017-08-17 20:00:00,4346.74,54.045062
17,2017-08-17 21:00:00,4333.55,52.542259
18,2017-08-17 22:00:00,4336.80,52.889862
...,...,...,...
53081,2023-09-04 14:00:00,25854.41,25.916521
53082,2023-09-04 15:00:00,25815.71,17.561492
53083,2023-09-04 15:00:00,25832.75,28.492630
53084,2023-09-04 15:00:00,25800.87,22.485454


In [6]:
# Experiment object used as the entry point for loading the saved model here
# and for running predictions in a later cell.
ca = ClassificationExperiment()
# Load the previously saved model by name.
# NOTE(review): the name presumably encodes stop loss 2.0 (SL) and 720
# regression steps (RT), matching the configuration cell — confirm.
best = ca.load_model('xgboost_SL_2.0_RT_720_RPL_24_1')

Transformation Pipeline and Model Successfully Loaded


In [7]:
def regress_until_diff(data: pd.DataFrame, diff_percent: float, max_regression_times=6, label_col=None):
    """Label each row by the first large forward move of the close price.

    For every row except the first, later rows are scanned one step at a
    time (at most ``max_regression_times`` steps, never past the end of the
    frame) until the percentage change of ``close`` relative to the current
    row exceeds ``diff_percent`` in absolute value. The row is labelled
    ``'SOBE_<diff_percent>'`` when the last computed change is
    ``>= diff_percent``, ``'CAI_<diff_percent>'`` when it is
    ``<= -diff_percent`` and ``'ESTAVEL'`` otherwise. Row 0 is never scanned
    and always stays ``'ESTAVEL'``.

    Args:
        data: Frame with a numeric ``close`` column. It is modified in place:
            the label column and the helper columns ``close_shift_x``,
            ``diff_shift_x`` and ``shift_x`` are written to it.
        diff_percent: Threshold, in percent.
        max_regression_times: Maximum number of rows to look ahead.
        label_col: Name of the label column. When omitted, the notebook-level
            ``label`` variable is used.

    Returns:
        A new frame equal to ``data`` without the three helper columns.
    """
    target = label if label_col is None else label_col
    closes = data['close'].to_numpy()
    n_rows = len(closes)

    # Results are collected positionally and written back with one plain
    # column assignment each. Chained `data[col].iloc[...] = value` writes go
    # to a temporary object and are lost under pandas copy-on-write.
    close_shift = [0.0] * n_rows
    diff_shift = [0.0] * n_rows
    shift = [0] * n_rows
    status = ['ESTAVEL'] * n_rows

    for row_nu in range(1, n_rows):
        close = closes[row_nu]
        # Defaults for rows with nothing to look ahead to (e.g. the last row),
        # so no value leaks in from the previous row's scan.
        close_px = close
        diff = 0
        steps = 0
        while abs(diff) <= diff_percent:
            if steps >= max_regression_times or row_nu + steps + 1 >= n_rows:
                break
            steps += 1
            close_px = closes[row_nu + steps]
            diff = -100 * (close - close_px) / close

        close_shift[row_nu] = close_px
        diff_shift[row_nu] = diff
        # Number of rows actually looked ahead.
        shift[row_nu] = steps
        if diff >= diff_percent:
            status[row_nu] = 'SOBE_' + str(diff_percent)
        elif diff <= -diff_percent:
            status[row_nu] = 'CAI_' + str(diff_percent)

    data['close_shift_x'] = close_shift
    data['diff_shift_x'] = diff_shift
    data['shift_x'] = shift
    data[target] = status

    return data.drop(columns=['close_shift_x', 'diff_shift_x', 'shift_x'])


# Build the lagged feature matrix: for every numeric feature, add one column
# per look-back step (1..regression_times) holding the value from that many
# rows earlier.
all_cols = date_features + numeric_features
print('All Columns: ', all_cols)
_data = all_data[all_cols].copy()

# Labelling is disabled in this run, so _data carries no label column.
#_data = regress_until_diff(_data, stop_loss, 6)

# Collect the shifted columns first and attach them with a single concat.
# Inserting regression_times columns per feature one at a time fragments the
# frame and makes pandas emit a PerformanceWarning.
lagged = {}
for nf in numeric_features.copy():  # iterate a snapshot: the list grows below
    for lag in range(1, regression_times + 1):
        col = nf + "_" + str(lag)
        lagged[col] = _data[nf].shift(lag)
        numeric_features.append(col)
if lagged:
    _data = pd.concat([_data, pd.DataFrame(lagged)], axis=1)

# Rows without a full look-back history contain NaN; drop them, then round.
_data = _data.dropna().round(2)

All Columns:  ['open_time', 'close', 'rsi']


In [8]:
# Preview the last 10 rows of the lagged feature matrix.
# NOTE(review): _data already had dropna applied when it was built, so the
# dropna() here looks redundant.
_data.dropna().tail(10)

Unnamed: 0,open_time,close,rsi,close_1,close_2,close_3,close_4,close_5,close_6,close_7,...,rsi_711,rsi_712,rsi_713,rsi_714,rsi_715,rsi_716,rsi_717,rsi_718,rsi_719,rsi_720
53076,2023-09-04 13:00:00,25883.35,35.24,25883.35,25883.35,25858.38,25858.38,25858.38,25858.38,25892.49,...,48.29,48.49,43.69,44.8,40.31,42.17,47.15,54.04,55.8,55.22
53077,2023-09-04 13:00:00,25883.35,35.24,25883.35,25883.35,25883.35,25858.38,25858.38,25858.38,25858.38,...,45.84,48.29,48.49,43.69,44.8,40.31,42.17,47.15,54.04,55.8
53078,2023-09-04 14:00:00,25854.41,25.92,25883.35,25883.35,25883.35,25883.35,25858.38,25858.38,25858.38,...,41.17,45.84,48.29,48.49,43.69,44.8,40.31,42.17,47.15,54.04
53079,2023-09-04 14:00:00,25854.41,25.92,25854.41,25883.35,25883.35,25883.35,25883.35,25858.38,25858.38,...,43.18,41.17,45.84,48.29,48.49,43.69,44.8,40.31,42.17,47.15
53080,2023-09-04 14:00:00,25854.41,25.92,25854.41,25854.41,25883.35,25883.35,25883.35,25883.35,25858.38,...,40.06,43.18,41.17,45.84,48.29,48.49,43.69,44.8,40.31,42.17
53081,2023-09-04 14:00:00,25854.41,25.92,25854.41,25854.41,25854.41,25883.35,25883.35,25883.35,25883.35,...,39.11,40.06,43.18,41.17,45.84,48.29,48.49,43.69,44.8,40.31
53082,2023-09-04 15:00:00,25815.71,17.56,25854.41,25854.41,25854.41,25854.41,25883.35,25883.35,25883.35,...,40.01,39.11,40.06,43.18,41.17,45.84,48.29,48.49,43.69,44.8
53083,2023-09-04 15:00:00,25832.75,28.49,25815.71,25854.41,25854.41,25854.41,25854.41,25883.35,25883.35,...,35.47,40.01,39.11,40.06,43.18,41.17,45.84,48.29,48.49,43.69
53084,2023-09-04 15:00:00,25800.87,22.49,25832.75,25815.71,25854.41,25854.41,25854.41,25854.41,25883.35,...,49.4,35.47,40.01,39.11,40.06,43.18,41.17,45.84,48.29,48.49
53085,2023-09-04 15:00:00,25832.75,36.83,25800.87,25832.75,25815.71,25854.41,25854.41,25854.41,25854.41,...,54.44,49.4,35.47,40.01,39.11,40.06,43.18,41.17,45.84,48.29


In [9]:
# Score only the most recent 24*30*4 = 2880 rows of the feature matrix.
test_data = _data.tail(24*30*4).copy()

# Run the loaded model over that slice; the back-test further down reads the
# 'prediction_label' column from the result.
predict = ca.predict_model(best, data=test_data)
# Disabled scoring code: it needs the true label column, which is not built
# in this run because the regress_until_diff call is commented out.
#predict[label] = test_data[label]
#predict['_score'] = predict['prediction_label'] == predict[label]
#print('Score Mean:', predict['_score'].mean())
# Order the rows by the date column(s).
predict = predict.sort_values(date_features)

In [10]:
#predict[[label, '_score']].groupby(label).mean()

In [11]:
#predict[[label, '_score']].groupby(label).count()

In [12]:
def test_trading_crypto_v3(data: pd.DataFrame, start_date, end_date, value: float, stop_loss = 3.0):
  _data = data.copy()
  _data.index = _data['open_time']
  _data = _data[(_data.index >= start_date) & (_data.index <= end_date)]
  saldo = value
  operacao = ''
  comprado = False
  valor_compra = 0 
  valor_venda = 0
  diff = 0.0
  
  operacao_compra = ''
  for row_nu in range(1, _data.shape[0]):
    open_time = pd.to_datetime(_data.iloc[row_nu:row_nu+1]['open_time'].values[0]).strftime("%Y-%m-%d %Hh")
    type(open_time)
    operacao = _data.iloc[row_nu:row_nu+1]['prediction_label'].values[0]
    #print(operacao)
    if (operacao.startswith('SOBE') or operacao.startswith('CAI')) and not comprado:
      operacao_compra = operacao
      valor_compra = round(_data.iloc[row_nu:row_nu+1]['close'].values[0], 2)
      print(f'[{row_nu}][{operacao_compra}][{open_time}] => Compra: {valor_compra}')
      comprado = True

    if comprado:
      diff = 100 * (_data.iloc[row_nu:row_nu+1]['close'].values[0] - valor_compra) / valor_compra
      print(f'[{row_nu}][{operacao_compra}][{open_time}] Diff ==> {round(diff,2)}% - Comprado: {comprado}')
    
    if (abs(diff) >= stop_loss) and comprado:
      valor_venda = round(_data.iloc[row_nu:row_nu+1]['close'].values[0],2)
      if operacao_compra.startswith('SOBE'):
        saldo += round(saldo * (diff/100), 2)
      else:
        saldo += round(saldo * (-diff/100), 2)
      print(f'[{row_nu}][{operacao_compra}][{open_time}] => Venda: {valor_venda} => Diff: {round(diff,2)}% ==> Saldo: {saldo}')
      comprado = False

  print(f'Saldo: {saldo}')
  return saldo

# Show the time span covered by the predictions.
print('Min Data: ', predict['open_time'].min())
print('Max Data: ', predict['open_time'].max())
# Simulate trading from 2023-01-01 through the last prediction, starting with
# a balance of 100.0 and the notebook-level stop_loss threshold. As the last
# expression of the cell, the returned final balance is also displayed.
test_trading_crypto_v3(predict, '2023-01-01', predict['open_time'].max(), 100.0, stop_loss)

Min Data:  2023-05-15 10:00:00
Max Data:  2023-09-04 15:00:00
[45][SOBE_2.0][2023-05-17 07h] => Compra: 26845.140625
[45][SOBE_2.0][2023-05-17 07h] Diff ==> 0.0% - Comprado: True
[46][SOBE_2.0][2023-05-17 08h] Diff ==> -0.12% - Comprado: True
[47][SOBE_2.0][2023-05-17 09h] Diff ==> 0.1% - Comprado: True
[48][SOBE_2.0][2023-05-17 10h] Diff ==> 0.09% - Comprado: True
[49][SOBE_2.0][2023-05-17 11h] Diff ==> -0.77% - Comprado: True
[50][SOBE_2.0][2023-05-17 12h] Diff ==> -0.65% - Comprado: True
[51][SOBE_2.0][2023-05-17 13h] Diff ==> -0.78% - Comprado: True
[52][SOBE_2.0][2023-05-17 14h] Diff ==> -0.24% - Comprado: True
[53][SOBE_2.0][2023-05-17 15h] Diff ==> -0.18% - Comprado: True
[54][SOBE_2.0][2023-05-17 16h] Diff ==> 0.22% - Comprado: True
[55][SOBE_2.0][2023-05-17 17h] Diff ==> 0.9% - Comprado: True
[56][SOBE_2.0][2023-05-17 18h] Diff ==> 0.95% - Comprado: True
[57][SOBE_2.0][2023-05-17 19h] Diff ==> 2.01% - Comprado: True
[57][SOBE_2.0][2023-05-17 19h] => Venda: 27385.08984375 => Di

350.56