In [1]:
import sys
sys.path.insert(0, sys.path[0].removesuffix('/src/crypto'))
from pycaret.classification import *
from src.utils import *
from src.calcEMA import calc_RSI
import plotly.express as px
from sklearn.model_selection import train_test_split

In [2]:
# Variables
currency = 'USDT'
crypto = 'BTC'

stop_loss = 2.0
datadir = './data/' + crypto + currency
label = 'status'
regression_times = 24*30 # horas
regression_profit_and_loss = 24
#numeric_features=['open', 'high', 'low', 'volume', 'close', 'rsi']
numeric_features=['close',]
_calc_rsi = True

### Metadata

<code>
Field Name - Description</br>
open_time - Kline Open time in unix time format</br>
open - Open Price</br>
high - High Price</br>
low	- Low Price</br>
close	- Close Price</br>
volume - Volume</br>
close_time - Kline Close time in unix time format</br>
quote_volume - Quote Asset Volume</br>
count	- Number of Trades</br>
taker_buy_volume - Taker buy base asset volume during this period</br>
taker_buy_quote_volume - Taker buy quote asset volume during this period</br>
ignore - Ignore</br>
</code>

In [3]:
use_cols = date_features + numeric_features
print(use_cols)
all_data = read_data(datadir, all_cols=None, use_cols=use_cols)
print(all_data.info())

['open_time', 'close']
Start reading file:  ./data/BTCUSDT/BTCUSDT.csv
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 53086 entries, 0 to 53085
Data columns (total 2 columns):
 #   Column     Non-Null Count  Dtype         
---  ------     --------------  -----         
 0   open_time  53086 non-null  datetime64[ns]
 1   close      53086 non-null  float64       
dtypes: datetime64[ns](1), float64(1)
memory usage: 829.6 KB
None


In [4]:
if _calc_rsi:
  all_data = calc_RSI(all_data)
  numeric_features.append('rsi')
  all_data.dropna(inplace=True)
print(all_data.info())

<class 'pandas.core.frame.DataFrame'>
Int64Index: 53072 entries, 14 to 53085
Data columns (total 3 columns):
 #   Column     Non-Null Count  Dtype         
---  ------     --------------  -----         
 0   open_time  53072 non-null  datetime64[ns]
 1   close      53072 non-null  float64       
 2   rsi        53072 non-null  float64       
dtypes: datetime64[ns](1), float64(2)
memory usage: 1.6 MB
None


In [5]:
all_cols = date_features + numeric_features
print('All Columns: ', all_cols)
all_data[all_cols]
#all_data = all_data[[all_cols]].copy()

All Columns:  ['open_time', 'close', 'rsi']


Unnamed: 0,open_time,close,rsi
14,2017-08-17 18:00:00,4256.97,44.569861
15,2017-08-17 19:00:00,4325.23,51.964570
16,2017-08-17 20:00:00,4346.74,54.045062
17,2017-08-17 21:00:00,4333.55,52.542259
18,2017-08-17 22:00:00,4336.80,52.889862
...,...,...,...
53081,2023-09-04 14:00:00,25854.41,25.916521
53082,2023-09-04 15:00:00,25815.71,17.561492
53083,2023-09-04 15:00:00,25832.75,28.492630
53084,2023-09-04 15:00:00,25800.87,22.485454


In [6]:
def regress_until_diff(data: pd.DataFrame, diff_percent: float, max_regression_profit_and_loss=6):
    data['close_shift_x'] = 0.0
    data['diff_shift_x'] = 0.0
    data['shift_x'] = 0
    data[label] = 'ESTAVEL'
    for row_nu in range(1, data.shape[0]):
        diff = 0
        i = 1
        while (abs(diff) <= diff_percent):
            if (i > max_regression_profit_and_loss) or ((row_nu + i) >= data.shape[0]):
                break

            close = data.iloc[row_nu:row_nu + 1]['close'].values[0]
            close_px = data.iloc[row_nu + i:row_nu + i + 1]['close'].values[0]
            diff = -100 * (close - close_px) / close
            # print(f'ROW_NU: {row_nu} - regresssion_times: {i} - diff: {diff}')
            i += 1
        data['close_shift_x'].iloc[row_nu:row_nu + 1] = close_px
        data['diff_shift_x'].iloc[row_nu:row_nu + 1] = diff
        data['shift_x'].iloc[row_nu:row_nu + 1] = i - 1 if i == max_regression_profit_and_loss + 1 else i
        if diff >= diff_percent:
            data[label].iloc[row_nu:row_nu + 1] = 'SOBE_' + str(diff_percent)
        elif diff <= -diff_percent:
            data[label].iloc[row_nu:row_nu + 1] = 'CAI_' + str(diff_percent)

    return data.drop(columns=['close_shift_x', 'diff_shift_x', 'shift_x'])


all_cols = date_features + numeric_features
print('All Columns: ', all_cols)
_data = all_data[all_cols].copy()

_data = regress_until_diff(_data, stop_loss, regression_profit_and_loss)

for nf in numeric_features.copy():
    for i in range(1, regression_times + 1):
        col = nf + "_" + str(i)
        _data[col] = _data[nf].shift(i)
        numeric_features.append(col)

_data.dropna(inplace=True)
_data.round(2).head()

All Columns:  ['open_time', 'close', 'rsi']


Unnamed: 0,open_time,close,rsi,status,close_1,close_2,close_3,close_4,close_5,close_6,...,rsi_711,rsi_712,rsi_713,rsi_714,rsi_715,rsi_716,rsi_717,rsi_718,rsi_719,rsi_720
734,2017-09-17 00:00:00,3670.0,50.72,CAI_2.0,3714.95,3710.0,3675.0,3662.13,3664.0,3681.0,...,48.89,45.76,42.61,47.17,46.99,52.89,52.54,54.05,51.96,44.57
735,2017-09-17 01:00:00,3602.0,46.16,CAI_2.0,3670.0,3714.95,3710.0,3675.0,3662.13,3664.0,...,48.35,48.89,45.76,42.61,47.17,46.99,52.89,52.54,54.05,51.96
736,2017-09-17 02:00:00,3640.02,48.93,CAI_2.0,3602.0,3670.0,3714.95,3710.0,3675.0,3662.13,...,51.66,48.35,48.89,45.76,42.61,47.17,46.99,52.89,52.54,54.05
737,2017-09-17 03:00:00,3580.0,45.0,CAI_2.0,3640.02,3602.0,3670.0,3714.95,3710.0,3675.0,...,47.32,51.66,48.35,48.89,45.76,42.61,47.17,46.99,52.89,52.54
738,2017-09-17 04:00:00,3560.01,43.74,SOBE_2.0,3580.0,3640.02,3602.0,3670.0,3714.95,3710.0,...,50.07,47.32,51.66,48.35,48.89,45.76,42.61,47.17,46.99,52.89


In [7]:
train_data, test_data = train_test_split(_data, test_size=0.3)
train_data = train_data.sort_values('open_time')
train_data

Unnamed: 0,open_time,close,rsi,status,close_1,close_2,close_3,close_4,close_5,close_6,...,rsi_711,rsi_712,rsi_713,rsi_714,rsi_715,rsi_716,rsi_717,rsi_718,rsi_719,rsi_720
734,2017-09-17 00:00:00,3670.00,50.719270,CAI_2.0,3714.95,3710.00,3675.00,3662.13,3664.00,3681.00,...,48.892517,45.764896,42.608803,47.169099,46.991204,52.889862,52.542259,54.045062,51.964570,44.569861
735,2017-09-17 01:00:00,3602.00,46.163262,CAI_2.0,3670.00,3714.95,3710.00,3675.00,3662.13,3664.00,...,48.351287,48.892517,45.764896,42.608803,47.169099,46.991204,52.889862,52.542259,54.045062,51.964570
737,2017-09-17 03:00:00,3580.00,45.000164,CAI_2.0,3640.02,3602.00,3670.00,3714.95,3710.00,3675.00,...,47.319243,51.657025,48.351287,48.892517,45.764896,42.608803,47.169099,46.991204,52.889862,52.542259
738,2017-09-17 04:00:00,3560.01,43.741350,SOBE_2.0,3580.00,3640.02,3602.00,3670.00,3714.95,3710.00,...,50.071996,47.319243,51.657025,48.351287,48.892517,45.764896,42.608803,47.169099,46.991204,52.889862
741,2017-09-17 07:00:00,3559.00,43.766189,SOBE_2.0,3563.05,3557.75,3560.01,3580.00,3640.02,3602.00,...,57.074250,50.741357,47.804989,50.071996,47.319243,51.657025,48.351287,48.892517,45.764896,42.608803
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
53078,2023-09-04 14:00:00,25854.41,25.916521,ESTAVEL,25883.35,25883.35,25883.35,25883.35,25858.38,25858.38,...,41.169762,45.842140,48.287097,48.489657,43.687702,44.797493,40.313887,42.173124,47.154862,54.043485
53080,2023-09-04 14:00:00,25854.41,25.916521,ESTAVEL,25854.41,25854.41,25883.35,25883.35,25883.35,25883.35,...,40.056565,43.182608,41.169762,45.842140,48.287097,48.489657,43.687702,44.797493,40.313887,42.173124
53081,2023-09-04 14:00:00,25854.41,25.916521,ESTAVEL,25854.41,25854.41,25854.41,25883.35,25883.35,25883.35,...,39.109213,40.056565,43.182608,41.169762,45.842140,48.287097,48.489657,43.687702,44.797493,40.313887
53085,2023-09-04 15:00:00,25832.75,36.828561,ESTAVEL,25800.87,25832.75,25815.71,25854.41,25854.41,25854.41,...,54.442936,49.400804,35.467313,40.008427,39.109213,40.056565,43.182608,41.169762,45.842140,48.287097


In [8]:
setup = setup(train_data,
              train_size=0.7,
              target=label,
              numeric_features=numeric_features,
              date_features=['open_time'],
              create_date_columns=["hour", "day", "month"],
              fold_strategy='timeseries',
              fold=3,
              session_id=123,
              normalize=True,
              use_gpu=False,
              verbose=True,
              )

Unnamed: 0,Description,Value
0,Session id,123
1,Target,status
2,Target type,Multiclass
3,Target mapping,"CAI_2.0: 0, ESTAVEL: 1, SOBE_2.0: 2"
4,Original data shape,"(36646, 1444)"
5,Transformed data shape,"(36646, 1446)"
6,Transformed train set shape,"(25652, 1446)"
7,Transformed test set shape,"(10994, 1446)"
8,Numeric features,1442
9,Date features,1


In [9]:
#Accuracy	AUC	Recall	Prec.	F1	Kappa	MCC

# best = setup.compare_models(exclude=['lightgbm'])
#model_name = 'lightgbm' 'xgboost'
model_name = 'lightgbm'
best = setup.create_model(model_name)
best

Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.6147,0.797,0.6147,0.6117,0.6113,0.4202,0.4215
1,0.6902,0.8544,0.6902,0.6883,0.6876,0.5326,0.5339
2,0.7129,0.875,0.7129,0.7127,0.7108,0.5677,0.5694
Mean,0.6726,0.8422,0.6726,0.6709,0.6699,0.5068,0.5083
Std,0.042,0.033,0.042,0.0431,0.0425,0.0629,0.0631


Processing:   0%|          | 0/4 [00:00<?, ?it/s]

In [10]:
# predict on test set
holdout_pred = predict_model(best)
print(holdout_pred['prediction_score'].mean())
holdout_pred

Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
0,Light Gradient Boosting Machine,0.7382,0.8929,0.7382,0.7382,0.7363,0.6057,0.6073


0.5607273512825178


Unnamed: 0,open_time,close,rsi,close_1,close_2,close_3,close_4,close_5,close_6,close_7,...,rsi_714,rsi_715,rsi_716,rsi_717,rsi_718,rsi_719,rsi_720,status,prediction_label,prediction_score
23904,2020-05-13 13:00:00,9080.059570,66.670326,9089.299805,9035.980469,8948.049805,8914.219727,8908.179688,8867.200195,8912.429688,...,47.444920,47.493267,48.384430,44.102165,34.308949,39.439339,45.553856,2,SOBE_2.0,0.5041
46464,2022-12-10 11:00:00,17168.609375,55.752327,17163.939453,17145.289062,17141.820312,17154.060547,17147.210938,17134.119141,17152.169922,...,51.658222,62.893013,59.680969,61.429649,61.726246,45.179089,38.225883,1,ESTAVEL,0.7100
41201,2022-05-05 04:00:00,39752.929688,68.085342,39729.410156,39773.328125,39629.699219,39661.339844,39690.000000,39681.808594,39741.078125,...,56.977409,55.009365,58.021713,59.432316,57.227428,59.952690,62.071671,0,CAI_2.0,0.5725
24498,2020-06-07 07:00:00,9658.990234,48.391151,9665.799805,9671.330078,9681.209961,9685.690430,9684.230469,9665.000000,9673.000000,...,58.153473,55.179554,60.398586,64.689232,58.554531,60.723465,58.011623,0,ESTAVEL,0.4333
42070,2022-06-10 09:00:00,29897.089844,40.607990,30020.810547,30052.429688,30095.480469,30104.119141,30093.000000,30114.380859,30024.000000,...,48.377937,53.679173,50.153706,35.103390,51.985752,55.478878,54.672161,0,CAI_2.0,0.7124
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
34743,2021-08-08 20:00:00,43840.910156,47.687340,43715.101562,43627.128906,43674.820312,43720.011719,43984.550781,43918.968750,44750.000000,...,57.918316,59.016468,67.619209,63.569622,69.369774,67.553406,60.580540,2,CAI_2.0,0.5345
38103,2021-12-27 02:00:00,50816.238281,60.501167,50787.941406,50622.871094,50775.488281,50973.640625,50774.160156,50269.441406,50342.511719,...,51.904705,41.025314,37.887249,38.058254,40.733162,38.436485,38.728958,2,SOBE_2.0,0.4569
47727,2023-02-01 02:00:00,23108.429688,52.557529,23123.089844,23085.730469,23125.130859,23181.810547,22948.689453,23095.410156,23163.880859,...,72.925552,71.708672,61.286362,63.028866,61.760674,67.723251,54.204479,2,SOBE_2.0,0.4894
35500,2021-09-09 13:00:00,46875.980469,57.135403,46670.699219,46293.148438,46153.011719,46160.878906,46272.058594,46275.000000,46146.441406,...,45.272064,47.749283,50.095119,55.697880,47.654686,47.673321,57.554169,0,CAI_2.0,0.5212


In [11]:
test_data.sort_values(date_features).drop(columns=[label])

Unnamed: 0,open_time,close,rsi,close_1,close_2,close_3,close_4,close_5,close_6,close_7,...,rsi_711,rsi_712,rsi_713,rsi_714,rsi_715,rsi_716,rsi_717,rsi_718,rsi_719,rsi_720
736,2017-09-17 02:00:00,3640.02,48.925756,3602.00,3670.00,3714.95,3710.00,3675.00,3662.13,3664.00,...,51.657025,48.351287,48.892517,45.764896,42.608803,47.169099,46.991204,52.889862,52.542259,54.045062
739,2017-09-17 05:00:00,3557.75,43.592879,3560.01,3580.00,3640.02,3602.00,3670.00,3714.95,3710.00,...,47.804989,50.071996,47.319243,51.657025,48.351287,48.892517,45.764896,42.608803,47.169099,46.991204
740,2017-09-17 06:00:00,3563.05,44.072314,3557.75,3560.01,3580.00,3640.02,3602.00,3670.00,3714.95,...,50.741357,47.804989,50.071996,47.319243,51.657025,48.351287,48.892517,45.764896,42.608803,47.169099
744,2017-09-17 10:00:00,3577.44,46.452127,3549.98,3535.01,3559.00,3563.05,3557.75,3560.01,3580.00,...,48.125539,53.487672,54.749034,57.074250,50.741357,47.804989,50.071996,47.319243,51.657025,48.351287
747,2017-09-17 13:00:00,3638.00,53.069718,3538.31,3499.03,3577.44,3549.98,3535.01,3559.00,3563.05,...,43.537011,41.418037,43.983214,48.125539,53.487672,54.749034,57.074250,50.741357,47.804989,50.071996
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
53072,2023-09-04 12:00:00,25858.38,15.816116,25858.38,25858.38,25892.49,25892.49,25892.49,25892.49,25907.79,...,40.313887,42.173124,47.154862,54.043485,55.800110,55.220882,50.933159,50.111812,48.107395,48.705127
53077,2023-09-04 13:00:00,25883.35,35.236885,25883.35,25883.35,25883.35,25858.38,25858.38,25858.38,25858.38,...,45.842140,48.287097,48.489657,43.687702,44.797493,40.313887,42.173124,47.154862,54.043485,55.800110
53074,2023-09-04 13:00:00,25883.35,35.236885,25858.38,25858.38,25858.38,25858.38,25892.49,25892.49,25892.49,...,43.687702,44.797493,40.313887,42.173124,47.154862,54.043485,55.800110,55.220882,50.933159,50.111812
53082,2023-09-04 15:00:00,25815.71,17.561492,25854.41,25854.41,25854.41,25854.41,25883.35,25883.35,25883.35,...,40.008427,39.109213,40.056565,43.182608,41.169762,45.842140,48.287097,48.489657,43.687702,44.797493


In [12]:
predict = predict_model(best, data=test_data.sort_values(date_features).drop(columns=[label]))
predict[label] = test_data[label]
predict['_score'] = predict['prediction_label'] == predict[label]
print('Score Mean:', predict['_score'].mean())
predict.sort_values(date_features)

Score Mean: 0.7409907041894818


Unnamed: 0,open_time,close,rsi,close_1,close_2,close_3,close_4,close_5,close_6,close_7,...,rsi_715,rsi_716,rsi_717,rsi_718,rsi_719,rsi_720,prediction_label,prediction_score,status,_score
736,2017-09-17 02:00:00,3640.020020,48.925755,3602.000000,3670.000000,3714.949951,3710.000000,3675.000000,3662.129883,3664.000000,...,42.608803,47.169098,46.991203,52.889862,52.542259,54.045063,SOBE_2.0,0.6150,CAI_2.0,False
739,2017-09-17 05:00:00,3557.750000,43.592880,3560.010010,3580.000000,3640.020020,3602.000000,3670.000000,3714.949951,3710.000000,...,48.351288,48.892517,45.764896,42.608803,47.169098,46.991203,SOBE_2.0,0.7067,SOBE_2.0,True
740,2017-09-17 06:00:00,3563.050049,44.072315,3557.750000,3560.010010,3580.000000,3640.020020,3602.000000,3670.000000,3714.949951,...,51.657024,48.351288,48.892517,45.764896,42.608803,47.169098,SOBE_2.0,0.6705,SOBE_2.0,True
744,2017-09-17 10:00:00,3577.439941,46.452129,3549.979980,3535.010010,3559.000000,3563.050049,3557.750000,3560.010010,3580.000000,...,50.741356,47.804989,50.071995,47.319244,51.657024,48.351288,SOBE_2.0,0.6651,CAI_2.0,False
747,2017-09-17 13:00:00,3638.000000,53.069717,3538.310059,3499.030029,3577.439941,3549.979980,3535.010010,3559.000000,3563.050049,...,53.487671,54.749035,57.074249,50.741356,47.804989,50.071995,SOBE_2.0,0.6784,SOBE_2.0,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
53072,2023-09-04 12:00:00,25858.380859,15.816115,25858.380859,25858.380859,25892.490234,25892.490234,25892.490234,25892.490234,25907.789062,...,55.800110,55.220882,50.933159,50.111813,48.107395,48.705128,ESTAVEL,0.8744,ESTAVEL,True
53077,2023-09-04 13:00:00,25883.349609,35.236885,25883.349609,25883.349609,25883.349609,25858.380859,25858.380859,25858.380859,25858.380859,...,44.797493,40.313889,42.173122,47.154861,54.043484,55.800110,ESTAVEL,0.9334,ESTAVEL,True
53074,2023-09-04 13:00:00,25883.349609,35.236885,25858.380859,25858.380859,25858.380859,25858.380859,25892.490234,25892.490234,25892.490234,...,47.154861,54.043484,55.800110,55.220882,50.933159,50.111813,ESTAVEL,0.8265,ESTAVEL,True
53082,2023-09-04 15:00:00,25815.710938,17.561491,25854.410156,25854.410156,25854.410156,25854.410156,25883.349609,25883.349609,25883.349609,...,41.169762,45.842140,48.287098,48.489658,43.687702,44.797493,ESTAVEL,0.7839,ESTAVEL,True


In [13]:
predict[[label, '_score']].groupby(label).mean()

Unnamed: 0_level_0,_score
status,Unnamed: 1_level_1
CAI_2.0,0.679386
ESTAVEL,0.831828
SOBE_2.0,0.703086


In [14]:
final_predict = finalize_model(best) # data=test_data.sort_values(date_features).drop(columns=[label]))

In [15]:
_predict = predict_model(final_predict, data=test_data.sort_values(date_features).drop(columns=[label]))
_predict[label] = test_data[label]
_predict['_score'] = _predict['prediction_label'] == _predict[label]
print('Score Mean:', _predict['_score'].mean())
_predict.sort_values(date_features)
_predict

Score Mean: 0.750095504902585


Unnamed: 0,open_time,close,rsi,close_1,close_2,close_3,close_4,close_5,close_6,close_7,...,rsi_715,rsi_716,rsi_717,rsi_718,rsi_719,rsi_720,prediction_label,prediction_score,status,_score
736,2017-09-17 02:00:00,3640.020020,48.925755,3602.000000,3670.000000,3714.949951,3710.000000,3675.000000,3662.129883,3664.000000,...,42.608803,47.169098,46.991203,52.889862,52.542259,54.045063,SOBE_2.0,0.6748,CAI_2.0,False
739,2017-09-17 05:00:00,3557.750000,43.592880,3560.010010,3580.000000,3640.020020,3602.000000,3670.000000,3714.949951,3710.000000,...,48.351288,48.892517,45.764896,42.608803,47.169098,46.991203,SOBE_2.0,0.6558,SOBE_2.0,True
740,2017-09-17 06:00:00,3563.050049,44.072315,3557.750000,3560.010010,3580.000000,3640.020020,3602.000000,3670.000000,3714.949951,...,51.657024,48.351288,48.892517,45.764896,42.608803,47.169098,SOBE_2.0,0.6713,SOBE_2.0,True
744,2017-09-17 10:00:00,3577.439941,46.452129,3549.979980,3535.010010,3559.000000,3563.050049,3557.750000,3560.010010,3580.000000,...,50.741356,47.804989,50.071995,47.319244,51.657024,48.351288,SOBE_2.0,0.6141,CAI_2.0,False
747,2017-09-17 13:00:00,3638.000000,53.069717,3538.310059,3499.030029,3577.439941,3549.979980,3535.010010,3559.000000,3563.050049,...,53.487671,54.749035,57.074249,50.741356,47.804989,50.071995,SOBE_2.0,0.6378,SOBE_2.0,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
53072,2023-09-04 12:00:00,25858.380859,15.816115,25858.380859,25858.380859,25892.490234,25892.490234,25892.490234,25892.490234,25907.789062,...,55.800110,55.220882,50.933159,50.111813,48.107395,48.705128,ESTAVEL,0.9066,ESTAVEL,True
53077,2023-09-04 13:00:00,25883.349609,35.236885,25883.349609,25883.349609,25883.349609,25858.380859,25858.380859,25858.380859,25858.380859,...,44.797493,40.313889,42.173122,47.154861,54.043484,55.800110,ESTAVEL,0.9094,ESTAVEL,True
53074,2023-09-04 13:00:00,25883.349609,35.236885,25858.380859,25858.380859,25858.380859,25858.380859,25892.490234,25892.490234,25892.490234,...,47.154861,54.043484,55.800110,55.220882,50.933159,50.111813,ESTAVEL,0.8983,ESTAVEL,True
53082,2023-09-04 15:00:00,25815.710938,17.561491,25854.410156,25854.410156,25854.410156,25854.410156,25883.349609,25883.349609,25883.349609,...,41.169762,45.842140,48.287098,48.489658,43.687702,44.797493,ESTAVEL,0.8390,ESTAVEL,True


In [16]:
_predict[[label, '_score']].groupby(label).mean()

Unnamed: 0_level_0,_score
status,Unnamed: 1_level_1
CAI_2.0,0.693923
ESTAVEL,0.832009
SOBE_2.0,0.716504


In [17]:
for i in range(1, 99999):
  filename = model_name + '_SL_' + str(stop_loss) + '_RT_' + str(regression_times) + '_RPL_' + str(regression_profit_and_loss) + '_' + str(i) 
  print('Model file name: ', filename + '.pkl')
  if os.path.exists(filename + '.pkl'):
    continue
  else:    
    save_model(final_predict, filename)
    break

Model file name:  lightgbm_SL_2.0_RT_720_RPL_24_1.pkl
Transformation Pipeline and Model Successfully Saved
