In [53]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline

df = pd.read_parquet('./data')
print(f'total dataset: {len(df)} rows')

## filters
# df = df[df['stabilized_spread'] < 10]  # nem tul nagy spread
df = df[df['stabilized_spread'] > 0.3]  # nem tul kis spread

for col in df.columns:
    # if 'nr_trades' in col or 'price_delta' in col and not col.startswith('last'):
    #     df.drop(col, axis=1, inplace=True)
    if 'nr_trades' in col:
        df.drop(col, axis=1, inplace=True)
    if 'past' in col:
        df.drop(col, axis=1, inplace=True)
    if '_spread' == col[1:]:
        df.drop(col, axis=1, inplace=True)

df_max = df[df.wave_direction == 'max'].copy()
df_min = df[df.wave_direction == 'min'].copy()

df_min.drop('wave_direction', axis=1, inplace=True)
df_max.drop('wave_direction', axis=1, inplace=True)
df = None
print(f'min dataset: {len(df_min)}')
print(f'max dataset: {len(df_max)}')

df_min.sort_index(axis=1, inplace=True)
df_max.sort_index(axis=1, inplace=True)


total dataset: 121438 rows
min dataset: 42673
max dataset: 43274


In [50]:
a = df_min.corr().last_price_delta_since_stabilized * 1000
a.sort_values(axis=0, ascending=False)


last_price_delta_since_stabilized    1000.000000
stabilized_spread                     133.429678
4_price_delta                          92.371246
stabilized_amount_mean                 74.208929
0_price_delta                          69.324185
1_price_delta                          67.829208
4_amount_mean                          65.007021
2_price_delta                          64.523479
3_amount_mean                          62.824393
3_price_delta                          61.510505
2_amount_mean                          57.910378
1_amount_mean                          56.058498
0_amount_mean                          53.176914
stabilized_gasp                        44.905629
Name: last_price_delta_since_stabilized, dtype: float64

In [57]:
from spreadsurfer.price_engine import FeatureEngineer
from catboost import CatBoostRegressor
from sklearn.preprocessing import StandardScaler


def train(df, mape=True):
    if mape:
        model = CatBoostRegressor(learning_rate=0.01, depth=7, loss_function='MAPE', random_state=0, verbose=False, iterations=1500)
        pipeline = Pipeline(steps=[
            ('preprocessor', FeatureEngineer()),
            ('model', model)
        ])
    else:
        model = CatBoostRegressor(learning_rate=0.01, depth=7, loss_function='RMSE', random_state=0, verbose=False, iterations=1500)
        pipeline = Pipeline(steps=[
            ('preprocessor', FeatureEngineer()),
            ('model', model)
        ])

    df = df.copy()
    y = df['last_price_delta_since_stabilized']
    X = df.drop('last_price_delta_since_stabilized', axis=1)
    X_train, X_valid, y_train, y_valid = train_test_split(X, y, train_size=0.8, test_size=0.2, random_state=0)

    pipeline.fit(X_train, y_train)
    score = pipeline.score(X_valid, y_valid)
    display(score)
    return model, score


min_mape, min_mape_score = train(df_min, mape=True)
max_mape, max_mape_score = train(df_max, mape=True)
min_rmse, _ = train(df_min, mape=False)
max_rmse, _ = train(df_max, mape=False)


-0.019047122615816425

-0.02426874446991234

0.1713748246553889

0.18109069628690133

In [56]:
from spreadsurfer import now_isoformat

save = True
if save:
    min_mape.save_model(f'./models/{now_isoformat()}-min_mape-score-{1000 * round(min_mape_score, 3)}.cat')
    max_mape.save_model(f'./models/{now_isoformat()}-max_mape-score-{1000 * round(max_mape_score, 3)}.cat')


## TEST LAB


In [58]:
input_from_log = """

2023-02-01 08:55:15.007 | data     | spreadsurfer.datacollect:start:97 - wave 598 collected: {'0_amount_mean': 0.033, '0_nr_trades': 42.0, '0_price_delta': 0.21, '0_spread': 0.89, '1_amount_mean': 0.0322, '1_nr_trades': 43.0, '1_price_delta': 0.21, '1_spread': 0.89, '2_amount_mean': 0.0325, '2_nr_trades': 44.0, '2_price_delta': 0.21, '2_spread': 0.89, '3_amount_mean': 0.0319, '3_nr_trades': 45.0, '3_price_delta': 0.21, '3_spread': 0.89, '4_amount_mean': 0.0333, '4_nr_trades': 46.0, '4_price_delta': 0.2, '4_spread': 0.9, 'last_price_delta_since_stabilized': 0.0, 'past_final_price_0': 4.549999999999272, 'past_final_price_1': 4.709999999999127, 'past_final_price_2': 5.639999999999418, 'past_final_price_3': 5.279999999998836, 'past_final_price_4': 5.479999999999563, 'past_final_price_5': nan, 'past_final_price_6': nan, 'past_final_price_7': -1.640000000003056, 'past_final_price_8': -1.3000000000029104, 'past_final_price_9': 0.0, 'stabilized_amount_mean': 0.0426, 'stabilized_at_ms': 773, 'stabilized_gasp': -0.5878, 'stabilized_nr_trades': 55.0, 'stabilized_spread': 1.1, 'wave_direction': 'max'}
2023-02-01 08:55:16.072 | data     | spreadsurfer.datacollect:start:97 - wave 599 collected: {'0_amount_mean': 0.0348, '0_nr_trades': 6.0, '0_price_delta': 0.3, '0_spread': 0.06, '1_amount_mean': 0.0299, '1_nr_trades': 7.0, '1_price_delta': 0.08, '1_spread': 0.28, '2_amount_mean': 0.0405, '2_nr_trades': 8.0, '2_price_delta': 0.08, '2_spread': 0.28, '3_amount_mean': 0.036, '3_nr_trades': 9.0, '3_price_delta': 0.02, '3_spread': 0.34, '4_amount_mean': 0.0298, '4_nr_trades': 11.0, '4_price_delta': 0.02, '4_spread': 0.34, 'last_price_delta_since_stabilized': -0.19, 'past_final_price_0': 4.709999999999127, 'past_final_price_1': 5.639999999999418, 'past_final_price_2': 5.279999999998836, 'past_final_price_3': 5.479999999999563, 'past_final_price_4': nan, 'past_final_price_5': nan, 'past_final_price_6': -1.640000000003056, 'past_final_price_7': -1.3000000000029104, 'past_final_price_8': 0.0, 'past_final_price_9': 0.0, 'stabilized_amount_mean': 0.0275, 'stabilized_at_ms': 273, 'stabilized_gasp': -0.3978, 'stabilized_nr_trades': 12.0, 'stabilized_spread': 0.36, 'wave_direction': 'max'}
2023-02-01 08:55:16.516 | data     | spreadsurfer.datacollect:start:97 - wave 600 collected: {'0_amount_mean': 0.0197, '0_nr_trades': 6.0, '0_price_delta': 0.01, '0_spread': 0.54, '1_amount_mean': 0.0169, '1_nr_trades': 7.0, '1_price_delta': 0.01, '1_spread': 0.54, '2_amount_mean': 0.015, '2_nr_trades': 8.0, '2_price_delta': 0.01, '2_spread': 0.54, '3_amount_mean': 0.0175, '3_nr_trades': 10.0, '3_price_delta': 0.0, '3_spread': 0.58, '4_amount_mean': 0.0278, '4_nr_trades': 17.0, '4_price_delta': 0.0, '4_spread': 0.72, 'last_price_delta_since_stabilized': -0.51, 'past_final_price_0': 4.709999999999127, 'past_final_price_1': 5.639999999999418, 'past_final_price_2': 5.279999999998836, 'past_final_price_3': 5.479999999999563, 'past_final_price_4': nan, 'past_final_price_5': nan, 'past_final_price_6': -1.640000000003056, 'past_final_price_7': -1.3000000000029104, 'past_final_price_8': 0.0, 'past_final_price_9': 0.0, 'stabilized_amount_mean': 0.0613, 'stabilized_at_ms': 169, 'stabilized_gasp': -0.2321, 'stabilized_nr_trades': 19.0, 'stabilized_spread': 0.72, 'wave_direction': None}
2023-02-01 08:55:16.623 | data     | spreadsurfer.datacollect:start:97 - wave 601 collected: {'0_amount_mean': 0.0175, '0_nr_trades': 10.0, '0_price_delta': 1.01, '0_spread': 0.58, '1_amount_mean': 0.0278, '1_nr_trades': 17.0, '1_price_delta': 1.01, '1_spread': 0.72, '2_amount_mean': 0.0613, '2_nr_trades': 19.0, '2_price_delta': 1.01, '2_spread': 0.72, '3_amount_mean': 0.0585, '3_nr_trades': 20.0, '3_price_delta': 1.01, '3_spread': 0.72, '4_amount_mean': 0.0376, '4_nr_trades': 44.0, '4_price_delta': 0.5, '4_spread': 1.23, 'last_price_delta_since_stabilized': 0.0, 'past_final_price_0': 4.709999999999127, 'past_final_price_1': 5.639999999999418, 'past_final_price_2': 5.279999999998836, 'past_final_price_3': 5.479999999999563, 'past_final_price_4': nan, 'past_final_price_5': nan, 'past_final_price_6': -1.640000000003056, 'past_final_price_7': -1.3000000000029104, 'past_final_price_8': 0.0, 'past_final_price_9': 0.0, 'stabilized_amount_mean': 0.0354, 'stabilized_at_ms': 414, 'stabilized_gasp': -1.2421, 'stabilized_nr_trades': 79.0, 'stabilized_spread': 1.73, 'wave_direction': None}
2023-02-01 08:55:17.004 | data     | spreadsurfer.datacollect:start:97 - wave 602 collected: {'0_amount_mean': 0.0585, '0_nr_trades': 20.0, '0_price_delta': 1.01, '0_spread': 0.72, '1_amount_mean': 0.0376, '1_nr_trades': 44.0, '1_price_delta': 0.5, '1_spread': 1.23, '2_amount_mean': 0.0354, '2_nr_trades': 79.0, '2_price_delta': 0.0, '2_spread': 1.73, '3_amount_mean': 0.0434, '3_nr_trades': 97.0, '3_price_delta': 0.0, '3_spread': 1.75, '4_amount_mean': 0.0426, '4_nr_trades': 99.0, '4_price_delta': 0.0, '4_spread': 1.75, 'last_price_delta_since_stabilized': 0.0, 'past_final_price_0': 4.350000000002183, 'past_final_price_1': 3.9900000000016007, 'past_final_price_2': 4.190000000002328, 'past_final_price_3': nan, 'past_final_price_4': nan, 'past_final_price_5': -2.930000000000291, 'past_final_price_6': -2.5900000000001455, 'past_final_price_7': -1.2899999999972351, 'past_final_price_8': -1.2899999999972351, 'past_final_price_9': 0.0, 'stabilized_amount_mean': 0.0421, 'stabilized_at_ms': 522, 'stabilized_gasp': -1.2421, 'stabilized_nr_trades': 100.0, 'stabilized_spread': 1.75, 'wave_direction': 'max'}
2023-02-01 08:55:18.266 | data     | spreadsurfer.datacollect:start:97 - wave 603 collected: {'0_amount_mean': 0.0023, '0_nr_trades': 3.0, '0_price_delta': 0.18, '0_spread': 0.35, '1_amount_mean': 0.002, '1_nr_trades': 4.0, '1_price_delta': 0.18, '1_spread': 0.41, '2_amount_mean': 0.0143, '2_nr_trades': 6.0, '2_price_delta': 0.18, '2_spread': 0.41, '3_amount_mean': 0.0108, '3_nr_trades': 8.0, '3_price_delta': 0.07, '3_spread': 0.52, '4_amount_mean': 0.0088, '4_nr_trades': 10.0, '4_price_delta': 0.0, '4_spread': 0.59, 'last_price_delta_since_stabilized': 0.15, 'past_final_price_0': 5.830000000001746, 'past_final_price_1': 6.030000000002474, 'past_final_price_2': nan, 'past_final_price_3': nan, 'past_final_price_4': -1.0900000000001455, 'past_final_price_5': -0.75, 'past_final_price_6': 0.5500000000029104, 'past_final_price_7': 0.5500000000029104, 'past_final_price_8': 1.8400000000001455, 'past_final_price_9': 0.0, 'stabilized_amount_mean': 0.0074, 'stabilized_at_ms': 222, 'stabilized_gasp': 0.4479, 'stabilized_nr_trades': 12.0, 'stabilized_spread': 0.59, 'wave_direction': 'min'}
2023-02-01 08:55:19.755 | data     | spreadsurfer.datacollect:start:97 - wave 604 collected: {'0_amount_mean': 0.0023, '0_nr_trades': 7.0, '0_price_delta': 0.08, '0_spread': 0.57, '1_amount_mean': 0.0031, '1_nr_trades': 10.0, '1_price_delta': 0.08, '1_spread': 0.57, '2_amount_mean': 0.0028, '2_nr_trades': 12.0, '2_price_delta': 0.08, '2_spread': 0.57, '3_amount_mean': 0.0093, '3_nr_trades': 15.0, '3_price_delta': 0.06, '3_spread': 0.59, '4_amount_mean': 0.0087, '4_nr_trades': 16.0, '4_price_delta': 0.06, '4_spread': 0.59, 'last_price_delta_since_stabilized': -0.02, 'past_final_price_0': 5.6599999999998545, 'past_final_price_1': nan, 'past_final_price_2': nan, 'past_final_price_3': -1.4600000000027649, 'past_final_price_4': -1.1200000000026193, 'past_final_price_5': 0.18000000000029104, 'past_final_price_6': 0.18000000000029104, 'past_final_price_7': 1.4699999999975262, 'past_final_price_8': -0.37000000000261934, 'past_final_price_9': 0.0, 'stabilized_amount_mean': 0.0094, 'stabilized_at_ms': 285, 'stabilized_gasp': -0.2706, 'stabilized_nr_trades': 20.0, 'stabilized_spread': 0.65, 'wave_direction': None}
2023-02-01 08:55:20.006 | data     | spreadsurfer.datacollect:start:97 - wave 605 collected: {'0_amount_mean': 0.0083, '0_nr_trades': 25.0, '0_price_delta': 0.57, '0_spread': 0.67, '1_amount_mean': 0.0081, '1_nr_trades': 26.0, '1_price_delta': 0.57, '1_spread': 0.67, '2_amount_mean': 0.0079, '2_nr_trades': 27.0, '2_price_delta': 0.57, '2_spread': 0.67, '3_amount_mean': 0.0077, '3_nr_trades': 28.0, '3_price_delta': 0.57, '3_spread': 0.67, '4_amount_mean': 0.0103, '4_nr_trades': 36.0, '4_price_delta': 0.34, '4_spread': 0.9, 'last_price_delta_since_stabilized': 0.0, 'past_final_price_0': nan, 'past_final_price_1': nan, 'past_final_price_2': -0.5700000000033469, 'past_final_price_3': -0.23000000000320142, 'past_final_price_4': 1.069999999999709, 'past_final_price_5': 1.069999999999709, 'past_final_price_6': 2.359999999996944, 'past_final_price_7': 0.5199999999967986, 'past_final_price_8': 0.8899999999994179, 'past_final_price_9': 0.0, 'stabilized_amount_mean': 0.0121, 'stabilized_at_ms': 641, 'stabilized_gasp': 1.0948, 'stabilized_nr_trades': 41.0, 'stabilized_spread': 1.24, 'wave_direction': 'min'}
2023-02-01 08:55:21.010 | data     | spreadsurfer.datacollect:start:97 - wave 606 collected: {'0_amount_mean': 0.003, '0_nr_trades': 4.0, '0_price_delta': 0.66, '0_spread': 0.33, '1_amount_mean': 0.0032, '1_nr_trades': 5.0, '1_price_delta': 0.66, '1_spread': 0.33, '2_amount_mean': 0.0063, '2_nr_trades': 7.0, '2_price_delta': 0.53, '2_spread': 0.46, '3_amount_mean': 0.0278, '3_nr_trades': 15.0, '3_price_delta': 0.14, '3_spread': 0.85, '4_amount_mean': 0.0292, '4_nr_trades': 28.0, '4_price_delta': 0.0, '4_spread': 0.99, 'last_price_delta_since_stabilized': 0.0, 'past_final_price_0': nan, 'past_final_price_1': -0.5200000000004366, 'past_final_price_2': -0.18000000000029104, 'past_final_price_3': 1.1200000000026193, 'past_final_price_4': 1.1200000000026193, 'past_final_price_5': 2.4099999999998545, 'past_final_price_6': 0.569999999999709, 'past_final_price_7': 0.9400000000023283, 'past_final_price_8': 0.05000000000291038, 'past_final_price_9': 0.0, 'stabilized_amount_mean': 0.0282, 'stabilized_at_ms': 137, 'stabilized_gasp': 0.7482, 'stabilized_nr_trades': 29.0, 'stabilized_spread': 0.99, 'wave_direction': 'min'}
2023-02-01 08:55:23.040 | data     | spreadsurfer.datacollect:start:97 - wave 607 collected: {'0_amount_mean': 0.0206, '0_nr_trades': 42.0, '0_price_delta': 0.32, '0_spread': 1.03, '1_amount_mean': 0.021, '1_nr_trades': 47.0, '1_price_delta': 0.21, '1_spread': 1.14, '2_amount_mean': 0.0201, '2_nr_trades': 49.0, '2_price_delta': 0.13, '2_spread': 1.22, '3_amount_mean': 0.0197, '3_nr_trades': 50.0, '3_price_delta': 0.06, '3_spread': 1.29, '4_amount_mean': 0.0195, '4_nr_trades': 51.0, '4_price_delta': 0.01, '4_spread': 1.34, 'last_price_delta_since_stabilized': 0.63, 'past_final_price_0': 1.3699999999989814, 'past_final_price_1': 2.6700000000018917, 'past_final_price_2': 2.6700000000018917, 'past_final_price_3': 3.959999999999127, 'past_final_price_4': 2.1199999999989814, 'past_final_price_5': 2.4900000000016007, 'past_final_price_6': 1.6000000000021828, 'past_final_price_7': 1.5499999999992724, 'past_final_price_8': 2.1300000000010186, 'past_final_price_9': 0.0, 'stabilized_amount_mean': 0.0205, 'stabilized_at_ms': 181, 'stabilized_gasp': 1.1929, 'stabilized_nr_trades': 53.0, 'stabilized_spread': 1.35, 'wave_direction': 'min'}
2023-02-01 08:55:24.028 | data     | spreadsurfer.datacollect:start:97 - wave 608 collected: {'0_amount_mean': 0.0054, '0_nr_trades': 3.0, '0_price_delta': 1.23, '0_spread': 0.42, '1_amount_mean': 0.0091, '1_nr_trades': 11.0, '1_price_delta': 0.99, '1_spread': 0.66, '2_amount_mean': 0.0264, '2_nr_trades': 38.0, '2_price_delta': 0.26, '2_spread': 1.39, '3_amount_mean': 0.0232, '3_nr_trades': 50.0, '3_price_delta': 0.0, '3_spread': 1.65, '4_amount_mean': 0.0259, '4_nr_trades': 60.0, '4_price_delta': 0.0, '4_spread': 1.65, 'last_price_delta_since_stabilized': 0.0, 'past_final_price_0': 3.9800000000032014, 'past_final_price_1': 3.9800000000032014, 'past_final_price_2': 5.270000000000437, 'past_final_price_3': 3.430000000000291, 'past_final_price_4': 3.8000000000029104, 'past_final_price_5': 2.9100000000034925, 'past_final_price_6': 2.860000000000582, 'past_final_price_7': 3.4400000000023283, 'past_final_price_8': 1.3100000000013097, 'past_final_price_9': 0.0, 'stabilized_amount_mean': 0.0255, 'stabilized_at_ms': 345, 'stabilized_gasp': 0.1685, 'stabilized_nr_trades': 61.0, 'stabilized_spread': 1.65, 'wave_direction': 'min'}
2023-02-01 08:55:26.024 | data     | spreadsurfer.datacollect:start:97 - wave 609 collected: {'0_amount_mean': 0.0198, '0_nr_trades': 9.0, '0_price_delta': 1.02, '0_spread': 0.74, '1_amount_mean': 0.0133, '1_nr_trades': 36.0, '1_price_delta': 0.26, '1_spread': 1.5, '2_amount_mean': 0.0133, '2_nr_trades': 37.0, '2_price_delta': 0.26, '2_spread': 1.5, '3_amount_mean': 0.0129, '3_nr_trades': 45.0, '3_price_delta': 0.1, '3_spread': 1.66, '4_amount_mean': 0.0123, '4_nr_trades': 50.0, '4_price_delta': 0.0, '4_spread': 1.76, 'last_price_delta_since_stabilized': -0.11, 'past_final_price_0': 2.4399999999986903, 'past_final_price_1': 0.5999999999985448, 'past_final_price_2': 0.9700000000011642, 'past_final_price_3': 0.08000000000174623, 'past_final_price_4': 0.029999999998835847, 'past_final_price_5': 0.6100000000005821, 'past_final_price_6': -1.5200000000004366, 'past_final_price_7': -2.8300000000017462, 'past_final_price_8': -0.9399999999986903, 'past_final_price_9': 0.0, 'stabilized_amount_mean': 0.0121, 'stabilized_at_ms': 430, 'stabilized_gasp': -0.5445, 'stabilized_nr_trades': 51.0, 'stabilized_spread': 1.76, 'wave_direction': 'max'}
2023-02-01 08:55:26.620 | data     | spreadsurfer.datacollect:start:97 - wave 610 collected: {'0_amount_mean': 0.0073, '0_nr_trades': 5.0, '0_price_delta': 0.0, '0_spread': 0.61, '1_amount_mean': 0.0081, '1_nr_trades': 7.0, '1_price_delta': 0.0, '1_spread': 0.61, '2_amount_mean': 0.0111, '2_nr_trades': 9.0, '2_price_delta': 0.0, '2_spread': 0.61, '3_amount_mean': 0.0109, '3_nr_trades': 10.0, '3_price_delta': 0.0, '3_spread': 0.61, '4_amount_mean': 0.0108, '4_nr_trades': 11.0, '4_price_delta': 0.0, '4_spread': 0.64, 'last_price_delta_since_stabilized': -0.04, 'past_final_price_0': 2.4399999999986903, 'past_final_price_1': 0.5999999999985448, 'past_final_price_2': 0.9700000000011642, 'past_final_price_3': 0.08000000000174623, 'past_final_price_4': 0.029999999998835847, 'past_final_price_5': 0.6100000000005821, 'past_final_price_6': -1.5200000000004366, 'past_final_price_7': -2.8300000000017462, 'past_final_price_8': -0.9399999999986903, 'past_final_price_9': 0.0, 'stabilized_amount_mean': 0.0119, 'stabilized_at_ms': 153, 'stabilized_gasp': -0.3823, 'stabilized_nr_trades': 13.0, 'stabilized_spread': 0.64, 'wave_direction': None}
2023-02-01 08:55:27.005 | data     | spreadsurfer.datacollect:start:97 - wave 611 collected: {'0_amount_mean': 0.0119, '0_nr_trades': 13.0, '0_price_delta': 0.43, '0_spread': 0.64, '1_amount_mean': 0.011, '1_nr_trades': 15.0, '1_price_delta': 0.43, '1_spread': 0.65, '2_amount_mean': 0.0109, '2_nr_trades': 23.0, '2_price_delta': 0.28, '2_spread': 0.83, '3_amount_mean': 0.0122, '3_nr_trades': 25.0, '3_price_delta': 0.25, '3_spread': 0.86, '4_amount_mean': 0.0115, '4_nr_trades': 27.0, '4_price_delta': 0.0, '4_spread': 1.11, 'last_price_delta_since_stabilized': 0.92, 'past_final_price_0': 2.7699999999967986, 'past_final_price_1': 3.139999999999418, 'past_final_price_2': 2.25, 'past_final_price_3': 2.1999999999970896, 'past_final_price_4': 2.779999999998836, 'past_final_price_5': 0.6499999999978172, 'past_final_price_6': -0.6600000000034925, 'past_final_price_7': 1.2299999999995634, 'past_final_price_8': 2.1699999999982538, 'past_final_price_9': 0.0, 'stabilized_amount_mean': 0.0111, 'stabilized_at_ms': 452, 'stabilized_gasp': 0.1235, 'stabilized_nr_trades': 28.0, 'stabilized_spread': 1.11, 'wave_direction': 'min'}
2023-02-01 08:55:28.079 | data     | spreadsurfer.datacollect:start:97 - wave 612 collected: {'0_amount_mean': 0.0007, '0_nr_trades': 5.0, '0_price_delta': 1.03, '0_spread': 0.32, '1_amount_mean': 0.0009, '1_nr_trades': 7.0, '1_price_delta': 1.03, '1_spread': 0.32, '2_amount_mean': 0.0037, '2_nr_trades': 9.0, '2_price_delta': 1.03, '2_spread': 0.32, '3_amount_mean': 0.0034, '3_nr_trades': 11.0, '3_price_delta': 1.03, '3_spread': 0.32, '4_amount_mean': 0.007, '4_nr_trades': 13.0, '4_price_delta': 0.94, '4_spread': 0.41, 'last_price_delta_since_stabilized': -0.17, 'past_final_price_0': 1.3700000000026193, 'past_final_price_1': 0.4800000000032014, 'past_final_price_2': 0.43000000000029104, 'past_final_price_3': 1.0100000000020373, 'past_final_price_4': -1.1199999999989814, 'past_final_price_5': -2.430000000000291, 'past_final_price_6': -0.5399999999972351, 'past_final_price_7': 0.4000000000014552, 'past_final_price_8': -1.7699999999967986, 'past_final_price_9': 0.0, 'stabilized_amount_mean': 0.0461, 'stabilized_at_ms': 697, 'stabilized_gasp': -1.1758, 'stabilized_nr_trades': 35.0, 'stabilized_spread': 1.35, 'wave_direction': 'max'}
2023-02-01 08:55:29.105 | data     | spreadsurfer.datacollect:start:97 - wave 613 collected: {'0_amount_mean': 0.0101, '0_nr_trades': 3.0, '0_price_delta': 0.4, '0_spread': 0.02, '1_amount_mean': 0.0077, '1_nr_trades': 4.0, '1_price_delta': 0.4, '1_spread': 0.05, '2_amount_mean': 0.0063, '2_nr_trades': 5.0, '2_price_delta': 0.0, '2_spread': 0.45, '3_amount_mean': 0.0055, '3_nr_trades': 6.0, '3_price_delta': 0.0, '3_spread': 0.45, '4_amount_mean': 0.005, '4_nr_trades': 7.0, '4_price_delta': 0.0, '4_spread': 0.45, 'last_price_delta_since_stabilized': 0.0, 'past_final_price_0': 0.6500000000014552, 'past_final_price_1': 0.5999999999985448, 'past_final_price_2': 1.180000000000291, 'past_final_price_3': -0.9500000000007276, 'past_final_price_4': -2.2600000000020373, 'past_final_price_5': -0.36999999999898137, 'past_final_price_6': 0.569999999999709, 'past_final_price_7': -1.5999999999985448, 'past_final_price_8': 0.16999999999825377, 'past_final_price_9': 0.0, 'stabilized_amount_mean': 0.0142, 'stabilized_at_ms': 434, 'stabilized_gasp': 0.3199, 'stabilized_nr_trades': 9.0, 'stabilized_spread': 0.45, 'wave_direction': 'min'}

"""

def predict_samples():
    mae = 0
    count = 0
    for line in input_from_log.split('\n'):
        if not line: continue
        count += 1
        json = '{' + line.split('{')[1]
        json = json.replace('nan', '0')
        sample = pd.DataFrame([eval(json)])
        if sample.wave_direction[0] not in ['min', 'max']: continue

        real = sample.last_price_delta_since_stabilized[0]
        # if real < 0.1: continue
        print('expected: ', real)
        sample.drop('last_price_delta_since_stabilized', axis=1, inplace=True)

        if sample.wave_direction[0] == 'min':
            print('using MIN mape model')
            model = min_mape
        else:
            print('using MAX mape model')
            model = max_mape

        sample.drop('wave_direction', axis=1, inplace=True)

        guess = model.predict(sample)[0]
        print('predict: ', guess)
        mae += abs(real - guess)
        print()
    display(f'MAE: {mae / count}')

predict_samples()


expected:  0.0
using MAX mape model
predict:  -0.034095584060593764

expected:  -0.19
using MAX mape model
predict:  -0.2955446615429256

expected:  0.0
using MAX mape model
predict:  0.0018425906336418385

expected:  0.15
using MIN mape model
predict:  0.18834961242327525

expected:  0.0
using MIN mape model
predict:  0.10754536821272957

expected:  0.0
using MIN mape model
predict:  0.2877090093683856

expected:  0.63
using MIN mape model
predict:  0.3734857686379306

expected:  0.0
using MIN mape model
predict:  0.041925558458776865

expected:  -0.11
using MAX mape model
predict:  -0.03450838979776638

expected:  0.92
using MIN mape model
predict:  0.06741524738071064

expected:  -0.17
using MAX mape model
predict:  -0.15538540921033983

expected:  0.0
using MIN mape model
predict:  0.0586361829516784



'MAE: 0.11717835953907872'