In [66]:
import glob
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from glob import glob
from spreadsurfer.price_engine import FeatureEngineer
from catboost import CatBoostRegressor
from sklearn.preprocessing import StandardScaler

## chosen models
# min: models/2023-02-03T22:09:25-min_quantile0.5-score-73.0.cat
# max: models/2023-02-03T22:14:13-max_quantile0.7-score--154.0.cat


# cat_filename = 'models/2023-02-03T22:09:25-max_quantile0.5-score-77.0.cat'
# cat_filename = 'models/2023-02-03T22:14:13-max_quantile0.7-score--154.0.cat'

# cat_filename = 'models/2023-02-03T18:06:57-max_mape-score--21.0.cat'

# cat_filename = 'models/2023-02-03T21:57:57-min_huber0.4-score-108.0.cat'
# cat_filename = 'models/2023-02-03T21:34:07-min_poisson-score-191.0.cat'
# cat_filename = 'models/2023-02-03T21:50:04-min_quantile0.3-score--135.0.cat'
# cat_filename = '2023-02-03T20:13:00-min_rmse-score--10.0.cat'
# cat_filename = 'models/2023-02-01T09:14:50-mape-score-161.0.cat'

def load_and_score_mae(cat_filename, min=False):
    model = CatBoostRegressor()
    model.load_model(fname=cat_filename)
    print(f'catboost model loaded from {cat_filename}')

    pipeline = Pipeline(steps=[
        ('preprocessor', FeatureEngineer()),
        ('model', model)
    ])

    with open("neverseen.log","r") as f:
        input_from_log = ''.join([x for x in f.readlines() if 'collected' in x][-100:])

    mae = 0
    count = 0
    for line in input_from_log.split('\n'):
        if not line: continue
        count += 1
        json = '{' + line.split('{')[1]
        json = json.replace('nan', '0')
        sample = pd.DataFrame([eval(json)])
        if sample.wave_direction[0] not in ['min', 'max']: continue

        real = sample.last_price_delta_since_stabilized[0]
        # if real < 0.1: continue
        sample.drop('last_price_delta_since_stabilized', axis=1, inplace=True)

        sample.loc[sample['wave_direction'] == 'min', 'wave_direction'] = 1
        sample.loc[sample['wave_direction'] == 'max', 'wave_direction'] = -1
        sample = sample.astype({"wave_direction": 'float64'})

        if sample['wave_direction'][0] != (1 if min else -1):
            continue

        print('expected: ', real)
        guess = model.predict(sample)[0]
        print('predict: ', guess)
        mae += abs(real - guess)
        # print()
    display(f'MAE: {mae / count}')
    return mae / count

# load_and_score_mae(cat_filename)

## search for best model
models = [x for x in glob('models/*max_quantile*.cat')]
display(models)
results = {}
for i in models:
    try:
        mae = load_and_score_mae(i, min=False)
        results[mae] = i
    except Exception:
        print('skip with error')

print(dict(sorted(results.items())))


['models/2023-02-03T21:48:03-max_quantile0.9-score--312.0.cat',
 'models/2023-02-03T21:50:04-max_quantile0.3-score-195.0.cat',
 'models/2023-02-03T22:09:25-max_quantile0.5-score-77.0.cat',
 'models/2023-02-03T22:14:13-max_quantile0.7-score--154.0.cat',
 'models/2023-02-07T09:39:08-max_quantile0.7-score-156.0.cat']

catboost model loaded from models/2023-02-03T21:48:03-max_quantile0.9-score--312.0.cat
expected:  -0.25
predict:  -0.003128685093703759
expected:  0.0
predict:  -5.695443707929996e-07
expected:  -0.02
predict:  -7.202035092930412e-07
expected:  0.0
predict:  -0.0006839410423041351
expected:  -0.02
predict:  -0.004011969677482929
expected:  0.0
predict:  -6.739789964002899e-07
expected:  0.0
predict:  -6.644725674094636e-07
expected:  0.0
predict:  -4.600375007050712e-07
expected:  -0.5
predict:  -0.0008005997270428058
expected:  -0.23
predict:  1.8546791055717663e-05
expected:  -1.59
predict:  -0.009993685122067116
expected:  0.0
predict:  -7.573928849704899e-07
expected:  -0.01
predict:  -3.707520851128385e-07
expected:  -0.01
predict:  -0.008116202561797589
expected:  -0.07
predict:  -0.010545847634091785
expected:  0.0
predict:  -5.309304403696547e-07
expected:  -0.54
predict:  -0.027476008819994804
expected:  0.0
predict:  -6.779231606423951e-07
expected:  0.0
predict:  -7.62264698

'MAE: 0.09579618650946715'

catboost model loaded from models/2023-02-03T21:50:04-max_quantile0.3-score-195.0.cat
expected:  -0.25
predict:  -0.5108489504334414
expected:  0.0
predict:  -0.4798289735352614
expected:  -0.02
predict:  -0.2616340150573613
expected:  0.0
predict:  -0.447162837526877
expected:  -0.02
predict:  -0.5540130608541612
expected:  0.0
predict:  -0.07949772568789953
expected:  0.0
predict:  -0.18710783798596015
expected:  0.0
predict:  -0.06598677006742709
expected:  -0.5
predict:  -0.2406206092782469
expected:  -0.23
predict:  -0.3807597141759834
expected:  -1.59
predict:  -0.6899546350044778
expected:  0.0
predict:  -0.3466634345133243
expected:  -0.01
predict:  -0.1247059560741775
expected:  -0.01
predict:  -0.8019067567584324
expected:  -0.07
predict:  -0.8764683416035222
expected:  0.0
predict:  -0.0014993500563833528
expected:  -0.54
predict:  -0.9457151707921037
expected:  0.0
predict:  -0.31275177735842624
expected:  0.0
predict:  -0.3873856454708448
expected:  0.0
predict:  -0.365159

'MAE: 0.16335007661511547'

catboost model loaded from models/2023-02-03T22:09:25-max_quantile0.5-score-77.0.cat
expected:  -0.25
predict:  -0.2565357512587453
expected:  0.0
predict:  -0.19084845394287256
expected:  -0.02
predict:  -0.10509016961691951
expected:  0.0
predict:  -0.11032857344607891
expected:  -0.02
predict:  -0.2993545794653429
expected:  0.0
predict:  -0.005818084112689886
expected:  0.0
predict:  -0.013398818088534187
expected:  0.0
predict:  -0.0025555214278609184
expected:  -0.5
predict:  -0.05656661794232708
expected:  -0.23
predict:  -0.17988990497506885
expected:  -1.59
predict:  -0.38404920219671
expected:  0.0
predict:  -0.10618456108746671
expected:  -0.01
predict:  -0.035837331285678425
expected:  -0.01
predict:  -0.4344474841894781
expected:  -0.07
predict:  -0.4857380825689015
expected:  0.0
predict:  -0.0016774065801274385
expected:  -0.54
predict:  -0.5513106751537252
expected:  0.0
predict:  -0.08907791637839944
expected:  0.0
predict:  -0.10421461448113739
expected:  0.0
predict:

'MAE: 0.09406877075335121'

catboost model loaded from models/2023-02-03T22:14:13-max_quantile0.7-score--154.0.cat
expected:  -0.25
predict:  -0.1030848843051665
expected:  0.0
predict:  -0.017850147611608764
expected:  -0.02
predict:  -0.006775392719681068
expected:  0.0
predict:  -0.0034505899680355476
expected:  -0.02
predict:  -0.12344607373929992
expected:  0.0
predict:  0.0003695818423873084
expected:  0.0
predict:  -0.009457420233495316
expected:  0.0
predict:  0.00024391018264198716
expected:  -0.5
predict:  -0.010183264198957074
expected:  -0.23
predict:  -0.03639105070588423
expected:  -1.59
predict:  -0.20114789789730894
expected:  0.0
predict:  0.0017875295308357186
expected:  -0.01
predict:  -9.685267137643096e-05
expected:  -0.01
predict:  -0.17403458210433714
expected:  -0.07
predict:  -0.2206362105562627
expected:  0.0
predict:  0.00023939724832917696
expected:  -0.54
predict:  -0.24243129390122573
expected:  0.0
predict:  -0.003546844430546724
expected:  0.0
predict:  -0.006640726166324609
expect

'MAE: 0.08477792068489569'

catboost model loaded from models/2023-02-07T09:39:08-max_quantile0.7-score-156.0.cat
expected:  -0.25
predict:  0.45206940559971254
expected:  0.0
predict:  0.47268153664929985
expected:  -0.02
predict:  0.3086770553012221
expected:  0.0
predict:  0.25055629515929007
expected:  -0.02
predict:  0.5192528181682353
expected:  0.0
predict:  0.04319204959862377
expected:  0.0
predict:  0.17123023113637226
expected:  0.0
predict:  0.12442982960489285
expected:  -0.5
predict:  0.20715922638426015
expected:  -0.23
predict:  0.5003494679919022
expected:  -1.59
predict:  0.6951245853649074
expected:  0.0
predict:  0.2636351825111226
expected:  -0.01
predict:  0.15186500621592958
expected:  -0.01
predict:  0.6381363839236053
expected:  -0.07
predict:  0.8451712335387349
expected:  0.0
predict:  -0.0450047693999599
expected:  -0.54
predict:  1.050198631588986
expected:  0.0
predict:  0.3522677487042919
expected:  0.0
predict:  0.295234412755161
expected:  0.0
predict:  0.30695455818013645
expecte

'MAE: 0.27760897768376624'

{0.08477792068489569: 'models/2023-02-03T22:14:13-max_quantile0.7-score--154.0.cat', 0.09406877075335121: 'models/2023-02-03T22:09:25-max_quantile0.5-score-77.0.cat', 0.09579618650946715: 'models/2023-02-03T21:48:03-max_quantile0.9-score--312.0.cat', 0.16335007661511547: 'models/2023-02-03T21:50:04-max_quantile0.3-score-195.0.cat', 0.27760897768376624: 'models/2023-02-07T09:39:08-max_quantile0.7-score-156.0.cat'}
