In [4]:
import glob
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from glob import glob
from spreadsurfer.price_engine import FeatureEngineer
from catboost import CatBoostRegressor
from sklearn.preprocessing import StandardScaler

## chosen models
# min: models/2023-02-03T22:09:25-min_quantile0.5-score-73.0.cat
# max: models/2023-02-03T22:14:13-max_quantile0.7-score--154.0.cat


# cat_filename = 'models/2023-02-03T22:09:25-max_quantile0.5-score-77.0.cat'
# cat_filename = 'models/2023-02-03T22:14:13-max_quantile0.7-score--154.0.cat'

# cat_filename = 'models/2023-02-03T18:06:57-max_mape-score--21.0.cat'

# cat_filename = 'models/2023-02-03T21:57:57-min_huber0.4-score-108.0.cat'
# cat_filename = 'models/2023-02-03T21:34:07-min_poisson-score-191.0.cat'
# cat_filename = 'models/2023-02-03T21:50:04-min_quantile0.3-score--135.0.cat'
# cat_filename = '2023-02-03T20:13:00-min_rmse-score--10.0.cat'
# cat_filename = 'models/2023-02-01T09:14:50-mape-score-161.0.cat'

def load_and_score_mae(cat_filename, min=False):
    model = CatBoostRegressor()
    model.load_model(fname=cat_filename)
    print(f'catboost model loaded from {cat_filename}')

    pipeline = Pipeline(steps=[
        ('preprocessor', FeatureEngineer()),
        ('model', model)
    ])

    with open("neverseen.log","r") as f:
        input_from_log = ''.join([x for x in f.readlines() if 'collected' in x][-100:])

    mae = 0
    count = 0
    for line in input_from_log.split('\n'):
        if not line: continue
        count += 1
        json = '{' + line.split('{')[1]
        json = json.replace('nan', '0')
        sample = pd.DataFrame([eval(json)])
        if sample.wave_direction[0] not in ['min', 'max']: continue

        real = sample.last_price_delta_since_stabilized[0]
        if abs(real) < 0.2: continue
        sample.drop('last_price_delta_since_stabilized', axis=1, inplace=True)

        sample.loc[sample['wave_direction'] == 'min', 'wave_direction'] = 1
        sample.loc[sample['wave_direction'] == 'max', 'wave_direction'] = -1
        sample = sample.astype({"wave_direction": 'float64'})

        if sample['wave_direction'][0] != (1 if min else -1):
            continue

        print('expected: ', real)
        guess = model.predict(sample)[0]
        guess *= 3.5
        print('predict: ', guess)
        mae += abs(real - guess)
        print()
    display(f'MAE: {mae / count}')
    return mae / count

# load_and_score_mae(cat_filename)

## search for best model
# models = [x for x in glob('models/*max_quantile*.cat')]
models = [x for x in glob('models/*min*mape*')]
display(models)
results = {}
for i in models:
    try:
        mae = load_and_score_mae(i, min=True)
        results[mae] = i
    except Exception:
        print('skip with error')

print(dict(sorted(results.items())))


['models/2023-02-03T18:06:57-min_mape-score--17.0.cat',
 'models/2023-02-03T20:13:00-min_rmse-score--10.0.cat',
 'models/2023-02-03T21:34:07-min_poisson-score-191.0.cat',
 'models/2023-02-03T21:48:03-min_quantile0.9-score--674.0.cat',
 'models/2023-02-03T21:50:04-min_quantile0.3-score--135.0.cat',
 'models/2023-02-03T21:57:57-min_huber0.4-score-108.0.cat',
 'models/2023-02-03T22:09:25-min_quantile0.5-score-73.0.cat',
 'models/2023-02-03T22:14:13-min_quantile0.7-score-181.0.cat',
 'models/2023-02-03T22:34:15-min_poisson-slower-score-126.0.cat',
 'models/2023-02-08T22:17:55-min_-mape-score--8.0.cat']

catboost model loaded from models/2023-02-03T18:06:57-min_mape-score--17.0.cat
expected:  0.64
predict:  1.0794794421804141

expected:  0.55
predict:  1.0700347857152936

expected:  1.14
predict:  1.5140650045999486

expected:  0.94
predict:  0.9187105515700872

expected:  0.21
predict:  1.298673359045788

expected:  3.18
predict:  1.2798169103372312

expected:  0.92
predict:  1.6930520000414075

expected:  0.27
predict:  0.97366620408172

expected:  1.73
predict:  1.6755114580655253

expected:  0.39
predict:  1.639617634798023

expected:  0.48
predict:  0.5244853255944758



'MAE: 0.07169034836084226'

catboost model loaded from models/2023-02-03T20:13:00-min_rmse-score--10.0.cat
expected:  0.64
predict:  2.0237962384834733

expected:  0.55
predict:  1.4759490612693775

expected:  1.14
predict:  3.0717567079760695

expected:  0.94
predict:  1.6406101862750138

expected:  0.21
predict:  1.8748461114462056

expected:  3.18
predict:  2.7864197547743097

expected:  0.92
predict:  3.1765252122624346

expected:  0.27
predict:  1.977348257860959

expected:  1.73
predict:  3.0315996662840745

expected:  0.39
predict:  2.5235899350664015

expected:  0.48
predict:  1.0838331281483808



'MAE: 0.1500343475029808'

catboost model loaded from models/2023-02-03T21:34:07-min_poisson-score-191.0.cat
expected:  0.64
predict:  2.049274629365993

expected:  0.55
predict:  1.4343341782075516

expected:  1.14
predict:  3.0046458467701322

expected:  0.94
predict:  1.6319954473156235

expected:  0.21
predict:  1.6308145128267928

expected:  3.18
predict:  2.7423375291110306

expected:  0.92
predict:  3.054458816482407

expected:  0.27
predict:  2.0445229462730468

expected:  1.73
predict:  2.8572016557699604

expected:  0.39
predict:  2.422905477442719

expected:  0.48
predict:  0.9974189615181234



'MAE: 0.14295234942861318'

catboost model loaded from models/2023-02-03T21:48:03-min_quantile0.9-score--674.0.cat
expected:  0.64
predict:  4.8666622636811905

expected:  0.55
predict:  3.9369737336803503

expected:  1.14
predict:  6.816499888172199

expected:  0.94
predict:  4.133219457743727

expected:  0.21
predict:  4.393720828118677

expected:  3.18
predict:  6.898162994733221

expected:  0.92
predict:  6.564908743049036

expected:  0.27
predict:  4.893286549457494

expected:  1.73
predict:  6.881673387102596

expected:  0.39
predict:  6.002499493353201

expected:  0.48
predict:  2.7375249126219474



'MAE: 0.47675132251713637'

catboost model loaded from models/2023-02-03T21:50:04-min_quantile0.3-score--135.0.cat
expected:  0.64
predict:  0.5539803143475395

expected:  0.55
predict:  0.3340757894617125

expected:  1.14
predict:  0.912080793298193

expected:  0.94
predict:  0.35938316377425406

expected:  0.21
predict:  0.17333960942427507

expected:  3.18
predict:  0.35153739229682074

expected:  0.92
predict:  1.2137342161457016

expected:  0.27
predict:  0.23241468641967514

expected:  1.73
predict:  1.2218135433153687

expected:  0.39
predict:  0.8104380588638633

expected:  0.48
predict:  0.021178077214497347



'MAE: 0.05694368905457228'

catboost model loaded from models/2023-02-03T21:57:57-min_huber0.4-score-108.0.cat
expected:  0.64
predict:  1.3494615233057057

expected:  0.55
predict:  1.0763912724469635

expected:  1.14
predict:  2.2284341453098304

expected:  0.94
predict:  1.228396139741319

expected:  0.21
predict:  1.1521268410974348

expected:  3.18
predict:  1.5632999480434115

expected:  0.92
predict:  2.316658174206689

expected:  0.27
predict:  1.3215581507730947

expected:  1.73
predict:  2.3037473280777405

expected:  0.39
predict:  1.6816200108765034

expected:  0.48
predict:  0.8378873849042537



'MAE: 0.09842981022696123'

catboost model loaded from models/2023-02-03T22:09:25-min_quantile0.5-score-73.0.cat
expected:  0.64
predict:  1.2681132048169461

expected:  0.55
predict:  1.0132526564672169

expected:  1.14
predict:  1.8801802734602342

expected:  0.94
predict:  1.1560563597120763

expected:  0.21
predict:  1.0637299265019307

expected:  3.18
predict:  1.4270676168566578

expected:  0.92
predict:  2.22146857421428

expected:  0.27
predict:  0.9067515564244171

expected:  1.73
predict:  2.183151008985228

expected:  0.39
predict:  1.716693426565493

expected:  0.48
predict:  0.48715542325670885



'MAE: 0.08379484793547873'

catboost model loaded from models/2023-02-03T22:14:13-min_quantile0.7-score-181.0.cat
expected:  0.64
predict:  2.3608035669807244

expected:  0.55
predict:  1.7474615580813881

expected:  1.14
predict:  3.5248107007651903

expected:  0.94
predict:  1.9688971325035864

expected:  0.21
predict:  1.9099592071608833

expected:  3.18
predict:  3.2957777508395507

expected:  0.92
predict:  3.593111756974591

expected:  0.27
predict:  2.4080497830193295

expected:  1.73
predict:  3.5283908393601426

expected:  0.39
predict:  2.969666831679426

expected:  0.48
predict:  1.3360804275168428



'MAE: 0.18193009554881656'

catboost model loaded from models/2023-02-03T22:34:15-min_poisson-slower-score-126.0.cat
expected:  0.64
predict:  1.534338468448063

expected:  0.55
predict:  1.5450541665955726

expected:  1.14
predict:  3.413758998806712

expected:  0.94
predict:  1.3798295895300274

expected:  0.21
predict:  1.8491999993904142

expected:  3.18
predict:  1.831750936722876

expected:  0.92
predict:  1.6047894958273117

expected:  0.27
predict:  2.4040305231197414

expected:  1.73
predict:  3.1315355095052855

expected:  0.39
predict:  1.694229529157234

expected:  0.48
predict:  0.9933527719199626



'MAE: 0.1362836811557745'

catboost model loaded from models/2023-02-08T22:17:55-min_-mape-score--8.0.cat
expected:  0.64
predict:  0.7993453163109118

expected:  0.55
predict:  0.36923367485652936

expected:  1.14
predict:  1.3261623056900913

expected:  0.94
predict:  1.0814053580158123

expected:  0.21
predict:  1.373878836878918

expected:  3.18
predict:  1.4898651576076096

expected:  0.92
predict:  1.6374651170795764

expected:  0.27
predict:  0.9782288625167543

expected:  1.73
predict:  1.9219171206817758

expected:  0.39
predict:  2.633520834479304

expected:  0.48
predict:  0.26335578073576854



'MAE: 0.07599469138453237'

{0.05694368905457228: 'models/2023-02-03T21:50:04-min_quantile0.3-score--135.0.cat', 0.07169034836084226: 'models/2023-02-03T18:06:57-min_mape-score--17.0.cat', 0.07599469138453237: 'models/2023-02-08T22:17:55-min_-mape-score--8.0.cat', 0.08379484793547873: 'models/2023-02-03T22:09:25-min_quantile0.5-score-73.0.cat', 0.09842981022696123: 'models/2023-02-03T21:57:57-min_huber0.4-score-108.0.cat', 0.1362836811557745: 'models/2023-02-03T22:34:15-min_poisson-slower-score-126.0.cat', 0.14295234942861318: 'models/2023-02-03T21:34:07-min_poisson-score-191.0.cat', 0.1500343475029808: 'models/2023-02-03T20:13:00-min_rmse-score--10.0.cat', 0.18193009554881656: 'models/2023-02-03T22:14:13-min_quantile0.7-score-181.0.cat', 0.47675132251713637: 'models/2023-02-03T21:48:03-min_quantile0.9-score--674.0.cat'}
