In [56]:
import glob
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from glob import glob
from spreadsurfer.price_engine import FeatureEngineer
from catboost import CatBoostRegressor
from sklearn.preprocessing import StandardScaler

## chosen models
# min: models/2023-02-03T22:09:25-min_quantile0.5-score-73.0.cat
# max: models/2023-02-03T22:14:13-max_quantile0.7-score--154.0.cat


# cat_filename = 'models/2023-02-03T22:09:25-max_quantile0.5-score-77.0.cat'
# cat_filename = 'models/2023-02-03T22:14:13-max_quantile0.7-score--154.0.cat'

# cat_filename = 'models/2023-02-03T18:06:57-max_mape-score--21.0.cat'

# cat_filename = 'models/2023-02-03T21:57:57-min_huber0.4-score-108.0.cat'
# cat_filename = 'models/2023-02-03T21:34:07-min_poisson-score-191.0.cat'
# cat_filename = 'models/2023-02-03T21:50:04-min_quantile0.3-score--135.0.cat'
# cat_filename = '2023-02-03T20:13:00-min_rmse-score--10.0.cat'
# cat_filename = 'models/2023-02-01T09:14:50-mape-score-161.0.cat'

def load_and_score_mae(cat_filename, min=True):
    model = CatBoostRegressor()
    model.load_model(fname=cat_filename)
    print(f'catboost model loaded from {cat_filename}')

    pipeline = Pipeline(steps=[
        ('preprocessor', FeatureEngineer()),
        ('model', model)
    ])

    with open("neverseen.log","r") as f:
        input_from_log = ''.join([x for x in f.readlines() if 'collected' in x][-100:])

    mae = 0
    count = 0
    for line in input_from_log.split('\n'):
        if not line: continue
        count += 1
        json = '{' + line.split('{')[1]
        json = json.replace('nan', '0')
        sample = pd.DataFrame([eval(json)])
        if sample.wave_direction[0] not in ['min', 'max']: continue

        real = sample.last_price_delta_since_stabilized[0]
        # if real < 0.1: continue
        sample.drop('last_price_delta_since_stabilized', axis=1, inplace=True)

        sample.loc[sample['wave_direction'] == 'min', 'wave_direction'] = 1
        sample.loc[sample['wave_direction'] == 'max', 'wave_direction'] = -1
        sample = sample.astype({"wave_direction": 'float64'})

        if sample['wave_direction'][0] != (1 if min else -1):
            continue

        # print('expected: ', real)
        guess = model.predict(sample)[0]
        # print('predict: ', guess)
        mae += abs(real - guess)
        # print()
    display(f'MAE: {mae / count}')
    return mae / count

# load_and_score_mae(cat_filename)

## search for best model
models = [x for x in glob('models/*.cat')]
results = {}
for i in models:
    try:
        mae = load_and_score_mae(i, min=False)
        results[mae] = i
    except Exception:
        print('skip with error')

print(dict(sorted(results.items())))


catboost model loaded from models/2023-01-21T20:37:02-score-773.0.cat


'MAE: 0.6340447099397136'

catboost model loaded from models/2023-01-24T07:03:25-score-530.0.cat


'MAE: 0.15182447975561364'

catboost model loaded from models/2023-01-24T11:19:53-score-449.0.cat
skip with error
catboost model loaded from models/2023-01-24T15:37:36-score-384.0.cat
skip with error
catboost model loaded from models/2023-01-24T15:38:18-score-462.0.cat
skip with error
catboost model loaded from models/2023-01-31T23:21:57-mape-score-174.0.cat


'MAE: 0.09797648194241798'

catboost model loaded from models/2023-01-31T23:21:57-rmse-score-314.0.cat


'MAE: 0.10060664081399295'

catboost model loaded from models/2023-02-01T09:14:50-mape-score-161.0.cat


'MAE: 0.15245220831958975'

catboost model loaded from models/2023-02-01T09:14:50-rmse-score-291.0.cat


'MAE: 0.09470442138543168'

catboost model loaded from models/2023-02-03T18:06:57-min_mape-score--17.0.cat


'MAE: 0.20436891694221063'

catboost model loaded from models/2023-02-03T18:06:57-max_mape-score--21.0.cat


'MAE: 0.08998657581561759'

catboost model loaded from models/2023-02-03T20:13:00-min_rmse-score--10.0.cat


'MAE: 0.3373574716782442'

catboost model loaded from models/2023-02-03T20:13:00-max_rmse-score--23.0.cat


'MAE: 0.14553890497539576'

catboost model loaded from models/2023-02-03T21:34:07-min_poisson-score-191.0.cat


'MAE: 0.3231884602743031'

catboost model loaded from models/2023-02-03T21:34:07-max_poisson-score--324.0.cat


'MAE: 0.09679999999999998'

catboost model loaded from models/2023-02-03T21:48:03-min_quantile0.9-score--674.0.cat


'MAE: 0.6728317712345151'

catboost model loaded from models/2023-02-03T21:48:03-max_quantile0.9-score--312.0.cat


'MAE: 0.09579618650946715'

catboost model loaded from models/2023-02-03T21:50:04-min_quantile0.3-score--135.0.cat


'MAE: 0.14950503419814612'

catboost model loaded from models/2023-02-03T21:50:04-max_quantile0.3-score-195.0.cat


'MAE: 0.16335007661511547'

catboost model loaded from models/2023-02-03T21:57:57-min_huber0.4-score-108.0.cat


'MAE: 0.24711490678780823'

catboost model loaded from models/2023-02-03T21:57:57-max_huber0.4-score-105.0.cat


'MAE: 0.10442430916125904'

catboost model loaded from models/2023-02-03T22:09:25-min_quantile0.5-score-73.0.cat


'MAE: 0.21777210153955143'

catboost model loaded from models/2023-02-03T22:09:25-max_quantile0.5-score-77.0.cat


'MAE: 0.09406877075335121'

catboost model loaded from models/2023-02-03T22:14:13-min_quantile0.7-score-181.0.cat


'MAE: 0.3534417299051567'

catboost model loaded from models/2023-02-03T22:14:13-max_quantile0.7-score--154.0.cat


'MAE: 0.08477792068489569'

catboost model loaded from models/2023-02-03T22:34:15-min_poisson-slower-score-126.0.cat


'MAE: 0.3056858892547124'

catboost model loaded from models/2023-02-03T22:34:15-max_poisson-slower-inv-score-148.0.cat


'MAE: 0.2548203179184837'

{0.08477792068489569: 'models/2023-02-03T22:14:13-max_quantile0.7-score--154.0.cat', 0.08998657581561759: 'models/2023-02-03T18:06:57-max_mape-score--21.0.cat', 0.09406877075335121: 'models/2023-02-03T22:09:25-max_quantile0.5-score-77.0.cat', 0.09470442138543168: 'models/2023-02-01T09:14:50-rmse-score-291.0.cat', 0.09579618650946715: 'models/2023-02-03T21:48:03-max_quantile0.9-score--312.0.cat', 0.09679999999999998: 'models/2023-02-03T21:34:07-max_poisson-score--324.0.cat', 0.09797648194241798: 'models/2023-01-31T23:21:57-mape-score-174.0.cat', 0.10060664081399295: 'models/2023-01-31T23:21:57-rmse-score-314.0.cat', 0.10442430916125904: 'models/2023-02-03T21:57:57-max_huber0.4-score-105.0.cat', 0.14553890497539576: 'models/2023-02-03T20:13:00-max_rmse-score--23.0.cat', 0.14950503419814612: 'models/2023-02-03T21:50:04-min_quantile0.3-score--135.0.cat', 0.15182447975561364: 'models/2023-01-24T07:03:25-score-530.0.cat', 0.15245220831958975: 'models/2023-02-01T09:14:50-mape-score-161.0.cat'