In [5]:
import glob
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from glob import glob
from spreadsurfer.price_engine import FeatureEngineer
from catboost import CatBoostRegressor
from sklearn.preprocessing import StandardScaler

## chosen models
# min: models/2023-02-03T22:09:25-min_quantile0.5-score-73.0.cat
# max: models/2023-02-03T22:14:13-max_quantile0.7-score--154.0.cat


# cat_filename = 'models/2023-02-03T22:09:25-max_quantile0.5-score-77.0.cat'
# cat_filename = 'models/2023-02-03T22:14:13-max_quantile0.7-score--154.0.cat'

# cat_filename = 'models/2023-02-03T18:06:57-max_mape-score--21.0.cat'

# cat_filename = 'models/2023-02-03T21:57:57-min_huber0.4-score-108.0.cat'
# cat_filename = 'models/2023-02-03T21:34:07-min_poisson-score-191.0.cat'
# cat_filename = 'models/2023-02-03T21:50:04-min_quantile0.3-score--135.0.cat'
# cat_filename = '2023-02-03T20:13:00-min_rmse-score--10.0.cat'
# cat_filename = 'models/2023-02-01T09:14:50-mape-score-161.0.cat'

def load_and_score_mae(cat_filename, min=False):
    model = CatBoostRegressor()
    model.load_model(fname=cat_filename)
    print(f'catboost model loaded from {cat_filename}')

    pipeline = Pipeline(steps=[
        ('preprocessor', FeatureEngineer()),
        ('model', model)
    ])

    with open("neverseen.log","r") as f:
        input_from_log = ''.join([x for x in f.readlines() if 'collected' in x][-100:])

    mae = 0
    count = 0
    for line in input_from_log.split('\n'):
        if not line: continue
        count += 1
        json = '{' + line.split('{')[1]
        json = json.replace('nan', '0')
        sample = pd.DataFrame([eval(json)])
        if sample.wave_direction[0] not in ['min', 'max']: continue

        real = sample.last_price_delta_since_stabilized[0]
        if abs(real) < 0.2: continue
        sample.drop('last_price_delta_since_stabilized', axis=1, inplace=True)

        sample.loc[sample['wave_direction'] == 'min', 'wave_direction'] = 1
        sample.loc[sample['wave_direction'] == 'max', 'wave_direction'] = -1
        sample = sample.astype({"wave_direction": 'float64'})

        if sample['wave_direction'][0] != (1 if min else -1):
            continue

        print('expected: ', real)
        guess = model.predict(sample)[0]
        guess *= 3.5
        print('predict: ', guess)
        mae += abs(real - guess)
        print()
    display(f'MAE: {mae / count}')
    return mae / count

# load_and_score_mae(cat_filename)

## search for best model
# models = [x for x in glob('models/*max_quantile*.cat')]
models = [x for x in glob('models/*min*mape*')]
display(models)
results = {}
for i in models:
    try:
        mae = load_and_score_mae(i, min=True)
        results[mae] = i
    except Exception:
        print('skip with error')

print(dict(sorted(results.items())))


['models/2023-02-03T18:06:57-min_mape-score--17.0.cat',
 'models/2023-02-08T22:17:55-min_-mape-score--8.0.cat']

catboost model loaded from models/2023-02-03T18:06:57-min_mape-score--17.0.cat
expected:  0.64
predict:  1.0794794421804141

expected:  0.55
predict:  1.0700347857152936

expected:  1.14
predict:  1.5140650045999486

expected:  0.94
predict:  0.9187105515700872

expected:  0.21
predict:  1.298673359045788

expected:  3.18
predict:  1.2798169103372312

expected:  0.92
predict:  1.6930520000414075

expected:  0.27
predict:  0.97366620408172

expected:  1.73
predict:  1.6755114580655253

expected:  0.39
predict:  1.639617634798023

expected:  0.48
predict:  0.5244853255944758



'MAE: 0.07169034836084226'

catboost model loaded from models/2023-02-08T22:17:55-min_-mape-score--8.0.cat
expected:  0.64
predict:  0.7993453163109118

expected:  0.55
predict:  0.36923367485652936

expected:  1.14
predict:  1.3261623056900913

expected:  0.94
predict:  1.0814053580158123

expected:  0.21
predict:  1.373878836878918

expected:  3.18
predict:  1.4898651576076096

expected:  0.92
predict:  1.6374651170795764

expected:  0.27
predict:  0.9782288625167543

expected:  1.73
predict:  1.9219171206817758

expected:  0.39
predict:  2.633520834479304

expected:  0.48
predict:  0.26335578073576854



'MAE: 0.07599469138453237'

{0.07169034836084226: 'models/2023-02-03T18:06:57-min_mape-score--17.0.cat', 0.07599469138453237: 'models/2023-02-08T22:17:55-min_-mape-score--8.0.cat'}
