In [28]:
import glob
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from glob import glob
from spreadsurfer.price_engine import FeatureEngineer
from catboost import CatBoostRegressor
from sklearn.preprocessing import StandardScaler

## chosen models
# min: models/2023-02-03T22:09:25-min_quantile0.5-score-73.0.cat
# max: models/2023-02-03T22:14:13-max_quantile0.7-score--154.0.cat


# cat_filename = 'models/2023-02-03T22:09:25-max_quantile0.5-score-77.0.cat'
# cat_filename = 'models/2023-02-03T22:14:13-max_quantile0.7-score--154.0.cat'

# cat_filename = 'models/2023-02-03T18:06:57-max_mape-score--21.0.cat'

# cat_filename = 'models/2023-02-03T21:57:57-min_huber0.4-score-108.0.cat'
# cat_filename = 'models/2023-02-03T21:34:07-min_poisson-score-191.0.cat'
# cat_filename = 'models/2023-02-03T21:50:04-min_quantile0.3-score--135.0.cat'
# cat_filename = '2023-02-03T20:13:00-min_rmse-score--10.0.cat'
# cat_filename = 'models/2023-02-01T09:14:50-mape-score-161.0.cat'

def load_and_score_mae(cat_filename, min=False):
    model = CatBoostRegressor()
    model.load_model(fname=cat_filename)
    print(f'catboost model loaded from {cat_filename}')

    pipeline = Pipeline(steps=[
        ('preprocessor', FeatureEngineer()),
        ('model', model)
    ])

    with open("neverseen.log","r") as f:
        input_from_log = ''.join([x for x in f.readlines() if 'collected' in x][-100:])

    mae = 0
    count = 0
    for line in input_from_log.split('\n'):
        if not line: continue
        count += 1
        json = '{' + line.split('{')[1]
        json = json.replace('nan', '0')
        sample = pd.DataFrame([eval(json)])
        if sample.wave_direction[0] not in ['min', 'max']: continue

        real = sample.last_price_delta_since_stabilized[0]
        if abs(real) < 0.2: continue
        sample.drop('last_price_delta_since_stabilized', axis=1, inplace=True)

        sample.loc[sample['wave_direction'] == 'min', 'wave_direction'] = 1
        sample.loc[sample['wave_direction'] == 'max', 'wave_direction'] = -1
        sample = sample.astype({"wave_direction": 'float64'})

        if sample['wave_direction'][0] != (1 if min else -1):
            continue

        print('expected: ', real)
        guess = model.predict(sample)[0]
        guess *= 3.5
        print('predict: ', guess)
        mae += abs(real - guess)
        print()
    display(f'MAE: {mae / count}')
    return mae / count

# load_and_score_mae(cat_filename)

## search for best model
# models = [x for x in glob('models/*max_quantile*.cat')]
models = [x for x in glob('models/2023-02-07T13:50:42-max_quantile0.7-score--173.0.cat')]
display(models)
results = {}
for i in models:
    try:
        mae = load_and_score_mae(i, min=False)
        results[mae] = i
    except Exception:
        print('skip with error')

print(dict(sorted(results.items())))


['models/2023-02-07T13:50:42-max_quantile0.7-score--173.0.cat']

catboost model loaded from models/2023-02-07T13:50:42-max_quantile0.7-score--173.0.cat
expected:  -0.25
predict:  -0.08094569746677487

expected:  -0.5
predict:  -0.024423963717273862

expected:  -0.23
predict:  -0.07144636989432421

expected:  -1.59
predict:  -0.5745099110430822

expected:  -0.54
predict:  -1.0293334269329013

expected:  -0.21
predict:  -0.547050378469398

expected:  -0.33
predict:  -0.33459057043226903

expected:  -0.43
predict:  -0.3051426659097251

expected:  -1.01
predict:  -0.10793831487688496

expected:  -0.2
predict:  -0.3669514541890699

expected:  -0.3
predict:  -0.3053586696676872

expected:  -0.39
predict:  -0.07822257749130238

expected:  -0.27
predict:  -0.03138915423028016

expected:  -0.27
predict:  -0.7253583152301317

expected:  -2.4
predict:  -1.3694536889526594



'MAE: 0.058851704713391496'

{0.058851704713391496: 'models/2023-02-07T13:50:42-max_quantile0.7-score--173.0.cat'}
