In [10]:
import cloudpickle
import numpy as np
import pandas as pd
import json
from numerapi import NumerAPI

In [6]:
# Display the installed cloudpickle version; this notebook both loads and writes
# cloudpickle files, so the version is worth recording alongside the artifacts.
cloudpickle.__version__

'2.2.1'

In [11]:
# Numerai API client and the dataset version used throughout the notebook.
napi = NumerAPI()
DATA_VERSION = "v4.3"
# Trailing ';' suppresses the cell's rich output for this call.
napi.download_dataset(f"{DATA_VERSION}/features.json");
# Load data
# Use a context manager so the metadata file handle is closed deterministically
# instead of being left to the garbage collector.
with open(f"{DATA_VERSION}/features.json") as features_file:
    feature_metadata = json.load(features_file)
# Column names of the "all" feature set; used to select model inputs below.
feature_cols = feature_metadata["feature_sets"]["all"]

2024-02-11 14:27:31,657 INFO numerapi.utils: target file already exists
2024-02-11 14:27:31,657 INFO numerapi.utils: download complete


In [5]:
# Targets whose pre-trained models are blended by the ensemble. The order here
# fixes the column order of the per-model predictions.
favourite_targets = [
  "target_cyrus_v4_20",
  "target_victor_v4_20",
  "target_waldo_v4_20",
  "target_teager_v4_20",
]

# Load one pickled model per target from "<target>.pkl" in the working directory.
# Each file is opened in a `with` block so its handle is closed right after
# loading (the previous dict comprehension left every handle open).
# SECURITY: unpickling executes arbitrary code -- only load .pkl files you created
# or otherwise trust.
models = {}
for target_name in favourite_targets:
    with open(f"{target_name}.pkl", 'rb') as model_file:
        models[target_name] = cloudpickle.load(model_file)

In [7]:
# Ensemble entry point. It takes only `live_features`; no benchmark-model frame
# is accepted or used by this implementation.
def predict_ensemble(
    live_features: pd.DataFrame,
) -> pd.DataFrame:
    """Blend the per-target models into a single ranked prediction.

    Reads the module-level ``favourite_targets``, ``models`` and
    ``feature_cols``. Every model predicts on ``live_features[feature_cols]``;
    each model's output is converted to percentile ranks, the ranks are
    combined with fixed per-target weights, and the blend is percentile-ranked
    once more.

    Args:
        live_features: Frame containing (at least) the columns in
            ``feature_cols``; its index is carried through to the result.

    Returns:
        A one-column DataFrame named ``"prediction"`` indexed like
        ``live_features``.

    Raises:
        KeyError: If a name in ``favourite_targets`` has no weight or no model.
    """
    # Fixed blend weight for each target's model.
    blend_weights = {
        'target_cyrus_v4_20': 0.5,
        'target_victor_v4_20': 0.1,
        'target_waldo_v4_20': 0.1,
        'target_teager_v4_20': 0.3,
    }
    # Order the weights like favourite_targets so they line up with the
    # prediction columns created below.
    weight_vector = np.array([blend_weights[name] for name in favourite_targets])
    print('Weights:', weight_vector)

    # One column of raw model output per target.
    per_model = pd.DataFrame(index=live_features.index)
    for name in favourite_targets:
        per_model[name] = models[name].predict(live_features[feature_cols])

    # Rank within each model first so the weights act on comparable scales,
    # then take the weighted sum across models.
    ranked = per_model.rank(pct=True)
    blended = ranked.dot(weight_vector)

    # Final percentile rank, returned as a single "prediction" column.
    return blended.rank(pct=True).to_frame("prediction")

In [12]:
# Smoke test: fetch the current live data, load only the model feature columns,
# and run the ensemble once. The last expression renders the resulting frame.
live_path = f"{DATA_VERSION}/live_int8.parquet"
napi.download_dataset(live_path)
live_features = pd.read_parquet(live_path, columns=feature_cols)
predict_ensemble(live_features)

2024-02-11 14:27:34,775 INFO numerapi.utils: target file already exists
2024-02-11 14:27:34,776 INFO numerapi.utils: download complete


Weights: [0.5 0.1 0.1 0.3]


Unnamed: 0_level_0,prediction
id,Unnamed: 1_level_1
n000cda5e3638175,0.234346
n001760719081fe2,0.717520
n001fed8911ba7b9,0.036100
n0028a6e86243c0b,0.330818
n0029da0af4a3d33,0.346930
...,...
nffa67470d3b585e,0.016113
nffe6af3074d8a0f,0.907608
nffee5f593c4a81c,0.139506
nfff0f0d2c9cb03f,0.377116


In [13]:
def save(mdl, fl_nm):
    """Serialize ``mdl`` with cloudpickle and write it to ``"<fl_nm>.pkl"``.

    Args:
        mdl: Object to serialize (e.g. a prediction function).
        fl_nm: Output file name without the ``.pkl`` extension.
    """
    out_path = f"{fl_nm}.pkl"
    # Serialize fully in memory before touching the disk, so a pickling error
    # cannot leave a truncated file behind.
    payload = cloudpickle.dumps(mdl)
    print(f"Saving {out_path}...")
    with open(out_path, "wb") as sink:
        sink.write(payload)

In [14]:
# Serialize the ensemble prediction function to "<fl_nm>.pkl" via save().
# NOTE(review): the file name looks like it encodes data version, blend weights
# and date -- confirm the naming convention before reusing it.
save(mdl=predict_ensemble, fl_nm='brazil_nb2_v43_c5t3v1w1_10k004lr_240211')

Saving brazil_nb2_v43_c5t3v1w1_10k004lr_240211.pkl...
