# MotherNet and TabPFN failure cases on 1D classification with step functions

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import matplotlib

In [None]:
# Compute device name. The model-setup cell further down assigns this same
# value again before passing it to MotherNetClassifier(device=...).
device = "cpu"

In [None]:
def make_data(n_classes, n_samples, n_steps):
    """Sample a 1d classification problem whose label is piecewise constant.

    The unit interval is cut into ``n_steps`` segments by ``n_steps - 1``
    uniformly drawn breakpoints. Segment labels follow a random walk modulo
    ``n_classes``: a random starting label plus a running sum of +1/-1 steps,
    so neighbouring segments always differ by one class (mod ``n_classes``).

    Parameters
    ----------
    n_classes : int
        Number of distinct class labels; labels lie in ``range(n_classes)``.
    n_samples : int
        Number of points to draw.
    n_steps : int
        Number of constant segments.

    Returns
    -------
    X : ndarray of shape (n_samples, 1)
        Feature values drawn uniformly from [0, 1).
    y : ndarray of shape (n_samples,)
        Label of the segment each feature value falls into.
    """
    # The global NumPy RNG is consumed in this order: starting label,
    # walk directions, breakpoints, sample positions.
    start_label = np.random.randint(0, n_classes)
    coin_flips = np.random.randint(0, 2, size=n_steps)
    # Map {0, 1} coin flips to {+1, -1} increments and accumulate them.
    walk = np.cumsum(1 - 2 * coin_flips)
    segment_labels = (start_label + walk) % n_classes

    breakpoints = np.sort(np.random.uniform(size=n_steps - 1))
    positions = np.random.uniform(size=n_samples)

    # searchsorted yields, per sample, the index of the segment it lies in.
    segment_index = np.searchsorted(breakpoints, positions)
    return positions.reshape(-1, 1), segment_labels[segment_index]

In [None]:
# Draw one small two-class example: 100 points over 5 constant segments.
X, y = make_data(2, 100, 5)
plt.figure(figsize=(4, 1), dpi=300)

# Fixed 0..10 normalisation for the tab10 colormap (presumably so integer
# class ids pick distinct tab10 colours -- confirm if the palette matters).
class_norm = matplotlib.colors.Normalize(vmin=0, vmax=10, clip=False)
plt.scatter(
    X,
    y,
    c=y,
    s=20,
    marker="o",
    edgecolor="k",
    cmap=plt.cm.tab10,
    norm=class_norm,
)

plt.xlabel("feature")
plt.ylabel("class")
plt.yticks([0, 1])
# Uncomment to export the figure:
# plt.savefig("../figures/1d_classification_example.pdf", bbox_inches="tight")
plt.ylim(-0.1, 1.1)

In [None]:
def get_scores_steps(steps, models):
    """Score every model on one freshly sampled binary step-function dataset.

    A dataset of 2000 points with ``steps`` segments is drawn via
    ``make_data``. Each model is evaluated with ``cross_validate`` using a
    ``ShuffleSplit(train_size=.5)`` splitter and ROC AUC scoring, and the
    test scores are averaged over the splits.

    Parameters
    ----------
    steps : int
        Number of constant segments in the generated dataset.
    models : dict
        Mapping from a display name to an estimator accepted by
        ``cross_validate``.

    Returns
    -------
    dict
        ``{'steps': steps, <model name>: mean test ROC AUC, ...}``.
    """
    features, labels = make_data(2, 2000, steps)
    scores = {'steps': steps}
    for name, estimator in models.items():
        # A new splitter per model, as no random_state is fixed here.
        splitter = ShuffleSplit(train_size=.5)
        cv_out = cross_validate(estimator, features, labels, cv=splitter, scoring="roc_auc")
        scores[name] = np.mean(cv_out['test_score'])
    return scores

In [None]:
from mothernet.prediction import EnsembleMeta, MotherNetClassifier, TabPFNClassifier
from sklearn.model_selection import cross_validate, StratifiedKFold, ShuffleSplit
from sklearn.ensemble import RandomForestClassifier
from sklearn.neural_network import MLPClassifier
import torch
import numpy as np
from joblib import Parallel, delayed
import pandas as pd

# Restrict torch to a single thread in this process; the experiment itself is
# fanned out over joblib workers below.
torch.set_num_threads(1)
device = "cpu"

# Pretrained checkpoints, each used with three ensemble members.
tabpfn = TabPFNClassifier(
    device=device,
    model_string="tabpfn__emsize_512_nlayers_12_steps_2048_bs_32ada_lr_0.0001_1_gpu_07_24_2023_01_43_33_nooptimizer",
    epoch="1650",
    N_ensemble_configurations=3,
)
mothernet = EnsembleMeta(
    MotherNetClassifier(
        path="mn_d2048_H4096_L2_W32_P512_1_gpu_warm_08_25_2023_21_46_25_epoch_3940_no_optimizer.pickle",
        device=device,
    ),
    n_estimators=3,
)

# Insertion order fixes the column order of the result frame.
models = {
    'MLP': MLPClassifier(max_iter=4000),
    'TabPFN': tabpfn,
    'RandomForest': RandomForestClassifier(),
    'MotherNet': mothernet,
}

# Every step count is evaluated N_REPEATS times, each on a new random dataset.
N_REPEATS = 20
STEP_COUNTS = [2, 10, 20, 30, 40, 50, 60, 70, 80, 90, 100]
res = Parallel(n_jobs=-1)(
    delayed(get_scores_steps)(steps=steps, models=models)
    for _ in range(N_REPEATS)
    for steps in STEP_COUNTS
)
# One row per (repeat, step count); columns are 'steps' plus one per model.
rank2 = pd.DataFrame.from_dict(res)

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt
plt.figure(figsize=(4, 3))
# Reshape to long format (one row per run and model) so seaborn can group
# the scores by model.
long_scores = rank2.melt(id_vars="steps", var_name="model", value_name="score")
sns.lineplot(
    data=long_scores,
    x="steps",
    y="score",
    hue="model",
    ax=plt.gca(),
)
# Uncomment to export the figure:
#plt.savefig("figures/one_d_classification.pdf", dpi=300, bbox_inches="tight")
plt.ylabel("ROC AUC")