In [None]:
def set_latex():
    """Configure matplotlib globally for LaTeX-rendered serif figures.

    Mutates matplotlib's global rc state and returns nothing.  The sequence of
    settings is applied twice, exactly as in the original notebook.
    NOTE(review): the double pass is presumably a workaround for settings that
    do not stick on the first call -- confirm before removing it.
    """
    # Imported here because this cell sits before the notebook's import cell.
    import matplotlib
    import matplotlib.pyplot as plt

    for _ in range(2):
        plt.rc('text', usetex=True)
        plt.rc('font', family='serif')

        # Statement order is preserved from the original: the style reset runs
        # after the rc calls above and before the explicit assignments below.
        plt.style.use("default")
        plt.rcParams["font.size"] = 15

        plt.rcParams['font.family'] = 'Times New Roman'
        plt.rcParams['mathtext.fontset'] = 'stix'

        try:
            # Best-effort font-manager tweak.  It relies on a private API
            # (`_rebuild`), and the 'roman' key is already gone on the second
            # pass, so failures here are expected and deliberately ignored.
            del matplotlib.font_manager.weight_dict['roman']
            matplotlib.font_manager._rebuild()
        except Exception:
            # Unlike a bare `except:`, this lets KeyboardInterrupt/SystemExit
            # propagate.
            pass

In [None]:
import itertools
import math
import os
import pickle
import warnings
from typing import Dict, List, Tuple

import matplotlib.cm as cm
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from tqdm.notebook import tqdm

# Silence every warning for the rest of the session.  Note that this also
# hides deprecation notices, so disable it temporarily when debugging.
warnings.filterwarnings("ignore")

In [None]:
# Start from matplotlib's default style and set the base font size used by
# the figures in this notebook.
plt.style.use("default")
plt.rcParams["font.size"]=15

In [None]:
# Apply the LaTeX / serif font configuration defined in set_latex().
set_latex()

## Load dataset

To download the datasets, see https://github.com/LeoYu/neural-tangent-kernel-UCI

In [None]:
# Root folder that holds one sub-directory per dataset.
DATA_DIR = "./data/"

def get_datasize(dic: Dict) -> Tuple[int, int, int, int, int]:
    """Read the size fields out of a parsed dataset configuration.

    Args:
        dic: mapping built from the dataset's ``.txt`` file.  The keys
            ``"n_clases="``, ``"n_entradas="`` and ``"n_patrons1="`` are
            required; ``"n_patrons2="`` is optional.

    Returns:
        ``(n_tot, n_train_val, n_test, n_feature, n_class)`` -- five ints,
        where ``n_test`` is 0 when ``"n_patrons2="`` is absent and
        ``n_tot = n_train_val + n_test``.

    Raises:
        KeyError: if a required key is missing.
        ValueError: if a value cannot be converted to ``int``.
    """
    n_class = int(dic["n_clases="])
    n_feature = int(dic["n_entradas="])
    n_train_val = int(dic["n_patrons1="])
    # A second pattern file only exists for datasets with a separate test set.
    n_test = int(dic["n_patrons2="]) if "n_patrons2=" in dic else 0
    return n_train_val + n_test, n_train_val, n_test, n_feature, n_class


def load_data(dic: Dict) -> Tuple[np.ndarray, np.ndarray]:
    """Load the feature matrix and labels of one dataset.

    The file ``DATA_DIR/<dic["dataset"]>/<dic["fich1="]>`` is read as
    whitespace-separated text.  The first line is skipped; on every other
    line the first token is dropped, the last token is parsed as the integer
    label and the tokens in between are parsed as float features.

    Args:
        dic: dataset configuration with the keys ``"dataset"`` and ``"fich1="``.

    Returns:
        ``(X, y)`` -- the float feature array and the integer label array.
    """
    path = os.path.join(DATA_DIR, dic["dataset"], dic["fich1="])
    # The context manager closes the handle; the original leaked it.
    with open(path, "r") as handle:
        next(handle, None)  # skip the header line
        # Blank lines (e.g. a trailing newline) carry no sample and are skipped.
        rows = [line.split() for line in handle if line.strip()]

    X = np.asarray([[float(token) for token in row[1:-1]] for row in rows])
    y = np.asarray([int(row[-1]) for row in rows])
    return X, y

In [None]:
# Size limits a dataset must satisfy to be used.
MAX_TOT = 1000
MAX_FEATURES = 10
MAX_CLASSES = 2

datasets = []

n_dataset = 0
for idx, dataset in enumerate(sorted(os.listdir(DATA_DIR))):
    config_path = os.path.join(DATA_DIR, dataset, f"{dataset}.txt")
    if not os.path.isfile(config_path):
        continue

    # load configuration
    dic = dict()
    dic["dataset"] = dataset
    if dic["dataset"] != "tic-tac-toe":  # use only tic-tac-toe
        continue

    # Every configuration line is a "<key> <value>" pair.  The file is opened
    # in a context manager so the handle is closed (the original leaked it).
    with open(config_path, "r") as config_file:
        for k, v in (line.split() for line in config_file):
            dic[k] = v

    # Check skip or not
    n_tot, n_train_val, n_test, n_feature, n_class = get_datasize(dic)
    if (n_tot > MAX_TOT) or (n_test > 0) or (n_feature > MAX_FEATURES) or (n_class > MAX_CLASSES):
        continue

    print(f"-----{idx}, {dataset}, {n_tot}, {n_feature}, {n_class}-----")
    n_dataset += 1

    # load dataset
    X, y = load_data(dic)

    # Each line of the k-fold file is one list of integer sample indices;
    # later cells read consecutive pairs of lines as a train/test split.
    with open(os.path.join(DATA_DIR, dic["dataset"], "conxuntos_kfold.dat"), "r") as fold_file:
        fold = [list(map(int, line.split())) for line in fold_file]

## Kernels

In [None]:
def calc_tau(alpha: float, S: np.array, diag_i: np.array, diag_j: np.array) -> np.array:
    """Evaluate ``1/4 + arcsin(a^2 S / sqrt((a^2 d_i + 1/2)(a^2 d_j + 1/2))) / (2 pi)``.

    All array arguments are combined element-wise, with ``a = alpha``.
    """
    alpha_sq = alpha**2
    normaliser = np.sqrt((alpha_sq * diag_i + 0.5) * (alpha_sq * diag_j + 0.5))
    ratio = (alpha_sq * S) / normaliser
    scale = 1 / (2 * math.pi)
    return 1 / 4 + scale * np.arcsin(ratio)


def calc_tau_dot(
    alpha: float, S: np.array, diag_i: np.array, diag_j: np.array
) -> np.array:
    """Evaluate ``(a^2 / pi) / sqrt((2 a^2 d_i + 1)(2 a^2 d_j + 1) - 4 a^4 S^2)``.

    All array arguments are combined element-wise, with ``a = alpha``.
    """
    prefactor = (alpha**2) / (math.pi)
    radicand = (2 * (alpha**2) * diag_i + 1) * (2 * (alpha**2) * diag_j + 1) - (
        4 * (alpha**4) * (S**2)
    )
    return prefactor / np.sqrt(radicand)

In [None]:
def hard_kernel(X: np.ndarray, alpha: float, beta: float, finetune: bool, rulelist: list):
    """Average the per-rule kernel matrices over ``rulelist``.

    For every feature ``f`` a Gram matrix ``S_f = x_f x_f^T + beta^2`` is built
    and ``calc_tau`` / ``calc_tau_dot`` are evaluated on it.  A rule is a list
    of feature indices; its contribution is ``2 ** len(rule)`` times

    * for each position ``s`` in the rule: ``S * tau_dot[s]`` multiplied by the
      ``tau`` of every other feature in the rule (internal nodes), plus
    * the product of the ``tau`` of all features in the rule (leaves).

    Args:
        X: 2-D array of shape ``(n_samples, n_features)``.
        alpha: scale forwarded to ``calc_tau`` / ``calc_tau_dot``.
        beta: offset; ``beta**2`` is added to every Gram matrix.
        finetune: if True the ``S`` factor of the internal-node term is the
            all-feature Gram matrix ``X X^T + beta^2``; otherwise it is the
            per-feature ``S_f``.  ``tau`` and ``tau_dot`` always use ``S_f``.
        rulelist: list of rules, each a list of feature indices.

    Returns:
        ``(n_samples, n_samples)`` array: the summed contributions divided by
        ``len(rulelist)``.
    """
    n_samples = X.shape[0]
    offset = beta**2

    # The all-feature Gram matrix does not depend on the feature loop, so it is
    # computed once (and only when it is actually used).
    S_all = np.matmul(X, X.T) + offset if finetune else None

    S_list = []
    tau_list = []
    tau_dot_list = []
    for feature_index in range(len(X[0])):
        column = X[:, feature_index]
        S = np.outer(column, column) + offset
        S_list.append(S_all if finetune else S)

        # diag_i[r, c] == S[c, c] and diag_j[r, c] == S[r, r].
        diag_i = np.tile(np.diag(S), (n_samples, 1))
        diag_j = diag_i.transpose()
        tau_list.append(calc_tau(alpha, S, diag_i, diag_j))
        tau_dot_list.append(calc_tau_dot(alpha, S, diag_i, diag_j))

    K = np.zeros((n_samples, n_samples))
    for rules in tqdm(rulelist, leave=False):
        scale = 2 ** len(rules)

        # Internal nodes
        for i, s in enumerate(rules):
            # The product allocates a fresh array, so the in-place updates
            # below never modify the cached matrices.
            _H_nodes = S_list[s] * tau_dot_list[s]
            for t in rules[0:i] + rules[i + 1 :]:
                _H_nodes *= tau_list[t]
            K += _H_nodes * scale

        # Leaves
        _H_leaves = np.ones_like(K)
        for t in rules:
            _H_leaves *= tau_list[t]
        K += _H_leaves * scale

    return K / len(rulelist)

In [None]:
def soft_kernel(X: np.ndarray, depth: int, alpha: float, beta: float):
    """Compute the kernel matrix for the requested ``depth``.

    With ``S = X X^T + beta^2`` and ``tau`` / ``tau_dot`` from ``calc_tau`` /
    ``calc_tau_dot`` the result is

        2 * S * 2**(depth-1) * depth * tau_dot * tau**(depth-1) + 2**depth * tau**depth

    The original version evaluated this for every depth from 1 to ``depth``,
    stored all of them in a ``(depth, n, n)`` buffer and returned only the
    last one; here only that last matrix is computed.

    Args:
        X: 2-D array of shape ``(n_samples, n_features)``.
        depth: positive integer exponent used in the formula above.
        alpha: scale forwarded to ``calc_tau`` / ``calc_tau_dot``.
        beta: offset; ``beta**2`` is added to the Gram matrix.

    Returns:
        ``(n_samples, n_samples)`` float64 array.

    Raises:
        ValueError: if ``depth`` is smaller than 1.
    """
    if depth < 1:
        raise ValueError(f"depth must be a positive integer, got {depth!r}")

    S = np.matmul(X, X.T) + beta**2
    # diag_i[r, c] == S[c, c] and diag_j[r, c] == S[r, r].
    diag_i = np.tile(np.diag(S), (len(S), 1))
    diag_j = diag_i.transpose()

    tau = calc_tau(alpha, S, diag_i, diag_j)
    tau_dot = calc_tau_dot(alpha, S, diag_i, diag_j)

    H = (2 * S * (2 ** (depth - 1)) * depth * tau_dot * tau ** (depth - 1)) + (
        (2**depth) * (tau**depth)
    )
    # The original returned a slice of a float64 buffer; keep that dtype.
    return np.asarray(H, dtype=np.float64)

In [None]:
def extract_kernels(X, alpha, beta, degree):
    """Build one pair of hard kernels per feature combination.

    Every combination of 1 up to ``degree`` feature indices (in that order of
    size, each in lexicographic order) becomes a single-rule ``rulelist``, for
    which ``hard_kernel`` is evaluated once with ``finetune=False`` and once
    with ``finetune=True``.

    Args:
        X: 2-D array of shape ``(n_samples, n_features)``.
        alpha, beta: forwarded to ``hard_kernel``.
        degree: largest combination size; must be 1, 2 or 3.

    Returns:
        ``(kernels_aaa, kernels_aai, patterns)`` where ``kernels_aaa`` holds the
        ``finetune=False`` kernels, ``kernels_aai`` the ``finetune=True``
        kernels, and ``patterns[k]`` is the rulelist (``[[i, j, ...]]``) that
        produced the k-th kernel of both lists.
    """
    assert degree in (1, 2, 3)

    # Plain Python ints (not NumPy scalars) keep ``str(pattern)`` free of
    # ``np.int64(...)`` wrappers under NumPy >= 2, where the patterns are used
    # as tick labels.
    n_features = X.shape[1]
    patterns = [
        [list(combination)]
        for size in range(1, degree + 1)
        for combination in itertools.combinations(range(n_features), size)
    ]

    kernels_aaa = []
    kernels_aai = []

    for pattern in tqdm(patterns, leave=False):
        kernels_aaa.append(
            hard_kernel(X, alpha=alpha, beta=beta, finetune=False, rulelist=pattern)
        )
        kernels_aai.append(
            hard_kernel(X, alpha=alpha, beta=beta, finetune=True, rulelist=pattern)
        )

    return kernels_aaa, kernels_aai, patterns

## MKL

In [None]:
from MKLpy.algorithms import EasyMKL

In [None]:
# Kernels for every feature combination of up to three features, computed on
# the full data set with fixed alpha and beta.
kernels_aaa, kernels_aai, patterns = extract_kernels(X, alpha=2.0, beta=0.5, degree=3)

In [None]:
# Combine each kernel family with EasyMKL using all samples and labels.
# The returned objects expose `.weights` (one value per input kernel), which
# the next cell plots.
mkl = EasyMKL()
ker_matrix_aaa_full = mkl.combine_kernels(kernels_aaa, y)
ker_matrix_aai_full = mkl.combine_kernels(kernels_aai, y)

In [None]:
# Bar chart of the EasyMKL weight given to every feature combination.
plt.figure(figsize=(20,2))
x = range(len(ker_matrix_aaa_full.weights))
plt.bar(x, ker_matrix_aaa_full.weights, alpha=0.5, label="AAA")
plt.bar(x, ker_matrix_aai_full.weights, alpha=0.5, label="AAI")

# Label each bar with its feature set, e.g. "{0, 4, 8}".  The indices are cast
# to int so the label does not depend on how NumPy prints its scalar types.
plt.xticks(
    x,
    ["{" + ", ".join(str(int(feature)) for feature in sorted(set(i[0]))) + "}" for i in patterns],
    rotation=75,
    fontsize=10
)
plt.xlim(-1.5, len(patterns)+0.5)

# Mark the combinations that get_optimal_weight() sets to 1.
for position in (45, 60, 66, 86, 100, 105, 109, 128):
    plt.axvline(position, color="red", linestyle="dashed", linewidth=1)

plt.grid(linestyle="dotted")
plt.xlabel("Feature Combination")
plt.ylabel("Weight")
plt.legend()
plt.savefig("./figures/tictactoe_weight.pdf", bbox_inches="tight", pad_inches=0.10)

In [None]:
def get_optimal_weight(size, indices=(45, 60, 66, 86, 100, 105, 109, 128)):
    """Return a weight vector that is uniform on ``indices`` and zero elsewhere.

    Args:
        size: length of the weight vector.
        indices: positions that receive weight; the default is the set of
            eight positions that was hard-coded in the original notebook.

    Returns:
        Float array of length ``size`` that sums to 1.

    Raises:
        ValueError: if ``indices`` is empty.
        IndexError: if an index does not fit into a vector of length ``size``.
    """
    positions = np.asarray(list(indices), dtype=int)
    if positions.size == 0:
        raise ValueError("indices must contain at least one position")

    optimal = np.zeros(size, dtype=float)
    optimal[positions] = 1
    optimal /= optimal.sum()
    return optimal

In [None]:
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score


def svm(kernels, y, weights, reg, train_index, test_index):
    """Train a precomputed-kernel SVM on a weighted kernel sum and score it.

    The combined kernel is ``sum_j weights[j] * kernels[j]``.  The classifier
    is fitted on the ``train_index`` rows/columns and evaluated on the
    ``test_index`` rows.

    Args:
        kernels: sequence of square kernel matrices over all samples.
        y: label array indexable by ``train_index`` / ``test_index``.
        weights: one weight per kernel.
        reg: currently ignored -- the SVM always uses ``C=1.0``.
            NOTE(review): every caller in this notebook passes 1.0; decide
            whether this should be wired to ``C`` before passing other values.
        train_index: indices of the training samples.
        test_index: indices of the test samples.

    Returns:
        Accuracy on the test samples.
    """
    # Probability estimates were requested but never used in the original;
    # leaving them off skips the extra internal cross-validation that
    # scikit-learn runs for them.
    model = SVC(kernel="precomputed", C=1.0)

    K = np.zeros_like(kernels[0])
    for j in range(len(weights)):
        K += kernels[j] * weights[j]

    # Rows select the samples to predict, columns the training samples.
    K_train = K[train_index][:, train_index]
    K_test = K[test_index][:, train_index]

    model.fit(K_train, y[train_index])
    test_pred = model.predict(K_test)

    return accuracy_score(y[test_index], test_pred)

In [None]:
from sklearn.ensemble import GradientBoostingClassifier, RandomForestClassifier


def rf_benchmark(
    X: np.ndarray,
    y: np.ndarray,
    train_index: list,
    test_index: list,
    max_depth: int,
    n_estimators: int,
    max_features: int,
    random_state=None,
) -> float:
    """Fit a random forest on the training rows and return its test accuracy.

    Args:
        X: feature array indexable by ``train_index`` / ``test_index``.
        y: label array indexable the same way.
        train_index: indices of the training samples.
        test_index: indices of the test samples.
        max_depth, n_estimators, max_features: forwarded unchanged to
            ``RandomForestClassifier``.
        random_state: optional seed forwarded to the classifier; the default
            ``None`` keeps the original unseeded behaviour.

    Returns:
        Accuracy on the test samples.
    """
    model = RandomForestClassifier(
        max_depth=max_depth,
        n_estimators=n_estimators,
        max_features=max_features,
        random_state=random_state,
    )
    model.fit(X[train_index], y[train_index])
    # Class probabilities were computed but never used in the original.
    test_pred = model.predict(X[test_index])

    return accuracy_score(y[test_index], test_pred)


def gbdt_benchmark(
    X: np.ndarray,
    y: np.ndarray,
    train_index: list,
    test_index: list,
    max_depth: int,
    n_estimators: int,
    max_features: int,
    random_state=None,
) -> float:
    """Fit gradient-boosted trees on the training rows and return test accuracy.

    Args:
        X: feature array indexable by ``train_index`` / ``test_index``.
        y: label array indexable the same way.
        train_index: indices of the training samples.
        test_index: indices of the test samples.
        max_depth, n_estimators, max_features: forwarded unchanged to
            ``GradientBoostingClassifier``.
        random_state: optional seed forwarded to the classifier; the default
            ``None`` keeps the original unseeded behaviour.

    Returns:
        Accuracy on the test samples.
    """
    model = GradientBoostingClassifier(
        max_depth=max_depth,
        n_estimators=n_estimators,
        max_features=max_features,
        random_state=random_state,
    )
    model.fit(X[train_index], y[train_index])
    # Class probabilities were computed but never used in the original.
    test_pred = model.predict(X[test_index])

    return accuracy_score(y[test_index], test_pred)

## Forest benchmark

In [None]:
def forest_benchmark(y, train_index, test_index):
    """Score the random-forest and GBDT baselines over a hyper-parameter grid.

    Reads the feature matrix from the notebook-level variable ``X``.  For each
    ``max_depth`` in (3, 5, 7, None) and each ``n_estimators`` in
    (125, 250, 500, 1000, 2000, 4000) both baselines are run with
    ``max_features="sqrt"``.

    Returns:
        dict mapping ``"rf_<max_depth>_<n_estimators>"`` and
        ``"gbdt_<max_depth>_<n_estimators>"`` to the value returned by the
        corresponding benchmark function, inserted in grid order with the
        random forest entry before the GBDT entry.
    """
    depth_grid = (3, 5, 7, None)
    size_grid = (125, 250, 500, 1000, 2000, 4000)

    acc_dict = {}
    for max_depth, n_estimators in itertools.product(depth_grid, size_grid):
        for prefix, run in (("rf", rf_benchmark), ("gbdt", gbdt_benchmark)):
            acc_dict[f"{prefix}_{max_depth}_{n_estimators}"] = run(
                X,
                y,
                train_index,
                test_index,
                max_depth=max_depth,
                n_estimators=n_estimators,
                max_features="sqrt",
            )

    return acc_dict

FOREST_CACHE = 'forest_acc_dicts.pkl'

# The forest baselines are expensive, so they are only recomputed when the
# cache file is missing (delete the file to force a re-run).  The original
# used a hard-coded `if False:` toggle, which made this cell fail on a
# checkout without the cache file.
if not os.path.exists(FOREST_CACHE):
    acc_dicts = []
    partial_path = FOREST_CACHE + '.partial'

    for i in tqdm(range(12), leave=False):
        for repeat in tqdm(range(4), leave=False):
            # Consecutive lines of the fold file form one split; the assert
            # checks that the larger part is the one used for testing.
            test_index, train_index = fold[repeat * 2], fold[repeat * 2 + 1]
            assert len(test_index) > len(train_index)
            acc_dict = forest_benchmark(y, train_index, test_index)
            acc_dict["seed"] = i
            acc_dicts.append(acc_dict)

        # Checkpoint after every seed into a side file so an interrupted run
        # never leaves an incomplete cache behind.
        with open(partial_path, 'wb') as file:
            pickle.dump(acc_dicts, file)

    os.replace(partial_path, FOREST_CACHE)

# NOTE: pickle can execute arbitrary code -- only load cache files that were
# produced by this notebook.
with open(FOREST_CACHE, 'rb') as file:
    forest_acc_dicts = pickle.load(file)

In [None]:
# One row per benchmark run, one column per model configuration plus "seed".
# Building the frame in a single call replaces the original loop that merged
# one run at a time.
forest_runs = pd.DataFrame(forest_acc_dicts).rename_axis(columns="label")

# Same layout as before (a "label" column followed by value_0, value_1, ...),
# kept in case other code reads it.
all_experiments_df = (
    forest_runs.T
    .rename(columns=lambda run: f"value_{run}")
    .reset_index()
)

# Average the runs of each seed first, then take the mean and the standard
# deviation across seeds.
forest_by_seed = forest_runs.groupby(by="seed").mean()
forest_mean = forest_by_seed.mean()
forest_std = forest_by_seed.std()

In [None]:
def benchmark(kernels_aaa, kernels_aai, kernel_soft, y, train_index, test_index, alpha, beta, repeat, optimal):
    """Evaluate every kernel variant on one train/test split.

    Besides its arguments this function reads the notebook-level variables
    ``ker_matrix_aaa`` and ``ker_matrix_aai`` (the EasyMKL results for the
    current split), so they must be set before the call.

    Args:
        kernels_aaa, kernels_aai: lists of kernel matrices over all samples.
        kernel_soft: single kernel matrix evaluated under the "soft" key.
        y: label array.
        train_index, test_index: sample indices of the split.
        alpha, beta, repeat: stored in the result for bookkeeping only.
        optimal: hand-specified weight vector, one weight per kernel.

    Returns:
        dict with the keys ``alpha``, ``beta``, ``repeat`` and the accuracies
        ``aaa_mkl``, ``aaa_optimal``, ``aaa_benchmark``, ``aai_mkl``,
        ``aai_optimal``, ``aai_benchmark`` and ``soft``.
    """
    def evaluate(kernels, weights):
        return svm(kernels, y, weights, 1.0, train_index, test_index)

    # Uniform weights.  The original divided the AAI vector by the number of
    # AAA weights; each family now uses its own length.
    uniform_aaa = np.ones_like(ker_matrix_aaa.weights) / len(ker_matrix_aaa.weights)
    uniform_aai = np.ones_like(ker_matrix_aai.weights) / len(ker_matrix_aai.weights)

    acc_dict = {"alpha": alpha, "beta": beta, "repeat": repeat}

    # AAA
    acc_dict["aaa_mkl"] = evaluate(kernels_aaa, np.array(ker_matrix_aaa.weights))
    acc_dict["aaa_optimal"] = evaluate(kernels_aaa, optimal)
    acc_dict["aaa_benchmark"] = evaluate(kernels_aaa, uniform_aaa)

    # AAI
    acc_dict["aai_mkl"] = evaluate(kernels_aai, np.array(ker_matrix_aai.weights))
    acc_dict["aai_optimal"] = evaluate(kernels_aai, optimal)
    acc_dict["aai_benchmark"] = evaluate(kernels_aai, uniform_aai)

    # Soft: the same kernel repeated with uniform weights sums to the kernel
    # itself.
    acc_dict["soft"] = evaluate([kernel_soft] * len(kernels_aaa), uniform_aaa)
    return acc_dict

## Grid Search

In [None]:
degree = 3
GRID_CACHE = 'acc_dicts.pkl'

acc_dicts = []

# The grid search is only run when its cache file is missing (delete the file
# to force a re-run); the original used a hard-coded `if False:` toggle.
if not os.path.exists(GRID_CACHE):
    for alpha in tqdm([0.5, 1.0, 2.0, 4.0], leave=False):
        for beta in tqdm([0.1, 0.5, 1.0], leave=False):
            kernel_soft = soft_kernel(X, depth=degree, alpha=alpha, beta=beta)
            kernels_aaa, kernels_aai, patterns = extract_kernels(X, alpha=alpha, beta=beta, degree=degree)

            for repeat in tqdm(range(4), leave=False):
                # Consecutive lines of the fold file form one split; the
                # assert checks that the larger part is used for testing.
                test_index, train_index = fold[repeat * 2], fold[repeat * 2 + 1]
                assert len(test_index) > len(train_index)
                mkl = EasyMKL()

                # The MKL weights are learned on the training samples only.
                # benchmark() reads ker_matrix_aaa / ker_matrix_aai as
                # notebook-level variables, so these names must not change.
                train_kernels_aaa = [i[train_index][:, train_index] for i in kernels_aaa]
                train_kernels_aai = [i[train_index][:, train_index] for i in kernels_aai]
                ker_matrix_aaa = mkl.combine_kernels(train_kernels_aaa, y[train_index])
                ker_matrix_aai = mkl.combine_kernels(train_kernels_aai, y[train_index])

                optimal = get_optimal_weight(len(ker_matrix_aaa.weights))

                acc_dict = benchmark(kernels_aaa, kernels_aai, kernel_soft, y, train_index, test_index, alpha, beta, repeat, optimal)

                acc_dicts.append(acc_dict)

    with open(GRID_CACHE, 'wb') as file:
        pickle.dump(acc_dicts, file)

In [None]:
# Load the cached grid-search results (a list with one dict per run).
# NOTE: pickle can execute arbitrary code -- only load files produced by this
# notebook.
with open('acc_dicts.pkl', 'rb') as file:
    acc_dicts= pickle.load(file)

In [None]:
# Accuracy versus alpha for a fixed beta, with the forest baselines drawn as
# horizontal reference lines (mean across seeds, shaded +/- one std).
beta = 0.5

df = pd.DataFrame(acc_dicts)

metric_columns = ["aaa_mkl", "aaa_optimal", "aaa_benchmark", "aai_mkl", "aai_optimal", "aai_benchmark", "soft"]
_df = df[df["beta"]==beta].groupby(by=["alpha", "beta"]).mean()[metric_columns].reset_index()

x = range(4)

plt.figure(figsize=(9,5))

# (column, line style) for every kernel method, in drawing / legend order.
kernel_curves = [
    ("aaa_mkl", dict(label="AAA (MKL)", color="red", linestyle="solid", marker="o")),
    ("aaa_optimal", dict(label="AAA (Optimal)", color="red", linestyle="dashed", marker="^")),
    ("aaa_benchmark", dict(label="AAA (Benchmark)", color="red", linestyle="dotted", marker="v")),
    ("aai_mkl", dict(label="AAI (MKL)", color="blue", linestyle="solid", marker="o")),
    ("aai_optimal", dict(label="AAI (Optimal)", color="blue", linestyle="dashed", marker="^")),
    ("aai_benchmark", dict(label="AAI (Benchmark)", color="blue", linestyle="dotted", marker="v")),
    ("soft", dict(label="Oblique", color="black", marker="s")),
]
for column, style in kernel_curves:
    _df[column].plot(**style)

# (forest configuration, colour, dash pattern, legend label)
forest_baselines = [
    ("rf_3_1000", "green", (3, (6, 6)), "RF (max_depth=3)"),
    ("rf_None_1000", "green", (3, (1, 1)), "RF (max_depth=None)"),
    ("gbdt_3_1000", "orange", (0, (6, 6)), "GBDT (max_depth=3)"),
    ("gbdt_None_1000", "orange", (0, (1, 1)), "GBDT (max_depth=None)"),
]
for key, color, dashes, label in forest_baselines:
    center, spread = forest_mean[key], forest_std[key]
    plt.plot(x, [center]*len(x), color=color, linestyle=dashes, alpha=0.7, label=label, linewidth=1.5)
    plt.fill_between(x, center - spread, center + spread, color=color, alpha=0.1)

# The curves are plotted against positions 0..3; show the alpha values there.
plt.xticks([0, 1, 2, 3], [0.5, 1.0, 2.0, 4.0])
plt.xlabel("$\\alpha$")
plt.ylabel("Accuracy")
plt.grid(linestyle="dotted")
plt.legend(loc="upper left", bbox_to_anchor=(1,0.95))
plt.tight_layout()
plt.savefig("./figures/tictactoe_metrics.pdf", bbox_inches="tight", pad_inches=0.10)

In [None]:
# Accuracy versus alpha, one panel per beta, with the forest baselines drawn
# as horizontal reference lines (mean across seeds, shaded +/- one std).
plt.figure(figsize=(15,5))

metric_columns = ["aaa_mkl", "aaa_optimal", "aaa_benchmark", "aai_mkl", "aai_optimal", "aai_benchmark", "soft"]

# (column, line style) for every kernel method, in drawing order.
kernel_curves = [
    ("aaa_mkl", dict(label="AAA (MKL)", color="red", linestyle="solid", marker="o")),
    ("aaa_optimal", dict(label="AAA (Optimal)", color="red", linestyle="dashed", marker="^")),
    ("aaa_benchmark", dict(label="AAA (Benchmark)", color="red", linestyle="dotted", marker="v")),
    ("aai_mkl", dict(label="AAI (MKL)", color="blue", linestyle="solid", marker="o")),
    ("aai_optimal", dict(label="AAI (Optimal)", color="blue", linestyle="dashed", marker="^")),
    ("aai_benchmark", dict(label="AAI (Benchmark)", color="blue", linestyle="dotted", marker="v")),
    ("soft", dict(label="Oblique", color="black", marker="s")),
]

# (forest configuration, colour, dash pattern, legend label)
forest_baselines = [
    ("rf_3_1000", "green", (3, (6, 6)), "RF (max_depth=3)"),
    ("rf_None_1000", "green", (3, (1, 1)), "RF (max_depth=None)"),
    ("gbdt_3_1000", "orange", (0, (6, 6)), "GBDT (max_depth=3)"),
    ("gbdt_None_1000", "orange", (0, (1, 1)), "GBDT (max_depth=None)"),
]

for i, beta in enumerate([0.1, 0.5, 1.0]):
    plt.subplot(1,3, i+1)
    _df = df[df["beta"]==beta].groupby(by=["alpha", "beta"]).mean()[metric_columns].reset_index()

    x = range(4)

    for column, style in kernel_curves:
        _df[column].plot(**style)

    for key, color, dashes, label in forest_baselines:
        plt.plot(x, [forest_mean[key]]*len(x), color=color, linestyle=dashes, alpha=0.7, label=label, linewidth=1.5)

    for key, color, _, _ in forest_baselines:
        center, spread = forest_mean[key], forest_std[key]
        plt.fill_between(x, center - spread, center + spread, color=color, alpha=0.1)

    plt.xticks([0, 1, 2, 3], [0.5, 1.0, 2.0, 4.0])
    plt.xlabel("$\\alpha$")
    if beta==0.1:
        plt.ylabel("Accuracy")
    plt.grid(linestyle="dotted")
    plt.title(f"$\\beta$={beta}")

# Build the shared legend from the lines of the first panel.  The original
# passed a bare `labels=[...]` list whose order (MKL, Benchmark, Optimal) did
# not match the drawing order (MKL, Optimal, Benchmark), so the Benchmark and
# Optimal entries were attached to each other's line styles.
legend_handles, legend_labels = plt.gcf().axes[0].get_legend_handles_labels()
plt.figlegend(
    legend_handles,
    legend_labels,
    loc="lower center",
    ncol=4,
    bbox_to_anchor=(0.525, -0.2)
)

plt.tight_layout()
plt.savefig("./figures/tictactoe_metrics_beta.pdf", bbox_inches="tight", pad_inches=0.10)

In [None]:
# Horizontal bar chart of the mean accuracy of every forest configuration,
# with error bars of one standard deviation across seeds.
full_series_data = forest_mean
full_df = full_series_data.reset_index()
full_df.columns = ['label', 'value']

# Labels look like "<algorithm>_<max_depth>_<n_estimators>".
label_parts = full_df['label'].str.split('_')
full_df['algorithm'] = label_parts.str[0].str.upper()
full_df['max_depth'] = label_parts.str[1]
full_df['n_estimators'] = label_parts.str[2]
full_df = full_df.drop(columns='label')

full_df['x_label'] = "max_depth=" + full_df['max_depth'] + ", n_estimators=" + full_df['n_estimators']

plt.figure(figsize=(10, 8))
bar_plot = sns.barplot(y='x_label', x='value', hue='algorithm', data=full_df, ci=None)

# Entries alternate RF / GBDT for each configuration, so entry i belongs to
# row i // 2; even entries sit on the upper bar (-0.2), odd ones on the lower
# bar (+0.2).  `.iloc` makes the positional access explicit -- plain
# `series[i]` on a label-indexed Series is deprecated in pandas.
for i in range(len(forest_mean)):
    bar_offset = 0.2 if i % 2 else -0.2
    bar_plot.errorbar(
        forest_mean.iloc[i],
        i // 2 + bar_offset,
        xerr=forest_std.iloc[i],
        fmt='none',
        c='black',
        capsize=3,
    )

# The reference lines span every row of the chart.
n_rows = full_df['x_label'].nunique()

# NOTE(review): `_df` is whatever the previous plotting cell left behind
# (the table of the last beta it processed) -- confirm that this is the
# intended reference.
plt.vlines(
    _df["aaa_benchmark"].max(),
    ymin=-0.5,
    ymax=n_rows - 0.5,
    color="red",
    linestyle="dotted",
    alpha=0.7,
    label="AAA (Benchmark)",
    linewidth=2.0
)

plt.vlines(
    _df["aai_benchmark"].max(),
    ymin=-0.5,
    ymax=n_rows - 0.5,
    color="blue",
    linestyle="dotted",
    alpha=0.7,
    label="AAI (Benchmark)",
    linewidth=2.0
)

plt.xlabel('Accuracy')
plt.ylabel('Parameters')
plt.legend()
plt.xlim(0.7, 1)

plt.savefig("./figures/rf_gbdt_performance.pdf", bbox_inches="tight", pad_inches=0.10)

## Hard Splitting Performance

In [None]:
HARD_CACHE = 'acc_dicts_hard.pkl'
HARD_BETA = 0.5

# The experiment is only run when its cache file is missing (delete the file
# to force a re-run); the original used a hard-coded `if False:` toggle.
if not os.path.exists(HARD_CACHE):
    acc_dicts_hard = []
    for alpha in tqdm([1e0, 1e1, 1e2, 1e3], leave=False):
        kernel_soft = soft_kernel(X, depth=3, alpha=alpha, beta=HARD_BETA)
        kernels_aaa, kernels_aai, patterns = extract_kernels(X, alpha=alpha, beta=HARD_BETA, degree=3)
        for repeat in tqdm(range(4), leave=False):
            # Consecutive lines of the fold file form one split; the assert
            # checks that the larger part is used for testing.
            test_index, train_index = fold[repeat * 2], fold[repeat * 2 + 1]
            assert len(test_index) > len(train_index)
            mkl = EasyMKL()

            # benchmark() reads ker_matrix_aaa / ker_matrix_aai as
            # notebook-level variables, so these names must not change.
            train_kernels_aaa = [i[train_index][:, train_index] for i in kernels_aaa]
            train_kernels_aai = [i[train_index][:, train_index] for i in kernels_aai]
            ker_matrix_aaa = mkl.combine_kernels(train_kernels_aaa, y[train_index])
            ker_matrix_aai = mkl.combine_kernels(train_kernels_aai, y[train_index])

            optimal = get_optimal_weight(len(ker_matrix_aaa.weights))

            # Record the beta the kernels were built with.  The original
            # passed the notebook-level `beta`, i.e. whatever value an
            # earlier cell happened to leave behind.
            acc_dict_hard = benchmark(kernels_aaa, kernels_aai, kernel_soft, y, train_index, test_index, alpha, HARD_BETA, repeat, optimal)
            acc_dicts_hard.append(acc_dict_hard)

    with open(HARD_CACHE, 'wb') as file:
        pickle.dump(acc_dicts_hard, file)

In [None]:
# Load the cached large-alpha results (a list with one dict per run).
# NOTE: pickle can execute arbitrary code -- only load files produced by this
# notebook.
with open('acc_dicts_hard.pkl', 'rb') as file:
    acc_dicts_hard= pickle.load(file)

In [None]:
from scipy import special

fig = plt.figure(figsize=(10, 4))

# Left panel: accuracy of the uniformly weighted AAA kernel for growing alpha,
# with the forest baselines as horizontal reference lines.
plt.subplot(1,2,1)
df = pd.DataFrame(acc_dicts_hard)

results = df.groupby(by=["alpha", "beta"]).mean()["aaa_benchmark"].values
labels = ['$10^0$', '$10^1$', '$10^2$', '$10^3$']

# (forest configuration, colour, dash pattern, legend label)
forest_baselines = [
    ("rf_3_1000", "green", (3, (6, 6)), "RF(max_depth=3)"),
    ("rf_None_1000", "green", (3, (1, 1)), "RF(max_depth=None)"),
    ("gbdt_3_1000", "orange", (0, (6, 6)), "GBDT(max_depth=3)"),
    ("gbdt_None_1000", "orange", (0, (1, 1)), "GBDT(max_depth=None)"),
]

plt.bar(labels, results)

# The reference lines only need to cover the visible x-range.  The original
# prepared `x_range` but then drew 100000-point lines far beyond the axis
# limits, which bloats the saved PDF.
x_range = np.linspace(-0.5, len(labels) - 0.5, 100)
for key, color, dashes, label in forest_baselines:
    plt.plot(x_range, [forest_mean[key]]*len(x_range), color=color, linestyle=dashes, alpha=0.7, label=label, linewidth=1.5)

for key, color, _, _ in forest_baselines:
    center, spread = forest_mean[key], forest_std[key]
    plt.fill_between(x_range, center - spread, center + spread, color=color, alpha=0.1)

plt.xlabel("$\\alpha$")
plt.ylabel('Accuracy')
plt.title("AAA (Benchmark)")
plt.ylim([0.7, 1.0])
plt.xlim(-0.5, len(labels) - 0.5)
plt.legend(loc='lower right')

# Right panel: the curve 0.5 * erf(alpha * c) + 0.5 for the same alpha values.
plt.subplot(1,2,2)
alpha_values = [1e0, 1e1, 1e2, 1e3]
colors = ['blue', 'red', 'green', 'purple']
labels = ['$\\alpha=10^0$', '$\\alpha=10^1$', '$\\alpha=10^2$', '$\\alpha=10^3$']

# The sample grid is the same for every alpha, so it is built once.
x = np.linspace(-0.5, 0.5, 100000)
for alpha, color, label in zip(alpha_values, colors, labels):
    plt.plot(x, 0.5 * special.erf(alpha * x) + 0.5, color=color, label=label)

plt.xlabel('$c$')
# Raw string: "\s" is not a valid escape sequence in a normal string literal.
plt.ylabel(r'$\sigma(c)$')
plt.grid(linestyle="dotted")

plt.legend()
plt.tight_layout()
plt.savefig("./figures/large_alpha.pdf", bbox_inches="tight", pad_inches=0.10)