In [None]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

from kdg import *
from kdg.utils import *

from functions.transfer_kdg_functions import *

## Intro to KDG

In [None]:
# Fit a `kdg.kdf` model on a 5-class spiral task, report its test accuracy,
# and draw three panels: training data, the output of `get_posteriors`,
# and data sampled back from the fitted model.
n_labels = 5
task_id = f"Spiral{n_labels}"
X1, y1 = generate_spirals(2000, n_class=n_labels)
X1_test, y1_test = generate_spirals(5000, n_class=n_labels)

rf_kwargs = {"n_estimators": 100, "verbose": 0}
kdg_spiral = kdg.kdf(kwargs = rf_kwargs) #could also use KDN
kdg_spiral.fit(X1, y1, task_id, **rf_kwargs)
# Fraction of held-out points whose predicted label matches the true label.
acc = np.mean(kdg_spiral.predict(X1_test, task_id) == y1_test)

posteriors = get_posteriors(kdg_spiral, y1)

Xgen, ygen = kdg_spiral.generate_data(5000, task_id)
print(f"{task_id} accuracy: {acc}")

# matplotlib's keyword is `figsize`; the previous spelling `fig_size` is not a
# recognised figure argument and made this call fail.
fig, ax = plt.subplots(1, 3, figsize = (15, 5))
# NOTE(review): these assignments overwrite entries of `ax` with whatever
# plot_2dsim returns; the axes created above are never passed to the helper.
# If plot_2dsim accepts an `ax=` argument, pass `ax=ax[i]` instead - confirm
# against its definition.
ax[0] = plot_2dsim(X1, y1, palette="bright")
ax[1] = plot_2dsim(posteriors, palette="bright")
ax[2] = plot_2dsim(Xgen, ygen, palette="bright")

## Visualize posteriors

### Force flip

In [None]:
# Sample sizes for the two-class spiral demo (reused by later cells).
n_train = 300
n_test = 1000

# The training set is drawn at twice n_train; the test set at n_test.
X, y = generate_spirals(2 * n_train, n_class=2)
X_test, y_test = generate_spirals(n_test, n_class=2)

# Forest settings passed to the kdf constructor.
rf_kwargs = dict(n_estimators=100, verbose=0)
kdg_spiral = kdg.kdf(kwargs=rf_kwargs)  # could also use KDN

force_flip(X, y, X_test, y_test, "Spiral", kdg_spiral)

### General Transfer

In [None]:
# Second task: draw fresh two-class spirals and swap the class labels.
X2, y2 = generate_spirals(n_train, n_class=2)
X2_test, y2_test = generate_spirals(n_test, n_class=2)
# -1*(y-1) == 1 - y, i.e. 0 <-> 1 (assumes labels are 0/1 - TODO confirm).
y2 = -1*(y2-1)
y2_test = -1*(y2_test-1)

# Use `rf_kwargs` from the force-flip cell. The previous `fit_kwargs` is only
# assigned in a later cell, so this line raised NameError on a fresh
# Restart & Run All.
kdf_multitask = kdf(kwargs = rf_kwargs)

# Transfer between the original task ("Spiral") and the label-swapped task
# ("SpiralFlip") using a single shared model.
transfer_posteriors(X, y, X_test, y_test,
                    X2, y2, X2_test, y2_test,
                    "Spiral", "SpiralFlip",
                    kdf_multitask)

## Manuscript Figures (derived from ProgLearn)

In [None]:
# define hyperparameters:
fit_kwargs = {"n_estimators": 500, "verbose": 0}
kdg_experiment = kdg.kdf(kwargs = fit_kwargs)
mc_rep = 20
# Sample-size grids for task 1 and task 2: 50, 100, ..., 750.
n_t1 = np.arange(50, 751, step=50).astype(int)
n_t2 = np.arange(50, 751, step=50).astype(int)
n_test = 1000

#choose save path
save_path = "kdg_demo.npz"

# run the experiment on the selected data functions
# look at kdg.utils for types of data that can be used
# The generator kwargs must be real dicts: `{n_class = 3}` is not valid Python
# and made the whole cell a SyntaxError.
mean_error, std_error, mean_te, std_te = run(kdg_experiment,
                                             mc_rep,
                                             n_t1,
                                             n_t2,
                                             n_test = n_test,
                                             gen_1 = generate_spirals,
                                             gen_kwargs1 = {"n_class": 3},
                                             gen_kwargs2 = {"n_class": 5})

# Persist the sample-size grids and the mean curves so the plotting cell can
# be re-run without repeating the experiment.
np.savez(save_path,
         task1 = n_t1,
         task2 = n_t2,
         mean_error = mean_error,
         mean_te = mean_te)

In [None]:
# Read the saved experiment arrays back from disk.
with np.load(save_path) as saved:
    n_t1, n_t2, mean_error, mean_te = (
        saved[key] for key in ("task1", "task2", "mean_error", "mean_te")
    )

# Draw as many example points as the largest sample size in either grid.
n_data = max(n_t1.max(), n_t2.max())

s3, s3_labels = generate_spirals(n_data, n_class=3)
s5, s5_labels = generate_spirals(n_data, n_class=5)

plot_error_and_eff(
    n_t1, n_t2,
    mean_error, mean_te,
    "Spiral3", "Spiral5",
    s3, s3_labels,
    s5, s5_labels,
)