In [None]:
%matplotlib inline
import matplotlib.pyplot as plt

import numpy as np
from scipy.stats import gmean

import ot
import wasserstein_smote

from sklearn.model_selection import KFold, train_test_split

from joblib import Parallel, delayed

In [None]:
def k_star(n):
    """Return an integer k derived from the sample size ``n``.

    The result is the geometric mean of ``n ** 0.5`` and ``n ** (1/3)``,
    rounded up to the next whole number and cast to an integer NumPy type.

    Parameters
    ----------
    n : number or array-like
        Sample size(s).

    Returns
    -------
    NumPy integer (or integer array for array input)
        ``ceil(gmean([sqrt(n), cbrt(n)]))``.
    """
    sqrt_n = n ** 0.5
    cbrt_n = n ** (1 / 3)
    rounded_up = np.ceil(gmean([sqrt_n, cbrt_n]))
    return rounded_up.astype(int)

In [None]:
# k values for the two subset sizes this notebook fits on (100 and 300 samples).
tuple(map(k_star, (100, 300)))

In [None]:
# Random state: passed as `random_state` to every wasserstein_smote.Oversampling
# fit in this notebook, so all four oversamplers share the same seed.
r = 0

In [None]:
# Optional subsampling for quick experiments: leave as None to use the whole
# file, or set to an int (e.g. 500) to keep only the first `take_n` samples.
# Later cells slice X[:300], so keep take_n >= 300 when it is set.
take_n = None

X = np.load("../data/w300/lm_3000.npy")
if take_n is not None:
    X = X[:take_n]

# X must be 3-D: later cells index it as X[:, :, 0] and X[:, :, 1].
# The size of the last axis is deliberately not bound to `_`: IPython uses `_`
# for the previous cell output and stops updating it once it is assigned by hand.
if X.ndim != 3:
    raise ValueError(f"expected a 3-D array, got shape {X.shape}")
n, m = X.shape[:2]
X.shape

In [None]:
%%time

# fit oversampler to X_test
oversampler = wasserstein_smote.Oversampling(X[:100], k=7, kernel="knn", random_state=r, n_jobs=-1)

# generate synthetic points to match X_train in size
X_smote_100 = oversampler.sample(method="simplicial", n=(7000,1))

In [None]:
%%time

# fit oversampler to X_test
oversampler = wasserstein_smote.Oversampling(X[:300], k=11, kernel="knn", random_state=r, n_jobs=-1)

# generate synthetic points to match X_train in size
X_smote_300 = oversampler.sample(method="simplicial", n=(7000,1))

In [None]:
%%time

# fit oversampler to X_test
oversampler = wasserstein_smote.Oversampling(X[:100], k=7, kernel="knn", random_state=r, n_jobs=-1)

# generate synthetic points to match X_train in size
X_smote_maximal_100 = oversampler.sample(method="simplicial_maximal", n=(7000,1))

In [None]:
%%time

# fit oversampler to X_test
oversampler = wasserstein_smote.Oversampling(X[:300], k=11, kernel="knn", random_state=r, n_jobs=-1)

# generate synthetic points to match X_train in size
X_smote_maximal_300 = oversampler.sample(method="simplicial_maximal", n=(7000,1))

In [None]:
# Marker size and base opacity shared by all panels; the original subset is
# drawn ten times more opaque than the synthetic samples.
s, alpha = 5, 0.005

fig, ax = plt.subplots(nrows=1, ncols=3, figsize=(12.75, 5))

# (title, array to plot, opacity) for each panel, left to right.
panels = [
    ("X[:100]", X[:100], alpha * 10),
    ("Simplicial kNN, k=7", X_smote_100, alpha),
    ("Simplicial maximal kNN, k=7", X_smote_maximal_100, alpha),
]

for panel_ax, (title, points, opacity) in zip(ax, panels):
    panel_ax.set_title(title)
    # Flip the direction of the y-axis before plotting.
    panel_ax.invert_yaxis()
    # Last axis: index 0 is plotted as x, index 1 as y.
    panel_ax.scatter(points[:, :, 0], points[:, :, 1], s=s, c="r", alpha=opacity)

plt.tight_layout()
plt.show()

In [None]:
# Marker size and base opacity shared by all panels; the original subset is
# drawn ten times more opaque than the synthetic samples.
s, alpha = 5, 0.005

fig, ax = plt.subplots(nrows=1, ncols=3, figsize=(12.75, 5))

# (title, array to plot, opacity) for each panel, left to right.
panels = [
    ("X[:300]", X[:300], alpha * 10),
    ("Simplicial kNN, k=11", X_smote_300, alpha),
    ("Simplicial maximal kNN, k=11", X_smote_maximal_300, alpha),
]

for panel_ax, (title, points, opacity) in zip(ax, panels):
    panel_ax.set_title(title)
    # Flip the direction of the y-axis before plotting.
    panel_ax.invert_yaxis()
    # Last axis: index 0 is plotted as x, index 1 as y.
    panel_ax.scatter(points[:, :, 0], points[:, :, 1], s=s, c="r", alpha=opacity)

plt.tight_layout()
plt.show()

In [None]:
# Persist each synthetic sample set as a .npy file, in the same order as before:
# the two k=7 sets fitted on X[:100], then the two k=11 sets fitted on X[:300].
outputs = [
    ("../data/100_to_7000_simplicial_k7.npy", X_smote_100),
    ("../data/100_to_7000_simplicial_maximal_k7.npy", X_smote_maximal_100),
    ("../data/300_to_7000_simplicial_k11.npy", X_smote_300),
    ("../data/300_to_7000_simplicial_maximal_k11.npy", X_smote_maximal_300),
]

for out_path, samples in outputs:
    np.save(out_path, samples)