In [None]:
import numpy as np
from time import time
from sklearn import datasets
import matplotlib.pyplot as plt
import pandas as pd
import random
from sklearn.metrics.pairwise import pairwise_kernels
from sklearn.metrics.cluster import adjusted_rand_score
from sklearn.cluster import KMeans
import os

# Execute the helper notebooks so that the names they define become available in this kernel.
# NOTE(review): imm_experiments and refine_imm, used in the cells below, are not defined in
# this notebook -- presumably they come from one of these helpers; confirm.
%run KernelkmeansFunctions.ipynb
%run ExplainabilityFunctions.ipynb
%run ExpandingIMM.ipynb
%run KernelExKMC.ipynb
%run RunExperiments.ipynb

# Generator created without a seed, so anything drawn from it is not reproducible across runs.
rng = np.random.default_rng()

### All clustering benchmark datasets are available at https://cs.joensuu.fi/sipu/datasets/

In [None]:
### Define kernel functions

def rbf(x, y, gamma):
    """Gaussian (RBF) kernel between two vectors.

    Computes exp(-gamma * sum((x - y)**2)). In this notebook it is passed as
    the callable ``metric`` to ``pairwise_kernels``.

    Parameters
    ----------
    x, y : array-like operands supporting elementwise subtraction.
    gamma : bandwidth multiplier applied to the squared distance.

    Returns
    -------
    The kernel value as returned by ``np.exp``.
    """
    squared_distance = np.sum((x - y) ** 2)
    return np.exp(-gamma * squared_distance)

def laplace(x, y, gamma):
    """Laplace kernel between two vectors.

    Computes exp(-gamma * sum(|x - y|)). In this notebook it is passed as the
    callable ``metric`` to ``pairwise_kernels``.

    Parameters
    ----------
    x, y : array-like operands supporting elementwise subtraction.
    gamma : bandwidth multiplier applied to the summed absolute differences.

    Returns
    -------
    The kernel value as returned by ``np.exp``.
    """
    absolute_distance = np.sum(np.abs(x - y))
    return np.exp(-gamma * absolute_distance)

def linear(x, y):
    """Linear kernel: the dot product of ``x`` and ``y`` as computed by ``np.dot``."""
    inner_product = np.dot(x, y)
    return inner_product

In [None]:
### Pathbased

rng = np.random.default_rng()
df = pd.read_csv('your path', sep=";", header=None)

# Full bandwidth grid; it is replaced further down by a single value for a quick run.
gammas = np.array([0.01, 0.05, 0.1, 0.5, 1, 5, 10])

# Columns 0 and 1 are used as features, column 2 as the ground-truth label.
pathbased_raw = np.array(df)[:, 0:3]
X = pathbased_raw[:, [0, 1]]
true_k = len(np.unique(pathbased_raw[:, 2]))
# Labels are cast to int and shifted down by one.
y_true = pathbased_raw[:, 2].astype(int) - 1

gammas = [0.05] # set optimum for a quick run
imm_path1, imm_path2 = imm_experiments(X, y_true, gammas)

In [None]:
# Labels of kernel k-means and the bandwidth reported as best by imm_experiments.
gamma = imm_path1['best_gamma']
y_kkm = imm_path2['y_kkm']

# Kernel index 0 takes the Gaussian (rbf) branch; any other value takes the Laplace branch.
gaussian_selected = imm_path1['best_kernel'] == 0
if not gaussian_selected:
    print('Laplace Kernel Matrix')
    Kmat = pairwise_kernels(X, metric=laplace, gamma=gamma)
    imm_key = 'y_kmat_imm_on_kkm'
else:
    Kmat = pairwise_kernels(X, metric=rbf, gamma=gamma)
    # For the Gaussian kernel, keep whichever IMM variant has the lower price.
    taylor_cheaper = imm_path1['price_taylor_imm_on_kkm'] < imm_path1['price_kmat_imm_on_kkm']
    if taylor_cheaper:
        print('Gaussian Taylor')
        imm_key = 'y_taylor_imm_on_kkm'
    else:
        print('Gaussian Kernel Matrix')
        imm_key = 'y_kmat_imm_on_kkm'
y_imm = imm_path2[imm_key]

refine_path1, refine_path2 = refine_imm(X, y_true, y_kkm, y_imm, Kmat, max_leaves=6)

In [None]:
### Make a plot

y_kmeans = imm_path2['y_kmeans']
y_kmeans_imm = imm_path2['y_kmeans_imm']
y_exkmc = refine_path2['y_exkmc']
y_expand = refine_path2['y_expand']

# Colour assigned to cluster labels 0, 1, 2 in each panel. The k-means panel
# uses a different order from the other five panels.
kmeans_colours = ('green', 'red', 'blue')
default_colours = ('green', 'blue', 'red')

# (labels, title, colours) for the 2x3 grid, in subplot order 1..6.
pathbased_panels = [
    (y_kmeans, 'K-means', kmeans_colours),
    (y_kkm, 'Kernel k-means', default_colours),
    (y_expand, 'Kernel IMM expanded', default_colours),
    (y_kmeans_imm, 'IMM on k-means', default_colours),
    (y_imm, 'Kernel IMM', default_colours),
    (y_exkmc, 'Kernel ExKMC', default_colours),
]

for panel_index, (panel_labels, panel_title, panel_colours) in enumerate(pathbased_panels, start=1):
    plt.subplot(2, 3, panel_index)
    # Draw one scatter per cluster so each cluster gets its fixed colour.
    for cluster_id, cluster_colour in enumerate(panel_colours):
        members = panel_labels == cluster_id
        plt.scatter(X[members, 0], X[members, 1], s=50, c=cluster_colour)
    plt.title(panel_title, fontsize=10)

plt.tight_layout()

In [None]:
# Show the first return values of imm_experiments and refine_imm for the Pathbased data.
print(imm_path1, refine_path1)

In [None]:
### Aggregation

df = pd.read_csv('your path', sep=";", header=None)

# Columns 0 and 1 are used as features, column 2 as the ground-truth label.
aggregation_raw = np.array(df)[:, 0:3]
# Labels are cast to int and shifted down by one.
y_true = aggregation_raw[:, 2].astype(int) - 1
X = aggregation_raw[:, [0, 1]]

# Single bandwidth candidate handed to the experiment.
gammas = [0.1]
imm_agg1, imm_agg2 = imm_experiments(X, y_true, gammas)

In [None]:
# Labels of kernel k-means and the bandwidth reported as best by imm_experiments.
gamma = imm_agg1['best_gamma']
y_kkm = imm_agg2['y_kkm']

# Kernel index 0 takes the Gaussian (rbf) branch; any other value takes the Laplace branch.
gaussian_selected = imm_agg1['best_kernel'] == 0
if not gaussian_selected:
    print('Laplace Kernel Matrix')
    Kmat = pairwise_kernels(X, metric=laplace, gamma=gamma)
    imm_key = 'y_kmat_imm_on_kkm'
else:
    Kmat = pairwise_kernels(X, metric=rbf, gamma=gamma)
    # For the Gaussian kernel, keep whichever IMM variant has the lower price.
    taylor_cheaper = imm_agg1['price_taylor_imm_on_kkm'] < imm_agg1['price_kmat_imm_on_kkm']
    if taylor_cheaper:
        print('Gaussian Taylor')
        imm_key = 'y_taylor_imm_on_kkm'
    else:
        print('Gaussian Kernel Matrix')
        imm_key = 'y_kmat_imm_on_kkm'
y_imm = imm_agg2[imm_key]

# Allow three more leaves than there are distinct ground-truth labels.
leaf_budget = len(np.unique(y_true)) + 3
refine_agg1, refine_agg2 = refine_imm(X, y_true, y_kkm, y_imm, Kmat, max_leaves=leaf_budget)

In [None]:
### Make a plot

y_kmeans = imm_agg2['y_kmeans']
y_kmeans_imm = imm_agg2['y_kmeans_imm']
y_exkmc = refine_agg2['y_exkmc']
y_expand = refine_agg2['y_expand']

# (labels, title) for the 2x3 grid, in subplot order 1..6.
aggregation_panels = [
    (y_kmeans, 'K-means'),
    (y_kkm, 'Kernel k-means'),
    (y_expand, 'Kernel IMM expanded'),
    (y_kmeans_imm, 'IMM on k-means'),
    (y_imm, 'Kernel IMM'),
    (y_exkmc, 'Kernel ExKMC'),
]

for panel_index, (panel_labels, panel_title) in enumerate(aggregation_panels, start=1):
    plt.subplot(2, 3, panel_index)
    # Colour every point by its cluster label.
    plt.scatter(X[:, 0], X[:, 1], s=50, c=panel_labels)
    plt.title(panel_title, fontsize=10)

plt.tight_layout()

In [None]:
# Show the first return values of imm_experiments and refine_imm for the Aggregation data.
print(imm_agg1, refine_agg1)

In [None]:
### Flame

df = pd.read_csv('your path', sep=";", header=None)

# Columns 0 and 1 are used as features, column 2 as the ground-truth label.
X = np.array(df)[:,0:3]
y_true = X[:,2]
X = X[:,[0,1]]
# Labels are cast to int and shifted down by one.
y_true = y_true.astype(int) - 1

# The candidate list must be bound to `gammas`, the name passed to
# imm_experiments below. Binding it to `gamma` instead would silently reuse
# the `gammas` value left over from an earlier cell.
gammas = [0.05]
imm_flame1, imm_flame2 = imm_experiments(X, y_true, gammas)

In [None]:
# Labels of kernel k-means and the bandwidth reported as best by imm_experiments.
gamma = imm_flame1['best_gamma']
y_kkm = imm_flame2['y_kkm']

# Kernel index 0 takes the Gaussian (rbf) branch; any other value takes the Laplace branch.
gaussian_selected = imm_flame1['best_kernel'] == 0
if not gaussian_selected:
    print('Laplace Kernel Matrix')
    Kmat = pairwise_kernels(X, metric=laplace, gamma=gamma)
    imm_key = 'y_kmat_imm_on_kkm'
else:
    Kmat = pairwise_kernels(X, metric=rbf, gamma=gamma)
    # For the Gaussian kernel, pick whichever IMM variant has the lower price.
    taylor_cheaper = imm_flame1['price_taylor_imm_on_kkm'] < imm_flame1['price_kmat_imm_on_kkm']
    if taylor_cheaper:
        print('Gaussian Taylor')
        imm_key = 'y_taylor_imm_on_kkm'
    else:
        print('Gaussian Kernel Matrix')
        imm_key = 'y_kmat_imm_on_kkm'
y_imm = imm_flame2[imm_key]

# NOTE(review): the selection above is overridden unconditionally, so the
# kernel-matrix IMM labels are always used here, even when 'Gaussian Taylor'
# was printed. The Pathbased and Aggregation cells do not do this -- confirm
# whether the override is intentional.
y_imm = imm_flame2['y_kmat_imm_on_kkm']
refine_flame1, refine_flame2 = refine_imm(X, y_true, y_kkm, y_imm, Kmat, max_leaves=4)

In [None]:
### Make a plot

y_kmeans = imm_flame2['y_kmeans']
y_kmeans_imm = imm_flame2['y_kmeans_imm']
y_exkmc = refine_flame2['y_exkmc']
y_expand = refine_flame2['y_expand']

# (labels, title) for the 2x3 grid, in subplot order 1..6.
flame_panels = [
    (y_kmeans, 'K-means'),
    (y_kkm, 'Kernel k-means'),
    (y_expand, 'Kernel IMM expanded'),
    (y_kmeans_imm, 'IMM on k-means'),
    (y_imm, 'Kernel IMM'),
    (y_exkmc, 'Kernel ExKMC'),
]

for panel_index, (panel_labels, panel_title) in enumerate(flame_panels, start=1):
    plt.subplot(2, 3, panel_index)
    # Colour every point by its cluster label.
    plt.scatter(X[:, 0], X[:, 1], s=50, c=panel_labels)
    plt.title(panel_title, fontsize=10)

plt.tight_layout()

In [None]:
# Show the first return values of imm_experiments and refine_imm for the Flame data.
print(imm_flame1, refine_flame1)

In [None]:
from sklearn import datasets

# Iris: features and target come straight from scikit-learn's bundled dataset.
iris = datasets.load_iris()
X, y_true = iris.data, iris.target

# Single bandwidth candidate handed to the experiment.
gammas = [1]
imm_iris1, imm_iris2 = imm_experiments(X, y_true, gammas)

In [None]:
# Labels of kernel k-means and the bandwidth reported as best by imm_experiments.
gamma = imm_iris1['best_gamma']
y_kkm = imm_iris2['y_kkm']

# Kernel index 0 takes the Gaussian (rbf) branch; any other value takes the Laplace branch.
gaussian_selected = imm_iris1['best_kernel'] == 0
if not gaussian_selected:
    print('Laplace Kernel Matrix')
    Kmat = pairwise_kernels(X, metric=laplace, gamma=gamma)
    imm_key = 'y_kmat_imm_on_kkm'
else:
    Kmat = pairwise_kernels(X, metric=rbf, gamma=gamma)
    # For the Gaussian kernel, pick whichever IMM variant has the lower price.
    taylor_cheaper = imm_iris1['price_taylor_imm_on_kkm'] < imm_iris1['price_kmat_imm_on_kkm']
    if taylor_cheaper:
        print('Gaussian Taylor')
        imm_key = 'y_taylor_imm_on_kkm'
    else:
        print('Gaussian Kernel Matrix')
        imm_key = 'y_kmat_imm_on_kkm'
y_imm = imm_iris2[imm_key]

# NOTE(review): the selection above is overridden unconditionally, so the
# kernel-matrix IMM labels are always used here, even when 'Gaussian Taylor'
# was printed. The Pathbased and Aggregation cells do not do this -- confirm
# whether the override is intentional.
y_imm = imm_iris2['y_kmat_imm_on_kkm']
refine_iris1, refine_iris2 = refine_imm(X, y_true, y_kkm, y_imm, Kmat, max_leaves=6)

In [None]:
# Show the first return values of imm_experiments and refine_imm for the Iris data.
print(imm_iris1, refine_iris1)

In [None]:
from sklearn import datasets

# Breast cancer dataset bundled with scikit-learn: features and target.
breast_cancer = datasets.load_breast_cancer()
X, y_true = breast_cancer.data, breast_cancer.target

# Bandwidth candidates 1, 5 and 10, each scaled by 10**(-6).
bandwidth_scale = 10**(-6)
gammas = bandwidth_scale * np.array([1, 5, 10])
imm_wisc1, imm_wisc2 = imm_experiments(X, y_true, gammas)


In [None]:
# Labels of kernel k-means and the bandwidth reported as best by imm_experiments.
gamma = imm_wisc1['best_gamma']
y_kkm = imm_wisc2['y_kkm']

# Kernel index 0 takes the Gaussian (rbf) branch; any other value takes the Laplace branch.
gaussian_selected = imm_wisc1['best_kernel'] == 0
if not gaussian_selected:
    print('Laplace Kernel Matrix')
    Kmat = pairwise_kernels(X, metric=laplace, gamma=gamma)
    imm_key = 'y_kmat_imm_on_kkm'
else:
    Kmat = pairwise_kernels(X, metric=rbf, gamma=gamma)
    # For the Gaussian kernel, pick whichever IMM variant has the lower price.
    taylor_cheaper = imm_wisc1['price_taylor_imm_on_kkm'] < imm_wisc1['price_kmat_imm_on_kkm']
    if taylor_cheaper:
        print('Gaussian Taylor')
        imm_key = 'y_taylor_imm_on_kkm'
    else:
        print('Gaussian Kernel Matrix')
        imm_key = 'y_kmat_imm_on_kkm'
y_imm = imm_wisc2[imm_key]

# NOTE(review): the selection above is overridden unconditionally, so the
# kernel-matrix IMM labels are always used here, even when 'Gaussian Taylor'
# was printed. The Pathbased and Aggregation cells do not do this -- confirm
# whether the override is intentional.
y_imm = imm_wisc2['y_kmat_imm_on_kkm']
refine_wisc1, refine_wisc2 = refine_imm(X, y_true, y_kkm, y_imm, Kmat, max_leaves=4)

In [None]:
# Show the first return values of imm_experiments and refine_imm for the breast cancer data.
print(imm_wisc1, refine_wisc1)