# Accuracy Tests for GSSL Methods

In [14]:
import numpy as np
from numpy.random import multivariate_normal as mvn

from scipy.stats import norm
from scipy.optimize import minimize

from itertools import product

from tqdm import tqdm

from accuracy import KNN_acc
from accuracy import Prox_acc

In [15]:
rbf = lambda x1, x2: np.exp(-0.5*np.linalg.norm(x1 - x2)**2)
unif = lambda x1, x2: 1

## Three Cluster Data

Code to generate X, y, knownvals given centers and a covariance matrix

In [3]:
def clusters(m3, centers, covar):
	# m3 is the number of points around each distribution

	knownvals = [int(j*m3) for j in range(3)] # Points for which we know the value of the label

	X = mvn(centers[0],covar, m3)
	X = np.append(X,mvn(centers[1],covar, m3), axis=0)
	X = np.append(X,mvn(centers[2],covar, m3), axis=0)

	y = [1 for i in range(m3)]
	y = y + [-1 for i in range(2*m3)]

	return X, y, knownvals

### Well-Separated Clusters

Tuning of Parameters

In [10]:
M = 300 # Multiple of 3

centers = [[0,0],[1,0],[1,1]]
covar = 0.01*np.identity(2)

In [32]:
num_runs = 5

params = {
	"alpha": [1, 2, 4],
	"tau": [0.1, 1, 10],
	"lossf": ["probit", "regression"],
	"k": [5, 10, 15], 
	"kernel": [rbf, unif]
}

keys, values = zip(*params.items())
combinations = [dict(zip(keys, p)) for p in product(*values)] # Get all combinations of parameters

accuracies = []

for c in tqdm(combinations):
	sum_acc = 0
	for i in range(num_runs):
		X, y, knownvals = clusters(int(M/3), centers, covar)
		sum_acc += KNN_acc(X, y, knownvals, alpha = c["alpha"], tau = c["tau"], lossf = c["lossf"], k = c["k"], kernel = c["kernel"])
	accuracies.append([c, sum_acc/num_runs])


100%|██████████| 108/108 [4:09:18<00:00, 138.50s/it]  


In [45]:
accuracies

[[{'alpha': 1,
   'tau': 0.1,
   'lossf': 'probit',
   'k': 5,
   'kernel': <function __main__.<lambda>(x1, x2)>},
  1.0],
 [{'alpha': 1,
   'tau': 0.1,
   'lossf': 'probit',
   'k': 5,
   'kernel': <function __main__.<lambda>(x1, x2)>},
  1.0],
 [{'alpha': 1,
   'tau': 0.1,
   'lossf': 'probit',
   'k': 10,
   'kernel': <function __main__.<lambda>(x1, x2)>},
  1.0],
 [{'alpha': 1,
   'tau': 0.1,
   'lossf': 'probit',
   'k': 10,
   'kernel': <function __main__.<lambda>(x1, x2)>},
  1.0],
 [{'alpha': 1,
   'tau': 0.1,
   'lossf': 'probit',
   'k': 15,
   'kernel': <function __main__.<lambda>(x1, x2)>},
  1.0],
 [{'alpha': 1,
   'tau': 0.1,
   'lossf': 'probit',
   'k': 15,
   'kernel': <function __main__.<lambda>(x1, x2)>},
  1.0],
 [{'alpha': 1,
   'tau': 0.1,
   'lossf': 'regression',
   'k': 5,
   'kernel': <function __main__.<lambda>(x1, x2)>},
  1.0],
 [{'alpha': 1,
   'tau': 0.1,
   'lossf': 'regression',
   'k': 5,
   'kernel': <function __main__.<lambda>(x1, x2)>},
  1.0],
 [{'

### Challenging Clusters

In [34]:
M = 300 # Multiple of 3

centers = [[0,0],[1,0],[1,1]]
covar = 0.1*np.identity(2)

In [35]:
num_runs = 5

params = {
	"alpha": [1, 2, 4],
	"tau": [0.1, 1, 10],
	"lossf": ["probit", "regression"],
	"k": [5, 10, 15], 
	"kernel": [rbf, unif]
}

keys, values = zip(*params.items())
combinations = [dict(zip(keys, p)) for p in product(*values)] # Get all combinations of parameters

accuracies2 = []

for c in tqdm(combinations):
	sum_acc = 0
	for i in range(num_runs):
		X, y, knownvals = clusters(int(M/3), centers, covar)
		sum_acc += KNN_acc(X, y, knownvals, alpha = c["alpha"], tau = c["tau"], lossf = c["lossf"], k = c["k"], kernel = c["kernel"])
	accuracies2.append([c, sum_acc/num_runs])

100%|██████████| 108/108 [4:14:35<00:00, 141.44s/it]  


In [47]:
accuracies2

[[{'alpha': 1,
   'tau': 0.1,
   'lossf': 'probit',
   'k': 5,
   'kernel': <function __main__.<lambda>(x1, x2)>},
  0.9059999999999999],
 [{'alpha': 1,
   'tau': 0.1,
   'lossf': 'probit',
   'k': 5,
   'kernel': <function __main__.<lambda>(x1, x2)>},
  0.924],
 [{'alpha': 1,
   'tau': 0.1,
   'lossf': 'probit',
   'k': 10,
   'kernel': <function __main__.<lambda>(x1, x2)>},
  0.8746666666666666],
 [{'alpha': 1,
   'tau': 0.1,
   'lossf': 'probit',
   'k': 10,
   'kernel': <function __main__.<lambda>(x1, x2)>},
  0.8446666666666667],
 [{'alpha': 1,
   'tau': 0.1,
   'lossf': 'probit',
   'k': 15,
   'kernel': <function __main__.<lambda>(x1, x2)>},
  0.8653333333333334],
 [{'alpha': 1,
   'tau': 0.1,
   'lossf': 'probit',
   'k': 15,
   'kernel': <function __main__.<lambda>(x1, x2)>},
  0.752],
 [{'alpha': 1,
   'tau': 0.1,
   'lossf': 'regression',
   'k': 5,
   'kernel': <function __main__.<lambda>(x1, x2)>},
  0.9433333333333334],
 [{'alpha': 1,
   'tau': 0.1,
   'lossf': 'regressio

## Spiral Data

### Well-Separated Clusters

### Challenging Clusters