# Experiments

This Jupyter notebook contains the code for the experiments shown in the report.

# Shape 2

In [None]:
import os

import numpy as np
import matplotlib.pyplot as plt
from matplotlib import cm
from sklearn.svm import SVC

import DatasetGeneratorsVF as DG
import FrameworkFunctionsVF as FF

In [None]:
#### STYLING PARAMETERS ####
# Apply the style sheet BEFORE the font overrides: plt.style.use() rewrites
# every rcParam the sheet defines, so anything set earlier can be reset.
# NOTE(review): the seaborn sheet is believed to set font.family itself --
# confirm against the installed style file.
try:
    plt.style.use('seaborn-v0_8')   # name of the style since Matplotlib 3.6
except OSError:
    plt.style.use('seaborn')        # older Matplotlib releases only know this name
plt.rcParams.update({'font.family':'serif'})
plt.rcParams.update({'font.serif':'Times New Roman'})
FONTSIZE = 20   # font size shared by labels, ticks, legends and titles

# Root output folder for the Shape 2 results (later cells append sub-folders)
save_path = 'Results/Shape2/'

In [None]:
#### CONSISTENT PARAMETERS ####
# Seed used throughout the notebook. Switch to one of the commented values to
# get different samples from the same distribution.
RANDOM_STATE = 1000
# RANDOM_STATE = 1001
# RANDOM_STATE = 1002

number_iter = 101

# Bounds for dataspace, given as [lower, upper] per feature
f1_b = [-6, 6]
f2_b = [-6, 6]

# Resolution for grid search
resol = 100

# Shape 2 dataset parameters: one mean / covariance matrix per class
means = np.array([
    [1.5, 0],
    [-1.5, 0],
])
covs = np.array([
    [[1, 0.5], [0.5, 1.5]],
    [[1, -0.5], [-0.5, 1.5]],
])

num_samples_test = np.array([1000, 1000])
num_samples_train = np.array([300, 300])

# Bias Parameters
planeCentre = np.array([1.5, 0])
prob = 0.05
alphas = np.array([225.0, 270.0, 315.0, 360.0])

# Density function parameters
nu = 3.0
sigma = 0.5

# Grid for SVM boundary plotting
# for plotting boundary see https://jakevdp.github.io/PythonDataScienceHandbook/05.07-support-vector-machines.html
x = np.linspace(*f1_b, 100)
y = np.linspace(*f2_b, 100)
# 'ij' indexing gives X[i, j] == x[i] and Y[i, j] == y[j]
X, Y = np.meshgrid(x, y, indexing='ij')
# One (f1, f2) row per grid node, in the same order as X.ravel()
xy = np.column_stack((X.ravel(), Y.ravel()))


In [None]:
#### UNBIASED TEST SET AND ORACLE ####
# NOTE: relies on `save_path` still being the root results folder set in an
# earlier cell; later experiment cells reassign it.

# Draw the two classes and stack them into one labelled test set
# (first returned array is labelled 1, the second 0).
DSTest1, DSTest0 = DG.GenerateMultiVarGaussians(means,covs, num_samples_test, RANDOM_STATE)
TestX = np.vstack((DSTest1,DSTest0))
TestY = np.hstack((np.ones((np.shape(DSTest1)[0])),np.zeros((np.shape(DSTest0)[0]))))

# np.savetxt / savefig do not create missing folders, so make sure it exists
os.makedirs(save_path + 'TestSet/', exist_ok=True)
np.savetxt(save_path + 'TestSet/TestX.csv', TestX, delimiter=',')
np.savetxt(save_path + 'TestSet/TestY.csv', TestY, delimiter=',')

# Train
# The oracle is fitted on the test set itself, so OracleAcc is its accuracy on
# the very data it was trained on.
Oracle = SVC(kernel = 'linear', random_state = RANDOM_STATE)
Oracle.fit(TestX,TestY)
OracleAcc = Oracle.score(TestX,TestY)
OracleDB = Oracle.decision_function(xy).reshape(X.shape)

# Plot the decision boundary (solid) and the +/-1 margins (dashed) over the data
fig, axs = plt.subplots(figsize=(10,10))
axs.contour(X, Y, OracleDB, colors='k',
            levels=[-1, 0, 1], alpha=0.5,
            linestyles=['--', '-', '--'])
axs.text(-4.0, -4.5, "Oracle SVM accuracy on unbiased testing set: {:.2%}".format(OracleAcc),
            bbox=dict(facecolor='white'), fontsize = 16)
axs.scatter(DSTest1[:,0],DSTest1[:,1], label = "1")
axs.scatter(DSTest0[:,0],DSTest0[:,1], label = '0')
axs.set_xlabel("F1", fontsize = FONTSIZE)
axs.set_ylabel("F2", fontsize = FONTSIZE)
axs.tick_params(labelsize=FONTSIZE)
axs.legend(fontsize = FONTSIZE)
axs.set_xlim(f1_b)
axs.set_ylim(f2_b)
plt.tight_layout()
plt.savefig(save_path + 'TestSet/Oracle.svg',format = 'svg')

In [None]:
#### UNBIASED TRAINING SET ####
# NOTE: relies on `save_path` still being the root results folder set in an
# earlier cell; later experiment cells reassign it.

# Seed is offset by 2 so the training draw differs from the test draw
DSUnbiased1, DSUnbiased0 = DG.GenerateMultiVarGaussians(means,covs, num_samples_train, RANDOM_STATE+2)
UnbiasedX = np.vstack((DSUnbiased1,DSUnbiased0))
UnbiasedY = np.hstack((np.ones((np.shape(DSUnbiased1)[0])),np.zeros((np.shape(DSUnbiased0)[0]))))

# np.savetxt / savefig do not create missing folders, so make sure it exists
os.makedirs(save_path + 'RANDOMSTATE{:}/'.format(RANDOM_STATE)+'UnbiasedTrainingSet/', exist_ok=True)
np.savetxt(save_path + 'RANDOMSTATE{:}/'.format(RANDOM_STATE)+'UnbiasedTrainingSet/UnbiasedX.csv', UnbiasedX, delimiter=',')
np.savetxt(save_path + 'RANDOMSTATE{:}/'.format(RANDOM_STATE)+'UnbiasedTrainingSet/UnbiasedY.csv', UnbiasedY, delimiter=',')

# Train on the unbiased training set, evaluate on the test set
clfUnbiased = SVC(kernel = 'linear', random_state = RANDOM_STATE)
clfUnbiased.fit(UnbiasedX,UnbiasedY)
clfUnbiasedAcc = clfUnbiased.score(TestX,TestY)
clfUnbiasedDB = clfUnbiased.decision_function(xy).reshape(X.shape)

# Plot the decision boundary (solid) and the +/-1 margins (dashed) over the data
fig, axs = plt.subplots(figsize=(10,10))
axs.contour(X, Y, clfUnbiasedDB, colors='k',
            levels=[-1, 0, 1], alpha=0.5,
            linestyles=['--', '-', '--'])
axs.text(-4.0, -4.5, "Linear SVM accuracy on unbiased testing set: {:.2%}".format(clfUnbiasedAcc),
            bbox=dict(facecolor='white'), fontsize = 16)
axs.scatter(DSUnbiased1[:,0],DSUnbiased1[:,1], label = "1")
axs.scatter(DSUnbiased0[:,0],DSUnbiased0[:,1], label = '0')
axs.set_xlabel("F1", fontsize = FONTSIZE)
axs.set_ylabel("F2", fontsize = FONTSIZE)
axs.tick_params(labelsize=FONTSIZE)
axs.legend(fontsize = FONTSIZE)
axs.set_xlim(f1_b)
axs.set_ylim(f2_b)
plt.tight_layout()
plt.savefig(save_path + 'RANDOMSTATE{:}/'.format(RANDOM_STATE)+'UnbiasedTrainingSet/Unbiased.svg',format = 'svg')

In [None]:
#### ALPADR FOR BIASED TRAINING SETS ####

def ALPADRExperimentRegularised(DSUnbiased1, DSUnbiased0, TestX, TestY, Oracle,
                    alphas, planeCentre, prob,
                    resol, f1_b, f2_b, lambda_prm, sigma, nu,
                    number_iter, save_path, random_state, FONTSIZE,
                    NORMALISED = True):
    """Run the ALPADR sampling experiment once per bias angle and save everything.

    For every angle in ``alphas`` class 1 is biased with ``DG.KathBiasPlane2D``
    (class 0 is used as given). Then ``number_iter`` rounds are run; each round

    1. fits a linear SVC on the current training set and scores it on the test set,
    2. saves the current training set,
    3. calls ``FF.ALPADR`` and locates the grid maxima with ``FF.GridFindMax``,
    4. saves a three-panel figure and the grids returned by ``FF.ALPADR``,
    5. labels the max point of the regularised loss with ``Oracle`` and appends
       it to the training set used by the next round.

    Note: Function is specific to experiment in report and only works for
    inducing bias in class 1.

    Parameters
    ----------
    DSUnbiased1, DSUnbiased0 : 2-column arrays
        Unbiased training samples of class 1 and class 0.
    TestX, TestY : arrays
        Test set on which every fitted classifier is scored.
    Oracle : fitted classifier
        Only ``Oracle.predict`` is used, to label each newly added point.
    alphas : sequence of float
        Bias angles in degrees; each is converted to radians and negated before
        being passed to ``DG.KathBiasPlane2D``.
    planeCentre, prob
        Forwarded unchanged to ``DG.KathBiasPlane2D``.
    resol, sigma, nu
        Forwarded unchanged to ``FF.ALPADR``.
    f1_b, f2_b : [lower, upper]
        Data-space bounds; used for the plotting grid and axis limits and
        forwarded to ``FF.ALPADR``.
    lambda_prm : float
        Forwarded to ``FF.ALPADR``; also scales ``Densities`` in the middle panel.
    number_iter : int
        Number of rounds per angle (one point is added per round).
    save_path : str
        Output folder prefix; concatenated directly, so it must end with '/'.
    random_state : int
        Seed passed to ``DG.KathBiasPlane2D``, ``FF.ALPADR`` and the SVC.
    FONTSIZE : int
        Font size for all figure text.
    NORMALISED : bool
        Forwarded to ``FF.ALPADR`` as ``normalise``.

    Returns
    -------
    None. All results are written below ``save_path + 'alpha<angle>/'``.
    """

    # Grid for SVM boundary plotting
    # for plotting boundary see https://jakevdp.github.io/PythonDataScienceHandbook/05.07-support-vector-machines.html
    x = np.linspace(f1_b[0], f1_b[1], 100)
    y = np.linspace(f2_b[0], f2_b[1], 100)
    Y, X = np.meshgrid(y, x)
    xy = np.vstack([X.ravel(), Y.ravel()]).T

    for i, alpha in enumerate(alphas):
        # All outputs of this angle go below one folder; np.savetxt / savefig
        # do not create missing folders, so create them up front.
        alpha_dir = save_path + 'alpha{:}/'.format(int(alpha))
        for sub_dir in ('Dataset', 'Plots', 'Grids', 'LossMax', 'Densities', 'DensityRegLoss'):
            os.makedirs(alpha_dir + sub_dir, exist_ok=True)

        # Inducing bias in dataset
        DSBiased0 = DSUnbiased0 # NO BIAS FOR CLASS 0 in these experiments
        DSBiased1, _ = DG.KathBiasPlane2D(DSUnbiased1, -np.radians(alpha),
            planeCentre, prob, random_state)
        BiasedX = np.vstack((DSBiased1,DSBiased0))
        BiasedY = np.hstack((np.ones((np.shape(DSBiased1)[0])),np.zeros((np.shape(DSBiased0)[0]))))

        BiasedAccArray = np.zeros(number_iter)
        print('\n'+str(i))

        for j in range(number_iter):
            print(j,end = ',')
            # Train classifier on biased training set
            # (seeded from the argument, not from a notebook-level global)
            clfBiased = SVC(kernel = 'linear', random_state = random_state)
            clfBiased.fit(BiasedX,BiasedY)
            clfBiasedAcc = clfBiased.score(TestX,TestY)
            BiasedAccArray[j] = clfBiasedAcc
            clfBiasedDB = clfBiased.decision_function(xy).reshape(X.shape)

            # Saving Dataset
            np.savetxt(alpha_dir + 'Dataset/BiasedX_Iteration{:}.csv'.format(j), BiasedX, delimiter=',')
            np.savetxt(alpha_dir + 'Dataset/BiasedY_Iteration{:}.csv'.format(j), BiasedY, delimiter=',')

            # Finding adversarial and density regions
            LossMax, LossMaxLabel, Densities, DensityRegLoss, F1_pois, F2_pois = FF.ALPADR(BiasedX, BiasedY, clfBiased,
                f1_b, f2_b, resol, lambda_prm, sigma, nu, normalise = NORMALISED, random_state = random_state)

            # Point in dataspace of maximums (the maximum values themselves are not needed)
            UnRegPosMax, _ = FF.GridFindMax(F1_pois,F2_pois,LossMax)
            RegPosMax, _ = FF.GridFindMax(F1_pois,F2_pois,DensityRegLoss)

            # Plotting: one panel per field, (field, title, max point or None)
            fig, axs = plt.subplots(nrows=1, ncols=3, figsize=(30,10))
            panels = [
                (LossMax,
                 "Adversarial regions (max [{:.2f},{:.2f}])".format(UnRegPosMax[0],UnRegPosMax[1]),
                 UnRegPosMax),
                (lambda_prm*Densities,
                 r" $\lambda\times N$ Regions",
                 None),
                (DensityRegLoss,
                 "Regularised Loss (max [{:.2f},{:.2f}])".format(RegPosMax[0],RegPosMax[1]),
                 RegPosMax),
            ]

            filled_contours = []
            for ax, (field, title, max_point) in zip(axs, panels):
                ax.grid(False)
                # Decision boundary (solid) and +/-1 margins (dashed)
                ax.contour(X, Y, clfBiasedDB, colors='k',
                           levels=[-1, 0, 1], alpha=0.5,
                           linestyles=['--', '-', '--'])
                filled_contours.append(
                    ax.contourf(F1_pois,F2_pois,field, cmap = cm.jet, levels = 100,extend = 'max'))
                ax.scatter(DSBiased1[:,0],DSBiased1[:,1], label = "1")
                ax.scatter(DSBiased0[:,0],DSBiased0[:,1], label = '0')
                if max_point is not None:
                    ax.scatter(max_point[0], max_point[1], c = 'w',marker='*', s = 400, label = 'Max Point')
                ax.set_title(title, fontsize = FONTSIZE)
                ax.set_xlabel("F1", fontsize = FONTSIZE)
                ax.set_ylabel("F2", fontsize = FONTSIZE)
                ax.tick_params(labelsize=FONTSIZE)
                ax.legend(fontsize = FONTSIZE, labelcolor = 'white')
                ax.set_xlim(f1_b)
                ax.set_ylim(f2_b)

            # Only the middle panel carries the accuracy box
            axs[1].text(-4.5, -4.5, "SVM accuracy on test set: {:.2%}".format(clfBiasedAcc),
                        bbox=dict(facecolor='white'), fontsize = FONTSIZE)

            for ax, filled in zip(axs, filled_contours):
                cbar = fig.colorbar(filled, ax = ax)
                cbar.ax.tick_params(labelsize=FONTSIZE)

            fig.suptitle('alpha: {:}, Iteration: {:}'.format(alpha, j), fontsize = FONTSIZE)
            fig.savefig(alpha_dir + 'Plots/Iteration{:}.svg'.format(j), format = 'svg')
            # Close this figure explicitly so figures do not pile up in memory
            plt.close(fig)

            np.savetxt(alpha_dir + 'Grids/F1Grid_It{:}.csv'.format(j), F1_pois, delimiter=',')
            np.savetxt(alpha_dir + 'Grids/F2Grid_It{:}.csv'.format(j), F2_pois, delimiter=',')
            np.savetxt(alpha_dir + 'LossMax/LossMax_It{:}.csv'.format(j), LossMax, delimiter=',')
            np.savetxt(alpha_dir + 'Densities/Densities_It{:}.csv'.format(j), Densities, delimiter=',')
            np.savetxt(alpha_dir + 'DensityRegLoss/DensityRegLoss_Iteration{:}.csv'.format(j), DensityRegLoss, delimiter=',')

            # Adding max point at regularised loss into training dataset
            # using oracle to classify point
            newPointLabel = Oracle.predict([RegPosMax])

            BiasedX = np.vstack([BiasedX,RegPosMax])
            BiasedY = np.hstack([BiasedY,newPointLabel])

            # Keep the per-class arrays (used only for plotting) in sync
            if newPointLabel[0] == 0:
                DSBiased0 = np.vstack([DSBiased0,RegPosMax])
            else:
                DSBiased1 = np.vstack([DSBiased1,RegPosMax])

        np.savetxt(alpha_dir + 'AccArray.csv', BiasedAccArray, delimiter=',')

def UncertaintyExperiment(DSUnbiased1, DSUnbiased0, TestX, TestY, Oracle,
                    alphas, planeCentre, prob,
                    resol, f1_b, f2_b,
                    number_iter, save_path, random_state, FONTSIZE):
    """Run the uncertainty-sampling experiment once per bias angle and save everything.

    For every angle in ``alphas`` class 1 is biased with ``DG.KathBiasPlane2D``
    (class 0 is used as given). Then ``number_iter`` rounds are run; each round
    fits a linear SVC (``probability=True``) on the current training set, scores
    it on the test set, saves the training set, evaluates
    ``FF.UncertaintySampling`` and picks the point returned by
    ``FF.GridFindMax``, saves a figure and the grids, and finally appends that
    point, labelled by ``Oracle``, to the training set used by the next round.

    Note: Function is specific to experiment in report and only works for
    inducing bias in class 1.

    Parameters
    ----------
    DSUnbiased1, DSUnbiased0 : 2-column arrays
        Unbiased training samples of class 1 and class 0.
    TestX, TestY : arrays
        Test set on which every fitted classifier is scored.
    Oracle : fitted classifier
        Only ``Oracle.predict`` is used, to label each newly added point.
    alphas : sequence of float
        Bias angles in degrees; each is converted to radians and negated before
        being passed to ``DG.KathBiasPlane2D``.
    planeCentre, prob
        Forwarded unchanged to ``DG.KathBiasPlane2D``.
    resol
        Forwarded unchanged to ``FF.UncertaintySampling``.
    f1_b, f2_b : [lower, upper]
        Data-space bounds; used for the plotting grid and axis limits and
        forwarded to ``FF.UncertaintySampling``.
    number_iter : int
        Number of rounds per angle (one point is added per round).
    save_path : str
        Output folder prefix; concatenated directly, so it must end with '/'.
    random_state : int
        Seed passed to ``DG.KathBiasPlane2D`` and the SVC.
    FONTSIZE : int
        Font size for all figure text.

    Returns
    -------
    None. All results are written below ``save_path + 'alpha<angle>/'``.
    """

    # Grid for SVM boundary plotting
    # for plotting boundary see https://jakevdp.github.io/PythonDataScienceHandbook/05.07-support-vector-machines.html
    x = np.linspace(f1_b[0], f1_b[1], 100)
    y = np.linspace(f2_b[0], f2_b[1], 100)
    Y, X = np.meshgrid(y, x)
    xy = np.vstack([X.ravel(), Y.ravel()]).T

    for i, alpha in enumerate(alphas):
        # All outputs of this angle go below one folder; np.savetxt / savefig
        # do not create missing folders, so create them up front.
        alpha_dir = save_path + 'alpha{:}/'.format(int(alpha))
        for sub_dir in ('Dataset', 'Plots', 'Grids', 'Uncertainties'):
            os.makedirs(alpha_dir + sub_dir, exist_ok=True)

        # Inducing bias in dataset
        DSBiased0 = DSUnbiased0 # NO BIAS FOR CLASS 0 in these experiments
        DSBiased1, _ = DG.KathBiasPlane2D(DSUnbiased1, -np.radians(alpha),
            planeCentre, prob, random_state)
        BiasedX = np.vstack((DSBiased1,DSBiased0))
        BiasedY = np.hstack((np.ones((np.shape(DSBiased1)[0])),np.zeros((np.shape(DSBiased0)[0]))))

        BiasedAccArray = np.zeros(number_iter)
        print('\n'+str(i))

        for j in range(number_iter):
            print(j,end = ',')

            # Train classifier on biased training set
            # (seeded from the argument, not from a notebook-level global)
            clfBiased = SVC(kernel = 'linear', random_state = random_state, probability = True)
            clfBiased.fit(BiasedX,BiasedY)
            clfBiasedAcc = clfBiased.score(TestX,TestY)
            BiasedAccArray[j] = clfBiasedAcc
            clfBiasedDB = clfBiased.decision_function(xy).reshape(X.shape)

            # Saving Dataset
            np.savetxt(alpha_dir + 'Dataset/BiasedX_Iteration{:}.csv'.format(j), BiasedX, delimiter=',')
            np.savetxt(alpha_dir + 'Dataset/BiasedY_Iteration{:}.csv'.format(j), BiasedY, delimiter=',')

            # Finding uncertainties
            F1grid, F2grid, Uncertainties = FF.UncertaintySampling(clfBiased, f1_b, f2_b, resol)

            # Point in dataspace of uncertainty (the maximum value itself is not needed)
            UncertaintyPosMax, _ = FF.GridFindMax(F1grid,F2grid,Uncertainties)

            # Plotting
            fig, axs = plt.subplots(figsize=(10,10))

            # Decision boundary (solid) and +/-1 margins (dashed)
            axs.contour(X, Y, clfBiasedDB, colors='k',
                levels=[-1, 0, 1], alpha=0.5,
                linestyles=['--', '-', '--'])
            cs0 = axs.contourf(F1grid,F2grid,Uncertainties, cmap = cm.jet, levels = 100, extend = 'max')
            axs.scatter(DSBiased1[:,0],DSBiased1[:,1], label = "1")
            axs.scatter(DSBiased0[:,0],DSBiased0[:,1], label = '0')
            axs.scatter(UncertaintyPosMax[0], UncertaintyPosMax[1], c = 'k',marker='*', s = 400, label = 'Max Point')
            axs.text(-4.5, -4.5, "SVM accuracy on test set: {:.2%}".format(clfBiasedAcc),
                bbox=dict(facecolor='white'), fontsize = FONTSIZE)
            axs.set_title("Max Uncertainty([{:.2f},{:.2f}])".format(UncertaintyPosMax[0],UncertaintyPosMax[1]) , fontsize = FONTSIZE)
            axs.set_xlabel("F1", fontsize = FONTSIZE)
            axs.set_ylabel("F2", fontsize = FONTSIZE)
            axs.tick_params(labelsize=FONTSIZE)
            axs.legend(fontsize = FONTSIZE)
            axs.set_xlim(f1_b)
            axs.set_ylim(f2_b)

            fig.colorbar(cs0, ax = axs)

            fig.savefig(alpha_dir + 'Plots/Iteration{:}.svg'.format(j), format = 'svg')
            # Close this figure explicitly so figures do not pile up in memory
            plt.close(fig)

            # Adding point at max uncertainty into training dataset
            # using oracle to classify point
            newPointLabel = Oracle.predict([UncertaintyPosMax])

            BiasedX = np.vstack([BiasedX,UncertaintyPosMax])
            BiasedY = np.hstack([BiasedY,newPointLabel])

            # Keep the per-class arrays (used only for plotting) in sync
            if newPointLabel[0] == 0:
                DSBiased0 = np.vstack([DSBiased0,UncertaintyPosMax])
            else:
                DSBiased1 = np.vstack([DSBiased1,UncertaintyPosMax])

            np.savetxt(alpha_dir + 'Grids/F1Grid_It{:}.csv'.format(j), F1grid, delimiter=',')
            np.savetxt(alpha_dir + 'Grids/F2Grid_It{:}.csv'.format(j), F2grid, delimiter=',')
            np.savetxt(alpha_dir + 'Uncertainties/Uncertainties_It{:}.csv'.format(j), Uncertainties, delimiter=',')

        np.savetxt(alpha_dir + 'AccArray.csv', BiasedAccArray, delimiter=',')

def RandomIdealExperiment(DSUnbiased1, DSUnbiased0, TestX, TestY, Oracle,
                    covs, means,
                    alphas, planeCentre, prob,
                    resol, f1_b, f2_b,
                    number_iter, save_path, random_state, FONTSIZE):
    """Run the random ("ideal") sampling baseline once per bias angle and save everything.

    For every angle in ``alphas`` class 1 is biased with ``DG.KathBiasPlane2D``
    (class 0 is used as given) and ``number_iter`` candidate points are drawn
    up front with ``FF.IdealRandomSampling``. Each round then saves the current
    training set, fits a linear SVC (``probability=True``) on it, scores it on
    the test set, saves a figure, and appends the round's candidate point,
    labelled by ``Oracle``, to the training set used by the next round.

    Note: Function is specific to experiment in report and only works for
    inducing bias in class 1.

    Parameters
    ----------
    DSUnbiased1, DSUnbiased0 : 2-column arrays
        Unbiased training samples of class 1 and class 0.
    TestX, TestY : arrays
        Test set on which every fitted classifier is scored.
    Oracle : fitted classifier
        Only ``Oracle.predict`` is used, to label each newly added point.
    covs, means
        Forwarded to ``FF.IdealRandomSampling`` (note it is called as
        ``(means, covs, ...)``, the reverse of this signature's order).
    alphas : sequence of float
        Bias angles in degrees; each is converted to radians and negated before
        being passed to ``DG.KathBiasPlane2D``.
    planeCentre, prob
        Forwarded unchanged to ``DG.KathBiasPlane2D``.
    resol
        Unused here; kept so the signature parallels the other experiments.
    f1_b, f2_b : [lower, upper]
        Data-space bounds; used for the plotting grid and axis limits.
    number_iter : int
        Number of rounds per angle (one point is added per round).
    save_path : str
        Output folder prefix; concatenated directly, so it must end with '/'.
    random_state : int
        Seed passed to ``DG.KathBiasPlane2D`` and the SVC; ``random_state + i``
        (``i`` = index of the angle) seeds ``FF.IdealRandomSampling``.
    FONTSIZE : int
        Font size for all figure text.

    Returns
    -------
    None. All results are written below ``save_path + 'alpha<angle>/'``.
    """

    # Grid for SVM boundary plotting
    # for plotting boundary see https://jakevdp.github.io/PythonDataScienceHandbook/05.07-support-vector-machines.html
    x = np.linspace(f1_b[0], f1_b[1], 100)
    y = np.linspace(f2_b[0], f2_b[1], 100)
    Y, X = np.meshgrid(y, x)
    xy = np.vstack([X.ravel(), Y.ravel()]).T

    for i, alpha in enumerate(alphas):
        # All outputs of this angle go below one folder; np.savetxt / savefig
        # do not create missing folders, so create them up front.
        alpha_dir = save_path + 'alpha{:}/'.format(int(alpha))
        for sub_dir in ('Dataset', 'Plots'):
            os.makedirs(alpha_dir + sub_dir, exist_ok=True)

        # Inducing bias in dataset
        DSBiased0 = DSUnbiased0 # NO BIAS FOR CLASS 0 in these experiments
        DSBiased1, _ = DG.KathBiasPlane2D(DSUnbiased1, -np.radians(alpha),
            planeCentre, prob, random_state)
        BiasedX = np.vstack((DSBiased1,DSBiased0))
        BiasedY = np.hstack((np.ones((np.shape(DSBiased1)[0])),np.zeros((np.shape(DSBiased0)[0]))))

        BiasedAccArray = np.zeros(number_iter)
        print('\n'+str(i))

        # Generate the points for Random Ideal Sampling
        # (a different seed per angle, derived from the argument rather than a global)
        new_points = FF.IdealRandomSampling(means,covs,number_iter, random_state+i)

        for j in range(number_iter):
            print(j,end = ',')
            new_point = new_points[j,:]

            # Saving Dataset
            np.savetxt(alpha_dir + 'Dataset/BiasedX_Iteration{:}.csv'.format(j), BiasedX, delimiter=',')
            np.savetxt(alpha_dir + 'Dataset/BiasedY_Iteration{:}.csv'.format(j), BiasedY, delimiter=',')

            # Train classifier on biased training set
            clfBiased = SVC(kernel = 'linear', random_state = random_state, probability = True)
            clfBiased.fit(BiasedX,BiasedY)
            clfBiasedAcc = clfBiased.score(TestX,TestY)
            BiasedAccArray[j] = clfBiasedAcc
            clfBiasedDB = clfBiased.decision_function(xy).reshape(X.shape)

            # Plotting
            fig, axs = plt.subplots(figsize=(10,10))

            # Decision boundary (solid) and +/-1 margins (dashed)
            axs.contour(X, Y, clfBiasedDB, colors='k',
                levels=[-1, 0, 1], alpha=0.5,
                linestyles=['--', '-', '--'])

            axs.scatter(DSBiased1[:,0],DSBiased1[:,1], label = "1")
            axs.scatter(DSBiased0[:,0],DSBiased0[:,1], label = '0')
            axs.scatter(new_point[0], new_point[1], c = 'k',marker='*', s = 400, label = 'Sampled Point')
            axs.text(-4.5, -4.5, "SVM accuracy on test set: {:.2%}".format(clfBiasedAcc),
                bbox=dict(facecolor='white'), fontsize = FONTSIZE)
            axs.set_title("Random Sample ([{:.2f},{:.2f}])".format(new_point[0],new_point[1]) , fontsize = FONTSIZE)
            axs.set_xlabel("F1", fontsize = FONTSIZE)
            axs.set_ylabel("F2", fontsize = FONTSIZE)
            axs.tick_params(labelsize=FONTSIZE)
            axs.legend(fontsize = FONTSIZE)
            axs.set_xlim(f1_b)
            axs.set_ylim(f2_b)

            fig.savefig(alpha_dir + 'Plots/Iteration{:}.svg'.format(j), format = 'svg')
            # Close this figure explicitly so figures do not pile up in memory
            plt.close(fig)

            # Adding random point into training dataset
            # using oracle to classify point
            newPointLabel = Oracle.predict([new_point])

            BiasedX = np.vstack([BiasedX,[new_point]])
            BiasedY = np.hstack([BiasedY,newPointLabel])

            # Keep the per-class arrays (used only for plotting) in sync
            if newPointLabel[0] == 0:
                DSBiased0 = np.vstack([DSBiased0,[new_point]])
            else:
                DSBiased1 = np.vstack([DSBiased1,[new_point]])

        np.savetxt(alpha_dir + 'AccArray.csv', BiasedAccArray, delimiter=',')


In [None]:
# Ignoring MATPLOTLIB function deprecation warnings
# https://stackoverflow.com/questions/24502500/python-matplotlib-getting-rid-of-matplotlib-mpl-warning
import warnings
import matplotlib.cbook
# Use the public warning class: the old alias `matplotlib.cbook.mplDeprecation`
# was removed from recent Matplotlib releases and would raise AttributeError.
warnings.filterwarnings("ignore",category=matplotlib.MatplotlibDeprecationWarning)

In [None]:
# Shape 2 -- ALPADR with trade-off parameter 0.4
# (reassigns the notebook-level `save_path` to this run's output folder)
lambda_prm = 0.4
save_path = 'Results/Shape2/RANDOMSTATE{:}/to_prm_0point4/'.format(RANDOM_STATE)

ALPADRExperimentRegularised(
    DSUnbiased1, DSUnbiased0, TestX, TestY, Oracle,
    alphas, planeCentre, prob,
    resol, f1_b, f2_b, lambda_prm, sigma, nu,
    number_iter, save_path, RANDOM_STATE, FONTSIZE,
    NORMALISED=True,
)

In [None]:
# Shape 2 -- ALPADR with trade-off parameter 0.8
# (reassigns the notebook-level `save_path` to this run's output folder)
lambda_prm = 0.8
save_path = 'Results/Shape2/RANDOMSTATE{:}/to_prm_0point8/'.format(RANDOM_STATE)

ALPADRExperimentRegularised(
    DSUnbiased1, DSUnbiased0, TestX, TestY, Oracle,
    alphas, planeCentre, prob,
    resol, f1_b, f2_b, lambda_prm, sigma, nu,
    number_iter, save_path, RANDOM_STATE, FONTSIZE,
    NORMALISED=True,
)

In [None]:
# Shape 2 -- uncertainty sampling
# (reassigns the notebook-level `save_path` to this run's output folder)
save_path = 'Results/Shape2/RANDOMSTATE{:}/UncertaintySampling/'.format(RANDOM_STATE)

UncertaintyExperiment(
    DSUnbiased1, DSUnbiased0, TestX, TestY, Oracle,
    alphas, planeCentre, prob,
    resol, f1_b, f2_b,
    number_iter, save_path, RANDOM_STATE, FONTSIZE,
)

In [None]:
# Shape 2 -- random ideal sampling
# (reassigns the notebook-level `save_path` to this run's output folder)
save_path = 'Results/Shape2/RANDOMSTATE{:}/RandomIdealSampling/'.format(RANDOM_STATE)

RandomIdealExperiment(
    DSUnbiased1, DSUnbiased0, TestX, TestY, Oracle,
    covs, means,
    alphas, planeCentre, prob,
    resol, f1_b, f2_b,
    number_iter, save_path, RANDOM_STATE, FONTSIZE,
)

# Shape 1

In [None]:
# Root output folder for the Shape 1 results
save_path = 'Results/Shape1/'

# Shape 1 dataset parameters: both classes use the 2x2 identity covariance.
# Every other parameter keeps the value assigned earlier in the notebook.
covs = np.array([
    [[1, 0], [0, 1]],
    [[1, 0], [0, 1]],
])


In [None]:
#### UNBIASED TEST SET AND ORACLE ####
# NOTE: relies on `save_path` still being the root results folder set in an
# earlier cell; later experiment cells reassign it.

# Draw the two classes and stack them into one labelled test set
# (first returned array is labelled 1, the second 0).
DSTest1, DSTest0 = DG.GenerateMultiVarGaussians(means,covs, num_samples_test, RANDOM_STATE)
TestX = np.vstack((DSTest1,DSTest0))
TestY = np.hstack((np.ones((np.shape(DSTest1)[0])),np.zeros((np.shape(DSTest0)[0]))))

# np.savetxt / savefig do not create missing folders, so make sure it exists
os.makedirs(save_path + 'TestSet/', exist_ok=True)
np.savetxt(save_path + 'TestSet/TestX.csv', TestX, delimiter=',')
np.savetxt(save_path + 'TestSet/TestY.csv', TestY, delimiter=',')

# Train
# The oracle is fitted on the test set itself, so OracleAcc is its accuracy on
# the very data it was trained on.
Oracle = SVC(kernel = 'linear', random_state = RANDOM_STATE)
Oracle.fit(TestX,TestY)
OracleAcc = Oracle.score(TestX,TestY)
OracleDB = Oracle.decision_function(xy).reshape(X.shape)

# Plot the decision boundary (solid) and the +/-1 margins (dashed) over the data
fig, axs = plt.subplots(figsize=(10,10))
axs.contour(X, Y, OracleDB, colors='k',
            levels=[-1, 0, 1], alpha=0.5,
            linestyles=['--', '-', '--'])
axs.text(-4.0, -4.5, "Oracle SVM accuracy on unbiased testing set: {:.2%}".format(OracleAcc),
            bbox=dict(facecolor='white'), fontsize = 16)
axs.scatter(DSTest1[:,0],DSTest1[:,1], label = "1")
axs.scatter(DSTest0[:,0],DSTest0[:,1], label = '0')
axs.set_xlabel("F1", fontsize = FONTSIZE)
axs.set_ylabel("F2", fontsize = FONTSIZE)
axs.tick_params(labelsize=FONTSIZE)
axs.legend(fontsize = FONTSIZE)
axs.set_xlim(f1_b)
axs.set_ylim(f2_b)
plt.tight_layout()
plt.savefig(save_path + 'TestSet/Oracle.svg',format = 'svg')

In [None]:
#### UNBIASED TRAINING SET ####
# NOTE: relies on `save_path` still being the root results folder set in an
# earlier cell; later experiment cells reassign it.

# Seed is offset by 2 so the training draw differs from the test draw
DSUnbiased1, DSUnbiased0 = DG.GenerateMultiVarGaussians(means,covs, num_samples_train, RANDOM_STATE+2)
UnbiasedX = np.vstack((DSUnbiased1,DSUnbiased0))
UnbiasedY = np.hstack((np.ones((np.shape(DSUnbiased1)[0])),np.zeros((np.shape(DSUnbiased0)[0]))))

# np.savetxt / savefig do not create missing folders, so make sure it exists
os.makedirs(save_path + 'RANDOMSTATE{:}/'.format(RANDOM_STATE)+'UnbiasedTrainingSet/', exist_ok=True)
np.savetxt(save_path + 'RANDOMSTATE{:}/'.format(RANDOM_STATE)+'UnbiasedTrainingSet/UnbiasedX.csv', UnbiasedX, delimiter=',')
np.savetxt(save_path + 'RANDOMSTATE{:}/'.format(RANDOM_STATE)+'UnbiasedTrainingSet/UnbiasedY.csv', UnbiasedY, delimiter=',')

# Train on the unbiased training set, evaluate on the test set
clfUnbiased = SVC(kernel = 'linear', random_state = RANDOM_STATE)
clfUnbiased.fit(UnbiasedX,UnbiasedY)
clfUnbiasedAcc = clfUnbiased.score(TestX,TestY)
clfUnbiasedDB = clfUnbiased.decision_function(xy).reshape(X.shape)

# Plot the decision boundary (solid) and the +/-1 margins (dashed) over the data
fig, axs = plt.subplots(figsize=(10,10))
axs.contour(X, Y, clfUnbiasedDB, colors='k',
            levels=[-1, 0, 1], alpha=0.5,
            linestyles=['--', '-', '--'])
axs.text(-4.0, -4.5, "Linear SVM accuracy on unbiased testing set: {:.2%}".format(clfUnbiasedAcc),
            bbox=dict(facecolor='white'), fontsize = 16)
axs.scatter(DSUnbiased1[:,0],DSUnbiased1[:,1], label = "1")
axs.scatter(DSUnbiased0[:,0],DSUnbiased0[:,1], label = '0')
axs.set_xlabel("F1", fontsize = FONTSIZE)
axs.set_ylabel("F2", fontsize = FONTSIZE)
axs.tick_params(labelsize=FONTSIZE)
axs.legend(fontsize = FONTSIZE)
axs.set_xlim(f1_b)
axs.set_ylim(f2_b)
plt.tight_layout()
plt.savefig(save_path + 'RANDOMSTATE{:}/'.format(RANDOM_STATE)+'UnbiasedTrainingSet/Unbiased.svg',format = 'svg')

In [None]:
# Shape 1 -- ALPADR with trade-off parameter 0.4
# (reassigns the notebook-level `save_path` to this run's output folder)
lambda_prm = 0.4
save_path = 'Results/Shape1/RANDOMSTATE{:}/to_prm_0point4/'.format(RANDOM_STATE)

ALPADRExperimentRegularised(
    DSUnbiased1, DSUnbiased0, TestX, TestY, Oracle,
    alphas, planeCentre, prob,
    resol, f1_b, f2_b, lambda_prm, sigma, nu,
    number_iter, save_path, RANDOM_STATE, FONTSIZE,
    NORMALISED=True,
)

In [None]:
# Shape 1 -- ALPADR with trade-off parameter 0.8
# (reassigns the notebook-level `save_path` to this run's output folder)
lambda_prm = 0.8
save_path = 'Results/Shape1/RANDOMSTATE{:}/to_prm_0point8/'.format(RANDOM_STATE)

ALPADRExperimentRegularised(
    DSUnbiased1, DSUnbiased0, TestX, TestY, Oracle,
    alphas, planeCentre, prob,
    resol, f1_b, f2_b, lambda_prm, sigma, nu,
    number_iter, save_path, RANDOM_STATE, FONTSIZE,
    NORMALISED=True,
)

In [None]:
# Shape 1 -- uncertainty sampling
# (reassigns the notebook-level `save_path` to this run's output folder)
save_path = 'Results/Shape1/RANDOMSTATE{:}/UncertaintySampling/'.format(RANDOM_STATE)

UncertaintyExperiment(
    DSUnbiased1, DSUnbiased0, TestX, TestY, Oracle,
    alphas, planeCentre, prob,
    resol, f1_b, f2_b,
    number_iter, save_path, RANDOM_STATE, FONTSIZE,
)

In [None]:
# Shape 1 -- random ideal sampling
# (reassigns the notebook-level `save_path` to this run's output folder)
save_path = 'Results/Shape1/RANDOMSTATE{:}/RandomIdealSampling/'.format(RANDOM_STATE)

RandomIdealExperiment(
    DSUnbiased1, DSUnbiased0, TestX, TestY, Oracle,
    covs, means,
    alphas, planeCentre, prob,
    resol, f1_b, f2_b,
    number_iter, save_path, RANDOM_STATE, FONTSIZE,
)