Let's define the functions we need to solve Problems 13 through 18.

In [41]:
import numpy as np
import matplotlib.pyplot as plt
import random
import pandas as pd
from sklearn.svm import SVC

def SVM_func_rbf(X_in, y_in, X_out, y_out, gam):
    """Train an RBF-kernel SVC and report its training and test error.

    The classifier is fitted on (X_in, y_in). Each error is computed as
    ``1 - score(...)`` on the corresponding data set.

    X_in, y_in   -- training points and their labels
    X_out, y_out -- evaluation points and their labels
    gam          -- value passed to SVC as ``gamma``

    Returns the pair (E_in, E_out).
    """
    # C = inf is presumably meant to emulate a hard-margin SVM.
    svm_options = dict(
        C=np.inf,
        kernel='rbf',
        shrinking=False,
        tol=1e-3,
        gamma=gam,
        random_state=200,
    )
    model = SVC(**svm_options)
    model.fit(X_in, y_in)

    train_error = 1 - model.score(X_in, y_in)
    test_error = 1 - model.score(X_out, y_out)
    return train_error, test_error

def RBF(X, y, K, gamma):
    """Fit a regular RBF model with K centres found by Lloyd's algorithm.

    Parameters
    ----------
    X : array of shape (N, d)
        Training points. Any feature dimension d is supported.
    y : array of length N
        Target labels.
    K : int
        Number of clusters / RBF centres.
    gamma : float
        Width parameter of the Gaussian basis exp(-gamma * ||x - mu||^2).

    Returns
    -------
    W_rbf : array of length K
        Least-squares weights (all zeros when the run is discarded).
    nu_k : array of shape (K, d)
        Cluster centres.
    status : int
        0 on success, -1 when some cluster became empty and the run must
        be discarded by the caller.
    """
    X = np.asarray(X, dtype=float)
    N_points = len(X)

    # Initialise every centre with a randomly chosen training point
    # (drawn with replacement, so duplicates are possible).
    nu_k = np.zeros([K, X.shape[1]])
    for i in range(K):
        nu_k[i, :] = X[random.randint(0, N_points - 1), :]

    # Lloyd's algorithm, capped at 20 iterations.
    for _ in range(20):
        # Squared distance from every point to every centre: shape (N, K).
        sq_dists = np.linalg.norm(X[:, None, :] - nu_k[None, :, :], axis=2) ** 2
        clust_lbls = np.argmin(sq_dists, axis=1)

        # An empty cluster makes the run unusable; report it to the caller.
        if len(np.unique(clust_lbls)) != K:
            return np.zeros([K]), nu_k, -1

        new_nu = np.array([X[clust_lbls == i].mean(axis=0) for i in range(K)])
        # Converged only when *no* coordinate of any centre moved.  Testing
        # max(delta) != 0 would stop too early if all remaining shifts were
        # non-positive and the largest one happened to be exactly 0.
        converged = np.array_equal(new_nu, nu_k)
        nu_k = new_nu
        if converged:
            break

    # Design matrix of Gaussian features, shape (N, K).
    Fai = np.exp(-gamma * np.linalg.norm(X[:, None, :] - nu_k[None, :, :], axis=2) ** 2)

    # Least-squares solution of Fai @ W = y.  lstsq avoids explicitly
    # inverting Fai^T Fai, which is ill-conditioned when centres are close.
    W_rbf = np.linalg.lstsq(Fai, y, rcond=None)[0]
    return W_rbf, nu_k, 0
    
def RBF_predict(X, W_rbf, K, gamma, nu_k):
    """Classify every row of X with a fitted regular RBF model.

    For each point the weighted sum of the K Gaussian features
    exp(-gamma * ||x - nu_k[j]||^2) is formed and its sign is returned.

    X      -- points to classify, one per row
    W_rbf  -- weights, one per centre
    K      -- number of centres to use
    gamma  -- width parameter of the Gaussian features
    nu_k   -- centres, one per row

    Returns an array holding the sign of the model output for each point.
    """
    n_samples = len(X)
    labels = np.zeros([n_samples])
    for row in range(n_samples):
        # Accumulate centre by centre, in index order.
        activation = sum(
            W_rbf[c] * np.exp(-1 * gamma * (np.linalg.norm(X[row, :] - nu_k[c, :])) ** 2)
            for c in range(K)
        )
        labels[row] = np.sign(activation)
    return labels

def rnd():
    """Return one random float drawn uniformly between -1.0 and 1.0."""
    low, high = -1.0, 1.0
    return random.uniform(low, high)

def generate_rand_points(N_points):
    """Return an (N_points, 2) array whose entries are all drawn with rnd()."""
    points = np.zeros([N_points, 2])
    # Fill one coordinate (column) at a time: all first coordinates are
    # drawn before any second coordinate.
    for axis in (0, 1):
        points[:, axis] = [rnd() for _ in range(N_points)]
    return points

def target_function(X):
    """Label each row (x1, x2) of X with sign(x2 - x1 - 0.25 * sin(pi * x1))."""
    x1 = X[:, 0]
    x2 = X[:, 1]
    return np.sign(x2 - x1 - 0.25 * np.sin(np.pi * x1))
    
def error_in_out_evaluation(f_labels, g_labels):
    """Return the fraction of positions where the two label sequences differ.

    f_labels -- reference labels; its length sets how many positions are compared
    g_labels -- labels to compare against the reference
    """
    N_points = len(f_labels)
    mismatches = sum(1 for k in range(N_points) if f_labels[k] != g_labels[k])
    return float(mismatches) / N_points

<b>Problem 13</b>

In [43]:
# Problem 13: how often does the RBF-kernel SVM (C = inf, gamma = 1.5)
# end up with a non-zero training error on 100 random points?
N_runs = 1000
N_train = 100
N_test = 1000

E_in = np.zeros([N_runs])
for i in range(0, N_runs):
    train_set = generate_rand_points(N_train)
    f_labels = target_function(train_set)
    # Only E_in is of interest here, so the training set is also passed as
    # the evaluation set.  E_in must NOT be re-created inside the loop:
    # doing so wipes the results of all previous runs.
    E_in[i], E_out = SVM_func_rbf(train_set, f_labels, train_set, f_labels, 1.5)
# Scale by 100 so that the printed number really is a percentage.
print("Data set isn't separable by RBF kernel in ", 100.0 * np.count_nonzero(E_in != 0) / N_runs, "% of time.")

Data set isn't separable by RBF kernel in  0.0 % of time.


The correct answer is <b>[a]</b>.

<b>Problem 14</b>

In [47]:
# Problem 14: regular RBF (K = 9, gamma = 1.5) versus the RBF-kernel SVM.
# Runs in which Lloyd's algorithm produces an empty cluster are discarded
# and repeated, so `iteration` counts attempts and `counter` accepted runs.
n_clusters = 9
gamma_val = 1.5

counter = 0
iteration = 0
E_in_reg = np.zeros([N_runs])
E_out_reg = np.zeros([N_runs])
E_in_kern = np.zeros([N_runs])
E_out_kern = np.zeros([N_runs])
while counter < N_runs:
    train_set = generate_rand_points(N_train)
    test_set = generate_rand_points(N_test)
    f_labels_train = target_function(train_set)
    f_labels_test = target_function(test_set)
    # Keep only the latest status: a fixed-size status array indexed by the
    # attempt number would overflow if many runs had to be discarded.
    W_rbf, nu_k, status = RBF(train_set, f_labels_train, n_clusters, gamma_val)
    if status != -1:
        predicted_in = RBF_predict(train_set, W_rbf, n_clusters, gamma_val, nu_k)
        predicted_out = RBF_predict(test_set, W_rbf, n_clusters, gamma_val, nu_k)
        E_in_reg[counter] = error_in_out_evaluation(f_labels_train, predicted_in)
        E_out_reg[counter] = error_in_out_evaluation(f_labels_test, predicted_out)

        E_in_kern[counter], E_out_kern[counter] = SVM_func_rbf(
            train_set, f_labels_train, test_set, f_labels_test, gamma_val)
        counter += 1
    iteration += 1

# np.float was removed in NumPy 1.24; the builtin float is what it aliased.
print("fraction= ", np.count_nonzero(E_out_kern < E_out_reg) / float(N_runs))

fraction=  0.81


Thus the kernel form (hard-margin SVM with RBF kernel, $\gamma = 1.5$) beats the regular RBF model with K=9 clusters and $\gamma = 1.5$ in terms of $E_{out}$ in 81% of the runs. The correct answer is <b>[e]</b>.

<b>Problem 15</b>

In [46]:
# Problem 15: regular RBF (K = 12, gamma = 1.5) versus the RBF-kernel SVM.
# Runs in which Lloyd's algorithm produces an empty cluster are discarded
# and repeated, so `iteration` counts attempts and `counter` accepted runs.
n_clusters = 12
gamma_val = 1.5

counter = 0
iteration = 0
E_in_reg = np.zeros([N_runs])
E_out_reg = np.zeros([N_runs])
E_in_kern = np.zeros([N_runs])
E_out_kern = np.zeros([N_runs])
while counter < N_runs:
    train_set = generate_rand_points(N_train)
    test_set = generate_rand_points(N_test)
    f_labels_train = target_function(train_set)
    f_labels_test = target_function(test_set)
    # Keep only the latest status: a fixed-size status array indexed by the
    # attempt number would overflow if many runs had to be discarded.
    W_rbf, nu_k, status = RBF(train_set, f_labels_train, n_clusters, gamma_val)
    if status != -1:
        predicted_in = RBF_predict(train_set, W_rbf, n_clusters, gamma_val, nu_k)
        predicted_out = RBF_predict(test_set, W_rbf, n_clusters, gamma_val, nu_k)
        E_in_reg[counter] = error_in_out_evaluation(f_labels_train, predicted_in)
        E_out_reg[counter] = error_in_out_evaluation(f_labels_test, predicted_out)

        E_in_kern[counter], E_out_kern[counter] = SVM_func_rbf(
            train_set, f_labels_train, test_set, f_labels_test, gamma_val)
        counter += 1
    iteration += 1

# np.float was removed in NumPy 1.24; the builtin float is what it aliased.
print("fraction= ", np.count_nonzero(E_out_kern < E_out_reg) / float(N_runs))

fraction=  0.746


Thus the kernel form (hard-margin SVM with RBF kernel, $\gamma = 1.5$) beats the regular RBF model with K=12 clusters and $\gamma = 1.5$ in terms of $E_{out}$ in 74.6% of the runs. The correct answer is <b>[d]</b>.

<b>Problem 16</b>

In [48]:
# Problem 16: effect of going from K = 9 to K = 12 clusters (gamma = 1.5).
# Column 0 of E_in_K / E_out_K holds the K = 9 errors, column 1 the K = 12
# errors.  A run counts only if neither fit produced an empty cluster.
counter = 0
iteration = 0
E_in_K = np.zeros([N_runs, 2])
E_out_K = np.zeros([N_runs, 2])
while counter < N_runs:
    train_set = generate_rand_points(N_train)
    test_set = generate_rand_points(N_test)
    f_labels_train = target_function(train_set)
    f_labels_test = target_function(test_set)

    # Only the latest status values are needed; fixed-size status arrays
    # indexed by the attempt number would overflow after many discards.
    W_rbf9, nu_k9, status9 = RBF(train_set, f_labels_train, 9, 1.5)
    W_rbf12, nu_k12, status12 = RBF(train_set, f_labels_train, 12, 1.5)
    if status9 != -1 and status12 != -1:
        predicted_in_9 = RBF_predict(train_set, W_rbf9, 9, 1.5, nu_k9)
        predicted_out_9 = RBF_predict(test_set, W_rbf9, 9, 1.5, nu_k9)
        E_in_K[counter][0] = error_in_out_evaluation(f_labels_train, predicted_in_9)
        E_out_K[counter][0] = error_in_out_evaluation(f_labels_test, predicted_out_9)

        predicted_in_12 = RBF_predict(train_set, W_rbf12, 12, 1.5, nu_k12)
        predicted_out_12 = RBF_predict(test_set, W_rbf12, 12, 1.5, nu_k12)
        E_in_K[counter][1] = error_in_out_evaluation(f_labels_train, predicted_in_12)
        E_out_K[counter][1] = error_in_out_evaluation(f_labels_test, predicted_out_12)
        counter += 1
    iteration += 1

# Fraction of runs falling into each scenario when moving from column 0
# to column 1:
#   probs[0] E_in down, E_out up      probs[1] E_in up, E_out down
#   probs[2] both up                  probs[3] both down
#   probs[4] both unchanged
# np.float was removed in NumPy 1.24; the builtin float is what it aliased.
E_in_old, E_in_new = E_in_K[:, 0], E_in_K[:, 1]
E_out_old, E_out_new = E_out_K[:, 0], E_out_K[:, 1]
probs = np.zeros([5])
probs[0] = np.count_nonzero((E_in_new < E_in_old) & (E_out_new > E_out_old)) / float(N_runs)
probs[1] = np.count_nonzero((E_in_old < E_in_new) & (E_out_old > E_out_new)) / float(N_runs)
probs[2] = np.count_nonzero((E_in_new > E_in_old) & (E_out_new > E_out_old)) / float(N_runs)
probs[3] = np.count_nonzero((E_in_new < E_in_old) & (E_out_new < E_out_old)) / float(N_runs)
probs[4] = np.count_nonzero((E_in_new == E_in_old) & (E_out_new == E_out_old)) / float(N_runs)
print(probs)

[ 0.112  0.065  0.073  0.415  0.01 ]


Thus the case where both $E_{in}$ and $E_{out}$ go down happens more often than any of the others. The correct answer is <b>[d]</b>.

<b>Problem 17</b>

In [49]:
# Problem 17: effect of going from gamma = 1.5 to gamma = 2 with K = 9.
# Column 0 of E_in_K / E_out_K holds the gamma = 1.5 errors, column 1 the
# gamma = 2 errors.  A run counts only if neither fit produced an empty
# cluster.
counter = 0
iteration = 0
E_in_K = np.zeros([N_runs, 2])
E_out_K = np.zeros([N_runs, 2])
while counter < N_runs:
    train_set = generate_rand_points(N_train)
    test_set = generate_rand_points(N_test)
    f_labels_train = target_function(train_set)
    f_labels_test = target_function(test_set)

    # Only the latest status values are needed; fixed-size status arrays
    # indexed by the attempt number would overflow after many discards.
    W_rbf1_5, nu_k1_5, status1_5 = RBF(train_set, f_labels_train, 9, 1.5)
    W_rbf2, nu_k2, status2 = RBF(train_set, f_labels_train, 9, 2)
    if status1_5 != -1 and status2 != -1:
        predicted_in_1_5 = RBF_predict(train_set, W_rbf1_5, 9, 1.5, nu_k1_5)
        predicted_out_1_5 = RBF_predict(test_set, W_rbf1_5, 9, 1.5, nu_k1_5)
        E_in_K[counter][0] = error_in_out_evaluation(f_labels_train, predicted_in_1_5)
        E_out_K[counter][0] = error_in_out_evaluation(f_labels_test, predicted_out_1_5)

        predicted_in_2 = RBF_predict(train_set, W_rbf2, 9, 2, nu_k2)
        predicted_out_2 = RBF_predict(test_set, W_rbf2, 9, 2, nu_k2)
        E_in_K[counter][1] = error_in_out_evaluation(f_labels_train, predicted_in_2)
        E_out_K[counter][1] = error_in_out_evaluation(f_labels_test, predicted_out_2)
        counter += 1
    iteration += 1

# Fraction of runs falling into each scenario when moving from column 0
# to column 1:
#   probs[0] E_in down, E_out up      probs[1] E_in up, E_out down
#   probs[2] both up                  probs[3] both down
#   probs[4] both unchanged
# np.float was removed in NumPy 1.24; the builtin float is what it aliased.
E_in_old, E_in_new = E_in_K[:, 0], E_in_K[:, 1]
E_out_old, E_out_new = E_out_K[:, 0], E_out_K[:, 1]
probs = np.zeros([5])
probs[0] = np.count_nonzero((E_in_new < E_in_old) & (E_out_new > E_out_old)) / float(N_runs)
probs[1] = np.count_nonzero((E_in_old < E_in_new) & (E_out_old > E_out_new)) / float(N_runs)
probs[2] = np.count_nonzero((E_in_new > E_in_old) & (E_out_new > E_out_old)) / float(N_runs)
probs[3] = np.count_nonzero((E_in_new < E_in_old) & (E_out_new < E_out_old)) / float(N_runs)
probs[4] = np.count_nonzero((E_in_new == E_in_old) & (E_out_new == E_out_old)) / float(N_runs)
print(probs)

[ 0.082  0.08   0.365  0.169  0.01 ]


Thus the case where both $E_{in}$ and $E_{out}$ go up happens more often than any of the others. The correct answer is <b>[c]</b>.

<b>Problem 18</b>

In [51]:
# Problem 18: percentage of runs in which column 0 of E_in_K is exactly zero.
# np.float was removed in NumPy 1.24; the builtin float is what it aliased.
print("% of Ein==0: ", 100*np.count_nonzero(E_in_K[:,0]==0)/ float(N_runs))

% of Ein==0:  4.8


The regular RBF model (K=9, $\gamma = 1.5$) achieves $E_{in}=0$ in 4.8% of the runs. The correct answer is <b>[a]</b>.