In [2]:

import numpy as np
from scipy.stats import mode
from sklearn.datasets import load_breast_cancer
from numpy.random import randint
from scipy.spatial.distance import cdist
from sklearn.model_selection import GridSearchCV
from sklearn.svm import SVC


def ova_svm_train(X, y, C, sigma):
    param_grid = {'C': C, 'gamma': 1 / (2 * sigma ** 2)}
    svm = SVC(kernel='rbf', **param_grid)
    svm.fit(X, y)
    return svm


def ova_svm_predict(model, X_test):
    return model.predict(X_test)


def active_learning(X_train, y_train, X_pool, y_pool, n_queries, C, sigma):
    # initialize the model
    model = ova_svm_train(X_train, y_train, C, sigma)
    n_classes = len(np.unique(y_train))
    # active learning loop
    for idx in range(n_queries):
        # compute the mean of the pool set
        mean = np.mean(X_pool, axis=0)
        # compute the distance of each instance in the pool set from the mean
        distances = np.linalg.norm(X_pool - mean, axis=1)
        # sort the distances in ascending order
        sorted_idx = np.argsort(distances)
        # find the minimum, average, and maximum values of the distances
        dmin = np.min(distances)
        dmean = np.mean(distances)
        dmax = np.max(distances)
        # divide the sorted distances into two subsets D1 and D2
        D1 = distances[(distances >= dmin) & (distances <= dmean)]
        D2 = distances[(distances > dmean) & (distances <= dmax)]
        # compute the width of each partition in subsets D1 and D2
        W1 = np.mean(np.diff(D1))
        W2 = np.mean(np.diff(D2))
        # generate the partitions using the lower and upper bounds for each subset
        LP1 = dmin + np.arange(len(D1)) * W1
        UP1 = dmin + (np.arange(len(D1)) + 1) * W1
        LP2 = dmean + np.arange(len(D2)) * W2
        UP2 = dmean + (np.arange(len(D2)) + 1) * W2
        # select one instance from each partition that is at a minimum distance from the decision hyperplane
        query_idx = []
        for lp, up in zip(np.hstack([LP1, LP2]), np.hstack([UP1, UP2])):
            partition_idx = sorted_idx[(distances >= lp) & (distances <= up)]
            if len(partition_idx) > 0:
                dist = np.abs(model.decision_function(X_pool[partition_idx]))
                query_idx.append(partition_idx[np.argmin(dist)])
        
        # update the model with the queried instances
        X_train = np.vstack([X_train, X_pool[query_idx]])
        y_train = np.hstack([y_train, y_pool[query_idx]])
        model = ova_svm_train(X_train, y_train, C, sigma)

        # remove the queried instances from the pool
        X_pool = np.delete(X_pool, query_idx, axis=0)
    
    return model


def accu_scr(y_test, y_pred):
    return np.mean(y_test == y_pred)


# Loading the Data
iris = load_breast_cancer()

# Store features matrix in X
X = iris.data
# Store target vector in
y = iris.target

r = int(input("Specify the size of training dataset: "))
m = int(input("Specify the size for active learning: "))
l = int(input("Specify the size of testing dataset: "))

C_range = np.logspace(-3, 3, 7)
sigma_range = np.logspace(-3, 3, 7)

counter = 0
for g in range(15):
    count = 0
    arr = []
    # Creating the training Data
    train_idx = []
    for p in range(2):
        if(p == 0):
            tt = xxx = randint(0 ,212 ,int(int(r)/2))
        if(p == 1):
            tt = xxx = randint(212 ,569 ,int(int(r)/2)) 
            
       
        for w in range(len(tt)):
            train_idx.append(tt[w])
    X_train = X[train_idx]
    y_train = y[train_idx]

    # Creating the testing Data
    test_idx = randint(0, 569, l)
    X_test = X[test_idx]
    y_test = y[test_idx]

    # Perform grid search to find best hyperparameters
    svm = GridSearchCV(SVC(kernel='rbf'), {'C': C_range, 'gamma': 1 / (2 * sigma_range ** 2)}, cv=3)
    svm.fit(X_train, y_train)
    best_C = svm.best_params_['C']
    best_sigma = np.sqrt(1 / (2 * svm.best_params_['gamma']))

    # run active learning
    model = active_learning(X_train, y_train, X, y, m, best_C, best_sigma)

    # Applying the created function
    y_pred = ova_svm_predict(model, X_test)

    # Checking the accuracy
    a = accu_scr(y_test, y_pred)
    # keeping the value in an array
    arr.append(a)

    print("Accuracy for testcase", g + 1, "is:", a * 100, "%")

    # calculating mean accuracy
    counter = counter + a

overall_mean = counter / 15
print("Overall mean accuracy is:", overall_mean * 100)


Specify the size of training dataset: 20
Specify the size for active learning: 25
Specify the size of testing dataset: 569
Accuracy for testcase 1 is: 85.58875219683657 %
Accuracy for testcase 2 is: 90.68541300527241 %
Accuracy for testcase 3 is: 90.33391915641477 %
Accuracy for testcase 4 is: 87.34622144112478 %
Accuracy for testcase 5 is: 88.04920913884007 %
Accuracy for testcase 6 is: 90.50966608084359 %
Accuracy for testcase 7 is: 91.56414762741653 %
Accuracy for testcase 8 is: 86.99472759226714 %
Accuracy for testcase 9 is: 92.97012302284709 %
Accuracy for testcase 10 is: 94.20035149384886 %
Accuracy for testcase 11 is: 88.04920913884007 %
Accuracy for testcase 12 is: 94.02460456942003 %
Accuracy for testcase 13 is: 85.9402460456942 %
Accuracy for testcase 14 is: 89.63093145869946 %
Accuracy for testcase 15 is: 90.68541300527241 %
Overall mean accuracy is: 89.77152899824252
