In [39]:
import numpy as np
from scipy.stats import mode
from sklearn.datasets import load_iris
from numpy.random import randint
from scipy.spatial.distance import cdist
from sklearn.model_selection import GridSearchCV
from sklearn.svm import SVC
from sklearn.svm import SVC
from sklearn.model_selection import cross_val_score


def ova_svm_train(X, y, C_range=None, sigma_range=None):
    """Fit an RBF-kernel SVC, selecting C and gamma with a 3-fold grid search.

    Parameters
    ----------
    X : array-like
        Training features, passed unchanged to ``GridSearchCV.fit``.
    y : array-like
        Training labels, passed unchanged to ``GridSearchCV.fit``.
    C_range : array-like, optional
        Candidate values for ``C``. Defaults to ``np.logspace(-3, 3, 7)``.
    sigma_range : array-like, optional
        Candidate RBF widths. Each width is converted to
        ``gamma = 1 / (2 * sigma ** 2)``. Defaults to
        ``np.logspace(-3, 3, 7)``.

    Returns
    -------
    GridSearchCV
        The fitted search object.
    """
    if C_range is None:
        C_range = np.logspace(-3, 3, 7)
    if sigma_range is None:
        sigma_range = np.logspace(-3, 3, 7)

    # Coerce to a float array so plain lists/tuples work too; ``list ** 2``
    # would otherwise raise TypeError.
    sigma_values = np.asarray(sigma_range, dtype=float)

    param_grid = {'C': C_range, 'gamma': 1 / (2 * sigma_values ** 2)}
    svm = GridSearchCV(SVC(kernel='rbf'), param_grid, cv=3)
    svm.fit(X, y)
    return svm


def ova_svm_predict(model, X_test):
    """Return the labels that ``model.predict`` produces for ``X_test``."""
    predictions = model.predict(X_test)
    return predictions


def active_learning(X_train, y_train, X_pool, y_pool, n_queries):
    """Grow the training set by uncertainty sampling and return the final model.

    On every round the current model scores the pool with
    ``decision_function``; the pool sample with the lowest score magnitude is
    moved (with its label) into the training set and the model is retrained
    with ``ova_svm_train``.

    Parameters
    ----------
    X_train, y_train : array-like
        Initial labelled samples and their labels.
    X_pool, y_pool : array-like
        Candidate samples and their labels, aligned row by row. The caller's
        arrays are not modified.
    n_queries : int
        Maximum number of samples to move from the pool. Querying stops early
        if the pool runs out.

    Returns
    -------
    The model returned by ``ova_svm_train`` after the last query.
    """
    X_pool = np.asarray(X_pool)
    y_pool = np.asarray(y_pool)

    # initialize the model
    model = ova_svm_train(X_train, y_train)

    # active learning loop
    for _ in range(n_queries):
        if len(X_pool) == 0:
            # nothing left to query
            break

        # magnitude of the decision value(s) for each instance in the pool
        dist = np.abs(model.decision_function(X_pool))

        if dist.ndim == 1:
            # one decision value per sample: smallest magnitude wins
            query_idx = np.argmin(dist)
        else:
            # one column per class: pick the sample whose largest magnitude
            # is smallest. Branching on the array shape (not on the initial
            # class count) keeps query_idx a row index even if the number of
            # classes changes as labels are added.
            query_idx = np.argmin(np.max(dist, axis=1))

        # update the model with the queried instance
        X_train = np.vstack([X_train, X_pool[query_idx]])
        y_train = np.hstack([y_train, y_pool[query_idx]])
        model = ova_svm_train(X_train, y_train)

        # remove the queried instance from BOTH pool arrays; deleting it from
        # X_pool only would leave y_pool misaligned for every later query
        X_pool = np.delete(X_pool, query_idx, axis=0)
        y_pool = np.delete(y_pool, query_idx, axis=0)

    return model

# # initialize the pool of unlabeled data
# X_pool = X_test
# y_pool = y_test

# # run active learning
# model = active_learning(X_train, y_train, X_pool, n_queries=10)


def accu_scr(y_test, y_pred):
    """Return the fraction of predictions that equal the true labels.

    Both inputs are converted with ``np.asarray`` so that plain Python lists
    are compared element-wise; comparing two lists with ``==`` yields a single
    bool and would make the score either 0.0 or 1.0.
    """
    y_true = np.asarray(y_test)
    y_hat = np.asarray(y_pred)
    return np.mean(y_true == y_hat)


# Load the Iris data: X is the feature matrix, y the target vector.
iris = load_iris()
X = iris.data
y = iris.target

r = int(input("Specify the size of training dataset: "))
m = int(input("Specify the size for active learning: "))
l = int(input("Specify the size of testing dataset: "))

N_RUNS = 15                 # number of repetitions averaged at the end
CLASS_STARTS = (0, 50, 100) # the sampling below assumes each class occupies
CLASS_SIZE = 50             # a contiguous block of 50 rows

# NOTE(review): no random seed is set, so the accuracies differ between runs.
arr = []      # per-run accuracies (kept across iterations)
counter = 0   # running sum of the accuracies
for g in range(N_RUNS):
    # Initial training set: int(r / 3) indices drawn from each class block.
    # NOTE(review): randint samples with replacement, so indices may repeat,
    # and any remainder of r / 3 is dropped.
    per_class = int(r / 3)
    train_idx = np.concatenate(
        [randint(start, start + CLASS_SIZE, per_class) for start in CLASS_STARTS]
    )
    X_train = X[train_idx]
    y_train = y[train_idx]

    # Test set: l indices drawn from all 150 rows.
    # NOTE(review): drawn with replacement from the same rows used for the
    # training set and the query pool (the full X, y below), so test samples
    # can also be trained on; the reported accuracy is optimistic.
    test_idx = randint(0, 150, l)
    X_test = X[test_idx]
    y_test = y[test_idx]

    # Run active learning with the whole data set as the query pool.
    model = active_learning(X_train, y_train, X, y, m)

    # Evaluate on the test draw.
    y_pred = ova_svm_predict(model, X_test)
    a = accu_scr(y_test, y_pred)
    arr.append(a)

    print("Accuracy for testcase", g + 1, "is:", a * 100, "%")

    counter = counter + a

overall_mean = counter / N_RUNS
print("Overall mean accuracy is:", overall_mean * 100)


Specify the size of training dataset: 12
Specify the size for active learning: 15
Specify the size of testing dataset: 145
Accuracy for testcase 1 is: 97.24137931034483 %
Accuracy for testcase 2 is: 98.62068965517241 %
Accuracy for testcase 3 is: 99.3103448275862 %
Accuracy for testcase 4 is: 97.93103448275862 %
Accuracy for testcase 5 is: 96.55172413793103 %
Accuracy for testcase 6 is: 98.62068965517241 %
Accuracy for testcase 7 is: 95.86206896551724 %
Accuracy for testcase 8 is: 96.55172413793103 %
Accuracy for testcase 9 is: 95.86206896551724 %
Accuracy for testcase 10 is: 96.55172413793103 %
Accuracy for testcase 11 is: 97.93103448275862 %
Accuracy for testcase 12 is: 96.55172413793103 %
Accuracy for testcase 13 is: 97.24137931034483 %
Accuracy for testcase 14 is: 97.93103448275862 %
Accuracy for testcase 15 is: 97.93103448275862 %
Overall mean accuracy is: 97.37931034482759


In [40]:
#updated
import numpy as np
from scipy.stats import mode
from sklearn.datasets import load_iris
from numpy.random import randint
from scipy.spatial.distance import cdist
from sklearn.model_selection import GridSearchCV
from sklearn.svm import SVC


def ova_svm_train(X, y, C, sigma):
    """Fit and return an RBF-kernel SVC for a fixed ``C`` and width ``sigma``.

    ``sigma`` is translated into the ``gamma`` keyword as
    ``1 / (2 * sigma ** 2)``.
    """
    gamma = 1 / (2 * sigma ** 2)
    classifier = SVC(kernel='rbf', C=C, gamma=gamma)
    classifier.fit(X, y)
    return classifier


def ova_svm_predict(model, X_test):
    """Predict labels for ``X_test`` by delegating to ``model.predict``."""
    y_hat = model.predict(X_test)
    return y_hat


def active_learning(X_train, y_train, X_pool, y_pool, n_queries, C, sigma):
    """Grow the training set by uncertainty sampling with fixed hyperparameters.

    On every round the current model scores the pool with
    ``decision_function``; the pool sample with the lowest score magnitude is
    moved (with its label) into the training set and the model is retrained
    with ``ova_svm_train(X_train, y_train, C, sigma)``.

    Parameters
    ----------
    X_train, y_train : array-like
        Initial labelled samples and their labels.
    X_pool, y_pool : array-like
        Candidate samples and their labels, aligned row by row. The caller's
        arrays are not modified.
    n_queries : int
        Maximum number of samples to move from the pool. Querying stops early
        if the pool runs out.
    C, sigma
        Hyperparameters forwarded to ``ova_svm_train`` on every (re)fit.

    Returns
    -------
    The model returned by ``ova_svm_train`` after the last query.
    """
    X_pool = np.asarray(X_pool)
    y_pool = np.asarray(y_pool)

    # initialize the model
    model = ova_svm_train(X_train, y_train, C, sigma)

    # active learning loop
    for _ in range(n_queries):
        if len(X_pool) == 0:
            # nothing left to query
            break

        # magnitude of the decision value(s) for each instance in the pool
        dist = np.abs(model.decision_function(X_pool))

        if dist.ndim == 1:
            # one decision value per sample: smallest magnitude wins
            query_idx = np.argmin(dist)
        else:
            # one column per class: pick the sample whose largest magnitude
            # is smallest; branching on the shape keeps query_idx a row index
            query_idx = np.argmin(np.max(dist, axis=1))

        # update the model with the queried instance
        X_train = np.vstack([X_train, X_pool[query_idx]])
        y_train = np.hstack([y_train, y_pool[query_idx]])
        model = ova_svm_train(X_train, y_train, C, sigma)

        # remove the queried instance from BOTH pool arrays; deleting it from
        # X_pool only would leave y_pool misaligned for every later query
        X_pool = np.delete(X_pool, query_idx, axis=0)
        y_pool = np.delete(y_pool, query_idx, axis=0)

    return model


def accu_scr(y_test, y_pred):
    """Return the share of entries in ``y_pred`` that match ``y_test``.

    The inputs go through ``np.asarray`` first so list arguments are compared
    element by element rather than as whole objects (which would collapse the
    score to 0.0 or 1.0).
    """
    expected = np.asarray(y_test)
    predicted = np.asarray(y_pred)
    return np.mean(expected == predicted)


# Load the Iris data: X is the feature matrix, y the target vector.
iris = load_iris()
X = iris.data
y = iris.target

r = int(input("Specify the size of training dataset: "))
m = int(input("Specify the size for active learning: "))
l = int(input("Specify the size of testing dataset: "))

C_range = np.logspace(-3, 3, 7)
sigma_range = np.logspace(-3, 3, 7)

N_RUNS = 15                 # number of repetitions averaged at the end
CLASS_STARTS = (0, 50, 100) # the sampling below assumes each class occupies
CLASS_SIZE = 50             # a contiguous block of 50 rows

# The search grid does not depend on the run, so build it once.
search_grid = {'C': C_range, 'gamma': 1 / (2 * sigma_range ** 2)}

# NOTE(review): no random seed is set, so the accuracies differ between runs.
arr = []      # per-run accuracies (kept across iterations)
counter = 0   # running sum of the accuracies
for g in range(N_RUNS):
    # Initial training set: int(r / 3) indices drawn from each class block.
    # NOTE(review): randint samples with replacement, so indices may repeat,
    # and any remainder of r / 3 is dropped.
    per_class = int(r / 3)
    train_idx = np.concatenate(
        [randint(start, start + CLASS_SIZE, per_class) for start in CLASS_STARTS]
    )
    X_train = X[train_idx]
    y_train = y[train_idx]

    # Test set: l indices drawn from all 150 rows.
    # NOTE(review): drawn with replacement from the same rows used for the
    # training set and the query pool (the full X, y below), so test samples
    # can also be trained on; the reported accuracy is optimistic.
    test_idx = randint(0, 150, l)
    X_test = X[test_idx]
    y_test = y[test_idx]

    # Pick C and sigma once per run on the initial training set; they stay
    # fixed for every refit inside active_learning.
    svm = GridSearchCV(SVC(kernel='rbf'), search_grid, cv=3)
    svm.fit(X_train, y_train)
    best_C = svm.best_params_['C']
    # invert gamma = 1 / (2 * sigma ** 2)
    best_sigma = np.sqrt(1 / (2 * svm.best_params_['gamma']))

    # Run active learning with the whole data set as the query pool.
    model = active_learning(X_train, y_train, X, y, m, best_C, best_sigma)

    # Evaluate on the test draw.
    y_pred = ova_svm_predict(model, X_test)
    a = accu_scr(y_test, y_pred)
    arr.append(a)

    print("Accuracy for testcase", g + 1, "is:", a * 100, "%")

    counter = counter + a

overall_mean = counter / N_RUNS
print("Overall mean accuracy is:", overall_mean * 100)


Specify the size of training dataset: 12
Specify the size for active learning: 15
Specify the size of testing dataset: 149
Accuracy for testcase 1 is: 97.98657718120806 %
Accuracy for testcase 2 is: 97.31543624161074 %
Accuracy for testcase 3 is: 96.64429530201343 %
Accuracy for testcase 4 is: 98.65771812080537 %
Accuracy for testcase 5 is: 94.63087248322147 %
Accuracy for testcase 6 is: 97.31543624161074 %
Accuracy for testcase 7 is: 97.31543624161074 %
Accuracy for testcase 8 is: 100.0 %
Accuracy for testcase 9 is: 97.98657718120806 %
Accuracy for testcase 10 is: 98.65771812080537 %
Accuracy for testcase 11 is: 97.98657718120806 %
Accuracy for testcase 12 is: 99.32885906040269 %
Accuracy for testcase 13 is: 97.98657718120806 %
Accuracy for testcase 14 is: 96.64429530201343 %
Accuracy for testcase 15 is: 94.63087248322147 %
Overall mean accuracy is: 97.5391498881432


In [5]:
#marginal sampling

import numpy as np
from sklearn.datasets import load_iris
from numpy.random import randint
from scipy.spatial.distance import cdist
from sklearn.svm import SVC

from sklearn.model_selection import cross_val_score


def grid_search(X, y, param_grid, cv=3):
    """Return the entry of ``param_grid`` with the best mean CV score.

    Each entry is a dict with a ``'kernel'`` key (``'linear'``, ``'poly'`` or
    ``'rbf'``) and ``'C'``, plus ``'degree'`` for ``'poly'`` or ``'sigma'``
    for ``'rbf'`` (converted to ``gamma = 1 / (2 * sigma ** 2)``). Entries are
    scored with ``cross_val_score`` using ``cv`` folds; on ties the earliest
    entry is kept. Returns ``None`` for an empty grid and raises
    ``ValueError`` for any other kernel name.
    """
    winner = None
    top_score = float('-inf')

    for candidate in param_grid:
        kind = candidate['kernel']
        if kind not in ('linear', 'poly', 'rbf'):
            raise ValueError("Invalid kernel type!")

        # Assemble the SVC keyword arguments for this kernel.
        svc_kwargs = {'kernel': kind, 'C': candidate['C']}
        if kind == 'poly':
            svc_kwargs['degree'] = candidate['degree']
        elif kind == 'rbf':
            svc_kwargs['gamma'] = 1 / (2 * candidate['sigma'] ** 2)
        estimator = SVC(**svc_kwargs)

        mean_score = np.mean(cross_val_score(estimator, X, y, cv=cv))

        # strict comparison: the first of several equal scores wins
        if mean_score > top_score:
            winner, top_score = candidate, mean_score

    return winner


def ova_svm_train(X, y, C, sigma):
    """Train an RBF-kernel SVC on ``(X, y)`` and return it.

    The kernel width ``sigma`` is passed to SVC as
    ``gamma = 1 / (2 * sigma ** 2)``.
    """
    rbf_gamma = 1 / (2 * sigma ** 2)
    fitted = SVC(kernel='rbf', C=C, gamma=rbf_gamma)
    fitted.fit(X, y)
    return fitted


def ova_svm_predict(model, X_test):
    """Run ``model.predict`` on ``X_test`` and hand back its result."""
    labels = model.predict(X_test)
    return labels

def active_learning(X_train, y_train, X_pool, y_pool, n_queries, kernel, C, degree=None, sigma=None):
    """Grow the training set by marginal sampling and return the final SVC.

    On every round the SVC is refitted, the pool is scored with
    ``decision_function``, and the pool sample with the smallest absolute
    decision value (across all columns) is moved, with its label, into the
    training set.

    Parameters
    ----------
    X_train, y_train : array-like
        Initial labelled samples and their labels.
    X_pool, y_pool : array-like
        Candidate samples and their labels, aligned row by row. The caller's
        arrays are not modified.
    n_queries : int
        Maximum number of samples to move from the pool. Querying stops early
        if the pool runs out.
    kernel : {'linear', 'poly', 'rbf'}
        SVC kernel.
    C : float
        SVC regularisation parameter.
    degree : int, optional
        Polynomial degree; used only when ``kernel == 'poly'``.
    sigma : float, optional
        RBF width, converted to ``gamma = 1 / (2 * sigma ** 2)``; used only
        when ``kernel == 'rbf'``.

    Returns
    -------
    SVC
        The classifier fitted on the initial samples plus all queried ones.

    Raises
    ------
    ValueError
        If ``kernel`` is not one of the supported names.
    """
    if kernel == 'linear':
        model = SVC(kernel='linear', C=C)
    elif kernel == 'poly':
        model = SVC(kernel='poly', C=C, degree=degree)
    elif kernel == 'rbf':
        model = SVC(kernel='rbf', C=C, gamma=1 / (2 * sigma ** 2))
    else:
        # fail here instead of with an AttributeError on ``None.fit`` later
        raise ValueError("Invalid kernel type!")

    X_pool = np.asarray(X_pool)
    y_pool = np.asarray(y_pool)

    for _ in range(n_queries):
        if len(X_pool) == 0:
            # nothing left to query
            break

        model.fit(X_train, y_train)  # Fit the model before computing distances

        dist = np.abs(model.decision_function(X_pool))

        if dist.ndim == 1:
            # one decision value per sample
            query_idx = np.argmin(dist)
        else:
            # one column per class: use each sample's smallest magnitude;
            # branching on the shape keeps query_idx a row index
            query_idx = np.argmin(np.min(dist, axis=1))

        X_train = np.vstack([X_train, X_pool[query_idx]])
        y_train = np.hstack([y_train, y_pool[query_idx]])

        # remove the queried instance from BOTH pool arrays; deleting it from
        # X_pool only would leave y_pool misaligned for every later query
        X_pool = np.delete(X_pool, query_idx, axis=0)
        y_pool = np.delete(y_pool, query_idx, axis=0)

    # final fit so the returned model has seen the last queried sample
    model.fit(X_train, y_train)
    return model


def accu_scr(y_test, y_pred):
    """Return the proportion of positions where ``y_pred`` equals ``y_test``.

    Both arguments are wrapped in ``np.asarray`` so that Python lists are
    compared element-wise; a bare ``list == list`` is a single bool and would
    give 0.0 or 1.0 regardless of how many entries agree.
    """
    truth = np.asarray(y_test)
    guess = np.asarray(y_pred)
    return np.mean(truth == guess)


# Load the Iris data: X is the feature matrix, y the target vector.
iris = load_iris()
X = iris.data
y = iris.target

r = int(input("Specify the size of training dataset: "))
m = int(input("Specify the size for active learning: "))
l = int(input("Specify the size of testing dataset: "))

C_range = np.logspace(-3, 3, 7)
sigma_range = np.logspace(-3, 3, 7)
degree_range = [2, 3, 4]

N_RUNS = 15                 # number of repetitions averaged at the end
CLASS_STARTS = (0, 50, 100) # the sampling below assumes each class occupies
CLASS_SIZE = 50             # a contiguous block of 50 rows

# Candidate hyperparameters; identical for every run, so built once.
param_grid = (
    [{'kernel': 'linear', 'C': C} for C in C_range]
    + [{'kernel': 'poly', 'C': C, 'degree': degree} for C in C_range for degree in degree_range]
    + [{'kernel': 'rbf', 'C': C, 'sigma': sigma} for C in C_range for sigma in sigma_range]
)

# NOTE(review): no random seed is set, so the accuracies differ between runs.
arr = []      # per-run accuracies (kept across iterations)
counter = 0   # running sum of the accuracies
for g in range(N_RUNS):
    # Initial training set: int(r / 3) indices drawn from each class block.
    # NOTE(review): randint samples with replacement, so indices may repeat,
    # and any remainder of r / 3 is dropped.
    per_class = int(r / 3)
    train_idx = np.concatenate(
        [randint(start, start + CLASS_SIZE, per_class) for start in CLASS_STARTS]
    )
    X_train = X[train_idx]
    y_train = y[train_idx]

    # Test set: l indices drawn from all 150 rows.
    # NOTE(review): drawn with replacement from the same rows used for the
    # training set and the query pool (the full X, y below), so test samples
    # can also be trained on; the reported accuracy is optimistic.
    test_idx = randint(0, 150, l)
    X_test = X[test_idx]
    y_test = y[test_idx]

    # Pick kernel and hyperparameters on the initial training set.
    best_params = grid_search(X_train, y_train, param_grid)
    kernel = best_params['kernel']
    C = best_params['C']

    # Forward whichever kernel-specific parameter the winning entry carries
    # ('degree' for poly, 'sigma' for rbf, none for linear).
    kernel_kwargs = {key: best_params[key] for key in ('degree', 'sigma') if key in best_params}

    # Run active learning with the whole data set as the query pool.
    model = active_learning(X_train, y_train, X, y, m, kernel, C, **kernel_kwargs)

    # Evaluate on the test draw.
    y_pred = ova_svm_predict(model, X_test)
    a = accu_scr(y_test, y_pred)
    arr.append(a)

    print("Accuracy for testcase", g + 1, "is:", a * 100, "%")

    counter = counter + a

overall_mean = counter / N_RUNS
print("Overall mean accuracy is:", overall_mean * 100)


Specify the size of training dataset: 12
Specify the size for active learning: 15
Specify the size of testing dataset: 145
Accuracy for testcase 1 is: 96.55172413793103 %
Accuracy for testcase 2 is: 95.17241379310344 %
Accuracy for testcase 3 is: 88.27586206896552 %
Accuracy for testcase 4 is: 95.86206896551724 %
Accuracy for testcase 5 is: 94.48275862068965 %
Accuracy for testcase 6 is: 93.10344827586206 %
Accuracy for testcase 7 is: 95.17241379310344 %
Accuracy for testcase 8 is: 84.13793103448276 %
Accuracy for testcase 9 is: 96.55172413793103 %
Accuracy for testcase 10 is: 98.62068965517241 %
Accuracy for testcase 11 is: 95.86206896551724 %
Accuracy for testcase 12 is: 95.86206896551724 %
Accuracy for testcase 13 is: 93.79310344827586 %
Accuracy for testcase 14 is: 95.86206896551724 %
Accuracy for testcase 15 is: 96.55172413793103 %
Overall mean accuracy is: 94.39080459770113


In [7]:
#using probabilistic approach

import numpy as np
from sklearn.datasets import load_iris
from numpy.random import randint
from scipy.spatial.distance import cdist
from sklearn.svm import SVC

from sklearn.model_selection import cross_val_score


def grid_search(X, y, param_grid, cv=3):
    """Score every entry of ``param_grid`` by cross-validation and return the best.

    An entry is a dict holding ``'kernel'`` and ``'C'``; ``'poly'`` entries
    also need ``'degree'`` and ``'rbf'`` entries need ``'sigma'`` (mapped to
    ``gamma = 1 / (2 * sigma ** 2)``). The mean of ``cross_val_score`` over
    ``cv`` folds is the score; the first entry reaching the highest score is
    returned, or ``None`` when the grid is empty. A kernel other than
    ``'linear'``, ``'poly'`` or ``'rbf'`` raises ``ValueError``.
    """
    chosen = None
    chosen_score = float('-inf')

    for entry in param_grid:
        name = entry['kernel']
        if name == 'linear':
            extra = {}
        elif name == 'poly':
            extra = None  # filled in below, after 'C' has been read
        elif name == 'rbf':
            extra = None
        else:
            raise ValueError("Invalid kernel type!")

        penalty = entry['C']
        if name == 'poly':
            extra = {'degree': entry['degree']}
        elif name == 'rbf':
            extra = {'gamma': 1 / (2 * entry['sigma'] ** 2)}

        candidate_model = SVC(kernel=name, C=penalty, **extra)
        fold_scores = cross_val_score(candidate_model, X, y, cv=cv)
        average = np.mean(fold_scores)

        # only a strictly better score replaces the current choice
        if average > chosen_score:
            chosen_score = average
            chosen = entry

    return chosen


def ova_svm_train(X, y, C, sigma):
    """Build an RBF-kernel SVC, fit it on ``(X, y)`` and return it.

    ``gamma`` is derived from the kernel width as ``1 / (2 * sigma ** 2)``.
    """
    width_to_gamma = 1 / (2 * sigma ** 2)
    trained = SVC(kernel='rbf', C=C, gamma=width_to_gamma)
    trained.fit(X, y)
    return trained


def ova_svm_predict(model, X_test):
    """Return ``model.predict(X_test)``."""
    result = model.predict(X_test)
    return result

def active_learning(X_train, y_train, X_pool, y_pool, n_queries, kernel, C, degree=None, sigma=None):
    """Grow the training set by margin-based sampling and return the final SVC.

    On every round the SVC is refitted and the pool is scored with
    ``decision_function``. With one decision value per sample, the sample
    closest to zero is queried; with one column per class, the sample with the
    smallest gap between its two largest decision values is queried. The
    queried sample and its label move from the pool to the training set.

    Parameters
    ----------
    X_train, y_train : array-like
        Initial labelled samples and their labels.
    X_pool, y_pool : array-like
        Candidate samples and their labels, aligned row by row. The caller's
        arrays are not modified.
    n_queries : int
        Maximum number of samples to move from the pool. Querying stops early
        if the pool runs out.
    kernel : {'linear', 'poly', 'rbf'}
        SVC kernel.
    C : float
        SVC regularisation parameter.
    degree : int, optional
        Polynomial degree; used only when ``kernel == 'poly'``.
    sigma : float, optional
        RBF width, converted to ``gamma = 1 / (2 * sigma ** 2)``; used only
        when ``kernel == 'rbf'``.

    Returns
    -------
    SVC
        The classifier fitted on the initial samples plus all queried ones.

    Raises
    ------
    ValueError
        If ``kernel`` is not one of the supported names.
    """
    if kernel == 'linear':
        model = SVC(kernel='linear', C=C)
    elif kernel == 'poly':
        model = SVC(kernel='poly', C=C, degree=degree)
    elif kernel == 'rbf':
        model = SVC(kernel='rbf', C=C, gamma=1 / (2 * sigma ** 2))
    else:
        # fail here instead of with an AttributeError on ``None.fit`` later
        raise ValueError("Invalid kernel type!")

    X_pool = np.asarray(X_pool)
    y_pool = np.asarray(y_pool)

    for _ in range(n_queries):
        if len(X_pool) == 0:
            # nothing left to query
            break

        model.fit(X_train, y_train)  # Fit the model before computing distances

        dist = np.asarray(model.decision_function(X_pool))

        if dist.ndim == 1:
            # The signed value is negative for one class and positive for the
            # other, so uncertainty is its magnitude; argmin of the signed
            # value would pick the most confidently negative sample instead.
            query_idx = np.argmin(np.abs(dist))
        else:
            # After partitioning at -2 the last two columns hold the second
            # largest and the largest value of every row.
            top_two = np.partition(dist, -2, axis=1)
            query_idx = np.argmin(top_two[:, -1] - top_two[:, -2])

        X_train = np.vstack([X_train, X_pool[query_idx]])
        y_train = np.hstack([y_train, y_pool[query_idx]])

        # remove the queried instance from BOTH pool arrays; deleting it from
        # X_pool only would leave y_pool misaligned for every later query
        X_pool = np.delete(X_pool, query_idx, axis=0)
        y_pool = np.delete(y_pool, query_idx, axis=0)

    # final fit so the returned model has seen the last queried sample
    model.fit(X_train, y_train)
    return model


def accu_scr(y_test, y_pred):
    """Return the accuracy: the mean of the element-wise ``y_test == y_pred``.

    ``np.asarray`` is applied to both inputs so that lists are compared entry
    by entry; without it two lists compare as whole objects and the result is
    only ever 0.0 or 1.0.
    """
    actual = np.asarray(y_test)
    estimate = np.asarray(y_pred)
    return np.mean(actual == estimate)


# Load the Iris data: X is the feature matrix, y the target vector.
iris = load_iris()
X = iris.data
y = iris.target

r = int(input("Specify the size of training dataset: "))
m = int(input("Specify the size for active learning: "))
l = int(input("Specify the size of testing dataset: "))

C_range = np.logspace(-3, 3, 7)
sigma_range = np.logspace(-3, 3, 7)
degree_range = [2, 3, 4]

N_RUNS = 15                 # number of repetitions averaged at the end
CLASS_STARTS = (0, 50, 100) # the sampling below assumes each class occupies
CLASS_SIZE = 50             # a contiguous block of 50 rows

# Candidate hyperparameters; identical for every run, so built once.
param_grid = (
    [{'kernel': 'linear', 'C': C} for C in C_range]
    + [{'kernel': 'poly', 'C': C, 'degree': degree} for C in C_range for degree in degree_range]
    + [{'kernel': 'rbf', 'C': C, 'sigma': sigma} for C in C_range for sigma in sigma_range]
)

# NOTE(review): no random seed is set, so the accuracies differ between runs.
arr = []      # per-run accuracies (kept across iterations)
counter = 0   # running sum of the accuracies
for g in range(N_RUNS):
    # Initial training set: int(r / 3) indices drawn from each class block.
    # NOTE(review): randint samples with replacement, so indices may repeat,
    # and any remainder of r / 3 is dropped.
    per_class = int(r / 3)
    train_idx = np.concatenate(
        [randint(start, start + CLASS_SIZE, per_class) for start in CLASS_STARTS]
    )
    X_train = X[train_idx]
    y_train = y[train_idx]

    # Test set: l indices drawn from all 150 rows.
    # NOTE(review): drawn with replacement from the same rows used for the
    # training set and the query pool (the full X, y below), so test samples
    # can also be trained on; the reported accuracy is optimistic.
    test_idx = randint(0, 150, l)
    X_test = X[test_idx]
    y_test = y[test_idx]

    # Pick kernel and hyperparameters on the initial training set.
    best_params = grid_search(X_train, y_train, param_grid)
    kernel = best_params['kernel']
    C = best_params['C']

    # Forward whichever kernel-specific parameter the winning entry carries
    # ('degree' for poly, 'sigma' for rbf, none for linear).
    kernel_kwargs = {key: best_params[key] for key in ('degree', 'sigma') if key in best_params}

    # Run active learning with the whole data set as the query pool.
    model = active_learning(X_train, y_train, X, y, m, kernel, C, **kernel_kwargs)

    # Evaluate on the test draw.
    y_pred = ova_svm_predict(model, X_test)
    a = accu_scr(y_test, y_pred)
    arr.append(a)

    print("Accuracy for testcase", g + 1, "is:", a * 100, "%")

    counter = counter + a

overall_mean = counter / N_RUNS
print("Overall mean accuracy is:", overall_mean * 100)


Specify the size of training dataset: 12
Specify the size for active learning: 15
Specify the size of testing dataset: 145
Accuracy for testcase 1 is: 81.37931034482759 %
Accuracy for testcase 2 is: 95.17241379310344 %
Accuracy for testcase 3 is: 91.72413793103448 %
Accuracy for testcase 4 is: 96.55172413793103 %
Accuracy for testcase 5 is: 95.86206896551724 %
Accuracy for testcase 6 is: 79.3103448275862 %
Accuracy for testcase 7 is: 97.24137931034483 %
Accuracy for testcase 8 is: 100.0 %
Accuracy for testcase 9 is: 92.41379310344827 %
Accuracy for testcase 10 is: 89.65517241379311 %
Accuracy for testcase 11 is: 94.48275862068965 %
Accuracy for testcase 12 is: 87.58620689655172 %
Accuracy for testcase 13 is: 90.3448275862069 %
Accuracy for testcase 14 is: 96.55172413793103 %
Accuracy for testcase 15 is: 90.3448275862069 %
Overall mean accuracy is: 91.90804597701148
