In [1]:
from sklearn import datasets
from sklearn.svm import SVC
from sklearn.multiclass import OneVsRestClassifier
import pandas as pd
import numpy as np

# Q2.2.1 Split training set and test set

In [2]:
dataset = pd.read_excel('Classification iris.xlsx')

class_names = dataset['class'].unique().tolist()
X = dataset.to_numpy()[:, 1:5]
y = dataset.to_numpy()[:, -1]

In [3]:
X_train, y_train = [], []
X_test, y_test = [], []
instance_ids_train, instance_ids_test = [], []

for i, name in enumerate(class_names):
    mask = (y == name)
    n_train = int(0.7 * mask.sum())
    
    X_train.append(X[mask][:n_train])
    y_train.append(y[mask][:n_train])

    X_test.append(X[mask][n_train:])
    y_test.append(y[mask][n_train:])

    instance_ids_train.extend((np.where(mask)[0][:n_train] + 1).tolist())
    instance_ids_test.extend((np.where(mask)[0][n_train:] + 1).tolist())

X_train = np.concatenate(X_train, axis=0)
y_train = np.concatenate(y_train, axis=0)
X_test = np.concatenate(X_test, axis=0)
y_test = np.concatenate(y_test, axis=0)

In [4]:
answer = f"""
Q2.2.1 Split training set and test set:
Training set: {instance_ids_train}

Test set: {instance_ids_test}
""".strip()

print(answer)

Q2.2.1 Split training set and test set:
Training set: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135]

Test set: [36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150]


# Q2.2.2 - Q2.2.4

## First Type of Solutions, Three Binary SVC

### Q2.2.2 Calculation using Standard SVM Model (Linear Kernel)

In [5]:
classifiers = []
for name in class_names:
    cur_y_train = np.where(y_train == name, 1, -1)
    classifier = SVC(kernel='linear', C=1e5)
    classifier.fit(X_train, cur_y_train)

    classifiers.append(classifier)

In [6]:
def predict(classifiers, class_names, X):
    preds = np.stack([clf.decision_function(X) for clf in classifiers], axis=-1)
    # preds2 = np.concatenate([X @ clf.coef_.T + clf.intercept_ for clf in classifiers], axis=-1)
    # assert np.allclose(preds.astype(np.float32), preds2.astype(np.float32))
    preds = preds.argmax(1)

    preds = np.array([class_names[p] for p in preds])
    return preds

In [7]:
# total training error
y_pred = predict(classifiers, class_names, X_train)
total_train_error = (y_pred != y_train).mean()

# total testing error
y_pred = predict(classifiers, class_names, X_test)
total_test_error = (y_pred != y_test).mean()

answer = f"""
Q2.2.2 Calculation using Standard SVM Model:
total training error: {total_train_error}, total testing error: {total_test_error},
""".strip()

In [8]:
class_answer_template = """
class {class_names}:
training error (multi-class): {training_error}, testing error (multi-class): {testing_error},
training error (binary): {training_error2}, testing error (binary): {testing_error2},
w: {w}, b: {b},
support vector indices (all): {sv_indices}
support vector indices (class_only): {sv_indices2}
""".strip()

linear_separable_classes = []
for i, name in enumerate(class_names):
    classifier = classifiers[i]
    
    # training error for each class
    mask_train = (y_train == name)
    class_X_train, class_y_train = X_train[mask_train], y_train[mask_train]
    ## multi-class classification, calculate error of the target class
    y_pred = predict(classifiers, class_names, class_X_train)
    training_error = (y_pred != class_y_train).mean()

    ## binary classification, calculate error of the single binary classifier, w.r.t. all samples
    y_pred = classifier.predict(X_train)
    cur_y_train = np.where(y_train == name, 1, -1)
    training_error2 = (y_pred != cur_y_train).mean()

    if training_error == 0:
        linear_separable_classes.append(name)

    # testing error for each class
    mask_test = (y_test == name)
    class_X_test, class_y_test = X_test[mask_test], y_test[mask_test]
    ## multi-class classification, calculate error of the target class
    y_pred = predict(classifiers, class_names, class_X_test)
    testing_error = (y_pred != class_y_test).mean()

    ## binary classification, calculate error of the single binary classifier, w.r.t. all samples
    y_pred = classifier.predict(X_test)
    cur_y_test = np.where(y_test == name, 1, -1)
    testing_error2 = (y_pred != cur_y_test).mean()

    # w and b
    w = classifier.coef_[0]
    b = classifier.intercept_

    # support vector indices
    support_vectors = classifier.support_vectors_
    ## all support vectors of this binary classifier
    data_mask = (X_train[:, None] == support_vectors[None]).all(2).any(1)
    sv_indices = np.where(data_mask)[0]

    ## class-specific support vectors
    sv_mask = (support_vectors[:, None] == class_X_train[None]).all(2).any(1)
    class_support_vectors = support_vectors[sv_mask]
    data_mask = (X_train[:, None] == class_support_vectors[None]).all(2).any(1)
    sv_indices2 = np.where(data_mask)[0]

    answer += ("\n\n" + class_answer_template.format(class_names=name, training_error=training_error, testing_error=testing_error, training_error2=training_error2, testing_error2=testing_error2, w=w.tolist(), b=b.tolist(), sv_indices=sv_indices.tolist(), sv_indices2=sv_indices2.tolist()))

answer += ("\n\n" + f"Linear separable classes: {linear_separable_classes}")
  

In [9]:
print(answer)

Q2.2.2 Calculation using Standard SVM Model:
total training error: 0.05714285714285714, total testing error: 0.08888888888888889,

class Iris-setosa:
training error (multi-class): 0.0, testing error (multi-class): 0.06666666666666667,
training error (binary): 0.0, testing error (binary): 0.0,
w: [0.00973271083680672, 0.5377790363135159, -0.8273513712498684, -0.3820427629025541], b: [0.7734548983801744],
support vector indices (all): [23, 24, 42]
support vector indices (class_only): [23, 24]

class Iris-versicolor:
training error (multi-class): 0.14285714285714285, testing error (multi-class): 0.2,
training error (binary): 0.24761904761904763, testing error (binary): 0.28888888888888886,
w: [1.8485997680464266, -4.502373898401856, -1.1043392983847298, 0.3212849929695949], b: [5.677304235095876],
support vector indices (all): [1, 2, 8, 9, 13, 25, 27, 34, 35, 36, 37, 39, 40, 41, 42, 43, 44, 46, 47, 48, 49, 50, 52, 55, 58, 59, 60, 61, 62, 63, 65, 66, 67, 68, 69, 71, 72, 73, 77, 78, 80, 81,

### Q2.2.3 Calculation using SVM with Slack Variables (C = 0.25 × t, where t = 1, . . . , 4)

In [10]:
C_list = [0.25 * i for i in range(1, 5)]

In [11]:
initial_answer_template = """
-------------------------------------------
C={C},
total training error: {total_train_error}, total testing error: {total_test_error},
""".strip()


class_answer_template = """
class {class_names}:
training error (multi-class): {training_error}, testing error (multi-class): {testing_error},
training error (binary): {training_error2}, testing error (binary): {testing_error2},
w: {w}, b: {b},
support vector indices (all): {sv_indices}
slack variable (all): {slack_variables}
support vector indices (class_only): {sv_indices2}
slack variable (class_only): {slack_variables2}
""".strip()


def get_answer_with_C_linear(C):
    classifiers = []
    for name in class_names:
        cur_y_train = np.where(y_train == name, 1, -1)
        classifier = SVC(kernel='linear', C=C)
        classifier.fit(X_train, cur_y_train)

        classifiers.append(classifier)

    # total training error
    y_pred = predict(classifiers, class_names, X_train)
    total_train_error = (y_pred != y_train).mean()

    # total testing error
    y_pred = predict(classifiers, class_names, X_test)
    total_test_error = (y_pred != y_test).mean()

    answer = initial_answer_template.format(C=C, total_train_error=total_train_error, total_test_error=total_test_error)

    for i, name in enumerate(class_names):
        classifier = classifiers[i]
        
        # training error for each class
        mask_train = (y_train == name)
        class_X_train, class_y_train = X_train[mask_train], y_train[mask_train]
        ## multi-class classification, calculate error of the target class
        y_pred = predict(classifiers, class_names, class_X_train)
        training_error = (y_pred != class_y_train).mean()

        ## binary classification, calculate error of the single binary classifier
        y_pred = classifier.predict(X_train)
        cur_y_train = np.where(y_train == name, 1, -1)
        training_error2 = (y_pred != cur_y_train).mean()

        # testing error for each class
        mask_test = (y_test == name)
        class_X_test, class_y_test = X_test[mask_test], y_test[mask_test]
        ## multi-class classification, calculate error of the target class
        y_pred = predict(classifiers, class_names, class_X_test)
        testing_error = (y_pred != class_y_test).mean()

        ## binary classification, calculate error of the single binary classifier
        y_pred = classifier.predict(X_test)
        cur_y_test = np.where(y_test == name, 1, -1)
        testing_error2 = (y_pred != cur_y_test).mean()

        # w and b
        w = classifier.coef_[0]
        b = classifier.intercept_

        # support vector indices and slack variables
        support_vectors = classifier.support_vectors_
        ## all support vectors and slack variables of this binary classifier
        data_mask = (X_train[:, None] == support_vectors[None]).all(2).any(1)
        sv_indices = np.where(data_mask)[0]
        slack_variables = 1 - support_vectors @ w + b
        slack_variables = np.where(slack_variables > 0, slack_variables, 0)

        ## class-specific support vectors and slack variables
        sv_mask = (support_vectors[:, None] == class_X_train[None]).all(2).any(1)
        class_support_vectors = support_vectors[sv_mask]
        data_mask = (X_train[:, None] == class_support_vectors[None]).all(2).any(1)
        sv_indices2 = np.where(data_mask)[0]
        slack_variables2 = 1 - class_support_vectors @ w + b
        slack_variables2 = np.where(slack_variables2 > 0, slack_variables2, 0)

        answer += ("\n\n" + class_answer_template.format(class_names=name, training_error=training_error, testing_error=testing_error, training_error2=training_error2, testing_error2=testing_error2, w=w.tolist(), b=b.tolist(), sv_indices=sv_indices.tolist(), slack_variables=slack_variables.tolist(), sv_indices2=sv_indices2.tolist(), slack_variables2=slack_variables2.tolist()))

    return answer
    

In [12]:
answer_list = [get_answer_with_C_linear(C) for C in C_list]

answer = '\n'.join(answer_list)
print(answer)

-------------------------------------------
C=0.25,
total training error: 0.08571428571428572, total testing error: 0.08888888888888889,

class Iris-setosa:
training error (multi-class): 0.0, testing error (multi-class): 0.0,
training error (binary): 0.0, testing error (binary): 0.0,
w: [-0.1677418640327492, 0.4185483235846658, -0.7870966471693318, -0.3185483235846659], b: [1.9412904435714382],
support vector indices (all): [23, 24, 42, 64]
slack variable (all): [5.674676859972172, 5.882580015915305, 3.912902944289259, 3.882580385079434]
support vector indices (class_only): [23, 24]
slack variable (class_only): [3.912902944289259, 3.882580385079434]

class Iris-versicolor:
training error (multi-class): 0.2571428571428571, testing error (multi-class): 0.26666666666666666,
training error (binary): 0.3333333333333333, testing error (binary): 0.3333333333333333,
w: [0.17238571726129104, -0.7815740256164891, -0.035646697459622345, -0.1715862851470794], b: [0.7823192614015012],
support vecto

### Q2.2.4 Calculation using SVM with Kernel Functions

In [13]:
initial_answer_template = """
total training error: {total_train_error}, total testing error: {total_test_error},
""".strip()


class_answer_template = """
class {class_names}:
training error (multi-class): {training_error}, testing error (multi-class): {testing_error},
training error (binary): {training_error2}, testing error (binary): {testing_error2},
w: {w}, b: {b},
support vector indices (all): {sv_indices}
support vector indices (class_only): {sv_indices2}
""".strip()


def get_answer_with_kernel(**kwagrs):
    classifiers = []
    for name in class_names:
        cur_y_train = np.where(y_train == name, 1, -1)
        classifier = SVC(**kwagrs)
        classifier.fit(X_train, cur_y_train)

        classifiers.append(classifier)

    # total training error
    y_pred = predict(classifiers, class_names, X_train)
    total_train_error = (y_pred != y_train).mean()

    # total testing error
    y_pred = predict(classifiers, class_names, X_test)
    total_test_error = (y_pred != y_test).mean()

    answer = initial_answer_template.format(total_train_error=total_train_error, total_test_error=total_test_error)

    for i, name in enumerate(class_names):
        classifier = classifiers[i]
        
        # training error for each class
        mask_train = (y_train == name)
        class_X_train, class_y_train = X_train[mask_train], y_train[mask_train]
        ## multi-class classification, calculate error of the target class
        y_pred = predict(classifiers, class_names, class_X_train)
        training_error = (y_pred != class_y_train).mean()

        ## binary classification, calculate error of the single binary classifier
        y_pred = classifier.predict(X_train)
        cur_y_train = np.where(y_train == name, 1, -1)
        training_error2 = (y_pred != cur_y_train).mean()

        # testing error for each class
        mask_test = (y_test == name)
        class_X_test, class_y_test = X_test[mask_test], y_test[mask_test]
        ## multi-class classification, calculate error of the target class
        y_pred = predict(classifiers, class_names, class_X_test)
        testing_error = (y_pred != class_y_test).mean()

        ## binary classification, calculate error of the single binary classifier
        y_pred = classifier.predict(X_test)
        cur_y_test = np.where(y_test == name, 1, -1)
        testing_error2 = (y_pred != cur_y_test).mean()

        # w and b
        try:
            w = classifier.coef_[0]
        except:
            w = ""
        b = classifier.intercept_

        # support vector indices and slack variables
        support_vectors = classifier.support_vectors_
        ## all support vectors of this binary classifier
        data_mask = (X_train[:, None] == support_vectors[None]).all(2).any(1)
        sv_indices = np.where(data_mask)[0]

        ## class-specific support vectors
        sv_mask = (support_vectors[:, None] == class_X_train[None]).all(2).any(1)
        class_support_vectors = support_vectors[sv_mask]
        data_mask = (X_train[:, None] == class_support_vectors[None]).all(2).any(1)
        sv_indices2 = np.where(data_mask)[0]

        answer += ("\n\n" + class_answer_template.format(class_names=name, training_error=training_error, testing_error=testing_error, training_error2=training_error2, testing_error2=testing_error2, w=w, b=b.tolist(), sv_indices=sv_indices.tolist(), sv_indices2=sv_indices2.tolist()))

    return answer
    

In [14]:
answer = (
    "Q2.2.4 Calculation using SVM with Kernel Functions:\n"
    "-------------------------------------------\n"
    "(a) 2nd-order Polynomial Kernel,\n"
    f"{get_answer_with_kernel(kernel='poly', degree=2, C=1e5)}\n"
    "-------------------------------------------\n"
    "(b) 3nd-order Polynomial Kernel,\n"
    f"{get_answer_with_kernel(kernel='poly', degree=3, C=1e5)}\n"
    "-------------------------------------------\n"
    "(c) Radial Basis Function Kernel with σ = 1,\n"
    f"{get_answer_with_kernel(kernel='rbf', gamma=1, C=1e5)}\n"
    "-------------------------------------------\n"
    "(d) Sigmoidal Kernel with σ = 1,\n"
    f"{get_answer_with_kernel(kernel='sigmoid', gamma=1, C=1e5)}\n"

)

print(answer)

Q2.2.4 Calculation using SVM with Kernel Functions:
-------------------------------------------
(a) 2nd-order Polynomial Kernel,
total training error: 0.009523809523809525, total testing error: 0.022222222222222223,

class Iris-setosa:
training error (multi-class): 0.0, testing error (multi-class): 0.06666666666666667,
training error (binary): 0.0, testing error (binary): 0.0,
w: , b: [0.7989027877076639],
support vector indices (all): [23, 24, 42]
support vector indices (class_only): [23, 24]

class Iris-versicolor:
training error (multi-class): 0.02857142857142857, testing error (multi-class): 0.0,
training error (binary): 0.009523809523809525, testing error (binary): 0.022222222222222223,
w: , b: [3.3773888193804047],
support vector indices (all): [31, 55, 58, 62, 68, 76, 89, 97, 101, 103, 104]
support vector indices (class_only): [55, 58, 62, 68]

class Iris-virginica:
training error (multi-class): 0.0, testing error (multi-class): 0.0,
training error (binary): 0.009523809523809525

## Second Type of Solutions, OneVsRestClassifier

### Q2.2.2 Calculation using Standard SVM Model (Linear Kernel)

In [15]:
classifier = OneVsRestClassifier(SVC(kernel='linear', C=1e5))
classifier.fit(X_train, y_train)

In [16]:
# total training error
y_pred = classifier.predict(X_train)
total_train_error = (y_pred != y_train).mean()

# total testing error
y_pred = classifier.predict(X_test)
total_test_error = (y_pred != y_test).mean()

answer = f"""
Q2.2.2 Calculation using Standard SVM Model:
total training error: {total_train_error}, total testing error: {total_test_error},
""".strip()

In [17]:
class_answer_template = """
class {class_names}:
training error (multi-class): {training_error}, testing error (multi-class): {testing_error},
training error (binary): {training_error2}, testing error (binary): {testing_error2},
w: {w}, b: {b},
support vector indices (all): {sv_indices}
support vector indices (class_only): {sv_indices2}
""".strip()

linear_separable_classes = []
for i, name in enumerate(class_names):
    clf = classifier.estimators_[i]
    # training error for each class
    mask_train = (y_train == name)
    class_X_train, class_y_train = X_train[mask_train], y_train[mask_train]
    ## multi-class classification, calculate error of the target class
    y_pred = classifier.predict(class_X_train)
    training_error = (y_pred != class_y_train).mean()

    ## binary classification, calculate error of the single binary classifier
    y_pred = clf.predict(X_train)
    cur_y_train = np.where(y_train == name, 1, 0)
    training_error2 = (y_pred != cur_y_train).mean()

    if training_error == 0:
        linear_separable_classes.append(name)

    # testing error for each class
    mask_test = (y_test == name)
    class_X_test, class_y_test = X_test[mask_test], y_test[mask_test]
    ## multi-class classification, calculate error of the target class
    y_pred = classifier.predict(class_X_test)
    testing_error = (y_pred != class_y_test).mean()

    ## binary classification, calculate error of the single binary classifier
    y_pred = clf.predict(X_test)
    cur_y_test = np.where(y_test == name, 1, 0)
    testing_error2 = (y_pred != cur_y_test).mean()

    # w and b
    w = clf.coef_[0]
    b = clf.intercept_

    # support vector indices
    support_vectors = clf.support_vectors_
    ## all support vectors of this binary classifier
    data_mask = (X_train[:, None] == support_vectors[None]).all(2).any(1)
    sv_indices = np.where(data_mask)[0]

    ## class-specific support vectors
    sv_mask = (support_vectors[:, None] == class_X_train[None]).all(2).any(1)
    class_support_vectors = support_vectors[sv_mask]
    data_mask = (X_train[:, None] == class_support_vectors[None]).all(2).any(1)
    sv_indices2 = np.where(data_mask)[0]

    answer += ("\n\n" + class_answer_template.format(class_names=name, training_error=training_error, testing_error=testing_error, training_error2=training_error2, testing_error2=testing_error2, w=w.tolist(), b=b.tolist(), sv_indices=sv_indices.tolist(), sv_indices2=sv_indices2.tolist()))

answer += ("\n\n" + f"Linear separable classes: {linear_separable_classes}")
  

In [18]:
print(answer)

Q2.2.2 Calculation using Standard SVM Model:
total training error: 0.05714285714285714, total testing error: 0.08888888888888889,

class Iris-setosa:
training error (multi-class): 0.0, testing error (multi-class): 0.06666666666666667,
training error (binary): 0.0, testing error (binary): 0.0,
w: [0.00973271083680672, 0.5377790363135159, -0.8273513712498684, -0.3820427629025541], b: [0.7734548983801744],
support vector indices (all): [23, 24, 42]
support vector indices (class_only): [23, 24]

class Iris-versicolor:
training error (multi-class): 0.14285714285714285, testing error (multi-class): 0.2,
training error (binary): 0.24761904761904763, testing error (binary): 0.28888888888888886,
w: [1.8485997680464266, -4.502373898401856, -1.1043392983847298, 0.3212849929695949], b: [5.677304235095876],
support vector indices (all): [1, 2, 8, 9, 13, 25, 27, 34, 35, 36, 37, 39, 40, 41, 42, 43, 44, 46, 47, 48, 49, 50, 52, 55, 58, 59, 60, 61, 62, 63, 65, 66, 67, 68, 69, 71, 72, 73, 77, 78, 80, 81,

### Q2.2.3 Calculation using SVM with Slack Variables (C = 0.25 × t, where t = 1, . . . , 4)

In [19]:
C_list = [0.25 * i for i in range(1, 5)]

In [20]:
initial_answer_template = """
-------------------------------------------
C={C},
total training error: {total_train_error}, total testing error: {total_test_error},
""".strip()


class_answer_template = """
class {class_names}:
training error (multi-class): {training_error}, testing error (multi-class): {testing_error},
training error (binary): {training_error2}, testing error (binary): {testing_error2},
w: {w}, b: {b},
support vector indices (all): {sv_indices}
slack variable (all): {slack_variables}
support vector indices (class_only): {sv_indices2}
slack variable (class_only): {slack_variables2}
""".strip()


def get_answer_with_C_linear(C):
    classifier = OneVsRestClassifier(SVC(kernel='linear', C=C))
    classifier.fit(X_train, y_train)

    # total training error
    y_pred = classifier.predict(X_train)
    total_train_error = (y_pred != y_train).mean()

    # total testing error
    y_pred = classifier.predict(X_test)
    total_test_error = (y_pred != y_test).mean()

    answer = initial_answer_template.format(C=C, total_train_error=total_train_error, total_test_error=total_test_error)

    for i, name in enumerate(class_names):
        clf = classifier.estimators_[i]
        
        # training error for each class
        mask_train = (y_train == name)
        class_X_train, class_y_train = X_train[mask_train], y_train[mask_train]
        ## multi-class classification, calculate error of the target class
        y_pred = classifier.predict(class_X_train)
        training_error = (y_pred != class_y_train).mean()

        ## binary classification, calculate error of the single binary classifier
        y_pred = clf.predict(X_train)
        cur_y_train = np.where(y_train == name, 1, 0)
        training_error2 = (y_pred != cur_y_train).mean()

        # testing error for each class
        mask_test = (y_test == name)
        class_X_test, class_y_test = X_test[mask_test], y_test[mask_test]
        ## multi-class classification, calculate error of the target class
        y_pred = classifier.predict(class_X_test)
        testing_error = (y_pred != class_y_test).mean()

        ## binary classification, calculate error of the single binary classifier
        y_pred = clf.predict(X_test)
        cur_y_test = np.where(y_test == name, 1, 0)
        testing_error2 = (y_pred != cur_y_test).mean()

        # w and b
        w = clf.coef_[0]
        b = clf.intercept_

        # support vector indices and slack variables
        support_vectors = clf.support_vectors_
        ## all support vectors and slack variables of this binary classifier
        data_mask = (X_train[:, None] == support_vectors[None]).all(2).any(1)
        sv_indices = np.where(data_mask)[0]
        slack_variables = 1 - support_vectors @ w + b
        slack_variables = np.where(slack_variables > 0, slack_variables, 0)

        ## class-specific support vectors and slack variables
        sv_mask = (support_vectors[:, None] == class_X_train[None]).all(2).any(1)
        class_support_vectors = support_vectors[sv_mask]
        data_mask = (X_train[:, None] == class_support_vectors[None]).all(2).any(1)
        sv_indices2 = np.where(data_mask)[0]
        slack_variables2 = 1 - class_support_vectors @ w + b
        slack_variables2 = np.where(slack_variables2 > 0, slack_variables2, 0)

        answer += ("\n\n" + class_answer_template.format(class_names=name, training_error=training_error, testing_error=testing_error, training_error2=training_error2, testing_error2=testing_error2, w=w.tolist(), b=b.tolist(), sv_indices=sv_indices.tolist(), slack_variables=slack_variables.tolist(), sv_indices2=sv_indices2.tolist(), slack_variables2=slack_variables2.tolist()))

    return answer
    

In [21]:
answer_list = [get_answer_with_C_linear(C) for C in C_list]

answer = '\n'.join(answer_list)
print(answer)

-------------------------------------------
C=0.25,
total training error: 0.08571428571428572, total testing error: 0.08888888888888889,

class Iris-setosa:
training error (multi-class): 0.0, testing error (multi-class): 0.0,
training error (binary): 0.0, testing error (binary): 0.0,
w: [-0.1677418640327492, 0.4185483235846658, -0.7870966471693318, -0.3185483235846659], b: [1.9412904435714382],
support vector indices (all): [23, 24, 42, 64]
slack variable (all): [5.674676859972172, 5.882580015915305, 3.912902944289259, 3.882580385079434]
support vector indices (class_only): [23, 24]
slack variable (class_only): [3.912902944289259, 3.882580385079434]

class Iris-versicolor:
training error (multi-class): 0.2571428571428571, testing error (multi-class): 0.26666666666666666,
training error (binary): 0.3333333333333333, testing error (binary): 0.3333333333333333,
w: [0.17238571726129104, -0.7815740256164891, -0.035646697459622345, -0.1715862851470794], b: [0.7823192614015012],
support vecto

### Q2.2.4 Calculation using SVM with Kernel Functions

In [22]:
initial_answer_template = """
total training error: {total_train_error}, total testing error: {total_test_error},
""".strip()


class_answer_template = """
class {class_names}:
training error (multi-class): {training_error}, testing error (multi-class): {testing_error},
training error (binary): {training_error2}, testing error (binary): {testing_error2},
w: {w}, b: {b},
support vector indices (all): {sv_indices}
support vector indices (class_only): {sv_indices2}
""".strip()


def get_answer_with_kernel(**kwagrs):
    classifier = OneVsRestClassifier(SVC(**kwagrs))
    classifier.fit(X_train, y_train)

    # total training error
    y_pred = classifier.predict(X_train)
    total_train_error = (y_pred != y_train).mean()

    # total testing error
    y_pred = classifier.predict(X_test)
    total_test_error = (y_pred != y_test).mean()

    answer = initial_answer_template.format(total_train_error=total_train_error, total_test_error=total_test_error)

    for i, name in enumerate(class_names):
        clf = classifier.estimators_[i]
        
        # training error for each class
        mask_train = (y_train == name)
        class_X_train, class_y_train = X_train[mask_train], y_train[mask_train]
        ## multi-class classification, calculate error of the target class
        y_pred = classifier.predict(class_X_train)
        training_error = (y_pred != class_y_train).mean()

        ## binary classification, calculate error of the single binary classifier
        y_pred = clf.predict(X_train)
        cur_y_train = np.where(y_train == name, 1, 0)
        training_error2 = (y_pred != cur_y_train).mean()

        # testing error for each class
        mask_test = (y_test == name)
        class_X_test, class_y_test = X_test[mask_test], y_test[mask_test]
        ## multi-class classification, calculate error of the target class
        y_pred = classifier.predict(class_X_test)
        testing_error = (y_pred != class_y_test).mean()

        ## binary classification, calculate error of the single binary classifier
        y_pred = clf.predict(X_test)
        cur_y_test = np.where(y_test == name, 1, 0)
        testing_error2 = (y_pred != cur_y_test).mean()

        # w and b
        try:
            w = clf.coef_[0]
        except:
            w = ""
        b = clf.intercept_

        # support vector indices and slack variables
        support_vectors = clf.support_vectors_
        ## all support vectors of this binary classifier
        data_mask = (X_train[:, None] == support_vectors[None]).all(2).any(1)
        sv_indices = np.where(data_mask)[0]

        ## class-specific support vectors
        sv_mask = (support_vectors[:, None] == class_X_train[None]).all(2).any(1)
        class_support_vectors = support_vectors[sv_mask]
        data_mask = (X_train[:, None] == class_support_vectors[None]).all(2).any(1)
        sv_indices2 = np.where(data_mask)[0]

        answer += ("\n\n" + class_answer_template.format(class_names=name, training_error=training_error, testing_error=testing_error, training_error2=training_error2, testing_error2=testing_error2, w=w, b=b.tolist(), sv_indices=sv_indices.tolist(), sv_indices2=sv_indices2.tolist()))

    return answer
    

In [23]:
answer = (
    "Q2.2.4 Calculation using SVM with Kernel Functions:\n"
    "-------------------------------------------\n"
    "(a) 2nd-order Polynomial Kernel,\n"
    f"{get_answer_with_kernel(kernel='poly', degree=2, C=1e5)}\n"
    "-------------------------------------------\n"
    "(b) 3nd-order Polynomial Kernel,\n"
    f"{get_answer_with_kernel(kernel='poly', degree=3, C=1e5)}\n"
    "-------------------------------------------\n"
    "(c) Radial Basis Function Kernel with σ = 1,\n"
    f"{get_answer_with_kernel(kernel='rbf', gamma=1, C=1e5)}\n"
    "-------------------------------------------\n"
    "(d) Sigmoidal Kernel with σ = 1,\n"
    f"{get_answer_with_kernel(kernel='sigmoid', gamma=1, C=1e5)}\n"

)

print(answer)

Q2.2.4 Calculation using SVM with Kernel Functions:
-------------------------------------------
(a) 2nd-order Polynomial Kernel,
total training error: 0.009523809523809525, total testing error: 0.022222222222222223,

class Iris-setosa:
training error (multi-class): 0.0, testing error (multi-class): 0.06666666666666667,
training error (binary): 0.0, testing error (binary): 0.0,
w: , b: [0.7989027877076639],
support vector indices (all): [23, 24, 42]
support vector indices (class_only): [23, 24]

class Iris-versicolor:
training error (multi-class): 0.02857142857142857, testing error (multi-class): 0.0,
training error (binary): 0.009523809523809525, testing error (binary): 0.022222222222222223,
w: , b: [3.3773888193804047],
support vector indices (all): [31, 55, 58, 62, 68, 76, 89, 97, 101, 103, 104]
support vector indices (class_only): [55, 58, 62, 68]

class Iris-virginica:
training error (multi-class): 0.0, testing error (multi-class): 0.0,
training error (binary): 0.009523809523809525

## Last Type of Solutions, `decision_function_shape='ovr'`

### Q2.2.2 Calculation using Standard SVM Model (Linear Kernel)

In [24]:
classifier = SVC(kernel='linear', C=1e5, decision_function_shape='ovr')
classifier.fit(X_train, y_train)

In [25]:
# total training error
y_pred = classifier.predict(X_train)
total_train_error = (y_pred != y_train).mean()

# total testing error
y_pred = classifier.predict(X_test)
total_test_error = (y_pred != y_test).mean()

answer = f"""
Q2.2.2 Calculation using Standard SVM Model:
total training error: {total_train_error}, total testing error: {total_test_error},
""".strip()

In [26]:
class_answer_template = """
class {class_names}:
training error: {training_error}, testing error: {testing_error},
w: {w}, b: {b},
support vector indices: {sv_indices}
""".strip()

linear_separable_classes = []
for i, name in enumerate(class_names):
    # training error for each class
    mask_train = (y_train == name)
    class_X_train, class_y_train = X_train[mask_train], y_train[mask_train]
    y_pred = classifier.predict(class_X_train)
    # y_pred2 = classifier.decision_function(class_X_train).argmax(-1)
    # y_pred2 = np.array([class_names[p] for p in y_pred2])
    # assert np.all(y_pred == y_pred2)
    training_error = (y_pred != class_y_train).mean()
    if training_error == 0:
        linear_separable_classes.append(name)

    # testing error for each class
    mask_test = (y_test == name)
    class_X_test, class_y_test = X_test[mask_test], y_test[mask_test]
    y_pred = classifier.predict(class_X_test)
    # y_pred2 = classifier.decision_function(class_X_test).argmax(-1)
    # y_pred2 = np.array([class_names[p] for p in y_pred2])
    # assert np.all(y_pred == y_pred2)
    testing_error = (y_pred != class_y_test).mean()

    # w and b
    w = classifier.coef_[i]
    b = classifier.intercept_[i]

    # support vector indices
    support_vectors = classifier.support_vectors_
    sv_mask = (support_vectors[:, None] == class_X_train[None]).all(2).any(1)
    class_support_vectors = support_vectors[sv_mask]
    data_mask = (X_train[:, None] == class_support_vectors[None]).all(2).any(1)
    sv_indices = np.where(data_mask)[0]

    answer += ("\n\n" + class_answer_template.format(class_names=name, training_error=training_error, testing_error=testing_error, w=w.tolist(), b=b.tolist(), sv_indices=sv_indices.tolist()))

answer += ("\n\n" + f"Linear separable classes: {linear_separable_classes}")
  

In [27]:
print(answer)

Q2.2.2 Calculation using Standard SVM Model:
total training error: 0.01904761904761905, total testing error: 0.0,

class Iris-setosa:
training error: 0.0, testing error: 0.0,
w: [0.009450205326566152, 0.5376531878710997, -0.826829215428676, -0.38216594579684937], b: 0.774099177589861,
support vector indices: [23, 24]

class Iris-versicolor:
training error: 0.02857142857142857, testing error: 0.0,
w: [-0.007082578342378354, 0.17885071534115204, -0.538323991547213, -0.29218157632449726], b: 1.5070188241897433,
support vector indices: [42, 55, 57, 62, 68]

class Iris-virginica:
training error: 0.02857142857142857, testing error: 0.0,
w: [3.6465033842250705, 5.176363995415159, -7.428525380557403, -11.002415827650111], b: 17.570392115365593,
support vector indices: [76, 96, 97, 99, 103]

Linear separable classes: ['Iris-setosa']


### Q2.2.3 Calculation using SVM with Slack Variables (C = 0.25 × t, where t = 1, . . . , 4)

In [28]:
C_list = [0.25 * i for i in range(1, 5)]

In [29]:
initial_answer_template = """
-------------------------------------------
C={C},
total training error: {total_train_error}, total testing error: {total_test_error},
""".strip()


class_answer_template = """
class {class_names}:
training error: {training_error}, testing error: {testing_error},
w: {w}, b: {b},
support vector indices: {sv_indices}
slack variable: {slack_variables}
""".strip()


def get_answer_with_C_linear(C):
    classifier = SVC(kernel='linear', C=C, decision_function_shape='ovr')
    classifier.fit(X_train, y_train)

    # total training error
    y_pred = classifier.predict(X_train)
    total_train_error = (y_pred != y_train).mean()

    # total testing error
    y_pred = classifier.predict(X_test)
    total_test_error = (y_pred != y_test).mean()

    answer = initial_answer_template.format(C=C, total_train_error=total_train_error, total_test_error=total_test_error)

    for i, name in enumerate(class_names):
        # training error for each class
        mask_train = (y_train == name)
        class_X_train, class_y_train = X_train[mask_train], y_train[mask_train]
        y_pred = classifier.predict(class_X_train)
        training_error = (y_pred != class_y_train).mean()

        # testing error for each class
        mask_test = (y_test == name)
        class_X_test, class_y_test = X_test[mask_test], y_test[mask_test]
        y_pred = classifier.predict(class_X_test)
        testing_error = (y_pred != class_y_test).mean()

        # w and b
        w = classifier.coef_[i]
        b = classifier.intercept_[i]

        # support vector indices
        support_vectors = classifier.support_vectors_
        sv_mask = (support_vectors[:, None] == class_X_train[None]).all(2).any(1)
        class_support_vectors = support_vectors[sv_mask]
        data_mask = (X_train[:, None] == class_support_vectors[None]).all(2).any(1)
        sv_indices = np.where(data_mask)[0]

        # slack variables
        slack_variables = 1 - class_support_vectors @ w + b
        slack_variables = np.where(slack_variables > 0, slack_variables, 0)

        answer += ("\n\n" + class_answer_template.format(class_names=name, training_error=training_error, testing_error=testing_error, w=w.tolist(), b=b.tolist(), sv_indices=sv_indices.tolist(), slack_variables=slack_variables.tolist()))

    return answer
    

In [30]:
answer_list = [get_answer_with_C_linear(C) for C in C_list]

answer = '\n'.join(answer_list)
print(answer)

-------------------------------------------
C=0.25,
total training error: 0.047619047619047616, total testing error: 0.022222222222222223,

class Iris-setosa:
training error: 0.0, testing error: 0.0,
w: [-0.16774186403274927, 0.41854832358466565, -0.7870966471693315, -0.3185483235846657], b: 1.9412904435714369,
support vector indices: [23, 24]
slack variable: [3.9129029442892582, 3.882580385079433]

class Iris-versicolor:
training error: 0.02857142857142857, testing error: 0.0,
w: [-0.007082578342378354, 0.17885071534115204, -0.538323991547213, -0.29218157632449726], b: 1.5070188241897433,
support vector indices: [35, 37, 39, 40, 41, 42, 48, 51, 53, 55, 57, 58, 61, 62, 63, 64, 68, 69]
slack variable: [4.923451550620903, 5.077511320262673, 4.966836306063902, 4.848901528970379, 4.959044989512022, 4.181132489678933, 4.970732444715107, 4.870859443332811, 5.018189562611159, 5.086365744128806, 5.180572202461937, 4.9301812009843236, 5.0474077202436085, 5.206248590547933, 4.891577546203877, 4.

### Q2.2.4 Calculation using SVM with Kernel Functions

In [31]:
initial_answer_template = """
total training error: {total_train_error}, total testing error: {total_test_error},
""".strip()


class_answer_template = """
class {class_names}:
training error: {training_error}, testing error: {testing_error},
w: {w}, b: {b},
support vector indices: {sv_indices}
""".strip()


def get_answer_with_kernel(**kwagrs):
    classifier = SVC(decision_function_shape='ovr', **kwagrs)
    classifier.fit(X_train, y_train)

    # total training error
    y_pred = classifier.predict(X_train)
    total_train_error = (y_pred != y_train).mean()

    # total testing error
    y_pred = classifier.predict(X_test)
    total_test_error = (y_pred != y_test).mean()

    answer = initial_answer_template.format(total_train_error=total_train_error, total_test_error=total_test_error)

    for i, name in enumerate(class_names):
        # training error for each class
        mask_train = (y_train == name)
        class_X_train, class_y_train = X_train[mask_train], y_train[mask_train]
        y_pred = classifier.predict(class_X_train)
        training_error = (y_pred != class_y_train).mean()

        # testing error for each class
        mask_test = (y_test == name)
        class_X_test, class_y_test = X_test[mask_test], y_test[mask_test]
        y_pred = classifier.predict(class_X_test)
        testing_error = (y_pred != class_y_test).mean()

        # w and b
        try:
            w = classifier.coef_[i].tolist()
        except:
            w = ''
        b = classifier.intercept_[i]

        # support vector indices
        support_vectors = classifier.support_vectors_
        sv_mask = (support_vectors[:, None] == class_X_train[None]).all(2).any(1)
        class_support_vectors = support_vectors[sv_mask]
        data_mask = (X_train[:, None] == class_support_vectors[None]).all(2).any(1)
        sv_indices = np.where(data_mask)[0]

        answer += ("\n\n" + class_answer_template.format(class_names=name, training_error=training_error, testing_error=testing_error, w=w, b=b.tolist(), sv_indices=sv_indices.tolist()))

    return answer
    

In [32]:
answer = (
    "Q2.2.4 Calculation using SVM with Kernel Functions:\n"
    "-------------------------------------------\n"
    "(a) 2nd-order Polynomial Kernel,\n"
    f"{get_answer_with_kernel(kernel='poly', degree=2, C=1e5)}\n"
    "-------------------------------------------\n"
    "(b) 3nd-order Polynomial Kernel,\n"
    f"{get_answer_with_kernel(kernel='poly', degree=3, C=1e5)}\n"
    "-------------------------------------------\n"
    "(c) Radial Basis Function Kernel with σ = 1,\n"
    f"{get_answer_with_kernel(kernel='rbf', gamma=1, C=1e5)}\n"
    "-------------------------------------------\n"
    "(d) Sigmoidal Kernel with σ = 1,\n"
    f"{get_answer_with_kernel(kernel='sigmoid', gamma=1, C=1e5)}\n"

)

print(answer)

Q2.2.4 Calculation using SVM with Kernel Functions:
-------------------------------------------
(a) 2nd-order Polynomial Kernel,
total training error: 0.009523809523809525, total testing error: 0.0,

class Iris-setosa:
training error: 0.0, testing error: 0.0,
w: , b: 0.7983033321993324,
support vector indices: [23, 24]

class Iris-versicolor:
training error: 0.02857142857142857, testing error: 0.0,
w: , b: 1.3833559905589203,
support vector indices: [42, 55, 57, 58, 62, 68]

class Iris-virginica:
training error: 0.0, testing error: 0.0,
w: , b: 5.508177447555025,
support vector indices: [76, 89, 96, 97, 103]
-------------------------------------------
(b) 3nd-order Polynomial Kernel,
total training error: 0.0, total testing error: 0.0,

class Iris-setosa:
training error: 0.0, testing error: 0.0,
w: , b: 0.8181466825705307,
support vector indices: [23, 24]

class Iris-versicolor:
training error: 0.0, testing error: 0.0,
w: , b: 1.2931818892886409,
support vector indices: [42, 55, 57, 58