In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Enter the password here


In [2]:
password = '3me7vdo_'

In [3]:
from npz_loader import load_encrypted_npz

data = load_encrypted_npz(password)

In [4]:
print("Decrypted data:", data)

Decrypted data: {'X_train': array([[0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       ...,
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0]], dtype=uint8), 'y_train': array([1, 2, 1, ..., 2, 2, 1], dtype=uint8), 'X_test': array([[0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       ...,
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0]], dtype=uint8)}


# Now start working


## **SVM class**

In [5]:
def shuffle_sample(train):
    tab = np.hstack((train[0], train[1].reshape(train[1].shape[0], 1)))
    np.random.shuffle(tab)
    X_train, y_train = np.hsplit(tab, [tab.shape[1] - 1])
    y_train = y_train.reshape(y_train.shape[0])
    return (X_train, y_train)
    
def split_sample(train, n_split=5):
    n_samples, n_features = train[0].shape
    shuffle_sample(train)
    M = []
    train_set = np.array_split(train[0], n_split)
    train_label = np.array_split(train[1], n_split)
    for i in range(n_split):
        M.append((train_set[i], train_label[i]))
    return (M)

In [6]:
class SVM:
    
    def __init__(self, learning_rate=0.001, lambda_param=0.001, iteration=100):
        self.lr = learning_rate
        self.lambda_param = lambda_param
        self.iteration = iteration
        self.w = None
        self.b = None

    def fit(self, X, y, splits=5):
        self.Kfold_train(X, y, splits)
        
    def std_train(self, X, y):
        y_ = np.where(y == 1, 1, -1)
        for _ in range(self.iteration):
            for i, x_i in enumerate(X):
                condition = y_[i] * (np.dot(x_i, self.w) + self.b) >= 1
                if condition:
                    self.w -= self.lr * (2 * self.lambda_param * self.w)
                else:
                    self.w -= self.lr * (2 * self.lambda_param * self.w - np.dot(x_i, y_[i]))
                    self.b -= self.lr * y_[i]

    def prediction(self, X):
        ret = np.dot(X, self.w) + self.b
        if (ret < 0):
            return (2)
        else:
            return (1)

    def F1_scoring(self, data_validation):
        n_samples, n_features = data_validation[0].shape
        [F1, accuracy, precision, recall] = [0, 0, 0, 0]
        [TP, FP, FN, TN] = [0, 0, 0, 0]
        i = 0
    
        while (i < n_samples):
            val = self.prediction(data_validation[0][i])
            diff = val - int(data_validation[1][i])
            if val == 1:
                if diff == 0:
                    TP += 1
                else:
                    FP += 1
            else:
                if diff == 0:
                    TN += 1
                else:
                    FN += 1
            i += 1
    
        precision = TP / (TP + FP) * 100
        recall = TP / (TP + FN) * 100
        accuracy = (TP + TN) / n_samples * 100
        F1 = (2 * precision * recall) / (precision + recall)
        return ([F1, accuracy, precision, recall])
    
    def Kfold_train(self, X_train, y_train, split=5):
        n_samples, n_features = X_train.shape
        self.w = np.random.randn(n_features)
        self.b = 0
        score = None
        high_score = None
        train = (X_train, y_train)
        train = shuffle_sample(train)
        if (split <= 1):
            split = 2
        train = split_sample(train, split)
        i = 0
        while i < len(train) - 1:
            j = 0
            while j < len(train) - 1:
                if i != j:
                    self.std_train(train[j][0], train[j][1])
                w = self.w
                b = self.b
                j += 1
            score = self.F1_scoring(train[i])
            i += 1
            if (high_score == None) or (score[0] > high_score[0]):
                parametre = (w, b)
                high_score = score
        self.w = parametre[0]
        self.b = parametre[1]

## TEST

In [9]:
C = SVM()
C.fit(data["X_train"], data["y_train"], 5)

In [10]:
train = (data["X_train"], data["y_train"])
train = shuffle_sample(train)
C.F1_scoring(train)

[99.18227772821885, 99.13385826771653, 99.41877794336811, 98.94690002966479]