Assignment 1: Predict diabetes using Perceptron
Student: Pujan Maharjan (a1863495)
Course: Deep Learning Fundamentals

In [1]:
# install required libraries
# !pip install ipywidgets
# !pip install seaborn
# !pip install scikit-learn


In [2]:
import torch
from sklearn.datasets import load_svmlight_file
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from pandas.plotting import scatter_matrix
sns.set()


In [None]:
# Understand Data
# Load the svmlight-format file and build one DataFrame holding the
# features plus the label column, for exploration in the cells below.
# filename = "diabetes.txt"
filename = "diabetes_scale.txt"
X, y = load_svmlight_file(filename)
# load_svmlight_file returns a sparse matrix; convert to a dense numpy array
X = X.toarray()
# reshape y from (768,) to (768,1)
y = y.reshape(-1, 1)
print('X : ', X.shape)
print('Y : ', y.shape)
# append the label as the last column so features and label share one table
all_data = np.append(X, y, axis=1)
print('all_data Shape ', all_data.shape)
# eight feature names followed by the label column name
columns = ['Pregnancies', 'Glucose', 'Blood Pressure', 'Skin Thickness', 'Insulin', 'BMI', 'Diabetes Pedigree', 'Age', 'Output']
df = pd.DataFrame(all_data, columns=columns)
df

In [None]:
# Summary statistics per column, transposed so each row describes one column of df.
desc_data = df.describe().T
desc_data

In [None]:
# Show only the mean, standard deviation, minimum and maximum of each column.
desc_data[["mean", "std", "min", "max"]]

In [None]:
# Print the full per-column summary (dtype and non-null count) of df.
df.info(verbose=True)

In [None]:
# Draw one histogram per column of df.
histogram = df.hist(figsize=(6,8))

In [None]:
# Count how many rows carry each label value, to check the class balance.
print(df['Output'].value_counts())

In [None]:
# Pairwise scatter plots between all columns of df.
scatter_plot = scatter_matrix(df, figsize=(15,15))


In [8]:
class Perceptron():
    """Perceptron classifier trained with full-batch weight updates.

    Predictions are +1 / -1 (see ``predict``), so labels are expected to use
    the same encoding. Per-epoch mean losses and accuracies are recorded on
    the instance so they can be plotted once training has finished.
    """

    def __init__(self,
        file_path,
        weights,
        loss_function_name,
        learning_rate,
        epoch,
        add_bias = False) -> None:
        """Store the configuration and create empty training-history buffers.

        Args:
            file_path: Path of the svmlight-format data file.
            weights: Initial weight column vector, one row per feature.
            loss_function_name: One of "zero_one_loss", "perceptron_loss",
                "l1_loss" or "l2_loss".
            learning_rate: Step size applied to every weight update.
            epoch: Number of training epochs to run.
            add_bias: When True, ``train`` prepends a bias column to the
                features and a bias weight to ``weights``.
        """
        self.file_path = file_path
        self.weights = weights
        self.loss_function_name = loss_function_name
        self.learning_rate = learning_rate
        self.epoch = epoch
        self.add_bias = add_bias
        # Per-epoch records of the most recent train() call.
        self.train_data = None
        # History buffers; train() appends one entry per epoch.
        self.epochs = []
        self.train_losses = []
        self.validation_losses = []
        self.train_accuracies = []
        self.validation_accuracies = []
        # Legend labels shared by the loss and accuracy plots.
        self.train_validation_y_labels = ['Train', 'Validation']

    def get_features_labels_from_file_data(self):
        """Load ``self.file_path`` and return ``(X, y)`` as dense arrays.

        ``X`` is converted from a sparse matrix to a numpy array and ``y`` is
        reshaped into a column vector.
        """
        X, y = load_svmlight_file(self.file_path)
        # convert X from scipy.sparse.csr_matrix to a dense numpy array
        X = X.toarray()
        # reshape y from (n,) to (n, 1)
        y = y.reshape(-1, 1)
        return X, y

    def predict(self, X):
        """Return a column vector of +1 / -1 predictions for the rows of X.

        A score of exactly zero is classified as +1.
        """
        scores = np.dot(X, self.weights)
        return np.where(scores >= 0, 1, -1).reshape(-1, 1)

    def zero_one_loss(self, X, y):
        """Return 1 for each sample whose margin ``y * (X @ w)`` is negative, else 0."""
        scores = np.dot(X, self.weights)
        # Flatten both sides to columns so the product stays element-wise.
        margins = np.reshape(y, (-1, 1)) * np.reshape(scores, (-1, 1))
        return (margins < 0).astype(int)

    def perceptron_loss(self, X, y):
        """Return the per-sample perceptron loss ``max(0, -y * (X @ w))``."""
        margins = y * np.dot(X, self.weights)
        return np.maximum(0.0, -margins).reshape(-1, 1)

    def normal_loss(self, X, y):
        """Return the signed error ``prediction - label`` for each sample."""
        return self.predict(X) - y

    def l1_loss(self, X, y):
        """Return the absolute error ``|prediction - label|`` for each sample."""
        return np.abs(self.normal_loss(X, y))

    def l2_loss(self, X, y):
        """Return the squared error ``(prediction - label) ** 2`` for each sample."""
        return (self.predict(X) - y) ** 2

    def add_bias_in_features(self, X_for_bias):
        """Return a copy of the features with a leading column of ones."""
        ones_column = np.ones((X_for_bias.shape[0], 1))
        return np.append(ones_column, X_for_bias, axis=1)

    def train(self, X_train, y_train, X_val, y_val):
        """Run ``self.epoch`` full-batch updates and record the metrics.

        In every epoch the loss is evaluated with the current weights, the
        weights are then updated, and the accuracies are measured with the
        updated weights. When ``add_bias`` is set, a bias weight is prepended
        to ``self.weights`` on every call.

        Args:
            X_train: Training features, one row per sample.
            y_train: Training labels as a column vector.
            X_val: Validation features.
            y_val: Validation labels as a column vector.

        Returns:
            A list with one dict per epoch holding 'learning_rate', 'epoch',
            'train_loss', 'val_loss', 'val_accuracy' and 'train_accuracy'.

        Raises:
            ValueError: If ``loss_function_name`` is not a supported loss.
        """
        loss_functions = {
            "zero_one_loss": self.zero_one_loss,
            "perceptron_loss": self.perceptron_loss,
            "l1_loss": self.l1_loss,
            "l2_loss": self.l2_loss,
        }
        # Fail fast with a real exception: raising a plain string is itself
        # a TypeError in Python 3 and hides the actual problem.
        if self.loss_function_name not in loss_functions:
            raise ValueError(f"Loss function error {self.loss_function_name}")
        compute_loss = loss_functions[self.loss_function_name]

        if self.add_bias:
            X_train = self.add_bias_in_features(X_train)
            X_val = self.add_bias_in_features(X_val)
            bias_value = np.random.uniform(low=-.1, high=.1, size=1)
            print('Bias Value ', bias_value)
            # The bias weight goes first, matching the leading column of ones.
            self.weights = np.append(bias_value.reshape(1, 1), self.weights, axis=0)

        train_data = []
        for epoch_number in range(self.epoch):
            train_loss = compute_loss(X_train, y_train)
            validation_loss = compute_loss(X_val, y_val)

            # Each sample contributes lr * y * x scaled by its own loss, so
            # samples with zero loss leave the weights unchanged.
            per_sample_update = self.learning_rate * y_train * X_train * train_loss
            self.weights = self.weights + np.sum(per_sample_update, axis=0).reshape(-1, 1)

            train_accuracy = self.accuracy(X_train, y_train)
            validation_accuracy = self.accuracy(X_val, y_val)
            mean_train_loss = np.sum(train_loss) / X_train.shape[0]
            mean_validation_loss = np.sum(validation_loss) / X_val.shape[0]

            self.epochs.append(epoch_number)
            self.train_losses.append(mean_train_loss)
            self.validation_losses.append(mean_validation_loss)
            self.train_accuracies.append(train_accuracy)
            self.validation_accuracies.append(validation_accuracy)
            train_data.append({
                'learning_rate': self.learning_rate,
                'epoch': epoch_number,
                'train_loss': mean_train_loss,
                'val_loss': mean_validation_loss,
                'val_accuracy': validation_accuracy,
                'train_accuracy': train_accuracy})

        self.train_data = train_data
        return train_data

    def accuracy(self, X_accuracy, y_accuracy):
        """Return the fraction of samples whose prediction equals the label."""
        predictions_for_accuracy = self.predict(X_accuracy)
        return accuracy_score(y_accuracy, predictions_for_accuracy)

    def split_train_validation_test(self, X, y, test_split_percentage):
        """Split the data into train, validation and test parts.

        The test part and the validation part each receive
        ``test_split_percentage`` of the full data set. Both splits are
        shuffled with a fixed random state, so the result is reproducible.

        Returns:
            ``X_train, X_val, X_test, y_train, y_val, y_test``.
        """
        # reference to split (train/validation/test):
        #  https://datascience.stackexchange.com/questions/15135/train-test-validation-set-splitting-in-sklearn
        X_train, X_test, y_train, y_test = train_test_split(X, y,
            test_size=test_split_percentage,
            random_state=1,
            shuffle=True,
            # stratify=y
            )
        # The second split works on the remaining (1 - test) share, so the
        # fraction is rescaled to keep validation the same size as test,
        # e.g. 0.2 / (1 - 0.2) = 0.25 of the remainder = 0.2 of all data.
        val_split_percentage = test_split_percentage / (1 - test_split_percentage)
        X_train, X_val, y_train, y_val = train_test_split(X_train, y_train,
            test_size=val_split_percentage,
            random_state=1,
            shuffle=True,
            # stratify=y_train
            )

        return X_train, X_val, X_test, y_train, y_val, y_test

    def plot_line_graphs(self, x_values, y_values_list, y_labels, title):
        """Plot one labelled line per entry of ``y_values_list`` and show the figure."""
        for y_values, y_label in zip(y_values_list, y_labels):
            plt.plot(x_values, y_values, label = y_label)

        plt.legend()
        plt.title(title)
        plt.show()

    def plot_loss_curves(self, title_suffix):
        """Plot the recorded train and validation loss per epoch."""
        y_values_list = [self.train_losses, self.validation_losses]
        self.plot_line_graphs(self.epochs, y_values_list, self.train_validation_y_labels, 'Loss Curves: ' + title_suffix)

    def plot_accuracy_curves(self, title_suffix):
        """Plot the recorded train and validation accuracy per epoch."""
        y_values_list = [self.train_accuracies, self.validation_accuracies]
        self.plot_line_graphs(self.epochs, y_values_list, self.train_validation_y_labels, 'Accuracy Curves: ' + title_suffix)



In [22]:
# Test
from itertools import product

# Grid of settings explored by the search below.
# 1. Input Size, split sizes
split_sizes = [.1,.15,.2]
# 2. Weight Random seed
random_seeds = [0, 100, 200]
# 3. Weight pairs
weight_pairs = [(-1,1), (0,1)]
# 4. bias
bias_values = [False, True]
# 5. loss functions
loss_functions = ['zero_one_loss', 'perceptron_loss', 'l1_loss', 'l2_loss']
# 6. Learning rate
learning_rates = [1,0.1,0.001]
# 7. Epoch
epochs = [10,20,30]
default_file_path = 'diabetes_scale.txt'


def train_model(weight_pair, loss_function_name, learning_rate, epoch, split_size,
    bias_value, random_seed=None):
    """Train one Perceptron configuration and return its final-epoch metrics.

    Args:
        weight_pair: ``(low, high)`` bounds of the uniform weight initialisation.
        loss_function_name: Loss name understood by ``Perceptron.train``.
        learning_rate: Step size of the weight update.
        epoch: Number of training epochs.
        split_size: Fraction of the data used for the test split, and
            likewise for the validation split.
        bias_value: Whether a bias term is added to the model.
        random_seed: Seed applied to ``np.random`` before the weights are
            drawn. ``None`` leaves the global random state untouched.

    Returns:
        A dict with the configuration and the train/validation accuracy and
        loss of the last epoch.
    """
    low, high = weight_pair
    # The seed is an explicit argument rather than a global read from the
    # surrounding loop, so the function also works when called on its own.
    if random_seed is not None:
        np.random.seed(random_seed)
    weights = np.random.uniform(low=low, high=high, size=8).reshape(-1,1)
    perceptron = Perceptron(
        file_path=default_file_path,
        weights=weights,
        loss_function_name=loss_function_name,
        learning_rate=learning_rate,
        epoch=epoch,
        add_bias=bias_value)
    X, y = perceptron.get_features_labels_from_file_data()
    X_train, X_val, X_test, y_train, y_val, y_test = perceptron.split_train_validation_test(
        X, y, split_size)
    train_data = perceptron.train(X_train, y_train, X_val, y_val)
    # train() appends one record per epoch in order, so the last entry
    # belongs to the final epoch.
    last_train_data = train_data[-1]
    return {
            'split_size': split_size,
            'random_seed': random_seed,
            'weight_pair': weight_pair,
            'bias_value': bias_value,
            'learning_rate': learning_rate,
            'epoch': epoch,
            'train_accuracy': last_train_data['train_accuracy'],
            'val_accuracy': last_train_data['val_accuracy'],
            'train_loss': last_train_data['train_loss'],
            'val_loss': last_train_data['val_loss'],
            'loss_function_name': loss_function_name
            }


counter = 0
results = []
# product() varies the right-most list fastest, i.e. the same order as
# nesting the seven loops with split_sizes outermost and epochs innermost.
experiment_grid = product(
    split_sizes,
    random_seeds,
    weight_pairs,
    bias_values,
    loss_functions,
    learning_rates,
    epochs)
for counter, (split_size, random_seed, weight_pair, bias_value,
        loss_function_name, learning_rate, epoch) in enumerate(experiment_grid, start=1):
    print('counter = ', counter)
    result = train_model(
        weight_pair,
        loss_function_name,
        learning_rate,
        epoch,
        split_size,
        bias_value,
        random_seed=random_seed)

    results.append(result)


                        
                

counter =  1
counter =  2
counter =  3
counter =  4
counter =  5
counter =  6
counter =  7
counter =  8
counter =  9
counter =  10
counter =  11
counter =  12
counter =  13
counter =  14
counter =  15
counter =  16
counter =  17
counter =  18
counter =  19
counter =  20
counter =  21
counter =  22
counter =  23
counter =  24
counter =  25
counter =  26
counter =  27
counter =  28
counter =  29
counter =  30
counter =  31
counter =  32
counter =  33
counter =  34
counter =  35
counter =  36
counter =  37
Bias Value  [0.09273255]
counter =  38
Bias Value  [0.09273255]
counter =  39
Bias Value  [0.09273255]
counter =  40
Bias Value  [0.09273255]
counter =  41
Bias Value  [0.09273255]
counter =  42
Bias Value  [0.09273255]
counter =  43
Bias Value  [0.09273255]
counter =  44
Bias Value  [0.09273255]
counter =  45
Bias Value  [0.09273255]
counter =  46
Bias Value  [0.09273255]
counter =  47
Bias Value  [0.09273255]
counter =  48
Bias Value  [0.09273255]
counter =  49
Bias Value  [0.09273255

In [29]:
# Collect the grid-search results into a DataFrame, one row per experiment.
results_df = pd.DataFrame(results)
# top 3 records by validation accuracy, transposed so each experiment is a column
redf = results_df.sort_values(by=["val_accuracy"], ascending=False)[0:3].T
redf

Unnamed: 0,727,178,287
split_size,0.15,0.1,0.1
random_seed,200,100,100
weight_pair,"(-1, 1)","(-1, 1)","(0, 1)"
bias_value,False,False,True
learning_rate,0.001,0.001,0.001
epoch,20,20,30
train_accuracy,0.731343,0.752443,0.758958
val_accuracy,0.844828,0.844156,0.844156
train_loss,0.264925,1.387622,1.009772
val_loss,0.163793,0.935065,1.350649


In [11]:
# List all experiments ordered from the highest to the lowest validation loss.
results_df.sort_values(by=["val_loss"], ascending=False)

Unnamed: 0,split_size,random_seed,weight_pair,bias_value,learning_rate,epoch,train_accuracy,val_accuracy,train_loss,val_loss
191,0.10,100,"(-1, 1)",True,1.000,30,0.641694,0.753247,7.669364e+85,9.047924e+85
47,0.10,0,"(-1, 1)",True,1.000,30,0.641694,0.753247,5.285713e+85,6.235814e+85
119,0.10,0,"(0, 1)",True,1.000,30,0.358306,0.246753,6.547466e+85,4.620707e+85
263,0.10,100,"(0, 1)",True,1.000,30,0.358306,0.246753,6.181096e+85,4.362151e+85
407,0.10,200,"(0, 1)",True,1.000,30,0.358306,0.246753,4.160777e+85,2.936362e+85
...,...,...,...,...,...,...,...,...,...,...
17,0.10,0,"(-1, 1)",False,0.001,30,0.682410,0.714286,4.964321e-02,3.486574e-02
701,0.15,100,"(0, 1)",True,0.001,30,0.358209,0.422414,4.460111e-02,3.476788e-02
556,0.15,0,"(0, 1)",True,0.001,20,0.559701,0.603448,4.582638e-02,3.147201e-02
412,0.10,200,"(0, 1)",True,0.001,20,0.579805,0.545455,3.401790e-02,3.122935e-02


In [None]:
# Experiments
# 1. Different split sizes
# Train the same initial weights with three split sizes and plot the
# loss and accuracy curves of each run.
# NOTE(review): default_learning_rate and default_epoch are not assigned in
# any cell visible in this file - confirm they are defined before running.
np.random.seed(0)
weights = np.random.rand(8,1)
for split_size in [.1,.15,.2]:
    perceptron = Perceptron( 
        file_path=default_file_path,
        weights=weights, 
        loss_function_name="zero_one_loss", 
        learning_rate=default_learning_rate, 
        epoch=default_epoch)
    X, y = perceptron.get_features_labels_from_file_data()
    X_train, X_val, X_test, y_train, y_val, y_test = perceptron.split_train_validation_test(X, y, split_size)
    train_data = perceptron.train(X_train, y_train, X_val, y_val)
    perceptron.plot_loss_curves(default_file_path)
    perceptron.plot_accuracy_curves(default_file_path)


In [None]:
# Result
# split of 80, 10, 10 is optimal
# 0.1 is the test fraction; the validation part gets the same share of the data.
optimal_split_size = 0.1
optimal_split_size

In [None]:
# 2. Weights
# Compare runs whose initial weights are drawn with different random seeds.
# NOTE(review): default_learning_rate and default_epoch are not assigned in
# any cell visible in this file - confirm they are defined before running.
# low,high pair
#  [(-1,1), (0,1), (-2,2),(-0.5,0.5),(0,0)]
# weights_pairs = [(-1,1), (0,1), (-2,2),(-0.5,0.5),(0,0)]
random_seeds = range(0,400,100)
for random_seed in random_seeds:
    np.random.seed(random_seed)
    # low, high = weight_pair
    # weights = np.random.uniform(low=low, high = high, size=8).reshape(-1,1)
    # weights = np.random.normal(size=8).reshape(-1,1)
    weights = np.random.rand(8,1)
    perceptron = Perceptron(
        file_path='diabetes_scale.txt',
        weights=weights, 
        loss_function_name="zero_one_loss", 
        learning_rate=default_learning_rate, 
        epoch=default_epoch)
    X, y = perceptron.get_features_labels_from_file_data()
    X_train, X_val, X_test, y_train, y_val, y_test = perceptron.split_train_validation_test(X, y, optimal_split_size)
    train_data = perceptron.train(X_train, y_train, X_val, y_val)
    title_suffix = 'Random seed: ' + str(random_seed)
    perceptron.plot_loss_curves(title_suffix)
    perceptron.plot_accuracy_curves(title_suffix)

In [None]:
# 3. Weights distribution
# Compare uniform weight initialisations drawn from different (low, high) ranges.
# NOTE(review): default_learning_rate and default_epoch are not assigned in
# any cell visible in this file - confirm they are defined before running.
# low,high pair
#  [(-1,1), (0,1), (-2,2),(-0.5,0.5),(0,0)]
weights_pairs = [(-1,1), (0,1), (-2,2),(-0.5,0.5),(0,0)]
# seeded once before the loop, so each range continues the same random stream
np.random.seed(0)
for weight_pair in weights_pairs:
    low, high = weight_pair
    weights = np.random.uniform(low=low, high = high, size=8).reshape(-1,1)
    perceptron = Perceptron(
        file_path=default_file_path,
        weights=weights, 
        loss_function_name="zero_one_loss", 
        learning_rate=default_learning_rate, 
        epoch=default_epoch)
    X, y = perceptron.get_features_labels_from_file_data()
    X_train, X_val, X_test, y_train, y_val, y_test = perceptron.split_train_validation_test(X, y, optimal_split_size)
    train_data = perceptron.train(X_train, y_train, X_val, y_val)
    title_suffix = 'Weight pair: ' + str(weight_pair)
    perceptron.plot_loss_curves(title_suffix)
    perceptron.plot_accuracy_curves(title_suffix)

In [None]:
# Initial weights reused by the following experiments: eight values drawn
# uniformly from [-0.5, 0.5) with seed 0, shaped as a column vector.
np.random.seed(0)
optimal_weights = np.random.uniform(low=-0.5, high = 0.5, size=8).reshape(-1,1)
optimal_weights

In [None]:
# Experiments
# 4. Bias
# Train once with a bias term added and plot the resulting curves.
# NOTE(review): default_learning_rate and default_epoch are not assigned in
# any cell visible in this file - confirm they are defined before running.
np.random.seed(0)
optimal_weights = np.random.uniform(low=-0.5, high = 0.5, size=8).reshape(-1,1)
perceptron = Perceptron(
    file_path=default_file_path,
    weights=optimal_weights, 
    loss_function_name="zero_one_loss", 
    learning_rate=default_learning_rate, 
    epoch=default_epoch,
    add_bias=True)
X, y = perceptron.get_features_labels_from_file_data()
X_train, X_val, X_test, y_train, y_val, y_test = perceptron.split_train_validation_test(X, y, optimal_split_size)
train_data = perceptron.train(X_train, y_train, X_val, y_val)
perceptron.plot_loss_curves('Bias = True')
perceptron.plot_accuracy_curves('Bias = True')

In [None]:
# 5. Loss functions
# Train the same initial weights with each supported loss and plot the curves.
# The names must match the ones Perceptron.train dispatches on; any other
# name makes training fail.
# NOTE(review): default_learning_rate and default_epoch are not assigned in
# any cell visible in this file - confirm they are defined before running.
loss_function_names = ["zero_one_loss", "perceptron_loss", "l1_loss", "l2_loss"]

for loss_function_name in loss_function_names:
    np.random.seed(0)
    # One weight per feature as an (8, 1) column vector. A scalar from
    # np.random.rand() would scale X instead of producing one score per sample.
    optimal_weights = np.random.uniform(low=-0.5, high = 0.5, size=8).reshape(-1,1)
    perceptron = Perceptron(
        file_path=default_file_path,
        weights=optimal_weights, 
        loss_function_name=loss_function_name, 
        learning_rate=default_learning_rate, 
        epoch=default_epoch)
    X, y = perceptron.get_features_labels_from_file_data()
    X_train, X_val, X_test, y_train, y_val, y_test = perceptron.split_train_validation_test(X, y, optimal_split_size)
    train_data = perceptron.train(X_train, y_train, X_val, y_val)
    title_suffix = 'Loss Function name: ' + str(loss_function_name)
    perceptron.plot_loss_curves(title_suffix)
    perceptron.plot_accuracy_curves(title_suffix)

In [None]:
# 6. Learning Rate
# Train the same initial weights with several learning rates and plot the curves.
# NOTE(review): default_epoch is not assigned in any cell visible in this
# file - confirm it is defined before running.

for learning_rate in [1,0.1,0.01,0.001,0.0001]:
    # re-seed inside the loop so every run starts from identical weights
    np.random.seed(0)
    optimal_weights = np.random.uniform(low=-0.5, high = 0.5, size=8).reshape(-1,1)
    perceptron = Perceptron(
        file_path=default_file_path,
        weights=optimal_weights, 
        loss_function_name="zero_one_loss", 
        learning_rate=learning_rate, 
        epoch=default_epoch)
    X, y = perceptron.get_features_labels_from_file_data()
    X_train, X_val, X_test, y_train, y_val, y_test = perceptron.split_train_validation_test(X, y, optimal_split_size)
    train_data = perceptron.train(X_train, y_train, X_val, y_val)
    # test_accuracy = perceptron.accuracy(X_test, y_test)
    # print('Learning Rate = ', learning_rate, ' test_accuracy = ', test_accuracy)
    title_suffix = 'Learning Rate ' + str(learning_rate)
    perceptron.plot_loss_curves(title_suffix)
    perceptron.plot_accuracy_curves(title_suffix)

In [None]:
# Learning rate selected after the learning-rate experiment.
optimal_learning_rate = 0.1
optimal_learning_rate

In [None]:
# 7. Epoch
# Train the same initial weights for 10, 20, ..., 90 epochs and plot the curves.
# NOTE(review): default_learning_rate is not assigned in any cell visible in
# this file - confirm it is defined before running.

for epoch in range(10, 100, 10):
    # re-seed inside the loop so every run starts from identical weights
    np.random.seed(0)
    optimal_weights = np.random.uniform(low=-0.5, high = 0.5, size=8).reshape(-1,1)
    perceptron = Perceptron(
        file_path=default_file_path,
        weights=optimal_weights, 
        loss_function_name="zero_one_loss", 
        learning_rate=default_learning_rate, 
        epoch=epoch)
    X, y = perceptron.get_features_labels_from_file_data()
    X_train, X_val, X_test, y_train, y_val, y_test = perceptron.split_train_validation_test(X, y, optimal_split_size)
    train_data = perceptron.train(X_train, y_train, X_val, y_val)
    # test_accuracy = perceptron.accuracy(X_test, y_test)
    # print('Epoch = ', epoch, ' test_accuracy = ', test_accuracy)
    title_suffix = 'Epoch ' + str(epoch)
    perceptron.plot_loss_curves(title_suffix)
    perceptron.plot_accuracy_curves(title_suffix)

In [None]:
import numpy as np
# Toy inputs for checking the zero-one loss by hand:
# 3 samples with 5 features each, labels in {1, -1}, and random weights.
x1 = np.arange(15).reshape(3,5)
y1 = np.array([1,-1,1]).reshape(3,1)
w1 = np.random.rand(5,1)
# print(x.shape, y.shape, w.shape)

# zero one loss
def zero_one_indicator_function(yi, xi, w):
    """Return 1 when the margin ``yi * (xi . w)`` is negative, otherwise 0.

    The score and the margin are printed as a debugging aid.
    """
    score = np.dot(xi, w)
    print('xi_dot_wi value ', score)
    margin = yi * score
    print('yi_xi_w value ', margin)
    # a negative margin means label and score disagree in sign
    return 1 if margin < 0 else 0
    
def zero_one_loss(y, x, w):
    """Return the list of zero-one losses, one per (label, sample) pair.

    The label, the sample and the weights are printed for every pair before
    the indicator function is evaluated.
    """
    per_sample_losses = []
    for label, sample in zip(y, x):
        for caption, value in (('yi value ', label), ('xi value ', sample), ('w value ', w)):
            print(caption, value)
        per_sample_losses.append(zero_one_indicator_function(label, sample, w))

    return per_sample_losses

# Evaluate the toy data and show the resulting per-sample losses.
loss = zero_one_loss(y1, x1, w1)
print(' loss values ', loss)


In [None]:
# Experiments
# 1. Different Files (Without scaling vs scaled)
# Train the same initial weights on the scaled and the unscaled data file,
# then report the test accuracy and plot the curves of each run.
np.random.seed(0)
# weights = np.random.uniform(low = -1, high = 1, size=8).reshape(-1,1)
# weights = np.random.normal(size=8).reshape(-1,1)
weights = np.random.rand(8,1)
for file_path in ['diabetes_scale.txt','diabetes.txt']:
    perceptron = Perceptron(
        file_path=file_path,
        weights=weights, 
        # other supported names: "perceptron_loss", "l1_loss", "l2_loss"
        loss_function_name="zero_one_loss", 
        learning_rate=0.01, 
        epoch=20)
    X, y = perceptron.get_features_labels_from_file_data()
    # The split fraction is a required argument; 0.1 gives an 80/10/10
    # train/validation/test split.
    X_train, X_val, X_test, y_train, y_val, y_test = perceptron.split_train_validation_test(X, y, 0.1)
    train_data = perceptron.train(X_train, y_train, X_val, y_val)
    test_accuracy = perceptron.accuracy(X_test, y_test)
    print('File = ', file_path, ' test_accuracy = ', test_accuracy)
    # train_data_pd = pd.DataFrame(train_data)
    # print(train_data_pd)
    perceptron.plot_loss_curves(file_path)
    perceptron.plot_accuracy_curves(file_path)
    

In [None]:
# Scratch check: build a DataFrame from records whose keys differ.
# The last record has no 'loss' entry.
res = [
    {'lr': 1, 'loss': 'loss_1', 'val_acc': 5},
    {'lr': 1, 'loss': 'loss_2', 'val_acc': 5},
    {'lr': 1, 'val_acc': 50},
]

print(res)
resdf = pd.DataFrame(res)
resdf