# Linear Perceptron
By Oliver Blasko

# Loading data

Importing needed libraries.

In [408]:
import pandas as pd
import random

We use the regular expression `\s+` as the separator, which means that values are separated by one or more whitespace characters.

In [409]:
# The separator is a regex, so it is written as a raw string: "\s" inside a
# normal string literal is an invalid escape sequence and triggers a warning.
data = pd.read_csv("./spine_data_ascii.txt", sep=r"\s+", header=None)

Let's look at our data.

In [410]:
# Preview the first rows of the freshly loaded dataframe (at most 10).
data.head(10)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,300,301,302,303,304,305,306,307,308,309
0,33.841641,45.252792,80.111572,70.399308,61.446597,56.535051,79.93857,53.936748,73.635962,84.585607,...,69.297008,87.679087,65.007964,68.832021,74.094731,77.409333,39.056951,95.480229,49.782121,63.027817
1,5.073991,8.693157,33.942432,13.469986,22.694968,14.377189,18.774071,20.721496,9.711318,30.361685,...,24.652878,20.365613,27.602608,22.218482,18.823727,29.396545,10.060991,46.550053,6.466805,22.552586
2,36.641233,41.583126,85.101608,61.2,46.170347,44.991547,63.311835,29.220534,63.0,65.479486,...,44.311238,93.822416,50.947519,50.092194,76.032156,63.232302,25.015378,59.0,53.0,39.609117
3,28.767649,36.559635,46.169139,56.929322,38.751628,42.157862,61.164499,33.215251,63.924644,54.223922,...,44.64413,67.313473,37.405357,46.613539,55.271004,48.012788,28.99596,48.930176,43.315316,40.475232
4,123.94524,118.54584,125.59362,102.33752,125.67072,101.72333,114.78711,114.36584,98.72793,108.01022,...,101.8685,120.94483,116.58111,105.98514,128.40573,118.45073,114.40543,96.683903,110.86478,98.672917
5,-0.199249,0.21475,100.29211,25.538429,-2.707879,25.773174,38.538741,-0.42101,26.975787,25.118478,...,11.211523,76.730629,7.015978,-3.530317,73.388216,93.563737,4.564259,77.283072,25.335647,-0.2544
6,0.0,0.0,1.0,1.0,0.0,1.0,1.0,0.0,1.0,1.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0


Our dataframe seems to be "upside down": each row holds one variable and each column one patient, so we need to transpose it.

In [411]:
# Swap rows and columns of the dataframe.
data = data.T

In [412]:
# Preview the first 10 rows of the dataframe.
data.head(10)

Unnamed: 0,0,1,2,3,4,5,6
0,33.841641,5.073991,36.641233,28.767649,123.94524,-0.199249,0.0
1,45.252792,8.693157,41.583126,36.559635,118.54584,0.21475,0.0
2,80.111572,33.942432,85.101608,46.169139,125.59362,100.29211,1.0
3,70.399308,13.469986,61.2,56.929322,102.33752,25.538429,1.0
4,61.446597,22.694968,46.170347,38.751628,125.67072,-2.707879,0.0
5,56.535051,14.377189,44.991547,42.157862,101.72333,25.773174,1.0
6,79.93857,18.774071,63.311835,61.164499,114.78711,38.538741,1.0
7,53.936748,20.721496,29.220534,33.215251,114.36584,-0.42101,0.0
8,73.635962,9.711318,63.0,63.924644,98.72793,26.975787,1.0
9,84.585607,30.361685,65.479486,54.223922,108.01022,25.118478,1.0


In [413]:
# Dimensions of the dataframe as (number of rows, number of columns).
data.shape

(310, 7)

As we can see, we have 310 rows (patients) and 7 columns: 6 spine measurements and a binary output variable, where:
- 1 stands for patients with abnormal spines
- 0 stands for patients with normal spines

# Implementation

In [414]:
class LinearPerceptron:
    """Single-layer perceptron with a step activation for a 0/1 target.

    On construction the weights are initialized and the dataframe is split,
    in its existing row order (no shuffling), into a training part (the
    first 75 % of the rows) and a testing part (the remaining rows).
    """

    #percentage of the rows that goes into the training set
    _TRAIN_PERCENT = 75

    #constructor
    def __init__(self, df, input_vars, target_vars, bias, weight_initialization):
        """Store the configuration, initialize the weights and split the data.

        Args:
            df: pandas DataFrame with one sample per row.
            input_vars: column labels of the input variables.
            target_vars: column label of the target (output) variable.
            bias: starting value of the bias term.
            weight_initialization: "default" (every weight starts at 0) or
                "random" (every weight is drawn uniformly from [-1, 1]).

        Raises:
            ValueError: if weight_initialization is neither "default" nor
                "random".
        """
        #dataframe
        self.data = df
        #indexes of input variables
        self.input_vars = input_vars
        #index of target -- output variable
        self.target_vars = target_vars
        #dictionary of weights, keyed by input column label
        self.weights = {}
        self.bias = bias
        #type of initialization
        self.weight_initialization = weight_initialization
        #initialize weights
        self.init()
        #splits the data into training and testing set
        self.splitData()

    #initializes starting weights
    def init(self):
        """Assign a starting weight to every input variable.

        Raises:
            ValueError: if the configured initialization type is unknown.
                Failing here is clearer than the KeyError that an empty
                weight dictionary would cause later in predict().
        """
        if self.weight_initialization == "default":
            for index in self.input_vars:
                self.weights[index] = 0
        elif self.weight_initialization == "random":
            for index in self.input_vars:
                self.weights[index] = random.uniform(-1, 1)
        else:
            raise ValueError(
                "Unknown weight_initialization: "
                + repr(self.weight_initialization)
                + ' (expected "default" or "random")'
            )

    #print current weights
    def printWeights(self):
        """Print every weight on its own line as ``w_<column> = <value>``."""
        for key, value in self.weights.items():
            print("w_" + str(key) + " = " + str(value))

    #returns output of the perceptron
    def predict(self, row):
        """Return the perceptron output (0 or 1) for one data row.

        Args:
            row: mapping from column label to value (e.g. a pandas Series)
                that contains every label listed in input_vars.
        """
        sum_ = 0
        #iterate over all input variables
        for col_index in self.input_vars:
            #sum = sum + x_i * w_i
            sum_ += row[col_index] * self.weights[col_index]
        #sum = sum + bias(b)
        sum_ += self.bias
        #step function
        return self.activate(sum_)

    #step function: like sign, but returns 0 instead of -1
    def activate(self, x):
        """Return 1 if x is strictly positive, otherwise 0."""
        return 1 if x > 0 else 0

    def updateWeights(self, row, error):
        """Apply the perceptron rule w_i = w_i + (t - y) * x_i to every weight.

        Args:
            row: the data row that produced the error.
            error: target value minus perceptron output (t - y).
        """
        #iterate over all weights
        for index in self.input_vars:
            #w_i = w_i + (t-y)*x_i
            self.weights[index] += error * row[index]

    def train(self, epochs=1):
        """Train the perceptron on the training set.

        Args:
            epochs: number of full passes over the training data. The
                default of 1 is a single pass.
        """
        for _ in range(epochs):
            #iterate over every data_point
            for index, row in self.train_data.iterrows():
                #output of perceptron
                prediction = self.predict(row)
                #actual target value(t) - output of the perceptron(y)
                error = row[self.target_vars] - prediction
                #b = b + ( target value(t) - output of the perceptron(y) )
                self.bias += error
                #update weights by adding (t-y)*x
                self.updateWeights(row, error)

    #splits data into training and testing set
    def splitData(self):
        """Split the data into train_data (first 75 % of rows) and test_data.

        The split keeps the original row order. The threshold is computed
        with integer arithmetic, so no numpy import is required and there
        is no floating-point rounding.
        """
        #index of 75%
        threshold = self.data.shape[0] * self._TRAIN_PERCENT // 100
        self.train_data = pd.DataFrame(self.data.iloc[0:threshold, :])
        self.test_data = pd.DataFrame(self.data.iloc[threshold:, :])

    #feeding the perceptron with testing data and computes accuracy
    #ACCURACY = (correct_prediction/total_predictions)*100
    def evaluate(self):
        """Return the accuracy on the testing set as a percentage (0-100).

        Raises:
            ZeroDivisionError: if the testing set is empty.
        """
        correct_prediction = 0
        for index, row in self.test_data.iterrows():
            prediction = self.predict(row)
            if prediction == row[self.target_vars]:
                correct_prediction += 1
        accuracy = (correct_prediction / self.test_data.shape[0]) * 100
        return accuracy

# Testing 

Let's test our LinearPerceptron class.

- Firstly, a LinearPerceptron with default (= 0) initial weights.

In [415]:
# Perceptron over the six measurement columns (target: column 6) with all
# weights and the bias starting at 0.
LP = LinearPerceptron(
    df=data,
    input_vars=[0, 1, 2, 3, 4, 5],
    target_vars=6,
    bias=0,
    weight_initialization="default",
)
LP.train()

# Report the learned parameters and the accuracy on the testing set.
print("Final Weights:")
LP.printWeights()
print()
print("Final bias is: ", LP.bias, sep="")
print()
print("Accuracy of the linear perceptron ", LP.evaluate(), sep="")

Final Weights:
w_0 = 102.28249800000003
w_1 = 218.88135369000003
w_2 = 31.274696999999996
w_3 = -116.59885199999998
w_4 = -7.6972439999999125
w_5 = 609.3911698000002

Final bias is: 2.0

Accuracy of the linear perceptron 69.23076923076923


- Secondly, a LinearPerceptron with random initial weights (drawn uniformly from the interval [-1, 1]).

In [416]:
# Same setup as before, but the starting weights are chosen randomly.
LP2 = LinearPerceptron(
    df=data,
    input_vars=[0, 1, 2, 3, 4, 5],
    target_vars=6,
    bias=0,
    weight_initialization="random",
)
LP2.train()

# Report the learned parameters and the accuracy on the testing set.
print("Final Weights:")
LP2.printWeights()
print()
print("Final bias is: ", LP2.bias, sep="")
print()
print("Accuracy of the linear perceptron ", LP2.evaluate(), sep="")

Final Weights:
w_0 = 112.6340186458844
w_1 = 214.80493275709682
w_2 = -19.835330777032155
w_3 = -104.04755944628823
w_4 = -50.72803036343562
w_5 = 553.1241589976183

Final bias is: 2.0

Accuracy of the linear perceptron 71.7948717948718


# Conclusion

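Our linear perceptrons aren't very accurate. We could probably improve our models by splitting the data into training and testing sets randomly, or by training them for multiple epochs. In this run, initializing the weights randomly rather than setting them all to 0 improved the accuracy of our perceptron by ~2.5 percentage points (69.2% vs. 71.8%); because the random weights are not seeded, this difference will vary from run to run.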