I'm using the Connectionist Bench (Sonar, Mines vs. Rocks) data set for this Perceptron classification. Each record in the data set is labelled according to whether the sonar signal bounced off a rock or a metal cylinder. The data set has 208 data points, each with 60 features that hold the energy measured at different frequencies. The last column is the label: 'R' for rock and 'M' for metal.

# Data Preprocessing

In [1]:
import pandas as pd

In [2]:
# Load the data set. header=None is passed, so the first line of the file is read as data
# and the columns receive integer labels instead of names.
df = pd.read_csv('dataset1.csv', header=None)

In [3]:
df.shape

(208, 61)

In [4]:
df.head(5)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,51,52,53,54,55,56,57,58,59,60
0,0.02,0.0371,0.0428,0.0207,0.0954,0.0986,0.1539,0.1601,0.3109,0.2111,...,0.0027,0.0065,0.0159,0.0072,0.0167,0.018,0.0084,0.009,0.0032,R
1,0.0453,0.0523,0.0843,0.0689,0.1183,0.2583,0.2156,0.3481,0.3337,0.2872,...,0.0084,0.0089,0.0048,0.0094,0.0191,0.014,0.0049,0.0052,0.0044,R
2,0.0262,0.0582,0.1099,0.1083,0.0974,0.228,0.2431,0.3771,0.5598,0.6194,...,0.0232,0.0166,0.0095,0.018,0.0244,0.0316,0.0164,0.0095,0.0078,R
3,0.01,0.0171,0.0623,0.0205,0.0205,0.0368,0.1098,0.1276,0.0598,0.1264,...,0.0121,0.0036,0.015,0.0085,0.0073,0.005,0.0044,0.004,0.0117,R
4,0.0762,0.0666,0.0481,0.0394,0.059,0.0649,0.1209,0.2467,0.3564,0.4459,...,0.0031,0.0054,0.0105,0.011,0.0015,0.0072,0.0048,0.0107,0.0094,R


In [5]:
# Give the column at position 60 (the last one, holding the class) the name "label",
# then display the first row to check the new header.
df = df.rename(columns={ df.columns[60]: "label" })
df.head(1)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,51,52,53,54,55,56,57,58,59,label
0,0.02,0.0371,0.0428,0.0207,0.0954,0.0986,0.1539,0.1601,0.3109,0.2111,...,0.0027,0.0065,0.0159,0.0072,0.0167,0.018,0.0084,0.009,0.0032,R


In [6]:
# Encode the class letters as numbers: 'R' (rock) -> 0, 'M' (metal) -> 1.
df['label'] = df['label'].map({'R': 0, 'M': 1})

In [7]:
df.head(1)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,51,52,53,54,55,56,57,58,59,label
0,0.02,0.0371,0.0428,0.0207,0.0954,0.0986,0.1539,0.1601,0.3109,0.2111,...,0.0027,0.0065,0.0159,0.0072,0.0167,0.018,0.0084,0.009,0.0032,0


In [8]:
# Let's shuffle the rows of the data and look at the first five rows of the result.
# NOTE(review): no random_state is passed to shuffle, so the row order presumably differs
# from run to run - confirm whether a fixed seed is wanted for reproducible results.
from sklearn.utils import shuffle
df = shuffle(df)
df.head(5)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,51,52,53,54,55,56,57,58,59,label
14,0.0124,0.0433,0.0604,0.0449,0.0597,0.0355,0.0531,0.0343,0.1052,0.212,...,0.0083,0.0057,0.0174,0.0188,0.0054,0.0114,0.0196,0.0147,0.0062,0
45,0.0408,0.0653,0.0397,0.0604,0.0496,0.1817,0.1178,0.1024,0.0583,0.2176,...,0.0062,0.0129,0.0184,0.0069,0.0198,0.0199,0.0102,0.007,0.0055,0
98,0.1313,0.2339,0.3059,0.4264,0.401,0.1791,0.1853,0.0055,0.1929,0.2231,...,0.0362,0.021,0.0154,0.018,0.0013,0.0106,0.0127,0.0178,0.0231,1
156,0.0047,0.0059,0.008,0.0554,0.0883,0.1278,0.1674,0.1373,0.2922,0.3469,...,0.0095,0.0126,0.0069,0.0039,0.0068,0.006,0.0045,0.0002,0.0029,1
108,0.0599,0.0474,0.0498,0.0387,0.1026,0.0773,0.0853,0.0447,0.1094,0.0351,...,0.0013,0.0005,0.0227,0.0209,0.0081,0.0117,0.0114,0.0112,0.01,1


In [9]:
# Build the feature matrix: every column of df except the 'label' target column.
features = df.drop('label', axis=1)
features.head(5)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,50,51,52,53,54,55,56,57,58,59
14,0.0124,0.0433,0.0604,0.0449,0.0597,0.0355,0.0531,0.0343,0.1052,0.212,...,0.0078,0.0083,0.0057,0.0174,0.0188,0.0054,0.0114,0.0196,0.0147,0.0062
45,0.0408,0.0653,0.0397,0.0604,0.0496,0.1817,0.1178,0.1024,0.0583,0.2176,...,0.0066,0.0062,0.0129,0.0184,0.0069,0.0198,0.0199,0.0102,0.007,0.0055
98,0.1313,0.2339,0.3059,0.4264,0.401,0.1791,0.1853,0.0055,0.1929,0.2231,...,0.0156,0.0362,0.021,0.0154,0.018,0.0013,0.0106,0.0127,0.0178,0.0231
156,0.0047,0.0059,0.008,0.0554,0.0883,0.1278,0.1674,0.1373,0.2922,0.3469,...,0.0129,0.0095,0.0126,0.0069,0.0039,0.0068,0.006,0.0045,0.0002,0.0029
108,0.0599,0.0474,0.0498,0.0387,0.1026,0.0773,0.0853,0.0447,0.1094,0.0351,...,0.0028,0.0013,0.0005,0.0227,0.0209,0.0081,0.0117,0.0114,0.0112,0.01


In [10]:
from sklearn.model_selection import train_test_split

In [11]:
# Split the features and labels into training and test sets in a 70:30 ratio (test_size=0.3),
# with a fixed random_state passed to the splitter.
x_train, x_test, y_train, y_test = train_test_split(features, df['label'], test_size=0.3, random_state=10)

In [12]:
print(x_train.shape, x_test.shape, y_train.shape, y_test.shape)

(145, 60) (63, 60) (145,) (63,)


In [13]:
x_train.head(5)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,50,51,52,53,54,55,56,57,58,59
202,0.0272,0.0378,0.0488,0.0848,0.1127,0.1103,0.1349,0.2337,0.3113,0.3997,...,0.0146,0.0091,0.0045,0.0043,0.0043,0.0098,0.0054,0.0051,0.0065,0.0103
207,0.026,0.0363,0.0136,0.0272,0.0214,0.0338,0.0655,0.14,0.1843,0.2354,...,0.0181,0.0146,0.0129,0.0047,0.0039,0.0061,0.004,0.0036,0.0061,0.0115
36,0.0094,0.0166,0.0398,0.0359,0.0681,0.0706,0.102,0.0893,0.0381,0.1328,...,0.0134,0.0141,0.0191,0.0145,0.0065,0.0129,0.0217,0.0087,0.0077,0.0122
9,0.0164,0.0173,0.0347,0.007,0.0187,0.0671,0.1056,0.0697,0.0962,0.0251,...,0.0118,0.009,0.0223,0.0179,0.0084,0.0068,0.0032,0.0035,0.0056,0.004
185,0.034,0.0625,0.0381,0.0257,0.0441,0.1027,0.1287,0.185,0.2647,0.4117,...,0.0329,0.0141,0.0019,0.0067,0.0099,0.0042,0.0057,0.0051,0.0033,0.0058


In [14]:
x_test.head(5)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,50,51,52,53,54,55,56,57,58,59
157,0.0201,0.0178,0.0274,0.0232,0.0724,0.0833,0.1232,0.1298,0.2085,0.272,...,0.0253,0.0131,0.0049,0.0104,0.0102,0.0092,0.0083,0.002,0.0048,0.0036
76,0.0239,0.0189,0.0466,0.044,0.0657,0.0742,0.138,0.1099,0.1384,0.1376,...,0.0155,0.0091,0.0151,0.008,0.0018,0.0078,0.0045,0.0026,0.0036,0.0024
69,0.0216,0.0215,0.0273,0.0139,0.0357,0.0785,0.0906,0.0908,0.1151,0.0973,...,0.0082,0.014,0.0044,0.0052,0.0073,0.0021,0.0047,0.0024,0.0009,0.0017
133,0.079,0.0707,0.0352,0.166,0.133,0.0226,0.0771,0.2678,0.5664,0.6609,...,0.0193,0.0298,0.039,0.0294,0.0175,0.0249,0.0141,0.0073,0.0025,0.0101
141,0.0707,0.1252,0.1447,0.1644,0.1693,0.0844,0.0715,0.0947,0.1583,0.1247,...,0.0291,0.0156,0.0197,0.0135,0.0127,0.0138,0.0133,0.0131,0.0154,0.0218


In [15]:
# Try out some array and print functions on the training features.
import numpy as np
arr = np.array(x_train)
print (arr.shape)  # (row count, column count)
print (len(arr))  # number of rows
print (arr[0])  # the first row
print (len(arr[0]))  # number of values in the first row

# For a 2-D matrix/DataFrame, df.size gives the total number of elements, df.shape[0] gives
# the row count and df.shape[1] gives the column count.
# len(arr) is a convenient way to get the row count without going through size or shape.

(145, 60)
145
[ 0.0272  0.0378  0.0488  0.0848  0.1127  0.1103  0.1349  0.2337  0.3113
  0.3997  0.3941  0.3309  0.2926  0.176   0.1739  0.2043  0.2088  0.2678
  0.2434  0.1839  0.2802  0.6172  0.8015  0.8313  0.844   0.8494  0.9168
  1.      0.7896  0.5371  0.6472  0.6505  0.4959  0.2175  0.099   0.0434
  0.1708  0.1979  0.188   0.1108  0.1702  0.0585  0.0638  0.1391  0.0638
  0.0581  0.0641  0.1044  0.0732  0.0275  0.0146  0.0091  0.0045  0.0043
  0.0043  0.0098  0.0054  0.0051  0.0065  0.0103]
60


# Building a single Perceptron network

In [18]:
class Perceptron:
    """A single perceptron with a step activation function.

    ``weights[0]`` is the bias and ``weights[i + 1]`` is the weight applied to
    feature ``i`` of a data point. The weight sequence handed to the
    constructor is stored as-is (not copied) and is updated in place while
    training. Data sets and labels are accessed by position, so pass lists or
    NumPy arrays.
    """

    def __init__(self, weights):
        """Store the initial weights, including the bias as ``weights[0]``."""
        self.weights = weights

    def activate(self, datapoint):
        """Return whether ``datapoint`` triggers the perceptron.

        Computes ``bias + sum(weight_i * feature_i)`` and returns ``1.0`` when
        the result is strictly positive, ``0.0`` otherwise.
        """
        val = self.weights[0]
        # Plain sequential accumulation: keeps the floating-point result
        # independent of how the built-in sum() chooses to add floats.
        for i, feature in enumerate(datapoint, start=1):
            val += self.weights[i] * feature
        return 1.0 if val > 0.0 else 0.0

    def adjust(self, datapoint, lr, delta):
        """Move the weights towards the correct answer for ``datapoint``.

        ``lr`` is the learning rate and ``delta`` is the error
        (expected label minus the perceptron's output). The bias is shifted by
        ``lr * delta`` and each weight by ``lr * delta * feature``.
        """
        step = lr * delta  # identical for every weight, so compute it once
        self.weights[0] += step
        for i, feature in enumerate(datapoint, start=1):
            self.weights[i] += step * feature

    def train(self, trainingSet, labels, lr, repeat):
        """Learn the weights from ``trainingSet`` and its ``labels``.

        Makes at most ``repeat`` passes over the training set, adjusting the
        weights after every misclassified data point. Training stops early as
        soon as one complete pass needs no adjustment: the weights can no
        longer change, so the remaining passes would have no effect.
        """
        for _ in range(repeat):
            updated = False
            for i, datapoint in enumerate(trainingSet):
                result = self.activate(datapoint)
                delta = labels[i] - result
                if delta != 0.0:
                    self.adjust(datapoint, lr, delta)
                    updated = True
            if not updated:
                # Converged: every training point is already classified correctly.
                break

    def predict(self, testSet, labels):
        """Return the classification accuracy on ``testSet`` as a percentage.

        Each data point is classified with the learned weights and compared to
        the corresponding entry of ``labels``. Returns ``0.0`` for an empty
        test set instead of dividing by zero.
        """
        total = len(testSet)
        if total == 0:
            return 0.0
        correct = 0
        for i, datapoint in enumerate(testSet):
            if self.activate(datapoint) == labels[i]:
                correct += 1
        return correct / total * 100.0

    
def main():
    """Train a zero-initialised Perceptron on the training split and report its test accuracy."""
    # Convert the train/test splits to NumPy arrays before handing them to the Perceptron.
    train_x, train_y = np.array(x_train), np.array(y_train)
    test_x, test_y = np.array(x_test), np.array(y_test)

    learning_rate = 0.1
    epochs = 1000

    # One weight per feature column, plus a leading slot for the bias; all start at zero.
    weight_count = len(train_x[0]) + 1
    initial_weights = [0.0] * weight_count

    # Fit on the training set, then score on the held-out test set.
    model = Perceptron(initial_weights)
    model.train(train_x, train_y, learning_rate, epochs)
    score = model.predict(test_x, test_y)

    print("accuracy is ",score,"%")
    print("The learned weights are:")
    print(model.weights)


if __name__ == "__main__":
    main()

accuracy is  84.12698412698413 %
The learned weights are:
[-2.600000000000001, 2.0580600000001867, -2.1310300000000546, -4.281050000000274, 8.2852100000007471, 4.8192799999997815, 5.7146099999998761, 0.44589999999997187, -4.5473400000002799, -1.7075499999999233, 3.0176500000002404, 5.5368600000004946, 3.5965700000001628, -2.1885999999998531, -1.8140600000001412, 1.7637800000002646, -3.30966999999958, 0.89961999999999098, -0.56029999999992175, 0.91790999999998035, -0.26923999999996329, 1.9244900000001217, -3.5329899999999697, 3.8311799999996707, 2.0599399999999628, -2.8102300000000624, 0.44228000000000045, 1.6663300000000396, -1.5786099999999559, 0.20473000000001718, 2.598220000000135, -6.1611899999997304, 3.5974200000000778, -1.5315700000000319, 0.53638999999994497, -0.92152000000005863, -0.19559999999998873, -3.3846700000004133, 2.1163499999999829, 2.7286500000000538, -5.4172299999995195, -0.33411000000001712, 0.97855000000002013, -0.29698000000003888, 3.5542999999998184, 0.6693799999

# Results

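The Perceptron classified the test set with about 84% accuracy. Because this figure comes from a single 70/30 split of only 208 samples, k-fold cross-validation would give a more reliable estimate of the model's performance, and could also be used to tune the learning rate and the number of training passes.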