I'm using the Connectionist Bench (Sonar, Mines vs. Rocks) data set for this Perceptron classification. The task is to classify whether a sonar signal bounced off a rock or a metal cylinder. The data set has 208 data points, each with 60 features, which are the signal energies in different frequency bands. The last column is the label: 'R' for rock and 'M' for metal.

# Data Preprocessing

In [1]:
import pandas as pd

In [2]:
# The CSV has no header row, so header=None makes pandas number the columns 0..60
# instead of consuming the first data row as column names.
df = pd.read_csv('dataset1.csv', header=None)

In [3]:
df.shape

(208, 61)

In [4]:
df.head(5)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,51,52,53,54,55,56,57,58,59,60
0,0.02,0.0371,0.0428,0.0207,0.0954,0.0986,0.1539,0.1601,0.3109,0.2111,...,0.0027,0.0065,0.0159,0.0072,0.0167,0.018,0.0084,0.009,0.0032,R
1,0.0453,0.0523,0.0843,0.0689,0.1183,0.2583,0.2156,0.3481,0.3337,0.2872,...,0.0084,0.0089,0.0048,0.0094,0.0191,0.014,0.0049,0.0052,0.0044,R
2,0.0262,0.0582,0.1099,0.1083,0.0974,0.228,0.2431,0.3771,0.5598,0.6194,...,0.0232,0.0166,0.0095,0.018,0.0244,0.0316,0.0164,0.0095,0.0078,R
3,0.01,0.0171,0.0623,0.0205,0.0205,0.0368,0.1098,0.1276,0.0598,0.1264,...,0.0121,0.0036,0.015,0.0085,0.0073,0.005,0.0044,0.004,0.0117,R
4,0.0762,0.0666,0.0481,0.0394,0.059,0.0649,0.1209,0.2467,0.3564,0.4459,...,0.0031,0.0054,0.0105,0.011,0.0015,0.0072,0.0048,0.0107,0.0094,R


In [5]:
# Column 60 (the last one) holds the class label; give it a readable name
# so it can be referred to as df['label'] from here on.
df = df.rename(columns={ df.columns[60]: "label" })
df.head(1)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,51,52,53,54,55,56,57,58,59,label
0,0.02,0.0371,0.0428,0.0207,0.0954,0.0986,0.1539,0.1601,0.3109,0.2111,...,0.0027,0.0065,0.0159,0.0072,0.0167,0.018,0.0084,0.009,0.0032,R


In [6]:
# Encode the string labels as integers: rock ('R') -> 0, metal ('M') -> 1.
# Any value not present in the mapping would become NaN.
df['label'] = df.label.map({'R':0, 'M':1})

In [7]:
df.head(1)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,51,52,53,54,55,56,57,58,59,label
0,0.02,0.0371,0.0428,0.0207,0.0954,0.0986,0.1539,0.1601,0.3109,0.2111,...,0.0027,0.0065,0.0159,0.0072,0.0167,0.018,0.0084,0.009,0.0032,0


In [8]:
# Shuffle the rows. A fixed random_state keeps the resulting row order the same
# on every run, so everything computed from the shuffled frame is reproducible.
from sklearn.utils import shuffle
df = shuffle(df, random_state=10)
df.head(5)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,51,52,53,54,55,56,57,58,59,label
130,0.0443,0.0446,0.0235,0.1008,0.2252,0.2611,0.2061,0.1668,0.1801,0.3083,...,0.0274,0.0205,0.0141,0.0185,0.0055,0.0045,0.0115,0.0152,0.01,1
149,0.0207,0.0535,0.0334,0.0818,0.074,0.0324,0.0918,0.107,0.1553,0.1234,...,0.0033,0.005,0.019,0.0103,0.0121,0.0042,0.009,0.007,0.0099,1
64,0.0071,0.0103,0.0135,0.0494,0.0253,0.0806,0.0701,0.0738,0.0117,0.0898,...,0.0043,0.0048,0.0076,0.0124,0.0105,0.0054,0.0032,0.0073,0.0063,0
172,0.018,0.0444,0.0476,0.0698,0.1615,0.0887,0.0596,0.1071,0.3175,0.2918,...,0.0122,0.0114,0.0098,0.0027,0.0025,0.0026,0.005,0.0073,0.0022,1
101,0.0335,0.0134,0.0696,0.118,0.0348,0.118,0.1948,0.1607,0.3036,0.4372,...,0.0244,0.0232,0.0093,0.0159,0.0193,0.0032,0.0377,0.0126,0.0156,1


In [9]:
# Feature matrix: every column of the frame except the 'label' column.
features = df.drop('label', axis=1)
features.head(5)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,50,51,52,53,54,55,56,57,58,59
130,0.0443,0.0446,0.0235,0.1008,0.2252,0.2611,0.2061,0.1668,0.1801,0.3083,...,0.0248,0.0274,0.0205,0.0141,0.0185,0.0055,0.0045,0.0115,0.0152,0.01
149,0.0207,0.0535,0.0334,0.0818,0.074,0.0324,0.0918,0.107,0.1553,0.1234,...,0.0171,0.0033,0.005,0.019,0.0103,0.0121,0.0042,0.009,0.007,0.0099
64,0.0071,0.0103,0.0135,0.0494,0.0253,0.0806,0.0701,0.0738,0.0117,0.0898,...,0.0252,0.0043,0.0048,0.0076,0.0124,0.0105,0.0054,0.0032,0.0073,0.0063
172,0.018,0.0444,0.0476,0.0698,0.1615,0.0887,0.0596,0.1071,0.3175,0.2918,...,0.0122,0.0122,0.0114,0.0098,0.0027,0.0025,0.0026,0.005,0.0073,0.0022
101,0.0335,0.0134,0.0696,0.118,0.0348,0.118,0.1948,0.1607,0.3036,0.4372,...,0.0302,0.0244,0.0232,0.0093,0.0159,0.0193,0.0032,0.0377,0.0126,0.0156


In [10]:
from sklearn.model_selection import train_test_split

In [11]:
# Hold out 30% of the rows for testing (70:30 train/test split).
# random_state=10 makes the split repeatable for a given input row order.
x_train, x_test, y_train, y_test = train_test_split(features, df['label'], test_size=0.3, random_state=10)

In [12]:
print(x_train.shape, x_test.shape, y_train.shape, y_test.shape)

(145, 60) (63, 60) (145,) (63,)


In [13]:
x_train.head(5)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,50,51,52,53,54,55,56,57,58,59
14,0.0124,0.0433,0.0604,0.0449,0.0597,0.0355,0.0531,0.0343,0.1052,0.212,...,0.0078,0.0083,0.0057,0.0174,0.0188,0.0054,0.0114,0.0196,0.0147,0.0062
131,0.115,0.1163,0.0866,0.0358,0.0232,0.1267,0.2417,0.2661,0.4346,0.5378,...,0.0228,0.0099,0.0065,0.0085,0.0166,0.011,0.019,0.0141,0.0068,0.0086
160,0.0258,0.0433,0.0547,0.0681,0.0784,0.125,0.1296,0.1729,0.2794,0.2954,...,0.0121,0.0091,0.0062,0.0019,0.0045,0.0079,0.0031,0.0063,0.0048,0.005
196,0.005,0.0017,0.027,0.045,0.0958,0.083,0.0879,0.122,0.1977,0.2282,...,0.0281,0.0165,0.0056,0.001,0.0027,0.0062,0.0024,0.0063,0.0017,0.0028
182,0.0095,0.0308,0.0539,0.0411,0.0613,0.1039,0.1016,0.1394,0.2592,0.3745,...,0.0357,0.0181,0.0019,0.0102,0.0133,0.004,0.0042,0.003,0.0031,0.0033


In [14]:
x_test.head(5)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,50,51,52,53,54,55,56,57,58,59
105,0.0116,0.0179,0.0449,0.1096,0.1913,0.0924,0.0761,0.1092,0.0757,0.1006,...,0.0031,0.0163,0.0099,0.0084,0.027,0.0277,0.0097,0.0054,0.0148,0.0092
176,0.0635,0.0709,0.0453,0.0333,0.0185,0.126,0.1015,0.1918,0.3362,0.39,...,0.0154,0.0048,0.0025,0.0087,0.0072,0.0095,0.0086,0.0085,0.004,0.0051
77,0.0336,0.0294,0.0476,0.0539,0.0794,0.0804,0.1136,0.1228,0.1235,0.0842,...,0.0033,0.015,0.0111,0.0032,0.0035,0.0169,0.0137,0.0015,0.0069,0.0051
126,0.0715,0.0849,0.0587,0.0218,0.0862,0.1801,0.1916,0.1896,0.296,0.4186,...,0.0216,0.0153,0.0121,0.0096,0.0196,0.0042,0.0066,0.0099,0.0083,0.0124
2,0.0262,0.0582,0.1099,0.1083,0.0974,0.228,0.2431,0.3771,0.5598,0.6194,...,0.0033,0.0232,0.0166,0.0095,0.018,0.0244,0.0316,0.0164,0.0095,0.0078


In [15]:
# Sanity check: convert the training DataFrame to a NumPy array and print
# its shape, its row count, the first row, and the length of that row.
import numpy as np
arr = np.array(x_train)
print (arr.shape)
print (len(arr))
print (arr[0])
print (len(arr[0]))

# Notes: for a 2-D array/DataFrame, .size is the total number of elements (rows * columns),
# .shape[0] is the row count and .shape[1] is the column count.
# len(arr) is a convenient shorthand for the row count.

(145, 60)
145
[ 0.0124  0.0433  0.0604  0.0449  0.0597  0.0355  0.0531  0.0343  0.1052
  0.212   0.164   0.1901  0.3026  0.2019  0.0592  0.239   0.3657  0.3809
  0.5929  0.6299  0.5801  0.4574  0.4449  0.3691  0.6446  0.894   0.8978
  0.498   0.3333  0.235   0.1553  0.3666  0.434   0.3082  0.3024  0.4109
  0.5501  0.4129  0.5499  0.5018  0.3132  0.2802  0.2351  0.2298  0.1155
  0.0724  0.0621  0.0318  0.045   0.0167  0.0078  0.0083  0.0057  0.0174
  0.0188  0.0054  0.0114  0.0196  0.0147  0.0062]
60


# Building a single Perceptron network

In [26]:
class Perceptron:
    """Single perceptron with a step activation for binary (0/1) classification.

    ``weights[0]`` is the bias and ``weights[i + 1]`` multiplies feature ``i``.
    The list handed to the constructor is stored as-is and updated in place
    while training.
    """

    def __init__(self, weights):
        """Store the initial weights (bias first, then one weight per feature)."""
        self.weights = weights

    def activate(self, datapoint):
        """Return 1.0 if the weighted sum of ``datapoint`` plus bias is > 0, else 0.0."""
        val = self.weights[0]
        for i, feature in enumerate(datapoint):
            val += self.weights[i + 1] * feature
        return 1.0 if val > 0.0 else 0.0

    def adjust(self, datapoint, lr, delta):
        """Apply one perceptron update for a misclassified ``datapoint``.

        ``lr`` is the learning rate and ``delta`` is (expected - predicted).
        The bias moves by ``lr * delta``; each feature weight moves by
        ``lr * delta * feature``.
        """
        step = lr * delta
        self.weights[0] += step
        for i, feature in enumerate(datapoint):
            self.weights[i + 1] += step * feature

    def train(self, trainingSet, labels, lr, repeat):
        """Learn the weights from ``trainingSet`` / ``labels``.

        Makes at most ``repeat`` passes over the data, adjusting the weights
        after every misclassified sample. Training stops early after the first
        pass with no misclassification: the weights can no longer change, so
        the remaining passes would do nothing.
        """
        for _ in range(repeat):
            changed = False
            for i in range(len(trainingSet)):
                delta = labels[i] - self.activate(trainingSet[i])
                if delta != 0.0:
                    self.adjust(trainingSet[i], lr, delta)
                    changed = True
            if not changed:
                # Converged: every training sample is classified correctly.
                break

    def predict(self, testSet, labels):
        """Return the percentage (0.0-100.0) of ``testSet`` rows classified correctly.

        Each row is run through ``activate`` with the learned weights and the
        result is compared to the matching entry in ``labels``. An empty
        ``testSet`` yields 0.0 instead of dividing by zero.
        """
        total = len(testSet)
        if total == 0:
            return 0.0
        correct = 0.0
        for i in range(total):
            if self.activate(testSet[i]) == labels[i]:
                correct += 1
        return correct / total * 100.0

    
def main():
    """Train a perceptron on the training split and report its test accuracy.

    Reads the module-level x_train / y_train / x_test / y_test, trains a
    zero-initialised Perceptron, then prints the test accuracy and the
    learned weights.
    """
    # Convert the pandas objects to NumPy arrays so rows and labels are indexed by position.
    train_x, train_y = np.array(x_train), np.array(y_train)
    test_x, test_y = np.array(x_test), np.array(y_test)

    learning_rate = 0.1
    epochs = 1000

    # One weight per feature column plus a leading bias term, all starting at zero.
    n_weights = len(train_x[0]) + 1
    model = Perceptron([0.0] * n_weights)

    # Fit on the training split, then score on the held-out split.
    model.train(train_x, train_y, learning_rate, epochs)
    score = model.predict(test_x, test_y)

    print("accuracy is ",score,"%")
    print("The learned weights are:")
    print(model.weights)


if __name__ == "__main__":
    main()

accuracy is  77.77777777777779 %
The learned weights are:
[-1.9000000000000004, 2.6564300000001326, -3.1250599999998232, -6.0876399999996353, 4.6949800000000188, 6.2753900000001668, -0.02166999999998994, -0.55785999999998548, -6.278890000000044, 3.9094799999999998, 1.2729200000003202, 5.2481700000000195, 2.1173899999999799, -0.73227999999993143, 2.1724199999999225, -0.1938500000000149, 0.19540999999999079, -3.5446200000000547, -0.15877999999999912, -0.97594999999999621, 0.91318000000000166, 1.2993400000001012, 1.0014600000000478, 1.0981800000001076, -0.50255000000001615, -1.4317599999999688, -0.385539999999991, 0.52510000000003931, -1.4926299999999406, 1.9016000000000142, 1.0156800000000263, -3.983519999999761, 3.9178500000005734, -2.9051800000002763, -3.5913300000000725, 7.1981199999998937, -6.1522600000000081, 1.6026500000000214, 0.44486999999999022, -0.29701000000000799, -6.7654899999993949, 4.2669899999997156, -0.9348299999999955, 2.7750400000000242, 3.1017399999999578, -0.69887000

# Results

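The Perceptron classified the test set with about 78% accuracy (77.78%). K-fold cross-validation would give a more reliable estimate of this accuracy than a single 70/30 split, and could be used to tune the learning rate and the number of training passes to improve the result.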