# Perceptron

The dataset was obtained from https://archive.ics.uci.edu/ml/datasets/Iris.

In [3]:
import numpy as np
import pandas as pd

In [60]:
data = pd.read_csv('../datasets/pancan-rna-seq/data.csv')
data = data.iloc[:, 1:]
print(data.info())
data = data.values

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 801 entries, 0 to 800
Columns: 20531 entries, gene_0 to gene_20530
dtypes: float64(20531)
memory usage: 125.5 MB
None


In [66]:
labels = pd.read_csv('../datasets/pancan-rna-seq/labels.csv')
labels = labels.iloc[:, 1:]
print(labels.info())
labels = labels.values.flatten()
label2id = {
    'BRCA' : 1,
    'COAD' : 2,
    'KIRC' : 3,
    'LUAD' : 4,
    'PRAD' : 5
}
labels = np.array([label2id[label] for label in labels])

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 801 entries, 0 to 800
Data columns (total 1 columns):
Class    801 non-null object
dtypes: object(1)
memory usage: 6.3+ KB
None


In [77]:
class Perceptron:
    def __init__(self, X, Y, N):
        self.N = N
        # size of weights array must be equal to the number of features
        self.weights = np.zeros([1, X.shape[1]])
        self.bias = 0
        # setting average parameters
        self.avg_weights = np.zeros([1, X.shape[1]])
        self.avg_bias = 0
        self.train(X,Y)

    def parameter_averaging(self):
        self.weights = self.avg_weights
        self.bias = self.avg_bias
        return

    def train(self, X, Y):
        c = 0
        for _ in range(self.N):
            # process each data point
            for i in range(X.shape[0]):
                Yi_hat = int(self.predict(X[i,:]))
                # check if incorrect
                if Y[i] != Yi_hat:
                    # update non-averaged parameters
                    self.weights += Y[i] * X[i,:]
                    self.bias += Y[i]
                    # update averaged parameters
                    self.avg_weights += c * Y[i] * X[i,:]
                    self.avg_bias += c * Y[i]
                c += 1
        self.avg_weights = self.weights - self.avg_weights*1./c
        self.avg_bias = self.bias - self.avg_bias*1./c  
        return

    def predict(self, X):
        # decision rule (X . wT + b) > 0 
        # from (true, false) -> (1, 0) -> (1, -1)
        return ((X.dot(self.weights.T) + self.bias > 0).astype(int) * 2.0 - 1).flatten()

    def calculate_accuracy(self, X_test, Y_test):
        Y_pred = self.predict(X_test)
        return np.sum(np.equal(Y_pred, Y_test), dtype=float) / len(Y_test)

In [78]:
ptron = Perceptron(data[:600], labels[:600], 10)
ptron.parameter_averaging()
print("Accuracy: ", ptron.calculate_accuracy(data[600:], labels[600:]))

Accuracy:  0.4079601990049751
