# N-Dimensional Perceptron

This Jupyter Notebook implements an N-dimensional perceptron. More information can be found in the repository README.

## Setup

### Packages

In [None]:
!pip install pandas numpy matplotlib scikit-learn

### Importing Packages

In [253]:
import random
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler

## 1. Data Loading

In [254]:
DATA_DIR = 'data'
DATA_FILE = 'neo_v2.csv'
DATA_FILEPATH = f'{DATA_DIR}/{DATA_FILE}'

# Draw a fresh seed for every run, but keep it in a name and print it so any
# run can be reproduced afterwards. np.random.seed only accepts integers in
# [0, 2**32 - 1]; a negative seed raises ValueError.
RANDOM_STATE = random.randint(0, 2**32 - 1)

# for reproducibility, pin the seed instead:
# RANDOM_STATE = 42

np.random.seed(RANDOM_STATE)
print(f'Using random seed {RANDOM_STATE}')

### Clean and Format the Data

In [255]:
# Load the CSV, encode the 'hazardous' label as an integer, and discard the
# columns that are not used as features.
unused_columns = ['sentry_object', 'orbiting_body', 'name', 'id']

data = (
    pd.read_csv(DATA_FILEPATH, header=0)
    .astype({'hazardous': int})
    .drop(columns=unused_columns)
)

In [256]:
# Display the first five rows of the cleaned frame.
data.head()

Unnamed: 0,est_diameter_min,est_diameter_max,relative_velocity,miss_distance,absolute_magnitude,hazardous
0,1.198271,2.679415,13569.249224,54839740.0,16.73,0
1,0.2658,0.594347,73588.726663,61438130.0,20.0,1
2,0.72203,1.614507,114258.692129,49798720.0,17.83,0
3,0.096506,0.215794,24764.303138,25434970.0,22.2,0
4,0.255009,0.570217,42737.733765,46275570.0,20.09,1


#### Labels

In [259]:
# The label column ('hazardous') is the last column of the cleaned frame.
y = data.iloc[:, -1].to_numpy()

print(y)

# Report what share of the samples falls into each class.
classes, amount = np.unique(y, return_counts=True)

for label, count in zip(classes, amount):
    print(f'Class: {label}, Percent: {count / len(y) * 100}')

[0 1 0 ... 0 0 0]
Class: 0, Percent: 90.26817561319301
Class: 1, Percent: 9.731824386806993


#### Features

In [260]:
# Every column except the last one (the label) is used as a feature.
feature_columns = data.iloc[:, :-1]

# Rescale each feature into the [0, 1] range.
# NOTE(review): the scaler is fitted on the full dataset before the train/test
# split, so test-set minima/maxima influence the scaling. Consider fitting on
# the training split only and applying `transform` to the test split.
scaler = MinMaxScaler(feature_range=(0, 1))
X = scaler.fit_transform(feature_columns)

X

array([[3.16072151e-02, 3.16072151e-02, 5.64469972e-02, 7.33140811e-01,
        3.12891114e-01],
       [6.99859604e-03, 6.99859603e-03, 3.09921777e-01, 8.21364026e-01,
        4.49311640e-01],
       [1.90388437e-02, 1.90388437e-02, 4.81679530e-01, 6.65740213e-01,
        3.58781811e-01],
       ...,
       [8.27278640e-04, 8.27278640e-04, 3.10974339e-02, 3.79640314e-01,
        6.41218189e-01],
       [1.77130266e-04, 1.77130265e-04, 2.91383698e-01, 9.18352249e-01,
        7.74718398e-01],
       [1.03590911e-03, 1.03590911e-03, 1.13271142e-01, 7.99088943e-01,
        6.21193158e-01]])

### Split the Data

In [261]:
# Hold out 20% of the samples for testing. The classes are heavily imbalanced
# (roughly 90% / 10%), so stratify on the labels to keep the same class ratio
# in both the training and the test split.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, stratify=y
)

## 2. Initialize Perceptron

Before we can initialize a perceptron, we must define the `Perceptron` class.

In [262]:
class Perceptron:
    """Single-unit perceptron for binary classification of D-dimensional inputs.

    Parameters
    ----------
    D : int
        Number of input features (length of the weight vector).
    epochs : int
        Maximum number of passes over the training data.
    acc_threshold : float
        Training stops early once an epoch's accuracy (as a fraction)
        exceeds this value.
    activ : callable
        Activation applied to the weighted sum. Its return value is compared
        directly against the labels, so it must produce label values.
    print_freq : int
        Progress is printed every ``print_freq`` epochs; a falsy value
        (e.g. 0) disables progress output.
    learning_rate : float
        Step size used for the weight and bias updates.
    """

    def __init__(self, D, epochs, acc_threshold, activ, print_freq, learning_rate):
        self.D = D
        self.epochs = epochs
        self.acc_threshold = acc_threshold
        self.activ = activ
        self.print_freq = print_freq
        self.learning_rate = learning_rate

        # initialize weights and bias with uniform random values in [0, 1)
        self.W = np.random.rand(self.D,)
        self.b = np.random.rand()

    def train(self, X, y):
        """Fit the weights and bias with the online perceptron update rule.

        Iterates over the samples for at most ``self.epochs`` epochs and stops
        early as soon as an epoch's accuracy exceeds ``self.acc_threshold``.
        The accuracy of an epoch is measured from the mistakes made while the
        weights were still being updated during that epoch.

        Raises
        ------
        ValueError
            If ``y`` is empty.
        """
        N = len(y)
        if N == 0:
            raise ValueError('Cannot train on an empty dataset.')

        # for each epoch (use the instance settings, not notebook globals)
        for e in range(self.epochs):
            errors = 0

            # for each datapoint
            for idx, x in enumerate(X):
                # make a prediction
                y_prime = self.predict(x)

                # update rule: only misclassified samples move the boundary
                if y_prime != y[idx]:
                    errors += 1

                    delta = (y[idx] - y_prime) * self.learning_rate
                    self.W = self.W + delta * x
                    self.b = self.b + delta

            # check accuracy
            accuracy = (N - errors) / N
            if self.print_freq and e % self.print_freq == 0:
                print(f'W: {self.W}, b: {self.b}')
                print(f'Achieved accuracy of {round(accuracy * 100, 2)}%.')
            if accuracy > self.acc_threshold:
                print(f'Surpassed threshold ({self.acc_threshold * 100}%). Stopping.')
                # actually stop, as the message says
                break

    def predict(self, x):
        """Return the activation of the weighted sum ``W . x + b`` for one sample."""
        y_prime = np.dot(self.W, x) + self.b
        return self.activ(y_prime)

    def test(self, X, y):
        """Evaluate the perceptron on ``(X, y)``, print and return the accuracy.

        Returns
        -------
        float
            Fraction of samples whose prediction equals the label.

        Raises
        ------
        ValueError
            If no samples are provided.
        """
        # for calculating metrics later
        P, N, TP, TN = 0, 0, 0, 0

        for idx, x in enumerate(X):
            is_positive = y[idx]
            if is_positive:
                P += 1
            else:
                N += 1

            # predict() already applies the configured activation
            y_prime = self.predict(x)

            if y_prime == y[idx]:
                if is_positive:
                    TP += 1
                else:
                    TN += 1

        total = P + N
        if total == 0:
            raise ValueError('Cannot test on an empty dataset.')

        accuracy = (TP + TN) / total
        print(f'Accuracy: {accuracy}')
        return accuracy

In [263]:
# Model shape: one weight per feature column of the training matrix.
D = X_train.shape[1]

# Optimisation settings.
learning_rate = 1e-3
epochs = 1_000
acc_threshold = 0.95  # early-stop accuracy target (fraction, not percent)

# Print training progress every `print_freq` epochs.
print_freq = 5

def sigmoid(x):
    """Threshold the logistic sigmoid of ``x`` into a hard 0/1 class label.

    Returns 1 when ``1 / (1 + exp(-x))`` is strictly greater than 0.5,
    otherwise 0.
    """
    activation = 1 / (1 + np.exp(-x))
    if activation > 0.5:
        return 1
    return 0

# Collect the constructor arguments in one place, then build the perceptron.
perceptron_config = dict(
    D=D,
    epochs=epochs,
    acc_threshold=acc_threshold,
    activ=sigmoid,
    print_freq=print_freq,
    learning_rate=learning_rate,
)

net = Perceptron(**perceptron_config)

In [None]:
# Fit the perceptron on the training split; progress is printed as it trains.
net.train(X_train, y_train)

W: [ 0.68812226  0.27511444 -0.00169467 -0.00419586  0.01909403], b: -0.018196745686998556
Achieved accuracy of 83.13%.
W: [ 0.44521106  0.03220323 -0.00189308 -0.00145165  0.00802828], b: -0.009196745686998548
Achieved accuracy of 84.76%.
W: [ 0.2421872  -0.17082063 -0.00132558 -0.0009426  -0.00379083], b: -0.0001967456869985465
Achieved accuracy of 84.95%.
W: [ 0.17907521 -0.23393262 -0.00028899 -0.00134811 -0.01161444], b: 0.004803254313001454
Achieved accuracy of 86.0%.
W: [ 0.16810588 -0.24490195 -0.00063312 -0.00153557 -0.0131242 ], b: 0.0058032543130014545
Achieved accuracy of 86.06%.
W: [ 1.63094676e-01 -2.49913151e-01 -1.15941512e-04 -4.20745945e-04
 -1.39466068e-02], b: 0.0068032543130014545
Achieved accuracy of 86.08%.
W: [ 1.61384937e-01 -2.51622891e-01  6.57161233e-05 -1.26225352e-03
 -1.45363857e-02], b: 0.0068032543130014545
Achieved accuracy of 86.09%.
W: [ 1.60890968e-01 -2.52116860e-01  8.30631735e-05 -1.53520387e-03
 -1.41283340e-02], b: 0.0068032543130014545
Achieve