In [2]:
import numpy as np
import torch
import json

In [28]:
##########################
### DATASET
##########################

# Load data
with open('training_data.json', 'r') as f:
    data = json.load(f)

data = np.array(data)

X = data.astype(np.float32)
y = np.array([1] * 1500 + [0] * 1500, dtype=np.int64)


print('Class label counts:', np.bincount(y))
print('X.shape:', X.shape)
print('y.shape:', y.shape)

# Shuffling & train/test split
shuffle_idx = np.arange(y.shape[0])
shuffle_rng = np.random.default_rng(123)
shuffle_rng.shuffle(shuffle_idx)
X, y = X[shuffle_idx], y[shuffle_idx]

# 70/30 split
split = int(0.7 * y.shape[0])
X_train, X_test = X[shuffle_idx[:split]], X[shuffle_idx[split:]]
y_train, y_test = y[shuffle_idx[:split]], y[shuffle_idx[split:]]

# Normalize (mean zero, unit variance)
# mu, sigma = X_train.mean(axis=0), X_train.std(axis=0)
# X_train = (X_train - mu) / sigma
# X_test = (X_test - mu) / sigma

Class label counts: [1500 1500]
X.shape: (3000, 502)
y.shape: (3000,)


In [29]:
class Perceptron():
    def __init__(self, num_features):
        self.num_features = num_features
        self.weights = torch.zeros(num_features, 1, 
                                   dtype=torch.float32)
        self.bias = torch.zeros(1, dtype=torch.float32)
        
        # placeholder vectors so they don't
        # need to be recreated each time
        self.ones = torch.ones(1)
        self.zeros = torch.zeros(1)

    def forward(self, x):
        linear = torch.mm(x, self.weights) + self.bias
        predictions = torch.where(linear > 0., self.ones, self.zeros)
        return predictions
        
    def backward(self, x, y):  
        predictions = self.forward(x)
        errors = y - predictions
        return errors
        
    def train(self, x, y, epochs):
        for e in range(epochs):
            for i in range(y.shape[0]):
                # Reshape inputs to maintain proper dimensions
                errors = self.backward(x[i].reshape(1, self.num_features), y[i]).reshape(-1)
                self.weights += (errors * x[i]).reshape(self.num_features, 1)
                self.bias += errors

            acc = self.evaluate(x, y)
            print('Test set accuracy: %.2f%%' % (acc*100))
                            
    def evaluate(self, x, y):
        predictions = self.forward(x).reshape(-1)
        accuracy = torch.sum(predictions == y).float() / y.shape[0]
        return accuracy

In [30]:
ppn = Perceptron(num_features=X_train.shape[1])

X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train, dtype=torch.float32)

ppn.train(X_train_tensor, y_train_tensor, epochs=100)



Test set accuracy: 99.81%
Test set accuracy: 100.00%
Test set accuracy: 100.00%
Test set accuracy: 100.00%
Test set accuracy: 100.00%
Test set accuracy: 100.00%
Test set accuracy: 100.00%
Test set accuracy: 100.00%
Test set accuracy: 100.00%
Test set accuracy: 100.00%
Test set accuracy: 100.00%
Test set accuracy: 100.00%
Test set accuracy: 100.00%
Test set accuracy: 100.00%
Test set accuracy: 100.00%
Test set accuracy: 100.00%
Test set accuracy: 100.00%
Test set accuracy: 100.00%
Test set accuracy: 100.00%
Test set accuracy: 100.00%
Test set accuracy: 100.00%
Test set accuracy: 100.00%
Test set accuracy: 100.00%
Test set accuracy: 100.00%
Test set accuracy: 100.00%
Test set accuracy: 100.00%
Test set accuracy: 100.00%
Test set accuracy: 100.00%
Test set accuracy: 100.00%
Test set accuracy: 100.00%
Test set accuracy: 100.00%
Test set accuracy: 100.00%
Test set accuracy: 100.00%
Test set accuracy: 100.00%
Test set accuracy: 100.00%
Test set accuracy: 100.00%
Test set accuracy: 100.00%
Te

In [31]:
with open('model.json', 'w') as f:
    json.dump({'weights': [x[0] for x in ppn.weights.tolist()], 'bias': ppn.bias.item()}, f)

In [34]:
import random

print('Positive Case')
positive_case = X_test[y_test == 1][random.randint(0, 299)]

with open('positive_case.text', 'w') as f:
    for idx, num in enumerate(positive_case):
        print(idx, num)
        if idx == len(positive_case) - 1:
            f.write(str((int(num))))
        else:
            f.write(str((int(num))) + ',\n')


print('Negative Case')
negative_case = X_test[y_test == 0][random.randint(0, 299)]
print(negative_case)

with open('negative_case.text', 'w') as f:
    for idx, num in enumerate(negative_case):
        if idx == len(negative_case) - 1:
            f.write(int(num))
        else:
            f.write(str((int(num))) + ',\n')


Positive Case
0 951.0
1 951.0
2 890.0
3 879.0
4 863.0
5 861.0
6 775.0
7 721.0
8 691.0
9 688.0
10 638.0
11 620.0
12 638.0
13 656.0
14 574.0
15 579.0
16 592.0
17 564.0
18 555.0
19 502.0
20 563.0
21 528.0
22 534.0
23 482.0
24 438.0
25 481.0
26 467.0
27 474.0
28 456.0
29 453.0
30 439.0
31 442.0
32 435.0
33 398.0
34 427.0
35 448.0
36 450.0
37 438.0
38 443.0
39 432.0
40 436.0
41 418.0
42 468.0
43 400.0
44 403.0
45 432.0
46 370.0
47 407.0
48 439.0
49 435.0
50 458.0
51 455.0
52 535.0
53 563.0
54 576.0
55 561.0
56 592.0
57 585.0
58 598.0
59 592.0
60 624.0
61 698.0
62 752.0
63 758.0
64 725.0
65 823.0
66 809.0
67 806.0
68 818.0
69 865.0
70 870.0
71 867.0
72 878.0
73 880.0
74 987.0
75 946.0
76 874.0
77 899.0
78 903.0
79 898.0
80 902.0
81 907.0
82 895.0
83 903.0
84 912.0
85 900.0
86 935.0
87 933.0
88 912.0
89 988.0
90 913.0
91 917.0
92 909.0
93 947.0
94 802.0
95 775.0
96 755.0
97 727.0
98 754.0
99 738.0
100 743.0
101 755.0
102 704.0
103 630.0
104 559.0
105 587.0
106 559.0
107 535.0
108 507.0
109 45

TypeError: write() argument must be str, not int

## Evaluating the model

In [9]:
X_test_tensor = torch.tensor(X_test, dtype=torch.float32)
y_test_tensor = torch.tensor(y_test, dtype=torch.float32)

test_acc = ppn.evaluate(X_test_tensor, y_test_tensor)
print('Test set accuracy: %.2f%%' % (test_acc*100))

Test set accuracy: 100.00%
