import numpy as np
class MyClassifier:
    def one_hot_encoding(self):
        """
        Converts the true labels stored in self.Y to one-hot encoded form.

        Returns:
            numpy.ndarray: One-hot encoded labels, shape (2, num_samples).
        """
        Y = self.Y
        # Create a matrix of zeros: one row per sample, one column per class.
        matrix = np.zeros((Y.size, 2))
        # Set the element corresponding to each sample's label to 1.
        for i, y in enumerate(Y):
            matrix[i, y] = 1
        # Transpose so each column is one sample's one-hot vector.
        return matrix.T
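    # Worked example: with Y = [0, 1, 1] the loop builds
    #     [[1, 0],
    #      [0, 1],
    #      [0, 1]]
    # and the transpose returned is [[1, 0, 0], [0, 1, 1]]: row 0 flags
    # class 0 and row 1 flags class 1, one column per sample.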
    def __init__(self, X, alpha, iterations, Y):
        """
        Initialises the network and trains it with gradient descent.

        Args:
            X (str): Path to the CSV training data (label in column 0).
            alpha (float): Learning rate.
            iterations (int): Number of gradient descent iterations.
            Y (str): Path to the CSV testing data (unused during training;
                the training labels are taken from column 0 of X).
        """
        # Load the full dataset; column 0 holds the class labels.
        self.X = np.loadtxt(open(X), delimiter=",").astype(int)
        self.alpha = alpha
        self.iterations = iterations
        self.dim = np.shape(self.X)
        self.Y = self.X[:, 0]
        # Exclude the label column so the network never sees the answer.
        self.features = self.X[:, 1:]
        self.one_hot = self.one_hot_encoding()
        # Keep the trained parameters on the instance for later prediction.
        self.weights1, self.bias1, self.weights2, self.bias2 = self.gradient_descent()
    def initialise_parameters(self):
        """
        Randomly initialises the weights and biases for both layers.
        """
        num_features = self.features.shape[1]
        # Layer 1: 2 hidden neurons, one weight per input feature.
        weights1 = np.random.randn(2, num_features)
        # Layer 2: 2 output neurons fed by the 2 hidden neurons.
        weights2 = np.random.randn(2, 2)
        # Column vectors: each of the two neurons gets its own bias value.
        bias1 = np.random.randn(2, 1)
        bias2 = np.random.randn(2, 1)
        # Drawing from a standard normal breaks symmetry, so the two
        # neurons in each layer can learn different weights.
        return weights1, weights2, bias1, bias2
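    # Shape sanity check (assuming 54 feature columns, as in the spam data):
    #   weights1.shape == (2, 54), weights2.shape == (2, 2),
    #   bias1.shape == bias2.shape == (2, 1).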
    def rectified_linear_activation_funct(self, Z1):
        """
        Applies the Rectified Linear Unit (ReLU) activation function
        element-wise to the input.

        Args:
            Z1 (numpy.ndarray): Input to the activation function.
        Returns:
            numpy.ndarray: Result of applying ReLU to Z1.
        """
        return np.maximum(Z1, 0)
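    # Example: np.maximum([[-2., 0.5], [3., -1.]], 0) -> [[0., 0.5], [3., 0.]],
    # i.e. negative pre-activations are clamped to zero.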
    def softmax_activation_funct(self, Z2):
        """
        Applies the softmax activation function column-wise to the input.

        Args:
            Z2 (numpy.ndarray): Input to the activation function.
        Returns:
            numpy.ndarray: Result of applying softmax to Z2; each column
                sums to 1 and can be read as class probabilities.
        """
        # Subtract the column-wise max before exponentiating to avoid overflow.
        exp_Z = np.exp(Z2 - np.max(Z2, axis=0, keepdims=True))
        # Each entry is its exponential divided by its column's total.
        return exp_Z / np.sum(exp_Z, axis=0, keepdims=True)
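    # Example: a column [0, 0] maps to [0.5, 0.5]; a column [2, 0] maps to
    # roughly [0.88, 0.12], since e^2 / (e^2 + e^0) ≈ 0.88.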
    def forward_propagation(self, weights1, weights2, bias1, bias2, X):
        """
        Performs forward propagation through the neural network.

        Args:
            weights1, weights2 (numpy.ndarray): Layer weights.
            bias1, bias2 (numpy.ndarray): Layer biases.
            X (numpy.ndarray): Input data, one sample per row.
        Returns:
            tuple: Intermediate results of forward propagation
                (Z1, Z2, layer1_out, layer2_out).
        """
        # Hidden layer pre-activation: weights times inputs, plus bias.
        Z1 = np.dot(weights1, X.T) + bias1
        layer1_out = self.rectified_linear_activation_funct(Z1)
        # Output layer pre-activation: weights times hidden output, plus bias.
        Z2 = np.dot(weights2, layer1_out) + bias2
        layer2_out = self.softmax_activation_funct(Z2)
        return Z1, Z2, layer1_out, layer2_out
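    # Shape walkthrough (assuming m samples and 54 features):
    #   X.T is (54, m), so Z1 = weights1 @ X.T + bias1 is (2, m);
    #   Z2 = weights2 @ layer1_out + bias2 is also (2, m),
    #   one column of class scores per sample.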
    def ReLU_deriv(self, Z):
        """
        Computes the derivative of the ReLU activation function with
        respect to the input Z.

        Args:
            Z (numpy.ndarray): Input to the activation function.
        Returns:
            numpy.ndarray: Boolean mask that is True (1) where Z > 0.
        """
        return Z > 0
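    # Example: (np.array([[-1., 2.]]) > 0) gives [[False, True]], which
    # NumPy treats as [[0., 1.]] when multiplied into the gradient.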
    def backward_propagation(self, Z1, Z2, layer1_out, layer2_out, weights2):
        """
        Performs backward propagation to compute the gradients of the
        parameters.

        Args:
            Z1 (numpy.ndarray): Hidden layer pre-activation.
            Z2 (numpy.ndarray): Output layer pre-activation.
            layer1_out (numpy.ndarray): Hidden layer output (after ReLU).
            layer2_out (numpy.ndarray): Output layer output (after softmax).
            weights2 (numpy.ndarray): Output layer weights.
        Returns:
            tuple: Gradients of the parameters (dW2, db2, dW1, db1).
        """
        one_hot_format = self.one_hot
        m, n = self.dim
        # Work backwards from layer 2: softmax plus cross-entropy gives the
        # simple error term A2 - Y_onehot.
        diffZ2 = layer2_out - one_hot_format
        dW2 = 1 / m * diffZ2.dot(layer1_out.T)
        db2 = 1 / m * np.sum(diffZ2, axis=1, keepdims=True)
        # Propagate the error back through layer 2's weights and the ReLU mask.
        diffZ1 = weights2.T.dot(diffZ2) * self.ReLU_deriv(Z1)
        dW1 = 1 / m * diffZ1.dot(self.features)
        db1 = 1 / m * np.sum(diffZ1, axis=1, keepdims=True)
        return dW2, db2, dW1, db1
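    # The gradients above are the standard ones for a softmax output layer
    # trained with cross-entropy loss:
    #   dZ2 = A2 - Y_onehot
    #   dW2 = (1/m) * dZ2 @ A1.T          db2 = (1/m) * sum over columns of dZ2
    #   dZ1 = (W2.T @ dZ2) * ReLU'(Z1)
    #   dW1 = (1/m) * dZ1 @ X             db1 = (1/m) * sum over columns of dZ1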
def update_params(self, W1, b1, W2, b2, dW1, db1, dW2, db2, alpha):
"""
Updates the weights and biases using gradient descent.
"""
W1 = W1 - alpha * dW1
b1 = b1 - alpha * db1
W2 = W2 - alpha * dW2
b2 = b2 - alpha * db2
return W1, b1, W2, b2
    def get_predictions(self, A2):
        """
        Returns the predictions based on the output of the neural network.

        Args:
            A2 (ndarray): Output of the second layer after activation.
        Returns:
            ndarray: Predicted class labels.
        """
        # Take the row index of the largest value in each column of A2:
        # the class with the highest predicted probability for that sample.
        return np.argmax(A2, 0)
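    # Example: np.argmax([[0.9, 0.2], [0.1, 0.8]], 0) -> [0, 1]: the first
    # sample is predicted as class 0, the second as class 1.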
def get_accuracy(self, predictions, Y):
"""
Calculates the accuracy of the predictions.
Args:
predictions (ndarray): Predicted class labels.
Y (ndarray): True class labels.
Returns:
float: Accuracy of the predictions.
"""
print(predictions, "\n", Y)
return np.sum(predictions == Y) / Y.size
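    # Example: predictions [0, 1, 1] against Y [0, 1, 0] agree on 2 of 3
    # samples, so the returned accuracy is 2/3 ≈ 0.667.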
    def gradient_descent(self):
        """
        Performs gradient descent to train the neural network.

        Returns:
            tuple: The trained parameters (weights1, bias1, weights2, bias2).
        """
        weights1, weights2, bias1, bias2 = self.initialise_parameters()
        for i in range(self.iterations):
            # Forward pass, then backward pass to get the gradients.
            Z1, Z2, layer1_out, layer2_out = self.forward_propagation(
                weights1, weights2, bias1, bias2, self.features)
            dW2, db2, dW1, db1 = self.backward_propagation(
                Z1, Z2, layer1_out, layer2_out, weights2)
            weights1, bias1, weights2, bias2 = self.update_params(
                weights1, bias1, weights2, bias2, dW1, db1, dW2, db2, self.alpha)
            if i % 10 == 0:
                print("Iteration: ", i)
                predictions = self.get_predictions(layer2_out)
                print(self.get_accuracy(predictions, self.Y))
        return weights1, bias1, weights2, bias2
# Train on the training split; the testing path is accepted but unused here.
s = MyClassifier("data/training_spam.csv", 0.1, 100, "data/testing_spam.csv")
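# A minimal evaluation sketch, assuming data/testing_spam.csv has the same
# layout as the training file (label in column 0, features after it). It
# reuses the trained parameters stored on the classifier instance.
test_data = np.loadtxt(open("data/testing_spam.csv"), delimiter=",").astype(int)
test_labels = test_data[:, 0]
test_features = test_data[:, 1:]
_, _, _, test_out = s.forward_propagation(
    s.weights1, s.weights2, s.bias1, s.bias2, test_features)
print("Test accuracy:", s.get_accuracy(s.get_predictions(test_out), test_labels))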