<a href="https://colab.research.google.com/github/praseedm/Learn-Deep-Learning/blob/main/Neural_Networks_From_Scratch.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Source: Future Mojo (YouTube) — [video link](https://www.youtube.com/watch?v=VS1mgwAS8EM)

https://www.nlpdemystified.org/course/neural-networks-1

## From Scratch

In [1]:
import numpy as np

In [2]:
# Toy dataset: five samples (rows) with three features each (columns).
samples = np.array(
    [[1, 2, 3], [6, 7, 8], [7, 8, 9], [3, 4, 5], [4, 5, 6]]
)

# X is the name used for the feature matrix from here on; it aliases `samples`.
X = samples

# One boolean label per sample, in the same row order as X.
targets = np.array([False, True, True, False, False])

In [3]:
# Map each label to an integer class index: `uniques` holds the distinct
# labels and `indices[i]` is the position of targets[i] within `uniques`.
uniques, indices = np.unique(targets, return_inverse=True)
print(
    f"Original targets: {targets}",
    f"Unique values: {uniques}",
    f"Target indices: {indices}",
    sep="\n",
)

Original targets: [False  True  True False False]
Unique values: [False  True]
Target indices: [0 1 1 0 0]


In [4]:
# Allocate an all-zero (n_samples, n_classes) matrix that will receive the
# one-hot encoded targets.
n_samples, n_classes = targets.shape[0], len(uniques)
y = np.zeros(shape=(n_samples, n_classes))
print(y)

[[0. 0.]
 [0. 0.]
 [0. 0.]
 [0. 0.]
 [0. 0.]]


In [5]:
# For every row i, set the column selected by indices[i] to 1; all other
# entries keep their initial 0.
y[np.arange(n_samples), indices] = 1.0
print(targets, "\n")
print("Our one-hot encoded targets:", y, sep="\n")

[False  True  True False False] 

Our one-hot encoded targets:
[[1. 0.]
 [0. 1.]
 [0. 1.]
 [1. 0.]
 [1. 0.]]


In [6]:
from sklearn.preprocessing import OneHotEncoder

In [15]:
# reshape(-1, 1) turns the 1-D targets into a single column; -1 lets NumPy
# infer the row count. Displaying .shape confirms the result is 2-D.
targets.reshape(-1,1).shape

(5, 1)

In [10]:
# Same one-hot encoding as above, this time via scikit-learn. The targets are
# reshaped into a single column before being passed to the encoder.
one_hot_encoder = OneHotEncoder()
one_hotted_targets = one_hot_encoder.fit_transform(targets.reshape(-1,1))
# The result prints as (row, column) value entries rather than a dense array.
print(one_hotted_targets)

  (0, 0)	1.0
  (1, 1)	1.0
  (2, 1)	1.0
  (3, 0)	1.0
  (4, 0)	1.0


In [12]:
# Convert the encoder output into a dense NumPy array so it can be compared
# with the hand-built one-hot matrix `y`.
one_hotted_targets.toarray()

array([[1., 0.],
       [0., 1.],
       [0., 1.],
       [1., 0.],
       [1., 0.]])

### Forward pass

In [13]:
# Each column of X is one feature; n_samples was set earlier from the targets.
n_features = X.shape[1]

print(
    X.shape,
    f"Dataset size: {n_samples}",
    f"Number of features: {n_features}",
    sep="\n",
)

(5, 3)
Dataset size: 5
Number of features: 3


In [16]:
# Width of the hidden layer.
n_hidden_units = 4

# Fix the global NumPy seed so this part of the demo prints the same weights
# on every run; comment the call out to draw a different set of weights.
np.random.seed(10)

# One column of weights per hidden unit, drawn uniformly from [-0.5, 0.5).
# The biases start at zero.
Wh = np.random.uniform(-0.5, 0.5, (n_features, n_hidden_units))
bh = np.zeros(shape=(1, n_hidden_units))

In [21]:
# Initial hidden weights. Rows index input features and columns index hidden
# units, i.e. shape (n_features, n_hidden_units), so X.dot(Wh) needs no transpose.
print(Wh, Wh.shape, sep="\n")

[[ 0.27132064 -0.47924805  0.13364823  0.24880388]
 [-0.00149299 -0.27520335 -0.30193714  0.26053071]
 [-0.33088916 -0.41166019  0.18535982  0.45339335]]
(3, 4)


In [18]:
# Check that the shapes line up for the hidden layer's X.dot(Wh) + bh.
print(
    f"Input shape: {X.shape}",
    f"Hidden weights shape: {Wh.shape}",
    f"Hidden biases shape: {bh.shape}",
    sep="\n",
)

Input shape: (5, 3)
Hidden weights shape: (3, 4)
Hidden biases shape: (1, 4)


In [22]:
# Column j of Wh holds the weights feeding hidden unit j, so the first hidden
# unit's weights are column 0.
# reshape(-1, 1) is only there to display the slice as a column; -1 lets NumPy
# infer the row count, so the cell keeps working if the number of features
# changes (the previous reshape(3, 1) hard-coded it).
print(Wh, '\n')
print("Weights of first hidden unit:")
print(Wh[:, 0].reshape(-1, 1))

[[ 0.27132064 -0.47924805  0.13364823  0.24880388]
 [-0.00149299 -0.27520335 -0.30193714  0.26053071]
 [-0.33088916 -0.41166019  0.18535982  0.45339335]] 

Weights of first hidden unit:
[[ 0.27132064]
 [-0.00149299]
 [-0.33088916]]


In [23]:
# Hidden-layer pre-activations: one row per sample, one column per hidden unit.
# bh has a single row and is broadcast across all sample rows.
h1 = X.dot(Wh) + bh
print(h1.shape, h1, sep="\n")

(5, 4)
[[-0.72433282 -2.26463532  0.08585342  2.13004535]
 [-1.02964036 -8.09519327  0.17120801  6.94368505]
 [-1.09070187 -9.26130486  0.18827893  7.90641299]
 [-0.84645584 -4.5968585   0.11999526  4.05550123]
 [-0.90751735 -5.76297009  0.13706617  5.01822917]]


In [24]:
# ReLU activation: negative pre-activations become 0, positive ones pass through.
a1 = np.maximum(0, h1)
print(a1.shape, end=" \n\n")

# Show the values before and after the activation for comparison.
print("Before ReLU (h1):", h1, sep="\n", end=" \n\n")
print("After ReLU (a1):", a1, sep="\n")

(5, 4) 

Before ReLU (h1):
[[-0.72433282 -2.26463532  0.08585342  2.13004535]
 [-1.02964036 -8.09519327  0.17120801  6.94368505]
 [-1.09070187 -9.26130486  0.18827893  7.90641299]
 [-0.84645584 -4.5968585   0.11999526  4.05550123]
 [-0.90751735 -5.76297009  0.13706617  5.01822917]] 

After ReLU (a1):
[[0.         0.         0.08585342 2.13004535]
 [0.         0.         0.17120801 6.94368505]
 [0.         0.         0.18827893 7.90641299]
 [0.         0.         0.11999526 4.05550123]
 [0.         0.         0.13706617 5.01822917]]


In [25]:
# Output layer parameters.

# Fix the global NumPy seed so this part of the demo prints the same weights
# on every run; comment the call out to draw a different set of weights.
np.random.seed(100)

# One column of weights per class, drawn uniformly from [-0.5, 0.5).
# The biases start at zero.
Wo = np.random.uniform(-0.5, 0.5, (n_hidden_units, n_classes))
bo = np.zeros(shape=(1, n_classes))

In [27]:
# Output-layer weights, the shapes entering the final affine step, and the biases.
print(
    Wo,
    f"Hidden layer output shape: {a1.shape}",
    f"Output weights shape: {Wo.shape}",
    f"Output biases shape: {bo.shape}",
    bo,
    sep="\n",
)

[[ 0.04340494 -0.22163061]
 [-0.07548241  0.34477613]
 [-0.49528114 -0.37843088]
 [ 0.17074908  0.32585276]]
Hidden layer output shape: (5, 4)
Output weights shape: (4, 2)
Output biases shape: (1, 2)
[[0. 0.]]


In [28]:
# Output-layer pre-activations (logits): one row per sample, one column per class.
h2 = a1.dot(Wo) + bo
print(h2.shape, h2, sep="\n")

(5, 2)
[[0.32118171 0.66159156]
 [1.10083177 2.19782851]
 [1.25676178 2.5050759 ]
 [0.63304174 1.27608634]
 [0.78897175 1.58333373]]


In [29]:
# e**0 == 1: once the row-wise maximum is subtracted in the softmax step below,
# each row's largest logit exponentiates to exactly 1.
np.exp(0)

1.0

In [30]:
# Softmax numerators. Each row's maximum is subtracted before exponentiating,
# which keeps np.exp from overflowing and leaves the final probabilities unchanged.
e_x = np.exp(h2 - h2.max(axis=1, keepdims=True))
print(e_x)

[[0.71147867 1.        ]
 [0.33387229 1.        ]
 [0.28698822 1.        ]
 [0.52568947 1.        ]
 [0.45186944 1.        ]]


In [31]:
# Softmax output: divide each row of numerators by its sum so the row adds up to 1.
y_hat = e_x / e_x.sum(axis=1, keepdims=True)
y_hat

array([[0.41570992, 0.58429008],
       [0.25030304, 0.74969696],
       [0.22299211, 0.77700789],
       [0.34455863, 0.65544137],
       [0.31123284, 0.68876716]])

In [33]:
# Calculating categorical cross-entropy.
# Inspect the machine epsilon for Python floats; it is used next as the bound
# when clipping the predicted probabilities.
np.finfo(float).eps

2.220446049250313e-16

In [34]:
# Keep every probability within [eps, 1 - eps] so the logarithm taken later
# never receives an exact 0.
y_hat_clipped = y_hat.clip(np.finfo(float).eps, 1 - np.finfo(float).eps)
print(y_hat_clipped)

[[0.41570992 0.58429008]
 [0.25030304 0.74969696]
 [0.22299211 0.77700789]
 [0.34455863 0.65544137]
 [0.31123284 0.68876716]]


In [36]:
# One-hot targets followed by the predicted probabilities, for comparison.
print(y, y_hat, sep="\n")

[[1. 0.]
 [0. 1.]
 [0. 1.]
 [1. 0.]
 [1. 0.]]
[[0.41570992 0.58429008]
 [0.25030304 0.74969696]
 [0.22299211 0.77700789]
 [0.34455863 0.65544137]
 [0.31123284 0.68876716]]


In [35]:
# Per-sample cross-entropy: the one-hot mask y keeps only the negative log
# probability assigned to each sample's true class.
neg_logs = (y * -np.log(y_hat_clipped)).sum(axis=1)
neg_logs

array([0.87776756, 0.2880862 , 0.25230477, 1.06549102, 1.16721398])

In [37]:
# Categorical cross-entropy loss: the average of the per-sample losses.
cce_loss = neg_logs.mean()
print(f"The loss after this forward pass: {cce_loss}")

The loss after this forward pass: 0.7301727079439226


### Backward pass

## Using TF