In [1]:
from tensorflow.keras.datasets import mnist
import numpy as np

# Load the dataset
(X_train_raw, y_train_raw), (X_test_raw, y_test_raw) = mnist.load_data()

# Normalize the data to the range [0, 1]
X_train_raw = X_train_raw.astype('float32') / 255
X_test_raw = X_test_raw.astype('float32') / 255

print(f"Training data shape: {X_train_raw.shape}")
print(f"Test data shape: {X_test_raw.shape}")

Training data shape: (60000, 28, 28)
Test data shape: (10000, 28, 28)


In [2]:
X_train = X_train_raw.reshape(X_train_raw.shape[0],-1).T
X_test = X_test_raw.reshape(X_test_raw.shape[0],-1).T

In [3]:
def softmax(array):
    # Subtract the max value to prevent overflow during exponentiation
    exp_array = np.exp(array - np.max(array, axis=-1, keepdims=True))
    return exp_array / np.sum(exp_array, axis=-1, keepdims=True)

class AnnStructure:
    def __init__(self,data,hidden_sizes,output_size):
        
        self.data = data
        #data must be stored such that len(data) == input_size
        self.input_size = self.data.shape[0]
        #array of size for each hidden layer
        self.hidden_sizes = hidden_sizes
        #size of the output layer
        self.output_size = output_size
        
        #definition of the weights and biases
        self.W = []
        self.b = []

        # Weights and biases for input to first hidden layer
        self.W.append(np.random.rand(hidden_sizes[0],self.input_size))
        self.b.append(np.zeros((hidden_sizes[0],1)))

        # Weights and biases for connections between hidden layers
        for i in range(1, len(hidden_sizes)):
            self.W.append(np.random.rand(hidden_sizes[i],hidden_sizes[i-1]))
            self.b.append(np.zeros((hidden_sizes[i],1)))

        # Weights and biases for last hidden layer to output layer
        self.W.append(np.random.rand(output_size,hidden_sizes[-1]))
        self.b.append(np.zeros((output_size,1)))
        
        #transform to numpy
        #self.W = np.array(self.W,dtype=object)
        #self.b = np.array(self.b,dtype=object)
        
        #definition of the output of each layer
        self.Z = []
        
    def feedforward(self):
        
        print(f"W[0] = {self.W[0]} \n\n data = {self.data} \n")
        self.Z.append(np.maximum(0,np.dot(self.W[0],self.data)))
        
        for i in range(1,len(self.W)-1):
            self.Z.append(np.maximum(0,np.dot(self.W[i],self.Z[i-1])))
        
        self.Z.append(softmax(np.dot(self.W[-1],self.Z[-2])))
        
        print(f"Z_output is: {self.Z[-1]} \nZ_output shape is: {self.Z[-1].shape}")
            
# Example usage
data = np.array([[0.5, 0.3, 0.25, 0.11, 0.72]])  # Sample data with 5 features
data = data
print(data)
print(len(data))
hidden_sizes = [3, 2, 3]  # three hidden layers with 3, 2 and 3 neurons respectively
output_size = 4  # Output layer with 4 neurons

trial_ann = AnnStructure(data, hidden_sizes, output_size)

# Printing the shapes of the weights and biases for verification
for i in range(len(trial_ann.W)):
    print(f"Weights {i}: {trial_ann.W[i].shape}")
    print(f"Biases {i}: {trial_ann.b[i].shape}")

    
print(trial_ann.W)
        
        

[[0.5  0.3  0.25 0.11 0.72]]
1
Weights 0: (3, 1)
Biases 0: (3, 1)
Weights 1: (2, 3)
Biases 1: (2, 1)
Weights 2: (3, 2)
Biases 2: (3, 1)
Weights 3: (4, 3)
Biases 3: (4, 1)
[array([[0.49887998],
       [0.13534154],
       [0.77950362]]), array([[0.98319766, 0.72767452, 0.66574771],
       [0.78758119, 0.30921274, 0.96183345]]), array([[0.05273694, 0.24956142],
       [0.9352311 , 0.77955523],
       [0.55368971, 0.84272271]]), array([[0.58965514, 0.93479104, 0.07933925],
       [0.99754637, 0.27942419, 0.95517746],
       [0.90631309, 0.68675105, 0.83638184],
       [0.73677206, 0.61592439, 0.65713019]])]


In [4]:
#print(data)
print(X_train[:,0])
print(X_train.shape) #(W * X_train = 10x784 * 784x60000 --> 10*60000 --> 10x10 * 10*60000


[0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         

In [5]:
mnist_ann = AnnStructure(X_train,[10,10],10)

for i in range(len(mnist_ann.W)):
    print(f"Weights {i}: {mnist_ann.W[i].shape}")
    print(f"Biases {i}: {mnist_ann.b[i].shape}")

Weights 0: (10, 784)
Biases 0: (10, 1)
Weights 1: (10, 10)
Biases 1: (10, 1)
Weights 2: (10, 10)
Biases 2: (10, 1)


In [6]:
mnist_ann.feedforward()

W[0] = [[0.23606842 0.37974753 0.53434879 ... 0.1454279  0.12504177 0.21201753]
 [0.96156235 0.80288097 0.08521734 ... 0.59593835 0.03996532 0.96378567]
 [0.58567659 0.82249459 0.87513819 ... 0.62993649 0.82311927 0.38837362]
 ...
 [0.86506744 0.37448257 0.32614976 ... 0.95870243 0.16355594 0.80607039]
 [0.5089337  0.08943821 0.37241669 ... 0.73014806 0.80910249 0.38313377]
 [0.31443402 0.96355979 0.62078417 ... 0.54840566 0.75731346 0.97526925]] 

 data = [[0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 ...
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]] 

Z_output is: [[2.44778750e-223 5.64590413e-208 9.38498731e-258 ... 2.48363072e-247
  5.31787363e-255 6.37179696e-254]
 [4.03365373e-182 3.83452237e-170 2.56512054e-209 ... 3.27805735e-201
  1.29465735e-207 2.15315721e-207]
 [1.10820148e-229 3.62597530e-215 1.77602925e-264 ... 1.09274648e-254
  1.28636956e-262 6.06610691e-262]
 ...
 [2.09061986e-215 1.33478977e-200 2.50757563e-24

In [7]:
#a = np.array([[1,2,3,4,5]])
#b = np.array([[1,0,0],[0,1,0],[0,0,1],[0,1,0],[3,0,0]])

a = np.array([1,2,3,4,5])
b = np.array([[0.4971655 , 0.46514149, 0.683409  ],[0.34680189 ,0.01691117, 0.64675005],[0.83683601 ,0.93708358, 0.25226605],[0.93013801, 0.49240928, 0.40130433],[0.14237  ,  0.10420476, 0.65134091]])


print(np.dot(a,b))

[8.13367935 5.80087549 7.59562912]


In [8]:
Z1 = np.dot(W1,X_train)
print(Z1.shape)
print(Z1[:,0])

NameError: name 'W1' is not defined

In [None]:
a = np.array([[[0,1,0],[2,3,0]],[[4,5,4],[6,7,4]]])

flattened_array = a.reshape(a.shape[0], -1)
print(flattened_array.T)

In [None]:
flattened_array2 = flattened_array.T
print(flattened_array.shape)
print(len(flattened_array2))
print(flattened_array2,"\n\n")
print(flattened_array2[4,1])

                    

In [None]:
y_train = y_train_raw.T



In [None]:
A = np.array([1,2,3,4,5,-1,-3])

print(A)

print(ReLu(A))

print(type(A))

In [None]:
global_vars = list(globals().keys())
print("Global variable names:", global_vars)

In [None]:
A3d = np.ones((3, 4, 2))

print(A3d)

In [None]:
print(mnist_ann.W[2].shape)

In [None]:


# Example usage
x = np.array([-1, 2, -3, 4, -5])
output = np.maximum(x,0)
print("Input:", x)
print("Output:", output)

In [None]:


A = np.array([[0,1,0,2,1,1],[1,1,1,3,1,0]])
print(np.sum(A,axis=-1, keepdims=True))

A = softmax([0,1,0,10,1,1])

print(A)