In [25]:
import numpy as np
from PIL import Image
import h5py
import matplotlib.pyplot as plt
import matplotlib
import time
# from google.colab import drive
# drive.mount('/content/gdrive')
import sys
# sys.path.append('/content/gdrive/My Drive')
# import unit10.utils as u10

import os
# from sklearn.datasets import fetch_openml
# from sklearn.metrics import classification_report, confusion_matrix

In [26]:
class ITrainable():
  pass
  def forward_propagation(self, X):
    raise NotImplementedError("Not Implemented")

  def backward_propagation(self, dY_hat):
    raise NotImplementedError("Not Implemented")

  def update_parameters(self):
    raise NotImplementedError("Not Implemented")


In [27]:
class DLLinearLayer(ITrainable):
  def __init__(self,name,num_units,input_size, alpha,optimize = None):
    self.name = name
    self.alpha = alpha
    self.optimization = optimize
    self.input_size = input_size
    self.num_units = num_units
    self.b = np.zeros((num_units,1),dtype = float)
    self.W_Xaviar_initialization()
    if(self.optimization == 'adaptive'):
      self.adaptive_cont = 1.1
      self.adaptive_switch = 0.5
      self.adaptive_W = np.full(self.W.shape,alpha,dtype = float)
      self.adaptive_b = np.full(self.b.shape,alpha,dtype = float)

  def set_W(self,W):
    self.W = np.copy(W)


  @staticmethod
  def normal_initialization(shape,factor=0.01):
    return np.random.randn(*shape)*factor

  def W_He_initialization(self):
    self.W = DLLinearLayer.normal_initialization((self.num_units,self.input_size,),np.sqrt(2/self.input_size))

  def W_Xaviar_initialization(self):
    self.W = DLLinearLayer.normal_initialization((self.num_units,self.input_size),1/np.sqrt(self.input_size))


  def save_parameters(self,file_path):
    file_name = file_path+"/"+self.name+".h5"
    with h5py.File(file_name, 'w') as hf:
      hf.create_dataset("W", data=self.W)
      hf.create_dataset("b", data=self.b)

  def restore_parameters(self,file_path):
    file_name = file_path+"/"+self.name+".h5"
    with h5py.File(file_name, 'r') as hf:
      self.W = hf['W'][:]
      self.b = hf['b'][:]

  def forward_propagation(self, prev_A):
    self.prev_A = np.copy(prev_A)
    Z = self.W @ prev_A + self.b
    return Z

  def get_W(self):
    return self.W

  def backward_propagation(self, dZ):
    self.db = np.sum(dZ, keepdims=True, axis=1)
    self.dW = dZ@self.prev_A.T
    return self.W.T@dZ


  def update_parameters(self):
    if self.optimization == 'adaptive':
      self.adaptive_W *= np.where(self.adaptive_W * self.dW > 0, self.adaptive_cont, -self.adaptive_switch)
      self.W -= self.adaptive_W
      self.adaptive_b *= np.where(self.adaptive_b * self.db > 0, self.adaptive_cont, -self.adaptive_switch)
      self.b -= self.adaptive_b
    else:
      self.W -= self.alpha * self.dW
      self.b -= self.alpha * self.db

  def __str__(self):
    s = self.name + " Function:\n"
    s += "\tlearning_rate (alpha): " + str(self.alpha) + "\n"
    if self.optimization != None:
      s += "\toptimization: " + str(self.optimization) + "\n"
    if self.optimization == "adaptive":
      s += "\t\tadaptive parameters:\n"
      s += "\t\t\tcont: " + str(self.adaptive_cont)+"\n"
      s += "\t\t\tswitch: " + str(self.adaptive_switch)+"\n"
    # parameters
    s += "\tParameters: W shape = "+str(self.W.shape)+", b = "+str(self.b.shape)+"\n"
    return s

In [28]:
class DLNetwork(ITrainable):
  def __init__(self,name):
    self.name = name
    self.layers = []

  def add_layer(self,iTrainable):
    self.layers.append(iTrainable)

  def forward_propagation(self, X):
    self.__X = X
    kelet = X
    for layer in self.layers:
      kelet = layer.forward_propagation(kelet)
    return kelet

  def backward_propagation(self, dY_hat):
    kelet= dY_hat
    for layer in self.layers[::-1]:
      kelet = layer.backward_propagation(kelet)
    return kelet

  def save_parameters(self,dir_path):
    path = dir_path+"/"+self.name
    if os.path.exists(path)== False:
      os.mkdir(path)
      for layer in self.layers:
        layer.save_parameters(path)

  def restore_parameters(self, dir_path):
    path = dir_path+"/"+self.name
    for layer in self.layers:
      layer.restore_parameters(path)

  def update_parameters(self):
    for layer in self.layers:
      layer.update_parameters()

  def __str__(self):
    s = self.name+'\n'
    for layer in self.layers:
      s+=layer.__str__()
    return s



In [29]:
class DLModel:
  def __init__(self,name,iTrainable,loss):
    self.name = name
    self.iTrainable = iTrainable
    self.loss = loss
    if(loss == 'categorical_cross_entropy'):
      self.loss_forward = self.categorical_cross_entropy
      self.loss_backward = self.dCategorical_cross_entropy
    if(loss =="square_dist"):
      self.loss_forward = self.square_dist
      self.loss_backward = self.dSquare_dist
    if(loss == "cross_entropy"):
      self.loss_forward = self.cross_entropy
      self.loss_backward = self.dCross_entropy

  @staticmethod
  def to_one_hot(num_categories, Y):
    m = Y.shape[0]
    Y = Y.reshape(1, m)
    Y_new = np.eye(num_categories)[Y.astype('int32')]
    Y_new = Y_new.T.reshape(num_categories, m)
    return Y_new

  def __str__(self):
    s = self.name + "\n"
    s += "\tLoss function: " + self.loss + "\n"
    s += "\t"+str(self.iTrainable) + "\n"
    return s

  def categorical_cross_entropy(self, Y_hat, Y):
    eps = 1e-10
    Y_hat = np.where(Y_hat==0,eps,Y_hat)
    Y_hat = np.where(Y_hat == 1, 1-eps,Y_hat)
    errors = np.zeros(Y.shape[1])
    errors = -np.sum(Y*np.log(Y_hat),axis = 0)
    return errors

  def dCategorical_cross_entropy(self,Y_hat,Y):
    return (-Y+Y_hat)/Y.shape[1]

  def cross_entropy(self,Y_hat,Y):
    eps = 1e-10
    Y_hat = np.where(Y_hat==0,eps,Y_hat)
    Y_hat = np.where(Y_hat == 1, 1-eps,Y_hat)
    return -(Y*np.log(Y_hat)+(np.full(Y.shape,1,dtype=float)-Y)*np.log(np.full(Y.shape,1,dtype=float)-Y_hat))

  def dCross_entropy(self,Y_hat,Y):
    eps = 1e-10
    Y_hat = np.where(Y_hat==0,eps,Y_hat)
    Y_hat = np.where(Y_hat == 1, 1-eps,Y_hat)
    return (1/Y.shape[1])*(-Y*Y_hat**(-1)+(np.full(Y.shape,1,dtype=float)-Y)*(np.full(Y.shape,1,dtype=float)-Y_hat)**-1)


  def square_dist(self, Y_hat, Y):
    errors = (Y_hat - Y)**2
    return errors

  def dSquare_dist(self, Y_hat, Y):
    m = Y.shape[1]
    dY_hat = 2*(Y_hat - Y)/m
    return dY_hat

  def compute_cost(self, Y_hat, Y):
    m = Y.shape[1]
    errors = self.loss_forward(Y_hat, Y)
    J = np.sum(errors)
    return J/m

  def confusion_matrix(self, X, Y):
    prediction = self.forward_propagation(X)
    prediction_index = np.argmax(prediction, axis=0)
    Y_index = np.argmax(Y, axis=0)
    right = np.sum(prediction_index == Y_index)
    print("accuracy: ",str(right/len(Y[0])))
    print(confusion_matrix(prediction_index, Y_index))

  def backward_propagation(self,Y_hat,Y):
    dY_hat = self.loss_backward(Y_hat,Y)
    self.iTrainable.backward_propagation(dY_hat)

  def forward_propagation(self,X):
    return self.iTrainable.forward_propagation(X)

  def train(self,X,Y,num_iterations):
    print_ind = max(num_iterations // 100, 1)
    costs = []
    for i in range(num_iterations):
      Y_hat = self.iTrainable.forward_propagation(X)
      self.backward_propagation(Y_hat,Y)
      self.iTrainable.update_parameters()

      if i > 0 and i % print_ind == 0:
        J = self.compute_cost(Y_hat, Y)
        print("cost:",J,i/print_ind,"%")
        costs.append(J)
    costs.append(self.compute_cost(Y_hat, Y))
    return costs


In [30]:
class DLNeuronsLayer(DLNetwork):
  def __init__(self,name,num_units,input_size,activation,alpha,optimization=None):
    self.name = name
    self.linear = DLLinearLayer("Linear",num_units,input_size,alpha,optimization)
    self.activation = DLActivation(activation)
    super().__init__(name)
    super().add_layer(self.linear)
    super().add_layer(self.activation)


  def __str__(self):
    return self.linear.__str__()+self.activation.__str__()


In [31]:
class DLActivation(ITrainable):
  def __init__(self,activation):
    self.name = activation
    if activation == "tanh":
      self.forward_propagation =self.tanh
      self.backward_propagation =self.tanh_dZ
    elif activation == "relu":
      self.forward_propagation =self.relu
      self.backward_propagation =self.relu_dZ
    elif activation == "leaky_relu":
      self.leaky_relu_d = 0.01
      self.forward_propagation =self.leaky_relu
      self.backward_propagation = self.leaky_relu_dZ
    elif activation == "sigmoid":
      self.forward_propagation = self.sigmoid
      self.backward_propagation =self.sigmoid_dZ
    elif activation =='softmax':
      self.forward_propagation = self.softmax
      self.backward_propagation =self.softmax_dZ
    else:
      raise Exception("Undifiend activation")

  def sigmoid(self, Z):
    self.res = 1/(1+np.exp(-1*Z))
    return self.res

  def sigmoid_dZ(self, dA):
    self.dZ = dA*self.res*(np.full(self.res.shape,1,dtype=float)-self.res)
    return self.dZ

  def softmax(self,Z):
    return np.exp(Z) / np.sum(np.exp(Z), axis=0)

  def softmax_dZ(self,dZ):
    return dZ

  def tanh(self, Z):
    self.res = np.tanh(Z)
    return self.res

  def tanh_dZ(self,dA):
    return dA*(1-self.res**2)

  def relu(self, Z):
    self.Z = Z
    return np.maximum(0,Z)

  def relu_dZ(self,dA):
    return np.where(self.Z <= 0, 0, 1)*dA

  def leaky_relu(self,Z):
    self.Z = Z
    return np.where(self.Z <= 0, self.leaky_relu_d*self.Z, self.Z)

  def leaky_relu_dZ(self,dA):
    return np.where(self.Z <= 0, self.leaky_relu_d, 1)*dA


  def update_parameters(self):
     pass

  def save_parameters(self,path):
    pass
  def restore_parameters(self,path):
    pass

  def __str__(self):
    return "Activation: "+self.name


In [32]:
class testGrad():
  def __init__(self):
    pass
  @staticmethod
  def check_grad(f,x,f_grad,epsilon = 1e-4,delta = 1e-7):
    aprox = (f(x+delta)-f(x-delta))/(2*delta)
    grad = f_grad(x)
    print(aprox,grad)
    diff = abs(aprox-grad)/(abs(aprox)+abs(grad))
    return (diff<epsilon,diff)

  @staticmethod
  def check_n_grad(f , parms_vec, grad_vec, epsilon=1e-4 , delta=1e-7):
    n = len(parms_vec)
    approx = np.zeros(parms_vec.shape)
    for i in range(n):
      pars_plus = np.copy(parms_vec)
      pars_plus[i]+=delta
      pars_min = np.copy(parms_vec)
      pars_min[i]-=delta
      approx[i] = (-f(pars_min)+f(pars_plus))/(2*delta)
    above = np.linalg.norm(approx-grad_vec)
    bottom = np.linalg.norm(approx)+np.linalg.norm(grad_vec)
    diff = above/bottom
    return (diff<epsilon,diff)


def g(parms):
    a,b = parms[0], parms[1]
    return 2*a**2+4*a*b-3*b**2

def dg_da(a,b):
    return 4*a+4*b
def dg_db(a,b):
    return 4*a-6*b
def dg_db_wrong(a,b):
    return 4*a-6*b+0.01
a,b = 5.0,1.0
check, diff = testGrad.check_n_grad(g, np.array([a,b]), np.array([dg_da(a,b),dg_db(a,b)]))
print("check:",str(check), ", diff:", str(diff))
check, diff = testGrad.check_n_grad(g, np.array([a,b]), np.array([dg_da(a,b),dg_db_wrong(a,b)]))
print("check:",str(check), ", diff:", str(diff))


check: True , diff: 1.5040645465357203e-09
check: False , diff: 0.00017993850937502074


In [33]:

# # mnist = fetch_openml('mnist_784')
# # X, Y = mnist["data"], mnist["target"]
# # X = np.array(X) # just in case…
# # Y = np.array(Y) # just in case…



In [34]:

# # Install the Kaggle package
# !pip install kaggle

# # Upload your kaggle.json file
# # from google.colab import files
# # files.upload()

# # Move kaggle.json to the correct directory and set permissions
# !mkdir -p ~/.kaggle
# !cp kaggle.json ~/.kaggle/
# !chmod 600 ~/.kaggle/kaggle.json


In [35]:
# # Download the handwritten digits dataset
# !kaggle datasets download -d jcprogjava/handwritten-digits-dataset-not-in-mnist

# # Unzip the downloaded files
# !unzip handwritten-digits-dataset-not-in-mnist.zip


In [36]:
# import os
# from PIL import Image
# import numpy as np

# import os
# import random
# import numpy as np
# from PIL import Image
# import matplotlib.pyplot as plt

# def array_to_image(array):
#     # Check if the array size is correct for a 28x28 image
#     if array.size != 28 * 28:
#         raise ValueError("The input array must have 784 elements (28x28).")

#     # Reshape the array back into a 28x28 image
#     image = array.reshape(28, 28)

#     # Display the image
#     plt.imshow(image, cmap='gray')
#     plt.axis('off')  # Hide the axis
#     plt.show()

#     return image

# def load_handwritten_digits(data_dir):
#     data = []
#     labels = []

#     all_files = []
#     for digit in range(10):
#         digit_dir = os.path.join(os.path.join(data_dir, str(digit)), str(digit))
#         digit_files = [os.path.join(digit_dir, f) for f in os.listdir(digit_dir) if f.endswith('.png')]
#         all_files.extend([(f, digit) for f in digit_files])

#     random.shuffle(all_files)

#     for file_path, digit in all_files:
#         try:
#             # Open the image
#             img = Image.open(file_path)
#             # print(f"Loaded image: {file_path}, mode: {img.mode}, size: {img.size}")

#             # Convert image to RGBA if it's not already
#             # if img.mode != 'RGBA':
#             #     print(img.mode)
#             #     return
#             #     img = img.convert('RGBA')

#             # Split the image into its separate channels
#             # r, g, b, a = img.split()

#             # Invert the RGB channels
#             # r = r.point(lambda p: 255 - p)
#             # g = g.point(lambda p: 255 - p)
#             # b = b.point(lambda p: 255 - p)

#             # Merge the channels back, keeping the alpha channel unchanged
#             # inverted_img = Image.merge("RGBA", (r, g, b, a))

#             # Create a black background image
#             black_bg = Image.new("RGBA", img.size, (255, 255, 255, 255))

#             # Paste the inverted image onto the black background using the alpha channel as mask
#             # black_bg.paste(inverted_img, (0, 0), inverted_img)
#             black_bg.paste(img, (0, 0), img)

#             # Convert to grayscale
#             new_img = black_bg.convert('L')
#             img_data = np.array(new_img).astype(np.float32)
#             # test_img_data = np.array(img.convert('L')).astype(np.float32) / 255.0

#             # Debugging step: Print min and max values to check normalization
#             # print(f"Converted Image: {file_path}, min: {img_data.min()}, max: {img_data.max()}")

#             # Display the image to debug if necessary
#             # array_to_image(img_data * 255)  # Convert back to 0-255 for display
#             # array_to_image(test_img_data * 255)
#             # return


#             data.append(img_data.flatten())
#             labels.append(str(digit))
#         except Exception as e:
#             print(f"Error loading file: {file_path}")
#             print(e)

#     data = np.array(data)
#     labels = np.array(labels)

#     return data, labels


# # Load images and labels
# base_folder = 'dataset'
# X, Y = load_handwritten_digits(base_folder)


In [37]:
# print(X.shape)

In [38]:


# X = X / 255 - 0.5

# Y_new = DLModel.to_one_hot(10,Y)
# print(Y_new.shape)

# m = 64638
# m_test = X.shape[0] - m
# X_train, X_test = X[:m].T, X[m:].T
# Y_train, Y_test = Y_new[:,:m], Y_new[:,m:]

# print(X_train.shape, Y_train.shape)
# print(X_test.shape, Y_test.shape)


# # np.random.seed(111)
# # shuffle_index = np.random.permutation(m)
# # X_train, Y_train = X_train[:, shuffle_index], Y_train[:, shuffle_index]


In [39]:
# print(X.shape)
# print(Y.shape)

In [40]:
np.random.seed(1)

Hidden = DLNeuronsLayer("Hidden",64,28*28,"sigmoid",0.1,'adaptive')
Output = DLNeuronsLayer("Output",10,64,"softmax",0.1,'adaptive')

digit_network = DLNetwork("digit_net")
digit_network.add_layer(Hidden)
digit_network.add_layer(Output)

digit_model = DLModel("model",digit_network,'categorical_cross_entropy')
# costs = digit_model.train(X_train, Y_train, 200)
# u10.print_costs(costs,200)
# digit_network.save_parameters("parameters")

In [41]:
# digit_network.save_parameters("parameters")

In [42]:
digit_network.restore_parameters("parameters")
print("Train:")
# digit_model.confusion_matrix(X_train, Y_train)
print("Test:")
# digit_model.confusion_matrix(X_test, Y_test)







Train:


NameError: name 'X_train' is not defined

In [53]:

import numpy as np
from PIL import Image

def evaluateImage(imagePath):
    img = Image.open(imagePath)
    
     
    # Create a black background image
    if img.mode != 'RGBA':
      img = img.convert('RGBA')
      print(img.size)

    if "my_digits" in imagePath:

      print("here")

    white_bg = Image.new("RGBA", img.size, (255, 255, 255, 255))

    # Paste the inverted image onto the black background using the alpha channel as mask
    # black_bg.paste(inverted_img, (0, 0), inverted_img)
    white_bg.paste(img, (0, 0), img)

    # Convert to grayscale
    new_img = white_bg.convert('L')
    img_data = np.array(new_img).astype(np.float32) / 255 - 0.5

    img_data = img_data.reshape(784, 1)


    # Forward propagation
    Y_hat = digit_model.forward_propagation(img_data)

    # Get the predicted digit
    predicted_digit = np.argmax(Y_hat)

    print(f"The predicted digit in {imagePath} is: {predicted_digit}")


dataset_path = "dataset/"

for j in range(5,10):
  for i in range(5):
    num = 1 * (10 ** i)
    evaluateImage(dataset_path + str(j) + "/" + str(j) + "/" + str(num) + ".png")

The predicted digit in dataset/5/5/1.png is: 5
The predicted digit in dataset/5/5/10.png is: 5
The predicted digit in dataset/5/5/100.png is: 5
The predicted digit in dataset/5/5/1000.png is: 5
The predicted digit in dataset/5/5/10000.png is: 5
The predicted digit in dataset/6/6/1.png is: 6
The predicted digit in dataset/6/6/10.png is: 6
The predicted digit in dataset/6/6/100.png is: 6
The predicted digit in dataset/6/6/1000.png is: 6
The predicted digit in dataset/6/6/10000.png is: 6
The predicted digit in dataset/7/7/1.png is: 7
The predicted digit in dataset/7/7/10.png is: 7
The predicted digit in dataset/7/7/100.png is: 7
The predicted digit in dataset/7/7/1000.png is: 7
The predicted digit in dataset/7/7/10000.png is: 7
The predicted digit in dataset/8/8/1.png is: 8
The predicted digit in dataset/8/8/10.png is: 8
The predicted digit in dataset/8/8/100.png is: 8
The predicted digit in dataset/8/8/1000.png is: 8
The predicted digit in dataset/8/8/10000.png is: 8
The predicted digit 

In [51]:
# my_digits_path = "my_digits/"
evaluateImage("my_digits/1.png")
evaluateImage("my_digits/2.png")



here
The predicted digit in my_digits/1.png is: 1
here
The predicted digit in my_digits/2.png is: 2
