<a href="https://colab.research.google.com/github/rayenmbarek/Deep-Learning/blob/main/Arabic_MNIST_ANN_and_CNN.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# Python packages to manipulate files
import os
import pathlib
from pathlib import Path
import datetime
import shutil

# Tensorflow, Keras and Numpy packages
import tensorflow as tf
import numpy as np
import keras_preprocessing
from keras_preprocessing import image
from keras_preprocessing.image import ImageDataGenerator

# Display related packages
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
from IPython.display import Image
import PIL
import PIL.Image

%matplotlib inline



In [2]:
my_devices = tf.config.experimental.list_physical_devices(device_type='CPU')
tf.config.experimental.set_visible_devices(devices= my_devices, device_type='CPU')
# #tf.config.set_visible_devices([], 'GPU')

In [None]:
! git clone https://github.com/minus--/arabic-letters-tutorial

In [None]:
! tar xvzf arabic-letters-tutorial/arabic_handwritten_data.tgz # unzip

# ***Create New Folders***


In [5]:
mkdir data/new_data

In [6]:
mkdir data/new_data/test_data

In [7]:
mkdir data/new_data/train_data

In [8]:
for i in range(1,29):
  os.mkdir("data/new_data/test_data/{}".format(i))
  os.mkdir("data/new_data/train_data/{}".format(i))

# ***Copy the classified data into the new folders***

In [9]:
directory_train = "./data/train_data/"
images_train = os.listdir(directory_train)
directory_test = "./data/test_data/"
images_test = os.listdir(directory_test)

In [10]:
for image in images_train:
  if image.endswith('.png'):
    label =int(tf.strings.regex_replace(input=image,pattern=r".+_label_(\d+)\.png",rewrite =r"\1").numpy())
    original = directory_train+image
    target =("./data/new_data/train_data/{}/".format(label))+image
    shutil.copy2(original,target)

In [11]:
for image in images_test:
  if image.endswith('.png'):
    label =int(tf.strings.regex_replace(input=image,pattern=r".+_label_(\d+)\.png",rewrite =r"\1").numpy())
    original = directory_test+image
    target =("./data/new_data/test_data/{}/".format(label))+image
    shutil.copy2(original,target)

# ***CNN MODEL***

In [25]:
TRAINING_DIR = "data/new_data/train_data/"
training_datagen = ImageDataGenerator(
      rescale = 1./255,
      rotation_range=40,
      width_shift_range=0.2,
      height_shift_range=0.2,
      shear_range=0.2,
      zoom_range=0.2,
      horizontal_flip=True,
      fill_mode='nearest')

VALIDATION_DIR = "data/new_data/test_data/"
validation_datagen = ImageDataGenerator(rescale = 1./255)

train_generator = training_datagen.flow_from_directory(
  TRAINING_DIR,
  target_size=(32,32),
  class_mode='categorical',
  batch_size=32
)

validation_generator = validation_datagen.flow_from_directory(
  VALIDATION_DIR,
  target_size=(32,32),
  class_mode='categorical',
  batch_size=32
)

model = tf.keras.models.Sequential([
    # Note the input shape is the desired size of the image 150x150 with 3 bytes color
    # This is the first convolution
    tf.keras.layers.Conv2D(64, (3,3), activation='relu', padding="same", input_shape=(32, 32, 3)),
    tf.keras.layers.MaxPooling2D(2, 2),
    # The second convolution
    #tf.keras.layers.Conv2D(64, (3,3), activation='relu', padding="same"),
    #tf.keras.layers.MaxPooling2D(2,2),
    # The third convolution
    tf.keras.layers.Conv2D(128, (3,3), activation='relu', padding="same"),
    tf.keras.layers.MaxPooling2D(2,2),
    # The fourth convolution
    tf.keras.layers.Conv2D(256, (3,3), activation='relu', padding="same"),
    tf.keras.layers.MaxPooling2D(2,2),
    # Flatten the results to feed into a DNN
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dropout(0.5),
    # 512 neuron hidden layer
    tf.keras.layers.Dense(512, activation='relu'),
    tf.keras.layers.Dense(28, activation='softmax')
])


model.summary()

model.compile(loss="categorical_crossentropy", optimizer='rmsprop',metrics=['accuracy'])

history = model.fit(train_generator, epochs=50, steps_per_epoch=420, validation_data = validation_generator, verbose = 1, validation_steps=105)

model.save("data_new.h5")


Found 13440 images belonging to 28 classes.
Found 3360 images belonging to 28 classes.
Model: "sequential_12"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_44 (Conv2D)           (None, 32, 32, 64)        1792      
_________________________________________________________________
max_pooling2d_44 (MaxPooling (None, 16, 16, 64)        0         
_________________________________________________________________
conv2d_45 (Conv2D)           (None, 16, 16, 128)       73856     
_________________________________________________________________
max_pooling2d_45 (MaxPooling (None, 8, 8, 128)         0         
_________________________________________________________________
conv2d_46 (Conv2D)           (None, 8, 8, 256)         295168    
_________________________________________________________________
max_pooling2d_46 (MaxPooling (None, 4, 4, 256)         0         
________________________________

# ***Loading data for ANN (MLP)***

In [26]:
batch_size = 32
img_height = 32
img_width = 32

def get_dataset(dataset_dir):
    
    def process_filename(file_path):
        label = tf.strings.regex_replace(input=file_path,pattern=r".+_label_(\d+)\.png", rewrite=r"\1")
        label = tf.strings.to_number(label, tf.int32)-1
        #label = tf.one_hot(label, depth=29)
        return label

    def process_img(file_path):

        img = tf.io.read_file(file_path)
        img = tf.image.decode_png(img, channels=3)
        img = tf.image.resize(img, size=(32, 32))
        img = tf.image.convert_image_dtype(img, tf.float32)
        img = tf.cast(img, tf.float32) / 255.0
        return img
    
    data_dir = pathlib.Path(dataset_dir)
    file_list = [str(path.absolute()) for path in Path(data_dir).glob("*.png")]
    files_ds = tf.data.Dataset.from_tensor_slices((file_list))
    files_ds = files_ds.map(lambda x: (process_img(x), process_filename(x)))
    return files_ds

In [27]:
train_dataset_path = "data/train_data"
test_dataset_path = "data/test_data"

train_ds = get_dataset(train_dataset_path).shuffle(buffer_size=batch_size*10).batch(batch_size)
valid_ds = get_dataset(test_dataset_path).batch(batch_size)

# **ANN MODEL**

In [29]:
model1 = tf.keras.Sequential([
    # Convert the 32x32x3 image into a flat vector of 32x32x3 = 3072 values
    tf.keras.layers.Flatten(input_shape=(32, 32, 3), name='flatten_input'),
    # Create a "hidden" layer with 256 neurons and apply the ReLU non-linearity
    tf.keras.layers.Dense(256, activation=tf.nn.relu, name='input_to_hidden1'),
    # Create another hidden layer with 128 neurons
    tf.keras.layers.Dense(128, activation=tf.nn.relu, name='hidden1_to_hidden2'),
    # Create an "output layer" with 28 neurons
    tf.keras.layers.Dense(28, name='hidden_to_logits'),
])
model1.summary()
model1.compile(
    # Optimizer
    optimizer=tf.keras.optimizers.RMSprop(),  
    # Loss function to minimize
    loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
    # List of metrics to monitor
    metrics=[tf.keras.metrics.SparseCategoricalAccuracy()],
)

history = model1.fit(train_ds, epochs = 50, validation_data=valid_ds)

Model: "sequential_14"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
flatten_input (Flatten)      (None, 3072)              0         
_________________________________________________________________
input_to_hidden1 (Dense)     (None, 256)               786688    
_________________________________________________________________
hidden1_to_hidden2 (Dense)   (None, 128)               32896     
_________________________________________________________________
hidden_to_logits (Dense)     (None, 28)                3612      
Total params: 823,196
Trainable params: 823,196
Non-trainable params: 0
_________________________________________________________________
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Ep

# ***Comparaison Between ANN (mlp) and CNN for arabic_MNIST***

ANN_accuracy = 0.7735 and CNN_accuracy = 0.7345
ANN_loss = 3.8495 and CNN_loss = 3.0963

Even for ANN is better than CNN in accuracy for the validation dataset but in the loss we found that CNN is lower than ANN loss, and there is not a big diffrence between the accuracy, so we can say that CNN is better for arabic_MNIST