In [None]:
# Make conda answer "yes" to every prompt in this environment so the installs below run unattended.
!conda config --env --set always_yes true
# Install the Arabic text-shaping packages from conda-forge (arabic_reshaper is imported further down).
!conda install -c conda-forge arabic_reshaper
!conda install -c conda-forge python-bidi 

In [None]:
import os
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

import random # Generate pseudo-random numbers
from random import randint

from sklearn.utils import shuffle # Shuffle arrays or sparse matrices in a consistent way
from sklearn.model_selection import train_test_split # Split arrays or matrices into random train and test subsets
from sklearn.metrics import classification_report, confusion_matrix
import sklearn

import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec # Specifies the geometry of the grid that a subplot can be placed in.

import keras
from keras import models as Models
from keras import layers as Layers
from keras.preprocessing import image
from keras.models import Sequential,Model
from keras.layers import Input,InputLayer, Dense, Activation, ZeroPadding2D, BatchNormalization
from keras.layers import Flatten, Conv2D, AveragePooling2D, MaxPooling2D, Dropout
from keras.optimizers import Adam
from keras.callbacks import ModelCheckpoint,EarlyStopping
from keras import utils as Utils
from keras.utils import to_categorical # Converts a class vector (integers) to binary class matrix.

from keras.utils.vis_utils import model_to_dot

import seaborn as sns

# from IPython.display import SVG

import arabic_reshaper # Reconstruct Arabic sentences to be used in applications that don't support Arabic
#from bidi.algorithm import get_display

In [None]:
# global variables
# Label language code; get_classlabel accepts 'Ar' or 'En'.
# NOTE(review): this constant is not read by any of the cells visible here - confirm it is still needed.
Language = "Ar"
# CSV that maps every image path to its class id (read with pd.read_csv).
ImageClassMapping_path = "../input/Labels/ImagesClassPath.csv"
# Excel sheet with the class ids and their labels (read with pd.read_excel).
ClassLabels_path = "../input/Labels/ClassLabels.xlsx"
# Root folder that the image paths in the mapping file are joined onto.
ImagesRoot_path = "../input/"

# File name intended for the saved model.
ModelFileName ='Model_255.h5'

**2- Load Data**


In [None]:
# Load the image-path -> class-id mapping (about 54k rows according to the original note)
# and preview the first rows.
df_ImageClassPath = pd.read_csv(ImageClassMapping_path)
display(df_ImageClassPath.head())

In [None]:
# Load the class-label table (one row per class id) and preview the first rows.
df_Classes = pd.read_excel(ClassLabels_path)
display(df_Classes.head())

In [None]:
# Summary statistics (count, mean, min, max, ...) of the number of images per class.
df_ImageClassPath.groupby("ClassId").size().describe()

In [None]:

# Bar chart of the number of images available for each class.
# The frame is indexed by range(32), so only class ids 0..31 are plotted.
iindex = range(32)
ddata = {"samples destribution": df_ImageClassPath.groupby("ClassId").size()}

ddataframe = pd.DataFrame(ddata, index=iindex)
ddataframe.plot.bar(title='samples destribution', rot=15, stacked=True)
plt.show(block=True)

**3- Define Functions**

**SplitData(predictions,testsize):** *sets aside a fixed number of samples per class for prediction (predictions) and splits the remaining samples into training and test sets, with testsize as the test fraction*<br><br>
**get_classlabel(class_code,lang= 'Ar'):** *returns the label of a class (class_code) in Arabic or English; the default is Arabic*<br><br>
**getDataSet(setType,isDL):** *prepares the images and class labels to be used by a model. setType: which split to load ('Training', 'Test' or 'Prediction'); isDL: True for the Keras CNN, False for the scikit-learn KNN*<br><br>
**display_prediction(col_size, row_size,XPred,yPred):** *displays the images in XPred in a grid of row_size x col_size, each titled with its class id and label from yPred*<br><br>

In [None]:
# Split the images into 3 groups: a fixed-size prediction hold-out, training and test.
# The split is done class by class so every set stays balanced across classes.
def SplitData(predictions, testsize, random_state=42):
    """Split ``df_ImageClassPath`` into training, test and prediction frames.

    Every class listed in ``df_Classes`` is first down-sampled to the size of
    the smallest class, then ``predictions`` rows are held out for prediction
    and the rest is divided by ``train_test_split``.

    Parameters
    ----------
    predictions : int
        Number of rows per class reserved for the prediction set.
    testsize : float or int
        Forwarded to ``train_test_split`` as ``test_size``.
    random_state : int or None, default 42
        Seed for the train/test split so the result is reproducible.
        Pass ``None`` to get an unseeded split.

    Returns
    -------
    tuple of DataFrame
        ``(training, test, prediction)`` frames with the columns of
        ``df_ImageClassPath``.

    Raises
    ------
    ValueError
        If ``predictions`` leaves no rows of a class for training/testing.
    """
    samples_per_class = df_ImageClassPath.groupby("ClassId").size().min()
    print('{0} Samples per Class'.format(samples_per_class))

    if predictions >= samples_per_class:
        raise ValueError(
            "predictions ({}) must be smaller than the smallest class size ({})".format(
                predictions, samples_per_class))

    # Per-class pieces are collected in lists and concatenated once at the end:
    # DataFrame.append was removed in pandas 2.0 and copied the frame on every call.
    train_parts, test_parts, pred_parts = [], [], []

    for class_id in df_Classes['ClassId']:
        # Down-sample so that all classes have the same number of rows.
        class_rows = df_ImageClassPath[df_ImageClassPath['ClassId'] == class_id].sample(
            samples_per_class, random_state=42)

        class_pred = class_rows.sample(n=predictions, random_state=1)

        # Anti-join: keep only the rows that were not picked for prediction.
        remaining = (
            pd.merge(class_rows, class_pred, indicator=True, how='left')
            .query('_merge=="left_only"')
            .drop('_merge', axis=1)
        )

        class_train, class_test = train_test_split(
            remaining, test_size=testsize, random_state=random_state)

        pred_parts.append(class_pred)
        train_parts.append(class_train)
        test_parts.append(class_test)

    def _stack(parts):
        # With no classes at all, return an empty frame that still has the right columns.
        return pd.concat(parts) if parts else df_ImageClassPath.iloc[0:0].copy()

    return _stack(train_parts), _stack(test_parts), _stack(pred_parts)


In [None]:
# Retrieve the class label (Arabic or English) for a class id
def get_classlabel(class_code, lang='Ar'):
    """Return the label of ``class_code`` from ``df_Classes``.

    Parameters
    ----------
    class_code
        Value looked up in the ``ClassId`` column of ``df_Classes``.
    lang : str, default 'Ar'
        ``'Ar'`` returns the ``ClassAr`` label, reshaped and reordered so that
        it renders correctly where Arabic shaping is not supported;
        ``'En'`` returns the ``Class`` label unchanged.

    Returns
    -------
    The label, or ``None`` when ``lang`` is neither ``'Ar'`` nor ``'En'``.

    Raises
    ------
    IndexError
        If no row of ``df_Classes`` has ``ClassId == class_code``.
    """
    is_class = df_Classes['ClassId'] == class_code
    if lang == 'Ar':
        # Imported locally so this function does not rely on a module-level
        # import of get_display (python-bidi) being present.
        from bidi.algorithm import get_display

        text_to_be_reshaped = df_Classes.loc[is_class, 'ClassAr'].values[0]
        reshaped_text = arabic_reshaper.reshape(text_to_be_reshaped)
        return get_display(reshaped_text)
    elif lang == 'En':
        return df_Classes.loc[is_class, 'Class'].values[0]
    return None
    

In [None]:
# Prepare the image and class-label arrays for one of the data splits
def getDataSet(setType, isDL):  # setType: 'Training', 'Test' or 'Prediction'
    """Load the images of one split and return them with their labels.

    Parameters
    ----------
    setType : str
        ``'Training'``, ``'Test'`` or ``'Prediction'``; selects the global
        frame ``dtTraining``, ``dtTest`` or ``dtPred``.
    isDL : bool
        ``True`` keeps each image as the array returned by ``img_to_array``
        and one-hot encodes the labels (Keras CNN); ``False`` flattens each
        image to 1-D and leaves the labels as class ids (scikit-learn KNN).

    Returns
    -------
    (imgs, lbls) : tuple of numpy.ndarray
        Images divided by 255 and their labels, shuffled together with a
        fixed seed. The order therefore differs from the row order of the
        source frame; always pair the images with the returned labels.

    Raises
    ------
    ValueError
        If ``setType`` is not one of the three supported names.
    """
    if setType == 'Training':
        df = dtTraining
    elif setType == 'Test':
        df = dtTest
    elif setType == 'Prediction':
        df = dtPred
    else:
        raise ValueError(
            "setType must be 'Training', 'Test' or 'Prediction', got {!r}".format(setType))

    imgs = []
    lbls = []
    for class_id, image_path in zip(df['ClassId'], df['ImagePath']):
        try:
            imageFilePath = os.path.join(ImagesRoot_path, image_path)
            # target_size is (height, width); the channel count comes from color_mode.
            img = image.load_img(imageFilePath, target_size=(32, 32),
                                 color_mode="grayscale")
            img = image.img_to_array(img)  # to array
            img = img / 255  # Normalize
            if not isDL:
                img = img.flatten()  # for knn_classifier Model
        except Exception as e:
            # Best effort: report the unreadable image and skip it.
            print(e)
            continue
        # Append image and label together, only after a successful load,
        # so the two lists can never get out of step.
        imgs.append(img)
        lbls.append(class_id)

    if imgs:
        # shuffle() returns shuffled copies, so the result has to be assigned.
        imgs, lbls = shuffle(imgs, lbls, random_state=255)

    imgs = np.array(imgs)
    lbls = np.array(lbls)
    if isDL:
        lbls = to_categorical(lbls)  # for keras CNN Model
    return imgs, lbls

In [None]:
def display_prediction(col_size, row_size, XPred, yPred, lang='Ar'):
    """Show images in a ``row_size`` x ``col_size`` grid, titled with their class.

    Parameters
    ----------
    col_size, row_size : int
        Number of grid columns and rows.
    XPred : sequence of arrays
        Images indexed as ``XPred[i][:, :, 0]`` (first channel is drawn).
    yPred : sequence
        Class id of each image, passed to ``get_classlabel`` for the title.
    lang : str, default 'Ar'
        Label language forwarded to ``get_classlabel``.

    Grid cells beyond the number of available images are left blank.
    """
    # figsize is (width, height): width scales with columns, height with rows.
    # squeeze=False keeps ``ax`` two-dimensional even for a single row or column.
    fig, ax = plt.subplots(row_size, col_size,
                           figsize=(col_size * 2.5, row_size * 1.5),
                           squeeze=False)
    n_images = min(len(XPred), len(yPred))
    # ax.flat walks the grid row by row, left to right.
    for img_index, axis in enumerate(ax.flat):
        axis.set_xticks([])
        axis.set_yticks([])
        if img_index >= n_images:
            axis.axis('off')
            continue
        axis.imshow(XPred[img_index][:, :, 0], cmap='gray')
        axis.set_title("({}) {}".format(yPred[img_index],
                                        get_classlabel(yPred[img_index], lang)))

**4- Preparing Data**

In [None]:
# Split our dataset into Training, Test and Prediction sets.
# Take 3 images per class for later prediction (96 images = 3 x 32 classes),
# then split the remaining images into 30% test and 70% training (testsize = 0.3).

dtTraining, dtTest,dtPred = SplitData(3,0.3)

In [None]:
# Report how many rows each split holds; the per-class figure divides by 32 classes.
print('Pred     {} \t # {} per class'.format(len(dtPred), len(dtPred) // 32))
print('Training {} \t # {} per class'.format(len(dtTraining), len(dtTraining) // 32))
print('Test     {} \t # {} per class'.format(len(dtTest), len(dtTest) // 32))
print('---------------')
print('Sum      {}'.format(len(dtTraining) + len(dtTest) + len(dtPred)))

In [None]:
# Stacked bar chart of the per-class row counts in the training and test sets.
# The frame is indexed by range(32), so only class ids 0..31 are plotted.
iindex = range(32)
ddata = {
    "Training": dtTraining.groupby("ClassId").size(),
    "Test": dtTest.groupby("ClassId").size(),
}

ddataframe = pd.DataFrame(ddata, index=iindex)
ddataframe.plot.bar(title='Training vs Test data', rot=15, stacked=True)
plt.show(block=True)

**5- Model Definition**

**5.B- Keras Convolutional Neural Network (CNN)**

In [None]:
# Load the three splits in the format used by the Keras CNN (isDL=True).
# The labels of the prediction split are not kept.
X_train, y_train = getDataSet(setType='Training', isDL=True)
X_test, y_test = getDataSet(setType='Test', isDL=True)
X_pred, _ = getDataSet(setType='Prediction', isDL=True)

In [None]:
from sklearn.model_selection import train_test_split
# Carve a validation set (30%) out of the training data, with a fixed seed.
# NOTE: this overwrites X_train / y_train, so running the cell again shrinks the
# training set a second time - re-run the cell that loads the data sets first.
X_train,X_valid,y_train,y_valid=train_test_split(X_train,y_train,test_size=0.3,random_state=0)

In [None]:
# Print the array shapes of every split (the prediction split has no labels here).
print("\n".join([
    "Shape of train Images:{} , train Labels: {}".format(X_train.shape, y_train.shape),
    "Shape of validation Images:{} , valid Labels: {}".format(X_valid.shape, y_valid.shape),
    "Shape of test Images:{} , test Labels: {}".format(X_test.shape, y_test.shape),
    "Shape of Prediction Images:{} , Prediction Labels: {}".format(X_pred.shape, "?"),
]))

In [None]:
# Convolutional part of the network: three Conv2D + MaxPooling2D stages on
# 32x32x1 inputs, then Flatten. The dense layers are added in the next cell.
model = Sequential([
    Conv2D(filters=128, kernel_size=(5, 5), strides=1, padding='same',
           activation='relu', input_shape=(32, 32, 1)),
    MaxPooling2D(pool_size=(3, 3), strides=2, padding='same'),
    Conv2D(filters=64, kernel_size=(2, 2), strides=1, padding='same',
           activation='relu'),
    MaxPooling2D(pool_size=(2, 2), strides=2, padding='same'),
    Conv2D(filters=32, kernel_size=(2, 2), strides=1, padding='same',
           activation='relu'),
    MaxPooling2D(pool_size=(2, 2), strides=2, padding='same'),
    Flatten(),
])


In [None]:
# Classifier head: one hidden dense layer with dropout, then a 32-way softmax.
# model.add mutates the model, so running this cell twice stacks a second head.
for _head_layer in (
    Dense(units=512, activation='relu'),
    Dropout(rate=0.25),
    Dense(units=32, activation='softmax'),
):
    model.add(_head_layer)
model.summary()

In [None]:
from keras import backend as K
from sklearn.metrics import f1_score
import copy


def recall_m(y_true, y_pred):
    """Recall computed with backend ops: rounded true positives / rounded actual positives.

    Both the element-wise product ``y_true * y_pred`` and ``y_true`` are
    clipped to [0, 1] and rounded before being summed; ``K.epsilon()`` is
    added to the denominator to avoid division by zero.
    """
    hits = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
    actual_positives = K.sum(K.round(K.clip(y_true, 0, 1)))
    return hits / (actual_positives + K.epsilon())

def precision_m(y_true, y_pred):
    """Precision computed with backend ops: rounded true positives / rounded predicted positives.

    Both the element-wise product ``y_true * y_pred`` and ``y_pred`` are
    clipped to [0, 1] and rounded before being summed; ``K.epsilon()`` is
    added to the denominator to avoid division by zero.
    """
    hits = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
    flagged_positives = K.sum(K.round(K.clip(y_pred, 0, 1)))
    return hits / (flagged_positives + K.epsilon())

In [None]:
def f1_m(y_true, y_pred):
    """F1 score: harmonic mean of ``precision_m`` and ``recall_m``.

    ``K.epsilon()`` is added to the denominator to avoid division by zero.
    """
    p = precision_m(y_true, y_pred)
    r = recall_m(y_true, y_pred)
    harmonic_half = (p * r) / (p + r + K.epsilon())
    return 2 * harmonic_half

In [None]:

# Adam optimizer with learning rate 1e-4; report accuracy plus the custom
# F1 / recall / precision metrics defined above.
opt = Adam(learning_rate=0.0001)
model.compile(
    loss='categorical_crossentropy',
    optimizer=opt,
    metrics=['accuracy', f1_m, recall_m, precision_m],
)

In [None]:
# Training callbacks.
callbacks_list = [
    # NOTE(review): patience (50) equals the number of epochs passed to fit below,
    # so this callback can never stop the run early - lower patience if early
    # stopping is actually wanted.
    EarlyStopping(monitor='val_loss', patience=50),
    # Keep only the weights with the lowest validation loss. The path comes from
    # the ModelFileName constant so that saving and any later loading agree
    # (the previous literal differed from it in letter case).
    ModelCheckpoint(filepath=ModelFileName, monitor='val_loss', save_best_only=True),
]

# Train with the held-out validation split; `trained.history` holds the per-epoch metrics.
trained = model.fit(X_train, y_train, epochs=50, validation_data=(X_valid, y_valid),
                    batch_size=200, callbacks=callbacks_list)


**7- Model Evaluation**

In [None]:
# Evaluate on the test split. `p` holds the loss followed by the metrics in the
# order they were given to model.compile.
p = model.evaluate(X_test, y_test)
_metric_labels = ("loss", "accurecy", "f1-score", "recall", "percision")
for _label, _value in zip(_metric_labels, p):
    print(_label, str(_value))

In [None]:
import pandas as pd
# Save the per-epoch training history (one row per epoch) as CSV.
hist_df = pd.DataFrame(trained.history)
hist_csv_file = 'history1.csv'
# Pass the path and let pandas open the file itself: a text handle opened
# without newline='' can produce blank lines between rows on Windows.
hist_df.to_csv(hist_csv_file)