In [93]:

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns

import cv2
import glob
## Sklearn
from sklearn import datasets
## Preprocessing
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
## Metrics
from sklearn.metrics import accuracy_score

## tensorflow & Keras
import tensorflow as tf
from tensorflow.keras import callbacks
from keras.preprocessing.image import ImageDataGenerator

## Loading data

In [30]:
# Read the train/test label CSVs, preview the training table and report both shapes.
train_labels, test_labels = (
    pd.read_csv(csv_file)
    for csv_file in (
        '../input/arabic-hwr-ai-pro-intake1/train.csv',
        '../input/arabic-hwr-ai-pro-intake1/test.csv',
    )
)
display(train_labels.head())
for label_table in (train_labels, test_labels):
    print(label_table.shape)

## EDA

In [55]:
# Bar chart of how many training rows carry each label value.
train_labels['label'].value_counts().plot.bar()

We have 13,440 training images of Arabic handwritten characters, labeled from 1 to 28. The classes are balanced: each class contains 480 images.
We have 3,360 test images.

In [19]:
# Preview only the first rows (same as the training table preview) instead of
# rendering the whole test table.
test_labels.head()

Load Images and Prepare X and y

In [32]:
# Directories that are globbed for the training and test PNG files.
train_path, test_path = (
    '../input/arabic-hwr-ai-pro-intake1/train/',
    '../input/arabic-hwr-ai-pro-intake1/test/',
)

In [73]:
# Read every training PNG as a single-channel (grayscale) image, in sorted
# filename order.
# NOTE(review): this assumes sorted filename order matches the row order of
# train.csv, because labels are paired with images by position -- confirm.
train_files = sorted(glob.glob(train_path + '*.png'))
train_images = [cv2.imread(file, cv2.IMREAD_GRAYSCALE) for file in train_files]

# cv2.imread returns None (it does not raise) when a file cannot be decoded;
# fail here with a clear message instead of building a broken object array.
unreadable_train = [file for file, image in zip(train_files, train_images) if image is None]
if unreadable_train:
    raise IOError('could not read {} training image(s), first one: {}'.format(
        len(unreadable_train), unreadable_train[0]))

# Images and label rows are matched by position, so their counts must agree
# (this also catches an empty glob caused by a wrong path).
if len(train_images) != len(train_labels):
    raise ValueError('found {} training images but {} label rows'.format(
        len(train_images), len(train_labels)))

train_images = np.array(train_images)
print('train images shape {}'.format(train_images.shape))

View the first training image

In [74]:
# Render the first training image with matplotlib's default colormap.
first_image = train_images[0]
plt.imshow(first_image)

## Preparing Images for the model

Reshape images

In [75]:
# Reshape into a batch of 32x32 images with an explicit single-channel axis;
# -1 lets numpy infer the number of images.
target_shape = (-1, 32, 32, 1)
train_images = np.reshape(train_images, target_shape)
print("images shape: {}".format(train_images.shape))
train_images[0]

Normalization

In [76]:
# Divide every pixel value by 255. The array is rebound to its scaled version,
# so re-running this cell would divide a second time.
train_images = np.true_divide(train_images, 255)

In [77]:
# Inspect the first image's values after scaling.
train_images[0, ...]

In [78]:
def displayImgs(images, df, num, cols=10):
    """Plot the first ``num`` images in a grid, each titled with its label.

    Parameters
    ----------
    images : indexable sequence/array of images; ``images[i]`` is drawn in gray.
        A trailing channel axis of length 1 (H, W, 1) is dropped before drawing.
    df : DataFrame with a ``label`` column; ``df.label.iloc[i]`` titles image ``i``.
    num : number of leading images to show; clamped to the data available.
    cols : number of grid columns (default 10, the original layout).

    Raises
    ------
    ValueError
        If ``cols`` is smaller than 1.
    """
    if cols < 1:
        raise ValueError('cols must be at least 1')

    # Never index past the end of the images or the label table.
    count = min(num, len(images), len(df))

    # Keep the original 5-row layout for up to 5 * cols images and grow the
    # grid (and the figure height) instead of crashing when more are requested.
    rows = max(5, -(-count // cols))
    plt.figure(figsize=(20, 2 * rows))

    for idx in range(count):
        image = np.asarray(images[idx])
        if image.ndim == 3 and image.shape[-1] == 1:
            # imshow expects 2-D data for single-channel images.
            image = image[..., 0]
        plt.subplot(rows, cols, idx + 1)
        plt.imshow(image, cmap='gray')
        plt.axis('off')
        plt.title('label : {}'.format(df.label.iloc[idx]))

In [79]:
# Show the first 50 training images together with their labels.
displayImgs(images=train_images, df=train_labels, num=50)

## Splitting data

In [80]:
# Hold out 20% of the training data for validation. Stratifying on the label
# keeps the class proportions equal in both parts; the seed fixes the split.
label_column = train_labels['label']
X_train, X_valid, y_train, y_valid = train_test_split(
    train_images,
    label_column,
    test_size=0.2,
    stratify=label_column,
    random_state=42,
)

In [81]:
# Report the shapes of the four split parts, one per line.
for split_part in (X_train, X_valid, y_train, y_valid):
    print(split_part.shape)

## Modeling

In [83]:
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten, Conv2D, MaxPool2D

# CNN for 32x32 single-channel images: two convolutional blocks followed by a
# dense classifier.
model = tf.keras.models.Sequential([
    # Block 1: two 3x3 convolutions (32 filters), then 2x2 max-pooling -> 16x16x32.
    # Only the first layer of a Sequential model declares the input shape.
    tf.keras.layers.Conv2D(filters=32, kernel_size=3, activation='relu', padding='same',
                           input_shape=(32, 32, 1)),
    tf.keras.layers.Conv2D(filters=32, kernel_size=3, activation='relu', padding='same'),
    tf.keras.layers.MaxPooling2D(pool_size=2),
    tf.keras.layers.Dropout(0.25),
    #-----------------------------------------------
    # Block 2: two 3x3 convolutions (64 filters), then 2x2 max-pooling -> 8x8x64.
    tf.keras.layers.Conv2D(filters=64, kernel_size=3, activation='relu', padding='same'),
    tf.keras.layers.Conv2D(filters=64, kernel_size=3, activation='relu', padding='same'),
    tf.keras.layers.MaxPooling2D(pool_size=2, strides=2),
    tf.keras.layers.Dropout(0.25),
    #-----------------------------------------------
    # Classifier head. Flatten yields 8*8*64 = 4096 features; the Dense input
    # size is inferred, so no (previously incorrect) input_dim is passed.
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(256, activation='relu'),
    tf.keras.layers.Dense(256, activation='relu'),
    tf.keras.layers.Dropout(0.5),
    # 29 output units: the sparse integer labels index output units directly,
    # and per the EDA note the labels run from 1 to 28, so unit 0 exists only
    # to keep label == unit index and is never a training target.
    tf.keras.layers.Dense(29, activation='softmax'),
])


In [84]:
# Print the model's layer-by-layer summary.
model.summary()

In [89]:
# The sparse categorical cross-entropy loss consumes the integer class labels
# directly, so the targets are not one-hot encoded.
model.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
# The EarlyStopping callback is created once, in the cell right before
# model.fit. The copy that used to be defined here (patience=5) was always
# overwritten before training and has been removed.

### Data Augmentation

In [90]:
#https://studymachinelearning.com/keras-imagedatagenerator/
# Random augmentation: small rotations, zooms and shifts; no flips.
# Centering, std-normalisation, ZCA whitening and flips stay at their
# ImageDataGenerator defaults (all disabled), so they are not spelled out.
augmentation_options = dict(
    rotation_range=10,
    zoom_range=0.1,
    width_shift_range=0.1,
    height_shift_range=0.1,
)
datagen = ImageDataGenerator(**augmentation_options)

In [91]:
# Fit the generator on the training images. Per the Keras documentation this
# step only computes dataset statistics needed by featurewise centering /
# std-normalisation / ZCA whitening, all of which are disabled for `datagen`.
datagen.fit(X_train)

In [94]:
# Stop once the validation loss has not improved for 25 consecutive epochs and
# roll the model back to the weights of its best epoch.
early_stopp = callbacks.EarlyStopping(
    monitor='val_loss',
    patience=25,
    restore_best_weights=True,
)

In [95]:
# Train on augmented batches. Previously `datagen` was configured and fitted
# but never used, so the model only ever saw the raw images. The batch size is
# given to flow() because fit() does not accept batch_size for iterator input.
# Validation uses the untouched hold-out arrays.
history = model.fit(
    datagen.flow(X_train, np.asarray(y_train), batch_size=32),
    validation_data=(X_valid, y_valid),
    epochs=30,
    callbacks=[early_stopp],
)


### Prepare test images

In [96]:
# Read every test PNG as a single-channel (grayscale) image, in sorted
# filename order.
# NOTE(review): this assumes sorted filename order matches the row order of
# test.csv, because predictions are written back by position -- confirm.
test_files = sorted(glob.glob(test_path + '*.png'))
test_images = [cv2.imread(file, cv2.IMREAD_GRAYSCALE) for file in test_files]

# cv2.imread returns None (it does not raise) when a file cannot be decoded.
unreadable_test = [file for file, image in zip(test_files, test_images) if image is None]
if unreadable_test:
    raise IOError('could not read {} test image(s), first one: {}'.format(
        len(unreadable_test), unreadable_test[0]))

# One prediction per test row is stored later, so the counts must agree
# (this also catches an empty glob caused by a wrong path).
if len(test_images) != len(test_labels):
    raise ValueError('found {} test images but {} test rows'.format(
        len(test_images), len(test_labels)))

test_images = np.array(test_images)

# Same preprocessing as the training images: add the channel axis, then scale by 1/255.
test_images = test_images.reshape((-1, 32, 32, 1))
test_images = test_images/255

In [98]:
# The predicted class of each test image is the index of its largest output.
class_probabilities = model.predict(test_images)
y_preds_classes = class_probabilities.argmax(axis=-1)

In [100]:
# Store the predicted class indices in the test table's `label` column
# (modifies `test_labels` in place).
test_labels['label'] = y_preds_classes

In [102]:
# Write the submission file with only the `id` and `label` columns and no index column.
test_labels.to_csv('/kaggle/working/submission.csv', columns=['id', 'label'], index=False)