# **Handwriting Recognition - TensorFlow**

In [None]:
# **IMPORTING THE LIBRARIES**

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import os
import scipy as sp
import warnings
import tensorflow as tf
import string
import datetime
warnings.filterwarnings("ignore")
%matplotlib inline

# **LOADING THE DATASET**

In [None]:
# Read the training and validation label tables in one pass over their CSV paths.
train, valid = (
    pd.read_csv(csv_path)
    for csv_path in (
        '/kaggle/input/handwriting-recognition/written_name_train_v2.csv',
        '/kaggle/input/handwriting-recognition/written_name_validation_v2.csv',
    )
)

In [None]:
# Preview the first rows of the training frame.
train.head()

In [None]:
# Preview the first rows of the validation frame.
valid.head()

In [None]:
# Summary statistics for the columns of the training frame.
train.describe()


In [None]:
# Column names, non-null counts and dtypes of the training frame.
train.info()

In [None]:
# (rows, columns) of the training frame.
train.shape

In [None]:
# (rows, columns) of the validation frame.
valid.shape

In [None]:
# Frequency of each distinct row (combination of column values) in the training frame.
train.value_counts()

In [None]:
# dtype of every training column.
train.dtypes

In [None]:
# dtype of every validation column.
valid.dtypes

In [None]:
# Column labels of the validation frame.
valid.columns

In [None]:
# Column labels of the training frame.
train.columns

**Checking Null Values**

In [None]:
# Number of missing values in each training column.
train.isnull().sum()

In [None]:
# Number of missing values in each validation column.
valid.isnull().sum()

In [None]:
# Per-column flag: True where the training column has at least one missing value.
train.isnull().any()

**Our dataset contains some null values, so we have to drop them.**

**Dropping Null Values**

In [None]:
# Drop every training row that has a missing value; rebinds `train`, so the
# unfiltered frame is only recoverable by re-running the loading cell.
train=train.dropna()

In [None]:
# Drop every validation row that has a missing value; rebinds `valid`.
valid=valid.dropna()

In [None]:
# Re-check the missing-value count per training column after dropna().
train.isnull().sum()


In [None]:
# Re-check the missing-value count per validation column after dropna().
valid.isnull().sum()

In [None]:
# Per-column flag for any remaining missing value in the training frame.
train.isnull().any()

**Thus all the null values have been dropped.**

**Data Preprocessing**

In [None]:
# Let's find the categorical features: start from the full list of column names.
list_1 = train.columns.tolist()


In [None]:
# Keep only the columns stored with the generic 'object' dtype (text columns).
list_cate = [column for column in list_1 if train[column].dtype == 'object']


In [None]:
# LabelEncoder maps each distinct value it is fitted on to an integer code.
from sklearn.preprocessing import LabelEncoder
le=LabelEncoder()


In [None]:
# Integer-encode every categorical column of `train` in place.
# A fresh encoder is fitted per column and kept in `label_encoders`, so each
# column's value <-> code mapping stays available (e.g. for inverse_transform).
# Refitting one shared encoder would silently discard all but the last mapping.
label_encoders = {}
for column in list_cate:
    le = LabelEncoder()
    train[column] = le.fit_transform(train[column])
    label_encoders[column] = le
# As before, `le` ends up fitted on the last column of list_cate.


In [None]:
# Display the training frame as it stands after the encoding step.
train

In [None]:
# Separate the model target (FILENAME) from the remaining columns used as inputs.
# NOTE(review): FILENAME is used as the target and every other column as the
# feature set - confirm this is the intended direction for the task.
y = train['FILENAME']
X = train.drop(columns='FILENAME')


# EXPLORATORY DATA ANALYSIS

**HISTOGRAM**

In [None]:
# Histogram of each numeric column of train on a 14x12 inch figure.
train.hist(figsize=(14,12))
plt.show()


In [None]:
# Pairwise correlation between the columns of train.
train.corr()

**HEATMAP**

In [None]:
# Annotated heatmap of the pairwise column correlations.
plt.figure(figsize=(12, 10))
correlation_matrix = train.corr()
sns.heatmap(correlation_matrix, annot=True)


**BOXPLOT**

In [None]:
# Box plots of FILENAME and IDENTITY in the first two cells of a 2x3 grid.
plt.figure(figsize=(14, 10))
sns.set_style(style='whitegrid')
filename_ax = plt.subplot(2, 3, 1)
sns.boxplot(x='FILENAME', data=train, ax=filename_ax)
identity_ax = plt.subplot(2, 3, 2)
sns.boxplot(x='IDENTITY', data=train, ax=identity_ax)




In [None]:

# Histogram of the IDENTITY column values.
train['IDENTITY'].plot(kind='hist')


**BARPLOT**

In [None]:
# Bar plot of FILENAME against IDENTITY for rows 180-189 of train.
plt.style.use("default")
identity_bar_ax = sns.barplot(x="IDENTITY", y="FILENAME", data=train[180:190])
identity_bar_ax.set_title("Identity vs Filename", fontsize=15)
identity_bar_ax.set_xlabel("Identity")
identity_bar_ax.set_ylabel("Filename")
plt.show()



In [None]:
# Bar plot of IDENTITY against FILENAME for rows 180-189 of train.
plt.style.use("default")
filename_bar_ax = sns.barplot(x="FILENAME", y="IDENTITY", data=train[180:190])
filename_bar_ax.set_title("Filename vs Identity", fontsize=15)
filename_bar_ax.set_xlabel("Filename")
filename_bar_ax.set_ylabel("Identity")
plt.show()


**LINEPLOT**

In [None]:
# Green line plot of IDENTITY against FILENAME for rows 190-399 of train.
plt.style.use("default")
plt.figure(figsize=(14, 7))
line_ax = sns.lineplot(x="FILENAME", y="IDENTITY", data=train[190:400], color='g')
line_ax.set_title("Filename vs Identity")
line_ax.set_xlabel("Filename")
line_ax.set_ylabel("Identity")
plt.show()


**KDE PLOT**

In [None]:
# Filled kernel-density estimate of the IDENTITY column.
plt.style.use("default")
plt.figure(figsize=(14,8))
# `fill` is the current seaborn spelling of the deprecated `shade` argument.
sns.kdeplot(train['IDENTITY'],fill=True,color='g')
plt.xlabel('Identity')
# The vertical axis of a KDE is the estimated density, not another column.
plt.ylabel('Density')
plt.show()


**SCATTER PLOT**

In [None]:
# Scatter of IDENTITY against FILENAME using every row of train.
sns.scatterplot(x='FILENAME',y="IDENTITY",data=train)

**TRAINING AND TESTING DATA**

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 30% of the rows for testing; the fixed seed keeps the split repeatable.
split_options = {"test_size": 0.3, "random_state": 12}
X_train, X_test, y_train, y_test = train_test_split(X, y, **split_options)


In [None]:
# Row counts of the four split pieces, one per line, in the order
# X_train, X_test, y_train, y_test.
for split_part in (X_train, X_test, y_train, y_test):
    print(len(split_part))

**TENSORFLOW**

In [None]:
from tensorflow import keras
from tensorflow.keras import layers

# Fully connected network: three ReLU hidden layers (80 -> 60 -> 40 units)
# feeding a single output unit.
model = tf.keras.Sequential([
      tf.keras.layers.Dense(units=80,activation='relu',name = 'input_layer'),
      tf.keras.layers.Dense(units=60,activation='relu',name = 'dense_layer1'),
      tf.keras.layers.Dense(units=40,activation='relu',name = 'dense_layer2'),
      # Sigmoid squashes the output into (0, 1). The string loss
      # 'binary_crossentropy' (from_logits=False by default) and the
      # 'binary_accuracy' metric (0.5 threshold) both expect probabilities,
      # so a linear output here would be scored incorrectly.
      tf.keras.layers.Dense(units=1,activation='sigmoid',name='Output_layer')
],name='Model')   # Dropout and Batch Normalization can also be used.


# NOTE(review): y_train comes from the FILENAME column chosen in the X/y split
# cell; binary cross-entropy assumes 0/1 targets - confirm that the target and
# loss are the intended ones for this task.
model.compile(
    optimizer='adam',
    loss='binary_crossentropy',        # In loss mae can also be used.
    metrics=['binary_accuracy'],
)


# Train on the training split only (no validation data is passed), so
# history.history holds the training 'loss' and 'binary_accuracy' per epoch.
history = model.fit(X_train,y_train,batch_size=256,
    epochs=150,
)


**Activation functions other than ReLU include:**

**Sigmoid**

**Threshold**

**Hyperbolic Tangent**

![](https://i.imgur.com/rFI1tIk.gif)

In [None]:
# Run the trained model on the held-out features; the bare name on the last
# line displays the resulting predictions.
y_pred = model.predict(X_test)
y_pred


In [None]:
# Turn the per-epoch training history into a DataFrame (one column per metric).
history_df = pd.DataFrame(history.history)
# Plot the loss column with pandas' built-in plotting.
loss_per_epoch = history_df['loss']
loss_per_epoch.plot()


In [None]:
# Print the layer-by-layer summary of the model.
model.summary()


In [None]:
from tensorflow.keras.utils import plot_model

# Draw the layer graph, including tensor shapes.
# NOTE(review): plot_model usually needs pydot and Graphviz installed - confirm
# they are available in this environment.
plot_model(model, show_shapes = True)


![](https://i.imgur.com/tHiVFnM.png)

In [None]:
history_df = pd.DataFrame(history.history)
# Skip the first five epochs (index 0-4) so the early, large values do not
# dominate the plots.
history_df.loc[5:, ['loss']].plot()
history_df.loc[5:, ['binary_accuracy']].plot()
# 'loss' and 'binary_accuracy' are the training-set metrics (fit() is called
# without validation data), so the summary is labelled as training, not validation.
print(("Best Training Loss: {:0.4f}"
       "\nBest Training Accuracy: {:0.4f}")
      .format(history_df['loss'].min(),
              history_df['binary_accuracy'].max()))



![](https://i.imgur.com/eP0gppr.png)

In [None]:
from tensorflow.keras.callbacks import EarlyStopping

# Stopping criteria collected in one mapping, then unpacked into the callback.
# NOTE(review): no fit() call visible in this notebook passes this callback, so
# it currently has no effect on training - confirm whether it should be wired in.
early_stopping_options = {
    "min_delta": 0.001,            # smallest change that counts as an improvement
    "patience": 20,                # epochs to wait without improvement before stopping
    "restore_best_weights": True,  # roll back to the best weights seen
}
early_stopping = EarlyStopping(**early_stopping_options)


In [None]:
# Compute the loss and the compiled metrics on the held-out split.
model.evaluate(X_test,y_test)
