In [1]:
import pandas as pd
import cv2
import matplotlib.pyplot as plt
import numpy as np

from tensorflow.keras.layers import Dense, Conv2D, MaxPooling2D, Flatten, Dropout, Concatenate, BatchNormalization
from tensorflow.keras import Model,layers,Input
from keras.callbacks import EarlyStopping, ModelCheckpoint
import tensorflow as tf

In [20]:
# Load the label table; its 'fontFamily' column is one-hot encoded further down
# and used as the classification target.
data = pd.read_csv('../input/w1n1-data-v2/df.csv')

In [21]:
# Preview the first rows of the label table.
data.head()

In [3]:
# Load the pre-computed image array. np.load accepts a path directly, so no
# explicit file handle is needed.
img_array = np.load('../input/new-hack1-1/img_array.npy')

# Axis 1 and axis 2 of the stored array are used as the image height and width.
height = img_array.shape[1]
width = img_array.shape[2]

# Add a trailing axis of size 1 so the array matches the model's
# (height, width, 1) input. Reshaping the loaded array directly avoids the
# extra full copy of the dataset that wrapping it in np.array(...) created.
img_array = img_array.reshape(-1, height, width, 1)
print(img_array.shape)

In [1]:
### .reshape(-1, height, width, 1)  -->  the model needs 4-D input, so a trailing axis of size 1 is added

In [22]:
# Confirm the shape after the reshape: (n_samples, height, width, 1).
img_array.shape

In [4]:
# Inspect the raw values of the first sample.
img_array[0]

In [5]:
# Class balance: number of rows per distinct 'fontFamily' value.
data['fontFamily'].value_counts()

In [6]:
# Row counts for a 60 % / 20 % / 20 % train / validation / test split.
# The test split takes whatever is left after truncating the other two.
train_set, val_set = (int(fraction * len(data)) for fraction in (0.6, 0.2))
test_set = len(data) - (train_set + val_set)

In [24]:
# Show the three split sizes, one per line.
print(train_set, val_set, test_set, sep='\n')

In [7]:
# One-hot encode the target; `col_fam` keeps the column order so that an
# argmax index can be mapped back to its fontFamily label later.
fam_y = pd.get_dummies(data['fontFamily'])
col_fam = fam_y.columns

# Labels and images are paired purely by row position, so a length mismatch
# would silently misalign them.
assert len(fam_y) == len(img_array), (
    f"label/image count mismatch: {len(fam_y)} labels vs {len(img_array)} images"
)

# NOTE(review): the split is positional (first 60 % / next 20 % / rest), which
# assumes the rows of df.csv are already shuffled -- confirm.
val_end = train_set + val_set

# Cast labels to float32 explicitly: get_dummies yields bool or uint8 columns
# depending on the pandas version.
fam_train = fam_y[:train_set].to_numpy(dtype='float32')
img_train = img_array[:train_set]

fam_val = fam_y[train_set:val_end].to_numpy(dtype='float32')
img_val = img_array[train_set:val_end]

# Slice from an explicit start index: the previous `[-test_set:]` form returns
# the WHOLE dataset when test_set is 0, because `-0` is just `0`.
fam_test = fam_y[val_end:].to_numpy(dtype='float32')
img_test = img_array[val_end:]

In [26]:
# Preview the one-hot encoded labels (one column per 'fontFamily' value).
fam_y.head()

In [8]:
# Shapes of the train / validation / test label arrays, one per line.
print(fam_train.shape, fam_val.shape, fam_test.shape, sep='\n')

In [9]:
# Plot the accuracy and loss of the training session.
def plot_history(training_history=None):
    """Draw accuracy and loss curves (training vs. validation) side by side.

    Parameters
    ----------
    training_history : object or dict, optional
        Either an object exposing a ``history`` dict (such as the value
        returned by ``model.fit``) or that dict itself. It must contain the
        keys ``'accuracy'``, ``'val_accuracy'``, ``'loss'`` and ``'val_loss'``.
        When omitted, the notebook-level variable ``history`` is used, so
        existing ``plot_history()`` calls keep working.

    Raises
    ------
    NameError
        If no argument is given and no global ``history`` exists yet.
    KeyError
        If one of the four expected metric keys is missing.
    """
    if training_history is None:
        # Backward-compatible default: fall back to the global set by fit().
        training_history = history

    # Accept both the History-like object and its plain metrics dict.
    metrics = getattr(training_history, 'history', training_history)

    # (subplot position, title / y-label, metric keys drawn in that panel)
    panels = (
        (1, 'Accuracy', ('accuracy', 'val_accuracy')),
        (2, 'Loss', ('loss', 'val_loss')),
    )

    plt.figure(figsize=(14, 6))
    for position, title, keys in panels:
        plt.subplot(1, 2, position)
        for key in keys:
            # The legend label is the metric key itself (fixes 'val_acuuracy').
            plt.plot(metrics[key], label=key)
        plt.title(title)
        plt.xlabel('Epoch')
        plt.ylabel(title)
        plt.legend()
    plt.show()

In [10]:
# CNN that classifies a single-channel (height, width, 1) image into one of
# the font families. The layer sequence is unchanged from the original
# hand-unrolled definition; only the repetition is factored out.

def _l2():
    """Return a fresh L2(0.001) kernel regularizer (one instance per layer)."""
    return tf.keras.regularizers.l2(0.001)


def _conv3x3(filters):
    """Return a 3x3 'same'-padded ReLU convolution with L2 weight decay."""
    return layers.Conv2D(filters, (3, 3), padding='same', activation='relu',
                         kernel_regularizer=_l2())


inputs = tf.keras.Input(shape=(height, width, 1), name='input')

# Stage 1 normalises BEFORE pooling: conv -> batch-norm -> pool.
x = _conv3x3(64)(inputs)
x = BatchNormalization()(x)
x = layers.MaxPooling2D((2, 2))(x)

# Stages 2-4 normalise AFTER pooling: conv -> pool -> batch-norm.
# NOTE(review): the ordering differs from stage 1; kept as-is so the
# architecture is unchanged -- confirm whether that is intentional.
for _ in range(3):
    x = _conv3x3(128)(x)
    x = layers.MaxPooling2D((2, 2))(x)
    x = BatchNormalization()(x)

x = Flatten()(x)

# Classifier head: dropout in front of each of the two dense layers.
x = Dropout(0.2)(x)
x = Dense(512, activation='relu', kernel_regularizer=_l2())(x)
x = Dropout(0.2)(x)
x = Dense(256, activation='relu', kernel_regularizer=_l2())(x)

# One output unit per one-hot label column, instead of a hard-coded 10, so the
# head always matches the width of the fam_train / fam_val / fam_test targets.
fam_output = Dense(len(col_fam), activation='softmax', name='family')(x)


fam_model = Model(inputs=inputs, outputs=fam_output)
fam_model.summary()

In [11]:
# Callbacks are taken from tf.keras so they come from the same Keras
# implementation as the model (the notebook imports them from standalone
# `keras`, which can differ from `tensorflow.keras`).

# Save the model to 'fammodel.h5' whenever validation loss improves.
checkpoint = tf.keras.callbacks.ModelCheckpoint('fammodel.h5',
                                                monitor='val_loss',
                                                save_best_only=True)

# Stop on the VALIDATION loss: training loss keeps falling while the model
# overfits, so monitoring it almost never triggers. This also matches what the
# checkpoint tracks. restore_best_weights makes the in-memory model used for
# evaluate()/predict() afterwards the best one rather than the last epoch's.
earlystop = tf.keras.callbacks.EarlyStopping(monitor='val_loss',
                                             patience=3,
                                             restore_best_weights=True)

# `learning_rate` replaces the deprecated `lr` keyword, which newer Keras
# releases reject.
fam_model.compile(loss='categorical_crossentropy',
                  optimizer=tf.keras.optimizers.RMSprop(learning_rate=0.0001, momentum=0.2),
                  metrics=['accuracy'])

# `history` is kept at notebook level because plot_history() reads it.
history = fam_model.fit(x=img_train,
                        y=fam_train,
                        epochs=40,
                        validation_data=(img_val, fam_val),
                        callbacks=[earlystop, checkpoint],
                        batch_size=512)

In [13]:
### Training time: each epoch took about 60 secs,
# so 40 epochs will take about 40 mins

In [14]:
# Plot the accuracy/loss curves stored in the global `history` from fit().
plot_history()

In [15]:
# Score the model on the held-out test split; with metrics=['accuracy'] in
# compile(), this reports the loss followed by the accuracy.
fam_model.evaluate(img_test, fam_test)

In [16]:
# Softmax outputs for EVERY image (train, validation and test rows alike), so
# the predictions can be joined back onto `data` row by row.
prediction = fam_model.predict(img_array)

In [17]:
# Pick the highest-scoring column for each row and translate that column index
# back into its fontFamily label through the one-hot column order.
pred = prediction.argmax(axis=1)
pred_class = [col_fam[class_idx] for class_idx in pred]

# Attach the predicted label to the table and display it.
data['pred'] = pred_class
data

In [18]:
# Keep only the rows where the predicted label differs from the true one, then
# count those misclassifications per true fontFamily.
error = data.loc[data['fontFamily'].ne(data['pred'])]
error['fontFamily'].value_counts()