In [2]:
import matplotlib.pyplot as plt
import numpy as np
import cv2
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

In [3]:
import tensorflow as tf

In [4]:
df = pd.read_csv("metadata.csv")

In [5]:
df.head()

Unnamed: 0,FileName,Font,Text
0,file_1.png,CourierNew,Sediment
1,file_2.png,Arial,Guest
2,file_3.png,CenturyGothic,Socket
3,file_4.png,Verdana,Onset
4,file_5.png,Candara,Driver


In [6]:
df['Font'].value_counts().sum()

np.int64(20000)

In [7]:
df['Text'].value_counts().sum()

np.int64(19995)

In [8]:
image_directory = 'files'

In [9]:
from sklearn.model_selection import train_test_split

In [10]:
train_list = []
test_list = []
val_list = []

for class_label, group in df.groupby('Font'):
    train, temp = train_test_split(group, test_size=0.3, random_state=42, stratify=group['Font'])
    test, val = train_test_split(temp, test_size=2/3, random_state=42, stratify=temp['Font'])
    
    train_list.append(train)
    test_list.append(test)
    val_list.append(val)

train_df = pd.concat(train_list).reset_index(drop=True)
test_df = pd.concat(test_list).reset_index(drop=True)
val_df = pd.concat(val_list).reset_index(drop=True)

In [11]:
import os

base_dir = "files"

train_df['image_path'] = train_df['FileName'].apply(lambda x: os.path.join(base_dir, x))
val_df['image_path'] = val_df['FileName'].apply(lambda x: os.path.join(base_dir, x))
test_df['image_path'] = test_df['FileName'].apply(lambda x: os.path.join(base_dir, x))

In [12]:
train_df.head()

Unnamed: 0,FileName,Font,Text,image_path
0,file_5693.png,Arial,Modem,files/file_5693.png
1,file_10959.png,Arial,Mrs,files/file_10959.png
2,file_13087.png,Arial,Marc,files/file_13087.png
3,file_6418.png,Arial,Seats,files/file_6418.png
4,file_17497.png,Arial,Chest,files/file_17497.png


In [13]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator

In [14]:
image_size = (224, 224) 
batch_size = 16
class_mode = 'categorical'

In [15]:
train_datagen = ImageDataGenerator(rescale=1.0/255.0,     
                                   width_shift_range=0.2,  
                                   height_shift_range=0.2,  
                                   shear_range=0.2,        
                                   zoom_range=0.2,         
                                   vertical_flip=True)  

In [16]:
val_test_datagen = ImageDataGenerator(rescale=1.0/255.0)

In [17]:
train_generator = train_datagen.flow_from_dataframe(
    dataframe=train_df,
    x_col='image_path',
    y_col='Font',
    target_size=image_size,
    batch_size=batch_size
)

validation_generator = val_test_datagen.flow_from_dataframe(
    dataframe=val_df,
    x_col='image_path',
    y_col='Font',
    target_size=image_size,
    batch_size=batch_size
)

test_generator = val_test_datagen.flow_from_dataframe(
    dataframe=test_df,
    x_col='image_path',
    y_col='Font',
    target_size=image_size,
    batch_size=batch_size
)

Found 14000 validated image filenames belonging to 20 classes.
Found 4000 validated image filenames belonging to 20 classes.
Found 2000 validated image filenames belonging to 20 classes.


In [18]:
from tensorflow.keras.applications import MobileNetV2
from tensorflow.keras.layers import Flatten, Dropout, Dense
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam

base_model = MobileNetV2(weights='imagenet', include_top=False, input_shape=(224, 224, 3))

base_model.trainable = True

x = base_model.output
x = Flatten()(x)
x = Dropout(0.5)(x)
x = Dense(256, activation='relu')(x)
x = Dropout(0.5)(x)
predictions = Dense(20, activation='softmax')(x)  

model = Model(inputs=base_model.input, outputs=predictions)

model.compile(optimizer=Adam(learning_rate=1e-4),
              loss='categorical_crossentropy',
              metrics=['accuracy'])

model.summary()


In [19]:
history = model.fit(
    train_generator,
    epochs=1, 
    validation_data=validation_generator,
    verbose=1
)

  self._warn_if_super_not_called()


[1m875/875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1097s[0m 1s/step - accuracy: 0.1949 - loss: 2.9215 - val_accuracy: 0.1495 - val_loss: 3.1375


In [21]:
test_loss, test_accuracy = model.evaluate(test_generator, steps=test_generator.samples // test_generator.batch_size)
print(f"Test Accuracy: {test_accuracy * 100:.2f}%")

[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m25s[0m 197ms/step - accuracy: 0.1468 - loss: 3.1566
Test Accuracy: 15.15%
