In [None]:
import pandas as pd
import numpy as np
import os
import random 
from sklearn.model_selection import train_test_split
from google.colab import drive
import tensorflow as tf
from tensorflow.keras import layers, models

In [16]:
# Mount Google Drive so the font CSV files are reachable from the Colab runtime.
drive.mount('/content/drive')
path_to_fonts_folder = '/content/drive/My Drive/fonts'

# Sort the listing: os.listdir returns entries in arbitrary order, and the order in
# which samples are loaded determines what the seeded train/test split produces.
csv_files = sorted(
    os.path.join(path_to_fonts_folder, file)
    for file in os.listdir(path_to_fonts_folder)
    if file.endswith('.csv')
)

# Accumulators: one (20, 20, 1) pixel array and one font label per sample.
images = []
labels = []

for file in csv_files:
    df = pd.read_csv(file)
    # Columns 'r0c0' through 'r19c19' are treated as the 400 pixel values of a
    # 20x20 image. Slicing the whole frame once and reshaping avoids building a
    # Series per row with iterrows(); passing the row count explicitly (rather
    # than -1) keeps the reshape valid for a CSV with no data rows.
    pixels = df.loc[:, 'r0c0':'r19c19'].to_numpy().reshape(len(df), 20, 20, 1)
    images.extend(pixels)  # iterating the 4-D array yields one (20, 20, 1) image per row
    labels.extend(df['font'].tolist())


In [17]:
# Work with the labels as a numpy array from here on
labels = np.array(labels)

# Distinct font names; np.unique returns them sorted
unique_labels = np.unique(labels)

# Font name -> integer class id, where the id is the name's position in unique_labels
label_to_int = dict(zip(unique_labels, range(len(unique_labels))))

# Integer class id for every sample, in the same order as `labels`
encoded_labels = np.array([label_to_int[font_name] for font_name in labels])

# Stack the per-sample images into one float32 array and scale by 1/255.
# NOTE: `images` is overwritten, so re-running this cell alone divides by 255 again.
stacked_pixels = np.array(images).astype('float32')
images = stacked_pixels / 255.0

In [18]:
# Hold out 20% of the samples as a test set; the fixed random_state keeps the
# shuffle repeatable for a given input order.
X_train, X_test, y_train, y_test = train_test_split(
    images,
    encoded_labels,
    test_size=0.2,
    random_state=42,
)

In [19]:
# Number of output classes, taken from the full label set rather than y_train so
# the softmax layer always covers every encoded label, even one that happens to
# land only in the test split.
num_classes = len(np.unique(encoded_labels))

# Small CNN for 20x20 single-channel images: two conv + max-pool stages, then a
# dense hidden layer and a softmax over the font classes.
model = models.Sequential([
    # Explicit Input object instead of `input_shape=` on the first layer; this is
    # the form the Keras Sequential guide recommends.
    layers.Input(shape=(20, 20, 1)),
    layers.Conv2D(32, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(64, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    layers.Flatten(),
    layers.Dense(128, activation='relu'),
    layers.Dense(num_classes, activation='softmax'),  # one unit per font class
])

In [20]:
# Configure training: Adam optimizer, and sparse categorical cross-entropy because
# the targets are integer class ids rather than one-hot vectors.
model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

# Fit for 10 epochs. The test split is passed as validation data, so the
# per-epoch val_* metrics are computed on X_test / y_test.
history = model.fit(
    X_train,
    y_train,
    epochs=10,
    validation_data=(X_test, y_test),
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [24]:
# Spot-check the trained model on a few random samples.
# NOTE: these rows are drawn from the same CSV files that were loaded for training,
# so this is a qualitative sanity check, not a held-out evaluation.

# Rebuild the CSV listing. The original referenced an undefined name
# (`path_to_fonts`); the folder variable defined earlier is `path_to_fonts_folder`.
csv_files = [
    os.path.join(path_to_fonts_folder, file)
    for file in os.listdir(path_to_fonts_folder)
    if file.endswith('.csv')
]
if not csv_files:
    raise ValueError(f"No CSV files found in {path_to_fonts_folder}")

# Randomly select a few CSV files for testing (clamped so a folder with fewer
# files than requested does not make random.sample raise).
num_files_to_test = 10
selected_files = random.sample(csv_files, min(num_files_to_test, len(csv_files)))

# Initialize list for images and original fonts
test_images = []
original_fonts = []

for file in selected_files:
    df = pd.read_csv(file)
    # Take up to 5 random rows from each file; clamp for files with fewer rows.
    sampled_rows = df.sample(min(5, len(df)))
    # Same preprocessing as training: 20x20x1 float32 images scaled by 1/255.
    pixels = sampled_rows.loc[:, 'r0c0':'r19c19'].to_numpy(dtype=np.float32)
    test_images.extend(pixels.reshape(len(sampled_rows), 20, 20, 1) / 255.0)
    original_fonts.extend(sampled_rows['font'].astype(str))  # ensure plain strings

# Convert list to numpy array
test_images = np.array(test_images)

# Predict fonts using the trained model; argmax picks the most probable class id
predicted_fonts = model.predict(test_images)
predicted_font_classes = np.argmax(predicted_fonts, axis=1)

# unique_labels is the class-id -> font-name lookup: label_to_int was built by
# enumerating it, so position i holds the name encoded as i.
predicted_font_names = [unique_labels[i] for i in predicted_font_classes]

# Display predictions and compare with original fonts
for i, (original, predicted) in enumerate(zip(original_fonts, predicted_font_names)):
    print(f"Image {i}: Original Font - {original}, Predicted Font - {predicted}")

Image 0: Original Font - COOPER, Predicted Font - ROCKWELL
Image 1: Original Font - COOPER, Predicted Font - SEGOE
Image 2: Original Font - COOPER, Predicted Font - FRANKLIN
Image 3: Original Font - COOPER, Predicted Font - COOPER
Image 4: Original Font - COOPER, Predicted Font - FRANKLIN
Image 5: Original Font - HIMALAYA, Predicted Font - SITKA
Image 6: Original Font - HIMALAYA, Predicted Font - COURIER
Image 7: Original Font - HIMALAYA, Predicted Font - SEGOE
Image 8: Original Font - HIMALAYA, Predicted Font - NIRMALA
Image 9: Original Font - HIMALAYA, Predicted Font - HIMALAYA
Image 10: Original Font - NUMERICS, Predicted Font - NUMERICS
Image 11: Original Font - NUMERICS, Predicted Font - NUMERICS
Image 12: Original Font - NUMERICS, Predicted Font - NUMERICS
Image 13: Original Font - NUMERICS, Predicted Font - ARIAL
Image 14: Original Font - NUMERICS, Predicted Font - NUMERICS
Image 15: Original Font - GOUDY, Predicted Font - SEGOE
Image 16: Original Font - GOUDY, Predicted Font - 

In [None]:
# Persist the trained model to Google Drive as an .h5 file so it can be reloaded later
model_save_path = '/content/drive/My Drive/font_recognition_model.h5'
model.save(model_save_path)
