Training a neural network to classify N jersey numbers

Import libraries

In [None]:
import numpy as np
import os
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, Flatten, Dense

Specify the training data directory, image size, batch size, and number of epochs

In [None]:
# Root directory of the training data (one sub-folder of images per class)
data_dir = './grouped_training_data/'

# Resolution every image is resized to before it reaches the network
img_width = 20
img_height = 20

# Training hyperparameters
batch_size = 32
epochs = 10

Load the training images. Each image has a 50% probability of being inverted as it is loaded. A validation set is also split off from the same directory (20% of the images).

In [None]:
def invert_image(image):
    """Return the negative of ``image``.

    Every value ``v`` is mapped to ``255 - v``, so the input is expected to be
    on a 0-255 intensity scale. Works on scalars and NumPy arrays alike.
    """
    full_scale = 255
    negative = full_scale - image
    return negative

def _invert_half_the_time(x):
    """Augmentation hook: return the negative of ``x`` with probability 0.5."""
    if np.random.rand() < 0.5:
        return invert_image(x)
    return x


# One generator configuration serves both subsets (80% training / 20% validation).
# Because the same hook is attached to both, validation images are randomly
# inverted too. Keras applies `rescale` after the other transformations, so
# the inversion sees the 0-255 values.
datagen = ImageDataGenerator(
    rescale=1./255,
    validation_split=0.2,  # Adjust the split ratio as needed
    preprocessing_function=_invert_half_the_time,
)

# Options shared by the training and validation iterators.
# NOTE(review): Keras reads target_size as (height, width); the order used here
# only works unnoticed because both dimensions are equal - confirm before
# switching to a non-square size.
flow_options = dict(
    target_size=(img_width, img_height),
    batch_size=batch_size,
    class_mode='categorical',
)

# Training subset
train_generator = datagen.flow_from_directory(
    data_dir,
    subset='training',
    **flow_options,
)

# Each immediate sub-directory of data_dir counts as one class
folders = next(os.walk(data_dir))[1]
num_folders = len(folders)
num_classes = num_folders

# Validation subset
validation_generator = datagen.flow_from_directory(
    data_dir,
    subset='validation',
    **flow_options,
)

Define the CNN model and train it.

In [None]:
# Define the model: three stacked 3x3 convolutions (no pooling) followed by a
# small dense head with one softmax output per class folder.
model = Sequential([
    Conv2D(32, (3, 3), activation='relu', input_shape=(img_width, img_height, 3)),
    Conv2D(64, (3, 3), activation='relu'),
    Conv2D(128, (3, 3), activation='relu'),
    Flatten(),
    Dense(128, activation='relu'),
    Dense(num_classes, activation='softmax')
])

# Compile the model (categorical cross-entropy matches class_mode='categorical')
model.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['accuracy'])

# Train the model.
# steps_per_epoch / validation_steps are deliberately left unset: both
# generators are finite, so Keras runs each one until it is exhausted.
# The previous `samples // batch_size` dropped the last partial batch
# and evaluated to 0 steps whenever a subset held fewer than batch_size images.
model.fit(
    train_generator,
    epochs=epochs,
    validation_data=validation_generator,
)

Map the class labels to the actual jersey numbers

In [None]:
# class_indices maps each class folder name to the integer index used in the
# one-hot labels (and therefore to the position of the model's output unit).
train_class_mapping = train_generator.class_indices

# Order the folder names by class index so that class_labels[k] is the label
# for output unit k, without depending on the dict's iteration order.
class_labels = [
    str(folder_name)
    for folder_name, _ in sorted(train_class_mapping.items(), key=lambda item: item[1])
]

Define function for loading and preprocessing test images

In [None]:
from tensorflow.keras.preprocessing import image

def preprocess_image(img_path, img_width, img_height):
    """Load one image file and return it as a single-item batch for the model.

    The file at ``img_path`` is loaded with
    ``target_size=(img_width, img_height)``, converted to an array, given a
    leading batch axis, and divided by 255 to match the training rescale.
    """
    pil_img = image.load_img(img_path, target_size=(img_width, img_height))
    pixels = image.img_to_array(pil_img)
    batch = np.expand_dims(pixels, axis=0)
    return batch / 255.0

Test the trained model on the test images. For now, if a test folder is empty I label it as -1.

In [None]:
# Define range of folders to test. The test data has folders 0 through 1210.
# Testing all folders can take a while.
folder_min = 0
folder_max = 1210

# Maps folder name (str) -> predicted jersey number (int).
# A value of -1 marks a folder that contained no JPG images.
data_dict = {}

for i in range(folder_min, folder_max + 1):

    # Set the path to the folder containing test images
    test_folder_path = './cropped_test_img/' + str(i)  # Replace with the path to your test image folder

    # Start every known label at zero; ties in the vote below are therefore
    # resolved in class_labels order.
    class_counts = {label: 0 for label in class_labels}

    # Load and preprocess every JPG in the folder up front
    folder_images = [
        preprocess_image(os.path.join(test_folder_path, filename), img_width, img_height)
        for filename in os.listdir(test_folder_path)
        if filename.endswith('.jpg')  # Assuming all images in the folder are JPG files
    ]

    if folder_images:
        # One predict call per folder instead of one per image: Model.predict
        # is built for batches and carries noticeable per-call overhead.
        predictions = model.predict(np.concatenate(folder_images, axis=0), verbose=0)

        # Tally the most likely class of each image
        for predicted_class_index in np.argmax(predictions, axis=1):
            class_counts[class_labels[predicted_class_index]] += 1

        # Majority vote: the label with the highest count also has the highest
        # percentage, so the counts are compared directly.
        max_percentage_class = max(class_counts, key=class_counts.get)
    else:
        # No images to vote with
        max_percentage_class = str(-1)

    print('Folder: ' + str(i) + ', Predicted number: ' + max_percentage_class)

    # Record the result; assigning to an existing key simply overwrites it
    data_dict[str(i)] = int(max_percentage_class)

Export the predicted jersey numbers to a JSON file

In [None]:
import json 

# Destination file for the serialized predictions
output_file = 'jersey_predictions.json'

# Dump the folder -> jersey number mapping as JSON
with open(output_file, 'w') as out_fh:
    json.dump(data_dict, out_fh)

print("Data written to", output_file)

Now compare the predictions to the ground truth values in test_gt.json

In [None]:
def compare_json_files(file1, file2):
    """Return the percentage of entries in ``file2`` that also appear in ``file1``.

    Both files are read as JSON objects. An entry of ``file2`` counts as a
    match when its key exists in ``file1`` with an equal value. The
    denominator is the number of entries in ``file2``; an empty ``file2``
    yields 0.
    """
    with open(file1, 'r') as reference_fh:
        reference = json.load(reference_fh)

    with open(file2, 'r') as candidate_fh:
        candidate = json.load(candidate_fh)

    candidate_total = len(candidate)
    hits = sum(
        1
        for key, value in candidate.items()
        if key in reference and reference[key] == value
    )

    # Guard against dividing by zero when there is nothing to compare
    if not candidate_total > 0:
        return 0
    return (hits / candidate_total) * 100

# Score the predictions: file1 is the ground truth, file2 the model output
# (the percentage is taken over the entries of file2)
file1, file2 = "test_gt.json", "jersey_predictions.json"
similarity_percentage = compare_json_files(file1=file1, file2=file2)
print("Accuracy of the model (% of correct predictions):\n", similarity_percentage)

Uncomment the line below to save the trained model

In [None]:
#model.save("CNN_model.h5")