# **Display and Preprocess Images**

In [None]:
import os
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
from zipfile import ZipFile
from io import BytesIO
from PIL import Image
import numpy as np
import tensorflow as tf
from sklearn.preprocessing import OneHotEncoder

In [None]:
# Mount Google Drive so files stored there are reachable from the Colab runtime.
from google.colab import drive
drive.mount('/content/drive')

# Project folder on Drive; every other Drive path is derived from it so the
# location only has to be changed in one place.
base_path = '/content/drive/MyDrive/ml-project'
# Zip archive holding the train/valid/test image folders.
dataset_zip = os.path.join(base_path, 'cards-set.zip')

Mounted at /content/drive


In [None]:
# Target (width, height) every image is resized to before entering the model.
IMG_SIZE = (128, 128)
def preprocess_image(image_data):
  """Decode raw image bytes into a normalized RGB array.

  Args:
    image_data: Encoded image file contents (bytes) in a format PIL can open.

  Returns:
    A float32 NumPy array holding the image converted to RGB, resized to
    IMG_SIZE, with pixel values scaled from 0-255 to the 0-1 range.
  """
  # The context manager closes the source image as soon as the resized RGB
  # copy has been produced, instead of leaving that to garbage collection.
  with Image.open(BytesIO(image_data)) as raw_image:
    image = raw_image.convert('RGB').resize(IMG_SIZE)

  # float32 halves the memory of the float64 that plain `/ 255.0` on a uint8
  # array would produce.
  return np.asarray(image, dtype=np.float32) / 255.0

In [None]:
def extract_and_preprocess_images(zip_ref, folder):
  """Read and preprocess every image stored under `folder` inside a zip archive.

  Args:
    zip_ref: An open archive object providing `namelist()` and `read(name)`.
    folder: Path prefix inside the archive (e.g. the train, valid, or test
      folder); only members whose names start with it are used.

  Returns:
    A tuple `(preprocessed_imgs, img_labels, data_files)` of three parallel
    lists: the output of `preprocess_image` for each file, the name of each
    file's parent directory (used as its class label), and the archive member
    names themselves.
  """
  image_extensions = ('.png', '.jpg', '.jpeg')

  # Compare extensions case-insensitively so members such as "card.JPG" are
  # not silently left out of the dataset.
  data_files = [
      f for f in zip_ref.namelist()
      if f.startswith(folder) and f.lower().endswith(image_extensions)
  ]

  preprocessed_imgs = [preprocess_image(zip_ref.read(f)) for f in data_files]

  # The class label is the directory that directly contains the image.
  img_labels = [os.path.basename(os.path.dirname(f)) for f in data_files]

  return preprocessed_imgs, img_labels, data_files

In [None]:
def display_imgs(zip_ref, data_files, title, preview_num):
  """Show a row of preview images for every class directory in `data_files`.

  Args:
    zip_ref: An open archive object providing `read(name)`.
    data_files: Archive member names; files are grouped by parent directory.
    title: Text printed next to each class name (e.g. the split name).
    preview_num: Number of subplot slots per class; at most this many images
      are drawn for each class.

  Returns:
    None. One figure per class is shown.
  """
  # Group the files by directory in a single pass rather than re-scanning the
  # whole list for every class.
  files_by_class = {}
  for f in data_files:
    files_by_class.setdefault(os.path.dirname(f), []).append(f)

  # Sorted so the figures appear in the same order on every run.
  for class_dir in sorted(files_by_class):
    # get first n (preview_num) images for the current class
    class_imgs = files_by_class[class_dir][:preview_num]

    print(f'Class: {class_dir} - {title}')
    # squeeze=False always yields a 2-D grid of axes; with the default,
    # preview_num == 1 returns a single Axes object that cannot be indexed.
    fig, axes = plt.subplots(1, preview_num, figsize=(2 * preview_num, 2), squeeze=False)
    row = axes[0]

    # Hide the frame of every slot up front so slots left empty (a class with
    # fewer than preview_num images) do not show blank axes.
    for ax in row:
      ax.axis('off')

    for ax, img_file in zip(row, class_imgs):
      img_data = zip_ref.read(img_file)
      img = mpimg.imread(BytesIO(img_data), format='jpg')
      ax.imshow(img)
    plt.show()
    # Release the figure; one is created per class.
    plt.close(fig)

In [None]:
def one_hot_encode_labels(labels, categories=None):
  """One-hot encode a sequence of class labels and print a short summary.

  Args:
    labels: Sequence of class labels, one per sample.
    categories: Optional sequence of all class labels, in the column order the
      encoding should use. When omitted, the encoder derives the categories
      from `labels` itself, so the columns depend on which labels are present
      in this particular call. Pass the same list for every data split to
      guarantee identical column layouts.

  Returns:
    A dense 2-D NumPy array with one row per label and one column per category.

  Raises:
    ValueError: If `labels` is empty.
  """
  labels_array = np.asarray(labels)
  if labels_array.size == 0:
    raise ValueError("labels must contain at least one entry")

  # the encoder expects a 2-D (n_samples, n_features) input
  labels_array = labels_array.reshape(-1, 1)

  # check unique labels and their count
  unique_labels = np.unique(labels_array)
  print(f"Number of unique labels: {len(unique_labels)}")
  print(f"Unique labels: {unique_labels}")

  # perform one-hot encoding; one list of categories per input feature
  encoder_categories = 'auto' if categories is None else [list(categories)]
  one_hot_encoder = OneHotEncoder(categories=encoder_categories, sparse_output=False)
  one_hot_encoded_labels = one_hot_encoder.fit_transform(labels_array)

  # verify the shape of the one-hot encoded labels
  print(f'Shape of one-hot encoded labels: {one_hot_encoded_labels.shape}')
  # printing the first encoded label
  print(one_hot_encoded_labels[0])

  return one_hot_encoded_labels

In [None]:
# Folder prefixes of the three data splits inside the zip archive.
train_folder_path = 'cards-set/train/'
valid_folder_path = 'cards-set/valid/'
test_folder_path = 'cards-set/test/'
# number of images to display for each class
preview_num = 5

with ZipFile(dataset_zip, 'r') as zip_ref:
  # process training data
  print("-----PROCESSING TRAINING DATA-----")
  preprocessed_train_imgs, train_img_labels, train_files = extract_and_preprocess_images(zip_ref, train_folder_path)
  one_hot_encoded_labels_train = one_hot_encode_labels(train_img_labels)
  # display_imgs(zip_ref, train_files, "Training Set", preview_num)

  # process valid data
  print("-----PROCESSING VALIDATION DATA-----")
  preprocessed_valid_imgs, valid_img_labels, valid_files = extract_and_preprocess_images(zip_ref, valid_folder_path)
  one_hot_encoded_labels_valid = one_hot_encode_labels(valid_img_labels)
  # display_imgs(zip_ref, valid_files, "Validation Set", preview_num)

  # process test data
  print("-----PROCESSING TEST DATA-----")
  preprocessed_test_imgs, test_img_labels, test_files = extract_and_preprocess_images(zip_ref, test_folder_path)
  one_hot_encoded_labels_test = one_hot_encode_labels(test_img_labels)
  # display_imgs(zip_ref, test_files, "Test Set", preview_num)

# Each split above is encoded by its own call, so the one-hot columns of the
# three splits only refer to the same classes when every split contains exactly
# the same set of labels. Fail loudly here instead of training or evaluating
# against misaligned targets.
train_classes = set(train_img_labels)
for split_name, split_labels in (("validation", valid_img_labels), ("test", test_img_labels)):
  assert set(split_labels) == train_classes, (
      f"{split_name} labels differ from training labels: "
      f"{sorted(set(split_labels) ^ train_classes)}"
  )

-----PROCESSING TRAINING DATA-----
Number of unique labels: 53
Unique labels: ['ace of clubs' 'ace of diamonds' 'ace of hearts' 'ace of spades'
 'eight of clubs' 'eight of diamonds' 'eight of hearts' 'eight of spades'
 'five of clubs' 'five of diamonds' 'five of hearts' 'five of spades'
 'four of clubs' 'four of diamonds' 'four of hearts' 'four of spades'
 'jack of clubs' 'jack of diamonds' 'jack of hearts' 'jack of spades'
 'joker' 'king of clubs' 'king of diamonds' 'king of hearts'
 'king of spades' 'nine of clubs' 'nine of diamonds' 'nine of hearts'
 'nine of spades' 'queen of clubs' 'queen of diamonds' 'queen of hearts'
 'queen of spades' 'seven of clubs' 'seven of diamonds' 'seven of hearts'
 'seven of spades' 'six of clubs' 'six of diamonds' 'six of hearts'
 'six of spades' 'ten of clubs' 'ten of diamonds' 'ten of hearts'
 'ten of spades' 'three of clubs' 'three of diamonds' 'three of hearts'
 'three of spades' 'two of clubs' 'two of diamonds' 'two of hearts'
 'two of spades']
Sh

In [None]:
import numpy as np
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Number of augmented copies generated for every original training image.
AUGMENTATIONS_PER_IMAGE = 5
# Seed for NumPy's global RNG so repeated runs produce the same augmentations.
AUGMENTATION_SEED = 42

# create an ImageDataGenerator for augmentation
datagen = ImageDataGenerator(
    rotation_range=20,      # randomly rotate images by up to 20 degrees
    width_shift_range=0.2,  # randomly shift images horizontally by up to 20% of the width
    height_shift_range=0.2, # randomly shift images vertically by up to 20% of the height
    shear_range=0.2,        # apply random shearing transformations
    zoom_range=0.2,         # randomly zoom in on images by up to 20%
    horizontal_flip=True,   # randomly flip images horizontally
    fill_mode='nearest'     # fill in missing pixels with the nearest value
)

# stack the list of images into a single (num_images, ...) array
preprocessed_train_imgs = np.asarray(preprocessed_train_imgs)
num_originals = len(preprocessed_train_imgs)
assert num_originals == len(one_hot_encoded_labels_train), "images and labels must be paired"

# NOTE(review): Keras' ImageDataGenerator is understood to draw its random
# transform parameters from NumPy's global RNG - confirm for the installed version.
np.random.seed(AUGMENTATION_SEED)

# Pre-allocate the output instead of growing a Python list and copying it into
# an array afterwards. float32 is the default dtype of the batches yielded by
# `datagen.flow`.
augmented_images = np.empty(
    (num_originals * AUGMENTATIONS_PER_IMAGE,) + preprocessed_train_imgs.shape[1:],
    dtype=np.float32,
)

for i in range(num_originals):
    # A slice keeps the leading batch dimension the generator expects. The
    # iterator is created once per image and then asked for several random
    # variants, rather than being rebuilt for every single variant.
    image_flow = datagen.flow(preprocessed_train_imgs[i:i + 1], batch_size=1)

    for j in range(AUGMENTATIONS_PER_IMAGE):
        augmented_images[i * AUGMENTATIONS_PER_IMAGE + j] = next(image_flow)[0]

# Every augmented copy keeps the label of the image it was derived from; rows
# are repeated consecutively, matching the order the images were written above.
augmented_labels = np.repeat(one_hot_encoded_labels_train, AUGMENTATIONS_PER_IMAGE, axis=0)

# concatenate augmented data with original training data
all_train_images = np.concatenate((preprocessed_train_imgs, augmented_images), axis=0)
all_train_labels = np.concatenate((one_hot_encoded_labels_train, augmented_labels), axis=0)

print("Augmentation complete. New training set size:", all_train_images.shape)


Augmentation complete. New training set size: (45744, 128, 128, 3)


# **Create Model**



In [None]:
import numpy as np
import tensorflow as tf
from tensorflow.keras import regularizers
from tensorflow.keras.optimizers import Adam, Adamax
from sklearn.model_selection import train_test_split
from tensorflow.keras.metrics import categorical_crossentropy
from tensorflow.keras.models import Model, load_model, Sequential
from sklearn.metrics import confusion_matrix, classification_report
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.layers import Conv2D, MaxPooling2D, GlobalAveragePooling2D, Flatten, Dense, Activation, Dropout, BatchNormalization
print(tf.__version__)

2.15.0


In [None]:
# View the data as NumPy arrays. `np.asarray` leaves inputs that are already
# arrays untouched, whereas `np.array` would duplicate the full training set
# in memory.
X_train = np.asarray(all_train_images)
y_train = np.asarray(all_train_labels)
X_val = np.asarray(preprocessed_valid_imgs)
y_val = np.asarray(one_hot_encoded_labels_valid)

# ensure the shapes are correct
print(f"Training data shape: {X_train.shape}, {y_train.shape}")
print(f"Validation data shape: {X_val.shape}, {y_val.shape}")

# Size the output layer from the encoded labels instead of hard-coding it.
num_classes = y_train.shape[1]
assert y_val.shape[1] == num_classes, "training and validation labels have different class counts"

# create pre-trained model
# NOTE(review): the images fed in here were scaled to the 0-1 range, while the
# Keras documentation for MobileNet describes `mobilenet.preprocess_input`
# (scaling to -1..1) as the expected input preprocessing. The whole network is
# fine-tuned below, so this is left unchanged - confirm whether it is intended.
base_model = tf.keras.applications.MobileNet(
    input_shape=(IMG_SIZE[0], IMG_SIZE[1], 3),
    include_top=False,
    weights="imagenet",
    input_tensor=None,
    pooling="max",
)

# Classification head on top of the pooled MobileNet features.
model = Sequential([
    base_model,
    BatchNormalization(axis= -1, momentum= 0.99, epsilon= 0.001),
    Dense(256, kernel_regularizer= regularizers.l2(l= 0.016), activity_regularizer= regularizers.l1(0.006),
                bias_regularizer= regularizers.l1(0.006), activation= 'relu'),
    Dropout(rate= 0.5),
    Dense(num_classes, activation= 'softmax') # one output per card class
])

# compile the model
model.compile(Adamax(learning_rate= 0.001), loss= 'categorical_crossentropy', metrics= ['accuracy'])

# define learning rate reduction and early stopping
lr_reduction = tf.keras.callbacks.ReduceLROnPlateau(monitor='val_loss', patience=3, factor=0.5, min_lr=0.00001)
early_stopping = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

# train the model with callbacks
history = model.fit(X_train, y_train, epochs=8, validation_data=(X_val, y_val),
                    batch_size=64, callbacks=[lr_reduction, early_stopping])

# evaluate the model
val_loss, val_accuracy = model.evaluate(X_val, y_val)
print(f'Validation accuracy: {val_accuracy}')

# save model architecture to JSON
model_json = model.to_json()

# save the JSON model to drive (paths are derived from base_path rather than
# repeating the Drive folder by hand)
with open(os.path.join(base_path, "playing-cards-model.json"), "w") as json_file:
    json_file.write(model_json)

# save weights to the same directory
model.save_weights(os.path.join(base_path, "playing-cards-model_weights.h5"))

# model.save('/content/drive/MyDrive/ml-project/playing-cards-classification-model.h5')


Training data shape: (45744, 128, 128, 3), (45744, 53)
Validation data shape: (265, 128, 128, 3), (265, 53)
Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/mobilenet/mobilenet_1_0_128_tf_no_top.h5
Epoch 1/8
Epoch 2/8
Epoch 3/8
Epoch 4/8
Epoch 5/8
Epoch 6/8
Epoch 7/8
Epoch 8/8
Validation accuracy: 0.9509434103965759


# Testing Model


In [None]:
from tensorflow.keras.models import load_model

In [None]:
from tensorflow.keras.models import model_from_json

# convert lists to numpy arrays
X_test = np.asarray(preprocessed_test_imgs)
y_test = np.asarray(one_hot_encoded_labels_test)

# ensure the shapes are correct
print(f"Test data shape: {X_test.shape}, {y_test.shape}")

# Rebuild the model from the architecture (JSON) and weights files written by
# the training cell, so the evaluation covers the model trained in this
# notebook rather than an unrelated file that the notebook never creates.
model_json_path = os.path.join(base_path, 'playing-cards-model.json')
model_weights_path = os.path.join(base_path, 'playing-cards-model_weights.h5')

with open(model_json_path) as json_file:
    model = model_from_json(json_file.read())
model.load_weights(model_weights_path)

# A model restored from JSON is not compiled; configure the same loss and
# metric used during training so `evaluate` reports comparable numbers.
model.compile(loss='categorical_crossentropy', metrics=['accuracy'])

# evaluate the model on the test data
test_loss, test_accuracy = model.evaluate(X_test, y_test)
print(f'Test accuracy: {test_accuracy}')

Test data shape: (265, 128, 128, 3), (265, 53)
Test accuracy: 0.9433962106704712
