In [1]:
import numpy as np
import tensorflow as tf 
import cv2
from PIL import Image
from datasets import load_dataset

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
ds = load_dataset("microsoft/cats_vs_dogs")

In [3]:
print(ds['train'][0]['image'])
print(ds['train'][0]['labels'])

<PIL.JpegImagePlugin.JpegImageFile image mode=RGB size=500x375 at 0x254CAB37E50>
0


In [4]:
ds

DatasetDict({
    train: Dataset({
        features: ['image', 'labels'],
        num_rows: 23410
    })
})

In [5]:
all_images = []
all_labels = []

target_size = (128, 128)

In [6]:
def make_square(image):
    width, height = image.size

    # If the image is already square, return the original image
    if width == height:
        return image

    # Determine the size of the new square image (it should be the max of width and height)
    new_size = max(width, height)

    # Create a new black image with a square size
    new_image = Image.new("RGB", (new_size, new_size), color=(0, 0, 0))  # Black background

    # Calculate the position to paste the original image (centered)
    paste_position = ((new_size - width) // 2, (new_size - height) // 2)

    # Paste the original image onto the new black square
    new_image.paste(image, paste_position)

    return new_image

In [7]:

print('iterating through the dataset')
print('processing...')
for row in ds['train']:
    image = row['image']
    
    new_image = make_square(image)
    
    image_resized = new_image.resize(target_size)
    image_np = np.array(image_resized)
    all_images.append(image_np)
    
    label = row['labels']
    all_labels.append(label)
    
print('Done !')

iterating through the dataset
processing...
Done !


In [8]:
# check the shape of everything
count = 0
for image in all_images:
    if len(image.shape) != 3 or (len(image.shape) == 3 and image.shape[2] != 3):
        print(f'shape: {image.shape}')
        count += 1
        
print(f'count: {count}')

shape: (128, 128, 4)
shape: (128, 128, 4)
shape: (128, 128, 4)
count: 3


**Issue**

We have 5 pictures that don't have 3 channels but just one. We have to eliminate those in order to conver all_images from a list to a numpy array

I've tried to eliminated the elements that have the shape == 2 but with the .remove() method it didn't work. So we have to repopulate another list

after a couple of trial there I founded that there are some images where the last axis has 4 channels instead of 3 (I don't know why, i guess it is some alpha value), so I'm gonna get rid of those images.

In [9]:
temp_list = []
count_list = []
count = 0

for image in all_images:
    if len(image.shape) == 3 and image.shape[2] == 3:
        temp_list.append(image)
    else:
        count_list.append(count)
    count += 1
    
for count in reversed(count_list):
    all_labels.pop(count)
    
all_images = temp_list

In [10]:
all_images = np.array(all_images)
all_labels = np.array(all_labels)

print(f'shape images np array: {all_images.shape}')
print(f'shape labels np array: {all_labels.shape}')

shape images np array: (23407, 128, 128, 3)
shape labels np array: (23407,)


In [11]:
# saving the two arrays into files
import os

folder_name = '../npy_file'

if not os.path.exists(folder_name):
    os.makedirs(folder_name)

np.save('../npy_file/images.npy', all_images)
np.save('../npy_file/labels.npy', all_labels)

In [12]:
image = Image.fromarray(all_images[0])
#image.show()

Create the model

In [13]:
all_images = all_images/255.0

input_shape = (128, 128, 3)

In [14]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(all_images, all_labels, test_size = 0.3, random_state = 42)

In [15]:
print(f'X_train shape: {X_train.shape}')
print(f'X_test shape: {X_test.shape}')
print(f'y_train shape: {y_train.shape}')
print(f'y_test shape: {y_test.shape}')

X_train shape: (16384, 128, 128, 3)
X_test shape: (7023, 128, 128, 3)
y_train shape: (16384,)
y_test shape: (7023,)


In [16]:
import tensorflow as tf
from tensorflow.keras.layers import Dense, Conv2D, MaxPooling2D, Flatten
from tensorflow.keras.callbacks import ModelCheckpoint

In [17]:
model = tf.keras.models.Sequential()

In [18]:
model.add(Conv2D(input_shape=input_shape, filters=16, kernel_size=(3,3), padding='same', activation='relu'))
model.add(Conv2D(filters=16, kernel_size=(3,3), padding='same', activation='relu'))
model.add(MaxPooling2D(pool_size=(2,2), strides=(2,2)))

model.add(Conv2D(filters=32, kernel_size=(3,3), padding='same', activation='relu'))
model.add(Conv2D(filters=32, kernel_size=(3,3), padding='same', activation='relu'))
model.add(MaxPooling2D(pool_size=(2,2), strides=(2,2)))

model.add(Flatten())

model.add(Dense(1024, activation='relu'))
model.add(Dense(1, activation='sigmoid'))

# compile the model
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

model.summary()

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [19]:
batch = 32
epochs = 10

steps_per_epoch = int(np.ceil(len(X_train)/batch))
validation_steps = int(np.ceil(len(X_test)/batch))

folder_name = '../ai_models'

if not os.path.exists(folder_name):
    os.makedirs(folder_name)
 
best_model_file = os.path.join(folder_name, 'cat_dog_squared_10.keras')

best_model = ModelCheckpoint(best_model_file, monitor = 'val_accuracy', verbose = 1, save_best_only = True)

history = model.fit(X_train, y_train,
                    batch_size = batch,
                    epochs = epochs,
                    verbose = 1,
                    validation_data = (X_test, y_test),
                    validation_steps = validation_steps,
                    steps_per_epoch = steps_per_epoch,
                    shuffle = True,
                    callbacks = [best_model])

Epoch 1/10
[1m512/512[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 196ms/step - accuracy: 0.5773 - loss: 0.6997
Epoch 1: val_accuracy improved from -inf to 0.67250, saving model to ../ai_models\cat_dog_squared_10.keras
[1m512/512[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m118s[0m 222ms/step - accuracy: 0.5774 - loss: 0.6996 - val_accuracy: 0.6725 - val_loss: 0.6056
Epoch 2/10
[1m512/512[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 175us/step - accuracy: 0.0000e+00 - loss: 0.0000e+00
Epoch 3/10


  self.gen.throw(typ, value, traceback)
  self._save_model(epoch=epoch, batch=None, logs=logs)


[1m512/512[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 198ms/step - accuracy: 0.7260 - loss: 0.5524
Epoch 3: val_accuracy improved from 0.67250 to 0.75153, saving model to ../ai_models\cat_dog_squared_10.keras
[1m512/512[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m109s[0m 212ms/step - accuracy: 0.7260 - loss: 0.5524 - val_accuracy: 0.7515 - val_loss: 0.5038
Epoch 4/10
[1m512/512[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 63us/step - accuracy: 0.0000e+00 - loss: 0.0000e+00
Epoch 5/10
[1m512/512[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 197ms/step - accuracy: 0.7960 - loss: 0.4453
Epoch 5: val_accuracy improved from 0.75153 to 0.77502, saving model to ../ai_models\cat_dog_squared_10.keras
[1m512/512[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m108s[0m 211ms/step - accuracy: 0.7960 - loss: 0.4453 - val_accuracy: 0.7750 - val_loss: 0.4753
Epoch 6/10
[1m512/512[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 49us/step - accuracy: 0.0000e+0

In [20]:

history.history

{'accuracy': [0.6202392578125,
  0.0,
  0.737548828125,
  0.0,
  0.7952880859375,
  0.0,
  0.8594970703125,
  0.0,
  0.93695068359375,
  0.0],
 'loss': [0.6493006944656372,
  0.0,
  0.5326759815216064,
  0.0,
  0.440672904253006,
  0.0,
  0.3162634074687958,
  0.0,
  0.15655331313610077,
  0.0],
 'val_accuracy': [0.6725046038627625,
  0.7515307068824768,
  0.7750248908996582,
  0.7881247401237488,
  0.7804357409477234],
 'val_loss': [0.6055585741996765,
  0.5037979483604431,
  0.47529327869415283,
  0.5224905014038086,
  0.6446336507797241]}