In [23]:
import os
import numpy as np
from PIL import Image
import gzip
import struct
from array import array

In [2]:
# Root directory that holds one sub-folder of images per class.
base_folder = '../dataset/test'

# Size handed to PIL's ``Image.resize`` for every loaded image.
image_size = (32, 32)

# Class folder names; a class's position in this list is its integer label.
class_descriptions = [
    "cheetah",
    "deer",
    "giraffe",
    "hyena",
    "jaguar",
    "leopard",
    "tapir",
    "tiger",
    "WhaleShark",
    "zebra",
]

In [3]:
def load_images_from_folders(base_folder, class_descriptions, image_size):
    """Load every PNG under ``base_folder/<class_name>`` as a grayscale array.

    Parameters
    ----------
    base_folder : str
        Directory containing one sub-folder per class.
    class_descriptions : sequence of str
        Class folder names. The position of a name in the sequence is used
        as the integer label of the images found in that folder.
    image_size : tuple of int
        Target size passed to ``Image.resize``, i.e. ``(width, height)``.

    Returns
    -------
    images : np.ndarray
        ``uint8`` array of shape ``(n, height, width)``. When nothing is
        found the array is still 3-dimensional, with ``n == 0``.
    labels : np.ndarray
        Integer array of shape ``(n,)`` aligned with ``images``.
    """
    images = []
    labels = []
    for label, class_name in enumerate(class_descriptions):
        folder_path = os.path.join(base_folder, class_name)
        print("Processing class: ", class_name)
        if not os.path.isdir(folder_path):
            # Make the gap visible: a silently missing class would otherwise
            # only show up later as a hole in the label distribution.
            print("Skipping class (folder not found): ", folder_path)
            continue
        # os.listdir returns entries in arbitrary order; sort so the resulting
        # arrays are identical from run to run and machine to machine.
        for filename in sorted(os.listdir(folder_path)):
            # Accept ".PNG" as well as ".png".
            if not filename.lower().endswith(".png"):
                continue
            # The context manager releases the file handle deterministically.
            with Image.open(os.path.join(folder_path, filename)) as img:
                gray = img.convert('L').resize(image_size)
                images.append(np.array(gray, dtype=np.uint8))
            labels.append(label)

    if not images:
        # Keep the (n, height, width) layout even when empty so downstream
        # code that checks ``ndim == 3`` keeps working.
        width, height = image_size
        return np.empty((0, height, width), dtype=np.uint8), np.array(labels, dtype=int)
    return np.array(images), np.array(labels, dtype=int)

In [4]:
# Read every class folder under base_folder into stacked image / label arrays.
images, labels = load_images_from_folders(base_folder, class_descriptions, image_size)

Processing class:  cheetah
Processing class:  deer
Processing class:  giraffe
Processing class:  hyena
Processing class:  jaguar
Processing class:  leopard
Processing class:  tapir
Processing class:  tiger
Processing class:  WhaleShark
Processing class:  zebra


In [5]:
# Sanity check: images and labels must have the same leading dimension.
# NOTE(review): the recorded output shows 40000 items although base_folder points
# at the test folder and the reloaded test archive holds 10000 — this looks like
# stale kernel state from an earlier run on the train folder; confirm by re-running
# the notebook top to bottom.
print(images.shape, labels.shape)

(40000, 32, 32) (40000,)


In [17]:
def save_idx_images(filepath, images):
    """Write a stack of 8-bit images to a gzip-compressed IDX3 file.

    The file consists of a 16-byte big-endian header (magic number 2051,
    image count, rows, columns) followed by the pixels in C order, one
    unsigned byte each.

    Parameters
    ----------
    filepath : str
        Destination path of the ``.gz`` file.
    images : np.ndarray or list
        3-dimensional data of shape ``(n, rows, cols)`` with integer values
        in ``0..255``.

    Raises
    ------
    TypeError
        If ``images`` is neither an ndarray nor a list, or holds
        non-integer values.
    ValueError
        If ``images`` is not 3-dimensional or a value falls outside 0..255.
    """
    if not isinstance(images, (np.ndarray, list)):
        raise TypeError('Unsupported data type.')

    images = np.asarray(images)

    if images.ndim != 3:
        raise ValueError('Images array must be 3-dimensional.')

    if images.dtype != np.uint8:
        # Validate before casting: astype(uint8) would otherwise wrap
        # out-of-range values or truncate floats without any error.
        if images.size:
            if images.dtype.kind not in 'biu':
                raise TypeError('Images must contain integer pixel values.')
            if images.min() < 0 or images.max() > 255:
                raise ValueError('Pixel values must be in the range 0-255.')
        images = images.astype(np.uint8)

    magic_number = 2051
    num_images, rows, cols = images.shape
    header = struct.pack(">IIII", magic_number, num_images, rows, cols)

    with gzip.open(filepath, 'wb') as f:
        f.write(header)
        # tobytes() emits C-order bytes in one step, replacing the
        # per-pixel struct.pack call of the previous implementation.
        f.write(images.tobytes())

In [18]:
# Write the in-memory image stack to the gzip-compressed IDX3 archive.
save_idx_images('../dataset/test-images-idx3-ubyte.gz', images)

In [19]:
def save_idx_labels(filepath, labels):
    """Write a vector of 8-bit labels to a gzip-compressed IDX1 file.

    The file consists of an 8-byte big-endian header (magic number 2049,
    label count) followed by one unsigned byte per label.

    Parameters
    ----------
    filepath : str
        Destination path of the ``.gz`` file.
    labels : np.ndarray or list
        1-dimensional integer labels in ``0..255``.

    Raises
    ------
    TypeError
        If ``labels`` is neither an ndarray nor a list, or holds
        non-integer values.
    ValueError
        If ``labels`` is not 1-dimensional or a value falls outside 0..255.
    """
    if not isinstance(labels, (np.ndarray, list)):
        raise TypeError('Unsupported label type.')

    labels = np.asarray(labels)

    if labels.ndim != 1:
        raise ValueError('Labels array must be 1-dimensional.')

    if labels.dtype != np.uint8:
        # Validate before casting: astype(uint8) would otherwise wrap
        # out-of-range labels silently. Empty input is allowed through
        # (np.asarray([]) is float64 but carries no values to check).
        if labels.size:
            if labels.dtype.kind not in 'biu':
                raise TypeError('Labels must be integers.')
            if labels.min() < 0 or labels.max() > 255:
                raise ValueError('Label values must be in the range 0-255.')
        labels = labels.astype(np.uint8)

    magic_number = 2049
    header = struct.pack(">II", magic_number, labels.shape[0])

    with gzip.open(filepath, 'wb') as f:
        f.write(header)
        f.write(labels.tobytes())

In [20]:
# Write the label vector to the gzip-compressed IDX1 archive.
save_idx_labels('../dataset/test-labels-idx1-ubyte.gz', labels)

In [24]:
def load(path_img, path_lbl):
    """Read a gzip-compressed IDX image file and its matching label file.

    Parameters
    ----------
    path_img : str
        Path to the IDX3 image archive (magic number 2051).
    path_lbl : str
        Path to the IDX1 label archive (magic number 2049).

    Returns
    -------
    images : np.ndarray
        Writable ``uint8`` array of shape ``(n, rows, cols)``.
    labels : array.array
        ``array('B')`` holding the ``n`` labels.

    Raises
    ------
    ValueError
        If a magic number is wrong, a payload length disagrees with its
        header, or the two files hold a different number of items.
    """
    with gzip.open(path_lbl, 'rb') as file:
        magic, num_labels = struct.unpack(">II", file.read(8))
        if magic != 2049:
            raise ValueError('Magic number mismatch, expected 2049, got {}'.format(magic))

        labels = array("B", file.read())

    # A truncated or padded label file would otherwise go unnoticed.
    if len(labels) != num_labels:
        raise ValueError('Label count mismatch, header says {}, file holds {}'.format(
            num_labels, len(labels)))

    with gzip.open(path_img, 'rb') as file:
        magic, size, rows, cols = struct.unpack(">IIII", file.read(16))
        if magic != 2051:
            raise ValueError('Magic number mismatch, expected 2051, got {}'.format(magic))

        pixel_bytes = file.read()

    expected_bytes = size * rows * cols
    if len(pixel_bytes) != expected_bytes:
        raise ValueError('Image data size mismatch, expected {} bytes, got {}'.format(
            expected_bytes, len(pixel_bytes)))

    if size != num_labels:
        raise ValueError('Image/label count mismatch: {} images, {} labels'.format(
            size, num_labels))

    # frombuffer decodes the whole payload at once; the copy makes the result
    # writable and independent of the bytes object it was decoded from.
    images = np.frombuffer(pixel_bytes, dtype=np.uint8).reshape(size, rows, cols).copy()

    return images, labels

In [25]:
# Round-trip check: read the test archives back from disk.
test_images, test_labels = load('../dataset/test-images-idx3-ubyte.gz', '../dataset/test-labels-idx1-ubyte.gz')

In [29]:
# Inspect the reloaded image shape and the number of samples per label value.
np.array(test_images).shape, np.unique(np.array(test_labels), return_counts=True)

((10000, 32, 32),
 (array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9], dtype=uint8),
  array([1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000],
        dtype=int64)))

In [36]:
import tensorflow as tf
from tensorflow.keras.datasets import mnist
from tensorflow.keras.utils import to_categorical
from spots_10_loader import SPOT10Loader
import numpy as np

In [38]:
# Fetch both splits through the project loader.
X_train, y_train = SPOT10Loader.get_data(dataset_dir="../dataset", kind="train")
X_test, y_test = SPOT10Loader.get_data(dataset_dir="../dataset", kind="test")

# Report the per-class counts first, then the array shapes, train before test.
for split_labels in (y_train, y_test):
    print(np.unique(split_labels, return_counts=True))

for split_images, split_labels in ((X_train, y_train), (X_test, y_test)):
    print(split_images.shape, split_labels.shape)

# Scale pixels by 1/255 as float32 (maps to [0, 1] assuming 8-bit input) and
# append the trailing channel axis the convolutional layers expect.
X_train = (X_train.astype('float32') / 255.0).reshape(-1, 32, 32, 1)
X_test = (X_test.astype('float32') / 255.0).reshape(-1, 32, 32, 1)

# Turn the integer class ids into one-hot rows of length 10.
y_train = tf.keras.utils.to_categorical(y_train, num_classes=10)
y_test = tf.keras.utils.to_categorical(y_test, num_classes=10)

(array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9], dtype=uint8), array([4000, 4000, 4000, 4000, 4000, 4000, 4000, 4000, 4000, 4000],
      dtype=int64))
(array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9], dtype=uint8), array([1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000],
      dtype=int64))
(40000, 32, 32) (40000,)
(10000, 32, 32) (10000,)


In [39]:
# Confirm that the trailing channel axis is present after the reshape.
X_train.shape

(40000, 32, 32, 1)

In [40]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout

# Small CNN classifier for 32x32 single-channel images, assembled layer by layer.
model = Sequential()

# Feature extractor: three 3x3 convolutions (32 -> 64 -> 128 filters) with
# 2x2 max-pooling after the first two.
model.add(Conv2D(32, (3, 3), activation='relu', input_shape=(32, 32, 1)))
model.add(MaxPooling2D((2, 2)))
model.add(Conv2D(64, (3, 3), activation='relu'))
model.add(MaxPooling2D((2, 2)))
model.add(Conv2D(128, (3, 3), activation='relu'))

# Classifier head: flatten, one hidden dense layer with dropout, then a
# 10-way softmax output.
model.add(Flatten())
model.add(Dense(128, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(10, activation='softmax'))

In [41]:
# categorical_crossentropy matches the one-hot label encoding produced by
# to_categorical in the data preparation cell.
model.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['accuracy'])

In [47]:
# Train the network and keep the per-epoch metrics in `history`.
# NOTE(review): the test split doubles as validation data here and is also used
# for the final evaluation; if any decision (e.g. when to stop) is based on the
# validation metrics, the reported test accuracy is optimistic — consider a
# separate hold-out split.
# NOTE(review): the recorded run was interrupted by hand around epoch 28 of 200,
# so `history` is not assigned by that run.
history = model.fit(X_train, y_train, epochs=200, batch_size=64, validation_data=(X_test, y_test))

Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200
Epoch 21/200
Epoch 22/200
Epoch 23/200
Epoch 24/200
Epoch 25/200
Epoch 26/200
Epoch 27/200
Epoch 28/200

KeyboardInterrupt: 

In [43]:
# Measure loss and accuracy on the test split.
# NOTE(review): the execution counts show this cell (In [43]) ran before the
# latest fit cell (In [47]), so the recorded accuracy may not correspond to the
# final weights — re-run in order to confirm.
test_loss, test_acc = model.evaluate(X_test, y_test)
print(f'Test accuracy: {test_acc}')

Test accuracy: 0.8077999949455261


In [44]:
import numpy as np

# Predict class probabilities (softmax outputs) for the test set
y_pred = model.predict(X_test)
# Pick the most probable class for each sample
y_pred_classes = np.argmax(y_pred, axis=1)
# Convert the one-hot encoded test labels back to integer class labels
y_true = np.argmax(y_test, axis=1)



In [45]:
from sklearn.metrics import confusion_matrix

# Compute the confusion matrix (true labels first, predicted labels second)
conf_matrix = confusion_matrix(y_true, y_pred_classes)


In [46]:
# Rows are true classes, columns are predicted classes (scikit-learn convention);
# presumably indexed in class_descriptions order — confirm against the loader.
conf_matrix

array([[780,   2,   8,  94,  63,  41,   1,   9,   2,   0],
       [  1, 765,   5,  10,   0,   0,  73,   6, 138,   2],
       [  5,  17, 842,  84,  12,  21,   7,  11,   1,   0],
       [ 68,   9,   5, 859,  20,   0,   1,  38,   0,   0],
       [ 76,   1,  24,  38, 752,  87,   3,  18,   0,   1],
       [169,   1,  30,  46, 349, 398,   0,   1,   6,   0],
       [  0,  95,   5,   0,   2,   0, 865,   5,  11,  17],
       [  0,   5,   0,  15,   4,   0,   3, 924,   0,  49],
       [  2,  47,   1,   0,   0,   1,   7,   0, 942,   0],
       [  1,   0,   0,   0,   3,   0,  11,  33,   1, 951]], dtype=int64)

In [None]:
# Scratch cell: bare string expressions listing the four archive paths.
# Nothing is assigned; evaluating the cell only displays the last string.
'../dataset/test-images-idx3-ubyte.gz'
'../dataset/test-labels-idx1-ubyte.gz'

'../dataset/train-images-idx3-ubyte.gz'
'../dataset/train-labels-idx1-ubyte.gz'