In [None]:
!pip install tensorflow
!pip install opencv-python

In [2]:
from google.colab import drive
drive.mount('/content/drive')
import matplotlib.pyplot as plt
import numpy as np

import tensorflow as tf
import tensorflow_datasets as tfds

from tensorflow import keras

import os
import cv2

Mounted at /content/drive


In [3]:
def import_data(dataset: str):
  #dataset:str => dict {artist : [images]}

  all_artists = sorted(os.listdir(dataset))
  print(len(all_artists), all_artists)

  raw_dataset = {}
  for artist in all_artists :
    raw_dataset[artist] = []
    all_images = sorted(os.listdir(os.path.join(dataset, artist))) #sorting the array that points to the files so we always access the images in the same order, done for data reproducibility
    for image in all_images :
      raw_dataset[artist].append(cv2.imread(os.path.join(dataset, artist, image)))

  return raw_dataset

In [14]:
dummy_data = "/content/drive/MyDrive/ml-datasets/painters/dummy_data"
path_to_dataset = "/content/drive/MyDrive/ml-datasets/painters/resized_training"

path_to_dataset = dummy_data

raw_dataset = import_data(path_to_dataset)

dataset_size = 0
for images in raw_dataset.values():
  dataset_size += len(images)
print('dataset size : ', dataset_size)

print(raw_dataset)

3 ['Caravaggio', 'Claude_Monet', 'Coriano']
dataset size :  36
{'Caravaggio': [array([[[0, 0, 0],
        [0, 0, 0],
        [0, 0, 0],
        ...,
        [0, 0, 0],
        [0, 0, 0],
        [0, 0, 0]],

       [[0, 0, 0],
        [0, 0, 0],
        [0, 0, 0],
        ...,
        [0, 0, 0],
        [0, 0, 0],
        [0, 0, 0]],

       [[0, 0, 0],
        [0, 0, 0],
        [0, 0, 0],
        ...,
        [0, 0, 0],
        [0, 0, 0],
        [0, 0, 0]],

       ...,

       [[0, 0, 0],
        [0, 0, 0],
        [0, 0, 0],
        ...,
        [0, 0, 0],
        [0, 0, 0],
        [0, 0, 0]],

       [[0, 0, 0],
        [0, 0, 0],
        [0, 0, 0],
        ...,
        [0, 0, 0],
        [0, 0, 0],
        [0, 0, 0]],

       [[0, 0, 0],
        [0, 0, 0],
        [0, 0, 0],
        ...,
        [0, 0, 0],
        [0, 0, 0],
        [0, 0, 0]]], dtype=uint8), array([[[0, 0, 0],
        [0, 0, 0],
        [0, 0, 0],
        ...,
        [0, 0, 0],
        [0, 0, 0],
        [0, 

In [24]:
import sklearn.preprocessing

#arr => np.array(arr)
for artist, images in raw_dataset.items():
  raw_dataset[artist] = np.array(images)

#shuffle using a seed for reproducibility
rng = np.random.default_rng(42)
for images in raw_dataset.values():
  rng.shuffle(images)

#75% training || 25% testing
train_data = []
test_data = []
for artist, images in raw_dataset.items():
  split_point = int (0.75 * len(images))
  for img in images[:split_point]:
    train_data.append([img, artist])
  for img in images[split_point:]:
    test_data.append([img, artist])

print('train set size : ', len(train_data))
print('test set size : ', len(test_data))
print('dataset size : ', len(train_data) + len(test_data))

#split train and test pairs into individual arrays
rng.shuffle(train_data)
X_train = np.array([img[0] for img in train_data])
y_train = np.array([img[1] for img in train_data])

rng.shuffle(test_data)
X_test = np.array([img[0] for img in test_data])
y_test = np.array([img[1] for img in test_data])

X_train = X_train.astype('float32') / 255
X_test = X_test.astype('float32') / 255

# Encode string labels to integers
label_encoder = sklearn.preprocessing.LabelEncoder()
y_train_encoded = label_encoder.fit_transform(y_train)
y_test_encoded = label_encoder.transform(y_test)

# Assign the encoded labels back to y_train and y_test
y_train = y_train_encoded
y_test = y_test_encoded

print('y_train after encoding:', y_train)
print('y_test after encoding:', y_test)


train set size :  27
test set size :  9
dataset size :  36
y_train after encoding: [2 0 2 1 1 0 0 2 2 0 0 2 2 2 0 1 1 1 0 0 0 2 1 1 2 1 1]
y_test after encoding: [1 1 1 0 2 2 0 0 2]


In [41]:
#random test
model = tf.keras.applications.EfficientNetB0(
    include_top=True,
    weights='imagenet',
    input_tensor=None,
    input_shape=(224, 224, 3),
    pooling=None,
    classes=1000,
    classifier_activation='softmax'
)

model.trainable = False


model.compile(
    optimizer='adam',
    loss=keras.losses.SparseCategoricalCrossentropy(),
    metrics=['accuracy']
)

model.fit(
    X_train,
    y_train,
    epochs=5,
    batch_size=1
)

Epoch 1/5
[1m27/27[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m62s[0m 343ms/step - accuracy: 0.1541 - loss: 5.2652
Epoch 2/5
[1m27/27[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 378ms/step - accuracy: 0.4840 - loss: 1.3380
Epoch 3/5
[1m27/27[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 370ms/step - accuracy: 0.8182 - loss: 0.5835
Epoch 4/5
[1m27/27[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 322ms/step - accuracy: 0.9472 - loss: 0.2151
Epoch 5/5
[1m27/27[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 390ms/step - accuracy: 1.0000 - loss: 0.0395


<keras.src.callbacks.history.History at 0x78a249cb2690>

In [42]:
model.evaluate(X_test, y_test)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 5s/step - accuracy: 0.0000e+00 - loss: 9.9300


[9.929970741271973, 0.0]