In [3]:
!pip install tensorflow
!pip install opencv-python

from google.colab import drive
drive.mount('/content/drive')
import matplotlib.pyplot as plt
import numpy as np

import tensorflow as tf
import tensorflow_datasets as tfds

from tensorflow import keras

Collecting opencv-python
  Downloading opencv_python-4.12.0.88-cp37-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl.metadata (19 kB)
Downloading opencv_python-4.12.0.88-cp37-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl (67.0 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m67.0/67.0 MB[0m [31m21.2 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: opencv-python
Successfully installed opencv-python-4.12.0.88
Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [4]:
#dict {artist : [images]}

path_to_dataset = "/content/drive/MyDrive/ml-datasets/resized_training"
import os
import cv2

all_artists = sorted(os.listdir(path_to_dataset))
print(len(all_artists), all_artists)

raw_dataset = {}
for artist in all_artists :
  raw_dataset[artist] = []
  all_images = sorted(os.listdir(os.path.join(path_to_dataset, artist))) #sorting the array that points to the files so we always access the images in the same order, done for data reproducibility
  for image in all_images :
    raw_dataset[artist].append(cv2.imread(os.path.join(path_to_dataset, artist, image)))
# print(raw_dataset)

dataset_size = 0
for images in raw_dataset.values():
  dataset_size += len(images)
print('dataset size : ', dataset_size)

27 ['Caravaggio', 'Claude_Monet', 'Coriano', 'Diego_Rivera', 'Edouard_Manet', 'El_Greco', 'Eugene_Delacroix', 'Francisco_Goya', 'Frida_Kahlo', 'Gentullicci', 'Giotto_di_Bondone', 'Henri_Rousseau', 'Hieronymus_Bosch', 'Pablo_Picasso', 'Peter_Paul_Rubens', 'Pierre-Auguste_Renoir', 'Platin', 'Raphael', 'Rembrandt', 'Rene_Magritte', 'Rosemary', 'Roux', 'Salvador_Dali', 'Sandro_Botticelli', 'Titian', 'Valois', 'Vincent_van_Gogh']
dataset size :  3079


In [5]:
import sklearn.preprocessing

#arr => np.array(arr)
for artist, images in raw_dataset.items():
  raw_dataset[artist] = np.array(images)

#shuffle using a seed for reproducibility
rng = np.random.default_rng(42)
for images in raw_dataset.values():
  rng.shuffle(images)

#75% training || 25% testing
train_data = []
test_data = []
for artist, images in raw_dataset.items():
  split_point = int (0.75 * len(images))
  for img in images[:split_point]:
    train_data.append([img, artist])
  for img in images[split_point:]:
    test_data.append([img, artist])

print('train set size : ', len(train_data))
print('test set size : ', len(test_data))
print('dataset size : ', len(train_data) + len(test_data))

#split train and test pairs into individual arrays
X_train = np.array([img[0] for img in train_data])
y_train = np.array([img[1] for img in train_data])

X_test = np.array([img[0] for img in test_data])
y_test = np.array([img[1] for img in test_data])

X_train = X_train.astype('float32') / 255
X_test = X_test.astype('float32') / 255

# Encode string labels to integers
label_encoder = sklearn.preprocessing.LabelEncoder()
y_train_encoded = label_encoder.fit_transform(y_train)
y_test_encoded = label_encoder.transform(y_test)

# Assign the encoded labels back to y_train and y_test
y_train = y_train_encoded
y_test = y_test_encoded

print('y_train after encoding:', y_train)
print('y_test after encoding:', y_test)


train set size :  2299
test set size :  780
dataset size :  3079
y_train after encoding: [ 0  0  0 ... 26 26 26]
y_test after encoding: [ 0  0  0  0  0  0  0  0  0  0  1  1  1  1  1  1  1  1  1  1  1  1  1  1
  1  2  2  2  2  2  2  2  2  3  3  3  3  3  3  3  3  3  3  3  3  3  3  4
  4  4  4  4  4  4  4  4  4  4  4  4  4  4  4  4  4  4  5  5  5  5  5  5
  5  5  5  5  5  5  5  5  5  5  5  5  6  6  6  6  6  6  6  7  7  7  7  7
  7  7  7  7  7  7  7  7  7  7  7  7  7  7  7  7  7  7  7  7  7  7  7  7
  7  7  7  7  7  7  7  7  7  7  7  7  7  7  7  7  7  7  7  7  7  7  7  7
  7  7  7  7  7  7  7  7  8  8  8  8  8  8  8  8  8  8  8  8  8  8  8  8
  8  8  8  8  8  8  8  9  9  9  9  9  9  9  9  9  9 10 10 10 10 10 10 10
 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 11 11 11 11 11 11
 11 11 11 11 11 11 11 11 12 12 12 12 12 12 12 12 12 12 12 12 12 12 12 12
 12 12 12 12 12 12 12 12 12 12 12 13 13 13 13 13 13 13 13 13 13 13 13 13
 13 13 13 13 13 13 13 13 13 13 13 13 13 13 13 13 13 13 13 13 

In [6]:
#random test
model = keras.Sequential([
    keras.layers.Conv2D(64, 3, activation='relu', input_shape=(224, 224, 3)),
    keras.layers.Conv2D(32, 3, activation='relu'),
    keras.layers.Flatten(),
    keras.layers.Dense(27, activation='softmax')
])

model.compile(
    optimizer='adam',
    loss=keras.losses.SparseCategoricalCrossentropy(),
    metrics=['accuracy']
)

model.fit(
    X_train,
    y_train,
    epochs=5,
    batch_size=32
)

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/5
[1m72/72[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m184s[0m 3s/step - accuracy: 0.1655 - loss: 10.3421
Epoch 2/5
[1m72/72[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m181s[0m 3s/step - accuracy: 0.4605 - loss: 1.9524
Epoch 3/5
[1m72/72[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m182s[0m 3s/step - accuracy: 0.8930 - loss: 0.5010
Epoch 4/5
[1m72/72[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m185s[0m 3s/step - accuracy: 0.9863 - loss: 0.0701
Epoch 5/5
[1m72/72[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m183s[0m 3s/step - accuracy: 0.9990 - loss: 0.0127


<keras.src.callbacks.history.History at 0x7de94b62f410>

In [8]:
model.evaluate(X_test, y_test)

[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 522ms/step - accuracy: 0.2402 - loss: 7.3086


[5.380802631378174, 0.36282050609588623]