<a href="https://colab.research.google.com/github/mattberg88/Colab/blob/develop/Artist_Identifier.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
! pip install kaggle
! mkdir ~/.kaggle
! cp kaggle.json ~/.kaggle/
! chmod 600 ~/.kaggle/kaggle.json


In [None]:
# Download the "historic-art" dataset archive from Kaggle using the CLI
# configured above. No output path is given here.
# NOTE(review): the extraction step reads /content/historic-art.zip, so this
# presumably runs with /content as the working directory — confirm.
! kaggle datasets download ansonnnnn/historic-art

In [None]:
!unzip "/content/historic-art.zip" -d "/content/"

In [None]:
import pandas as pd
import numpy as np

from  matplotlib import pyplot as plt
import matplotlib.image as mpimg
import random
import os
import torch
import torch.nn as nn
import torch.utils.data as data

import tensorflow as tf
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.optimizers import *
from tensorflow.keras.applications import *
from tensorflow.keras.layers import *
from tensorflow.keras.callbacks import *
from tensorflow.keras.initializers import *
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from keras.models import Sequential
from keras.layers import Dense, Activation

from PIL import Image
# Query the GPUs visible to TensorFlow once and report how many were found.
gpu_devices = tf.config.list_physical_devices('GPU')
print("Num GPUs Available: ", len(gpu_devices))

# Display the working directory as the cell's output value; the relative
# dataset paths used in this notebook are resolved against it.
os.getcwd()

In [None]:
# Load the artwork metadata; the first CSV column is used as the index.
artwork_df = pd.read_csv("complete/artwork_dataset.csv", index_col=0)

# Image files are addressed by row position: row k maps to complete/artwork/k.jpg.
dirs = [f"complete/artwork/{position}.jpg" for position in range(len(artwork_df["artist"]))]
artwork_df["dir"] = dirs
print(artwork_df)

# Sorted array of distinct artist names; used later as the class list.
artists = np.unique(artwork_df["artist"].tolist())

In [None]:
# Input shape expected by the network: (height, width, channels).
train_input_shape = (224, 224, 3)

# Arguments shared by the training and validation iterators, so that both
# read the same dataframe with the same class ordering and image size.
shared_flow_args = dict(
    dataframe=artwork_df,
    x_col="dir",
    y_col="artist",
    class_mode="categorical",
    target_size=train_input_shape[:2],
    batch_size=16,
    classes=list(artists),
)

# Training images are rescaled to [0, 1] and randomly augmented.
train_datagen = ImageDataGenerator(rescale=1./255, shear_range=0.2, horizontal_flip=True, vertical_flip=True, validation_split=0.2)

# Validation images are only rescaled: applying random shear/flips to them
# would make the validation metrics depend on the augmentation noise.
# validation_split must match train_datagen so the two subsets stay disjoint.
valid_datagen = ImageDataGenerator(rescale=1./255, validation_split=0.2)

# NOTE(review): Keras takes the validation subset from the leading rows of the
# dataframe. If the CSV is ordered by artist, the held-out set may not contain
# every artist — consider shuffling the rows first; confirm against the data.
train_generator = train_datagen.flow_from_dataframe(subset="training", shuffle=True, **shared_flow_args)
valid_generator = valid_datagen.flow_from_dataframe(subset="validation", shuffle=False, **shared_flow_args)

In [None]:
# Build the classifier: an ImageNet-pretrained ResNet50 backbone (without its
# original classification head) followed by a small dense head with one
# softmax output per artist.
base_model = tf.keras.applications.resnet50.ResNet50(weights='imagenet', include_top=False, input_shape=train_input_shape)

# Fine-tune the whole backbone rather than freezing it.
for layer in base_model.layers:
    layer.trainable = True

X = base_model.output
X = tf.keras.layers.Flatten()(X)

# All layers are taken from tf.keras so the graph is not a mix of the
# standalone `keras` package and `tf.keras` objects.
X = tf.keras.layers.Dense(512, kernel_initializer='he_uniform')(X)
X = tf.keras.layers.BatchNormalization()(X)
X = tf.keras.layers.Activation('relu')(X)

X = tf.keras.layers.Dense(16, kernel_initializer='he_uniform')(X)
X = tf.keras.layers.BatchNormalization()(X)
X = tf.keras.layers.Activation('relu')(X)

output = tf.keras.layers.Dense(len(artists), activation='softmax')(X)

model = tf.keras.Model(inputs=base_model.input, outputs=output)

# `learning_rate` is the supported argument name; `lr` is a deprecated alias
# that newer Keras releases no longer accept.
optimizer = tf.keras.optimizers.Adam(learning_rate=0.0001)
model.compile(loss='categorical_crossentropy',
              optimizer=optimizer,
              metrics=['accuracy'])


In [None]:
# Mount Google Drive (Colab only) so model weights can be stored outside the
# Colab session's local disk.
from google.colab import drive
drive.mount('/content/gdrive')
# Path on the mounted drive that the notebook's other cells use for both
# saving and loading model weights.
checkpoint_path = "/content/gdrive/My Drive/Artist_Identifier/Checkpoints"


In [None]:
# Resume from previously saved weights when a checkpoint is present.
# On a first run nothing has been saved yet, so loading unconditionally would
# raise and stop "Run all"; in that case training starts from the ImageNet
# initialisation instead.
# NOTE(review): the glob assumes the checkpoint files share checkpoint_path as
# a filename prefix (TensorFlow checkpoint format) — confirm for your TF version.
if tf.io.gfile.glob(checkpoint_path + "*"):
    model.load_weights(checkpoint_path)
else:
    print("No checkpoint found at {!r}; starting from freshly initialised weights.".format(checkpoint_path))

In [None]:
# Train the model, saving weights through the checkpoint callback and lowering
# the learning rate when the validation loss stops improving.

# Steps per epoch must be computed with the batch size the generators actually
# yield. A separate hard-coded value that is larger than the generators' batch
# size makes every epoch cover only a fraction of the data.
batch_size = train_generator.batch_size

# Multiply the learning rate by 0.1 after 5 epochs without val_loss improvement.
reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=5,
                              verbose=1, mode='auto')

# Weights only (no optimizer state / architecture) are written to checkpoint_path.
cp_callback = tf.keras.callbacks.ModelCheckpoint(
    filepath=checkpoint_path,
    verbose=1,
    save_weights_only=True)

# Model.fit accepts generators directly; fit_generator is deprecated.
# max(1, ...) guards against a zero step count on very small subsets.
# NOTE(review): `workers` / `use_multiprocessing` are TF 2.x `fit` arguments;
# drop them if running on a Keras version that no longer accepts them.
history = model.fit(train_generator,
                    steps_per_epoch=max(1, train_generator.samples // batch_size),
                    validation_data=valid_generator,
                    validation_steps=max(1, valid_generator.samples // batch_size),
                    shuffle=True,
                    use_multiprocessing=True,
                    verbose=1,
                    epochs=50,
                    workers=7,
                    callbacks=[cp_callback, reduce_lr])

In [None]:
# Predict the artist of one user-supplied image and show it with the result.
file_to_test = "/content/rembrandt.png"

# Load the image at the spatial size the network was built for, using the
# tf.keras helpers explicitly instead of relying on a star import to define `image`.
test_image = tf.keras.preprocessing.image.load_img(file_to_test, target_size=train_input_shape[0:2])

# Same preprocessing as the data generators: scale to [0, 1], then add a
# leading batch dimension because model.predict expects a batch.
test_image = tf.keras.preprocessing.image.img_to_array(test_image)
test_image /= 255.
test_image = np.expand_dims(test_image, axis=0)

prediction = model.predict(test_image)
prediction_probability = np.amax(prediction)
prediction_idx = np.argmax(prediction)

# Invert the generator's {artist name: class index} mapping so the predicted
# index can be translated back to a name.
labels = {class_idx: artist_name for artist_name, class_idx in train_generator.class_indices.items()}

title = "Predicted artist = {}\nPrediction probability = {:.2f} %" \
            .format(labels[prediction_idx].replace('_', ' '), prediction_probability*100)

# Show the original (un-resized) image with the prediction as its title.
fig, axes = plt.subplots()

axes.imshow(plt.imread(file_to_test))
axes.set_title(title)
axes.axis('off')

plt.show()


In [None]:
# Predict the artist for n randomly chosen artworks from the dataset and show
# each image next to its actual and predicted artist.
n = 5
images_dir = "/content/complete/artwork"

# Invert the generator's {artist name: class index} mapping once; it does not
# change between iterations.
labels = {class_idx: artist_name for artist_name, class_idx in train_generator.class_indices.items()}

# squeeze=False keeps `axes` two-dimensional so indexing also works for n == 1.
fig, axes = plt.subplots(1, n, figsize=(25,10), squeeze=False)
for i in range(n):
    # Pick a random row position. Looking the artist up with list.index() would
    # always return that artist's first artwork, so the same image per artist
    # would be shown every time. Image files are named by row position.
    random_artist_id = random.randrange(len(artwork_df))
    random_artist = artwork_df["artist"].iloc[random_artist_id]
    random_image_file = os.path.join(images_dir, str(random_artist_id) + ".jpg")

    # Load at the network's input size and apply the generators' rescaling.
    test_image = tf.keras.preprocessing.image.load_img(random_image_file, target_size=train_input_shape[0:2])
    test_image = tf.keras.preprocessing.image.img_to_array(test_image)
    test_image /= 255.
    # model.predict expects a batch, so add a leading batch dimension.
    test_image = np.expand_dims(test_image, axis=0)

    prediction = model.predict(test_image)
    prediction_probability = np.amax(prediction)
    prediction_idx = np.argmax(prediction)

    title = "Actual artist = {}\nPredicted artist = {}\nPrediction probability = {:.2f} %" \
                .format(random_artist.replace('_', ' '), labels[prediction_idx].replace('_', ' '),
                        prediction_probability*100)

    # Show the original (un-resized) image with the result as its title.
    ax = axes[0, i]
    ax.imshow(plt.imread(random_image_file))
    ax.set_title(title)
    ax.axis('off')

plt.show()