In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import matplotlib.pyplot as plt
import seaborn as sns
from PIL import Image

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
# Load the artist-level metadata table and preview its first rows.
df = pd.read_csv('../input/best-artworks-of-all-time/artists.csv')
df.head(5)

In [None]:
# Sum of the 'paintings' column over every row of the metadata table.
df['paintings'].sum()

In [None]:
# Keep only the artists (rows) assigned to a single artistic current: a genre
# that lists several currents separates them with commas.
# regex=False matches a literal comma; na=True marks a missing genre as
# "to drop" so the mask is always boolean and `~` cannot fail on NaN.
df = df[~df['genre'].str.contains(',', regex=False, na=True)]

# Keep only the columns of interest. errors='ignore' makes the cell safe to
# re-run: once the columns are gone a second drop would otherwise raise KeyError.
df = df.drop(columns=['id', 'years', 'bio', 'wikipedia'], errors='ignore')

In [None]:
# Sanity check: the number of image files stored for one artist should match
# the 'paintings' count recorded in the metadata table.
# NOTE(review): assumes the row labelled 0 describes Amedeo Modigliani — confirm against artists.csv.
modigliani_dir = "../input/best-artworks-of-all-time/images/images/Amedeo_Modigliani"

# First tuple yielded by os.walk describes the folder itself: (dirpath, dirnames, filenames)
_, _, files = next(os.walk(modigliani_dir))
file_count = len(files)

# Single label-based lookup instead of chained indexing
expected_count = df.loc[0, 'paintings']

# Only the last expression of a cell is displayed, so print both numbers explicitly
print("files on disk: {} | paintings listed: {}".format(file_count, expected_count))
file_count == expected_count

## DataFrame Creation

In [None]:
FOLDER = '../input/best-artworks-of-all-time/resized/resized/'

# Parallel lists, one entry per painting
artist, current, path_art = [], [], []

# Each artist contributes `n_paintings` rows; image files are numbered from 1
for name, genre, n_paintings in zip(df['name'], df['genre'], df['paintings']):
    for nb_painting in range(1, n_paintings + 1):
        artist.append(name)
        current.append(genre)
        path_art.append("{}{}_{}.jpg".format(FOLDER, name, nb_painting))

# Column name -> values, ready to be turned into a DataFrame
data = {'artist': artist, 'genre': current, 'jpg': path_art}

In [None]:
# Build the per-painting table (columns: artist, genre, jpg path) and preview it.
df2 = pd.DataFrame(data)
df2.head(5)

In [None]:
# Replace every space in the generated paths with an underscore
# (presumably the image files are named with underscores, like the 'Amedeo_Modigliani' folder — TODO confirm).
df2['jpg'] = df2['jpg'].str.replace(' ', '_')

In [None]:
# Open the first painting to check that the generated paths point to real files.
# The context manager closes the underlying file handle when the block ends
# (the bare Image.open() call left it open).
with Image.open(df2['jpg'][0]) as image:
    # summarize some details about the image
    print(image.format)
    print(image.mode)
    print(image.size)

    # show the image; done inside the block because the pixel data is read
    # from the still-open file at this point
    plt.imshow(image)

In [None]:
# export dataset
# Writes the per-painting table to a CSV file in the current working directory, without the index column.
df2.to_csv('paintings_informations.csv', index=False)

## Machine Learning

In [None]:
# Preview the first rows of the per-painting table.
df2.head()

In [None]:
# (number of rows, number of columns) of the per-painting table.
df2.shape

In [None]:
# Distinct genre labels present in the table (the generators below use this column as the target).
df2.genre.unique()

In [None]:
# Number of distinct genre labels (the same expression later sizes the softmax output layer).
len(df2.genre.unique())

In [None]:
from keras.models import Sequential
from keras_preprocessing.image import ImageDataGenerator
from keras.layers import Dense, Activation, Flatten, Dropout, BatchNormalization
from keras.layers import Conv2D, MaxPooling2D
from keras import layers, regularizers, optimizers


In [None]:
# Generator shared by the training and validation subsets: multiplies pixel values
# by 1/255 and reserves 25% of the rows for validation.
datagen=ImageDataGenerator(rescale=1./255.,validation_split=0.25)

In [None]:
# validation_split slices the dataframe by position (leading fraction ->
# "validation", remainder -> "training") and does not shuffle beforehand.
# df2 is ordered artist by artist, so without shuffling the validation subset
# would consist of whole artists the model never sees during training.
# Shuffle the rows once, with a fixed seed, before the split is applied.
shuffled_paintings = df2.sample(frac=1, random_state=42).reset_index(drop=True)

# Settings shared by every generator below
flow_options = dict(
    directory=None,        # the "jpg" column already holds complete file paths
    x_col="jpg",
    batch_size=32,
    seed=42,
    target_size=(224, 224),
)

train_generator = datagen.flow_from_dataframe(
    dataframe=shuffled_paintings,
    y_col="genre",
    subset="training",
    shuffle=True,
    class_mode="categorical",
    **flow_options)

valid_generator = datagen.flow_from_dataframe(
    dataframe=shuffled_paintings,
    y_col="genre",
    subset="validation",
    shuffle=True,
    class_mode="categorical",
    **flow_options)

test_datagen = ImageDataGenerator(rescale=1./255.)

# NOTE(review): this generator iterates over ALL rows of df2, including the
# ones used for training, so it is not a held-out test set.
# shuffle=False keeps the batches in the same order as the rows of df2.
test_generator = test_datagen.flow_from_dataframe(
    dataframe=df2,
    y_col=None,
    shuffle=False,
    class_mode=None,
    **flow_options)

### Building model

In [None]:
# from keras.applications import ResNet50
# Image shape as (height, width, color channels).
# NOTE(review): no later cell visible here reads this variable; the model cell repeats the shape literally.
input_shape = (224,224,3)

# Disabled transfer-learning variant: a pre-trained ResNet50 base with every layer frozen.
# base_model = ResNet50(weights='imagenet', include_top=False, input_shape=input_shape)

# for layer in base_model.layers:
#     layer.trainable = False

In [None]:
# model = Sequential([
#     base_model,
#     layers.Flatten(),
#     layers.Dense(512, activation='relu'),
#     layers.BatchNormalization(),
#     layers.Dense(16, activation='relu'),
#     layers.BatchNormalization(),
#     layers.Dense(len(df2.genre.unique()), activation='softmax'),
# ])

In [None]:
# One output unit per distinct genre in the per-painting table
n_genres = len(df2['genre'].unique())

model = Sequential([
    # Convolutional block 1 — the first layer also declares the input
    # dimensions: [height, width, color channels(RGB)]
    Conv2D(32, 5, padding='same', activation='relu', input_shape=[224, 224, 3]),
    MaxPooling2D(),

    # Convolutional block 2
    Conv2D(64, 3, padding='same', activation='relu'),
    MaxPooling2D(),

    # Convolutional block 3
    Conv2D(128, 3, padding='same', activation='relu'),
    MaxPooling2D(),

    # Classifier head: flatten the feature maps, two dense layers each followed
    # by batch normalization, then a softmax over the genres
    Flatten(),
    Dense(64, activation='relu'),
    BatchNormalization(),
    Dense(32, activation='relu'),
    BatchNormalization(),
    Dense(n_genres, activation='softmax'),
])
model.summary()

In [None]:
# # Freeze core ResNet layers
# for layer in model.layers:
#     layer.trainable = False

# for layer in model.layers[:50]:
#     layer.trainable = True

In [None]:
from keras import losses, optimizers

# The generators yield categorical (one-hot) targets, hence categorical
# cross-entropy; Adam is used with its default settings.
model.compile(
    optimizer=optimizers.Adam(),
    loss=losses.CategoricalCrossentropy(),
    metrics=["accuracy"],
)

In [None]:
from keras import callbacks

# Stop training once the validation loss has not improved by at least 0.01
# for 5 consecutive epochs, and roll the model back to its best weights.
early_stopping = callbacks.EarlyStopping(
    monitor='val_loss',  # the default, spelled out for readability
    min_delta=0.01,
    patience=5,
    restore_best_weights=True,
)

In [None]:
# Number of batches needed to go through every sample once.
# Rounded UP: floor division dropped the final partial batch on every pass and
# evaluated to 0 steps whenever a subset held fewer samples than one batch.
STEP_SIZE_TRAIN = int(np.ceil(train_generator.n / train_generator.batch_size))
STEP_SIZE_VALID = int(np.ceil(valid_generator.n / valid_generator.batch_size))
STEP_SIZE_TEST = int(np.ceil(test_generator.n / test_generator.batch_size))

In [None]:
# Train the network. Model.fit accepts Python generators directly;
# Model.fit_generator is deprecated in favour of it.
history = model.fit(
    train_generator,
    steps_per_epoch=STEP_SIZE_TRAIN,
    validation_data=valid_generator,
    validation_steps=STEP_SIZE_VALID,
    epochs=10,
    shuffle=True,
    verbose=1,
    callbacks=[early_stopping],  # may end training before the 10 epochs are reached
)

In [None]:
# history =  model.fit(
#             x=train_data,
#             epochs=10,
#             validation_data=val_data,
#             validation_freq=1, # check validation metrics every epoch
#             verbose=1,
#             callbacks=[early_stopping])

In [None]:
# One row per epoch, one column per tracked metric
history_df = pd.DataFrame(history.history)

# Training vs. validation loss
history_df[['loss', 'val_loss']].plot();

In [None]:
# Training vs. validation accuracy
history_df[['accuracy', 'val_accuracy']].plot();

In [None]:
from keras import preprocessing

# Original image
path = df2['jpg'][0]
#print(path)
test_image = preprocessing.image.load_img(path, target_size=(224,224))

# Prediction
test_image = preprocessing.image.img_to_array(test_image)
test_image /= 255.
test_image = np.expand_dims(test_image, axis=0)

prediction = model.predict(test_image)
prediction_probability = np.amax(prediction)
prediction_idx = np.argmax(prediction)

labels = train_generator.class_indices
labels = dict((v,k) for k,v in labels.items())
    
title = "Predicted current = {}\nPrediction probability = {:.2f} %" \
                .format(labels[prediction_idx].replace('_', ' '),
                        prediction_probability*100)

plt = plt
plt.imshow(plt.imread(path))
print(title)

In [None]:
# Original image
path = df2['jpg'][5]
#print(path)
test_image = preprocessing.image.load_img(path, target_size=(224,224))

# Prediction
test_image = preprocessing.image.img_to_array(test_image)
test_image /= 255.
test_image = np.expand_dims(test_image, axis=0)

prediction = model.predict(test_image)
prediction_probability = np.amax(prediction)
prediction_idx = np.argmax(prediction)

labels = train_generator.class_indices
labels = dict((v,k) for k,v in labels.items())
    
title = "Predicted current = {}\nPrediction probability = {:.2f} %" \
                .format(labels[prediction_idx].replace('_', ' '),
                        prediction_probability*100)

plt = plt
plt.imshow(plt.imread(path))
print(title)

In [None]:
# Original image
path = df2['jpg'][2145]
#print(path)
test_image = preprocessing.image.load_img(path, target_size=(224,224))

# Prediction
test_image = preprocessing.image.img_to_array(test_image)
test_image /= 255.
test_image = np.expand_dims(test_image, axis=0)

prediction = model.predict(test_image)
prediction_probability = np.amax(prediction)
prediction_idx = np.argmax(prediction)

labels = train_generator.class_indices
labels = dict((v,k) for k,v in labels.items())
    
title = "Predicted current = {}\nPrediction probability = {:.2f} %" \
                .format(labels[prediction_idx].replace('_', ' '),
                        prediction_probability*100)

plt = plt
plt.imshow(plt.imread(path))
print(title)

In [None]:
# from keras import models
# model.save('my_model.h5')

In [None]:
# import pickle

In [None]:
# save the model to disk

# filename = 'model_custom_cnn_24_08_21.sav'
# pickle.dump(model, open(filename, 'wb'))

In [None]:
# filepath='../output/'

In [None]:
# models.save_model(model, filepath)

In [None]:
# model.save('./model_2.h5')

In [None]:
# import dill
# fname = 'model_custom_cnn_24_08_21'
# file = open(fname + '.pkl', 'wb')
# dill.dump(model, file)
# file.close()