In [1]:
#imports
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import json
import random
import tensorflow as tf
import csv

import tensorflow as tf
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import *
from tensorflow.keras.optimizers import *
from tensorflow.keras.applications import *
from tensorflow.keras.callbacks import *
from tensorflow.keras.initializers import *
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.python.keras.applications.resnet import ResNet50, preprocess_input

In [39]:
from collections import defaultdict


# Load the artist metadata and keep only single-genre artists.
artist_csv = pd.read_csv('../data/artists.csv')

# One chained expression that builds a fresh frame:
#   * rows whose genre contains a comma (several genres) are dropped,
#   * only the name / genre / paintings columns are kept,
#   * spaces become underscores in both name and genre,
#   * the index is renumbered without keeping the old one as a column.
# Building the frame this way avoids assigning into a column-subset of
# another frame, which triggers pandas' SettingWithCopyWarning.
artist_csv = (
    artist_csv.loc[~artist_csv["genre"].str.contains(","), ["name", "genre", "paintings"]]
    .assign(
        name=lambda df: df["name"].str.replace(" ", "_"),
        genre=lambda df: df["genre"].str.replace(" ", "_"),
    )
    .reset_index(drop=True)
)

# Total number of paintings per genre, keyed by the underscore-separated genre.
# sort=False keeps the genres in order of first appearance.
dic = artist_csv.groupby("genre", sort=False)["paintings"].sum().to_dict()


In [63]:
# Load the artist metadata, keep single-genre artists only, and normalise the
# genre label (spaces -> underscores).
artist_csv = pd.read_csv('../data/artists.csv')
artist_csv = (
    artist_csv.loc[~artist_csv["genre"].str.contains(","), ["name", "genre", "paintings"]]
    .assign(genre=lambda df: df["genre"].str.replace(" ", "_"))
    .reset_index(drop=True)
)

# One row per genre with the total number of paintings. Only the numeric
# "paintings" column is aggregated; summing the whole frame would also try to
# "sum" the string "name" column. Rows come back sorted by genre with a
# 0..n-1 index, which is what the class-weight keys below rely on.
genre_csv = artist_csv.groupby("genre", as_index=False)["paintings"].sum()

# Inverse-frequency weight: the largest genre gets 1.0, smaller genres get
# proportionally larger weights.
# NOTE(review): the counts come from the CSV "paintings" column, not from the
# image files actually on disk - confirm the two agree.
genre_csv["class_weight"] = genre_csv["paintings"].max() / genre_csv["paintings"]

# {row position in genre_csv: weight}
class_weights = genre_csv["class_weight"].to_dict()
class_weights

{0: 57.083333333333336,
 1: 2.3378839590443685,
 2: 13.83838383838384,
 3: 3.120728929384966,
 4: 8.353658536585366,
 5: 7.098445595854923,
 6: 4.5514950166112955,
 7: 1.0,
 8: 15.74712643678161,
 9: 16.30952380952381,
 10: 2.014705882352941,
 11: 7.569060773480663,
 12: 1.3072519083969465,
 13: 4.433656957928803,
 14: 11.512605042016807,
 15: 23.220338983050848,
 16: 3.5309278350515463,
 17: 10.873015873015873,
 18: 5.6846473029045645,
 19: 8.011695906432749}

In [64]:
# Genre labels as a plain Python list, in the same row order as genre_csv.
genre_names = list(genre_csv["genre"])
genre_names

['Abstract_Expressionism',
 'Baroque',
 'Byzantine_Art',
 'Cubism',
 'Early_Renaissance',
 'Expressionism',
 'High_Renaissance',
 'Impressionism',
 'Mannerism',
 'Neoplasticism',
 'Northern_Renaissance',
 'Pop_Art',
 'Post-Impressionism',
 'Primitivism',
 'Proto_Renaissance',
 'Realism',
 'Romanticism',
 'Suprematism',
 'Surrealism',
 'Symbolism']

### Augmentation

In [140]:
# Hyperparameters
batch_size = 16
train_input_shape = (224, 224, 3)
n_classes = genre_csv.shape[0]

img_dir = '../data/genre_images/'

# Training generator: ResNet50 preprocessing plus random augmentation.
# No `rescale` here: ImageDataGenerator applies `rescale` AFTER
# `preprocessing_function`, and ResNet50's preprocess_input already expects raw
# 0-255 pixels (it converts RGB->BGR and subtracts the ImageNet channel means).
# Dividing that result by 255 would feed the pretrained base inputs on a very
# different scale from what it was trained on.
train_datagen = ImageDataGenerator(preprocessing_function=preprocess_input,
                                   validation_split=0.2,
                                   shear_range=5,
                                   horizontal_flip=True,
                                   vertical_flip=True)

# Validation generator: identical preprocessing and split fraction, but no
# random augmentation, so validation metrics are measured on unmodified images.
valid_datagen = ImageDataGenerator(preprocessing_function=preprocess_input,
                                   validation_split=0.2)

# Arguments shared by both directory iterators. `classes=genre_names` pins the
# class-index order to the order of genre_names.
flow_kwargs = dict(directory=img_dir,
                   class_mode='categorical',
                   classes=genre_names,
                   target_size=train_input_shape[0:2],
                   batch_size=batch_size)

train_generator = train_datagen.flow_from_directory(subset='training',
                                                    shuffle=True,
                                                    **flow_kwargs)

# shuffle=False keeps the validation batches in a fixed order from epoch to epoch.
valid_generator = valid_datagen.flow_from_directory(subset='validation',
                                                    shuffle=False,
                                                    **flow_kwargs)

# Whole batches per epoch (a trailing partial batch is not counted).
STEP_SIZE_TRAIN = train_generator.n//train_generator.batch_size
STEP_SIZE_VALID = valid_generator.n//valid_generator.batch_size
print("Total number of batches =", STEP_SIZE_TRAIN, "and", STEP_SIZE_VALID)


Found 3983 images belonging to 20 classes.
Found 984 images belonging to 20 classes.
Total number of batches = 248 and 61


In [141]:
# ResNet50 convolutional base with ImageNet weights and no classifier top.
base_model = ResNet50(
    include_top=False,
    weights='imagenet',
    input_shape=train_input_shape,
)

# Mark every layer of the base as non-trainable.
for layer in base_model.layers:
    layer.trainable = False

In [150]:
# Classification head stacked on the output of base_model.
X = Flatten()(base_model.output)

# Three Dense -> BatchNormalization -> LeakyReLU stages of decreasing width.
for units in (512, 64, 16):
    X = Dense(units, kernel_initializer='he_uniform')(X)
    X = BatchNormalization()(X)
    X = tf.keras.layers.LeakyReLU()(X)

# Softmax over the n_classes genres.
output = Dense(n_classes, activation='softmax')(X)

model = Model(inputs=base_model.input, outputs=output)

In [151]:
# Adam with a small step size. `learning_rate` is the supported argument name;
# `lr` is a deprecated alias that newer Keras optimizers no longer accept.
optimizer = Adam(learning_rate=0.0001)

# Multi-class, one-hot targets: categorical cross-entropy loss, tracking
# categorical accuracy.
model.compile(loss='categorical_crossentropy',
              optimizer=optimizer,
              metrics=['categorical_accuracy'])

In [152]:
n_epoch = 10

# Stop once val_loss has not improved for `patience` epochs and roll back to
# the best weights seen. The patience must be smaller than n_epoch, otherwise
# the callback can never fire during the run (it was 20 for a 10-epoch run).
early_stop = EarlyStopping(monitor='val_loss', patience=5, verbose=1,
                           mode='auto', restore_best_weights=True)

# Cut the learning rate by 10x when val_loss plateaus. Its patience is kept
# below the early-stopping patience so a lower learning rate gets a chance
# before training is stopped.
reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=2,
                              verbose=1, mode='auto')


In [153]:
# Baseline training run (no class weighting).
# * `batch_size` is not passed: the generators already yield batches, and Keras
#   documents that it must not be specified for generator / Sequence input.
# * The callbacks defined in the previous cell are wired in here.
# * The returned History is kept so the loss/metric curves can be inspected.
history = model.fit(
    train_generator,
    steps_per_epoch=STEP_SIZE_TRAIN,
    epochs=n_epoch,
    validation_data=valid_generator,
    validation_steps=STEP_SIZE_VALID,
    callbacks=[early_stop, reduce_lr],
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x12c4a4940>

In [None]:
# Training run with per-class loss weighting to counter genre imbalance.
# class_weights maps class index -> weight.
# * `batch_size` is not passed: the generators already yield batches, and Keras
#   documents that it must not be specified for generator / Sequence input.
# * The early-stopping and learning-rate callbacks are wired in here.
# NOTE: `model` is the same object fitted in the previous cell, so this call
# continues training from those weights. Re-run the model-building and compile
# cells first if an independent comparison against the unweighted run is wanted.
history_weighted = model.fit(
    train_generator,
    steps_per_epoch=STEP_SIZE_TRAIN,
    epochs=n_epoch,
    validation_data=valid_generator,
    validation_steps=STEP_SIZE_VALID,
    class_weight=class_weights,
    callbacks=[early_stop, reduce_lr],
)