### Baseline Model

In [40]:
from tensorflow.keras.applications.vgg16 import VGG16
from tensorflow.keras import models, layers, optimizers

from sklearn.model_selection import train_test_split

import pandas as pd
import numpy as np

import os
import matplotlib.pyplot as plt
import seaborn as sns
from numpy import random
import random

In [41]:
main = os.listdir("../raw_data/wikiart")

genres = []
for genre in main:
    if genre[0] != ".":
        genres.append(genre)

data = sorted(genres)

In [42]:
poc_list = ["Realism", "Impressionism"]

In [43]:
from PIL import Image
import glob, os

paintings_list = []
size = 128, 128

for index, genre in enumerate(poc_list):
    print(f"the genre is {genre}")
    g = os.listdir("../raw_data/wikiart/" + str(genre))
    counter = 0
    if len(g) > 2966:
        i =random.sample(range(len(g)), 2966)
        for num in i:
            with Image.open("../raw_data/wikiart/" + genre + "/" + g[num]) as im:
                img_resized = im.resize(size)
                image_array = np.array(img_resized)
                #print(image_array.shape)
                #paintings_list.append(image_array)
                string = g[num][:-4]
                string = string.replace("-"," ")
                string = string.split("_")
                string.insert(0, genre)
                string.insert(3, image_array)
                paintings_list.append(string)
                counter += 1
                if counter % 100 == 0:
                    print(counter+1)

the genre is Realism
101
201
301
401
501
601
701
801
901
1001
1101
1201
1301
1401
1501
1601
1701
1801
1901
2001
2101
2201
2301
2401
2501
2601
2701
2801
2901
the genre is Impressionism
101
201
301
401
501
601
701
801
901
1001
1101
1201
1301
1401
1501
1601
1701
1801
1901
2001
2101
2201
2301
2401
2501
2601
2701
2801
2901


In [48]:
df = pd.DataFrame(paintings_list)
df

Unnamed: 0,0,1,2,3,4,5
0,Realism,arthur segal,portrait of a man against the light 1935,"[[[192, 193, 186], [191, 192, 184], [191, 193,...",,
1,Realism,vincent van gogh,lange vijverberg hague 1883,"[[[184, 149, 94], [179, 146, 93], [189, 155, 1...",,
2,Realism,valentin serov,peasant woman in a cart 1896,"[[[170, 162, 149], [164, 155, 151], [160, 158,...",,
3,Realism,konstantin makovsky,portrait of the boy,"[[[223, 194, 146], [227, 192, 143], [230, 200,...",,
4,Realism,mikhail vrubel,dress,"[[[225, 223, 226], [224, 222, 225], [222, 220,...",,
...,...,...,...,...,...,...
5927,Impressionism,edgar degas,landscape with hills 1890,"[[[221, 205, 122], [214, 207, 132], [209, 212,...",,
5928,Impressionism,alfred sisley,welsh coast in the fog 1887,"[[[248, 246, 225], [245, 241, 221], [245, 238,...",,
5929,Impressionism,iosif iser,silistra landscape,"[[[81, 132, 153], [84, 135, 154], [90, 142, 15...",,
5930,Impressionism,william merritt chase,still life with flowers,"[[[44, 33, 33], [53, 47, 45], [61, 58, 52], [6...",,


In [49]:
df[0].value_counts()

Realism          2966
Impressionism    2966
Name: 0, dtype: int64

In [6]:
def load_model():
    
    # $CHALLENGIFY_BEGIN
    
    model = VGG16(weights="imagenet", include_top=False, input_shape=(128,128,3))
    
    # $CHALLENGIFY_END
    
    return model

In [7]:
def set_nontrainable_layers(model):
    
    # $CHALLENGIFY_BEGIN
    # Set the first layers to be untrainable
    model.trainable = False
    
    # $CHALLENGIFY_END
    
    return model

In [8]:
def add_last_layers(model):
    '''Take a pre-trained model, set its parameters as non-trainable, and add additional trainable layers on top'''
    # $CHALLENGIFY_BEGIN
    base_model = set_nontrainable_layers(model)
    flatten_layer = layers.Flatten()
    dense_layer = layers.Dense(500, activation='relu')
    prediction_layer = layers.Dense(1, activation='sigmoid')
    
    
    model = models.Sequential([
        base_model,
        flatten_layer,
        dense_layer,
        prediction_layer
    ])
    # $CHALLENGIFY_END
    return model


In [9]:
def build_model():
    # $CHALLENGIFY_BEGIN    
    model = load_model()
    model = add_last_layers(model)
    
    opt = optimizers.Adam(learning_rate=1e-4)
    model.compile(loss='binary_crossentropy',
                  optimizer=opt,
                  metrics=['accuracy'])
    return model
    # $CHALLENGIFY_END

In [10]:
def tolist(x):
    return x.tolist()

In [50]:
X = df[3]

In [51]:
X = np.stack(X.values)

In [52]:
#turn genre in binary
def genre_to_binary(x):
    if x == "Realism":
        return 0
    if x == "Impressionism":
        return 1

In [53]:
y = df[0].apply(genre_to_binary)

In [54]:
y

0       0
1       0
2       0
3       0
4       0
       ..
5927    1
5928    1
5929    1
5930    1
5931    1
Name: 0, Length: 5932, dtype: int64

In [55]:
y = np.stack(y)

In [56]:
y

array([0, 0, 0, ..., 1, 1, 1])

In [57]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

In [58]:
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train)

In [59]:
X_train.shape

(3114, 128, 128, 3)

In [60]:
y_train.shape

(3114,)

In [23]:
y_train

array([1, 1, 0, ..., 1, 0, 0])

In [None]:
from tensorflow.keras.callbacks import EarlyStopping

model = build_model()

es = EarlyStopping(monitor = 'val_accuracy', 
                   mode = 'max', 
                   patience = 5, 
                   verbose = 1, 
                   restore_best_weights = True)

history = model.fit(X_train, y_train, 
                    validation_data=(X_val, y_val), 
                    epochs=50, 
                    batch_size=32, 
                    callbacks=[es])

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50

In [None]:
df = pd.read_csv("../smArt/data/binary_train_20k.csv")

In [None]:
df