In [None]:
!pip install imutils

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.layers.experimental.preprocessing import CenterCrop
from tensorflow.keras.layers.experimental.preprocessing import Rescaling

from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split
from imutils import paths
import argparse
import imutils
import cv2
import os

#import os
#for dirname, _, filenames in os.walk('/kaggle/input/images/images'):
#    for filename in filenames:
#        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

# ====== Begin ===============

# Read images and create dataset as vectors of rgb values
#for dirname, _, filenames in os.walk('/kaggle/input'):
#    for filename in filenames:
#        os.path.join(dirname, filename)

# Create a dataset

#get list of artists and delete unimportant infos from artists.csv
# Load the artist metadata and keep only the columns that are used later on.
artists = (
    pd.read_csv('/kaggle/input/best-artworks-of-all-time/artists.csv')
    .drop(columns=['id', 'years', 'bio', 'wikipedia'])
    # most prolific artists first
    .sort_values(by=['paintings'], ascending=False)
    # drop=True: otherwise the pre-sort row index is kept as a spurious "index" column
    .reset_index(drop=True)
)
maxpaintings = artists['paintings'].sum()
# Inverse-frequency weight: the fewer paintings an artist has, the larger the weight.
artists['weight'] = maxpaintings / artists.paintings
print(artists)



In [None]:
# Argument Parser
#ap = argparse.ArgumentParser()
#ap.add_argument("-d", "--dataset", help="path to input dataset",required=True, default="/kaggle/input/best-artworks-of-all-time/resized/resized/")
#ap.add_argument("-l", "--label", help="artist or genre", default="artist")
#ap.add_argument("-m", "--mode", help="knn or cluster",required=True)
#ap.add_argument("-k","--neighbors", type=int, default=1, help="number of nearest neighbors for classification")
#ap.add_argument("-j", "--job", type=int, default=-1, help="number of jobs for k-NN distance")
#args=vars(ap.parse_args())

# presets
# --- Configuration (stands in for the command-line arguments above) ---
dataset = "/kaggle/input/best-artworks-of-all-time/resized/resized/"  # image directory
neighbors, jobs = 5, -1   # k for k-NN and the value later passed as n_jobs
labelmode = "genre"       # "genre" selects the genre branch when splitting the data
# NOTE(review): this is the genre column of the artists table, i.e. one entry per
# table row rather than one per image -- confirm before using it as a label vector.
labelgenre = artists["genre"]

print("[INFO] describing images...")
imagePaths = [path for path in paths.list_images(dataset)]
# Accumulators that the image loop fills (raw pixels, histograms, artist names).
rawImages, features, labels = [], [], []

#========= Preprocessing and Feature Selection======================

#========== Helperfunction ========================
#Function getLabel. Modify file path to get artist name
def get_artistname(filepath):
    """Return the artist name encoded in an image file name.

    File names have the form ``Vincent_van_Gogh_10.jpg``: the words of the
    artist name joined by underscores, followed by a running number.

    Args:
        filepath: Either a path string, or the list of path components
            obtained from ``path.split(os.path.sep)``.

    Returns:
        The artist name with spaces, e.g. ``"Vincent van Gogh"``.
    """
    if isinstance(filepath, str):
        filename = os.path.basename(filepath)
    else:
        # The file name is always the last component. The previous hard-coded
        # index 6 only worked for exactly one directory depth.
        filename = filepath[-1]
    # Strip the extension, then drop the trailing running number.
    parts = filename.split(".")[0].split("_")
    return " ".join(parts[:-1])


#========= 1.1.1 Resize Image to (32,32) and flatten the image
def image_to_feature_vector(image, size=(32,32)):
    """Resize `image` to `size` and return its pixel values as one flat vector."""
    resized = cv2.resize(image, size)
    return resized.flatten()

#========= 1.1.2 extract 3d color histogram from HSV color space using bins per channel
def extract_color_histogram(image, bins=(8,8,8)):
    """Describe `image` by a flattened, normalized 3D colour histogram in HSV space.

    `image` is converted with COLOR_BGR2HSV (so it is treated as BGR);
    `bins` gives the number of histogram bins for the three HSV channels.
    """
    hsv_image = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
    channels = [0, 1, 2]
    value_ranges = [0, 180, 0, 256, 0, 256]
    histogram = cv2.calcHist([hsv_image], channels, None, bins, value_ranges)
    # The OpenCV 2 call returns the normalized array; otherwise the histogram
    # is normalized into the destination argument.
    if not imutils.is_cv2():
        cv2.normalize(histogram, histogram)
    else:
        histogram = cv2.normalize(histogram)
    return histogram.flatten()

#loop over images
for (i, imagePath) in enumerate(imagePaths):
    image = cv2.imread(imagePath)
    if image is None:
        # cv2.imread returns None for unreadable files; skip them so that
        # rawImages, features and labels stay aligned row by row.
        print("[WARN] could not read {}, skipping".format(imagePath))
        continue
    # Path components; get_artistname extracts the artist from the file name.
    # (The previous expression had a misplaced parenthesis and only split on
    # the path separator by accident.)
    label = imagePath.split(os.path.sep)
    pixels = image_to_feature_vector(image)
    hist = extract_color_histogram(image)
    rawImages.append(pixels)
    features.append(hist)
    labels.append(get_artistname(label))

    # progress report every 1000 images
    if i > 0 and i % 1000 == 0:
        print("[INFO] processed {}/{}".format(i, len(imagePaths)))

# Turn the accumulators into arrays (one row per successfully read image).
rawImages = np.array(rawImages)
features = np.array(features)
labels = np.array(labels)

# Report memory use in MB (1 MB = 1024 * 1024 bytes).
print("[INFO] pixels matrix: {:.2f}MB".format(rawImages.nbytes / (1024 * 1024.0)))
print("[INFO] features matrix: {:.2f}MB".format(features.nbytes / (1024 * 1024.0)))

In [None]:
#========== 1.1 KNN by artistname=================================

# presets
dataset ="/kaggle/input/best-artworks-of-all-time/resized/resized/"
neighbors = 5
jobs = -1


# Choose the target labels; train_test_split needs exactly one label per image.
if labelmode == 'genre':
    # The genre column of `artists` has one entry per table row, so expand it to
    # one genre per image by looking up the artist parsed from each file name.
    # assumes `artists` has a "name" column whose values match those parsed
    # names -- TODO confirm; unmatched artists are labelled "unknown".
    genre_by_artist = dict(zip(artists["name"], artists["genre"]))
    labelsgenre = np.array([genre_by_artist.get(name, "unknown") for name in labels])
    targetLabels = labelsgenre
else:
    targetLabels = labels

# partition into training (75%) and test (25%) sets; the same random_state
# keeps the raw-pixel and histogram splits aligned
(trainRI, testRI, trainRL, testRL) = train_test_split(rawImages, targetLabels, test_size=0.25, random_state=42)
(trainFeat, testFeat, trainLabels, testLabels) = train_test_split(features, targetLabels, test_size=0.25, random_state=42)
                            
# --- k-NN on raw pixel intensities: fit on the training split, score on the test split ---
print("[INFO] evaluating raw pixel accuracy...")
model = KNeighborsClassifier(n_neighbors=neighbors, n_jobs=jobs).fit(trainRI, trainRL)
acc = model.score(testRI, testRL)
print("[INFO] raw pixel accuracy: {:.2f}%".format(100 * acc))

# --- k-NN on colour histograms: same procedure on the histogram features ---
print("[INFO] evaluating histogram accuracy...")
model = KNeighborsClassifier(n_neighbors=neighbors, n_jobs=jobs).fit(trainFeat, trainLabels)
acc = model.score(testFeat, testLabels)
print("[INFO] histogram accuracy: {:.2f}%".format(100 * acc))

In [None]:
#========== 1.2 KNN by genre=================================

#========== Helperfunction ========================
#Function getLabel. Modify file path to get artist name
def get_artistname(filepath):
    """Return the artist name encoded in an image file name.

    File names have the form ``Vincent_van_Gogh_10.jpg``: the words of the
    artist name joined by underscores, followed by a running number.

    Args:
        filepath: Either a path string, or the list of path components
            obtained from ``path.split(os.path.sep)``.

    Returns:
        The artist name with spaces, e.g. ``"Vincent van Gogh"``.
    """
    if isinstance(filepath, str):
        filename = os.path.basename(filepath)
    else:
        # The file name is always the last component. The previous hard-coded
        # index 6 only worked for exactly one directory depth.
        filename = filepath[-1]
    # Strip the extension, then drop the trailing running number.
    parts = filename.split(".")[0].split("_")
    return " ".join(parts[:-1])

#========= Preprocessing and Feature Selection======================

#========= 1.2.1. Resize Image to (32,32) and flatten the image
def image_to_feature_vector(image, size=(32,32)):
    """Resize `image` to `size` and return its pixel values as one flat vector."""
    resized = cv2.resize(image, size)
    return resized.flatten()

#========= 1.2.2. extract 3d color histogram from HSV color space using bins per channel
def extract_color_histogram(image, bins=(8,8,8)):
    """Describe `image` by a flattened, normalized 3D colour histogram in HSV space.

    `image` is converted with COLOR_BGR2HSV (so it is treated as BGR);
    `bins` gives the number of histogram bins for the three HSV channels.
    """
    hsv_image = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
    channels = [0, 1, 2]
    value_ranges = [0, 180, 0, 256, 0, 256]
    histogram = cv2.calcHist([hsv_image], channels, None, bins, value_ranges)
    # The OpenCV 2 call returns the normalized array; otherwise the histogram
    # is normalized into the destination argument.
    if not imutils.is_cv2():
        cv2.normalize(histogram, histogram)
    else:
        histogram = cv2.normalize(histogram)
    return histogram.flatten()

# presets
dataset ="/kaggle/input/best-artworks-of-all-time/resized/resized/"
neighbors = 5
jobs = -1

# Build one genre label per image by looking up the artist parsed from each
# file name (`labelsgenre` was previously used here without being defined).
# assumes `artists` has a "name" column whose values match those parsed
# names -- TODO confirm; unmatched artists are labelled "unknown".
genre_by_artist = dict(zip(artists["name"], artists["genre"]))
labelsgenre = np.array([genre_by_artist.get(name, "unknown") for name in labels])

# partition into training (75%) and test (25%) sets; the same random_state
# keeps the raw-pixel and histogram splits aligned
(trainRI, testRI, trainRL, testRL) = train_test_split(rawImages, labelsgenre, test_size=0.25, random_state=42)
(trainFeat, testFeat, trainLabels, testLabels) = train_test_split(features, labelsgenre, test_size=0.25, random_state=42)
                            
# --- k-NN on raw pixel intensities: fit on the training split, score on the test split ---
print("[INFO] evaluating raw pixel accuracy...")
model = KNeighborsClassifier(n_neighbors=neighbors, n_jobs=jobs).fit(trainRI, trainRL)
acc = model.score(testRI, testRL)
print("[INFO] raw pixel accuracy: {:.2f}%".format(100 * acc))

# --- k-NN on colour histograms: same procedure on the histogram features ---
print("[INFO] evaluating histogram accuracy...")
model = KNeighborsClassifier(n_neighbors=neighbors, n_jobs=jobs).fit(trainFeat, trainLabels)
acc = model.score(testFeat, testLabels)
print("[INFO] histogram accuracy: {:.2f}%".format(100 * acc))

In [None]:
#============ 2. Clustering ==================

from numpy import unique
from numpy import where
from sklearn.datasets import make_classification
from sklearn.cluster import AgglomerativeClustering
from matplotlib import pyplot

nclusters=50

# define dataset: the colour-histogram features, one row per image
# (the former `_ = labels` was dropped: it was never read and it overwrote
# IPython's `_` last-output variable)
X = features
# define model
model = AgglomerativeClustering(n_clusters=nclusters)
# fit model and predict a cluster id for every row
yhat = model.fit_predict(X)
# retrieve unique clusters
clusters = unique(yhat)
# create one scatter series per cluster
for cluster in clusters:
    # boolean mask of the rows assigned to this cluster
    in_cluster = yhat == cluster
    # only the first two feature dimensions are plotted
    pyplot.scatter(X[in_cluster, 0], X[in_cluster, 1])
pyplot.xlabel("feature 0")
pyplot.ylabel("feature 1")
pyplot.title("Agglomerative clusters on colour histograms")
# show plot
pyplot.show()


In [None]:
nclusters=50

# define dataset: the flattened raw pixels, one row per image
# (the former `_ = labels` was dropped: it was never read and it overwrote
# IPython's `_` last-output variable)
X = rawImages
# define model
model = AgglomerativeClustering(n_clusters=nclusters)
# fit model and predict a cluster id for every row
yhat = model.fit_predict(X)
# retrieve unique clusters
clusters = unique(yhat)
# create one scatter series per cluster
for cluster in clusters:
    # boolean mask of the rows assigned to this cluster
    in_cluster = yhat == cluster
    # only the first two pixel values are plotted
    pyplot.scatter(X[in_cluster, 0], X[in_cluster, 1])
pyplot.xlabel("pixel value 0")
pyplot.ylabel("pixel value 1")
pyplot.title("Agglomerative clusters on raw pixels")
# show plot
pyplot.show()


In [None]:
from tensorflow import keras

# Build the training dataset from the image directory; with label_mode='categorical'
# the labels are one-hot vectors.
# NOTE(review): image_dataset_from_directory infers classes from sub-directory
# names by default (not from file names) -- confirm against the Keras docs.
dataset = keras.preprocessing.image_dataset_from_directory(
  '/kaggle/input/best-artworks-of-all-time/images/images/', batch_size=64, image_size=(200, 200), label_mode='categorical', color_mode='rgb')

# (An unused `dense = keras.layers.Dense(units=16)` layer was removed here.)
# Let's say we expect our inputs to be RGB images of size 200,200,3
inputs = keras.Input(shape=(200, 200, 3))
# Let's say we expect our inputs to be RGB images of arbitrary size
#inputs = keras.Input(shape=(None, None, 3))

# Example image data, with values in the [0, 255] range
training_data = np.random.randint(0, 256, size=(64, 200, 200, 3)).astype("float32")

# Demonstrate the preprocessing layers on the example batch:
# centre-crop to 150x150, then scale pixel values by 1/255.
cropper = CenterCrop(height=150, width=150)
scaler = Rescaling(scale=1.0 / 255)

output_data = scaler(cropper(training_data))
print("shape:", output_data.shape)
print("min:", np.min(output_data))
print("max:", np.max(output_data))

# Center-crop images to 150x150
x = CenterCrop(height=150, width=150)(inputs)
# Rescale images to [0, 1] -- the model therefore expects raw [0, 255] pixels
x = Rescaling(scale=1.0 / 255)(x)

# Apply some convolution and pooling layers
x = layers.Conv2D(filters=32, kernel_size=(3, 3), activation="relu")(x)
x = layers.MaxPooling2D(pool_size=(3, 3))(x)
x = layers.Conv2D(filters=32, kernel_size=(3, 3), activation="relu")(x)
x = layers.MaxPooling2D(pool_size=(3, 3))(x)
x = layers.Conv2D(filters=32, kernel_size=(3, 3), activation="relu")(x)

# Apply global average pooling to get flat feature vectors
x = layers.GlobalAveragePooling2D()(x)

# Add a dense classifier on top. The number of classes is taken from the
# dataset instead of being hard-coded (it was 51), so the output layer always
# matches the one-hot label width.
num_classes = len(dataset.class_names)
outputs = layers.Dense(num_classes, activation="softmax")(x)

model2 = keras.Model(inputs=inputs, outputs=outputs)

# Smoke test: push one random batch through the model and check the output shape
data = np.random.randint(0, 256, size=(64, 200, 200, 3)).astype("float32")
processed_data = model2(data)
print(processed_data.shape)

# categorical_crossentropy matches the one-hot labels of the dataset; accuracy is
# tracked as well so the training log shows classification quality, not only loss
model2.compile(optimizer='rmsprop', loss='categorical_crossentropy', metrics=['accuracy'])

model2.fit(dataset, epochs=5)


In [None]:
#!/usr/bin/env python
# coding: utf-8


from keras.models import load_model
from keras.preprocessing import image
import matplotlib.pyplot as plt
import numpy as np
import os


def load_image(img_path, show=False):
    """Load one image file as a batch of size 1 with pixel values scaled to [0, 1].

    Args:
        img_path: Path of the image file; it is loaded at 200x200.
        show: When true, display the loaded image with matplotlib.

    Returns:
        Array of shape (1, height, width, channels).
    """
    pil_img = image.load_img(img_path, target_size=(200, 200))
    # (height, width, channels) -> (1, height, width, channels): models expect a batch axis
    batch = np.expand_dims(image.img_to_array(pil_img), axis=0)
    # scale to [0, 1], the range imshow expects for float data
    batch /= 255.

    if show:
        plt.imshow(batch[0])
        plt.axis('off')
        plt.show()

    return batch


if __name__ == "__main__":

    # load model
    #model = load_model("model_aug.h5")

    # image path
    # (the class labels are ordered alphabetically here)
    img_path = "/kaggle/input/best-artworks-of-all-time/resized/resized/Vincent_van_Gogh_10.jpg"

    # load a single image; load_image returns pixel values scaled to [0, 1]
    new_image = load_image(img_path)

    # model2 contains its own Rescaling(1/255) layer and was trained on raw
    # [0, 255] pixels, so undo load_image's scaling before predicting.
    # Otherwise the input would be rescaled twice.
    pred = model2.predict(new_image * 255.0)

    # class probabilities, followed by the index of the most likely class
    print(pred)
    print(pred.argmax(axis=-1))

