In [4]:
import os
import numpy as np
import random
import csv
import pickle

import numpy as np
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

from keras.models import Sequential
from keras.layers import Flatten
from keras.layers import Dense
from keras.layers import Dropout
from keras.utils import load_img
from keras.utils import img_to_array
from sklearn.model_selection import train_test_split
from keras.models import load_model
from keras.utils import to_categorical
from keras.preprocessing.image import ImageDataGenerator

from keras.layers import Conv2D, MaxPooling2D
from keras.optimizers import Adam
from sklearn.model_selection import StratifiedKFold


In [5]:

DATASET_PATH = '/Users/yurirykhlo/dev/ECE460JFinalPrject/datasets/'
DIGITAL_IMAGES_PATH = 'digital/StableDiffusion/'
REAL_IMAGES_PATH = 'real-world/'
PAINTING_IMAGES_PATH = 'Non-digital Artwork/'

SEED = 69420

def is_directory(path):
    return os.path.isdir(path)

def getImageFiles(image_class):
    if image_class == 'digital':
        files = os.listdir(DATASET_PATH + DIGITAL_IMAGES_PATH)
        return [DIGITAL_IMAGES_PATH + file for file in files]
    
    elif image_class == 'real':
        dirs = [dir for dir in os.listdir(DATASET_PATH + REAL_IMAGES_PATH) if is_directory(DATASET_PATH + REAL_IMAGES_PATH + dir)]
        real_images_files = np.array([])
        for dir in dirs:
            images = os.listdir(DATASET_PATH + REAL_IMAGES_PATH + dir)
            images = [REAL_IMAGES_PATH + dir + '/' + image for image in images]
            real_images_files = np.append(real_images_files, images)
        return real_images_files
    
    elif image_class == 'painting':
        painting_images_dirs = [dir for dir in os.listdir(DATASET_PATH + PAINTING_IMAGES_PATH) if is_directory(DATASET_PATH + PAINTING_IMAGES_PATH + dir)]
        painting_images_files = np.array([])
        for dir in painting_images_dirs:
            images = os.listdir(DATASET_PATH + PAINTING_IMAGES_PATH + dir)
            images = [PAINTING_IMAGES_PATH + dir + '/' + image for image in images if not image == '.DS_Store']
            painting_images_files = np.append(painting_images_files, images)
        return painting_images_files
    
    else:
        raise ValueError('image_class must be one of digital, real, or painting')

def sampleImages(digital_image_samples, real_world_samples, artwork_samples, dimension=(224,224)):
    digital_images_files = getImageFiles('digital')
    real_world_images_files = getImageFiles('real')
    artwork_images_files = getImageFiles('painting')

    random.seed(SEED)

    digital_sampled_images = random.sample(list(digital_images_files), digital_image_samples)
    real_sampled_images = random.sample(list(real_world_images_files), real_world_samples)
    artwork_sampled_images = random.sample(list(artwork_images_files), artwork_samples)

    # concatenate all samples
    sampled_images = np.concatenate((digital_sampled_images, real_sampled_images, artwork_sampled_images), axis=0)

    images = np.empty((0, dimension[0], dimension[1], 3))
    for image in sampled_images:
        img = load_img(DATASET_PATH + image, target_size=dimension)
        data = img_to_array(img)
        data = np.expand_dims(data, axis=0)
        images = np.append(images, data, axis=0)

    # create labels
    digital_labels = [0 for i in range(digital_image_samples)]
    real_labels = [1 for i in range(real_world_samples)]
    artwork_labels = [2 for i in range(artwork_samples)]
    labels = np.concatenate((digital_labels, real_labels, artwork_labels), axis=0)

    return images, labels

# X, Y = sampleImages(1000, 1000,1000)
# Save X and Y using pickle
# with open('X_224.pickle', 'wb') as file_X:
#     pickle.dump(X, file_X)

# with open('Y_224.pickle', 'wb') as file_Y:
#     pickle.dump(Y, file_Y)

    # Load X and Y using pickle
with open('X_224.pickle', 'rb') as file_X:
    X = pickle.load(file_X)

with open('Y_224.pickle', 'rb') as file_Y:
    Y = pickle.load(file_Y)

In [6]:


# def preprocess_image(image_path, target_size=(128, 128)):
#     img = load_img(image_path, target_size=target_size)
#     img_array = img_to_array(img) / 255.0
#     return img_array.flatten()

# def train_model(X, Y):
#     # Split the dataset into train and test sets
#     x_train, x_test, y_train, y_test = train_test_split(X, Y, test_size=0.2, random_state=SEED)

#     # Reshape the input data to be 2D
#     x_train = x_train.reshape(x_train.shape[0], -1)
#     x_test = x_test.reshape(x_test.shape[0], -1)

#     # Create and train the MLP model
#     mlp = MLPClassifier(hidden_layer_sizes=(512, 256, 128), activation='relu', solver='adam', max_iter=100, random_state=SEED, verbose=True)
#     mlp.fit(x_train, y_train)

#     # Predict on the test set
#     y_pred = mlp.predict(x_test)

#     # Calculate the accuracy
#     accuracy = accuracy_score(y_test, y_pred)
#     print("Accuracy: {:.2f}%".format(accuracy * 100))

#     return mlp


# # Train the model
# mlp_model = train_model(X, Y)



In [7]:
from keras.optimizers import Adam
from sklearn.model_selection import StratifiedKFold
import numpy as np

def create_model(learning_rate, dropout_rate):
    model = Sequential()
    model.add(Flatten(input_shape=(224, 224, 3)))
    model.add(Dense(512, activation='relu'))
    model.add(Dropout(dropout_rate))
    model.add(Dense(256, activation='relu'))
    model.add(Dropout(dropout_rate))
    model.add(Dense(128, activation='relu'))
    model.add(Dropout(dropout_rate))
    model.add(Dense(3, activation='softmax'))

    model.compile(optimizer=Adam(learning_rate=learning_rate), loss='categorical_crossentropy', metrics=['accuracy'])

    return model


def train_model(X, Y, batch_size, learning_rate, dropout_rate, n_folds=3):
    kfold = StratifiedKFold(n_splits=n_folds, shuffle=True, random_state=SEED)
    fold_accuracy = []

    Y = to_categorical(Y, num_classes=3)

    for train, val in kfold.split(X, np.argmax(Y, axis=1)):
        x_train = X[train]
        y_train = Y[train]
        x_val = X[val]
        y_val = Y[val]

        model = create_model(learning_rate, dropout_rate)

        hist = model.fit(x=x_train, y=y_train, batch_size=batch_size, epochs=10, verbose=1, validation_data=(x_val, y_val), shuffle=True, workers=1, use_multiprocessing=True)

        scores = model.evaluate(x_val, y_val, verbose=1)
        fold_accuracy.append(scores[1] * 100)
        print(f"Fold accuracy: {scores[1]*100:.2f}%")

    return np.mean(fold_accuracy), model


batch_sizes = [16, 32]
learning_rates = [0.001, 0.0005]
dropout_rates = [0.5, 0.6]


best_accuracy = 0
best_model = None
best_hyperparams = {
    'batch_size': batch_sizes[0],
    'learning_rate': learning_rates[0],
    'dropout_rate': dropout_rates[0],
}

for batch_size in batch_sizes:
    for learning_rate in learning_rates:
        for dropout_rate in dropout_rates:
            print(f"\nTraining with batch_size={batch_size}, learning_rate={learning_rate}, dropout_rate={dropout_rate}")
            accuracy, model = train_model(X, Y, batch_size, learning_rate, dropout_rate)
            
            if accuracy > best_accuracy:
                best_model = model
                best_accuracy = accuracy
                best_hyperparams['batch_size'] = batch_size
                best_hyperparams['learning_rate'] = learning_rate
                best_hyperparams['dropout_rate'] = dropout_rate

print("\nBest hyperparameters:")
print(f"Batch size: {best_hyperparams['batch_size']}")
print(f"Learning rate: {best_hyperparams['learning_rate']}")
print(f"Dropout rate: {best_hyperparams['dropout_rate']}")
print(f"Best accuracy: {best_accuracy:.2f}%")

best_model.save('mlp_fully_trained.h5')



Training with batch_size=32, learning_rate=0.001, dropout_rate=0.5
Metal device set to: Apple M1

systemMemory: 16.00 GB
maxCacheSize: 5.33 GB

Epoch 1/10


2023-04-22 12:15:53.248710: W tensorflow/tsl/platform/profile_utils/cpu_utils.cc:128] Failed to get CPU frequency: 0 Hz


Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Fold accuracy: 33.80%
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Fold accuracy: 33.60%
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Fold accuracy: 33.40%

Best hyperparameters:
Batch size: 32
Learning rate: 0.001
Dropout rate: 0.5
Best accuracy: 33.60%
