In [1]:
import numpy as np
import pandas as pd

import pickle
import io

from PIL import Image

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.optimizers import Adam
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.utils import to_categorical

In [2]:
# Import the preprocessed data
imgs = pd.read_csv("training_imageset.csv")

In [3]:
imgs.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 8864 entries, 0 to 8863
Data columns (total 2 columns):
 #   Column     Non-Null Count  Dtype 
---  ------     --------------  ----- 
 0   Style      8864 non-null   object
 1   file_path  8864 non-null   object
dtypes: object(2)
memory usage: 138.6+ KB


Convert the image files to floating point numpy arrays and normalize. 

In [None]:
# Convert to floating point and normalize
def load_and_normalize_image(file_path):
    """
    Load image from file_path, convert to float32 array and normalize to [0, 1] range
    
    Args:
        file_path (str): Path to the image file
        
    Returns:
        numpy.ndarray: Normalized image array of shape (height, width, 3) in float32
    """
    # Load image
    img = Image.open(file_path)
    
    # Convert to RGB if not already
    if img.mode != 'RGB':
        img = img.convert('RGB')
    
    # Convert to numpy array and change type to float32
    img_array = np.array(img, dtype=np.float32)
    
    # Normalize to [0, 1] range
    img_array = img_array / 255.0
    
    return img_array

def load_images_in_batches(dataframe, batch_size=1000):
    """Load images in batches to prevent memory overload"""
    all_arrays = []
    
    for i in range(0, len(dataframe), batch_size):
        batch = dataframe.iloc[i:i+batch_size]
        batch_arrays = batch['file_path'].apply(load_and_normalize_image)
        all_arrays.extend(batch_arrays.values)
        print(f"Processed batch {i//batch_size + 1}/{(len(dataframe)//batch_size) + 1}")
        
    return np.array(all_arrays)

# Load images in batches
image_arrays = load_images_in_batches(imgs)

Processed batch 1/9
Processed batch 2/9
Processed batch 3/9
Processed batch 4/9


In [5]:
# Define X and y

y = imgs['Style']

In [9]:
unique_styles = imgs['Style'].unique()
print("Style order:")
for i, style in enumerate(unique_styles):
    print(f"{i}: {style}")

Style order:
0: Abstract Expressionism
1: Conceptual Art
2: Early Renaissance
3: Expressionism
4: Baroque
5: Cubism
6: Contemporary Realism
7: Art Informel
8: Contemporary
9: Realism
10: Neo-romantic
11: Post-impressionism
12: Modern Art
13: Modernism
14: Surrealism
15: Symbolism
16: Rococo
17: Northern Renaissance
18: Pop Art
19: Mannerism
20: Late Renaissance
21: Ukiyo-e
22: High Renaissance
23: Fauvism


In [15]:
# Split the training dataset into training and validation sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, stratify=y, random_state=888)

In [17]:
# First convert strings to numbers
label_encoder = LabelEncoder()
y_train_encoded = label_encoder.fit_transform(y_train)
y_test_encoded = label_encoder.transform(y_test)

# Then one-hot using tensorflows method
y_train_cat = to_categorical(y_train_encoded)
y_test_cat = to_categorical(y_test_encoded)

print("y_train_cat shape:", y_train_cat.shape)
print("y_test_cat shape:", y_test_cat.shape)

y_train_cat shape: (7091, 24)
y_test_cat shape: (1773, 24)


In [18]:
# Convert list/series of arrays into a single 4D tensor
X_train = np.stack(X_train.values)
X_test = np.stack(X_test.values)

# Check shapes
print("X_train shape:", X_train.shape)
print("X_test shape:", X_test.shape)

X_train shape: (7091, 256, 256, 3)
X_test shape: (1773, 256, 256, 3)


In [None]:
# Define a CNN model
model = keras.Sequential([
    layers.Conv2D(32, (3, 3), activation='relu', input_shape=(60, 64, 1)),
    layers.MaxPooling2D((2, 2)),
    layers.Flatten(),
    layers.Dense(64, activation='relu'),
    layers.Dense(24, activation='softmax')
])

# Compile the model
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Train the model
batch_size = 32
epochs = 10
history = model.fit(
    X_train, y_train,
    validation_data=(X_test, y_test),
    epochs=epochs
)

In [None]:
# Evaluate the model using the testing data
model.evaluate(X_test, y_test)