**Imports & Setup**

In [6]:
from pathlib import Path
import numpy as np
import tensorflow as tf
from tensorflow.keras.applications import resnet50
from sklearn.model_selection import GridSearchCV, StratifiedKFold
from sklearn.neighbors import KNeighborsClassifier
import pandas as pd

In [7]:
# directories & file paths
TRAIN_DIR = Path("../../data/processed_balanced/train")
IMG_DIR = Path("../../data/processed")
VAL_DIR = IMG_DIR / "validate"
TEST_DIR = IMG_DIR / "test"

# NOTE(review): "/results" is an absolute path at the filesystem root, unlike the
# relative data paths above -- confirm this is intended before writing results.
RESULTS_DIR = Path("/results")
RESULTS_PATH = RESULTS_DIR / "bagged_knn_results.csv"

# lower-case image file suffixes accepted when scanning the class folders
# (defined once; the cell previously assigned this same tuple twice)
EXTS = (".jpg", ".jpeg", ".png", ".bmp")

# target styles for classification
TARGET_STYLES = [
    "Abstract_Expressionism",
    "Baroque",
    "Cubism",
    "Impressionism",
    "Pop_Art",
]

# hyperparameters for knn grid search
K_GRID = [1, 3, 5, 7, 9, 11]

**Extracting Features**

In [10]:
# config for data loading & feature extraction
BATCH = 64                   # batch size applied in build_ds
AUTO = tf.data.AUTOTUNE      # let tf.data pick parallelism / prefetch depth
EXPECT_SIZE = (256, 256)     # (height, width) that decode_keep_size checks against
STRICT_SIZE = True           # when True, decode_keep_size asserts the image size

# mapping from class name to label index (position of the style in TARGET_STYLES)
CLASS_TO_IDX = dict(zip(TARGET_STYLES, range(len(TARGET_STYLES))))
def list_paths_labels(root: Path):
    '''
    collect image file paths & integer labels from root/<style> folders

    for every style in TARGET_STYLES the folder root/<style> is walked
    recursively (a missing folder is skipped). regular files whose lower-cased
    suffix is in EXTS are kept, in sorted path order within each style.

    returns (paths, labels): a numpy array of path strings and an int32 numpy
    array holding the CLASS_TO_IDX label of each path.
    '''
    found_paths = []
    found_labels = []
    for style in TARGET_STYLES:
        style_dir = root / style
        if style_dir.exists():
            images = [
                str(entry)
                for entry in sorted(style_dir.rglob("*"))
                if entry.suffix.lower() in EXTS and entry.is_file()
            ]
            found_paths.extend(images)
            # one label per image found for this style
            found_labels.extend(CLASS_TO_IDX[style] for _ in images)
    return np.array(found_paths), np.array(found_labels, dtype=np.int32)

def decode_keep_size(path):
    '''
    decode image from path without resizing & apply ResNet50 preprocessing

    path: string (or scalar string tensor) pointing at an image file.
    returns: float32 tensor of shape (height, width, 3) passed through
             resnet50.preprocess_input.
    when STRICT_SIZE is true, the decoded (height, width) must equal
    EXPECT_SIZE; otherwise the size assertion fails with a message that
    names the expected size.
    '''
    img = tf.io.read_file(path)
    # expand_animations=False keeps the result 3-D even for animated files
    img = tf.image.decode_image(img, channels=3, expand_animations=False)
    img.set_shape([None, None, 3])
    if STRICT_SIZE:
        shape = tf.shape(img)
        # message is derived from EXPECT_SIZE so it cannot drift from the config
        # (it previously hard-coded "224" while EXPECT_SIZE is (256, 256))
        assert_op = tf.debugging.assert_equal(
            shape[:2],
            EXPECT_SIZE,
            message=f"Image size differs from expected {EXPECT_SIZE}",
        )
        # tie the check to the tensor so it is not pruned from the graph
        with tf.control_dependencies([assert_op]):
            img = tf.identity(img)
    img = tf.cast(img, tf.float32)
    img = resnet50.preprocess_input(img)
    return img

def build_ds(paths, labels=None, shuffle=False):
    '''
    build a batched, prefetched tf.data.Dataset from image paths & labels

    paths: sequence of image file paths.
    labels: optional sequence of labels aligned with paths; when given the
            dataset yields (image, label) pairs, otherwise images only.
    shuffle: when true, shuffle with a fixed seed (seed=42) and keep that same
             order on every iteration (reshuffle_each_iteration=False).
    returns: tf.data.Dataset of decode_keep_size outputs, batched by BATCH.
    '''
    ds = tf.data.Dataset.from_tensor_slices(paths)
    if labels is not None:
        ds = tf.data.Dataset.zip((ds, tf.data.Dataset.from_tensor_slices(labels)))
    if shuffle:
        # shuffle the lightweight path/label records *before* decoding so the
        # shuffle buffer never holds decoded float32 images;
        # max(1, ...) keeps buffer_size valid when paths is empty
        ds = ds.shuffle(
            buffer_size=max(1, min(10000, len(paths))),
            seed=42,
            reshuffle_each_iteration=False,
        )
    if labels is not None:
        ds = ds.map(
            lambda path, label: (decode_keep_size(path), label),
            num_parallel_calls=AUTO,
        )
    else:
        ds = ds.map(decode_keep_size, num_parallel_calls=AUTO)
    return ds.batch(BATCH).prefetch(AUTO)

# collect image paths & labels for each split
train_paths, y_train = list_paths_labels(TRAIN_DIR)
val_paths,   y_val   = list_paths_labels(VAL_DIR)
test_paths,  y_test  = list_paths_labels(TEST_DIR)

# list_paths_labels skips missing class folders silently, so fail fast here
# with a clear message instead of erroring once feature extraction starts
assert len(train_paths) > 0, f"no images found under {TRAIN_DIR}"
assert len(val_paths) > 0, f"no images found under {VAL_DIR}"
assert len(test_paths) > 0, f"no images found under {TEST_DIR}"

# load backbone model from keras applications
# (no classification head; global average pooling on the last feature map)
backbone = resnet50.ResNet50(include_top=False, weights="imagenet", pooling="avg")

# build datasets (unshuffled so feature rows stay aligned with the label arrays)
train_ds = build_ds(train_paths, y_train, shuffle=False)
val_ds = build_ds(val_paths, y_val, shuffle=False)
test_ds = build_ds(test_paths, y_test, shuffle=False)

# extract features
X_train = backbone.predict(train_ds, verbose=1)
X_val = backbone.predict(val_ds, verbose=1)
X_test = backbone.predict(test_ds, verbose=1)

# every image must have produced exactly one feature row
assert X_train.shape[0] == len(y_train), "train features/labels misaligned"
assert X_val.shape[0] == len(y_val), "validation features/labels misaligned"
assert X_test.shape[0] == len(y_test), "test features/labels misaligned"
print("Shapes:", X_train.shape, X_val.shape, X_test.shape)

2025-10-28 20:56:15.179871: W external/local_xla/xla/tsl/framework/cpu_allocator_impl.cc:84] Allocation of 268435456 exceeds 10% of free system memory.
2025-10-28 20:56:15.330426: W external/local_xla/xla/tsl/framework/cpu_allocator_impl.cc:84] Allocation of 276889600 exceeds 10% of free system memory.
2025-10-28 20:56:15.442511: W external/local_xla/xla/tsl/framework/cpu_allocator_impl.cc:84] Allocation of 268435456 exceeds 10% of free system memory.
2025-10-28 20:56:15.575881: W external/local_xla/xla/tsl/framework/cpu_allocator_impl.cc:84] Allocation of 268435456 exceeds 10% of free system memory.
2025-10-28 20:56:15.822008: W external/local_xla/xla/tsl/framework/cpu_allocator_impl.cc:84] Allocation of 268435456 exceeds 10% of free system memory.


[1m235/235[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m586s[0m 2s/step
[1m75/75[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m184s[0m 2s/step
[1m75/75[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m182s[0m 2s/step
Shapes: (15000, 2048) (4760, 2048) (4761, 2048)


**K-NN Model**