In [None]:
import tensorflow as tf
import pandas as pd
import numpy as np

from dataset import load_dataset, load_dataset_info
from experiments import ResNetClassificationExperiment

## Training Data

In [None]:
# Experiment configurations, one tuple per run of the training loop below:
#   training_dataset - folder name under ../data used to train the model
#   test_dataset     - dataset name used for the experiment name and to decide
#                      whether a separate test set is loaded (it differs from
#                      training_dataset in that case)
#   technique        - forwarded to get_dataset(technique=...); True selects
#                      the "technique" column as the target instead of "label"
DATASETS = [
    ("LOC", "Antwerp", False), 
    ("LOCPortrait", "LOCPortrait", False),
    ("LOC", "LOC", True), 
]  # (training_dataset, test_dataset, technique)

In [None]:
def get_dataset(dataset, technique=False, test=False):
    """Load the data generators and the class count for one dataset folder.

    Args:
        dataset: Name of the folder under ``../data`` to load.
        technique: When true, the classification target is the ``technique``
            column, restricted to a fixed list of five techniques. Otherwise
            the target is the ``label`` column, restricted to rows whose
            ``year`` lies in the half-open range [1850, 1930).
        test: When true, every row is tagged with ``set == "train"`` and the
            validation split passed to ``load_dataset`` is 0.01 instead of
            0.2.

    Returns:
        A 2-tuple ``(generators, n_classes)``. ``generators`` is the value
        returned by ``load_dataset`` (callers in this notebook unpack it into
        three generators); ``n_classes`` is the number of distinct values in
        the ``target`` column after filtering.
    """
    df = load_dataset_info(f"../data/{dataset}")
    if test:
        # Presumably makes load_dataset serve the whole frame through its
        # first (train) generator -- confirm against load_dataset.
        df["set"] = "train"

    # .copy() after each filter: the column assignments below must write to an
    # independent frame, not to a slice of the loaded one (this is what
    # triggers pandas' SettingWithCopyWarning).
    if technique:
        kept_techniques = [
            "ambrotypes",
            "cyanotypes",
            "dry+plate+negatives",
            "gelatin+silver+prints",
            "acetate+negatives",
        ]
        df = df.loc[df["technique"].isin(kept_techniques)].copy()
        df["target"] = df["technique"]
        df["target_ordinal"] = df["target"]
    else:
        df = df.loc[(df["year"] >= 1850) & (df["year"] < 1930)].copy()
        df["target"] = df["label"]

    n_classes = df["target"].unique().shape[0]

    # The same preprocessing is applied to the train and test pipelines.
    preprocess_config = {
        "preprocessing_function": tf.keras.applications.vgg16.preprocess_input
    }

    generators = load_dataset(
        f"../data/{dataset}",
        df=df,
        y_col="target",
        class_mode="categorical",
        validation_split=0.01 if test else 0.2,
        train_preprocess_config=preprocess_config,
        test_preprocess_config=preprocess_config,
    )
    return generators, n_classes

In [None]:
def get_test_datasets(dataset, sample_size=100, random_state=None):
    """Build one sampled evaluation generator per label of a dataset.

    Rows are restricted to ``year`` in [1850, 1930) and the ``label`` column
    is used as the target. For every distinct label, up to ``sample_size``
    rows are drawn at random and passed to ``load_dataset``.

    Args:
        dataset: Name of the folder under ``../data`` to load.
        sample_size: Maximum number of rows drawn per label. Labels with
            fewer rows contribute all of their rows instead of raising.
        random_state: Optional seed forwarded to ``DataFrame.sample`` so the
            draw can be reproduced. ``None`` (default) keeps it unseeded.

    Returns:
        A list of ``(label, generator)`` pairs, where ``generator`` is the
        first of the three values returned by ``load_dataset``.
    """
    df = load_dataset_info(f"../data/{dataset}")
    df["set"] = "train"

    # .copy() so the "target" assignment writes to an independent frame rather
    # than to a slice of the loaded one (avoids SettingWithCopyWarning).
    df = df.loc[(df["year"] >= 1850) & (df["year"] < 1930)].copy()
    df["target"] = df["label"]

    preprocess_config = {
        "preprocessing_function": tf.keras.applications.vgg16.preprocess_input
    }

    test_datasets = []
    for label in df["target"].unique():
        label_rows = df.loc[df["target"] == label]
        # DataFrame.sample raises ValueError when asked for more rows than
        # exist (without replacement), so cap the draw at what is available.
        n_samples = min(sample_size, len(label_rows))
        df_set = label_rows.sample(n_samples, random_state=random_state)

        # NOTE(review): df_set holds a single target value; confirm that
        # load_dataset still encodes it against the full label set.
        train_dataset, _, _ = load_dataset(
            f"../data/{dataset}",
            df=df_set,
            y_col="target",
            class_mode="categorical",
            validation_split=0.01,
            train_preprocess_config=preprocess_config,
            test_preprocess_config=preprocess_config,
        )
        test_datasets.append((label, train_dataset))

    return test_datasets

In [None]:
# Train and evaluate one ResNet classifier per entry of DATASETS.
for train_dataset_name, test_dataset_name, is_technique in DATASETS:
    # A separate test set is only loaded when it differs from the training set.
    has_test_dataset = train_dataset_name != test_dataset_name

    (train_generator, val_generator, test_generator), n_classes = get_dataset(
        train_dataset_name, technique=is_technique
    )

    if has_test_dataset:
        # Load the held-out dataset by its own name. The original passed
        # train_dataset_name here, so the model was tested on the data it had
        # been trained on and test_dataset_name was never loaded.
        # The class count of the test dataset is discarded: the model is sized
        # from the training dataset.
        (test_generator, _, _), _ = get_dataset(
            test_dataset_name, test=True, technique=is_technique
        )

    name = f"{train_dataset_name}_{test_dataset_name}_ResNet_Classification_test"
    experiment = ResNetClassificationExperiment(
        name=name,
        n_classes=n_classes,
    )

    model, _, _ = experiment.run(
        train_generator, val_generator, test_generator,
        pretrain_epochs=20,
        finetune_epochs=20
    )

    if test_dataset_name == "Antwerp":
        # Per-label evaluation on samples of the test dataset (the original
        # sampled from train_dataset_name despite the guard on the test name).
        for label, test_dataset in get_test_datasets(test_dataset_name):
            performance = model.evaluate(test_dataset)
            print(f"[{name}][evaluation][{label}]: {performance}")