In [None]:
import tensorflow as tf
import pandas as pd
import numpy as np

from dataset import load_dataset, load_dataset_info
from experiments import ResNetRegressionExperiment

In [None]:
# Experiment pairs, each given as (training_dataset, test_dataset).
# A pair whose two names are identical has no separate test dataset.
DATASETS = [("LOC", "Antwerp"), ("LOCPortrait", "LOCPortrait")]

In [None]:
def get_dataset(dataset, test=False, min_year=1850, max_year=1929):
    """Load a dataset's generators with ``year`` rescaled to a regression target.

    Rows whose ``year`` falls outside ``[min_year, max_year + 1)`` are dropped and
    a ``target`` column is added as ``(year - min_year) / (max_year - min_year)``.
    The bounds are fixed instead of being derived from the data so that every
    dataset (training or held-out) is mapped onto the same target scale, which
    also matches the ``min_year``/``max_year`` passed to the experiment in this
    notebook. Deriving them from the data gave each dataset its own scale and
    produced NaN targets when only a single year remained.

    Args:
        dataset: Directory name under ``../data`` to load.
        test: When true, every row's ``set`` column is overwritten with
            ``"train"`` and ``validation_split`` is lowered from 0.2 to 0.01.
        min_year: Smallest year kept; mapped to target 0.
        max_year: Largest (integer) year kept; mapped to target 1.

    Returns:
        The value returned by ``load_dataset`` (callers in this notebook unpack
        it into three generators).

    Raises:
        ValueError: If ``max_year`` is not greater than ``min_year``.
    """
    if max_year <= min_year:
        raise ValueError("max_year must be greater than min_year")

    data_dir = f"../data/{dataset}"
    df = load_dataset_info(data_dir)
    if test:
        df["set"] = "train"

    # Preprocess years. `.copy()` makes the filtered frame independent so the
    # column assignment below does not write into a slice of the original.
    in_range = (df["year"] >= min_year) & (df["year"] < max_year + 1)
    df = df.loc[in_range].copy()
    df["target"] = (df["year"] - min_year) / (max_year - min_year)

    preprocess_config = {
        "preprocessing_function": tf.keras.applications.vgg16.preprocess_input
    }

    return load_dataset(
        data_dir,
        df=df,
        y_col="target",
        class_mode="raw",
        validation_split=0.01 if test else 0.2,
        train_preprocess_config=preprocess_config,
        test_preprocess_config=preprocess_config,
    )


In [None]:
def get_test_datasets(dataset, sample_size=100, random_state=None):
    """Build one evaluation dataset per distinct ``label`` value.

    Rows are restricted to years in ``[1850, 1930)``, the ``label`` column is
    copied into ``target``, and for each label up to ``sample_size`` rows are
    drawn at random and handed to ``load_dataset``.

    Args:
        dataset: Directory name under ``../data`` to load.
        sample_size: Maximum number of rows drawn per label. Labels with fewer
            rows contribute all of them instead of raising ``ValueError``.
        random_state: Optional seed forwarded to ``DataFrame.sample`` so the
            draw can be reproduced. ``None`` keeps the unseeded behaviour.

    Returns:
        A list of ``(label, dataset)`` tuples, where ``dataset`` is the first
        element returned by ``load_dataset`` for that label's sample.
    """
    data_dir = f"../data/{dataset}"
    df = load_dataset_info(data_dir)
    # Mark every row as "train"; together with the small validation split this
    # presumably routes almost all rows into the first returned dataset —
    # TODO confirm against load_dataset.
    df["set"] = "train"

    # `.copy()` so the `target` assignment does not write into a slice.
    df = df.loc[(df["year"] >= 1850) & (df["year"] < 1930)].copy()
    df["target"] = df["label"]

    preprocess_config = {
        "preprocessing_function": tf.keras.applications.vgg16.preprocess_input
    }

    test_datasets = []
    # NaN labels never match an equality filter, so they are skipped up front.
    for label in df["target"].dropna().unique():
        label_rows = df.loc[df["target"] == label]
        # Cap the draw at the group size: sampling more rows than exist
        # (without replacement) raises ValueError.
        df_set = label_rows.sample(
            min(sample_size, len(label_rows)), random_state=random_state
        )
        train_dataset, _, _ = load_dataset(
            data_dir,
            df=df_set,
            y_col="target",
            class_mode="categorical",
            validation_split=0.01,
            train_preprocess_config=preprocess_config,
            test_preprocess_config=preprocess_config,
        )
        test_datasets.append((label, train_dataset))

    return test_datasets

In [None]:
# Train one ResNet regression model per (training, test) dataset pair and
# evaluate it.
for train_dataset_name, test_dataset_name in DATASETS:
    has_test_dataset = train_dataset_name != test_dataset_name

    train_generator, val_generator, test_generator = get_dataset(train_dataset_name)

    if has_test_dataset:
        # Replace the test split with the separate held-out dataset. The test
        # dataset must be loaded here, not the training dataset again.
        # With test=True, get_dataset marks every row as "train" and uses a 0.01
        # validation split, so the first returned generator is used as the test
        # generator.
        test_generator, _, _ = get_dataset(test_dataset_name, test=True)

    experiment_name = f"{train_dataset_name}_{test_dataset_name}_ResNet_Regression"
    experiment = ResNetRegressionExperiment(
        name=experiment_name,
        n_classes=1,
        min_year=1850,
        max_year=1929,
    )

    model, _, _ = experiment.run(
        train_generator, val_generator, test_generator,
        pretrain_epochs=20,
        finetune_epochs=20,
    )

    if test_dataset_name == "Antwerp":
        # Per-label evaluation on the held-out dataset.
        # NOTE(review): this previously sampled from the training dataset;
        # confirm the test dataset provides the `label` column used by
        # get_test_datasets.
        # NOTE(review): get_test_datasets requests class_mode="categorical"
        # while this model is a regressor — confirm the reported metrics are
        # meaningful.
        for label, test_dataset in get_test_datasets(test_dataset_name):
            performance = model.evaluate(test_dataset)
            print(f"[{experiment_name}][evaluation][{label}]: {performance}")