# 🎨 Analysis

This notebook analyzes the experimental results from training a digital-to-film style transfer model. It uses the experiment results that are logged to W&B during training by the `scr/train.py` script.

## Setup

---

Let's import the necessary dependencies and set the global constants.

In [None]:
%reload_ext autoreload
%autoreload 2

In [None]:
import autorootcwd

In [None]:
# Imports
import wandb

import pandas as pd
import seaborn as sns
from matplotlib import pyplot as plt

In [None]:
# Constants
# W&B project and entity names; they are combined as "<entity>/<project>"
# when the runs are queried from the W&B API below.
WANDB_PROJECT = "sillystill"
WANDB_ENTITY = "sillystill"

## Connect to W&B

---

Let's start by loading all the runs from the W&B project.

In [None]:
# Open a client for the W&B public API
api = wandb.Api()

# Fetch every run logged under "<entity>/<project>"
project_path = f"{WANDB_ENTITY}/{WANDB_PROJECT}"
runs = api.runs(project_path)
print(f"✅ Loaded {len(runs)} runs from W&B ({WANDB_ENTITY}/{WANDB_PROJECT})")

## Experiment 1: Single Image

---

In [None]:
def load_single_results(path, runs):
    """Load the single-image results CSV and attach W&B run metadata.

    Args:
        path: Anything accepted by ``pd.read_csv`` (file path or file-like
            object). The CSV must contain ``run_name``, the three
            ``*filepath`` media columns and the four ``baseline_*`` columns.
        runs: Iterable of run objects exposing ``name``, ``tags`` and
            ``config["model"]["net"]["with_noise"]``.

    Returns:
        A DataFrame with the CSV rows (media and baseline columns removed,
        ``tags`` and ``with_noise`` added) followed by one extra row named
        ``"baseline"`` built from the ``baseline_*`` values of the first
        CSV row.

    Raises:
        ValueError: If a ``run_name`` in the CSV has no matching run.
    """
    data = pd.read_csv(path)

    baselines_cols = ["baseline_lpips", "baseline_psnr", "baseline_ssim", "baseline_pieapp"]

    # The baseline metrics are read from the first row only.
    first_row = data.iloc[0]
    baseline_row = {col.replace("baseline_", ""): first_row[col] for col in baselines_cols}
    baseline_row.update({"run_name": "baseline", "tags": "Baseline", "with_noise": 0.0})

    # Remove media cols + baseline cols
    data = data.drop(columns=["digitalfilepath", "filmfilepath", "predictedfilepath"] + baselines_cols)

    # Look runs up by name so the metadata lands on the matching CSV row,
    # whatever order the API returned the runs in.
    runs_by_name = {run.name: run for run in runs}
    wanted = list(data["run_name"].unique())
    missing = [name for name in wanted if name not in runs_by_name]
    if missing:
        raise ValueError(f"No W&B run found for run_name(s): {missing}")

    def tag_label(run):
        # Set iteration order is not stable between kernels, so sort the tags
        # and force "Resized" to the end to get a reproducible label.
        kept = set(run.tags) - {"Combined"}
        return " - ".join(sorted(kept, key=lambda tag: (tag == "Resized", tag)))

    tag_by_run = {name: tag_label(runs_by_name[name]) for name in wanted}
    noise_by_run = {name: runs_by_name[name].config["model"]["net"]["with_noise"] for name in wanted}
    data["tags"] = data["run_name"].map(tag_by_run)
    data["with_noise"] = data["run_name"].map(noise_by_run)

    # Add baseline as the last row
    return pd.concat([data, pd.DataFrame([baseline_row])], ignore_index=True)

### Simple Losses


In [None]:
single_image_raw = load_single_results("outputs/single-image.csv", runs)

# Derive a resize flag and a loss label from the " - "-joined tag string.
# "Resized" is filtered out by value rather than by position, so the loss
# label is correct whatever order the tags were joined in.
single_image_raw["resize"] = single_image_raw["tags"].apply(lambda x: int("Resized" in x))
single_image_raw["loss"] = single_image_raw["tags"].apply(
    lambda x: " - ".join(tag for tag in x.split(" - ") if tag != "Resized")
)

# Encode the noise flag as 0/1 integers
single_image_raw["with_noise"] = single_image_raw["with_noise"].astype(int)

# Drop the raw tag string and sort by SSIM, highest first
single_image = single_image_raw.drop(columns=["tags"]).sort_values(by="ssim", ascending=False)
single_image[single_image['resize'] == 1].round(2)

In [None]:
# Show the whole single-image results table (sorted by SSIM, descending)
single_image

In [None]:
# Keep only the rows whose loss label is exactly "MSE" or "MAE", rounded to 3 decimals
single_image[single_image['loss'].isin(["MSE", "MAE"])].round(3)

In [None]:
# Simple plot for losses with no resizing and no noise

def plot_losses(data, title, y_value, x_label, with_noise=0, resize=0, save_path=None, ylim=None, cutoff=0):
    """Draw a bar chart of one metric per loss for a filtered subset of runs.

    Args:
        data: DataFrame with at least the columns ``resize``, ``with_noise``,
            ``loss``, ``run_name`` and the column named by ``y_value``.
        title: Title of the plot.
        y_value: Name of the column plotted on the y axis; its upper-cased
            form is used as the y-axis label.
        x_label: Label of the x axis.
        with_noise: Only rows whose ``with_noise`` equals this value are kept.
        resize: Only rows whose ``resize`` equals this value are kept.
        save_path: If given, the figure is saved there before it is shown.
        ylim: Optional y-axis limits, passed to ``Axes.set_ylim``.
        cutoff: Number of leading rows (in the current row order of ``data``,
            after filtering) to leave out of the plot.
    """
    keep = (data["resize"] == resize) & (data["with_noise"] == with_noise)
    subset = data[keep].drop(columns=["resize", "with_noise"])

    # Skip the first `cutoff` rows; which runs these are depends on how the
    # caller sorted `data`.
    subset = subset.iloc[cutoff:]

    # Apply the theme before the figure exists so the figure picks it up too.
    sns.set_theme(style="whitegrid", context="notebook", font_scale=1.5, palette="colorblind")

    fig, ax = plt.subplots(figsize=(15, 10))
    sns.barplot(data=subset, x="loss", y=y_value, hue="run_name", ax=ax)
    ax.set_title(title)
    ax.set_ylabel(y_value.upper())
    ax.set_xlabel(x_label)

    # Hide the per-run legend if seaborn created one.
    legend = ax.get_legend()
    if legend is not None:
        legend.remove()

    if ylim is not None:
        ax.set_ylim(ylim)
    ax.tick_params(axis="x", labelrotation=45)
    fig.tight_layout()

    if save_path:
        fig.savefig(save_path)
    plt.show()

# Plot PieAPP per loss for the rows with with_noise == 0 and resize == 0 (the default);
# cutoff=1 leaves out the first row of the filtered table.
# To also save the figure, pass save_path="outputs/single-image-losses.png".
plot_losses(single_image, "Single Image Losses", "pieapp", "Loss", with_noise=0, cutoff=1)

In [None]:
# TODO: Do some plotting here

# Probably useful to pivot the data
# single_image.melt(id_vars=["run_name", "loss", "resize", "with_noise"], var_name="metric", value_name="value")

## Experiment 2: Full Data

---

In [None]:
def load_full_results(path, runs):
    """Load the full-dataset results CSV and attach W&B run metadata.

    Args:
        path: Anything accepted by ``pd.read_csv`` (file path or file-like
            object). The CSV must contain ``run_name``, the three
            ``*filepath`` media columns and the four ``baseline_*`` columns.
        runs: Iterable of run objects exposing ``name``, ``tags`` and
            ``config["model"]["net"]["with_noise"]``.

    Returns:
        A DataFrame with one row per (run, image) plus one ``"baseline"`` row
        per image. The baseline rows take their metrics from the
        ``baseline_*`` columns of the first run found in the CSV.
        ``image_idx`` numbers the images of each run from 1 in CSV order.

    Raises:
        ValueError: If a ``run_name`` in the CSV has no matching run.
    """
    data = pd.read_csv(path)

    data = data.drop(columns=["digitalfilepath", "filmfilepath", "predictedfilepath"])

    # 1-based image index within each run, independent of how many images a
    # run has or whether its rows are contiguous in the CSV.
    data["image_idx"] = data.groupby("run_name").cumcount() + 1

    # Look runs up by name so the metadata lands on the matching CSV rows,
    # whatever order the API returned the runs in.
    runs_by_name = {run.name: run for run in runs}
    wanted = list(data["run_name"].unique())
    missing = [name for name in wanted if name not in runs_by_name]
    if missing:
        raise ValueError(f"No W&B run found for run_name(s): {missing}")

    def tag_label(run):
        # Set iteration order is not stable between kernels, so sort the tags
        # and force "Resized" to the end to get a reproducible label.
        kept = set(run.tags) - {"Combined"}
        return " - ".join(sorted(kept, key=lambda tag: (tag == "Resized", tag)))

    tag_by_run = {name: tag_label(runs_by_name[name]) for name in wanted}
    noise_by_run = {name: runs_by_name[name].config["model"]["net"]["with_noise"] for name in wanted}
    data["tags"] = data["run_name"].map(tag_by_run)
    data["with_noise"] = data["run_name"].map(noise_by_run)

    # Grab baselines by column name from the rows of the first run.
    baselines_cols = ["baseline_lpips", "baseline_psnr", "baseline_ssim", "baseline_pieapp"]
    # isin on a one-element slice keeps this safe when the CSV has no rows.
    first_run_rows = data["run_name"].isin(data["run_name"].iloc[:1])
    baselines = (
        data.loc[first_run_rows, baselines_cols + ["image_idx"]]
        .rename(columns=lambda col: col.replace("baseline_", ""))
        .assign(run_name="baseline", tags="Baseline", with_noise=False)
    )
    data = data.drop(columns=baselines_cols)

    return pd.concat([data, baselines], axis=0).reset_index(drop=True)

In [None]:
full_data_raw = load_full_results("outputs/full-data.csv", runs)

# Derive a resize flag and a loss label from the " - "-joined tag string.
# "Resized" is filtered out by value rather than by position, so the loss
# label is correct whatever order the tags were joined in.
full_data_raw["resize"] = full_data_raw["tags"].apply(lambda x: int("Resized" in x))
full_data_raw["loss"] = full_data_raw["tags"].apply(
    lambda x: " - ".join(tag for tag in x.split(" - ") if tag != "Resized")
)

# Encode the noise flag as 0/1 integers
full_data_raw["with_noise"] = full_data_raw["with_noise"].astype(int)

# Drop the raw tag string and sort by SSIM, lowest first
full_data = full_data_raw.drop(columns=["tags"]).sort_values(by="ssim")
full_data

In [None]:
# Average every metric over the images of each run configuration.
group_keys = ["run_name", "loss", "resize", "with_noise"]
metric_cols = ["lpips", "psnr", "ssim", "pieapp"]

# Variant that also reports the standard deviation:
# grouped_full_data = (
#     full_data.groupby(group_keys)
#     .agg({metric: ["mean", "std"] for metric in metric_cols})
#     .sort_values(by=("ssim", "mean"), ascending=False)
# )

# Mean only, best SSIM first, with the group keys restored as columns
grouped_full_data = (
    full_data.groupby(group_keys)
    .agg({metric: "mean" for metric in metric_cols})
    .sort_values(by="ssim", ascending=False)
    .reset_index()
)
grouped_full_data

In [None]:
# Plot the mean PSNR per loss for the rows with with_noise == 0 and resize == 0.
# To also save the figure, pass save_path="outputs/full-data-losses.png".
plot_losses(grouped_full_data, "Full Data Losses", "psnr", "loss", with_noise=0, resize=0)

In [None]:
# TODO: Do some plotting here

# Probably useful to pivot the data
# full_data.melt(id_vars=["run_name", "image_idx", "loss", "resize", "with_noise"], var_name="metric", value_name="value")