## Which images cause the most errors during cross-dataset testing on LCC and SiW-M?

### Some helper functions

In [None]:
import json
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from PIL import Image


def find_worst_images(scores_path, label, n=10):
    """Return the worst-scored image paths of one class, at most one per video.

    The JSON file at ``scores_path`` must contain a ``"scores"`` entry that
    ``pandas.DataFrame`` can turn into a table with ``name``, ``label`` and
    ``score`` columns. Rows whose ``label`` equals the requested one are
    ranked by ``score`` (descending for label 0, ascending for label 1) and
    walked in that order, keeping only the first image seen for each video.

    Args:
        scores_path: Path to the JSON score file. When the path contains
            ``"siwm2lcc"`` the image names are parsed as LCC paths,
            otherwise as SiW paths (see the comments in the loop).
        label: Class to inspect, 0 or 1 (this notebook passes 0 for spoof
            and 1 for live images).
        n: Maximum number of images to return.

    Returns:
        A ``pandas.Series`` named ``"name"`` holding the selected image
        paths, worst first. It is empty when no row matches.

    Raises:
        ValueError: If ``label`` is neither 0 nor 1.
    """
    # Fail fast, before touching the file system.
    if label not in (0, 1):
        raise ValueError("label must be 0 or 1")

    # Context manager so the file handle is closed deterministically.
    with open(scores_path) as scores_file:
        test_results = json.load(scores_file)

    df = pd.DataFrame(test_results["scores"])
    if df.empty:
        # An empty score list yields a frame without columns; there is
        # nothing to rank, so skip the column lookups entirely.
        ranked_names = []
    else:
        df = df[df["label"] == label]
        ranked_names = df.sort_values(by="score", ascending=bool(label == 1))["name"]

    names_are_lcc = "siwm2lcc" in str(scores_path)
    seen_videos = set()
    sorted_images = []

    for name in ranked_names:
        if names_are_lcc:
            # In LCC the folder names are just real/spoof, so the video name
            # is taken from the image file name with its last "_"-separated
            # piece dropped, e.g. YOUTUBE_id124_s0.
            # NOTE(review): assumes the file name is the fifth "/"-separated
            # path component - confirm against the dataset layout.
            filename = name.split("/")[4]
            video_name = "_".join(filename.split("_")[:-1])
        else:
            # In SiW the parent folder identifies the video, e.g. Live_85.
            video_name = os.path.dirname(name).split("/")[-1]

        if video_name in seen_videos:
            continue
        seen_videos.add(video_name)
        sorted_images.append(name)
        if len(sorted_images) == n:
            break

    # head(n) keeps the historical result for n <= 0, where the loop above
    # never hits its stop condition.
    return pd.DataFrame(sorted_images, columns=["name"])["name"].head(n)


def show_worst_images(img_paths, title):
    """Display the given images in a grid with a short per-image title.

    The grid has at most five rows and as many columns as needed; images
    are placed row by row. Every image is resized to 300x300 pixels and
    shown without axes. The figure is displayed with ``plt.show()``.

    Per-image titles are derived from the path: paths containing
    ``"data/siwm/"`` use the part of the base name before the first
    ``"--"``; all other paths use the fifth "/"-separated component with
    its last "_"-separated piece removed. Titles longer than 15 characters
    are truncated and suffixed with ``"..."``.

    Args:
        img_paths: Iterable of image file paths (for example the Series
            returned by ``find_worst_images``).
        title: Text used as the figure's overall title.

    Returns:
        None. Nothing is drawn when ``img_paths`` is empty.
    """
    img_paths = list(img_paths)
    num_images = len(img_paths)
    if num_images == 0:
        # An empty grid would otherwise divide by zero below.
        return

    num_rows = min(5, num_images)
    num_cols = (num_images + num_rows - 1) // num_rows

    # squeeze=False keeps axs two-dimensional even when the grid has a
    # single row or column, so [row, col] indexing is always valid.
    fig, axs = plt.subplots(num_rows, num_cols, figsize=(15, 10), squeeze=False)
    fig.subplots_adjust(wspace=0.4, hspace=0.4)

    max_title_length = 15  # Longest title shown before truncation

    for i, img_path in enumerate(img_paths):
        ax = axs[i // num_cols, i % num_cols]

        # resize() returns an independent copy, so the source file can be
        # closed as soon as the thumbnail has been handed to matplotlib.
        with Image.open(img_path) as img:
            ax.imshow(img.resize((300, 300)))
        ax.axis("off")

        if "data/siwm/" in img_path:
            title_text = os.path.basename(img_path).split("--")[0]
        else:
            # NOTE(review): assumes the file name is the fifth "/"-separated
            # path component - confirm against the dataset layout.
            filename = img_path.split("/")[4]
            title_text = "_".join(filename.split("_")[:-1])

        if len(title_text) > max_title_length:
            title_text = title_text[:max_title_length] + "..."

        ax.set_title(title_text)

    # Remove the unused cells at the end of the (row-major) grid.
    for unused_ax in axs.flat[num_images:]:
        fig.delaxes(unused_ax)

    plt.suptitle(title, fontsize=16)
    plt.show()

### LCC-to-SiWM

In [None]:
# JSON score file passed to find_worst_images by the LCC-to-SiWM cells below
# (relative path, resolved against the current working directory).
lcc2siwm_scores = "logs/lcc2siwm/stats/lcc2siwm-test/epoch_000.json"

#### Live Images

In [None]:
# Select up to 30 worst live images (label 1), save the list as CSV, then plot it.
worst_live_images = find_worst_images(scores_path=lcc2siwm_scores, label=1, n=30)
worst_live_images.to_csv("logs/lcc2siwm/stats/lcc2siwm-test/worst_live_images.csv")
show_worst_images(img_paths=worst_live_images, title="Worst live images - LCC2SIWM")

#### Spoof Images

In [None]:
# Select up to 30 worst spoof images (label 0), save the list as CSV, then plot it.
worst_spoof_images = find_worst_images(scores_path=lcc2siwm_scores, label=0, n=30)
worst_spoof_images.to_csv("logs/lcc2siwm/stats/lcc2siwm-test/worst_spoof_images.csv")
show_worst_images(img_paths=worst_spoof_images, title="Worst spoof images - LCC2SIWM")

### SiWM-to-LCC

In [None]:
# JSON score file passed to find_worst_images by the SiWM-to-LCC cells below
# (relative path, resolved against the current working directory).
siwm2lcc_scores = "logs/siwm2lcc/stats/siwm2lcc-test/epoch_000.json"

#### Live Images

In [None]:
# Select up to 30 worst live images (label 1), save the list as CSV, then plot it.
worst_live_images = find_worst_images(scores_path=siwm2lcc_scores, label=1, n=30)
worst_live_images.to_csv("logs/siwm2lcc/stats/siwm2lcc-test/worst_live_images.csv")
show_worst_images(img_paths=worst_live_images, title="Worst live images - SIWM2LCC")

#### Spoof Images

In [None]:
# Select up to 30 worst spoof images (label 0), save the list as CSV, then plot it.
worst_spoof_images = find_worst_images(scores_path=siwm2lcc_scores, label=0, n=30)
worst_spoof_images.to_csv("logs/siwm2lcc/stats/siwm2lcc-test/worst_spoof_images.csv")
show_worst_images(img_paths=worst_spoof_images, title="Worst spoof images - SIWM2LCC")

## LoRA

### LCC-to-SiWM

In [None]:
# Switch to the project directory so the relative "logs/..." paths used below
# resolve. expanduser("~") is used instead of os.getenv("HOME") because the
# latter returns None when HOME is unset, which made the string concatenation
# raise TypeError.
os.chdir(os.path.join(os.path.expanduser("~"), "spoof"))
# JSON score file passed to find_worst_images by the LoRA LCC-to-SiWM cells below.
lcc2siwm_scores = "logs/lora/lora_lcc2siwm/LCC-to-SiW-M.json"

In [None]:
# Select up to 30 worst live images (label 1), save the list as CSV, then plot it.
worst_live_images = find_worst_images(scores_path=lcc2siwm_scores, label=1, n=30)
worst_live_images.to_csv("logs/lora/lora_lcc2siwm/worst_live_images.csv")
show_worst_images(img_paths=worst_live_images, title="Worst live images - LCC2SIWM - LORA")

In [None]:
# Select up to 30 worst spoof images (label 0), save the list as CSV, then plot it.
worst_spoof_images = find_worst_images(scores_path=lcc2siwm_scores, label=0, n=30)
worst_spoof_images.to_csv("logs/lora/lora_lcc2siwm/worst_spoof_images.csv")
show_worst_images(img_paths=worst_spoof_images, title="Worst spoof images - LCC2SIWM - LORA")

### SiWM-to-LCC

In [None]:
# JSON score file passed to find_worst_images by the LoRA SiWM-to-LCC cells below
# (relative path, resolved against the current working directory).
siwm2lcc_scores = "logs/lora/lora_siwm2lcc/stats/test/s2l_lora.json"

In [None]:
# Select up to 30 worst live images (label 1), save the list as CSV, then plot it.
worst_live_images = find_worst_images(scores_path=siwm2lcc_scores, label=1, n=30)
worst_live_images.to_csv("logs/lora/lora_siwm2lcc/worst_live_images.csv")
show_worst_images(img_paths=worst_live_images, title="Worst live images - SIWM2LCC - LORA")

In [None]:
# Select up to 30 worst spoof images (label 0), save the list as CSV, then plot it.
worst_spoof_images = find_worst_images(scores_path=siwm2lcc_scores, label=0, n=30)
worst_spoof_images.to_csv("logs/lora/lora_siwm2lcc/worst_spoof_images.csv")
show_worst_images(img_paths=worst_spoof_images, title="Worst spoof images - SIWM2LCC - LORA")