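# Re-photo hybrid training notebook

This notebook trains the hybrid re-photo detection stack (moiré, subpixel, and EXIF signals plus their hybrid combination), reports AUC and FPR at 95% TPR on the training data and on the held-out test set, and then runs leave-one-out evaluations by screen type and by camera body.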


In [2]:
import sys
from pathlib import Path

# Locate the repository root. When the kernel is started from inside the
# "notebooks" folder, the root is one level up; otherwise the working
# directory itself is treated as the root.
PROJECT_ROOT = Path.cwd().resolve()
PROJECT_ROOT = PROJECT_ROOT.parent if PROJECT_ROOT.name == "notebooks" else PROJECT_ROOT

# Put the root at the front of sys.path (only once, so re-running the cell
# does not keep prepending duplicates) so that `src.*` can be imported.
if not any(entry == str(PROJECT_ROOT) for entry in sys.path):
    sys.path.insert(0, str(PROJECT_ROOT))


In [3]:
from IPython.display import display

from src.pipeline import (
    PipelineConfig,
    run_full_pipeline,
    evaluate_leave_one,
    predict_with_bundle,
    summarize_metrics,
    evaluate_on_test_set,
)


In [None]:
# Input / output locations, all derived from the repository root.
DATA_DIR = PROJECT_ROOT / "data"
EXIF_CSV = DATA_DIR / "exif_metadata.csv"
TEST_DIR = DATA_DIR / "test"
TEST_EXIF = TEST_DIR / "test_exif_metadata.csv"
ARTIFACTS_DIR = PROJECT_ROOT / "artifacts"


def _select_device() -> str:
    """Pick the compute device name to hand to the pipeline.

    Returns "mps" when the Apple Metal backend is available (the notebook's
    original setting), otherwise "cuda" when a CUDA device is available,
    otherwise "cpu". If torch cannot be imported the original hard-coded
    value "mps" is returned unchanged, so behaviour is never worse than before.

    NOTE(review): assumes PipelineConfig.device accepts any torch device
    string ("cuda" / "cpu"), not only "mps" -- confirm against src.pipeline.
    """
    try:
        import torch
    except ImportError:
        return "mps"

    # torch.backends.mps is missing on older torch builds, hence the getattr.
    mps_backend = getattr(torch.backends, "mps", None)
    if mps_backend is not None and mps_backend.is_available():
        return "mps"
    if torch.cuda.is_available():
        return "cuda"
    return "cpu"


DEVICE = _select_device()

config = PipelineConfig(
    data_dir=DATA_DIR,
    exif_csv=EXIF_CSV,
    artifacts_dir=ARTIFACTS_DIR,
    test_data_dir=TEST_DIR,
    test_exif_csv=TEST_EXIF,
    tile_size=512,
    tile_stride=256,
    max_tiles_per_image=24,
    device=DEVICE,
)
# Leave both flags off to keep whatever index / feature results already exist;
# set either to True to force that stage to be recomputed.
config.force_index_recompute = False
config.force_feature_recompute = False
config


PipelineConfig(data_dir=PosixPath('/Users/mohidtanveer/Downloads/227/final/rephoto-detection/data'), exif_csv=PosixPath('/Users/mohidtanveer/Downloads/227/final/rephoto-detection/data/exif_metadata.csv'), artifacts_dir=PosixPath('/Users/mohidtanveer/Downloads/227/final/rephoto-detection/artifacts'), tile_size=512, tile_stride=256, max_tiles_per_image=24, force_feature_recompute=False, force_index_recompute=False, device='mps', test_data_dir=PosixPath('/Users/mohidtanveer/Downloads/227/final/rephoto-detection/data/test'), test_exif_csv=PosixPath('/Users/mohidtanveer/Downloads/227/final/rephoto-detection/data/test/test_exif_metadata.csv'))

## Training, in-sample metrics on the training set, and evaluation on the held-out test set

In [None]:
# Run the full pipeline; it hands back the training store and the model bundle.
store, bundle = run_full_pipeline(config)

# Score the same store the bundle came from and show the per-signal summary.
predictions = predict_with_bundle(store, bundle)
display(summarize_metrics(predictions))

# The test evaluation only runs when both the test image directory and its
# EXIF CSV are configured.
if all((config.test_data_dir, config.test_exif_csv)):
    test_store, test_metrics = evaluate_on_test_set(config, bundle)
    display(test_metrics)

Unnamed: 0,signal,auc,fpr_at_95_tpr
0,moire,0.981418,0.071038
1,subpixel,0.892931,0.480874
2,exif,1.0,0.0
3,hybrid,1.0,0.0


Unnamed: 0,signal,auc,fpr_at_95_tpr
0,moire,0.985526,0.15
1,subpixel,0.842105,0.5
2,exif,1.0,0.0
3,hybrid,1.0,0.0


In [None]:
# Columns whose values are held out one at a time.
SCREEN_SPLIT_COLUMN, CAMERA_SPLIT_COLUMN = "screen_type", "camera_body"

# Leave-one-display-type-out: one evaluation per held-out screen type.
lod = evaluate_leave_one(store, config, SCREEN_SPLIT_COLUMN)
display(lod)

# Leave-one-camera-out: one evaluation per held-out camera body.
loc = evaluate_leave_one(store, config, CAMERA_SPLIT_COLUMN)
display(loc)


Unnamed: 0,signal,auc,fpr_at_95_tpr,held_out_value,split_column
0,moire,0.824923,0.672727,lcd,screen_type
1,subpixel,0.719101,1.0,lcd,screen_type
2,exif,1.0,0.0,lcd,screen_type
3,hybrid,0.998774,0.018182,lcd,screen_type
4,moire,0.87262,0.418182,oled,screen_type
5,subpixel,0.76837,0.745455,oled,screen_type
6,exif,1.0,0.0,oled,screen_type
7,hybrid,1.0,0.0,oled,screen_type


Unnamed: 0,signal,auc,fpr_at_95_tpr,held_out_value,split_column
0,moire,0.754052,0.511364,Apple__iPhone 12 Pro Max,camera_body
1,subpixel,0.657505,0.988636,Apple__iPhone 12 Pro Max,camera_body
2,exif,1.0,0.0,Apple__iPhone 12 Pro Max,camera_body
3,hybrid,1.0,0.0,Apple__iPhone 12 Pro Max,camera_body
4,moire,0.907787,0.5,Apple__iPhone 14 Pro Max,camera_body
5,subpixel,0.702186,1.0,Apple__iPhone 14 Pro Max,camera_body
6,exif,1.0,0.0,Apple__iPhone 14 Pro Max,camera_body
7,hybrid,1.0,0.0,Apple__iPhone 14 Pro Max,camera_body
8,moire,0.856027,0.285714,Apple__iPhone 17 Pro,camera_body
9,subpixel,0.896577,0.464286,Apple__iPhone 17 Pro,camera_body


## Per-image predictions on the test set

In [None]:
import pandas as pd

# evaluate_on_test_set returns the test store together with the aggregate
# metrics frame; only the store is needed below to get per-image scores.
test_store, test_metrics = evaluate_on_test_set(config, bundle)
test_predictions = predict_with_bundle(test_store, bundle)

PROB_COLUMNS = ["moire_prob", "subpixel_prob", "exif_prob", "hybrid_prob"]
PREDICTION_COLUMNS = ["abs_path", "label_binary", *PROB_COLUMNS]


def build_test_prediction_table(image_paths, predictions):
    """Combine the images found on disk with the pipeline's per-image scores.

    Parameters
    ----------
    image_paths : iterable of pathlib.Path
        Image files discovered under the test directory.
    predictions : pandas.DataFrame
        Per-image predictions; must contain the PREDICTION_COLUMNS columns
        unless it is empty.

    Returns
    -------
    pandas.DataFrame
        One row per distinct ``abs_path`` with the PREDICTION_COLUMNS columns.
        Discovered images come first (in the order given), followed by any
        predicted paths that were not discovered on disk.

    Notes
    -----
    Images without a matching prediction keep NaN probabilities, so they can be
    told apart from images that genuinely scored 0.0, and keep their parent
    folder name as ``label_binary``. When a prediction exists, its
    ``label_binary`` and probabilities replace the placeholders. If the same
    ``abs_path`` occurs more than once in ``predictions``, the last row wins.
    """
    rows_by_path = {}
    for path in image_paths:
        abs_path = str(path)
        rows_by_path[abs_path] = {
            "abs_path": abs_path,
            "label_binary": path.parent.name,
            **dict.fromkeys(PROB_COLUMNS, float("nan")),
        }

    # An empty frame may lack the expected columns, so only select them when
    # there is something to merge.
    if len(predictions):
        for record in predictions[PREDICTION_COLUMNS].to_dict("records"):
            rows_by_path.setdefault(record["abs_path"], {}).update(record)

    # Explicit columns keep the schema stable even when there are no rows.
    return pd.DataFrame(list(rows_by_path.values()), columns=PREDICTION_COLUMNS)


# All .jpeg files under data/test (subfolders data/test/authentic and
# data/test/re-photo/{ai-LCD, ai-OLED, authentic-LCD, authentic-OLED}).
# Sorted so the row order does not depend on filesystem enumeration order.
test_image_paths = sorted(TEST_DIR.glob("**/*.jpeg"))

test_predictions_df = build_test_prediction_table(test_image_paths, test_predictions)

display(test_predictions_df)

Unnamed: 0,abs_path,label_binary,moire_prob,subpixel_prob,exif_prob,hybrid_prob
0,/Users/mohidtanveer/Downloads/227/final/rephot...,0,0.013169,0.049382,0.0,0.047115
1,/Users/mohidtanveer/Downloads/227/final/rephot...,0,0.017517,0.620177,0.00625,0.090191
2,/Users/mohidtanveer/Downloads/227/final/rephot...,0,0.018016,0.656521,0.010289,0.095075
3,/Users/mohidtanveer/Downloads/227/final/rephot...,0,0.151206,0.384829,0.0015,0.092056
4,/Users/mohidtanveer/Downloads/227/final/rephot...,0,0.34479,0.162529,0.032598,0.121342
5,/Users/mohidtanveer/Downloads/227/final/rephot...,0,0.138857,0.158011,0.0,0.070014
6,/Users/mohidtanveer/Downloads/227/final/rephot...,0,0.003735,0.087185,0.006229,0.049156
7,/Users/mohidtanveer/Downloads/227/final/rephot...,0,0.163029,0.22437,0.002583,0.079923
8,/Users/mohidtanveer/Downloads/227/final/rephot...,0,0.030036,0.699598,0.002857,0.099553
9,/Users/mohidtanveer/Downloads/227/final/rephot...,0,0.251419,0.418613,0.002857,0.118165
