In [1]:
import os
import gc
import psutil
import torch
import torch.nn as nn
import torch.optim as optim
import xgboost as xgb
from sklearn.dummy import DummyClassifier
from torch.utils.data import Dataset, DataLoader, Subset
import torchvision.transforms as transforms
import torch.nn.functional as F
import mlflow
import time
import random
import mlflow.sklearn
import numpy as np
import pandas as pd
import plotly.express as px
from plotly.subplots import make_subplots
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.inspection import permutation_importance
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import (
    classification_report,
)
from sklearn.preprocessing import LabelEncoder
from sklearn.svm import SVC
from torchmetrics import RetrievalMAP
from warnings import filterwarnings
from PIL import Image
from torchvision.models import (
    resnet18,
    resnet50,
    resnet152,
    ResNet152_Weights,
    ResNet50_Weights,
)

filterwarnings("ignore")

ModuleNotFoundError: No module named 'torchvision'

In [185]:
torch.backends.cuda.matmul.allow_tf32 = True
torch.backends.cudnn.allow_tf32 = True

## Data Analysis

In [186]:
train_df = pd.read_parquet("data/joined_features_all.parquet")

In [187]:
train_df.shape

(1580470, 13)

In [188]:
train_df.sample(5)

Unnamed: 0,id,landmark_id,image_path,width,height,aspect_ratio,mean_rgb,mean_r,mean_g,mean_b,x,y,local_binary_pattern
1417511,f7baa81c30d8e87b,182098,f7baa81c30d8e87b.jpg,800,533,1.500938,[ 99.25129925 113.72908068 127.72841932],99.251299,113.729081,127.728419,-87.399628,-0.602296,"[0.03445825515947467,0.01547607879924953,0.015..."
360909,8caa5a43be355452,45743,8caa5a43be355452.jpg,600,800,0.75,[ 98.5718375 116.13153542 129.62851042],98.571838,116.131535,129.62851,57.22464,49.502514,"[0.04715625,0.023035416666666666,0.01896458333..."
1118141,f8f8f4f214d61a79,143595,f8f8f4f214d61a79.jpg,800,600,1.333333,[149.88622708 136.15529375 126.90377083],149.886227,136.155294,126.903771,66.818024,-44.120079,"[0.040654166666666665,0.018466666666666666,0.0..."
269071,e483549925003a76,34306,e483549925003a76.jpg,799,533,1.499062,[ 68.72720356 108.44995034 146.62469503],68.727204,108.44995,146.624695,74.2966,31.61492,"[0.03846271253701273,0.022537552804044455,0.01..."
1017378,25588a72b2a2e737,131213,25588a72b2a2e737.jpg,800,600,1.333333,[131.41666667 149.10111042 159.0215375 ],131.416667,149.10111,159.021537,-77.22876,-14.109613,"[0.030104166666666668,0.016960416666666665,0.0..."


In [189]:
# Make local_binary_pattern entries numpy ndarrays
def to_ndarray(v):
    s = v.strip().strip("[]")
    parts = s.replace(",", " ").split()
    return [float(x) for x in parts]

In [190]:
train_df["local_binary_pattern"] = train_df["local_binary_pattern"].apply(to_ndarray)

In [191]:
train_df["landmark_id"] = train_df["landmark_id"].astype(str)

In [192]:
def to_image_path(dir, id):
    return f"data/{dir}/{id[0]}/{id[1]}/{id[2]}/{id}.jpg"


def train_image_path(id):
    return to_image_path("train", id)


def test_image_path(id):
    return to_image_path("test", id)


def index_image_path(id):
    return to_image_path("index", id)

In [193]:
train_df["image_path"] = train_df["id"].apply(train_image_path)
train_df

Unnamed: 0,id,landmark_id,image_path,width,height,aspect_ratio,mean_rgb,mean_r,mean_g,mean_b,x,y,local_binary_pattern
0,17660ef415d37059,1,data/train/1/7/6/17660ef415d37059.jpg,533,800,0.666250,[126.00759381 119.0244606 113.26428471],126.007594,119.024461,113.264285,6.526572,-59.742741,"[0.05229596622889306, 0.027049718574108816, 0...."
1,92b6290d571448f6,1,data/train/9/2/b/92b6290d571448f6.jpg,534,800,0.667500,[97.5096559 93.19100421 86.4288764 ],97.509656,93.191004,86.428876,14.416226,-57.622620,"[0.06176498127340824, 0.027186329588014983, 0...."
2,cd41bf948edc0340,1,data/train/c/d/4/cd41bf948edc0340.jpg,800,512,1.562500,[89.43367188 83.05516602 74.40758057],89.433672,83.055166,74.407581,8.051970,-53.929276,"[0.05924560546875, 0.03013427734375, 0.0246411..."
3,fb09f1e98c6d2f70,1,data/train/f/b/0/fb09f1e98c6d2f70.jpg,532,800,0.665000,[107.91263863 106.76824483 109.26745771],107.912639,106.768245,109.267458,4.627127,-54.376423,"[0.050286654135338345, 0.02482142857142857, 0...."
4,25c9dfc7ea69838d,7,data/train/2/5/c/25c9dfc7ea69838d.jpg,800,600,1.333333,[132.4216875 137.05765 144.99947083],132.421687,137.057650,144.999471,-8.277364,-25.763720,"[0.036810416666666665, 0.022747916666666666, 0..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...
1580465,72c3b1c367e3d559,203092,data/train/7/2/c/72c3b1c367e3d559.jpg,800,533,1.500938,[128.88117026 127.38813086 116.54751407],128.881170,127.388131,116.547514,7.850999,29.970839,"[0.049305816135084425, 0.03048780487804878, 0...."
1580466,7a6a2d9ea92684a6,203092,data/train/7/a/6/7a6a2d9ea92684a6.jpg,800,532,1.503759,[127.55681156 124.83408835 114.69496711],127.556812,124.834088,114.694967,-0.866286,31.105888,"[0.04551691729323308, 0.025406484962406016, 0...."
1580467,9401fad4c497e1f9,203092,data/train/9/4/0/9401fad4c497e1f9.jpg,800,533,1.500938,[129.93018058 130.93818246 127.94207083],129.930181,130.938182,127.942071,-10.209307,-1.973596,"[0.039880393996247654, 0.023184803001876173, 0..."
1580468,aacc960c9a228b5f,203092,data/train/a/a/c/aacc960c9a228b5f.jpg,800,533,1.500938,[142.47123358 142.97529784 136.52950516],142.471234,142.975298,136.529505,70.599495,50.996376,"[0.03226547842401501, 0.020316604127579737, 0...."


In [119]:
landmark_counts = train_df["landmark_id"].value_counts()
landmark_counts.head()

landmark_id
138982    6272
126637    2231
20409     1758
83144     1741
113209    1135
Name: count, dtype: int64

In [196]:
unique_landmarks = train_df["landmark_id"].unique()
len(unique_landmarks)

81313

There are 81313 unique landmarks in the dataset.

In [121]:
top25 = landmark_counts.head(25)
fig = px.bar(
    x=top25.index.astype(str),
    y=top25.values,
    labels={"x": "Landmark ID", "y": "Count"},
    title="Top 25 Landmark IDs by Count: 138982 has 3 times as much as next highest landmark",
)
fig.update_layout(xaxis_tickangle=-45)
fig.show()
fig.write_image("images/landmark_count.png", height=500, width=1200)

The fact that landmark 138982 has 3 times as many images as the next highest landmark is concerning. We should keep an eye on that and may need to reduce the number of samples from that landmark id.

In [None]:
fig = px.histogram(
    train_df,
    x="aspect_ratio",
    nbins=20,
    labels={"aspect_ratio": "Aspect Ratio", "count": "Count"},
    title="Aspect ratio (proportions) is similar for most images",
)
fig.update_layout(height=500, width=1200)
fig.show()
fig.write_image("images/aspect_ratio_histogram.png", height=500, width=1200)

In [None]:
fig = make_subplots(
    rows=1,
    cols=3,
    subplot_titles=("Mean R vs Mean G", "Mean G vs Mean B", "Mean R vs Mean B"),
)

trace_rg = px.scatter(train_df, x="mean_r", y="mean_g").data[0]
trace_gb = px.scatter(train_df, x="mean_g", y="mean_b").data[0]
trace_rb = px.scatter(train_df, x="mean_r", y="mean_b").data[0]

fig.add_trace(trace_rg, row=1, col=1)
fig.add_trace(trace_gb, row=1, col=2)
fig.add_trace(trace_rb, row=1, col=3)

fig.update_xaxes(title_text="Mean R", row=1, col=1)
fig.update_yaxes(title_text="Mean G", row=1, col=1)
fig.update_xaxes(title_text="Mean G", row=1, col=2)
fig.update_yaxes(title_text="Mean B", row=1, col=2)
fig.update_xaxes(title_text="Mean R", row=1, col=3)
fig.update_yaxes(title_text="Mean B", row=1, col=3)

fig.update_layout(
    title_text="Pairwise relationships among Mean R, Mean G, and Mean B across all images. Looks noisy at first",
)

fig.show()
fig.write_image(
    "images/color_channel_pairwise_all_landmarks.png", height=500, width=1200
)

In [197]:
selected_ids = unique_landmarks[:5]
five_landmarks_df = train_df[train_df["landmark_id"].isin(selected_ids)].copy()

These are the landmarks selected by the above code. Since we are dealing with over 80,000 unique landmarks and over 1.5 million images, I will limit our analysis to a subset of landmarks.
- Landmark 1  
![Landmark 1](data/train/1/7/6/17660ef415d37059.jpg)

- Landmark 7  
![Landmark 7](data/train/2/8/b/28b13f94a6f1f3c1.jpg)

- Landmark 9  
![Landmark 9](data/train/0/1/9/0193b65bb58d2c77.jpg)

- Landmark 11  
![Landmark 11](data/train/1/a/6/1a6cb1deed46bb17.jpg)

- Landmark 12  
![Landmark 12](data/train/1/4/9/1492a5d344495391.jpg)


In [None]:
fig = make_subplots(
    rows=1,
    cols=3,
    subplot_titles=("Mean R vs Mean G", "Mean G vs Mean B", "Mean R vs Mean B"),
)

fig_rg = px.scatter(five_landmarks_df, x="mean_r", y="mean_g", color="landmark_id")
for tr in fig_rg.data:
    fig.add_trace(tr, row=1, col=1)

fig_gb = px.scatter(five_landmarks_df, x="mean_g", y="mean_b", color="landmark_id")
for tr in fig_gb.data:
    tr.showlegend = False
    fig.add_trace(tr, row=1, col=2)

fig_rb = px.scatter(five_landmarks_df, x="mean_r", y="mean_b", color="landmark_id")
for tr in fig_rb.data:
    tr.showlegend = False
    fig.add_trace(tr, row=1, col=3)

fig.update_xaxes(title_text="Mean R", row=1, col=1)
fig.update_yaxes(title_text="Mean G", row=1, col=1)
fig.update_xaxes(title_text="Mean G", row=1, col=2)
fig.update_yaxes(title_text="Mean B", row=1, col=2)
fig.update_xaxes(title_text="Mean R", row=1, col=3)
fig.update_yaxes(title_text="Mean B", row=1, col=3)

fig.update_layout(
    title_text="Color channels across 5 landmarks. Indicates color channels could be leverage for landmark identification",
)

fig.show()
fig.write_image("images/color_channel_pairwise_5_landmarks.png", height=500, width=1200)

In [None]:
fig = px.scatter_3d(
    five_landmarks_df,
    x="mean_r",
    y="mean_g",
    z="mean_b",
    color="landmark_id",
    title="3D Scatter of mean RGB for 5 Landmark IDs",
)
fig.update_layout(width=1200, height=900)
fig.show()

In [124]:
def mean_histogram(arrs):
    stacked = np.vstack(arrs)
    return stacked.mean(axis=0)

In [125]:
# Plot the Mean Local Binary Pattern histogram
lbp_df = five_landmarks_df.dropna(subset=["local_binary_pattern"]).copy()

first_hist = lbp_df.iloc[0]["local_binary_pattern"]
n_bins = len(first_hist)
# Get histogram mean grouped by landmark
mean_hists = lbp_df.groupby("landmark_id", observed=True)["local_binary_pattern"].apply(
    mean_histogram
)

records = []
for landmark_id, hist in mean_hists.items():
    for i, v in enumerate(hist):
        records.append({"landmark_id": landmark_id, "bin": i, "value": float(v)})
plot_df = pd.DataFrame(records)

fig = px.line(
    plot_df,
    x="bin",
    y="value",
    color="landmark_id",
    markers=True,
    labels={
        "bin": "Local Binary Pattern Bin",
        "value": "Mean Frequency",
        "landmark_id": "Landmark ID",
    },
    title="Mean Local Binary Pattern Histogram per Landmark. Bin 12 & 13 shows differences in texture",
)
fig.update_layout(width=1200, height=700)
fig.show()
fig.write_image("images/lbp_mean_histogram_5_landmarks.png", width=1200, height=700)

Local Binary Patterns is a texture descriptor. It does this by comparing a pixel to its neighboring pixels. It captures the intensity of each pixel and compares their intensities. It will assign 1 if the neighboring pixel's intensity is greater than the pixel currently being assessed. Otherwise, LBP will assign 0. The algorithm then combines the binary values of all the neighboring pixels to create a value for the pixel being assessed. It does this for all the pixels in the image to create a binary code representing the texture of the image.

In [None]:
fig = px.scatter(
    five_landmarks_df,
    x="x",
    y="y",
    color="landmark_id",
    labels={"x": "Embedding 2d X", "y": "Embedding 2d Y", "landmark_id": "Landmark ID"},
    title="2d Embedding shows promise in clustering images by embeddings",
)
fig.update_layout(width=1200, height=700)
fig.show()
fig.write_image("images/embedding_2d_scatter_5_landmarks.png", width=1200, height=700)

In [None]:
fig = px.scatter(
    train_df[train_df["landmark_id"].isin(unique_landmarks[:25])],
    x="x",
    y="y",
    color="landmark_id",
    labels={"x": "Embedding 2d X", "y": "Embedding 2d Y", "landmark_id": "Landmark ID"},
    title="2d Embedding shows promise in clustering images by embeddings 25 landmarks",
)
fig.update_layout(width=1200, height=700)
fig.show()
fig.write_image("images/embedding_2d_scatter_25_landmarks.png", width=1200, height=700)

A 2D embedding squashes an image down to just two numbers: X and Y. Each image becomes a single point on a 2D plane. I used t-SNE to achieve this, which, like principal component analysis, reduces dimensionality. We take the full embedding of an image and convert that full embedding down to two features.


### Modeling

In [198]:
mlflow.set_experiment("Landmark Recognition")

<Experiment: artifact_location='file:///mnt/c/Users/Matt/workspace/landmarks/mlruns/745548148810810489', creation_time=1756780167672, experiment_id='745548148810810489', last_update_time=1756780167672, lifecycle_stage='active', name='Landmark Recognition', tags={}>

In [199]:
# Expand local_binary_pattern into separate columns for modeling
lbp_expanded = pd.DataFrame(
    train_df["local_binary_pattern"].tolist(), index=train_df.index
)
lbp_expanded.columns = [f"lbp_{i}" for i in range(lbp_expanded.shape[1])]

train_df_expanded_lbp = pd.concat(
    [train_df.drop(columns=["local_binary_pattern"]), lbp_expanded], axis=1
)

In [11]:
train_df_expanded_lbp

Unnamed: 0,id,landmark_id,image_path,width,height,aspect_ratio,mean_rgb,mean_r,mean_g,mean_b,...,lbp_16,lbp_17,lbp_18,lbp_19,lbp_20,lbp_21,lbp_22,lbp_23,lbp_24,lbp_25
0,17660ef415d37059,1,data/train/1/7/6/17660ef415d37059.jpg,533,800,0.666250,[126.00759381 119.0244606 113.26428471],126.007594,119.024461,113.264285,...,0.007265,0.019275,0.009409,0.010033,0.011091,0.015943,0.022183,0.029939,0.203417,0.433513
1,92b6290d571448f6,1,data/train/9/2/b/92b6290d571448f6.jpg,534,800,0.667500,[97.5096559 93.19100421 86.4288764 ],97.509656,93.191004,86.428876,...,0.011941,0.013977,0.011393,0.013102,0.015047,0.020524,0.025602,0.027809,0.071444,0.455536
2,cd41bf948edc0340,1,data/train/c/d/4/cd41bf948edc0340.jpg,800,512,1.562500,[89.43367188 83.05516602 74.40758057],89.433672,83.055166,74.407581,...,0.010195,0.014504,0.009797,0.010339,0.011846,0.017148,0.024343,0.031929,0.086533,0.488169
3,fb09f1e98c6d2f70,1,data/train/f/b/0/fb09f1e98c6d2f70.jpg,532,800,0.665000,[107.91263863 106.76824483 109.26745771],107.912639,106.768245,109.267458,...,0.009030,0.024250,0.011074,0.011675,0.012620,0.017000,0.021339,0.027850,0.202655,0.404803
4,25c9dfc7ea69838d,7,data/train/2/5/c/25c9dfc7ea69838d.jpg,800,600,1.333333,[132.4216875 137.05765 144.99947083],132.421687,137.057650,144.999471,...,0.014794,0.028979,0.013806,0.013513,0.014046,0.017329,0.021167,0.024594,0.101777,0.371237
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1580465,72c3b1c367e3d559,203092,data/train/7/2/c/72c3b1c367e3d559.jpg,800,533,1.500938,[128.88117026 127.38813086 116.54751407],128.881170,127.388131,116.547514,...,0.011027,0.018539,0.010720,0.011100,0.011914,0.016867,0.023136,0.030579,0.114010,0.454266
1580466,7a6a2d9ea92684a6,203092,data/train/7/a/6/7a6a2d9ea92684a6.jpg,800,532,1.503759,[127.55681156 124.83408835 114.69496711],127.556812,124.834088,114.694967,...,0.011494,0.024232,0.012446,0.012303,0.012761,0.016833,0.022000,0.027133,0.159159,0.409908
1580467,9401fad4c497e1f9,203092,data/train/9/4/0/9401fad4c497e1f9.jpg,800,533,1.500938,[129.93018058 130.93818246 127.94207083],129.930181,130.938182,127.942071,...,0.008391,0.025000,0.010544,0.010973,0.011515,0.015701,0.020115,0.024430,0.290068,0.353084
1580468,aacc960c9a228b5f,203092,data/train/a/a/c/aacc960c9a228b5f.jpg,800,533,1.500938,[142.47123358 142.97529784 136.52950516],142.471234,142.975298,136.529505,...,0.013895,0.037812,0.016930,0.015894,0.014125,0.017326,0.019805,0.022676,0.239557,0.320035


In [200]:
# earlier training shows that my machine cant train such a large model.
unique_landmarks_to_train_on = 500
rng = np.random.default_rng(42)
selected_landmark_ids = rng.choice(
    train_df_expanded_lbp["landmark_id"],
    size=unique_landmarks_to_train_on,
    replace=False,
)
subset_train_df = train_df_expanded_lbp[
    train_df_expanded_lbp["landmark_id"].isin(selected_landmark_ids)
]

In [13]:
subset_train_df

Unnamed: 0,id,landmark_id,image_path,width,height,aspect_ratio,mean_rgb,mean_r,mean_g,mean_b,...,lbp_16,lbp_17,lbp_18,lbp_19,lbp_20,lbp_21,lbp_22,lbp_23,lbp_24,lbp_25
8312,0e494985dc4e6f5f,1122,data/train/0/e/4/0e494985dc4e6f5f.jpg,800,530,1.509434,[107.48516509 115.14127123 119.52160613],107.485165,115.141271,119.521606,...,0.010241,0.023519,0.011842,0.012182,0.011311,0.015205,0.020802,0.030292,0.187238,0.403177
8313,10bd9a894e3f7933,1122,data/train/1/0/b/10bd9a894e3f7933.jpg,800,554,1.444043,[116.80949458 114.3092148 108.06415614],116.809495,114.309215,108.064156,...,0.017676,0.016934,0.013973,0.014050,0.015314,0.019192,0.024070,0.026376,0.056153,0.396974
8314,1438a5d2e8d5a45a,1122,data/train/1/4/3/1438a5d2e8d5a45a.jpg,800,533,1.500938,[120.92359053 136.08054409 155.98484287],120.923591,136.080544,155.984843,...,0.011984,0.031541,0.013213,0.012570,0.012015,0.015959,0.018663,0.026398,0.223811,0.345804
8315,1521e26ad1f2039a,1122,data/train/1/5/2/1521e26ad1f2039a.jpg,800,531,1.506591,[160.66089454 151.15706685 141.67239642],160.660895,151.157067,141.672396,...,0.008133,0.012175,0.007561,0.008166,0.008865,0.011766,0.017585,0.027792,0.259769,0.402189
8316,168dccd87677b276,1122,data/train/1/6/8/168dccd87677b276.jpg,800,453,1.766004,[128.80704194 122.88684051 117.8172489 ],128.807042,122.886841,117.817249,...,0.007632,0.009352,0.007078,0.008971,0.008474,0.011890,0.019459,0.041347,0.067381,0.536747
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1577256,9e3c1f5380576164,202755,data/train/9/e/3/9e3c1f5380576164.jpg,800,533,1.500938,[ 92.11616088 115.2420849 118.0916651 ],92.116161,115.242085,118.091665,...,0.013100,0.020762,0.013295,0.013133,0.013140,0.016384,0.020755,0.029191,0.128049,0.423316
1577257,9e4d59bd37028681,202755,data/train/9/e/4/9e4d59bd37028681.jpg,800,580,1.379310,[125.89680172 141.21918534 158.78746767],125.896802,141.219185,158.787468,...,0.015136,0.029545,0.014950,0.013946,0.013159,0.016235,0.019093,0.024414,0.176453,0.353522
1577258,b02cf8181c2bacbf,202755,data/train/b/0/2/b02cf8181c2bacbf.jpg,533,800,0.666250,[139.19162992 154.37495779 165.99801595],139.191630,154.374958,165.998016,...,0.011189,0.034503,0.014357,0.014285,0.013586,0.017418,0.019463,0.020521,0.320976,0.285959
1577259,be593e70db6ae6be,202755,data/train/b/e/5/be593e70db6ae6be.jpg,800,533,1.500938,[ 92.85430582 112.72290572 128.30803471],92.854306,112.722906,128.308035,...,0.013520,0.035305,0.015556,0.014646,0.013940,0.017341,0.020000,0.024965,0.173480,0.365258


In [201]:
# Balance the number of rows per landmark_id by undersampling to the smallest class size
if not subset_train_df.empty:
    min_count = 500
    subset_train_df = (
        subset_train_df.groupby("landmark_id", group_keys=False)
        .apply(lambda df: df.sample(n=min_count, random_state=42, replace=True))
        .reset_index(drop=True)
    )
subset_train_df.drop_duplicates(inplace=True)
subset_train_df

Unnamed: 0,id,landmark_id,image_path,width,height,aspect_ratio,mean_rgb,mean_r,mean_g,mean_b,...,lbp_16,lbp_17,lbp_18,lbp_19,lbp_20,lbp_21,lbp_22,lbp_23,lbp_24,lbp_25
0,c43293071120312f,100178,data/train/c/4/3/c43293071120312f.jpg,800,600,1.333333,[130.05063125 127.90832292 129.47630208],130.050631,127.908323,129.476302,...,0.017477,0.024044,0.014650,0.014444,0.014440,0.017487,0.021917,0.025435,0.084173,0.398675
1,6ef6b0374cdca938,100178,data/train/6/e/f/6ef6b0374cdca938.jpg,600,800,0.750000,[152.53545 144.20116667 143.01896042],152.535450,144.201167,143.018960,...,0.019435,0.036173,0.017879,0.016346,0.015227,0.017331,0.019315,0.021998,0.135931,0.327012
2,4aca6a91c0713159,100178,data/train/4/a/c/4aca6a91c0713159.jpg,600,800,0.750000,[129.86038958 124.06470625 119.3193125 ],129.860390,124.064706,119.319312,...,0.007027,0.006740,0.006725,0.007908,0.010227,0.015250,0.024704,0.036879,0.068973,0.544087
3,8c47c27cefc4a7ae,100178,data/train/8/c/4/8c47c27cefc4a7ae.jpg,465,800,0.581250,[ 96.94115054 114.19861559 108.64566129],96.941151,114.198616,108.645661,...,0.011315,0.021164,0.012473,0.013460,0.014659,0.018858,0.022511,0.024935,0.208030,0.379718
4,815a8ede0484b810,100178,data/train/8/1/5/815a8ede0484b810.jpg,600,800,0.750000,[148.07098125 148.48570833 146.89801042],148.070981,148.485708,146.898010,...,0.014602,0.019181,0.013915,0.014865,0.015783,0.019390,0.023756,0.024750,0.117644,0.409144
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
245470,930ecb0c305f9cba,99759,data/train/9/3/0/930ecb0c305f9cba.jpg,600,799,0.750939,[140.78650396 143.98384647 137.66222153],140.786504,143.983846,137.662222,...,0.010524,0.018133,0.009927,0.010736,0.010622,0.014906,0.019451,0.031806,0.127824,0.436393
245481,8cd5f94c0ff2efcc,99759,data/train/8/c/d/8cd5f94c0ff2efcc.jpg,799,600,1.331667,[138.96232791 135.95003963 128.3319587 ],138.962328,135.950040,128.331959,...,0.010004,0.016166,0.009973,0.010834,0.011792,0.015215,0.019347,0.027461,0.216577,0.389007
245482,43aa8fa7b9fca65b,99759,data/train/4/3/a/43aa8fa7b9fca65b.jpg,799,600,1.331667,[136.92144973 114.97258865 103.60497497],136.921450,114.972589,103.604975,...,0.019399,0.021387,0.015738,0.015995,0.016852,0.019791,0.020745,0.019577,0.063008,0.337144
245485,b35b9d4dd698c65e,99759,data/train/b/3/5/b35b9d4dd698c65e.jpg,800,600,1.333333,[165.83801667 149.38724792 148.29259375],165.838017,149.387248,148.292594,...,0.010546,0.015038,0.009373,0.010179,0.010581,0.013440,0.018071,0.030177,0.153771,0.422708


In [271]:
# Use subset of landmarks to train
X = subset_train_df.drop(["landmark_id", "id", "image_path", "mean_rgb"], axis=1)
y = subset_train_df["landmark_id"].astype(int)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Encode labels to ensure they are 0-based consecutive integers
le = LabelEncoder()
# Fit LabelEncoder on all possible classes for Average precision score
le.fit(y)
y_train_encoded = le.transform(y_train)
y_test_encoded = le.transform(y_test)
tensor_target = torch.tensor(y_test_encoded, dtype=torch.long)

In [272]:
sklearn_models = [
    "Dummy Classifier",
    "Random Forest",
    "Logistic Regression",
    "SVC",
    "Gradient Boosting",
]

In [273]:
models = [
    (
        "Dummy Classifier",
        DummyClassifier(),
        (X_train, y_train),
        (X_test, y_test),
        (y_train_encoded, y_test_encoded),
    ),
    (
        "Random Forest",
        RandomForestClassifier(
            n_estimators=100, max_depth=7, n_jobs=-1, random_state=42
        ),
        (X_train, y_train),
        (X_test, y_test),
        (y_train_encoded, y_test_encoded),
    ),
    (
        "Logistic Regression",
        LogisticRegression(random_state=42),
        (X_train, y_train),
        (X_test, y_test),
        (y_train_encoded, y_test_encoded),
    ),
    (
        "SVC",
        SVC(probability=True, random_state=42),
        (X_train, y_train),
        (X_test, y_test),
        (y_train_encoded, y_test_encoded),
    ),
    (
        "Gradient Boosting",
        GradientBoostingClassifier(n_estimators=50, max_depth=1, random_state=42),
        (X_train, y_train),
        (X_test, y_test),
        (y_train_encoded, y_test_encoded),
    ),
]

In [274]:
reports = []

In [275]:
for model_name, model, train, test, y_encoded in models:
    X_train, y_train = train
    X_test, y_test = test
    y_train_encoded, y_test_encoded = y_encoded
    tensor_target = torch.tensor(y_test_encoded, dtype=torch.long)

    print(f"Model: {model_name}")
    start = time.perf_counter()
    model.fit(X_train, y_train)
    elapsed = time.perf_counter() - start
    print(f"Elapsed: {elapsed:.3f}s")

    y_pred = model.predict(X_test)
    y_pred_proba = model.predict_proba(X_test)

    report = classification_report(
        y_test_encoded, le.transform(y_pred), output_dict=True
    )

    num_classes = y_pred_proba.shape[1]
    num_samples = y_pred_proba.shape[0]

    preds_tensor = torch.tensor(y_pred_proba, dtype=torch.float32)
    preds_tensor = torch.nan_to_num(preds_tensor, nan=0.0, posinf=1.0, neginf=0.0)
    preds_flat = preds_tensor.reshape(-1)

    targets_class = torch.tensor(y_test_encoded, dtype=torch.long)
    target_onehot = F.one_hot(targets_class, num_classes=num_classes).to(torch.int32)
    target_flat = target_onehot.reshape(-1)

    indexes = (
        torch.arange(num_samples, dtype=torch.long)
        .unsqueeze(1)
        .expand(num_samples, num_classes)
        .reshape(-1)
    )

    retrieval_map = RetrievalMAP(top_k=5)
    average_precision = retrieval_map(preds_flat, target_flat, indexes).item()

    print("Retrieval mAP:", average_precision)
    reports.append({
        "report": report,
        "average_precision": average_precision,
        "model": model_name
    })


Model: Dummy Classifier
Elapsed: 0.006s
Retrieval mAP: 0.03208994120359421
Model: Random Forest
Elapsed: 2.074s
Retrieval mAP: 0.28036749362945557
Model: Logistic Regression
Elapsed: 12.865s
Retrieval mAP: 0.20726139843463898
Model: SVC
Elapsed: 101.594s
Retrieval mAP: 0.24436940252780914
Model: Gradient Boosting
Elapsed: 1672.397s
Retrieval mAP: 0.3048348128795624


In [None]:
reports[0]

In [276]:
for i, model_item in enumerate(models):
    model_name = model_item[0]
    model = model_item[1]
    X_train, _ = model_item[2]
    r =  [r for r in reports if r['model'] == model_name][0]
    report = r['report']
    average_precision = r['average_precision']
    mlflow.end_run()
    with mlflow.start_run(run_name=model_name):
        mlflow.log_params(model.get_params())
        mlflow.log_metric("accuracy", report["accuracy"])
        mlflow.log_metric("average_precision", average_precision)
        mlflow.log_metric("macro_f1", report["macro avg"]["f1-score"])
        mlflow.log_metric("macro_recall", report["macro avg"]["recall"])
        mlflow.log_metric("macro_precision", report["macro avg"]["precision"])
        mlflow.log_metric("weighted_f1", report["weighted avg"]["f1-score"])
        mlflow.log_metric("weighted_recall", report["weighted avg"]["recall"])
        mlflow.log_metric("weighted_precision", report["weighted avg"]["precision"])
        if model_name in sklearn_models:
            mlflow.sklearn.log_model(
                model, name=model_name, input_example=X_train.sample(1)
            )



## XGBoost

In [311]:
def train_eval_xgboost(model_name, dtrain, dval, params, y_train_encoded, y_test_encoded, reports):
    mlflow.end_run()
    with mlflow.start_run(run_name=model_name):
        xgboost_tree = xgb.train(
            params,
            dtrain,
            num_boost_round=100,
            evals=[(dval, 'val')],
            early_stopping_rounds=10,
            verbose_eval=10
        )


        y_pred = xgboost_tree.predict(dval)
        y_pred_flat = y_pred.reshape(-1)
        y_pred_classes = np.argmax(y_pred, axis=1)
        report = classification_report(
            y_test_encoded, y_pred_classes, output_dict=True
        )
        num_samples = y_pred.shape[0]
        num_classes = len(np.unique(y_train_encoded))
        targets_class = torch.tensor(y_test_encoded, dtype=torch.long)
        target_onehot = F.one_hot(targets_class, num_classes=num_classes).to(torch.int32)
        target_flat = target_onehot.reshape(-1)  # Shape: (num_samples * num_classes)

        indexes = (
            torch.arange(num_samples, dtype=torch.long)
            .unsqueeze(1)
            .expand(num_samples, num_classes)
            .reshape(-1)
        )

        retrieval_map = RetrievalMAP(top_k=5)
        average_precision = retrieval_map(
            torch.tensor(y_pred_flat, dtype=torch.float32),
            target_flat,
            indexes
        )

        print("Retrieval mAP:", average_precision)
        reports.append({
            "report": report,
            "average_precision": average_precision,
            "model": "XGBoost"
        })

        mlflow.log_params(params)
        mlflow.log_metric("accuracy", report["accuracy"])
        mlflow.log_metric("average_precision", average_precision)
        mlflow.log_metric("macro_f1", report["macro avg"]["f1-score"])
        mlflow.log_metric("macro_recall", report["macro avg"]["recall"])
        mlflow.log_metric("macro_precision", report["macro avg"]["precision"])
        mlflow.log_metric("weighted_f1", report["weighted avg"]["f1-score"])
        mlflow.log_metric("weighted_recall", report["weighted avg"]["recall"])
        mlflow.log_metric("weighted_precision", report["weighted avg"]["precision"])


In [312]:
dtrain = xgb.DMatrix(X_train, label=y_train_encoded)
dval = xgb.DMatrix(X_test, label=y_test_encoded)

params = {
    'objective': 'multi:softprob',
    'num_class': len(y.unique()),
    'eval_metric': 'mlogloss',
    'max_depth': 6,
    'eta': 0.1,
    'random_state': 42
}


train_eval_xgboost("XGBoost", dtrain, dval, params, y_train_encoded, y_test_encoded, reports)

[0]	val-mlogloss:4.99156
[10]	val-mlogloss:3.54718
[20]	val-mlogloss:3.14439
[30]	val-mlogloss:2.94506
[40]	val-mlogloss:2.82762
[50]	val-mlogloss:2.75399
[60]	val-mlogloss:2.70834
[70]	val-mlogloss:2.67811
[80]	val-mlogloss:2.65684
[90]	val-mlogloss:2.64287
[99]	val-mlogloss:2.63354
6367 491
[2.8496963e-06 3.5616722e-06 2.5072832e-06 1.5948148e-06 2.0984510e-06] (3126197,)
tensor([0, 0, 0, 0, 0], dtype=torch.int32) torch.Size([3126197])
tensor([0, 0, 0, 0, 0]) torch.Size([3126197])
Retrieval mAP: tensor(0.5345)


## Model Evaluation

In [313]:
# Build comparison DataFrame from `reports` and `models`


def get_metrics(report, average_precision, model_name):
    return [
        {
            "metric": "accuracy",
            "value": report["accuracy"],
            "model_name": model_name,
        },
        {
            "metric": "average_precision",
            "value": float(average_precision),
            "model_name": model_name,
        },
        {
            "metric": "macro_f1",
            "value": report["macro avg"]["f1-score"],
            "model_name": model_name,
        },
        {
            "metric": "macro_recall",
            "value": report["macro avg"]["recall"],
            "model_name": model_name,
        },
        {
            "metric": "macro_precision",
            "value": report["macro avg"]["precision"],
            "model_name": model_name,
        },
        {
            "metric": "weighted_f1",
            "value": report["weighted avg"]["f1-score"],
            "model_name": model_name,
        },
        {
            "metric": "weighted_recall",
            "value": report["weighted avg"]["recall"],
            "model_name": model_name,
        },
        {
            "metric": "weighted_precision",
            "value": report["weighted avg"]["precision"],
            "model_name": model_name,
        },
    ]


metrics_records = []
for report in reports:
    metrics_records += get_metrics(report['report'], report['average_precision'], report['model'])

metrics_df = pd.DataFrame(metrics_records)

In [224]:
fig = px.bar(
    metrics_df,
    x="model_name",
    y="value",
    color="metric",
    barmode="group",
    title="Model Metric Comparison: Gradient Boosting has best mAP score",
    labels={"Value": "Score", "model_name": "Model", "metric": "Metric"},
)
fig.update_layout(width=1200, height=600, legend_title_text="Metric")
fig.show()

fig.write_image("images/model_metrics_comparison.png", width=1200, height=600)

In [319]:
fig = px.bar(
    metrics_df,
    x="model_name",
    y="value",
    color="metric",
    barmode="group",
    title="Model Metric Comparison: XGBoost has best mAP score",
    labels={"Value": "Score", "model_name": "Model", "metric": "Metric"},
)
fig.update_layout(width=1200, height=600, legend_title_text="Metric")
fig.show()

fig.write_image("images/model_metrics_comparison2.png", width=1200, height=600)

In [230]:
# Load latest sklearn models logged to MLflow under the "Landmark Recognition" experiment
def load_most_recent_models():
    loaded_models = {}
    client = mlflow.tracking.MlflowClient()
    exp = client.get_experiment_by_name("Landmark Recognition")
    logged_models = client.search_logged_models(
        experiment_ids=[exp.experiment_id], filter_string="status != 'FAILED'"
    )
    logged_models.sort(key=lambda l: l.last_updated_timestamp)

    for logged_model in logged_models:
        name = logged_model.name
        if name in sklearn_models and name not in loaded_models:
            model = mlflow.sklearn.load_model(logged_model.model_uri)
            loaded_models[name] = model
    if loaded_models:
        print(f"Loaded {len(loaded_models)} model(s) from MLflow:")
        for name in loaded_models:
            print(f" - {name}")
    else:
        print(
            "No models could be loaded from MLflow runs in experiment 'Landmark Recognition'."
        )
    return loaded_models

In [231]:
loaded_models = load_most_recent_models()

Loaded 5 model(s) from MLflow:
 - Random Forest
 - Logistic Regression
 - SVC
 - Gradient Boosting
 - Dummy Classifier


In [232]:
loaded_models

{'Random Forest': RandomForestClassifier(max_depth=7, n_jobs=-1, random_state=42),
 'Logistic Regression': LogisticRegression(random_state=42),
 'SVC': SVC(probability=True, random_state=42),
 'Gradient Boosting': GradientBoostingClassifier(max_depth=1, n_estimators=50, random_state=42),
 'Dummy Classifier': DummyClassifier()}

In [233]:
forest_importance = {
    "Feature": loaded_models["Random Forest"].feature_names_in_,
    "Importance": np.abs(loaded_models["Random Forest"].feature_importances_),
}

forest_importance_df = pd.DataFrame(forest_importance)

forest_importance_df = forest_importance_df.sort_values("Importance", ascending=False)
fig = px.bar(
    forest_importance_df[:10],
    x="Importance",
    y="Feature",
    orientation="h",
    title="Top Ten Features RandomForest - Embedding & LBP important, as expected. Unexpectedly aspect ratio most important.",
)
fig.show()
fig.write_image("images/forest_top_features.png", width=1200, height=600)

In [234]:
gradient_boosting_importance = {
    "Feature": loaded_models["Gradient Boosting"].feature_names_in_,
    "Importance": np.abs(loaded_models["Gradient Boosting"].feature_importances_),
}

gradient_boosting_importance_df = pd.DataFrame(gradient_boosting_importance)

gradient_boosting_importance_df = gradient_boosting_importance_df.sort_values(
    "Importance", ascending=False
)
fig = px.bar(
    gradient_boosting_importance_df[:10],
    x="Importance",
    y="Feature",
    orientation="h",
    title="Top Ten Features Gradient Boosting - Embedding & LBP important, as expected.",
)
fig.show()
fig.write_image("images/gradient_top_features.png", width=1200, height=600)

In [235]:
logistic_regression_importance = {
    "Feature": loaded_models["Logistic Regression"].feature_names_in_,
    "Importance": np.abs(loaded_models["Logistic Regression"].coef_[0]),
}
logistic_regression_importance_df = pd.DataFrame(logistic_regression_importance)

logistic_regression_importance_df = logistic_regression_importance_df.sort_values(
    "Importance", ascending=False
)
fig = px.bar(
    logistic_regression_importance_df[:10],
    x="Importance",
    y="Feature",
    orientation="h",
    title="Top Ten Features Logistic Regression - 2D embedding had the strongest influence",
)
fig.show()
fig.write_image("images/logistic_regression_top_features.png", width=1200, height=600)

In [84]:
svc_permutation_importance = permutation_importance(
    loaded_models["SVC"], X, y, n_repeats=10, random_state=42
)

In [87]:
svc_importance = {
    "Feature": loaded_models["SVC"].feature_names_in_,
    "Importance": svc_permutation_importance.importances_mean,
}
svc_importance_df = pd.DataFrame(svc_importance)

svc_importance_df = svc_importance_df.sort_values("Importance", ascending=False)
fig = px.bar(
    svc_importance_df[:10],
    x="Importance",
    y="Feature",
    orientation="h",
    title="Top Ten Features SVC - Height and width had the most influence",
)
fig.show()
fig.write_image("images/svc_top_features.png", width=1200, height=600)

### Deep Learning Models

In [110]:
# Build a Siamese network
class SiameseNetwork(nn.Module):
    def __init__(
        self, resnet=None, embedding_size=128, weights=None, requires_grad=False
    ):
        super(SiameseNetwork, self).__init__()
        self.requires_grad = requires_grad
        if resnet and not weights:
            self.convolution_neural_network = resnet(pretrained=True)
        elif not weights:
            self.convolution_neural_network = resnet18(pretrained=True)
        elif resnet and weights:
            self.convolution_neural_network = resnet(weights=weights)
        else:
            self.convolution_neural_network = resnet18(weights=weights)

        for param in self.convolution_neural_network.parameters():
            param.requires_grad = self.requires_grad

        in_features = self.convolution_neural_network.fc.in_features
        self.convolution_neural_network.fc = nn.Linear(in_features, embedding_size)
        if not requires_grad:
            self.embedding_layer = torch.nn.Sequential(
                torch.nn.ReLU(), torch.nn.Linear(128, 128)
            )

    def forward_one(self, x):
        x = self.convolution_neural_network(x)
        if not self.requires_grad:
            x = self.embedding_layer(x)
        return x

    def forward(self, x1, x2):
        return self.forward_one(x1), self.forward_one(x2)


# Loss function for images (maybe try cosine similarity)
class ContrastiveLoss(nn.Module):
    def __init__(self, margin=1):
        super(ContrastiveLoss, self).__init__()
        self.margin = margin

    def forward(self, y1, y2, label):
        euclidean_dist = F.pairwise_distance(y1, y2)
        return torch.mean(
            (1 - label) * torch.pow(euclidean_dist, 2)
            + (label * torch.pow(torch.clamp(self.margin - euclidean_dist, min=0), 2))
        )


class CosineSimilarityLoss(nn.Module):
    def __init__(self):
        super(CosineSimilarityLoss, self).__init__()

    def forward(self, output1, output2, label):
        cosine_sim = F.cosine_similarity(output1, output2, dim=1)

        # For similar pairs (label=0), maximize similarity (minimize 1 - cosine_sim)
        # For dissimilar pairs (label=1), push similarity below margin
        loss_similar = (1 - label) * (1 - cosine_sim)
        loss_dissimilar = label * torch.clamp(cosine_sim - 0.5, min=0.0)

        # Average loss over the batch
        loss = torch.mean(loss_similar + loss_dissimilar)
        return loss


class SiameseDataset(Dataset):
    def __init__(self, dataset, preload=True):
        self.dataset = dataset
        self.preload = preload
        self.pairs = []
        self.label_dict = {}
        self.unique_landmarks = set()

        print("Started loading label_dict and unique_landmarks")
        for index in range(len(dataset)):
            y1 = self.dataset.get_label(index)
            self.unique_landmarks.add(y1)
            if y1 in self.label_dict:
                self.label_dict[y1].append(index)
            else:
                self.label_dict[y1] = [index]
        print("Completed loading label_dict and unique_landmarks")

        if self.preload:
            print("Started preloading SiameseDataset")
            for index in range(len(self.dataset)):
                index, index2, label = self.get_pair_indices_label(index)
                self.pairs.append((index, index2, label))
            print("Completed preloading SiameseDataset")

    def __getitem__(self, index):
        if self.preload:
            index1, index2, label = self.pairs[index]
        else:
            index1, index2, label = self.get_pair_indices_label(index)

        x1, y1 = self.dataset[index1]
        x2, y2 = self.dataset[index2]
        return x1, x2, torch.tensor(label, dtype=torch.float32)

    def __len__(self):
        return len(self.dataset)

    def get_pair_indices_label(self, index):
        y1 = self.dataset.get_label(index)
        same_landmark = random.randint(0, 1)
        if same_landmark:
            set_to_check = self.label_dict.get(y1)
            while True:
                index2 = random.choice(set_to_check)
                y2 = self.dataset.get_label(index2)
                if y1 == y2:
                    break
            label = 0
        else:
            while True:
                random_landmark = random.choice(list(self.unique_landmarks - set([y1])))
                set_to_check = self.label_dict.get(random_landmark)
                index2 = random.choice(set_to_check)
                y2 = self.dataset.get_label(index2)
                # Different landmark
                if y1 != y2:
                    break
            label = 1
        return index, index2, label

    def reset_pairs(self):
        print("Started reset pairs")
        self.pairs = []
        for index in range(len(self.dataset)):
            index, index2, label = self.get_pair_indices_label(index)
            self.pairs.append((index, index2, label))
        print("Completed reset pairs")


# Dataset
class LandmarksDataset(Dataset):
    def __init__(self, image_paths, landmark_ids, transform, preload=False):
        self.image_paths = image_paths
        self.landmark_ids = landmark_ids
        self.transforms = transform
        self.preload = preload
        if self.preload:
            print("Started preloading LandmarksDataset")
            self.images = [Image.open(path).convert("RGB") for path in image_paths]
            print("Completed preloading LandmarksDataset")

    def __getitem__(self, index):
        if self.preload:
            image = self.images[index]
        else:
            image = Image.open(self.image_paths[index]).convert("RGB")
        label = self.landmark_ids[index]
        transformed_image = self.transforms(image)
        return transformed_image, label

    def get_label(self, index):
        label = self.landmark_ids[index]
        return label

    def __len__(self):
        return len(self.image_paths)

In [246]:
def evaluate_siamese_model(
    model,
    test_loader,
    train_dataset,
    device,
    label_encoder,
    y_train_encoded,
    batch_size=8,
):
    start = time.perf_counter()

    # Ensure model is on the correct device
    model.to(device)
    model.eval()

    # Log dataset size and system memory
    n_samples = len(train_dataset)
    print(f"Dataset size: {n_samples}")
    print(f"Initial CPU Memory: {psutil.virtual_memory().percent}%")
    print(f"Initial GPU Memory: {torch.cuda.memory_allocated()/1024**3:.2f} GiB")

    # Step 1: Compute class-representative embeddings (mean per class) from training data
    try:
        num_classes = len(np.unique(y_train_encoded))
        class_embeddings = []
        embeddings_per_class = [[] for _ in range(num_classes)]
        train_loader = DataLoader(
            train_dataset,
            batch_size=batch_size,
            shuffle=False,
            num_workers=0,
            pin_memory=False,
        )
        with torch.no_grad():
            for img, label in train_loader:
                img = img.to(device)
                emb = model.forward_one(img).cpu()
                for i, l in enumerate(label):
                    embeddings_per_class[l.item()].append(emb[i])
                del img, emb
                torch.cuda.empty_cache() if device.type == "cuda" else None
            gc.collect()

            for i in range(num_classes):
                if embeddings_per_class[i]:
                    class_emb = torch.stack(embeddings_per_class[i]).mean(dim=0)
                    class_embeddings.append(F.normalize(class_emb, p=2, dim=0))
                else:
                    # Use a small non-zero vector to avoid issues with empty classes
                    class_embeddings.append(
                        torch.zeros_like(embeddings_per_class[0][0]) + 1e-8
                    )
            # [num_classes, emb_dim]
            class_embeddings = torch.stack(class_embeddings)
            print(
                f"Class embeddings shape: {class_embeddings.shape}, Approx. size: {class_embeddings.numel() * 4 / 1024**3:.2f} GiB"
            )
    except RuntimeError as e:
        print(f"Error during class embeddings computation: {e}")
        return None, None

    # Step 2: Compute test embeddings
    try:
        test_embeddings = []
        test_labels = []
        with torch.no_grad():
            for img, label in test_loader:
                img = img.to(device)
                emb = model.forward_one(img).cpu()
                test_embeddings.append(emb)
                test_labels.append(label)
                del img, emb
                torch.cuda.empty_cache() if device.type == "cuda" else None
            gc.collect()

        test_embeddings = torch.cat(test_embeddings)
        test_labels = torch.cat(test_labels)
        print(
            f"Test embeddings shape: {test_embeddings.shape}, Approx. size: {test_embeddings.numel() * 4 / 1024**3:.2f} GiB"
        )
    except RuntimeError as e:
        print(f"Error during test embeddings computation: {e}")
        return None, None

    # Step 3: Compute pseudo-probabilities (similarities to class embeddings) on CPU
    sim_device = torch.device("cpu")
    try:
        test_embeddings_norm = F.normalize(test_embeddings, p=2, dim=1).to(sim_device)
        class_embeddings_norm = F.normalize(class_embeddings, p=2, dim=1).to(sim_device)
        y_pred_proba = torch.mm(
            test_embeddings_norm, class_embeddings_norm.T
        )  # [n_samples, num_classes]
        y_pred_proba = (y_pred_proba + 1) / 2  # Normalize to [0, 1]
        y_pred_proba = torch.nan_to_num(y_pred_proba, nan=0.0, posinf=1.0, neginf=0.0)
        print(
            f"y_pred_proba shape: {y_pred_proba.shape}, Approx. size: {y_pred_proba.numel() * 4 / 1024**3:.2f} GiB"
        )
    except RuntimeError as e:
        print(f"Error during pseudo-probabilities computation: {e}")
        return None, None

    # Step 4: Predict classes
    try:
        _, y_pred = torch.max(y_pred_proba, dim=1)
        y_pred = label_encoder.inverse_transform(
            y_pred.cpu().numpy()
        )  # Convert back to original labels
    except RuntimeError as e:
        print(f"Error during prediction: {e}")
        return None, None

    # Step 5: Classification report
    try:
        report = classification_report(
            test_labels.cpu().numpy(), label_encoder.transform(y_pred), output_dict=True
        )
    except RuntimeError as e:
        print(f"Error during classification report: {e}")
        return None, None

    # Step 6: Compute RetrievalMAP on CPU
    try:
        preds_flat = y_pred_proba.reshape(-1)
        targets_class = test_labels.to(torch.long)
        target_onehot = F.one_hot(targets_class, num_classes=num_classes).to(
            torch.int32
        )
        target_flat = target_onehot.reshape(-1)
        indexes = (
            torch.arange(len(train_dataset), dtype=torch.long)
            .unsqueeze(1)
            .expand(-1, num_classes)
            .reshape(-1)
        ).to(sim_device)

        retrieval_map = RetrievalMAP(top_k=5).to(sim_device)
        average_precision = retrieval_map(preds_flat, target_flat, indexes).item()
    except RuntimeError as e:
        print(f"Error during RetrievalMAP computation: {e}")
        return None, None

    # Cleanup
    del (
        test_embeddings,
        test_embeddings_norm,
        class_embeddings,
        class_embeddings_norm,
        y_pred_proba,
    )
    del preds_flat, targets_class, target_onehot, target_flat, indexes
    torch.cuda.empty_cache()
    gc.collect()

    elapsed = time.perf_counter() - start
    print(f"Elapsed: {elapsed:.3f}s")
    print("Retrieval mAP:", average_precision)
    print("Average precision:", average_precision)
    print(f"CPU Memory after evaluation: {psutil.virtual_memory().percent}%")
    print(f"Final GPU Memory: {torch.cuda.memory_allocated()/1024**3:.2f} GiB")

    return {**report, "average_precision": average_precision}

In [247]:
def find_similar_images(query_image, image_set, model, top_k=5):
    model.eval()
    query_embedding = model.forward_one(query_image.unsqueeze(0).to(device))
    distances = []
    for img in image_set:
        img_embedding = model.forward_one(img.unsqueeze(0).to(device))
        distances.append(F.pairwise_distance(query_embedding, img_embedding))
    return torch.argsort(torch.tensor(distances))[:top_k]

In [248]:
# Checkpointing functions
def save_checkpoint(model, optimizer, epoch, loss, checkpoint_dir, filename):
    os.makedirs(checkpoint_dir, exist_ok=True)
    checkpoint_path = os.path.join(checkpoint_dir, filename)
    torch.save(
        {
            "epoch": epoch,
            "model_state_dict": model.state_dict(),
            "optimizer_state_dict": optimizer.state_dict(),
            "loss": loss,
        },
        checkpoint_path,
    )
    # Log checkpoint to MLflow
    mlflow.log_artifact(checkpoint_path, artifact_path="checkpoints")


def load_checkpoint(model, optimizer, checkpoint_path, device):
    print(f"checkpoint path: {checkpoint_path}")
    checkpoint = torch.load(checkpoint_path, map_location=device)
    model.load_state_dict(checkpoint["model_state_dict"])
    optimizer.load_state_dict(checkpoint["optimizer_state_dict"])
    epoch = checkpoint["epoch"]
    loss = checkpoint["loss"]
    return epoch, loss

def load_checkpoint_epoch_loss(checkpoint_path):
    print(f"checkpoint path: {checkpoint_path}")
    checkpoint = torch.load(checkpoint_path)
    epoch = checkpoint["epoch"]
    loss = checkpoint["loss"]
    return epoch, loss

In [342]:
def train_pytorch_model(
        model_name,
        model,
        learning_rate,
        loss_function,
        total_epochs,
        train_loader,
        train_dataset,
        le,
        y_train,
        eval_only=False,
        deep_learning_model_reports=[]
):
    mlflow.end_run()
    torch.cuda.empty_cache()
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(device)
    if model.requires_grad:
        optimizer = optim.Adam(model.parameters(), lr=learning_rate)
    else:
        optimizer = optim.Adam(filter(lambda param: param.requires_grad, model.parameters()), lr=learning_rate)
    print(f'Set up complete for training {model_name}')
    # Check for existing checkpoints sequentially until a missing one is found
    last_existing_epoch = -1
    for epoch in range(total_epochs):
        checkpoint_file = (
            f"{model_name}_lr{learning_rate}_{loss_function.__class__.__name__}_{epoch}.pth"
        )
        checkpoint_path = os.path.join("checkpoints", checkpoint_file)
        if os.path.exists(checkpoint_path):
            last_existing_epoch = epoch
        else:
            break

    start_epoch = last_existing_epoch + 1

    if eval_only:
        with mlflow.start_run(
                run_name=f"{model_name}_lr_{learning_rate}_loss_{loss_function.__class__.__name__}_epochs_165"
        ):
            checkpoint_file = (
                f"{model_name}_lr{learning_rate}_{loss_function.__class__.__name__}_{last_existing_epoch}.pth"
            )
            checkpoint_path = os.path.join("checkpoints", checkpoint_file)
            if not os.path.exists(checkpoint_path):
                return
            start_epoch, last_loss = load_checkpoint(model, optimizer, checkpoint_path, device)
            # Eval metrics
            subset_size = 10000
            subset_indices = list(range(subset_size))
            subset_test_dataset = Subset(train_dataset, subset_indices)
            subset_test_loader = DataLoader(
                subset_test_dataset,
                batch_size=4,
                shuffle=False,
                num_workers=0,
                pin_memory=False,
                prefetch_factor=None
            )
            metrics = evaluate_siamese_model(model, subset_test_loader, subset_test_dataset, device, le, y_train)
            print(
                f"Metrics: Accuracy={metrics['accuracy']:.4f}, Precision={metrics["macro avg"]['precision']:.4f}, "
                f"Recall={metrics["macro avg"]['recall']:.4f}, F1={metrics["macro avg"]["f1-score"]:.4f}, average_precision={metrics['average_precision']:.4f}"
            )

            # Log metrics to MLflow
            mlflow.log_metric("train_loss", last_loss, step=start_epoch)
            mlflow.log_metric("accuracy", metrics["accuracy"])
            mlflow.log_metric("average_precision", metrics["average_precision"])
            mlflow.log_metric("macro_f1", metrics["macro avg"]["f1-score"])
            mlflow.log_metric("macro_recall", metrics["macro avg"]["recall"])
            mlflow.log_metric("macro_precision", metrics["macro avg"]["precision"])
            mlflow.log_metric("weighted_f1", metrics["weighted avg"]["f1-score"])
            mlflow.log_metric("weighted_recall", metrics["weighted avg"]["recall"])
            mlflow.log_metric("weighted_precision", metrics["weighted avg"]["precision"])
            deep_learning_model_reports.append({"model_name": model_name, **metrics})
            return

    if start_epoch >= total_epochs:
        print("Already completed this model")
        return

    if last_existing_epoch >= 0:
        # Load the latest existing checkpoint
        checkpoint_file = (
            f"{model_name}_lr{learning_rate}_{loss_function.__class__.__name__}_{last_existing_epoch}.pth"
        )
        checkpoint_path = os.path.join("checkpoints", checkpoint_file)
        start_epoch, last_loss = load_checkpoint(model, optimizer, checkpoint_path, device)
        start_epoch = start_epoch + 1  # continue after the loaded epoch
        print(f"Resuming from epoch {start_epoch}, last loss: {last_loss:.3f}")

    with mlflow.start_run(
            run_name=f"{model_name}_lr_{learning_rate}_loss_{loss_function.__class__.__name__}_epochs_{total_epochs}"
    ):
        # Log parameters
        mlflow.log_param("model_name", model_name)
        mlflow.log_param("learning_rate", learning_rate)
        mlflow.log_param("loss_function", loss_function.__class__.__name__)
        mlflow.log_param("epochs", total_epochs)
        mlflow.log_param("batch_size", train_loader.batch_size)
        print(
            f"Model: {model_name} - LR {learning_rate} - Loss {loss_function.__class__.__name__} - Epochs {total_epochs}"
        )

        for epoch in range(start_epoch, total_epochs):
            start = time.perf_counter()
            model.train()
            total_loss = 0.0

            for x1, x2, label in train_loader:
                x1, x2, label = x1.to(device), x2.to(device), label.to(device)

                optimizer.zero_grad()
                y1, y2 = model(x1, x2)
                loss = loss_function(y1, y2, label)
                loss.backward()
                optimizer.step()
                total_loss += loss.item()  # Save memory
                del x1
                del x2

            elapsed = time.perf_counter() - start
            avg_loss = total_loss / len(train_loader)
            print(f"Elapsed: {elapsed:.3f}s")
            print(f"Epoch [{epoch + 1}/{total_epochs}], Loss: {avg_loss:.3f}")

            # Save checkpoint for this epoch
            checkpoint_file = (
                f"{model_name}_lr{learning_rate}_{loss_function.__class__.__name__}_{epoch}.pth"
            )
            save_checkpoint(model, optimizer, epoch, avg_loss, "checkpoints", checkpoint_file)

            train_loader.dataset.reset_pairs()

        # Eval metrics
        subset_size = 10000
        subset_indices = list(range(subset_size))
        subset_test_dataset = Subset(train_dataset, subset_indices)
        subset_test_loader = DataLoader(
            subset_test_dataset,
            batch_size=4,
            shuffle=False,
            num_workers=0,
            pin_memory=False,
            prefetch_factor=None
        )
        metrics = evaluate_siamese_model(model, subset_test_loader, subset_test_dataset, device, le, y_train)
        print(
            f"Metrics: Accuracy={metrics['accuracy']:.4f}, Precision={metrics["macro avg"]['precision']:.4f}, "
            f"Recall={metrics["macro avg"]['recall']:.4f}, F1={metrics["macro avg"]["f1-score"]:.4f}, average_precision={metrics['average_precision']:.4f}"
        )

        # Log metrics to MLflow
        mlflow.log_metric("train_loss", avg_loss, step=epoch)
        mlflow.log_metric("accuracy", metrics["accuracy"])
        mlflow.log_metric("average_precision", average_precision)
        mlflow.log_metric("macro_f1", metrics["macro avg"]["f1-score"])
        mlflow.log_metric("macro_recall", metrics["macro avg"]["recall"])
        mlflow.log_metric("macro_precision", metrics["macro avg"]["precision"])
        mlflow.log_metric("weighted_f1", metrics["weighted avg"]["f1-score"])
        mlflow.log_metric("weighted_recall", metrics["weighted avg"]["recall"])
        mlflow.log_metric("weighted_precision", metrics["weighted avg"]["precision"])
        mlflow.pytorch.log_model(model, name=model_name)
        deep_learning_model_reports.append({model_name: metrics})




In [250]:
# Find the landmark_id(s) with the fewest rows in train_df
landmark_counts = train_df["landmark_id"].value_counts()
min_count = landmark_counts.min()
max_count = landmark_counts.max()
mean_count = landmark_counts.mean()
# least_landmark_ids = landmark_counts[landmark_counts == min_count].index.tolist()
print({"min_count": int(min_count), "max_count": max_count, "mean_count": mean_count})
px.box(landmark_counts)

{'min_count': 2, 'max_count': 6272, 'mean_count': 19.436867413574706}


In [93]:
# Remove landmarks with fewer than 500 image paths
counts = train_df["landmark_id"].value_counts()
keep_ids = counts[counts >= 50].index
new_train_df = train_df[train_df["landmark_id"].isin(keep_ids)].reset_index(drop=True)
new_train_df = new_train_df[["image_path", "landmark_id"]]

In [94]:
del train_df

In [95]:
new_train_df.shape

(675029, 2)

In [96]:
# Define transforms
transform = transforms.Compose(
    [
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
    ]
)

transforms_v2 = transforms.Compose(
    [
        transforms.Resize((224, 224)),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ]
)

X = new_train_df["image_path"].tolist()
y = new_train_df["landmark_id"].astype(dtype=int).tolist()
del new_train_df
# Encode labels to ensure they are 0-based consecutive integers
le = LabelEncoder()
# Fit LabelEncoder on all possible classes for Average precision score
y = le.fit_transform(y)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

train_set = LandmarksDataset(
    image_paths=X_train, landmark_ids=y_train, transform=transforms_v2, preload=False
)
test_set = LandmarksDataset(
    image_paths=X_test, landmark_ids=y_test, transform=transforms_v2, preload=False
)
siamese_train_dataset = SiameseDataset(train_set, preload=True)

siamese_test_dataset = SiameseDataset(test_set, preload=True)

train_loader = DataLoader(
    siamese_train_dataset, batch_size=4, shuffle=True, num_workers=4, pin_memory=True
)
test_loader = DataLoader(
    siamese_test_dataset, batch_size=4, shuffle=True, num_workers=4, pin_memory=True
)

print(f"train len {len(X_train)} test len {len(X_test)}")

Started loading label_dict and unique_landmarks
Completed loading label_dict and unique_landmarks
Started preloading SiameseDataset
Completed preloading SiameseDataset
Started loading label_dict and unique_landmarks
Completed loading label_dict and unique_landmarks
Started preloading SiameseDataset
Completed preloading SiameseDataset
train len 540023 test len 135006


In [97]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)

cuda


In [135]:
# Define models
siamese_cnn_resnet18 = SiameseNetwork(requires_grad=True)
siamese_cnn_resnet50 = SiameseNetwork(resnet50, requires_grad=True)
siamese_cnn_resnet50_image_weights_frozen = SiameseNetwork(
    resnet50, weights=ResNet50_Weights.IMAGENET1K_V2, requires_grad=False
)
siamese_cnn_resnet50_image_weights = SiameseNetwork(
    resnet50, weights=ResNet50_Weights.IMAGENET1K_V2, requires_grad=True
)
siamese_cnn_resnet152 = SiameseNetwork(resnet152, requires_grad=True)
siamese_cnn_resnet152_image_weights = SiameseNetwork(
    resnet152, weights=ResNet152_Weights.IMAGENET1K_V2, requires_grad=True
)

In [265]:
# Define hyperparameters
learning_rates = [0.001, 0.01]
num_epochs = 20
loss_functions = [ContrastiveLoss(), CosineSimilarityLoss()]
dl_models = [
    (
        "Siamese CNN Resnet18",
        siamese_cnn_resnet18,
        learning_rates,
        loss_functions,
        num_epochs,
        train_loader,
        test_loader,
        test_set,
        le,
        y_train,
    ),
    (
        "Siamese CNN Resnet50",
        siamese_cnn_resnet50,
        learning_rates,
        loss_functions,
        num_epochs,
        train_loader,
        test_loader,
        test_set,
        le,
        y_train,
    ),
    (
        "Siamese CNN Resnet50 FC",
        siamese_cnn_resnet50_image_weights_frozen,
        [0.001],
        [ContrastiveLoss()],
        num_epochs,
        train_loader,
        test_loader,
        test_set,
        le,
        y_train,
    ),
    (
        "Siamese CNN Resnet152",
        siamese_cnn_resnet152,
        learning_rates,
        loss_functions,
        num_epochs,
        train_loader,
        test_loader,
        test_set,
        le,
        y_train,
    ),
    (
        "Siamese CNN Resnet152 ImageWeights",
        siamese_cnn_resnet152_image_weights,
        learning_rates,
        loss_functions,
        num_epochs,
        train_loader,
        test_loader,
        test_set,
        le,
        y_train,
    ),
    (
        "Siamese CNN Resnet50",
        siamese_cnn_resnet50_image_weights,
        learning_rates,
        loss_functions,
        num_epochs,
        train_loader,
        test_loader,
        test_set,
        le,
        y_train,
    ),
    (
        "Siamese CNN Resnet50 ImageWeights Full",
         SiameseNetwork(resnet50, requires_grad=True),
        [0.001],
        [ContrastiveLoss()],
        100,
        train_loader,
        test_loader,
        test_set,
        le,
        y_train,
    ),
]

In [None]:
# For training
os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "expandable_segments:True"
torch.cuda.empty_cache()
gc.collect()
# Train each model
for dl_model in dl_models:
    (
        model_name,
        model,
        lrs,
        loss_funcs,
        epochs,
        train_loader,
        test_loader,
        test_set,
        le,
        y_train,
    ) = dl_model
    for lr in lrs:
        for loss_func in loss_funcs:
            train_pytorch_model(
                model_name,
                model,
                lr,
                loss_func,
                epochs,
                train_loader,
                test_set,
                le,
                y_train,
            )

In [None]:
# For metrics
os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "expandable_segments:True"
torch.cuda.empty_cache()
gc.collect()
# Eval each model
deep_learning_model_reports = []
for dl_model in dl_models:
    (
        model_name,
        model,
        lrs,
        loss_funcs,
        epochs,
        train_loader,
        test_loader,
        test_set,
        le,
        y_train,
    ) = dl_model
    for lr in lrs:
        for loss_func in loss_funcs:
            train_pytorch_model(
                model_name,
                model,
                lr,
                loss_func,
                epochs,
                train_loader,
                test_set,
                le,
                y_train,
                eval_only=True,
                deep_learning_model_reports=deep_learning_model_reports
            )

Set up complete for training Siamese CNN Resnet18
checkpoint path: checkpoints/Siamese CNN Resnet18_lr0.001_ContrastiveLoss_9.pth
Dataset size: 10000
Initial CPU Memory: 73.6%
Initial GPU Memory: 0.93 GiB


In [341]:

metrics_records = []
for report in deep_learning_model_reports:
    metrics_records += get_metrics(report['report'], report['average_precision'], report['model'])

metrics_df = pd.DataFrame(metrics_records)
metrics_df

KeyError: 'report'

In [335]:
def get_mlflow_model_data():
    mlflow_models_array = []
    client = mlflow.tracking.MlflowClient()
    exp = client.get_experiment_by_name("Landmark Recognition")
    runs = client.search_runs(
        experiment_ids=[exp.experiment_id],
        filter_string="attributes.status = 'FINISHED'",
        order_by=["attributes.start_time DESC"],
    )
    for run in runs:
        rec = {
            "run_id": run.info.run_id,
            "run_name": run.data.tags.get("mlflow.runName"),
            "artifact_uri": run.info.artifact_uri,
            "start_time": run.info.start_time,
            "end_time": run.info.end_time,
            "status": run.info.status,
        }
        # Flatten params and metrics into the record
        for k, v in run.data.params.items():
            rec[f"param.{k}"] = v
        for k, v in run.data.metrics.items():
            rec[f"metric.{k}"] = v
        mlflow_models_array.append(rec)

    return pd.DataFrame(mlflow_models_array)

In [336]:
mlflow_models_df = get_mlflow_model_data()

In [337]:
mlflow_models_df.head(10)

Unnamed: 0,run_id,run_name,artifact_uri,start_time,end_time,status,param.eta,param.eval_metric,param.max_depth,param.num_class,...,param.strategy,metric.train_loss,param.batch_size,param.epochs,param.loss_function,param.model_name,param.top_k,metric.f1,metric.precision,metric.recall
0,9d6f01dec94b403189ef5452ee26bc9c,XGBoost,file:///mnt/c/Users/Matt/workspace/landmarks/m...,1759460033235,1759460078894,FINISHED,0.1,mlogloss,6.0,491.0,...,,,,,,,,,,
1,13848d5ab1f24586922e8d089c8918cb,Gradient Boosting,file:///mnt/c/Users/Matt/workspace/landmarks/m...,1759416300369,1759416305340,FINISHED,,,1.0,,...,,,,,,,,,,
2,c5c6a6e3d24943c9b6fd2f3a94de08f3,SVC,file:///mnt/c/Users/Matt/workspace/landmarks/m...,1759416295388,1759416300341,FINISHED,,,,,...,,,,,,,,,,
3,8abb2b10c13c4627a516a734ec448395,Logistic Regression,file:///mnt/c/Users/Matt/workspace/landmarks/m...,1759416291427,1759416295360,FINISHED,,,,,...,,,,,,,,,,
4,faa87f43fa2449f19f5d4b9cfccbee26,Random Forest,file:///mnt/c/Users/Matt/workspace/landmarks/m...,1759416286095,1759416291399,FINISHED,,,7.0,,...,,,,,,,,,,
5,0865cb10fb2c4093993a682bf6dc1d5a,Dummy Classifier,file:///mnt/c/Users/Matt/workspace/landmarks/m...,1759416282985,1759416286068,FINISHED,,,,,...,prior,,,,,,,,,
6,07a980ac4dab40958ff6ad0aa9151b83,Siamese CNN Resnet50 ImageWeights Full_lr_0.00...,file:///mnt/c/Users/Matt/workspace/landmarks/m...,1759386604871,1759386885424,FINISHED,,,,,...,,0.075219,,,,,,,,
7,34942762bd1343c09ed99aa7043813e9,Siamese CNN Resnet50_lr_0.01_loss_CosineSimila...,file:///mnt/c/Users/Matt/workspace/landmarks/m...,1759386604332,1759386604590,FINISHED,,,,,...,,,,,,,,,,
8,566cad548b5f428baa966cbfa561733d,Siamese CNN Resnet50_lr_0.01_loss_ContrastiveL...,file:///mnt/c/Users/Matt/workspace/landmarks/m...,1759386604048,1759386604301,FINISHED,,,,,...,,,,,,,,,,
9,64dd2496b12f40c6b83a8b99c33003d9,Siamese CNN Resnet50_lr_0.001_loss_CosineSimil...,file:///mnt/c/Users/Matt/workspace/landmarks/m...,1759386603767,1759386604017,FINISHED,,,,,...,,,,,,,,,,


In [338]:
mlflow_models_df.to_parquet('data/deep_learning_model_reports2.parquet')

In [339]:

siamese_df = mlflow_models_df[mlflow_models_df['run_name'].str.startswith('Siamese', na=False)]

dl_metrics_records = []

for i in range(siamese_df.shape[0]):
    dl_metrics_records.append({
        "metric": "accuracy",
        "value": siamese_df["metric.accuracy"].iloc[i],
        "model_name": siamese_df["param.model_name"].iloc[i],
        "run_name": siamese_df["run_name"].iloc[i],
    })
    dl_metrics_records.append({
        "metric": "average_precision",
        "value": siamese_df["metric.average_precision"].iloc[i],
        "model_name": siamese_df["param.model_name"].iloc[i],
        "run_name": siamese_df["run_name"].iloc[i],
    })
    dl_metrics_records.append({
        "metric": "f1",
        "value": siamese_df["metric.macro_f1"].iloc[i],
        "model_name": siamese_df["param.model_name"].iloc[i],
        "run_name": siamese_df["run_name"].iloc[i],
    })
    dl_metrics_records.append({
        "metric": "recall",
        "value": siamese_df["metric.macro_recall"].iloc[i],
        "model_name": siamese_df["param.model_name"].iloc[i],
        "run_name": siamese_df["run_name"].iloc[i],
    })
    dl_metrics_records.append({
        "metric": "precision",
        "value": siamese_df["metric.macro_precision"].iloc[i],
        "model_name": siamese_df["param.model_name"].iloc[i],
        "run_name": siamese_df["run_name"].iloc[i],
    })

dl_metrics_records_df = pd.DataFrame(dl_metrics_records)
dl_metrics_records_df.dropna(inplace=True)

# Group by model_name and take max average_precision
max_avg_precision = dl_metrics_records_df[dl_metrics_records_df['metric'] == 'average_precision'].groupby('model_name')[
    'value'].max()
max_avg_precision


model_name
Siamese CNN Resnet152                 0.073872
Siamese CNN Resnet152 ImageWeights    0.069348
Siamese CNN Resnet18                  0.077620
Siamese CNN Resnet50                  0.109651
Name: value, dtype: float64

In [None]:
epoch_loss = []
for file in os.listdir('checkpoints'):
    filename = os.fsdecode(file)
    if filename.endswith(".pth"):
        epoch, loss = load_checkpoint_epoch_loss(os.path.join('checkpoints', filename))
        f_split = filename.split("_")
        f_split.pop()
        run = " ".join(f_split)
        epoch_loss.append({
            "filename": filename,
            "epoch": epoch,
            "loss": loss,
            "run": run
        })


In [177]:
epoch_loss_df = pd.DataFrame(epoch_loss)
epoch_loss_df

Unnamed: 0,filename,epoch,loss,run
0,Siamese CNN Resnet152 ImageWeights_lr0.001_Con...,0,0.259296,Siamese CNN Resnet152 ImageWeights lr0.001 Con...
1,Siamese CNN Resnet152 ImageWeights_lr0.001_Con...,1,0.248160,Siamese CNN Resnet152 ImageWeights lr0.001 Con...
2,Siamese CNN Resnet152 ImageWeights_lr0.001_Con...,2,0.237700,Siamese CNN Resnet152 ImageWeights lr0.001 Con...
3,Siamese CNN Resnet152 ImageWeights_lr0.001_Con...,3,0.236886,Siamese CNN Resnet152 ImageWeights lr0.001 Con...
4,Siamese CNN Resnet152 ImageWeights_lr0.001_Con...,4,0.227817,Siamese CNN Resnet152 ImageWeights lr0.001 Con...
...,...,...,...,...
460,Siamese CNN Resnet50 _lr1_CosineSimilarityLoss...,5,0.226961,Siamese CNN Resnet50 lr1 CosineSimilarityLoss
461,Siamese CNN Resnet50 _lr1_CosineSimilarityLoss...,6,0.214689,Siamese CNN Resnet50 lr1 CosineSimilarityLoss
462,Siamese CNN Resnet50 _lr1_CosineSimilarityLoss...,7,0.213391,Siamese CNN Resnet50 lr1 CosineSimilarityLoss
463,Siamese CNN Resnet50 _lr1_CosineSimilarityLoss...,8,0.214145,Siamese CNN Resnet50 lr1 CosineSimilarityLoss


In [178]:
epoch_loss_df.to_parquet('data/epoch_loss.parquet')

In [183]:

fig = px.line(
    epoch_loss_df[epoch_loss_df['run'] != "Siamese CNN Resnet50 ImageWeights Full lr0.001 ContrastiveLoss"],
    x="epoch",
    y="loss",
    color="run",
    title="Training Loss over Epochs for Siamese CNN Resnet152 ImageWeights",
    labels={"epoch": "Epoch", "loss": "Train Loss", "run_name": "Run"},
)
fig.update_layout(width=1200, height=600)
fig.show()


In [333]:
fig = px.bar(
    dl_metrics_records_df,
    x="model_name",
    y="value",
    color="metric",
    barmode="group",
    title="Model Metric Comparison: XGBoost has best mAP score",
    labels={"Value": "Score", "model_name": "Model", "metric": "Metric"},
)
fig.update_layout(width=1200, height=600, legend_title_text="Metric")
fig.show()

fig.write_image("images/dl_model_metrics_comparison.png", width=1200, height=600)