In [1]:
import os

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import torchvision.transforms as transforms
import torch.nn.functional as F
import mlflow
import time
import random
import mlflow.sklearn
import numpy as np
import pandas as pd
import plotly.express as px
from plotly.subplots import make_subplots
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.inspection import permutation_importance
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import (
    classification_report,
    precision_recall_fscore_support,
    average_precision_score,
)
from sklearn.preprocessing import LabelEncoder
from sklearn.svm import SVC
from torchmetrics import Accuracy, RetrievalMAP
from warnings import filterwarnings
from PIL import Image
from torchvision.models import resnet18, resnet50, resnet152

filterwarnings("ignore")

In [2]:
torch.backends.cuda.matmul.allow_tf32 = True
torch.backends.cudnn.allow_tf32 = True

## Data Analysis

In [3]:
train_df = pd.read_parquet("data/joined_features_all.parquet")

In [4]:
train_df.shape

(1580470, 13)

In [5]:
train_df.sample(5)

Unnamed: 0,id,landmark_id,image_path,width,height,aspect_ratio,mean_rgb,mean_r,mean_g,mean_b,x,y,local_binary_pattern
1360791,a0dd8bfbf2fec3c6,175036,a0dd8bfbf2fec3c6.jpg,800,535,1.495327,[105.58175701 100.59224299 100.16481075],105.581757,100.592243,100.164811,-38.608433,-31.994104,"[0.0509018691588785,0.030598130841121497,0.022..."
772745,cd6699a8e3041fdf,99821,cd6699a8e3041fdf.jpg,800,600,1.333333,[132.4810875 128.22320417 118.97786875],132.481088,128.223204,118.977869,-36.949306,26.824358,"[0.05235,0.030033333333333332,0.02194583333333..."
951765,c8d3a5524470f7d3,123038,c8d3a5524470f7d3.jpg,531,800,0.66375,[100.99157721 99.99068267 98.4145645 ],100.991577,99.990683,98.414564,45.482384,52.554836,"[0.05986111111111111,0.03701271186440678,0.021..."
1028630,cb54f4d2666fcc87,132914,cb54f4d2666fcc87.jpg,533,800,0.66625,[113.69511023 117.12466698 115.73785413],113.69511,117.124667,115.737854,72.422928,-15.387292,"[0.04066369606003752,0.023902439024390244,0.01..."
717608,7a59db43bbbdbdee,92275,7a59db43bbbdbdee.jpg,800,583,1.372213,[130.2100536 128.55510506 105.3944018 ],130.210054,128.555105,105.394402,-19.75704,10.44643,"[0.06275514579759862,0.033130360205831905,0.02..."


In [6]:
# Make local_binary_pattern entries numpy ndarrays
def to_ndarray(v):
    s = v.strip().strip("[]")
    parts = s.replace(",", " ").split()
    return [float(x) for x in parts]

In [7]:
train_df["local_binary_pattern"] = train_df["local_binary_pattern"].apply(to_ndarray)

In [8]:
train_df["landmark_id"] = train_df["landmark_id"].astype(str)

In [9]:
def to_image_path(dir, id):
    return f"data/{dir}/{id[0]}/{id[1]}/{id[2]}/{id}.jpg"


def train_image_path(id):
    return to_image_path("train", id)


def test_image_path(id):
    return to_image_path("test", id)


def index_image_path(id):
    return to_image_path("index", id)

In [10]:
train_df["image_path"] = train_df["id"].apply(train_image_path)
train_df

Unnamed: 0,id,landmark_id,image_path,width,height,aspect_ratio,mean_rgb,mean_r,mean_g,mean_b,x,y,local_binary_pattern
0,17660ef415d37059,1,data/train/1/7/6/17660ef415d37059.jpg,533,800,0.666250,[126.00759381 119.0244606 113.26428471],126.007594,119.024461,113.264285,6.526572,-59.742741,"[0.05229596622889306, 0.027049718574108816, 0...."
1,92b6290d571448f6,1,data/train/9/2/b/92b6290d571448f6.jpg,534,800,0.667500,[97.5096559 93.19100421 86.4288764 ],97.509656,93.191004,86.428876,14.416226,-57.622620,"[0.06176498127340824, 0.027186329588014983, 0...."
2,cd41bf948edc0340,1,data/train/c/d/4/cd41bf948edc0340.jpg,800,512,1.562500,[89.43367188 83.05516602 74.40758057],89.433672,83.055166,74.407581,8.051970,-53.929276,"[0.05924560546875, 0.03013427734375, 0.0246411..."
3,fb09f1e98c6d2f70,1,data/train/f/b/0/fb09f1e98c6d2f70.jpg,532,800,0.665000,[107.91263863 106.76824483 109.26745771],107.912639,106.768245,109.267458,4.627127,-54.376423,"[0.050286654135338345, 0.02482142857142857, 0...."
4,25c9dfc7ea69838d,7,data/train/2/5/c/25c9dfc7ea69838d.jpg,800,600,1.333333,[132.4216875 137.05765 144.99947083],132.421687,137.057650,144.999471,-8.277364,-25.763720,"[0.036810416666666665, 0.022747916666666666, 0..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...
1580465,72c3b1c367e3d559,203092,data/train/7/2/c/72c3b1c367e3d559.jpg,800,533,1.500938,[128.88117026 127.38813086 116.54751407],128.881170,127.388131,116.547514,7.850999,29.970839,"[0.049305816135084425, 0.03048780487804878, 0...."
1580466,7a6a2d9ea92684a6,203092,data/train/7/a/6/7a6a2d9ea92684a6.jpg,800,532,1.503759,[127.55681156 124.83408835 114.69496711],127.556812,124.834088,114.694967,-0.866286,31.105888,"[0.04551691729323308, 0.025406484962406016, 0...."
1580467,9401fad4c497e1f9,203092,data/train/9/4/0/9401fad4c497e1f9.jpg,800,533,1.500938,[129.93018058 130.93818246 127.94207083],129.930181,130.938182,127.942071,-10.209307,-1.973596,"[0.039880393996247654, 0.023184803001876173, 0..."
1580468,aacc960c9a228b5f,203092,data/train/a/a/c/aacc960c9a228b5f.jpg,800,533,1.500938,[142.47123358 142.97529784 136.52950516],142.471234,142.975298,136.529505,70.599495,50.996376,"[0.03226547842401501, 0.020316604127579737, 0...."


In [119]:
landmark_counts = train_df["landmark_id"].value_counts()
landmark_counts.head()

landmark_id
138982    6272
126637    2231
20409     1758
83144     1741
113209    1135
Name: count, dtype: int64

In [11]:
unique_landmarks = train_df["landmark_id"].unique()
len(unique_landmarks)

81313

There are 81313 unique landmarks in the dataset.

In [121]:
top25 = landmark_counts.head(25)
fig = px.bar(
    x=top25.index.astype(str),
    y=top25.values,
    labels={"x": "Landmark ID", "y": "Count"},
    title="Top 25 Landmark IDs by Count: 138982 has 3 times as much as next highest landmark",
)
fig.update_layout(xaxis_tickangle=-45)
fig.show()
fig.write_image("images/landmark_count.png", height=500, width=1200)

The fact that landmark 138982 has 3 times as many images as the next highest landmark is concerning. We should keep an eye on that and may need to reduce the number of samples from that landmark id.

In [None]:
fig = px.histogram(
    train_df,
    x="aspect_ratio",
    nbins=20,
    labels={"aspect_ratio": "Aspect Ratio", "count": "Count"},
    title="Aspect ratio (proportions) is similar for most images",
)
fig.update_layout(height=500, width=1200)
fig.show()
fig.write_image("images/aspect_ratio_histogram.png", height=500, width=1200)

In [None]:
fig = make_subplots(
    rows=1,
    cols=3,
    subplot_titles=("Mean R vs Mean G", "Mean G vs Mean B", "Mean R vs Mean B"),
)

trace_rg = px.scatter(train_df, x="mean_r", y="mean_g").data[0]
trace_gb = px.scatter(train_df, x="mean_g", y="mean_b").data[0]
trace_rb = px.scatter(train_df, x="mean_r", y="mean_b").data[0]

fig.add_trace(trace_rg, row=1, col=1)
fig.add_trace(trace_gb, row=1, col=2)
fig.add_trace(trace_rb, row=1, col=3)

fig.update_xaxes(title_text="Mean R", row=1, col=1)
fig.update_yaxes(title_text="Mean G", row=1, col=1)
fig.update_xaxes(title_text="Mean G", row=1, col=2)
fig.update_yaxes(title_text="Mean B", row=1, col=2)
fig.update_xaxes(title_text="Mean R", row=1, col=3)
fig.update_yaxes(title_text="Mean B", row=1, col=3)

fig.update_layout(
    title_text="Pairwise relationships among Mean R, Mean G, and Mean B across all images. Looks noisy at first",
)

fig.show()
fig.write_image(
    "images/color_channel_pairwise_all_landmarks.png", height=500, width=1200
)

In [12]:
selected_ids = unique_landmarks[:5]
five_landmarks_df = train_df[train_df["landmark_id"].isin(selected_ids)].copy()

These are the landmarks selected by the above code. Since we are dealing with over 80,000 unique landmarks and over 1.5 million images, I will limit our analysis to a subset of landmarks.
- Landmark 1  
![Landmark 1](data/train/1/7/6/17660ef415d37059.jpg)

- Landmark 7  
![Landmark 7](data/train/2/8/b/28b13f94a6f1f3c1.jpg)

- Landmark 9  
![Landmark 9](data/train/0/1/9/0193b65bb58d2c77.jpg)

- Landmark 11  
![Landmark 11](data/train/1/a/6/1a6cb1deed46bb17.jpg)

- Landmark 12  
![Landmark 12](data/train/1/4/9/1492a5d344495391.jpg)


In [None]:
fig = make_subplots(
    rows=1,
    cols=3,
    subplot_titles=("Mean R vs Mean G", "Mean G vs Mean B", "Mean R vs Mean B"),
)

fig_rg = px.scatter(five_landmarks_df, x="mean_r", y="mean_g", color="landmark_id")
for tr in fig_rg.data:
    fig.add_trace(tr, row=1, col=1)

fig_gb = px.scatter(five_landmarks_df, x="mean_g", y="mean_b", color="landmark_id")
for tr in fig_gb.data:
    tr.showlegend = False
    fig.add_trace(tr, row=1, col=2)

fig_rb = px.scatter(five_landmarks_df, x="mean_r", y="mean_b", color="landmark_id")
for tr in fig_rb.data:
    tr.showlegend = False
    fig.add_trace(tr, row=1, col=3)

fig.update_xaxes(title_text="Mean R", row=1, col=1)
fig.update_yaxes(title_text="Mean G", row=1, col=1)
fig.update_xaxes(title_text="Mean G", row=1, col=2)
fig.update_yaxes(title_text="Mean B", row=1, col=2)
fig.update_xaxes(title_text="Mean R", row=1, col=3)
fig.update_yaxes(title_text="Mean B", row=1, col=3)

fig.update_layout(
    title_text="Color channels across 5 landmarks. Indicates color channels could be leverage for landmark identification",
)

fig.show()
fig.write_image("images/color_channel_pairwise_5_landmarks.png", height=500, width=1200)

In [None]:
fig = px.scatter_3d(
    five_landmarks_df,
    x="mean_r",
    y="mean_g",
    z="mean_b",
    color="landmark_id",
    title="3D Scatter of mean RGB for 5 Landmark IDs",
)
fig.update_layout(width=1200, height=900)
fig.show()

In [124]:
def mean_histogram(arrs):
    stacked = np.vstack(arrs)
    return stacked.mean(axis=0)

In [125]:
# Plot the Mean Local Binary Pattern histogram
lbp_df = five_landmarks_df.dropna(subset=["local_binary_pattern"]).copy()

first_hist = lbp_df.iloc[0]["local_binary_pattern"]
n_bins = len(first_hist)
# Get histogram mean grouped by landmark
mean_hists = lbp_df.groupby("landmark_id", observed=True)["local_binary_pattern"].apply(
    mean_histogram
)

records = []
for landmark_id, hist in mean_hists.items():
    for i, v in enumerate(hist):
        records.append({"landmark_id": landmark_id, "bin": i, "value": float(v)})
plot_df = pd.DataFrame(records)

fig = px.line(
    plot_df,
    x="bin",
    y="value",
    color="landmark_id",
    markers=True,
    labels={
        "bin": "Local Binary Pattern Bin",
        "value": "Mean Frequency",
        "landmark_id": "Landmark ID",
    },
    title="Mean Local Binary Pattern Histogram per Landmark. Bin 12 & 13 shows differences in texture",
)
fig.update_layout(width=1200, height=700)
fig.show()
fig.write_image("images/lbp_mean_histogram_5_landmarks.png", width=1200, height=700)

Local Binary Patterns is a texture descriptor. It does this by comparing a pixel to its neighboring pixels. It captures the intensity of each pixel and compares their intensities. It will assign 1 if the neighboring pixel's intensity is greater than the pixel currently being assessed. Otherwise, LBP will assign 0. The algorithm then combines the binary values of all the neighboring pixels to create a value for the pixel being assessed. It does this for all the pixels in the image to create a binary code representing the texture of the image.

In [None]:
fig = px.scatter(
    five_landmarks_df,
    x="x",
    y="y",
    color="landmark_id",
    labels={"x": "Embedding 2d X", "y": "Embedding 2d Y", "landmark_id": "Landmark ID"},
    title="2d Embedding shows promise in clustering images by embeddings",
)
fig.update_layout(width=1200, height=700)
fig.show()
fig.write_image("images/embedding_2d_scatter_5_landmarks.png", width=1200, height=700)

In [None]:
fig = px.scatter(
    train_df[train_df["landmark_id"].isin(unique_landmarks[:25])],
    x="x",
    y="y",
    color="landmark_id",
    labels={"x": "Embedding 2d X", "y": "Embedding 2d Y", "landmark_id": "Landmark ID"},
    title="2d Embedding shows promise in clustering images by embeddings 25 landmarks",
)
fig.update_layout(width=1200, height=700)
fig.show()
fig.write_image("images/embedding_2d_scatter_25_landmarks.png", width=1200, height=700)

A 2D embedding squashes an image down to just two numbers: X and Y. Each image becomes a single point on a 2D plane. I used t-SNE to achieve this, which, like principal component analysis, reduces dimensionality. We take the full embedding of an image and convert that full embedding down to two features.


### Modeling

In [13]:
mlflow.set_experiment("Landmark Recognition")

<Experiment: artifact_location='file:///mnt/c/Users/Matt/workspace/landmarks/mlruns/745548148810810489', creation_time=1756780167672, experiment_id='745548148810810489', last_update_time=1756780167672, lifecycle_stage='active', name='Landmark Recognition', tags={}>

In [14]:
# Expand local_binary_pattern into separate columns for modeling
lbp_expanded = pd.DataFrame(
    train_df["local_binary_pattern"].tolist(), index=train_df.index
)
lbp_expanded.columns = [f"lbp_{i}" for i in range(lbp_expanded.shape[1])]

train_df_expanded_lbp = pd.concat(
    [train_df.drop(columns=["local_binary_pattern"]), lbp_expanded], axis=1
)

In [21]:
train_df_expanded_lbp

Unnamed: 0,id,landmark_id,image_path,width,height,aspect_ratio,mean_rgb,mean_r,mean_g,mean_b,...,lbp_16,lbp_17,lbp_18,lbp_19,lbp_20,lbp_21,lbp_22,lbp_23,lbp_24,lbp_25
0,17660ef415d37059,1,data/train/1/7/6/17660ef415d37059.jpg,533,800,0.666250,[126.00759381 119.0244606 113.26428471],126.007594,119.024461,113.264285,...,0.007265,0.019275,0.009409,0.010033,0.011091,0.015943,0.022183,0.029939,0.203417,0.433513
1,92b6290d571448f6,1,data/train/9/2/b/92b6290d571448f6.jpg,534,800,0.667500,[97.5096559 93.19100421 86.4288764 ],97.509656,93.191004,86.428876,...,0.011941,0.013977,0.011393,0.013102,0.015047,0.020524,0.025602,0.027809,0.071444,0.455536
2,cd41bf948edc0340,1,data/train/c/d/4/cd41bf948edc0340.jpg,800,512,1.562500,[89.43367188 83.05516602 74.40758057],89.433672,83.055166,74.407581,...,0.010195,0.014504,0.009797,0.010339,0.011846,0.017148,0.024343,0.031929,0.086533,0.488169
3,fb09f1e98c6d2f70,1,data/train/f/b/0/fb09f1e98c6d2f70.jpg,532,800,0.665000,[107.91263863 106.76824483 109.26745771],107.912639,106.768245,109.267458,...,0.009030,0.024250,0.011074,0.011675,0.012620,0.017000,0.021339,0.027850,0.202655,0.404803
4,25c9dfc7ea69838d,7,data/train/2/5/c/25c9dfc7ea69838d.jpg,800,600,1.333333,[132.4216875 137.05765 144.99947083],132.421687,137.057650,144.999471,...,0.014794,0.028979,0.013806,0.013513,0.014046,0.017329,0.021167,0.024594,0.101777,0.371237
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1580465,72c3b1c367e3d559,203092,data/train/7/2/c/72c3b1c367e3d559.jpg,800,533,1.500938,[128.88117026 127.38813086 116.54751407],128.881170,127.388131,116.547514,...,0.011027,0.018539,0.010720,0.011100,0.011914,0.016867,0.023136,0.030579,0.114010,0.454266
1580466,7a6a2d9ea92684a6,203092,data/train/7/a/6/7a6a2d9ea92684a6.jpg,800,532,1.503759,[127.55681156 124.83408835 114.69496711],127.556812,124.834088,114.694967,...,0.011494,0.024232,0.012446,0.012303,0.012761,0.016833,0.022000,0.027133,0.159159,0.409908
1580467,9401fad4c497e1f9,203092,data/train/9/4/0/9401fad4c497e1f9.jpg,800,533,1.500938,[129.93018058 130.93818246 127.94207083],129.930181,130.938182,127.942071,...,0.008391,0.025000,0.010544,0.010973,0.011515,0.015701,0.020115,0.024430,0.290068,0.353084
1580468,aacc960c9a228b5f,203092,data/train/a/a/c/aacc960c9a228b5f.jpg,800,533,1.500938,[142.47123358 142.97529784 136.52950516],142.471234,142.975298,136.529505,...,0.013895,0.037812,0.016930,0.015894,0.014125,0.017326,0.019805,0.022676,0.239557,0.320035


In [32]:
# earlier training shows that my machine cant train such a large model.
unique_landmarks_to_train_on = 500
rng = np.random.default_rng(42)
selected_landmark_ids = rng.choice(
    train_df_expanded_lbp["landmark_id"],
    size=unique_landmarks_to_train_on,
    replace=False,
)
subset_train_df = train_df_expanded_lbp[
    train_df_expanded_lbp["landmark_id"].isin(selected_landmark_ids)
]

In [33]:
subset_train_df

Unnamed: 0,id,landmark_id,image_path,width,height,aspect_ratio,mean_rgb,mean_r,mean_g,mean_b,...,lbp_16,lbp_17,lbp_18,lbp_19,lbp_20,lbp_21,lbp_22,lbp_23,lbp_24,lbp_25
8312,0e494985dc4e6f5f,1122,data/train/0/e/4/0e494985dc4e6f5f.jpg,800,530,1.509434,[107.48516509 115.14127123 119.52160613],107.485165,115.141271,119.521606,...,0.010241,0.023519,0.011842,0.012182,0.011311,0.015205,0.020802,0.030292,0.187238,0.403177
8313,10bd9a894e3f7933,1122,data/train/1/0/b/10bd9a894e3f7933.jpg,800,554,1.444043,[116.80949458 114.3092148 108.06415614],116.809495,114.309215,108.064156,...,0.017676,0.016934,0.013973,0.014050,0.015314,0.019192,0.024070,0.026376,0.056153,0.396974
8314,1438a5d2e8d5a45a,1122,data/train/1/4/3/1438a5d2e8d5a45a.jpg,800,533,1.500938,[120.92359053 136.08054409 155.98484287],120.923591,136.080544,155.984843,...,0.011984,0.031541,0.013213,0.012570,0.012015,0.015959,0.018663,0.026398,0.223811,0.345804
8315,1521e26ad1f2039a,1122,data/train/1/5/2/1521e26ad1f2039a.jpg,800,531,1.506591,[160.66089454 151.15706685 141.67239642],160.660895,151.157067,141.672396,...,0.008133,0.012175,0.007561,0.008166,0.008865,0.011766,0.017585,0.027792,0.259769,0.402189
8316,168dccd87677b276,1122,data/train/1/6/8/168dccd87677b276.jpg,800,453,1.766004,[128.80704194 122.88684051 117.8172489 ],128.807042,122.886841,117.817249,...,0.007632,0.009352,0.007078,0.008971,0.008474,0.011890,0.019459,0.041347,0.067381,0.536747
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1577256,9e3c1f5380576164,202755,data/train/9/e/3/9e3c1f5380576164.jpg,800,533,1.500938,[ 92.11616088 115.2420849 118.0916651 ],92.116161,115.242085,118.091665,...,0.013100,0.020762,0.013295,0.013133,0.013140,0.016384,0.020755,0.029191,0.128049,0.423316
1577257,9e4d59bd37028681,202755,data/train/9/e/4/9e4d59bd37028681.jpg,800,580,1.379310,[125.89680172 141.21918534 158.78746767],125.896802,141.219185,158.787468,...,0.015136,0.029545,0.014950,0.013946,0.013159,0.016235,0.019093,0.024414,0.176453,0.353522
1577258,b02cf8181c2bacbf,202755,data/train/b/0/2/b02cf8181c2bacbf.jpg,533,800,0.666250,[139.19162992 154.37495779 165.99801595],139.191630,154.374958,165.998016,...,0.011189,0.034503,0.014357,0.014285,0.013586,0.017418,0.019463,0.020521,0.320976,0.285959
1577259,be593e70db6ae6be,202755,data/train/b/e/5/be593e70db6ae6be.jpg,800,533,1.500938,[ 92.85430582 112.72290572 128.30803471],92.854306,112.722906,128.308035,...,0.013520,0.035305,0.015556,0.014646,0.013940,0.017341,0.020000,0.024965,0.173480,0.365258


In [37]:
# Balance the number of rows per landmark_id by undersampling to the smallest class size
if not subset_train_df.empty:
    min_count = 500
    subset_train_df = (
        subset_train_df.groupby("landmark_id", group_keys=False)
        .apply(lambda df: df.sample(n=min_count, random_state=42, replace=True))
        .reset_index(drop=True)
    )
subset_train_df.drop_duplicates(inplace=True)
subset_train_df

Unnamed: 0,id,landmark_id,image_path,width,height,aspect_ratio,mean_rgb,mean_r,mean_g,mean_b,...,lbp_16,lbp_17,lbp_18,lbp_19,lbp_20,lbp_21,lbp_22,lbp_23,lbp_24,lbp_25
0,56a249043b34ea5a,100178,data/train/5/6/a/56a249043b34ea5a.jpg,572,800,0.715000,[115.0489576 142.75680726 151.89856643],115.048958,142.756807,151.898566,...,0.009838,0.026844,0.013741,0.014139,0.013802,0.017900,0.023103,0.032334,0.146453,0.443341
1,14dd8628fd2355b2,100178,data/train/1/4/d/14dd8628fd2355b2.jpg,800,600,1.333333,[127.04639167 131.15459375 131.8057125 ],127.046392,131.154594,131.805712,...,0.016569,0.020004,0.014287,0.014610,0.016150,0.019169,0.023298,0.025294,0.074900,0.404796
2,95169c8b561cfcf1,100178,data/train/9/5/1/95169c8b561cfcf1.jpg,600,800,0.750000,[137.2498125 133.20583958 126.7155625 ],137.249812,133.205840,126.715563,...,0.014667,0.021544,0.013294,0.013690,0.014556,0.018006,0.022408,0.026108,0.086475,0.423077
3,147d001cbb2b3f7c,100178,data/train/1/4/7/147d001cbb2b3f7c.jpg,600,800,0.750000,[152.74334167 148.841275 145.24612083],152.743342,148.841275,145.246121,...,0.011604,0.014442,0.011640,0.013083,0.014750,0.019865,0.025425,0.027083,0.113000,0.449442
4,b79875f6f11e39d3,100178,data/train/b/7/9/b79875f6f11e39d3.jpg,800,600,1.333333,[ 99.23425417 106.68382083 83.23637917],99.234254,106.683821,83.236379,...,0.014967,0.021669,0.015254,0.015817,0.016944,0.020571,0.025004,0.025304,0.091654,0.419490
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
245450,830fbc235b9c600f,99759,data/train/8/3/0/830fbc235b9c600f.jpg,799,600,1.331667,[105.38810179 92.86807468 77.77164372],105.388102,92.868075,77.771644,...,0.017261,0.022733,0.016389,0.017434,0.018225,0.020763,0.022374,0.021836,0.091642,0.352753
245461,7e759856a5c7b843,99759,data/train/7/e/7/7e759856a5c7b843.jpg,800,600,1.333333,[116.1383125 112.45229167 104.33125 ],116.138312,112.452292,104.331250,...,0.012217,0.019600,0.012148,0.012117,0.012315,0.015796,0.019529,0.026863,0.161950,0.392238
245467,ef3a3dfae6f61559,99759,data/train/e/f/3/ef3a3dfae6f61559.jpg,799,600,1.331667,[134.62271172 142.2919295 144.38765123],134.622712,142.291930,144.387651,...,0.009996,0.025219,0.011033,0.011087,0.010945,0.015394,0.020010,0.029143,0.206237,0.386395
245472,bf4d046ab674535b,99759,data/train/b/f/4/bf4d046ab674535b.jpg,799,600,1.331667,[126.55726533 110.89161452 93.8501335 ],126.557265,110.891615,93.850133,...,0.017242,0.019622,0.014164,0.014591,0.014896,0.017478,0.019844,0.021414,0.064428,0.348942


In [38]:
# Use subset of landmarks to train
X = subset_train_df.drop(["landmark_id", "id", "image_path", "mean_rgb"], axis=1)
y = subset_train_df["landmark_id"].astype(int)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Encode labels to ensure they are 0-based consecutive integers
le = LabelEncoder()
# Fit LabelEncoder on all possible classes for Average precision score
le.fit(y)
y_train_encoded = le.transform(y_train)
y_test_encoded = le.transform(y_test)
tensor_target = torch.tensor(y_test_encoded, dtype=torch.long)

In [102]:
sklearn_models = [
    "Random Forest",
    "Logistic Regression",
    "SVC",
    "Gradient Boosting",
]

In [103]:
models = [
    (
        "Random Forest",
        RandomForestClassifier(
            n_estimators=100, max_depth=7, n_jobs=-1, random_state=42
        ),
        (X_train, y_train),
        (X_test, y_test),
        (y_train_encoded, y_test_encoded),
    ),
    (
        "Logistic Regression",
        LogisticRegression(random_state=42),
        (X_train, y_train),
        (X_test, y_test),
        (y_train_encoded, y_test_encoded),
    ),
    (
        "SVC",
        SVC(probability=True, random_state=42),
        (X_train, y_train),
        (X_test, y_test),
        (y_train_encoded, y_test_encoded),
    ),
    (
        "Gradient Boosting",
        GradientBoostingClassifier(n_estimators=50, max_depth=1, random_state=42),
        (X_train, y_train),
        (X_test, y_test),
        (y_train_encoded, y_test_encoded),
    ),
]

In [104]:
reports = []

In [105]:
for model_name, model, train, test, y_encoded in models:
    X_train, y_train = train
    X_test, y_test = test
    y_train_encoded, y_test_encoded = y_encoded
    tensor_target = torch.tensor(y_test_encoded, dtype=torch.long)

    print(f"Model: {model_name}")
    start = time.perf_counter()
    model.fit(X_train, y_train)
    elapsed = time.perf_counter() - start
    print(f"Elapsed: {elapsed:.3f}s")

    y_pred = model.predict(X_test)
    y_pred_proba = model.predict_proba(X_test)

    report = classification_report(
        y_test_encoded, le.transform(y_pred), output_dict=True
    )

    num_classes = y_pred_proba.shape[1]
    num_samples = y_pred_proba.shape[0]

    preds_tensor = torch.tensor(y_pred_proba, dtype=torch.float32)
    preds_tensor = torch.nan_to_num(preds_tensor, nan=0.0, posinf=1.0, neginf=0.0)
    preds_flat = preds_tensor.reshape(-1)

    targets_class = torch.tensor(y_test_encoded, dtype=torch.long)
    target_onehot = F.one_hot(targets_class, num_classes=num_classes).to(torch.int32)
    target_flat = target_onehot.reshape(-1)

    indexes = (
        torch.arange(num_samples, dtype=torch.long)
        .unsqueeze(1)
        .expand(num_samples, num_classes)
        .reshape(-1)
    )

    retrieval_map = RetrievalMAP()
    average_precision = retrieval_map(preds_flat, target_flat, indexes).item()

    print("Retrieval mAP:", average_precision)
    reports.append((report, average_precision))

    print("Average precision:", average_precision)
    reports.append((report, average_precision))

Model: Random Forest
Elapsed: 0.435s
Retrieval mAP: 0.6832329630851746
Average precision: 0.6832329630851746
Model: Logistic Regression
Elapsed: 1.282s
Retrieval mAP: 0.578423023223877
Average precision: 0.578423023223877
Model: SVC
Elapsed: 17.711s
Retrieval mAP: 0.6500400900840759
Average precision: 0.6500400900840759
Model: Gradient Boosting
Elapsed: 158.271s
Retrieval mAP: 0.6906396746635437
Average precision: 0.6906396746635437


In [106]:
reports[0]

({'0': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 11.0},
  '2': {'precision': 0.8,
   'recall': 0.09523809523809523,
   'f1-score': 0.1702127659574468,
   'support': 42.0},
  '3': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 15.0},
  '4': {'precision': 0.5,
   'recall': 0.1,
   'f1-score': 0.16666666666666666,
   'support': 20.0},
  '5': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 12.0},
  '6': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 2.0},
  '7': {'precision': 1.0,
   'recall': 0.13043478260869565,
   'f1-score': 0.23076923076923078,
   'support': 23.0},
  '8': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 1.0},
  '9': {'precision': 0.2857142857142857,
   'recall': 0.10526315789473684,
   'f1-score': 0.15384615384615385,
   'support': 19.0},
  '10': {'precision': 1.0,
   'recall': 0.07142857142857142,
   'f1-score': 0.13333333333333333,
   'support': 28.0},
  '11': {'precision': 0.0, 'recall': 0

In [107]:
for i, model_item in enumerate(models):
    model_name = model_item[0]
    model = model_item[1]
    X_train, _ = model_item[2]
    report, average_precision = reports[i]
    with mlflow.start_run(run_name=model_name):
        mlflow.log_params(model.get_params())
        mlflow.log_metric("accuracy", report["accuracy"])
        mlflow.log_metric("average_precision", average_precision)
        mlflow.log_metric("macro_f1", report["macro avg"]["f1-score"])
        mlflow.log_metric("macro_recall", report["macro avg"]["recall"])
        mlflow.log_metric("macro_precision", report["macro avg"]["precision"])
        mlflow.log_metric("weighted_f1", report["weighted avg"]["f1-score"])
        mlflow.log_metric("weighted_recall", report["weighted avg"]["recall"])
        mlflow.log_metric("weighted_precision", report["weighted avg"]["precision"])
        if model_name in sklearn_models:
            mlflow.sklearn.log_model(
                model, name=model_name, input_example=X_train.sample(1)
            )

## Model Evaluation

In [108]:
# Build comparison DataFrame from `reports` and `models`


def get_metrics(report, average_precision, model_name):
    return [
        {
            "metric": "accuracy",
            "value": report["accuracy"],
            "model_name": model_name,
        },
        {
            "metric": "average_precision",
            "value": float(average_precision),
            "model_name": model_name,
        },
        {
            "metric": "macro_f1",
            "value": report["macro avg"]["f1-score"],
            "model_name": model_name,
        },
        {
            "metric": "macro_recall",
            "value": report["macro avg"]["recall"],
            "model_name": model_name,
        },
        {
            "metric": "macro_precision",
            "value": report["macro avg"]["precision"],
            "model_name": model_name,
        },
        {
            "metric": "weighted_f1",
            "value": report["weighted avg"]["f1-score"],
            "model_name": model_name,
        },
        {
            "metric": "weighted_recall",
            "value": report["weighted avg"]["recall"],
            "model_name": model_name,
        },
        {
            "metric": "weighted_precision",
            "value": report["weighted avg"]["precision"],
            "model_name": model_name,
        },
    ]


metrics_records = []
for i, model_item in enumerate(models):
    model_name = model_item[0]
    report, average_precision = reports[i]
    metrics_records += get_metrics(report, average_precision, model_name)

metrics_df = pd.DataFrame(metrics_records)

In [109]:
fig = px.bar(
    metrics_df,
    x="model_name",
    y="value",
    color="metric",
    barmode="group",
    title="Model Metric Comparison: Random Forest has best mAP score",
    labels={"Value": "Score", "model_name": "Model", "metric": "Metric"},
)
fig.update_layout(width=1200, height=600, legend_title_text="Metric")
fig.show()

fig.write_image("images/model_metrics_comparison.png", width=1200, height=600)

In [39]:
# Load latest sklearn models logged to MLflow under the "Landmark Recognition" experiment
def load_most_recent_models():
    loaded_models = {}
    client = mlflow.tracking.MlflowClient()
    exp = client.get_experiment_by_name("Landmark Recognition")
    logged_models = client.search_logged_models(
        experiment_ids=[exp.experiment_id], filter_string="status != 'FAILED'"
    )
    logged_models.sort(key=lambda l: l.last_updated_timestamp)

    for logged_model in logged_models:
        name = logged_model.name
        if name in sklearn_models and name not in loaded_models:
            model = mlflow.sklearn.load_model(logged_model.model_uri)
            loaded_models[name] = model
    if loaded_models:
        print(f"Loaded {len(loaded_models)} model(s) from MLflow:")
        for name in loaded_models:
            print(f" - {name}")
    else:
        print(
            "No models could be loaded from MLflow runs in experiment 'Landmark Recognition'."
        )
    return loaded_models

In [53]:
loaded_models = load_most_recent_models()

Loaded 4 model(s) from MLflow:
 - Random Forest
 - Logistic Regression
 - SVC
 - Gradient Boosting


In [49]:
loaded_models

{'Random Forest': RandomForestClassifier(max_depth=7, n_jobs=-1, random_state=42),
 'Logistic Regression': LogisticRegression(random_state=42),
 'SVC': SVC(probability=True, random_state=42),
 'Gradient Boosting': GradientBoostingClassifier(max_depth=1, n_estimators=50, random_state=42)}

In [66]:
forest_importance = {
    "Feature": loaded_models["Random Forest"].feature_names_in_,
    "Importance": np.abs(loaded_models["Random Forest"].feature_importances_),
}

forest_importance_df = pd.DataFrame(forest_importance)

forest_importance_df = forest_importance_df.sort_values("Importance", ascending=False)
fig = px.bar(
    forest_importance_df[:10],
    x="Importance",
    y="Feature",
    orientation="h",
    title="Top Ten Features RandomForest - Embedding & LBP important, as expected. Unexpectedly aspect ratio most important.",
)
fig.show()
fig.write_image("images/forest_top_features.png", width=1200, height=600)

In [67]:
gradient_boosting_importance = {
    "Feature": loaded_models["Gradient Boosting"].feature_names_in_,
    "Importance": np.abs(loaded_models["Gradient Boosting"].feature_importances_),
}

gradient_boosting_importance_df = pd.DataFrame(gradient_boosting_importance)

gradient_boosting_importance_df = gradient_boosting_importance_df.sort_values(
    "Importance", ascending=False
)
fig = px.bar(
    gradient_boosting_importance_df[:10],
    x="Importance",
    y="Feature",
    orientation="h",
    title="Top Ten Features Gradient Boosting - Embedding & LBP important, as expected.",
)
fig.show()
fig.write_image("images/gradient_top_features.png", width=1200, height=600)

In [85]:
logistic_regression_importance = {
    "Feature": loaded_models["Logistic Regression"].feature_names_in_,
    "Importance": np.abs(loaded_models["Logistic Regression"].coef_[0]),
}
logistic_regression_importance_df = pd.DataFrame(logistic_regression_importance)

logistic_regression_importance_df = logistic_regression_importance_df.sort_values(
    "Importance", ascending=False
)
fig = px.bar(
    logistic_regression_importance_df[:10],
    x="Importance",
    y="Feature",
    orientation="h",
    title="Top Ten Features Logistic Regression - 2D embedding had the strongest influence",
)
fig.show()
fig.write_image("images/logistic_regression_top_features.png", width=1200, height=600)

In [84]:
svc_permutation_importance = permutation_importance(
    loaded_models["SVC"], X, y, n_repeats=10, random_state=42
)

In [87]:
svc_importance = {
    "Feature": loaded_models["SVC"].feature_names_in_,
    "Importance": svc_permutation_importance.importances_mean,
}
svc_importance_df = pd.DataFrame(svc_importance)

svc_importance_df = svc_importance_df.sort_values("Importance", ascending=False)
fig = px.bar(
    svc_importance_df[:10],
    x="Importance",
    y="Feature",
    orientation="h",
    title="Top Ten Features SVC - Height and width had the most influence",
)
fig.show()
fig.write_image("images/svc_top_features.png", width=1200, height=600)

### Deep Learning Models

In [160]:
# Build a Siamese network
class SiameseNetwork(nn.Module):
    def __init__(self, resnet=None, embedding_size=128):
        super(SiameseNetwork, self).__init__()
        if resnet:
            self.convolution_neural_network = resnet(pretrained=True)
        else:
            self.convolution_neural_network = resnet18(pretrained=True)
        in_features = self.convolution_neural_network.fc.in_features
        self.convolution_neural_network.fc = nn.Linear(in_features, embedding_size)

    def forward_one(self, x):
        return self.convolution_neural_network(x)

    def forward(self, x1, x2):
        return self.forward_one(x1), self.forward_one(x2)


# Loss function for images (maybe try cosine similarity)
class ContrastiveLoss(nn.Module):
    def __init__(self, margin=1):
        super(ContrastiveLoss, self).__init__()
        self.margin = margin

    def forward(self, y1, y2, label):
        euclidean_dist = F.pairwise_distance(y1, y2)
        return torch.mean(
            (1 - label) * torch.pow(euclidean_dist, 2)
            + (label * torch.pow(torch.clamp(self.margin - euclidean_dist, min=0), 2))
        )


class CosineSimilarityLoss(nn.Module):
    def __init__(self):
        super(CosineSimilarityLoss, self).__init__()

    def forward(self, output1, output2, label):
        cosine_sim = F.cosine_similarity(output1, output2, dim=1)

        # For similar pairs (label=0), maximize similarity (minimize 1 - cosine_sim)
        # For dissimilar pairs (label=1), push similarity below margin
        loss_similar = (1 - label) * (1 - cosine_sim)
        loss_dissimilar = label * torch.clamp(cosine_sim - 0.5, min=0.0)

        # Average loss over the batch
        loss = torch.mean(loss_similar + loss_dissimilar)
        return loss


class SiameseDataset(Dataset):
    def __init__(self, dataset, preload=True):
        self.dataset = dataset
        self.preload = preload
        self.pairs = []
        self.label_dict = {}
        self.unique_landmarks = set()

        print("Started loading label_dict and unique_landmarks")
        for index in range(len(dataset)):
            y1 = self.dataset.get_label(index)
            self.unique_landmarks.add(y1)
            if y1 in self.label_dict:
                self.label_dict[y1].append(index)
            else:
                self.label_dict[y1] = [index]
        print("Completed loading label_dict and unique_landmarks")

        if self.preload:
            print("Started preloading SiameseDataset")
            for index in range(len(dataset)):
                index, index2, label = self.get_pair_indices_label(index)
                self.pairs.append((index, index2, label))
            print("Completed preloading SiameseDataset")

    def __getitem__(self, index):
        if self.preload:
            index1, index2, label = self.pairs[index]
        else:
            index1, index2, label = self.get_pair_indices_label(index)

        x1, y1 = self.dataset[index1]
        x2, y2 = self.dataset[index2]
        return x1, x2, torch.tensor(label, dtype=torch.float32)

    def __len__(self):
        return len(self.dataset)

    def get_pair_indices_label(self, index):
        y1 = self.dataset.get_label(index)
        same_landmark = random.randint(0, 1)
        if same_landmark:
            set_to_check = self.label_dict.get(y1)
            while True:
                index2 = random.choice(set_to_check)
                y2 = self.dataset.get_label(index2)
                if y1 == y2:
                    break
            label = 0
        else:
            while True:
                random_landmark = random.choice(list(self.unique_landmarks - set([y1])))
                set_to_check = self.label_dict.get(random_landmark)
                index2 = random.choice(set_to_check)
                y2 = self.dataset.get_label(index2)
                # Different landmark
                if y1 != y2:
                    break
            label = 1
        return index, index2, label


# Dataset
class LandmarksDataset(Dataset):
    def __init__(self, image_paths, landmark_ids, transform, preload=False):
        self.image_paths = image_paths
        self.landmark_ids = landmark_ids
        self.transforms = transform
        self.preload = preload
        if self.preload:
            print("Started preloading LandmarksDataset")
            self.images = [Image.open(path).convert("RGB") for path in image_paths]
            print("Completed preloading LandmarksDataset")

    def __getitem__(self, index):
        if self.preload:
            image = self.images[index]
        else:
            image = Image.open(self.image_paths[index]).convert("RGB")
        label = self.landmark_ids[index]
        transformed_image = self.transforms(image)
        return transformed_image, label

    def get_label(self, index):
        label = self.landmark_ids[index]
        return label

    def __len__(self):
        return len(self.image_paths)

In [161]:
def evaluate_siamese_model(model, test_loader, test_dataset, device, top_k=5):
    model.eval()

    # Initialize torchmetrics for pair classification
    accuracy_metric = Accuracy(task="binary").to(device)
    map_metric = RetrievalMAP().to(device)

    # Lists for retrieval metrics
    precisions = []
    recalls = []

    # Step 1: Compute embeddings for all test images
    embeddings = []
    labels = []
    with torch.no_grad():
        for img, label in DataLoader(
            test_dataset, batch_size=128, shuffle=False, num_workers=8, pin_memory=True
        ):
            img = img.to(device)
            emb = model.forward_one(img)
            embeddings.append(emb.cpu())
            labels.append(label.cpu())
            # Save memory
            del img

    embeddings = torch.cat(embeddings)
    labels = torch.cat(labels)

    # Step 2: Retrieval metrics (Precision, Recall, F1, mAP)
    for i in range(len(test_dataset)):
        query_emb = embeddings[i : i + 1]
        query_label = labels[i]

        # Compute cosine similarities (since you’re testing CosineSimilarityLoss)
        similarities = F.cosine_similarity(query_emb, embeddings, dim=1)
        # Exclude query itself
        similarities[i] = -float("inf")
        _, indices = torch.topk(similarities, k=top_k, largest=True)
        retrieved_labels = labels[indices]

        # Ground truth: 1 if same class, 0 otherwise
        y_true = (retrieved_labels == query_label).numpy()
        y_pred = np.ones(top_k)

        precision, recall, f1, _ = precision_recall_fscore_support(
            y_true, y_pred, average="binary", zero_division=0
        )
        precisions.append(precision)
        recalls.append(recall)

        # mAP: Average Precision for this query
        y_true_all = (labels == query_label).float()
        # Exclude query itself
        y_true_all[i] = 0
        preds = similarities

        indices = torch.ones_like(y_true_all, dtype=torch.int64) * i
        map_metric.update(preds, y_true_all, indexes=indices)

    # Step 3: Pair classification accuracy
    with torch.no_grad():
        for img1, img2, label in test_loader:
            img1, img2, label = img1.to(device), img2.to(device), label.to(device)
            output1, output2 = model(img1, img2)
            similarities = F.cosine_similarity(output1, output2, dim=1)
            # Threshold for similar/dissimilar
            predictions = (similarities > 0.5).float()
            accuracy_metric.update(predictions, label)
            # save memory
            del img1
            del img2

    # Compute average metrics
    mean_precision = np.mean(precisions)
    mean_recall = np.mean(recalls)
    mean_f1 = (
        2 * (mean_precision * mean_recall) / (mean_precision + mean_recall)
        if (mean_precision + mean_recall) > 0
        else 0
    )

    average_precision = map_metric.compute().item()
    accuracy = accuracy_metric.compute().item()

    return {
        "accuracy": accuracy,
        "precision": mean_precision,
        "recall": mean_recall,
        "f1": mean_f1,
        "average_precision": float(average_precision),
    }

In [162]:
def find_similar_images(query_image, image_set, model, top_k=5):
    model.eval()
    query_embedding = model.forward_one(query_image.unsqueeze(0).to(device))
    distances = []
    for img in image_set:
        img_embedding = model.forward_one(img.unsqueeze(0).to(device))
        distances.append(F.pairwise_distance(query_embedding, img_embedding))
    return torch.argsort(torch.tensor(distances))[:top_k]

In [163]:
# Checkpointing functions
def save_checkpoint(model, optimizer, epoch, loss, checkpoint_dir, filename):
    os.makedirs(checkpoint_dir, exist_ok=True)
    checkpoint_path = os.path.join(checkpoint_dir, filename)
    torch.save(
        {
            "epoch": epoch,
            "model_state_dict": model.state_dict(),
            "optimizer_state_dict": optimizer.state_dict(),
            "loss": loss,
        },
        checkpoint_path,
    )
    # Log checkpoint to MLflow
    mlflow.log_artifact(checkpoint_path, artifact_path="checkpoints")


def load_checkpoint(model, optimizer, checkpoint_path, device):
    checkpoint = torch.load(checkpoint_path, map_location=device)
    model.load_state_dict(checkpoint["model_state_dict"])
    optimizer.load_state_dict(checkpoint["optimizer_state_dict"])
    epoch = checkpoint["epoch"]
    loss = checkpoint["loss"]
    return epoch, loss

In [164]:
def train_pytorch_model(
    model_name,
    model,
    learning_rate,
    loss_function,
    total_epochs,
    train_loader,
    test_loader,
    test_dataset,
    dl_reports,
):
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(device)
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)

    # Check for existing checkpoint
    start_epoch = 0
    checkpoint_file = (
        f"{model_name}_lr{learning_rate}_{loss_function.__class__.__name__}.pth"
    )
    checkpoint_path = os.path.join("checkpoints", checkpoint_file)
    if os.path.exists(checkpoint_path):
        start_epoch, last_loss = load_checkpoint(
            model, optimizer, checkpoint_path, device
        )
        print(f"Resuming from epoch {start_epoch + 1}, last loss: {last_loss:.3f}")

    if start_epoch == total_epochs:
        print(f"Already completed this model")

    with mlflow.start_run(
        run_name=f"{model_name}_lr{learning_rate}_{loss_function.__class__.__name__}_epochs{total_epochs}"
    ):
        # Log parameters
        mlflow.log_param("model_name", model_name)
        mlflow.log_param("learning_rate", learning_rate)
        mlflow.log_param("loss_function", loss_function.__class__.__name__)
        mlflow.log_param("epochs", total_epochs)
        mlflow.log_param("batch_size", train_loader.batch_size)
        mlflow.log_param("top_k", 5)
        print(
            f"Model: {model_name} - LR {learning_rate} - Loss {loss_function.__class__.__name__} - Epochs {total_epochs}"
        )

        for epoch in range(start_epoch, total_epochs):
            start = time.perf_counter()
            model.train()
            total_loss = 0.0

            for x1, x2, label in train_loader:
                x1, x2, label = x1.to(device), x2.to(device), label.to(device)

                optimizer.zero_grad()
                y1, y2 = model(x1, x2)
                loss = loss_function(y1, y2, label)
                loss.backward()
                optimizer.step()
                total_loss += loss.item()  # Save memory
                del x1
                del x2

            elapsed = time.perf_counter() - start
            avg_loss = total_loss / len(train_loader)
            print(f"Elapsed: {elapsed:.3f}s")
            print(
                f"Epoch [{epoch+1}/{num_epochs}], Loss: {total_loss / len(train_loader):.3f}"
            )
            # Save checkpoint
            save_checkpoint(
                model, optimizer, epoch, avg_loss, "checkpoints", checkpoint_file
            )

            # Eval metrics
            metrics = evaluate_siamese_model(
                model, test_loader, test_dataset, device, top_k=5
            )
            print(
                f"Metrics: Accuracy={metrics['accuracy']:.4f}, Precision={metrics['precision']:.4f}, "
                f"Recall={metrics['recall']:.4f}, F1={metrics['f1']:.4f}, average_precision={metrics['average_precision']:.4f}"
            )

            # Log metrics to MLflow
            mlflow.log_metric("train_loss", avg_loss, step=epoch)
            mlflow.log_metric("accuracy", metrics["accuracy"], step=epoch)
            mlflow.log_metric("precision", metrics["precision"], step=epoch)
            mlflow.log_metric("recall", metrics["recall"], step=epoch)
            mlflow.log_metric("f1", metrics["f1"], step=epoch)
            mlflow.log_metric("average_precision", metrics["average_precision"], step=epoch)

            dl_reports.append(
                {
                    "model": model_name,
                    "epoch": epoch,
                    **metrics,
                }
            )

        mlflow.pytorch.log_model(model, name=model_name)

In [165]:
# Define transforms
transform = transforms.Compose(
    [
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
    ]
)

X = subset_train_df["image_path"].tolist()
y = subset_train_df["landmark_id"].astype(dtype=int).tolist()

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

train_set = LandmarksDataset(
    image_paths=X_train, landmark_ids=y_train, transform=transform, preload=False
)
test_set = LandmarksDataset(
    image_paths=X_test, landmark_ids=y_test, transform=transform, preload=False
)
siamese_train_dataset = SiameseDataset(train_set, preload=True)

siamese_test_dataset = SiameseDataset(test_set, preload=True)

train_loader = DataLoader(
    siamese_train_dataset, batch_size=128, shuffle=True, num_workers=8, pin_memory=True
)
test_loader = DataLoader(
    siamese_test_dataset, batch_size=128, shuffle=True, num_workers=8, pin_memory=True
)

print(f"train size {len(train_loader)} test size {len(test_loader)}")

Started loading label_dict and unique_landmarks
Completed loading label_dict and unique_landmarks
Started preloading SiameseDataset
Completed preloading SiameseDataset
Started loading label_dict and unique_landmarks
Completed loading label_dict and unique_landmarks
Started preloading SiameseDataset
Completed preloading SiameseDataset
train size 169 test size 43


In [166]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)

cuda


In [167]:
# Define models
siamese_cnn_resnet18 = SiameseNetwork()
siamese_cnn_resnet50 = SiameseNetwork(resnet50)
siamese_cnn_resnet152 = SiameseNetwork(resnet152)

In [168]:
deep_learning_model_reports = []

In [169]:
# Define hyperparameters
learning_rates = [0.001]
# learning_rates = [0.001, 0.01, 0.1, 1]
# num_epochs = [2, 5, 10]
num_epochs = [2]
# loss_functions = [ContrastiveLoss(), CosineSimilarityLoss()]
loss_functions = [ContrastiveLoss()]
dl_models = [
    (
        "Siamese CNN Resnet18",
        siamese_cnn_resnet18,
        learning_rates,
        loss_functions,
        num_epochs,
        train_loader,
        test_loader,
        test_set,
    ),
    # (
    #     "Siamese CNN Resnet50 ",
    #     siamese_cnn_resnet50,
    #     learning_rates,
    #     loss_functions,
    #     num_epochs,
    #     train_loader,
    #     test_loader,
    # test_dataset,
    # ),
    # (
    #     "Siamese CNN Resnet152",
    #     siamese_cnn_resnet152,
    #     learning_rates,
    #     loss_functions,
    #     num_epochs,
    #     train_loader,
    #     test_loader,
    #  test_dataset,
    # ),
]

In [170]:
# Train each model
for dl_model in dl_models:
    (
        model_name,
        model,
        lrs,
        loss_funcs,
        epochs,
        train_loader,
        test_loader,
        test_set,
    ) = dl_model
    for epoch in epochs:
        for lr in lrs:
            for loss_func in loss_funcs:
                train_pytorch_model(
                    model_name,
                    model,
                    lr,
                    loss_func,
                    epoch,
                    train_loader,
                    test_loader,
                    test_set,
                    deep_learning_model_reports,
                )

Model: Siamese CNN Resnet18 - LR 0.001 - Loss ContrastiveLoss - Epochs 2
Elapsed: 52.470s
Epoch [1/[2]], Loss: 0.620
Metrics: Accuracy=0.4902, Precision=0.1266, Recall=0.3547, F1=0.1867, average_precision=0.0679


TypeError: unsupported operand type(s) for ** or pow(): 'int' and 'dict'

In [94]:
deep_learning_model_reports

[]

In [None]:
dl_reports_df = pd.DataFrame(deep_learning_model_reports)
dl_reports_df.to_parquet("data/deep_learning_model_reports.parquet", index=False)
dl_reports_df