In [96]:
from warnings import filterwarnings
import torch
import mlflow
import time
import mlflow.sklearn
import numpy as np
import pandas as pd
import plotly.express as px
from plotly.subplots import make_subplots
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from sklearn.preprocessing import LabelEncoder
from sklearn.svm import SVC
from torchmetrics.classification import MulticlassAveragePrecision
from torch import tensor
filterwarnings("ignore")


In [41]:
torch.backends.cuda.matmul.allow_tf32 = True
torch.backends.cudnn.allow_tf32 = True

## Data Analysis

In [3]:
train_df = pd.read_parquet("data/joined_features_all.parquet")

In [117]:
train_df.shape

(1580470, 13)

In [4]:
train_df.sample(5)

Unnamed: 0,id,landmark_id,image_path,width,height,aspect_ratio,mean_rgb,mean_r,mean_g,mean_b,x,y,local_binary_pattern
920607,3b8c42b99264d3b4,119094,3b8c42b99264d3b4.jpg,792,792,1.0,[114.08810325 144.88730742 191.58947748],114.088103,144.887307,191.589477,-30.021135,-23.72884,"[0.022301614631160087,0.013426563615957555,0.0..."
1037891,2064f83d3c592c6a,133893,2064f83d3c592c6a.jpg,800,533,1.500938,[70.37304174 59.18637664 43.84110694],70.373042,59.186377,43.841107,11.387449,8.295972,"[0.011930112570356474,0.009090056285178236,0.0..."
351151,2ed2bc8bfd135415,44508,2ed2bc8bfd135415.jpg,800,529,1.512287,[117.92771267 131.52782845 130.10267722],117.927713,131.527828,130.102677,-62.134388,32.012714,"[0.04900756143667297,0.021512287334593573,0.02..."
805421,6ba23481fb2b1b6d,103956,6ba23481fb2b1b6d.jpg,800,600,1.333333,[ 94.71058542 128.06776667 147.02257292],94.710585,128.067767,147.022573,-83.724937,4.614239,"[0.03969375,0.024647916666666665,0.01839791666..."
1089456,4b94b041a1f1d809,139706,4b94b041a1f1d809.jpg,800,531,1.506591,[109.51996469 107.6682533 76.30091102],109.519965,107.668253,76.300911,-37.666107,13.911551,"[0.05657485875706215,0.03700329566854991,0.022..."


In [4]:
# Make local_binary_pattern entries numpy ndarrays
def to_ndarray(v):
    s = v.strip().strip('[]')
    parts = s.replace(',', ' ').split()
    return [float(x) for x in parts]

In [5]:
train_df['local_binary_pattern'] = train_df['local_binary_pattern'].apply(to_ndarray)

In [6]:
train_df['landmark_id'] = train_df['landmark_id'].astype(str)

In [119]:
landmark_counts = train_df['landmark_id'].value_counts()
landmark_counts.head()

landmark_id
138982    6272
126637    2231
20409     1758
83144     1741
113209    1135
Name: count, dtype: int64

In [120]:
unique_landmarks = train_df['landmark_id'].unique()
len(unique_landmarks)

81313

There are 81313 unique landmarks in the dataset.

In [121]:
top25 = landmark_counts.head(25)
fig = px.bar(
    x=top25.index.astype(str),
    y=top25.values,
    labels={'x': 'Landmark ID', 'y': 'Count'},
    title='Top 25 Landmark IDs by Count: 138982 has 3 times as much as next highest landmark'
)
fig.update_layout(xaxis_tickangle=-45)
fig.show()
fig.write_image('images/landmark_count.png', height=500, width=1200)

The fact that landmark 138982 has 3 times as many images as the next highest landmark is concerning. We should keep an eye on that and may need to reduce the number of samples from that landmark id.

In [None]:
fig = px.histogram(
    train_df,
    x='aspect_ratio',
    nbins=20,
    labels={'aspect_ratio': 'Aspect Ratio', 'count': 'Count'},
    title='Aspect ratio (proportions) is similar for most images'
)
fig.update_layout(height=500, width=1200)
fig.show()
fig.write_image('images/aspect_ratio_histogram.png', height=500, width=1200)

In [None]:

fig = make_subplots(
    rows=1,
    cols=3,
    subplot_titles=("Mean R vs Mean G", "Mean G vs Mean B", "Mean R vs Mean B")
)

trace_rg = px.scatter(train_df, x='mean_r', y='mean_g').data[0]
trace_gb = px.scatter(train_df, x='mean_g', y='mean_b').data[0]
trace_rb = px.scatter(train_df, x='mean_r', y='mean_b').data[0]

fig.add_trace(trace_rg, row=1, col=1)
fig.add_trace(trace_gb, row=1, col=2)
fig.add_trace(trace_rb, row=1, col=3)

fig.update_xaxes(title_text="Mean R", row=1, col=1)
fig.update_yaxes(title_text="Mean G", row=1, col=1)
fig.update_xaxes(title_text="Mean G", row=1, col=2)
fig.update_yaxes(title_text="Mean B", row=1, col=2)
fig.update_xaxes(title_text="Mean R", row=1, col=3)
fig.update_yaxes(title_text="Mean B", row=1, col=3)

fig.update_layout(
    title_text="Pairwise relationships among Mean R, Mean G, and Mean B across all images. Looks noisy at first",
)

fig.show()
fig.write_image('images/color_channel_pairwise_all_landmarks.png', height=500, width=1200)


In [123]:
selected_ids = unique_landmarks[:5]
five_landmarks_df = train_df[train_df['landmark_id'].isin(selected_ids)].copy()

These are the landmarks selected by the above code. Since we are dealing with over 80,000 unique landmarks and over 1.5 million images, I will limit our analysis to a subset of landmarks.
- Landmark 1  
![Landmark 1](data/train/1/7/6/17660ef415d37059.jpg)

- Landmark 7  
![Landmark 7](data/train/2/8/b/28b13f94a6f1f3c1.jpg)

- Landmark 9  
![Landmark 9](data/train/0/1/9/0193b65bb58d2c77.jpg)

- Landmark 11  
![Landmark 11](data/train/1/a/6/1a6cb1deed46bb17.jpg)

- Landmark 12  
![Landmark 12](data/train/1/4/9/1492a5d344495391.jpg)


In [None]:
fig = make_subplots(
    rows=1,
    cols=3,
    subplot_titles=("Mean R vs Mean G", "Mean G vs Mean B", "Mean R vs Mean B")
)

fig_rg = px.scatter(five_landmarks_df, x='mean_r', y='mean_g', color='landmark_id')
for tr in fig_rg.data:
    fig.add_trace(tr, row=1, col=1)

fig_gb = px.scatter(five_landmarks_df, x='mean_g', y='mean_b', color='landmark_id')
for tr in fig_gb.data:
    tr.showlegend = False
    fig.add_trace(tr, row=1, col=2)

fig_rb = px.scatter(five_landmarks_df, x='mean_r', y='mean_b', color='landmark_id')
for tr in fig_rb.data:
    tr.showlegend = False
    fig.add_trace(tr, row=1, col=3)

fig.update_xaxes(title_text="Mean R", row=1, col=1)
fig.update_yaxes(title_text="Mean G", row=1, col=1)
fig.update_xaxes(title_text="Mean G", row=1, col=2)
fig.update_yaxes(title_text="Mean B", row=1, col=2)
fig.update_xaxes(title_text="Mean R", row=1, col=3)
fig.update_yaxes(title_text="Mean B", row=1, col=3)

fig.update_layout(
    title_text="Color channels across 5 landmarks. Indicates color channels could be leverage for landmark identification",
)

fig.show()
fig.write_image('images/color_channel_pairwise_5_landmarks.png', height=500, width=1200)


In [None]:

fig = px.scatter_3d(
    five_landmarks_df,
    x='mean_r',
    y='mean_g',
    z='mean_b',
    color='landmark_id',
    title='3D Scatter of mean RGB for 5 Landmark IDs'
)
fig.update_layout(width=1200, height=900)
fig.show()


In [124]:
def mean_histogram(arrs):
    stacked = np.vstack(arrs)
    return stacked.mean(axis=0)

In [125]:
# Plot the Mean Local Binary Pattern histogram
lbp_df = five_landmarks_df.dropna(subset=['local_binary_pattern']).copy()

first_hist = lbp_df.iloc[0]['local_binary_pattern']
n_bins = len(first_hist)
# Get histogram mean grouped by landmark
mean_hists = lbp_df.groupby('landmark_id', observed=True)['local_binary_pattern'].apply(mean_histogram)

records = []
for landmark_id, hist in mean_hists.items():
    for i, v in enumerate(hist):
        records.append({'landmark_id': landmark_id, 'bin': i, 'value': float(v)})
plot_df = pd.DataFrame(records)

fig = px.line(
    plot_df,
    x='bin',
    y='value',
    color='landmark_id',
    markers=True,
    labels={'bin': 'Local Binary Pattern Bin', 'value': 'Mean Frequency', 'landmark_id': 'Landmark ID'},
    title='Mean Local Binary Pattern Histogram per Landmark. Bin 12 & 13 shows differences in texture'
)
fig.update_layout(width=1200, height=700)
fig.show()
fig.write_image('images/lbp_mean_histogram_5_landmarks.png', width=1200, height=700)


Local Binary Patterns is a texture descriptor. It does this by comparing a pixel to its neighboring pixels. It captures the intensity of each pixel and compares their intensities. It will assign 1 if the neighboring pixel's intensity is greater than the pixel currently being assessed. Otherwise, LBP will assign 0. The algorithm then combines the binary values of all the neighboring pixels to create a value for the pixel being assessed. It does this for all the pixels in the image to create a binary code representing the texture of the image.

In [None]:
fig = px.scatter(
    five_landmarks_df,
    x='x',
    y='y',
    color='landmark_id',
    labels={'x': 'Embedding 2d X', 'y': 'Embedding 2d Y', 'landmark_id': 'Landmark ID'},
    title='2d Embedding shows promise in clustering images by embeddings'
)
fig.update_layout(width=1200, height=700)
fig.show()
fig.write_image('images/embedding_2d_scatter_5_landmarks.png', width=1200, height=700)

In [None]:
fig = px.scatter(
    train_df[train_df['landmark_id'].isin(unique_landmarks[:25])],
    x='x',
    y='y',
    color='landmark_id',
    labels={'x': 'Embedding 2d X', 'y': 'Embedding 2d Y', 'landmark_id': 'Landmark ID'},
    title='2d Embedding shows promise in clustering images by embeddings 25 landmarks'
)
fig.update_layout(width=1200, height=700)
fig.show()
fig.write_image('images/embedding_2d_scatter_25_landmarks.png', width=1200, height=700)

A 2D embedding squashes an image down to just two numbers: X and Y. Each image becomes a single point on a 2D plane. I used t-SNE to achieve this, which, like principal component analysis, reduces dimensionality. We take the full embedding of an image and convert that full embedding down to two features.


### Modeling

In [71]:
mlflow.set_experiment("Landmark Recognition")

2025/09/01 19:29:27 INFO mlflow.tracking.fluent: Experiment with name 'Landmark Recognition' does not exist. Creating a new experiment.


<Experiment: artifact_location='file:///mnt/c/Users/Matt/workspace/landmarks/mlruns/745548148810810489', creation_time=1756780167672, experiment_id='745548148810810489', last_update_time=1756780167672, lifecycle_stage='active', name='Landmark Recognition', tags={}>

In [72]:
# Expand local_binary_pattern into separate columns for modeling
lbp_expanded = pd.DataFrame(train_df['local_binary_pattern'].tolist(), index=train_df.index)
lbp_expanded.columns = [f'lbp_{i}' for i in range(lbp_expanded.shape[1])]

train_df_expanded_lbp = pd.concat([train_df.drop(columns=['local_binary_pattern']), lbp_expanded], axis=1)

In [73]:
train_df_expanded_lbp

Unnamed: 0,id,landmark_id,image_path,width,height,aspect_ratio,mean_rgb,mean_r,mean_g,mean_b,...,lbp_16,lbp_17,lbp_18,lbp_19,lbp_20,lbp_21,lbp_22,lbp_23,lbp_24,lbp_25
0,17660ef415d37059,1,17660ef415d37059.jpg,533,800,0.666250,[126.00759381 119.0244606 113.26428471],126.007594,119.024461,113.264285,...,0.007265,0.019275,0.009409,0.010033,0.011091,0.015943,0.022183,0.029939,0.203417,0.433513
1,92b6290d571448f6,1,92b6290d571448f6.jpg,534,800,0.667500,[97.5096559 93.19100421 86.4288764 ],97.509656,93.191004,86.428876,...,0.011941,0.013977,0.011393,0.013102,0.015047,0.020524,0.025602,0.027809,0.071444,0.455536
2,cd41bf948edc0340,1,cd41bf948edc0340.jpg,800,512,1.562500,[89.43367188 83.05516602 74.40758057],89.433672,83.055166,74.407581,...,0.010195,0.014504,0.009797,0.010339,0.011846,0.017148,0.024343,0.031929,0.086533,0.488169
3,fb09f1e98c6d2f70,1,fb09f1e98c6d2f70.jpg,532,800,0.665000,[107.91263863 106.76824483 109.26745771],107.912639,106.768245,109.267458,...,0.009030,0.024250,0.011074,0.011675,0.012620,0.017000,0.021339,0.027850,0.202655,0.404803
4,25c9dfc7ea69838d,7,25c9dfc7ea69838d.jpg,800,600,1.333333,[132.4216875 137.05765 144.99947083],132.421687,137.057650,144.999471,...,0.014794,0.028979,0.013806,0.013513,0.014046,0.017329,0.021167,0.024594,0.101777,0.371237
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1580465,72c3b1c367e3d559,203092,72c3b1c367e3d559.jpg,800,533,1.500938,[128.88117026 127.38813086 116.54751407],128.881170,127.388131,116.547514,...,0.011027,0.018539,0.010720,0.011100,0.011914,0.016867,0.023136,0.030579,0.114010,0.454266
1580466,7a6a2d9ea92684a6,203092,7a6a2d9ea92684a6.jpg,800,532,1.503759,[127.55681156 124.83408835 114.69496711],127.556812,124.834088,114.694967,...,0.011494,0.024232,0.012446,0.012303,0.012761,0.016833,0.022000,0.027133,0.159159,0.409908
1580467,9401fad4c497e1f9,203092,9401fad4c497e1f9.jpg,800,533,1.500938,[129.93018058 130.93818246 127.94207083],129.930181,130.938182,127.942071,...,0.008391,0.025000,0.010544,0.010973,0.011515,0.015701,0.020115,0.024430,0.290068,0.353084
1580468,aacc960c9a228b5f,203092,aacc960c9a228b5f.jpg,800,533,1.500938,[142.47123358 142.97529784 136.52950516],142.471234,142.975298,136.529505,...,0.013895,0.037812,0.016930,0.015894,0.014125,0.017326,0.019805,0.022676,0.239557,0.320035


In [74]:
# earlier training shows that my machine cant train such a large model.
unique_landmarks_to_train_on = 100
rng = np.random.default_rng(42)
selected_landmark_ids = rng.choice(train_df_expanded_lbp['landmark_id'], size=unique_landmarks_to_train_on, replace=False)
subset_train_df = train_df_expanded_lbp[train_df_expanded_lbp['landmark_id'].isin(selected_landmark_ids)]

In [75]:
subset_train_df

Unnamed: 0,id,landmark_id,image_path,width,height,aspect_ratio,mean_rgb,mean_r,mean_g,mean_b,...,lbp_16,lbp_17,lbp_18,lbp_19,lbp_20,lbp_21,lbp_22,lbp_23,lbp_24,lbp_25
69217,009de86d7dc3e14a,8954,009de86d7dc3e14a.jpg,800,523,1.529637,[104.91198136 112.79541348 105.32609943],104.911981,112.795413,105.326099,...,0.009814,0.009771,0.009338,0.010569,0.012335,0.016895,0.024498,0.034739,0.064388,0.519601
69218,017448f0b8e77ddc,8954,017448f0b8e77ddc.jpg,800,534,1.498127,[ 98.81215356 102.82689373 90.45805946],98.812154,102.826894,90.458059,...,0.012231,0.011859,0.010808,0.012666,0.014431,0.019199,0.024658,0.028996,0.058272,0.470012
69219,109b795f098c5bbd,8954,109b795f098c5bbd.jpg,800,533,1.500938,[128.91893762 123.895 119.82210131],128.918938,123.895000,119.822101,...,0.011714,0.013466,0.010373,0.010530,0.012181,0.015997,0.021595,0.027470,0.133379,0.414583
69220,14b63e3cea48bfcf,8954,14b63e3cea48bfcf.jpg,600,800,0.750000,[126.03130208 116.35970417 121.84090208],126.031302,116.359704,121.840902,...,0.014102,0.014225,0.011806,0.013008,0.014965,0.019171,0.023902,0.026621,0.064933,0.430696
69221,19f10ba1d79f0a02,8954,19f10ba1d79f0a02.jpg,800,616,1.298701,[121.42228693 112.61638799 73.71635755],121.422287,112.616388,73.716358,...,0.011550,0.011477,0.010733,0.011741,0.013486,0.017914,0.024367,0.031532,0.059184,0.478669
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1541859,e203710bff9e49c6,198124,e203710bff9e49c6.jpg,800,600,1.333333,[102.36249583 114.3944375 117.27861667],102.362496,114.394437,117.278617,...,0.017592,0.019119,0.012827,0.012544,0.012006,0.013583,0.015902,0.025487,0.081102,0.369904
1541860,e6244a1f672a393d,198124,e6244a1f672a393d.jpg,800,600,1.333333,[108.10469375 82.18396458 58.87403958],108.104694,82.183965,58.874040,...,0.022371,0.030585,0.019621,0.018537,0.018156,0.019306,0.018738,0.016121,0.128194,0.270435
1541861,e7c991f4c13a226f,198124,e7c991f4c13a226f.jpg,800,450,1.777778,[48.32043889 40.78554167 35.662225 ],48.320439,40.785542,35.662225,...,0.013656,0.030708,0.015628,0.016136,0.015975,0.019492,0.021747,0.022492,0.249972,0.283503
1541862,f1539d821eb64840,198124,f1539d821eb64840.jpg,640,480,1.333333,[139.96185872 139.72179036 144.7515332 ],139.961859,139.721790,144.751533,...,0.012298,0.017318,0.011569,0.011107,0.012083,0.016624,0.022894,0.031501,0.114652,0.441559


In [91]:
# Use subset of landmarks to train
X = subset_train_df.drop(['landmark_id', 'id', 'image_path', 'mean_rgb'], axis=1)
y = subset_train_df['landmark_id'].astype(int)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Encode labels to ensure they are 0-based consecutive integers
le = LabelEncoder()
# Fit LabelEncoder on all possible classes for Average precision score
le.fit(y)
y_train_encoded = le.transform(y_train)
y_test_encoded = le.transform(y_test)
tensor_target = torch.tensor(y_test_encoded, dtype=torch.long)

In [97]:
sklearn_models = [
    "Random Forest",
    "Logistic Regression",
    "SVC",
    "Gradient Boosting",
]

models = [
    (
        "Random Forest",
         RandomForestClassifier(n_estimators=100, max_depth=7, n_jobs=-1, random_state=42),
         (X_train, y_train),
         (X_test, y_test),
         (y_train_encoded, y_test_encoded),
    ),
    (
        "Logistic Regression",
         LogisticRegression(random_state=42),
         (X_train, y_train),
         (X_test, y_test),
         (y_train_encoded, y_test_encoded),

    ),
    (
        "SVC",
         SVC(probability=True, random_state=42),
         (X_train, y_train),
         (X_test, y_test),
         (y_train_encoded, y_test_encoded),

    ),
    (
        "Gradient Boosting",
         GradientBoostingClassifier(n_estimators=50, max_depth=1,  random_state=42),
         (X_train, y_train),
         (X_test, y_test),
         (y_train_encoded, y_test_encoded),
    ),
]

In [98]:
reports = []

In [99]:
for model_name, model, train, test, y_encoded in models:
    X_train, y_train = train
    X_test, y_test = test
    y_train_encoded, y_test_encoded = y_encoded
    tensor_target = tensor(y_test_encoded, dtype=torch.long)

    print(f"Model: {model_name}")
    start = time.perf_counter()
    model.fit(X_train, y_train)
    elapsed = time.perf_counter() - start
    print(f"Elapsed: {elapsed:.3f}s")

    y_pred = model.predict(X_test)
    y_pred_proba = model.predict_proba(X_test)

    report = classification_report(y_test_encoded, le.transform(y_pred), output_dict=True)
    metric = MulticlassAveragePrecision(
        num_classes=len(y_pred_proba[0]),
        average="macro"
    )
    tensor_pred = tensor(y_pred_proba)
    average_precision = metric(tensor_pred, tensor_target)
    print("Average precision:", average_precision)
    reports.append((report, average_precision))


Model: Random Forest
Elapsed: 0.409s
Average precision: tensor(0.3140)
Model: Logistic Regression
Elapsed: 1.123s
Average precision: tensor(0.0786)
Model: SVC
Elapsed: 17.820s
Average precision: tensor(0.1536)
Model: Gradient Boosting
Elapsed: 154.972s
Average precision: tensor(0.2627)


In [101]:
reports[0]

({'0': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 11.0},
  '2': {'precision': 0.8,
   'recall': 0.09523809523809523,
   'f1-score': 0.1702127659574468,
   'support': 42.0},
  '3': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 15.0},
  '4': {'precision': 0.5,
   'recall': 0.1,
   'f1-score': 0.16666666666666666,
   'support': 20.0},
  '5': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 12.0},
  '6': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 2.0},
  '7': {'precision': 1.0,
   'recall': 0.13043478260869565,
   'f1-score': 0.23076923076923078,
   'support': 23.0},
  '8': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 1.0},
  '9': {'precision': 0.2857142857142857,
   'recall': 0.10526315789473684,
   'f1-score': 0.15384615384615385,
   'support': 19.0},
  '10': {'precision': 1.0,
   'recall': 0.07142857142857142,
   'f1-score': 0.13333333333333333,
   'support': 28.0},
  '11': {'precision': 0.0, 'recall': 0

In [106]:
for i, model_item in enumerate(models):
    model_name = model_item[0]
    model = model_item[1]
    X_train, _ = model_item[2]
    report, average_precision = reports[i]
    with mlflow.start_run(run_name=model_name):
        mlflow.log_params(model.get_params())
        mlflow.log_metric('accuracy', report['accuracy'])
        mlflow.log_metric('average_precision', average_precision)
        mlflow.log_metric('macro_f1', report['macro avg']['f1-score'])
        mlflow.log_metric('macro_recall', report['macro avg']['recall'])
        mlflow.log_metric('macro_precision', report['macro avg']['precision'])
        mlflow.log_metric('weighted_f1', report['weighted avg']['f1-score'])
        mlflow.log_metric('weighted_recall', report['weighted avg']['recall'])
        mlflow.log_metric('weighted_precision', report['weighted avg']['precision'])
        if model_name in sklearn_models:
            mlflow.sklearn.log_model(model, name=model_name, input_example=X_train.sample(1))

## Model Evaluation

In [114]:
# Build comparison DataFrame from `reports` and `models`

def get_metrics(report, average_precision, model_name):
    return [
        {
            'metric': 'accuracy',
            'value': report['accuracy'],
            'model_name': model_name,
        },
        {
            'metric': 'average_precision',
            'value': float(average_precision),
            'model_name': model_name,
        },
        {
            'metric': 'macro_f1',
            'value': report['macro avg']['f1-score'],
            'model_name': model_name,
        },
        {
            'metric': 'macro_recall',
            'value': report['macro avg']['recall'],
            'model_name': model_name,
        },
        {
            'metric': 'macro_precision',
            'value': report['macro avg']['precision'],
            'model_name': model_name,
        },
        {
            'metric': 'weighted_f1',
            'value': report['weighted avg']['f1-score'],
            'model_name': model_name,
        },
        {
            'metric': 'weighted_recall',
            'value': report['weighted avg']['recall'],
            'model_name': model_name,
        },
        {
            'metric': 'weighted_precision',
            'value': report['weighted avg']['precision'],
            'model_name': model_name,
        },
    ]

metrics_records = []
for i, model_item in enumerate(models):
    model_name = model_item[0]
    report, average_precision = reports[i]
    metrics_records += get_metrics(report, average_precision, model_name)

metrics_df = pd.DataFrame(metrics_records)


In [116]:

fig = px.bar(
    metrics_df,
    x='model_name',
    y='value',
    color='metric',
    barmode='group',
    title='Model Metric Comparison: Random Forest has best mAP score',
    labels={'Value': 'Score', 'model_name': 'Model', 'metric': 'Metric'}
)
fig.update_layout(width=1200, height=600, legend_title_text='Metric')
fig.show()

fig.write_image('images/model_metrics_comparison.png', width=1200, height=600)