A notebook to:
1) Answer the question of whether MSE loss is a poor choice if our test statistic is IOU
2) Look at the actual model predictions vs targets to see if there are any obvious things we're doing wrong

In [None]:
from torch import load, from_numpy, tensor, no_grad
from torch.utils.data import DataLoader

import numpy as np
import matplotlib.pyplot as plt

from model_config import DataParams, TrainingParams, ArchitectureParams
from model_trainer import ModelTrainer
from cnn import CNN
from circle_dataset import CircleDataset
from circle_detection import show_circle, draw_circle, iou, CircleParams, count_ious_over_thresholds

## Generate Predictions

In [None]:
# Load the model.
# The checkpoint is a whole pickled object (later cells call `cnn.model`), not a
# state_dict, so it needs full unpickling. torch >= 2.6 defaults to
# weights_only=True, which rejects such files, hence the explicit flag.
# Full unpickling can execute arbitrary code: only load checkpoints you trust.
cnn = load("models/model_0_epoch_9.pth", weights_only=False)

In [None]:
# Load the trainer saved alongside the model.
# Trainer checkpoints in this notebook are used as full Python objects
# (attributes/methods are accessed on them), so they need full unpickling;
# torch >= 2.6 defaults to weights_only=True and would refuse the file.
# Full unpickling can execute arbitrary code: only load checkpoints you trust.
trainer = load("models/model_trainer_0_epoch_9.pth", weights_only=False)

In [None]:
# Build the validation dataset and wrap it in a loader that iterates it
# in a fixed (unshuffled) order, 64 samples per batch.
validation_dataset = CircleDataset(10000, 0.5)
val_loader = DataLoader(
    dataset=validation_dataset,
    shuffle=False,
    batch_size=64,
)

In [None]:
# Run the model over the whole validation loader and gather, row by row,
# every prediction together with its target.
cnn.model.eval()  # put the network in eval mode before inference

pred_rows = []
target_rows = []
with no_grad():  # inference only, no gradient tracking needed
    for data, targets in val_loader:
        batch_preds = cnn.model(data)
        pred_rows.extend(batch_preds.numpy())
        target_rows.extend(targets.numpy())

# Stack the collected rows into 2-D arrays (one row per sample).
all_preds = np.array(pred_rows)
all_targets = np.array(target_rows)


## Analyze Correlation Data
Will MSE loss work if our test statistic is IOU?

In [None]:
# Check the correlation between per-sample MSE and per-sample IOU, first on all
# validation samples and then on the low-loss subset only.
mse_scores = []
coord_mses = []
radius_mses = []
iou_scores = []

preds_tensor = tensor(all_preds)
target_tensor = tensor(all_targets)

# For all data: IOU plus the overall MSE, the MSE of the first two components
# (coordinates) and the MSE of the remaining component(s) (radius).
for idx, _ in enumerate(preds_tensor):
    current_iou = iou(CircleParams(*target_tensor[idx]), CircleParams(*preds_tensor[idx]))
    # Squared error is element-wise, so compute it once and slice it.
    squared_error = np.square(all_preds[idx] - all_targets[idx])

    mse_scores.append(np.mean(squared_error))
    coord_mses.append(np.mean(squared_error[0:2]))
    radius_mses.append(np.mean(squared_error[2:]))
    iou_scores.append(current_iou)

correlation = np.corrcoef(mse_scores, iou_scores)[0,1]
print("\nCorrelation between MSE and IOU: ", correlation)
coord_correlation = np.corrcoef(coord_mses, iou_scores)[0,1]
print("\nCorrelation between Coordinate MSE and IOU: ", coord_correlation)
radius_correlation = np.corrcoef(radius_mses, iou_scores)[0,1]
print("\nCorrelation between Radius MSE and IOU: ", radius_correlation)
print("Avg IOU: ", np.average(iou_scores))

plt.figure(figsize=(10, 6))
plt.scatter(mse_scores, iou_scores, alpha=0.5)
plt.grid(True, alpha=0.3)
plt.xlabel('Mean Square Error (MSE)')
plt.ylabel('Intersection over Union (IOU)')
plt.title('MSE vs IOU Correlation Plot')
plt.show()  # show the all-data plot next to its stats, before the subset output


# For data where the loss is looking good: if we train the model well (low MSE),
# can we expect a high IOU?
# The mask previously selected MSE > 50 while every label below said "< 50".
# The cutoff is now defined once and the labels are derived from it so they
# cannot drift apart. To inspect the high-loss subset instead, flip the
# comparison AND the "<" in the labels together.
mse_cutoff = 50
subset_label = f"Overall MSE < {mse_cutoff}"
mask = np.array(mse_scores) < mse_cutoff
filtered_iou = np.array(iou_scores)[mask]
print(f"Avg IOU | MSE < {mse_cutoff}: ", np.average(filtered_iou))


for mse, mse_str in zip([mse_scores, coord_mses, radius_mses],["Overall MSE", "Coordinate MSE", "Radius MSE"]):
    filtered_mse = np.array(mse)[mask]

    correlation = np.corrcoef(filtered_mse, filtered_iou)[0,1]
    print(f"\nCorrelation between {mse_str} and IOU | {subset_label}: ", correlation)

    plt.figure(figsize=(10, 6))
    plt.scatter(filtered_mse, filtered_iou, alpha=0.5)
    plt.grid(True, alpha=0.3)
    plt.xlabel(f'{mse_str}')
    plt.ylabel('Intersection over Union (IOU)')
    plt.title(f'{mse_str} vs IOU Correlation Plot | {subset_label}')

    plt.tight_layout()
    plt.show()

In [None]:
# Distribution of the per-sample MSE: histogram restricted to [0, 20], the share
# of samples at or below a handful of cutoffs, and a few percentiles.
plt.hist(mse_scores, range=(0,20), bins=30, alpha=0.5, label='Array 1')
mse_scores = np.array(mse_scores)  # array form enables the vectorised comparison below
thresholds = [1, 2, 3, 5, 10]
for cutoff in thresholds:
    at_or_below = mse_scores <= cutoff
    share_pct = at_or_below.mean() * 100
    print(f"{cutoff:.2f} or lower: {share_pct:.2f}%")
print(np.percentile(mse_scores, [25, 50, 75, 85, 90]))
plt.show()

In [None]:
# Distribution of the per-sample IOU: histogram over [0, 1], then for each
# cutoff the percentage and the count of samples strictly above it.
plt.hist(iou_scores, range=(0,1), bins=30, alpha=0.5, label='Array 1')
iou_scores = np.array(iou_scores)  # array form enables the vectorised comparison below
thresholds = [.1, 0.25, 0.5, 0.75, .9, .95]
for cutoff in thresholds:
    above = iou_scores > cutoff  # boolean mask, reused for both statistics
    share_pct = above.mean() * 100
    count_above = above.sum()

    print(f"{cutoff:.2f} or higher: {share_pct:.2f}%, {count_above}")
plt.show()

- IOU and MSE are (negatively) correlated, but not as tightly as I'd like.
- When the loss is lower, they are more correlated
- Overall, the radius MSE is more correlated with the IOU than the coordinate MSE is, which suggests that if we were to weight the loss terms we could get a more accurate IOU - but when I look at subgroups, this trend seems to be driven mostly by outliers, so it's probably not a good thing to pick up on
- Overall, the data suggest that with MSE loss we can probably get pretty accurate (say something like > .9 IOU on average), but that we'll struggle to get extremely accurate (>.95 on average)
- I think that's reasonable for this task, so I'll stick to MSE loss
- If we cared about that last little bit, we'd need to change our loss function - say to something that approximates the GIOU but for circles

## Look through data examples
Is there anything obvious happening that we can make better with a different loss or different training data?

In [None]:
# Eyeball the first few validation samples to make sure there's no obvious fix
# (e.g. radius isn't getting weighed enough, etc.): print the metrics, then
# overlay the target and predicted circles.

# Integer-rounded copies are only used for drawing.
all_preds_int = np.round(np.array(all_preds)).astype(int)
all_targets_int = np.round(np.array(all_targets)).astype(int)

for idx in range(12):
    fig, ax = plt.subplots(figsize=(8, 8))
    target_row = all_targets[idx]
    pred_row = all_preds[idx]

    # Metrics are computed on the raw (unrounded) values.
    current_iou = iou(CircleParams(*target_row), CircleParams(*pred_row))
    current_mse = np.mean(np.square(pred_row - target_row))

    print("IOU: ",current_iou)
    print("Target: ", target_row)
    print("Pred: ", pred_row)
    print("MSE: ", current_mse)

    # Target in blue, prediction in red, both semi-transparent on a blank 100x100 canvas.
    layers = (
        (all_targets_int[idx], "Blues"),
        (all_preds_int[idx], "Reds"),
    )
    for circle_args, colormap in layers:
        canvas = draw_circle(np.zeros((100, 100)), *circle_args)
        ax.imshow(canvas, cmap=colormap, alpha=0.5)

    ax.set_title(f"Circle {idx} - Target (blue) vs Prediction (red)")
    plt.show()

In [None]:
# Look at some examples where the model is performing poorly

# Alternative selections to swap in for `bad_mask`:
# bad_mask = (np.array(iou_scores) < .7) & (np.array(mse_scores) < 10)
# bad_mask = np.array(mse_scores) < 10 # look at some good ones
# bad_mask = np.array(iou_scores) < .1
# bad_mask = np.array(iou_scores) > .9 # some good IOUs as well
bad_mask = np.array(mse_scores) > 150

# Raw (unrounded) rows are used for the reported metrics so they agree with the
# mse_scores used to build the mask; rounded integer copies are used for drawing.
preds_selected = np.array(all_preds)[bad_mask]
targets_selected = np.array(all_targets)[bad_mask]
all_preds_bad = np.round(preds_selected).astype(int)
all_targets_bad = np.round(targets_selected).astype(int)

# Show at most 12 examples; the mask may select fewer (or none), and indexing
# past the end would raise IndexError.
n_examples = min(12, len(all_preds_bad))
if n_examples == 0:
    print("No samples match bad_mask")

for idx in range(n_examples):
    fig, ax = plt.subplots(figsize=(8, 8))
    current_iou = iou(CircleParams(*targets_selected[idx]), CircleParams(*preds_selected[idx]))
    current_mse = np.mean(np.square(preds_selected[idx] - targets_selected[idx]))

    print("IOU: ",current_iou)
    print("Target: ", targets_selected[idx])
    print("Pred: ", preds_selected[idx])
    print("MSE: ", current_mse)

    # Draw target circle
    target_img = draw_circle(np.zeros((100, 100)), *all_targets_bad[idx])
    ax.imshow(target_img, cmap="Blues", alpha=0.5)

    # Draw prediction circle in a different color
    pred_img = draw_circle(np.zeros((100, 100)), *all_preds_bad[idx])
    ax.imshow(pred_img, cmap="Reds", alpha=0.5)

    # idx is the position within the selected subset, not the original dataset index.
    ax.set_title(f"Circle {idx} - Target (blue) vs Prediction (red)")
    plt.show()

- Having explored the examples, nothing looks systematically off - it just seems like the model sometimes spits out nonsense
- The cases where the model performs well vs not aren't clearly different to the naked eye
- There isn't a clear pattern to how the model is off (e.g. more on coordinates, more on radius, etc.)

## Select and test the final model

In [None]:
# model 1 is continued training from model 0
# The trainer is a whole pickled object (`trainer.validation_losses` is read
# below), so it needs full unpickling; torch >= 2.6 defaults to
# weights_only=True and would refuse the file, hence the explicit flag.
# Full unpickling can execute arbitrary code: only load checkpoints you trust.
trainer = load("models/model_trainer_1_epoch_3.pth", weights_only=False)

In [None]:
# 1-based epoch numbers for the x-axis, one per recorded validation loss.
n_recorded_epochs = len(trainer.validation_losses)
epochs = range(1, n_recorded_epochs + 1)

In [None]:
# Validation and training loss per epoch on a single set of axes.
fig, ax = plt.subplots()
loss_curves = (
    (trainer.validation_losses, "Validation Loss"),
    (trainer.training_losses, "Training Loss"),
)
for losses, curve_label in loss_curves:
    ax.plot(epochs, losses, label=curve_label)
ax.set_xlabel('Epoch')
ax.set_ylabel('Loss')
ax.grid(True)
ax.legend()
ax.set_title("Model Loss through Training")
plt.show()

Based on this, performance flattens out around epoch 6, so we'll use that as our final model

In [None]:
# Load the checkpoint chosen as the final model, plus its trainer.
# Both files are whole pickled objects (methods/attributes are accessed on
# them), so they need full unpickling; torch >= 2.6 defaults to
# weights_only=True and would refuse them, hence the explicit flag.
# Full unpickling can execute arbitrary code: only load checkpoints you trust.
cnn = load("models/model_0_epoch_6.pth", weights_only=False)
cnn_trainer = load("models/model_trainer_0_epoch_6.pth", weights_only=False)

In [None]:
# Final evaluation of the selected model.
# Per the author's note, validate_model() loads a fresh dataset and runs
# validation on it, so its result can be used as a test-set score.
# NOTE(review): validate_model is defined in model_trainer (not visible here) -
# confirm it really generates new data rather than reusing earlier validation data.
cnn_trainer.validate_model()