In [None]:
import pathlib
import os
import sys
from pathlib import Path
import tarfile
import random

import numpy as np

import pandas as pd

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
import torchvision.models as models
from torchvision import transforms as T

from PIL import Image
from tqdm import tqdm

import matplotlib.pyplot as plt
import seaborn as sns

# Enable cuDNN benchmark mode at import time.
# NOTE(review): set_seed() later sets this flag back to False, so it only
# stays on until the first set_seed() call.
torch.backends.cudnn.benchmark = True

In [None]:
# Record the installed PyTorch version alongside the results.
torch.__version__

'2.8.0+cu126'

In [None]:
# Model family tag; used below to name the output folder (f'{modeltype}_regression').
modeltype = 'mobilenet'

# Dataset identifiers: `ds` also names the sub-folder that models are saved under.
# NOTE(review): `eval_ds` presumably selects the held-out "variation" dataset — confirm
# against the cell that defines `eval_ds_dir`.
ds = 'sick_ones_bendbias_v3_2class_normal'
eval_ds = 'sick_ones_bendbias_v3_2class_variation'

# Intended base random seed for the notebook.
random_state = 42

In [None]:
# Mount Google Drive (Colab only); model checkpoints are written below /content/drive.
from google.colab import drive
drive.mount('/content/drive')

MessageError: Error: credential propagation was unsuccessful

In [None]:
# Root folder on the mounted Drive under which all trained models are stored.
relative_model_path = "two4two_sickones_models_pytorch"
base_path = Path('/content/drive/MyDrive') / relative_model_path
base_path

PosixPath('/content/drive/MyDrive/two4two_sickones_models_pytorch')

In [None]:
# data downloading and dataset utilities

def download_file(url, file_name, cache_dir="data", extract=True, force_download=False, archive_folder=None):
    """Download a file into a local cache directory and optionally unpack it.

    Args:
        url: Location of the file to download.
        file_name: Name under which the file is stored inside ``cache_dir``.
        cache_dir: Directory used for the download and the extraction; created if missing.
        extract: If true, unpack the file as a gzip-compressed tar archive into ``cache_dir``.
        force_download: If true, download even when the file is already cached.
        archive_folder: Optional sub-folder of ``cache_dir`` that the archive unpacks to.

    Returns:
        ``Path(cache_dir) / archive_folder`` when ``archive_folder`` is given (with
        ``extract=False`` only if that folder already exists); otherwise ``Path(cache_dir)``.
    """
    # Ensure the cache directory exists
    os.makedirs(cache_dir, exist_ok=True)
    cache_root = Path(cache_dir)
    file_path = os.path.join(cache_dir, file_name)

    # Download the file unless a cached copy can be reused
    if force_download or not os.path.exists(file_path):
        torch.hub.download_url_to_file(url, file_path)
        print(f"File downloaded to: {file_path}")
    else:
        print(f"File already exists at: {file_path}")

    target = cache_root / archive_folder if archive_folder is not None else cache_root

    if extract:
        with tarfile.open(file_path, "r:gz") as tar:
            # Use the "data" extraction filter where the interpreter supports it:
            # it rejects members that would escape cache_dir and avoids the
            # DeprecationWarning emitted by filter-less extractall().
            if hasattr(tarfile, "data_filter"):
                tar.extractall(path=cache_dir, filter="data")
            else:
                tar.extractall(path=cache_dir)
        print(f"File extracted to: {cache_dir}")
        return target

    # Without extraction, only point at the archive folder if it is really there.
    return target if target.exists() else cache_root

def load_dataframe(data_dir, dataset):
  """Read ``<data_dir>/<dataset>/parameters.jsonl`` and add a ``filename`` column.

  ``data_dir`` must support the ``/`` operator (e.g. a ``pathlib.Path``). The
  ``filename`` column is the ``id`` column with ``'.png'`` appended.
  """
  parameters_file = data_dir / dataset / 'parameters.jsonl'
  records = pd.read_json(parameters_file, lines=True)
  records['filename'] = records['id'] + '.png'
  return records

class ImageDataset(Dataset):
    """Dataset yielding ``(image, targets)`` pairs described by a DataFrame.

    Each row of ``df`` supplies a ``filename`` (resolved relative to ``data_dir``)
    and the numeric ``target_columns``, which are returned as a float32 tensor.
    """

    def __init__(self, df, data_dir, transform=None, target_columns=None):
        if target_columns is None:
            target_columns = ['spherical', 'ill_spherical', 'bending', 'arm_position']
        self.df = df
        self.data_dir = data_dir
        self.transform = transform
        self.target_columns = target_columns

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        row = self.df.iloc[idx]

        # Always decode to 3-channel RGB, whatever the file's native mode is.
        image = Image.open(os.path.join(self.data_dir, row['filename'])).convert('RGB')

        # Regression targets for this sample, in the order of target_columns.
        target_values = row[self.target_columns].values.astype(np.float32)
        targets = torch.tensor(target_values)

        if self.transform:
            image = self.transform(image)

        return image, targets

In [None]:
# Download the dataset archive (hosted on osf.io) and unpack it into the local cache.
# `data_dir` ends up pointing at the extracted 'blockies_datasets' folder.
data_dir = download_file(url="https://osf.io/download/kexzt/?view_only=adcc520b88cc4ea3b8236c5178ba3ab5",
                         file_name="blockies_datasets.tar.gz",
                         cache_dir='/content/data', # change this if not using Colab
                         extract=True,
                         force_download=False,
                         archive_folder='blockies_datasets')
data_dir

100%|██████████| 1.00G/1.00G [00:26<00:00, 41.3MB/s]
  tar.extractall(path=cache_dir)


File downloaded to: /content/data/blockies_datasets.tar.gz
File extracted to: /content/data


PosixPath('/content/data/blockies_datasets')

In [None]:
# Compute per-channel mean and standard deviation of the training images.
# NOTE(review): `train_df_reduced`, `val_df_reduced`, `test_df_reduced`,
# `eval_df_reduced`, `ds_dir`, `eval_ds_dir` and `filter_existing_images` are
# not defined in any cell shown above — this cell relies on state created
# elsewhere and will fail on a fresh kernel unless those cells are restored.

# Plain tensor conversion only, so the statistics are taken on raw [0, 1] pixel values.
train_transforms = T.Compose([
    T.ToTensor()
])

# NOTE(review): the *_filtered frames are not used by any later visible cell;
# the datasets below are built from the unfiltered frames — confirm intent.
train_df_reduced_filtered = filter_existing_images(train_df_reduced, ds_dir / 'train')
val_df_reduced_filtered = filter_existing_images(val_df_reduced, ds_dir / 'validation')
test_df_reduced_filtered = filter_existing_images(test_df_reduced, ds_dir / 'test')
eval_df_reduced_filtered = filter_existing_images(eval_df_reduced, eval_ds_dir / 'test')

train_dataset_for_norm = ImageDataset(train_df_reduced, ds_dir / 'train', transform=train_transforms)
dataloader_for_norm = DataLoader(train_dataset_for_norm, batch_size=100, shuffle=True,
                        num_workers=6, pin_memory=True)

# Accumulators for the first pass (per-channel mean)
mean = torch.zeros(3)  # For RGB channels
total_pixels = 0

# First pass over the dataset
for images, _ in tqdm(dataloader_for_norm):
    # Sum pixel values per channel (reduce over batch, height and width)
    mean += images.sum(dim=[0, 2, 3])
    # Pixels per channel seen so far: batch size * height * width
    total_pixels += images.size(0) * images.size(2) * images.size(3)

# Divide by total number of pixels
mean /= total_pixels

print(f"Mean per channel: {mean}")

# Accumulator for the second pass (sum of squared deviations from the mean)
std = torch.zeros(3)

# Second pass: the mean must be final before deviations can be summed
for images, _ in tqdm(dataloader_for_norm):
    std += ((images - mean.view(1, 3, 1, 1))**2).sum(dim=[0, 2, 3])

# Population standard deviation (divides by N, not N - 1)
std = torch.sqrt(std / total_pixels)

print(f"Standard Deviation per channel: {std}")

100%|██████████| 400/400 [00:13<00:00, 28.79it/s]


Mean per channel: tensor([0.8068, 0.7830, 0.8005])


100%|██████████| 400/400 [00:13<00:00, 28.74it/s]

Standard Deviation per channel: tensor([0.1093, 0.1136, 0.1029])





In [None]:
# Comprehensive Data Augmentation Pipeline
# Augmentation pipeline applied to training images only.
# Geometric and colour transforms come before ToTensor; RandomErasing comes
# after it, and Normalize is applied last using the `mean`/`std` computed above.
# NOTE(review): flips and rotations alter the image geometry while the regression
# targets stay unchanged — confirm the targets are invariant to these augmentations.
transform_train = T.Compose([
    # Resize and Crop
    T.RandomResizedCrop(128, scale=(0.8, 1.0)),
    # Flipping
    T.RandomHorizontalFlip(p=0.5),
    T.RandomVerticalFlip(p=0.1),
    # Rotation
    T.RandomRotation(degrees=30),
    # Affine Transformations
    T.RandomAffine(
        degrees=15,
        translate=(0.1, 0.1),
        scale=(0.8, 1.2),
        shear=10
    ),
    # Perspective Transformation
    T.RandomPerspective(distortion_scale=0.5, p=0.5),
    # Color Augmentations
    T.ColorJitter(
        brightness=0.4,
        contrast=0.4,
        saturation=0.4,
        hue=0.1
    ),
    # Grayscale
    T.RandomGrayscale(p=0.1),
    # Blur
    T.GaussianBlur(kernel_size=(5, 9), sigma=(0.1, 5)),
    T.ToTensor(),
    # Random Erasing
    T.RandomErasing(
        p=0.5,
        scale=(0.02, 0.33),
        ratio=(0.3, 3.3)
    ),
    T.Normalize(mean=mean, std=std)
])
# Deterministic pipeline (no augmentation) for validation, test and evaluation data
transform = T.Compose([
    T.ToTensor(),
    T.Normalize(mean=mean, std=std)
])

In [None]:
# Number of rows per split, in the order: validation, test, eval, train.
len(val_df_reduced), len(test_df_reduced), len(eval_df_reduced), len(train_df_reduced)

(1000, 3000, 3000, 40000)

In [None]:
# Correcting the data_dir for validation, test, and eval datasets
train_dataset = ImageDataset(train_df_reduced, ds_dir / 'train', transform=transform_train, target_columns=regression_target_columns)
print(f"Number of training samples: {len(train_dataset)}")
train_dataloader = DataLoader(train_dataset, batch_size=32, shuffle=True,
                              num_workers=6, pin_memory=True)

train_eval_dataset = ImageDataset(train_df_reduced, ds_dir / 'train', transform=transform, target_columns=regression_target_columns)
print(f"Number of training eval samples: {len(train_eval_dataset)}")
train_eval_dataloader = DataLoader(train_dataset, batch_size=32, shuffle=False,
                                   num_workers=6, pin_memory=True)

val_dataset = ImageDataset(val_df_reduced,  ds_dir / 'validation', transform=transform, target_columns=regression_target_columns)
print(f"Number of validation samples: {len(val_dataset)}")
val_dataloader = DataLoader(val_dataset, batch_size=32, shuffle=False,
                            num_workers=6, pin_memory=True)

test_dataset = ImageDataset(test_df_reduced,  ds_dir / 'test' , transform=transform, target_columns=regression_target_columns)
test_dataloader = DataLoader(test_dataset, batch_size=32, shuffle=False,
                             num_workers=6, pin_memory=True)

eval_dataset = ImageDataset(eval_df_reduced,  eval_ds_dir / 'test', transform=transform, target_columns=regression_target_columns)
eval_dataloader = DataLoader(eval_dataset, batch_size=32, shuffle=False,
                             num_workers=6, pin_memory=True)

Number of training samples: 40000
Number of training eval samples: 40000
Number of validation samples: 1000


In [None]:
# Pull one training batch and show the image and target tensor shapes.
data_ex = next(iter(train_dataloader))
data_ex[0].shape, data_ex[1].shape

(torch.Size([32, 3, 128, 128]), torch.Size([32, 4]))

Training model

In [None]:
def load_mobilenetv2_regression(num_targets, pretrained=True, checkpoint_path=None):
  """Build a MobileNetV2 whose last classifier layer emits ``num_targets`` values.

  Args:
    num_targets: Size of the regression output vector.
    pretrained: If true, start from torchvision's ``'DEFAULT'`` weights;
      otherwise the network is randomly initialised.
    checkpoint_path: Optional path to a saved ``state_dict`` to load on top.

  Returns:
    The constructed model. If the checkpoint cannot be applied (a
    ``RuntimeError`` is raised while loading), the error is printed and the
    model is returned without the checkpoint weights.
  """
  initial_weights = 'DEFAULT' if pretrained else None
  net = models.mobilenet_v2(weights=initial_weights)

  # Replace the final classification layer with a linear regression head.
  net.classifier[1] = nn.Linear(net.last_channel, num_targets)

  if not checkpoint_path:
    return net

  try:
    # Deserialise onto the CPU first; the caller moves the model to its device.
    state_dict = torch.load(checkpoint_path, map_location=torch.device('cpu'))
    net.load_state_dict(state_dict)
    print(f"Loaded checkpoint from: {checkpoint_path}")
  except RuntimeError as e:
    print(f"Error loading checkpoint: {e}")
    print("Checkpoint might not match the model architecture. Starting training without loading checkpoint.")

  return net

In [None]:
def set_seed(seed):
  """Seed the Python, NumPy and PyTorch (CPU and CUDA) RNGs for reproducibility.

  Also switches cuDNN to deterministic mode and turns its benchmark mode off.
  """
  seeders = (random.seed, np.random.seed, torch.manual_seed, torch.cuda.manual_seed_all)
  for apply_seed in seeders:
    apply_seed(seed)

  torch.backends.cudnn.deterministic = True
  torch.backends.cudnn.benchmark = False

# Ensure r2_score is imported
from sklearn.metrics import r2_score

def evaluate_model(model, dataloader, criterion, device, num_targets):
    """Run ``model`` over ``dataloader`` and report the loss and per-target R-squared.

    Args:
        model: Network to evaluate; switched to eval mode.
        dataloader: Iterable of ``(inputs, labels)`` batches.
        criterion: Loss callable invoked as ``criterion(outputs, labels)``.
        device: Device the batches are moved to.
        num_targets: Number of output columns to score.

    Returns:
        ``(all_preds, avg_loss, r2_scores)`` where ``all_preds`` is a NumPy array of
        predictions in loader order, ``avg_loss`` is the sum of the batch losses
        divided by the number of batches, and ``r2_scores`` lists one score per target.
    """
    model.eval()
    loss_total = 0.0
    collected_preds, collected_labels = [], []

    # Inference only: no gradients are needed.
    with torch.no_grad():
        for batch_inputs, batch_labels in dataloader:
            batch_inputs = batch_inputs.to(device)
            batch_labels = batch_labels.to(device)

            batch_outputs = model(batch_inputs)
            loss_total += criterion(batch_outputs, batch_labels).item()

            collected_preds.extend(batch_outputs.cpu().numpy())
            collected_labels.extend(batch_labels.cpu().numpy())

    avg_loss = loss_total / len(dataloader)
    all_preds = np.array(collected_preds)
    all_labels = np.array(collected_labels)

    # One R-squared value per target column.
    r2_scores = [r2_score(all_labels[:, col], all_preds[:, col]) for col in range(num_targets)]

    print(f"Evaluation Loss: {avg_loss:.4f}")
    for col, score in enumerate(r2_scores):
        print(f"R-squared for target {col}: {score:.4f}")
    return all_preds, avg_loss, r2_scores

def train_model(model, dl_train, dl_val, criterion, optimizer, scheduler, device, checkpoint_path, num_epochs=5, num_targets=4):
  """Train ``model``, checkpoint the epoch with the lowest validation loss, and reload it.

  Args:
    model: Network to train; it is moved to ``device``.
    dl_train: Iterable of ``(inputs, labels)`` training batches.
    dl_val: Iterable of ``(inputs, labels)`` validation batches.
    criterion: Loss callable invoked as ``criterion(outputs, labels)``.
    optimizer: Optimizer over the model's parameters.
    scheduler: Scheduler stepped once per epoch with the validation loss.
    device: Device used for the model and the batches.
    checkpoint_path: Directory; the best weights are written to
      ``<checkpoint_path>/tmp/best_model.pth`` (the folder is created if missing).
    num_epochs: Number of passes over ``dl_train``.
    num_targets: Number of regression outputs, used to rebuild and score the model.

  Returns:
    ``(model, val_loss, mean_val_r2)``: the model carrying the best checkpointed
    weights, its validation loss as computed by ``evaluate_model``, and the mean
    of the per-target validation R-squared scores.

  Note:
    Reported epoch losses are the sum of the batch losses divided by the number
    of batches.
  """
  model = model.to(device)

  # Make sure the checkpoint folder exists so torch.save cannot fail on a missing directory.
  best_model_file = Path(checkpoint_path) / 'tmp' / 'best_model.pth'
  best_model_file.parent.mkdir(parents=True, exist_ok=True)

  best_val_loss = float('inf')
  best_epoch = 0  # stays 0 while no checkpoint has been written by this call

  for epoch in range(num_epochs):

    print(f"Epoch [{epoch+1}/{num_epochs}]")

    # Training phase
    model.train()
    running_train_loss = 0.0

    # Wrap the training dataloader with tqdm for progress visualization
    for inputs, labels in tqdm(dl_train, desc=f"Training Epoch {epoch+1}"):
      inputs, labels = inputs.to(device), labels.to(device)

      # Zero the parameter gradients
      optimizer.zero_grad()

      # Forward pass
      outputs = model(inputs)
      loss = criterion(outputs, labels)

      # Backward pass and optimize
      loss.backward()
      optimizer.step()

      # Track training loss
      running_train_loss += loss.item()

    train_loss = running_train_loss / len(dl_train)

    # Validation phase
    model.eval()
    running_val_loss = 0.0

    with torch.no_grad():
      for inputs, labels in tqdm(dl_val, desc=f"Validation Epoch {epoch+1}"):
        inputs, labels = inputs.to(device), labels.to(device)

        # Forward pass
        outputs = model(inputs)
        loss = criterion(outputs, labels)

        # Track validation loss
        running_val_loss += loss.item()

    val_loss = running_val_loss / len(dl_val)

    # The scheduler reacts to the validation loss (plateau-style scheduling).
    scheduler.step(val_loss)

    # Print epoch results
    print(f"\tTrain Loss: {train_loss:.4f}")
    print(f"\tValidation Loss: {val_loss:.4f}")

    # Checkpointing the best model
    if val_loss < best_val_loss:
      best_val_loss = val_loss
      best_epoch = epoch + 1
      print(f"New best model found at epoch {epoch+1} with validation loss: {val_loss:.4f}")
      torch.save(model.state_dict(), best_model_file)  # Save only the model's state_dict

  if best_epoch > 0:
    # Reload the weights of the best epoch into a fresh model.
    model = load_mobilenetv2_regression(num_targets=num_targets,
                                        pretrained=False,
                                        checkpoint_path=best_model_file)
    model.to(device)
  else:
    # Nothing was saved by this call (no epochs ran, or the loss never improved).
    # Do not pick up a stale checkpoint left behind by an earlier run.
    print("No checkpoint was written during this run; evaluating the in-memory model instead.")

  # Evaluate the selected model on the validation set
  _, val_loss, val_r2_scores = evaluate_model(model, dl_val, criterion, device, num_targets)

  print(f"Training Run complete! Val loss = {best_val_loss:.4f} | Val R-squared (avg) = {np.mean(val_r2_scores):.4f} | Epoch = {best_epoch}")
  print("-" * 30)

  return model, val_loss, np.mean(val_r2_scores)

In [None]:
# setup model path
model_path = base_path / ds / f'{modeltype}_regression'
model_path.mkdir(parents=True, exist_ok=True)
print("Model path:", model_path)

# setup checkpoint folders
checkpoint_path = model_path / "torch_mobilenetv2/"
(checkpoint_path / 'tmp').mkdir(parents=True, exist_ok=True)
(checkpoint_path / 'final').mkdir(parents=True, exist_ok=True)

# Define loss function (using MSELoss for regression)
criterion = nn.MSELoss(reduction='sum') # Or nn.L1Loss()

# Set device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
device

# Define the number of regression targets
num_regression_targets = len(regression_target_columns)

Model path: /content/drive/MyDrive/two4two_sickones_models_pytorch/sick_ones_bendbias_v3_2class_normal/mobilenet_regression


In [None]:
# Sanity check: (rows, columns) of each split's DataFrame.
print(f"Shape of train_df: {train_df_reduced.shape}")
print(f"Shape of val_df: {val_df_reduced.shape}")
print(f"Shape of test_df: {test_df_reduced.shape}")
print(f"Shape of eval_df: {eval_df_reduced.shape}")

Shape of train_df: (40000, 6)
Shape of val_df: (1000, 6)
Shape of test_df: (3000, 6)
Shape of eval_df: (3000, 6)


In [None]:
from pathlib import Path

def check_missing_files(df, directory):
    """Report and return the entries of ``df['filename']`` that do not exist in ``directory``.

    Prints the number of missing files and, if there are any, up to five examples.
    """
    root = Path(directory)
    missing = [fname for fname in df['filename'] if not (root / fname).exists()]

    print(f"Missing files: {len(missing)}")
    if missing:
        print("Examples of missing files:", missing[:5])
    return missing

# Run this for all datasets
missing_val = check_missing_files(val_df_reduced, ds_dir / 'validation')
missing_train = check_missing_files(train_df_reduced, ds_dir / 'train')
missing_test = check_missing_files(test_df_reduced, ds_dir / 'test')
missing_eval = check_missing_files(eval_df_reduced, eval_ds_dir / 'test')


Missing files: 0
Missing files: 0
Missing files: 0
Missing files: 0


In [None]:
# run training
n_runs = 1
n_epochs = 50

load_checkpoints = False
learning_rate = 0.001

best_val_loss = sys.float_info.max
for i in range(n_runs):

  set_seed(42 + i)

  print(f"Run {i+1} / {n_runs}")
  print("=" * 30)

  #if i > 0:
   # print('loading previous checkpoint with augmentation')
    #load_checkpoints = True

  #if i >= 0:
    # load previous checkpoint and train without augmentation
   # print('Loading previous checkpoint and training with out augmentation')
    #train_dataset = ImageDataset(train_df, ds_dir / 'train', transform=transform)
    #train_dataloader = DataLoader(train_dataset, batch_size=32, shuffle=True,
                                 # num_workers=6, pin_memory=True)


  # Load the regression model
  model = load_mobilenetv2_regression(num_regression_targets, pretrained=False, checkpoint_path= checkpoint_path / 'final' / 'best_model.pth' if load_checkpoints else None)
  optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate, betas=(0.9, 0.999), eps=1e-08, weight_decay=0.001, amsgrad=True)

  # Scheduler
  scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.4, patience=4,
                                                          threshold=0.01, threshold_mode='abs')

 # Train the regression model
  model, val_loss, val_avg_r2 = train_model(model,
                                         train_dataloader, val_dataloader,
                                         criterion, optimizer, scheduler,
                                         device, checkpoint_path,
                                         num_epochs=n_epochs,
                                         num_targets=num_regression_targets)

  # Checkpointing the best model
  if val_loss < best_val_loss:
      best_val_loss = val_loss
      print(f"New best model found at Run {i+1} with validation loss: {val_loss:.4f}")
      torch.save(model.state_dict(), checkpoint_path / 'final' / 'best_model.pth')  # Save only the model's state_dict
  print()

# Load best model:
model = load_mobilenetv2_regression(num_targets=num_regression_targets,
                         pretrained=False,
                         checkpoint_path=checkpoint_path / 'final' / 'best_model.pth')
model.to(device)

# Evaluate the final best model on the validation set
_, val_loss, val_r2_scores = evaluate_model(model, val_dataloader, criterion, device, num_regression_targets)

print(f"Training complete! Final Val loss = {val_loss:.4f} | Final Val R-squared (avg) = {np.mean(val_r2_scores):.4f}", )
print("-" * 30)

Run 1 / 1
Epoch [1/50]


Training Epoch 1: 100%|██████████| 1250/1250 [00:58<00:00, 21.43it/s]
Validation Epoch 1: 100%|██████████| 32/32 [00:00<00:00, 45.32it/s]


	Train Loss: 11.7497
	Validation Loss: 10.3098
New best model found at epoch 1 with validation loss: 10.3098
Epoch [2/50]


Training Epoch 2: 100%|██████████| 1250/1250 [00:56<00:00, 22.04it/s]
Validation Epoch 2: 100%|██████████| 32/32 [00:00<00:00, 45.20it/s]


	Train Loss: 9.2526
	Validation Loss: 7.5730
New best model found at epoch 2 with validation loss: 7.5730
Epoch [3/50]


Training Epoch 3: 100%|██████████| 1250/1250 [00:56<00:00, 22.04it/s]
Validation Epoch 3: 100%|██████████| 32/32 [00:00<00:00, 46.83it/s]


	Train Loss: 7.7054
	Validation Loss: 5.6834
New best model found at epoch 3 with validation loss: 5.6834
Epoch [4/50]


Training Epoch 4: 100%|██████████| 1250/1250 [00:57<00:00, 21.85it/s]
Validation Epoch 4: 100%|██████████| 32/32 [00:00<00:00, 43.55it/s]


	Train Loss: 7.1033
	Validation Loss: 4.9064
New best model found at epoch 4 with validation loss: 4.9064
Epoch [5/50]


Training Epoch 5: 100%|██████████| 1250/1250 [00:57<00:00, 21.84it/s]
Validation Epoch 5: 100%|██████████| 32/32 [00:00<00:00, 45.53it/s]


	Train Loss: 6.7850
	Validation Loss: 4.6685
New best model found at epoch 5 with validation loss: 4.6685
Epoch [6/50]


Training Epoch 6: 100%|██████████| 1250/1250 [00:57<00:00, 21.62it/s]
Validation Epoch 6: 100%|██████████| 32/32 [00:00<00:00, 41.90it/s]


	Train Loss: 6.5504
	Validation Loss: 4.6524
New best model found at epoch 6 with validation loss: 4.6524
Epoch [7/50]


Training Epoch 7: 100%|██████████| 1250/1250 [00:56<00:00, 22.00it/s]
Validation Epoch 7: 100%|██████████| 32/32 [00:00<00:00, 48.78it/s]


	Train Loss: 6.3162
	Validation Loss: 4.4775
New best model found at epoch 7 with validation loss: 4.4775
Epoch [8/50]


Training Epoch 8: 100%|██████████| 1250/1250 [00:56<00:00, 21.93it/s]
Validation Epoch 8: 100%|██████████| 32/32 [00:00<00:00, 45.25it/s]


	Train Loss: 6.0894
	Validation Loss: 4.7105
Epoch [9/50]


Training Epoch 9: 100%|██████████| 1250/1250 [00:57<00:00, 21.78it/s]
Validation Epoch 9: 100%|██████████| 32/32 [00:00<00:00, 49.43it/s]


	Train Loss: 5.8672
	Validation Loss: 3.9041
New best model found at epoch 9 with validation loss: 3.9041
Epoch [10/50]


Training Epoch 10: 100%|██████████| 1250/1250 [00:57<00:00, 21.79it/s]
Validation Epoch 10: 100%|██████████| 32/32 [00:00<00:00, 49.71it/s]


	Train Loss: 5.6705
	Validation Loss: 4.2500
Epoch [11/50]


Training Epoch 11: 100%|██████████| 1250/1250 [00:56<00:00, 22.01it/s]
Validation Epoch 11: 100%|██████████| 32/32 [00:00<00:00, 49.22it/s]


	Train Loss: 5.4059
	Validation Loss: 3.3557
New best model found at epoch 11 with validation loss: 3.3557
Epoch [12/50]


Training Epoch 12: 100%|██████████| 1250/1250 [00:56<00:00, 21.96it/s]
Validation Epoch 12: 100%|██████████| 32/32 [00:00<00:00, 48.28it/s]


	Train Loss: 5.1888
	Validation Loss: 3.1169
New best model found at epoch 12 with validation loss: 3.1169
Epoch [13/50]


Training Epoch 13: 100%|██████████| 1250/1250 [00:57<00:00, 21.89it/s]
Validation Epoch 13: 100%|██████████| 32/32 [00:00<00:00, 46.77it/s]


	Train Loss: 4.9937
	Validation Loss: 3.1804
Epoch [14/50]


Training Epoch 14: 100%|██████████| 1250/1250 [00:56<00:00, 21.96it/s]
Validation Epoch 14: 100%|██████████| 32/32 [00:00<00:00, 47.86it/s]


	Train Loss: 4.7673
	Validation Loss: 2.5797
New best model found at epoch 14 with validation loss: 2.5797
Epoch [15/50]


Training Epoch 15: 100%|██████████| 1250/1250 [00:57<00:00, 21.84it/s]
Validation Epoch 15: 100%|██████████| 32/32 [00:00<00:00, 44.53it/s]


	Train Loss: 4.5667
	Validation Loss: 2.8250
Epoch [16/50]


Training Epoch 16: 100%|██████████| 1250/1250 [00:57<00:00, 21.80it/s]
Validation Epoch 16: 100%|██████████| 32/32 [00:00<00:00, 47.15it/s]


	Train Loss: 4.4161
	Validation Loss: 2.0269
New best model found at epoch 16 with validation loss: 2.0269
Epoch [17/50]


Training Epoch 17: 100%|██████████| 1250/1250 [00:56<00:00, 22.03it/s]
Validation Epoch 17: 100%|██████████| 32/32 [00:00<00:00, 44.66it/s]


	Train Loss: 4.2456
	Validation Loss: 2.1245
Epoch [18/50]


Training Epoch 18: 100%|██████████| 1250/1250 [00:57<00:00, 21.74it/s]
Validation Epoch 18: 100%|██████████| 32/32 [00:00<00:00, 48.30it/s]


	Train Loss: 4.1350
	Validation Loss: 3.2325
Epoch [19/50]


Training Epoch 19: 100%|██████████| 1250/1250 [00:56<00:00, 21.93it/s]
Validation Epoch 19: 100%|██████████| 32/32 [00:00<00:00, 45.38it/s]


	Train Loss: 3.9925
	Validation Loss: 2.0602
Epoch [20/50]


Training Epoch 20: 100%|██████████| 1250/1250 [00:56<00:00, 22.14it/s]
Validation Epoch 20: 100%|██████████| 32/32 [00:00<00:00, 46.44it/s]


	Train Loss: 3.8593
	Validation Loss: 1.7861
New best model found at epoch 20 with validation loss: 1.7861
Epoch [21/50]


Training Epoch 21: 100%|██████████| 1250/1250 [00:56<00:00, 21.94it/s]
Validation Epoch 21: 100%|██████████| 32/32 [00:00<00:00, 46.72it/s]


	Train Loss: 3.8091
	Validation Loss: 2.3042
Epoch [22/50]


Training Epoch 22: 100%|██████████| 1250/1250 [00:56<00:00, 22.11it/s]
Validation Epoch 22: 100%|██████████| 32/32 [00:00<00:00, 45.54it/s]


	Train Loss: 3.6892
	Validation Loss: 2.0953
Epoch [23/50]


Training Epoch 23: 100%|██████████| 1250/1250 [00:57<00:00, 21.82it/s]
Validation Epoch 23: 100%|██████████| 32/32 [00:00<00:00, 44.35it/s]


	Train Loss: 3.5794
	Validation Loss: 2.7149
Epoch [24/50]


Training Epoch 24: 100%|██████████| 1250/1250 [00:57<00:00, 21.79it/s]
Validation Epoch 24: 100%|██████████| 32/32 [00:00<00:00, 50.00it/s]


	Train Loss: 3.5500
	Validation Loss: 1.8643
Epoch [25/50]


Training Epoch 25: 100%|██████████| 1250/1250 [00:57<00:00, 21.85it/s]
Validation Epoch 25: 100%|██████████| 32/32 [00:00<00:00, 47.47it/s]


	Train Loss: 3.4996
	Validation Loss: 1.5198
New best model found at epoch 25 with validation loss: 1.5198
Epoch [26/50]


Training Epoch 26: 100%|██████████| 1250/1250 [00:56<00:00, 22.19it/s]
Validation Epoch 26: 100%|██████████| 32/32 [00:00<00:00, 49.05it/s]


	Train Loss: 3.4187
	Validation Loss: 1.2515
New best model found at epoch 26 with validation loss: 1.2515
Epoch [27/50]


Training Epoch 27: 100%|██████████| 1250/1250 [00:57<00:00, 21.76it/s]
Validation Epoch 27: 100%|██████████| 32/32 [00:00<00:00, 46.03it/s]


	Train Loss: 3.3704
	Validation Loss: 1.8476
Epoch [28/50]


Training Epoch 28: 100%|██████████| 1250/1250 [00:56<00:00, 22.02it/s]
Validation Epoch 28: 100%|██████████| 32/32 [00:00<00:00, 46.60it/s]


	Train Loss: 3.2975
	Validation Loss: 2.2541
Epoch [29/50]


Training Epoch 29: 100%|██████████| 1250/1250 [00:56<00:00, 22.20it/s]
Validation Epoch 29: 100%|██████████| 32/32 [00:00<00:00, 46.24it/s]


	Train Loss: 3.2735
	Validation Loss: 1.3264
Epoch [30/50]


Training Epoch 30: 100%|██████████| 1250/1250 [00:55<00:00, 22.40it/s]
Validation Epoch 30: 100%|██████████| 32/32 [00:00<00:00, 48.58it/s]


	Train Loss: 3.2566
	Validation Loss: 1.6136
Epoch [31/50]


Training Epoch 31: 100%|██████████| 1250/1250 [00:55<00:00, 22.68it/s]
Validation Epoch 31: 100%|██████████| 32/32 [00:00<00:00, 48.91it/s]


	Train Loss: 3.1661
	Validation Loss: 1.6990
Epoch [32/50]


Training Epoch 32: 100%|██████████| 1250/1250 [00:56<00:00, 22.24it/s]
Validation Epoch 32: 100%|██████████| 32/32 [00:00<00:00, 49.79it/s]


	Train Loss: 2.8914
	Validation Loss: 1.1418
New best model found at epoch 32 with validation loss: 1.1418
Epoch [33/50]


Training Epoch 33: 100%|██████████| 1250/1250 [00:56<00:00, 22.25it/s]
Validation Epoch 33: 100%|██████████| 32/32 [00:00<00:00, 49.70it/s]


	Train Loss: 2.8518
	Validation Loss: 1.1471
Epoch [34/50]


Training Epoch 34: 100%|██████████| 1250/1250 [00:55<00:00, 22.36it/s]
Validation Epoch 34: 100%|██████████| 32/32 [00:00<00:00, 48.03it/s]


	Train Loss: 2.8225
	Validation Loss: 0.8814
New best model found at epoch 34 with validation loss: 0.8814
Epoch [35/50]


Training Epoch 35: 100%|██████████| 1250/1250 [00:55<00:00, 22.39it/s]
Validation Epoch 35: 100%|██████████| 32/32 [00:00<00:00, 49.04it/s]


	Train Loss: 2.7856
	Validation Loss: 1.0900
Epoch [36/50]


Training Epoch 36: 100%|██████████| 1250/1250 [00:57<00:00, 21.92it/s]
Validation Epoch 36: 100%|██████████| 32/32 [00:00<00:00, 47.61it/s]


	Train Loss: 2.7603
	Validation Loss: 0.8978
Epoch [37/50]


Training Epoch 37: 100%|██████████| 1250/1250 [00:56<00:00, 21.94it/s]
Validation Epoch 37: 100%|██████████| 32/32 [00:00<00:00, 45.89it/s]


	Train Loss: 2.7584
	Validation Loss: 1.0288
Epoch [38/50]


Training Epoch 38: 100%|██████████| 1250/1250 [00:57<00:00, 21.62it/s]
Validation Epoch 38: 100%|██████████| 32/32 [00:00<00:00, 44.98it/s]


	Train Loss: 2.7570
	Validation Loss: 1.2676
Epoch [39/50]


Training Epoch 39: 100%|██████████| 1250/1250 [00:56<00:00, 22.00it/s]
Validation Epoch 39: 100%|██████████| 32/32 [00:00<00:00, 46.57it/s]


	Train Loss: 2.7522
	Validation Loss: 1.1792
Epoch [40/50]


Training Epoch 40: 100%|██████████| 1250/1250 [00:55<00:00, 22.40it/s]
Validation Epoch 40: 100%|██████████| 32/32 [00:00<00:00, 50.11it/s]


	Train Loss: 2.5921
	Validation Loss: 0.9328
Epoch [41/50]


Training Epoch 41: 100%|██████████| 1250/1250 [00:55<00:00, 22.50it/s]
Validation Epoch 41: 100%|██████████| 32/32 [00:00<00:00, 49.18it/s]


	Train Loss: 2.5355
	Validation Loss: 0.8069
New best model found at epoch 41 with validation loss: 0.8069
Epoch [42/50]


Training Epoch 42: 100%|██████████| 1250/1250 [00:56<00:00, 22.11it/s]
Validation Epoch 42: 100%|██████████| 32/32 [00:00<00:00, 48.00it/s]


	Train Loss: 2.5702
	Validation Loss: 0.9021
Epoch [43/50]


Training Epoch 43: 100%|██████████| 1250/1250 [00:55<00:00, 22.39it/s]
Validation Epoch 43: 100%|██████████| 32/32 [00:00<00:00, 48.15it/s]


	Train Loss: 2.5241
	Validation Loss: 0.9639
Epoch [44/50]


Training Epoch 44: 100%|██████████| 1250/1250 [00:55<00:00, 22.41it/s]
Validation Epoch 44: 100%|██████████| 32/32 [00:00<00:00, 47.85it/s]


	Train Loss: 2.5525
	Validation Loss: 0.8848
Epoch [45/50]


Training Epoch 45: 100%|██████████| 1250/1250 [00:55<00:00, 22.36it/s]
Validation Epoch 45: 100%|██████████| 32/32 [00:00<00:00, 48.15it/s]


	Train Loss: 2.5133
	Validation Loss: 0.9142
Epoch [46/50]


Training Epoch 46: 100%|██████████| 1250/1250 [00:55<00:00, 22.46it/s]
Validation Epoch 46: 100%|██████████| 32/32 [00:00<00:00, 45.72it/s]


	Train Loss: 2.5259
	Validation Loss: 0.8874
Epoch [47/50]


Training Epoch 47: 100%|██████████| 1250/1250 [00:56<00:00, 21.98it/s]
Validation Epoch 47: 100%|██████████| 32/32 [00:00<00:00, 50.50it/s]


	Train Loss: 2.4741
	Validation Loss: 0.7187
New best model found at epoch 47 with validation loss: 0.7187
Epoch [48/50]


Training Epoch 48: 100%|██████████| 1250/1250 [00:56<00:00, 22.09it/s]
Validation Epoch 48: 100%|██████████| 32/32 [00:00<00:00, 46.24it/s]


	Train Loss: 2.4533
	Validation Loss: 0.7356
Epoch [49/50]


Training Epoch 49: 100%|██████████| 1250/1250 [00:56<00:00, 22.22it/s]
Validation Epoch 49: 100%|██████████| 32/32 [00:00<00:00, 45.35it/s]


	Train Loss: 2.4329
	Validation Loss: 0.8785
Epoch [50/50]


Training Epoch 50: 100%|██████████| 1250/1250 [00:56<00:00, 22.32it/s]
Validation Epoch 50: 100%|██████████| 32/32 [00:00<00:00, 44.20it/s]


	Train Loss: 2.4682
	Validation Loss: 0.9097
Loaded checkpoint from: /content/drive/MyDrive/two4two_sickones_models_pytorch/sick_ones_bendbias_v3_2class_normal/mobilenet_regression/torch_mobilenetv2/tmp/best_model.pth
Evaluation Loss: 0.7187
R-squared for target 0: 0.9567
R-squared for target 1: 0.8745
R-squared for target 2: 0.9378
R-squared for target 3: 0.9070
Training Run complete! Val loss = 0.7187 | Val R-squared (avg) = 0.9190 | Epoch = 47
------------------------------
New best model found at Run 1 with validation loss: 0.7187

Loaded checkpoint from: /content/drive/MyDrive/two4two_sickones_models_pytorch/sick_ones_bendbias_v3_2class_normal/mobilenet_regression/torch_mobilenetv2/final/best_model.pth
Evaluation Loss: 0.7187
R-squared for target 0: 0.9567
R-squared for target 1: 0.8745
R-squared for target 2: 0.9378
R-squared for target 3: 0.9070
Training complete! Final Val loss = 0.7187 | Final Val R-squared (avg) = 0.9190
------------------------------


In [None]:
# Ensure the best model is loaded and on the correct device
# This part should be executed after the training section if you haven't already.
# Rebuild the network and load the best weights saved under 'final', so the
# evaluation cells below can be run without re-training.
model = load_mobilenetv2_regression(num_targets=num_regression_targets,
                          pretrained=False,
                        checkpoint_path=checkpoint_path / 'final' / 'best_model.pth')
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)
model.eval(); # Set the model to evaluation mode; the trailing ';' suppresses the cell output

Loaded checkpoint from: /content/drive/MyDrive/two4two_sickones_models_pytorch/sick_ones_bendbias_v3_2class_normal/mobilenet_regression/torch_mobilenetv2/final/best_model.pth


In [None]:
# Score the model on three loaders and keep the predictions of each
# (printed in this order: train-eval, test, eval).
train_preds, _, _ = evaluate_model(model, train_eval_dataloader, criterion, device, num_regression_targets)
test_preds, _, _ = evaluate_model(model, test_dataloader, criterion, device, num_regression_targets)
eval_preds, _, _ = evaluate_model(model, eval_dataloader, criterion, device, num_regression_targets)

Evaluation Loss: 2.3460
R-squared for target 0: 0.8828
R-squared for target 1: 0.6280
R-squared for target 2: 0.7179
R-squared for target 3: 0.6173
Evaluation Loss: 0.7617
R-squared for target 0: 0.9575
R-squared for target 1: 0.8780
R-squared for target 2: 0.9324
R-squared for target 3: 0.9125
Evaluation Loss: 2.9083
R-squared for target 0: 0.8639
R-squared for target 1: 0.5723
R-squared for target 2: 0.4901
R-squared for target 3: 0.4477


In [None]:
# Compare ground-truth targets with the prediction for one test sample.
# Indexing test_preds by dataset index relies on test_dataloader using shuffle=False.
sample_idx = 5
print(test_dataset[sample_idx][1])
print(test_preds[sample_idx])

tensor([1.1597, 0.5904, 0.3541, 0.3838])
[1.1382922  0.56760174 0.30983067 0.36736593]
