In [11]:
import os
from pathlib import Path
import torch, numpy as np, pandas as pd
from fastai.vision.all import *
device = torch.device("cpu")

Download and map!

In [12]:
df_labels = pd.read_csv(Path('data/image_labels.csv'))

Split training and validation sets

In [13]:
from sklearn.model_selection import train_test_split

# Drop NaNs and build the initial label dictionary
initial_nan_labels_count = df_labels['target'].isna().sum()
if initial_nan_labels_count > 0:
    print(f"DEBUG: Found {initial_nan_labels_count} NaN values in 'target' column. Dropping rows with NaN targets.")
    df_labels.dropna(subset=['target'], inplace=True)
else:
    print("DEBUG: No NaN values found in 'target' column (good).")

# Split into train and validation
train_df, valid_df = train_test_split(df_labels, test_size=0.2, random_state=42)

train_label_dict = dict(zip(train_df['filename'], train_df['target']))
valid_label_dict = dict(zip(valid_df['filename'], valid_df['target']))
all_label_dict = {**train_label_dict, **valid_label_dict}

DEBUG: No NaN values found in 'target' column (good).


Now lets make the datablock. For augmentations, we'll do all except warp (that might make the phyisquese look too different). We can see some of our datablock's examples with show_batch.

In [14]:
print(f"DEBUG: Train labels: {len(train_label_dict)} | Valid labels: {len(valid_label_dict)}")

# Get all image files
path = Path('images')
print(f"DEBUG: Image path set to: {path}")

all_image_files = get_image_files(path)
print(f"DEBUG: Total image files found by get_image_files: {len(all_image_files)}")

# Filter image files to only those with matching labels
processable_image_files = [f for f in all_image_files if f.name in all_label_dict]
print(f"DEBUG: Processable image files (with matching labels): {len(processable_image_files)}")

# Safety check
if len(processable_image_files) == 0:
    print("CERROR: No processable image files found (no images match labels or vice-versa).")
    if all_image_files and all_label_dict:
        print(f"  Sample image file: {all_image_files[0].name}")
        print(f"  Sample label key: {next(iter(all_label_dict.keys()))}")
        if all_image_files[0].name not in all_label_dict and all_image_files[0].name.split('.')[0] in [k.split('.')[0] for k in all_label_dict.keys()]:
            print(" Filename extensions might differ between image files and label keys.")
    raise ValueError("Cannot create DataLoaders: No matching image files and labels.")

# Helper function to get label
def get_y_func(fn):
    key = fn.name
    if key not in all_label_dict:
        print(f"DEBUG ERROR: Label not found for: {key} during get_y_func call. This should not happen if pre-filtered.")
        raise ValueError(f"Label not found for: {key}")
    return all_label_dict[key]

# Generate index lists for DataBlock IndexSplitter
filename_to_index = {f.name: i for i, f in enumerate(processable_image_files)}
valid_idxs = [filename_to_index[fname] for fname in valid_df['filename'] if fname in filename_to_index]
splitter = IndexSplitter(valid_idxs)

def convert_to_rgb(img):
    return img.convert('RGB')

# Transformations
item_tfms = RandomResizedCrop(244, min_scale=0.75)

batch_tfms = aug_transforms(
    do_flip=False,
    max_rotate=2,     
    max_zoom=1.05,    
    max_lighting=0.1, 
    max_warp=0.,
    p_affine=0.3,     
    p_lighting=0.3   
)

dblock = DataBlock(
    blocks=(ImageBlock, RegressionBlock),
    get_items=lambda _: processable_image_files,
    splitter=splitter,
    get_y=get_y_func,
    item_tfms=item_tfms,
    batch_tfms=batch_tfms,
    n_inp=1
)

print("DEBUG: Attempting to create DataLoaders...")
try:
    dls = dblock.dataloaders(path, bs=32)
    print("DEBUG: DataLoaders created successfully.")
    print(f"DEBUG: Number of training batches: {len(dls.train)}")
    print(f"DEBUG: Number of validation batches: {len(dls.valid)}")
except Exception as e:
    print(f"CRITICAL ERROR: Failed to create DataLoaders: {e}")
    raise


DEBUG: Train labels: 1269 | Valid labels: 318
DEBUG: Image path set to: images
DEBUG: Total image files found by get_image_files: 1315
DEBUG: Processable image files (with matching labels): 1030
DEBUG: Attempting to create DataLoaders...
DEBUG: DataLoaders created successfully.
DEBUG: Number of training batches: 25
DEBUG: Number of validation batches: 7


Train it!

In [15]:
import torch
import torch.nn as nn

class HuberLoss(nn.Module):
    def __init__(self, delta=1.0):
        super().__init__()
        self.delta = delta

    def forward(self, input, target):
        abs_error = torch.abs(input - target)
        quadratic = torch.minimum(abs_error, torch.tensor(self.delta))
        linear = abs_error - quadratic
        loss = 0.5 * quadratic**2 + self.delta * linear
        return loss.mean()

In [18]:
import timm
from fastai.vision.all import *

model_name='efficientnet_b3'

# Create model with correct input size
model = timm.create_model(model_name)

def mae(preds, targs):
    # Ensure target shape matches preds
    if targs.ndim == 1:
        targs = targs.unsqueeze(1)
    return nn.L1Loss()(preds, targs)

class RMCELoss(nn.Module):
    def __init__(self):
        super().__init__()

    def forward(self, input, target):
        error_cubed = torch.abs(input - target) ** 3
        mean_cubed_error = torch.mean(error_cubed)
        loss = torch.pow(mean_cubed_error, 1/3)
        return loss  # must be positive

rmce_loss = RMCELoss()

learn = Learner(dls, model, metrics=[rmse, mae])

In [19]:
learn.fine_tune(20)

epoch,train_loss,valid_loss,_rmse,mae,time


RuntimeError: The size of tensor a (32000) must match the size of tensor b (32) at non-singleton dimension 0

##### Now we have a working model! For example, it predicts this picture at 13% bodyfat (not so far off in my opinion)

In [None]:
bf,_,probs = learn.predict(PILImage.create('images/90_image_1.jpg'))
print(f"Bodyfat prediction: {probs[0]:.4f}")

Finally, export the model

In [None]:
folder_tag=""
path = f"model/{model_name}{folder_tag}/model.pkl"
learn.export(path)