In [5]:
from os import path

# Resolve the project layout relative to the first entry of `_dh` in the
# notebook globals (presumably IPython's directory history, i.e. the folder
# the notebook was started in — TODO confirm).
_notebook_dir = globals()['_dh'][0]
PROJECT_ROOT = path.abspath(path.join(_notebook_dir, '..'))

# The datalake sits two levels above the project root.
_datalake_dir = path.join(PROJECT_ROOT, '..', '..', 'datalake', 'imgproj')
DATALAKE_PATH = path.abspath(_datalake_dir)

# Folder of the classifier package inside the project.
_classifier_dir = path.join(PROJECT_ROOT, 'imgproj', 'classifier')
MODEL_DIR = path.abspath(_classifier_dir)

In [2]:
import pandas as pd
from imgproj.classifier.img_configuration import ModelConf

# Fully qualified file name of the processed dataset; the file name embeds the
# first entry of ModelConf.image_size.
FQFN_PROCESSED_DF = path.join(DATALAKE_PATH, 'processed', f'processed_20250202.{ModelConf.image_size[0]}px.jsonl')

# Open the file ourselves and hand the handle to pandas instead of gluing the
# path into a `file://` URL: a hand-built URL is not percent-encoded, so paths
# containing '%', '#', '?' or a Windows drive letter would be mis-parsed.
# A missing file now surfaces as a plain FileNotFoundError.
with open(FQFN_PROCESSED_DF, encoding='utf-8') as processed_file:
    df = pd.read_json(processed_file, orient='records', lines=True, convert_dates=False)

# Row count right after loading.
original_len = df.shape[0]

print(f'ImgProj df shape={df.shape}')
print(f'ImgProj df columns={df.columns}')

ImgProj df shape=(502, 6)
ImgProj df columns=Index(['file_name', 'fqfn', 'img_grey', 'img_height', 'img_width', 'label'], dtype='object')


In [3]:
# Report how many records carry each distinct label, in order of first appearance.
label_column = df['label']
for label in label_column.unique():
    labeled_df = df[label_column == label]
    record_count = len(labeled_df.index)
    print(f'for label {label}: {record_count} #records')

for label 0: 250 #records
for label 1: 252 #records


In [6]:
from datetime import datetime

from imgproj.classifier.img_configuration import ModelConf
from imgproj.classifier.img_classifier import ImgClassifier
from imgproj.classifier.onnx_exporter import TMPL_WEIGHTS_FILE_NAME

# Minute-resolution timestamp identifying this notebook run.
run_id = datetime.now().strftime('%Y-%m-%dT%H-%M')

print(f'Model Class: {ImgClassifier.__name__}')

# The weights file name is built from the template, the classifier class name
# and the first entry of ModelConf.image_size.
model_conf = ModelConf()
weights_file_name = TMPL_WEIGHTS_FILE_NAME.format(ImgClassifier.__name__, ModelConf.image_size[0])
fqfn_model_save = path.join(MODEL_DIR, weights_file_name)

# Instantiate the classifier and load the stored weights into it.
trained_model = ImgClassifier(model_conf=model_conf)
trained_model.load_model_weights(fqfn_model_save)

print(f'Model {trained_model.__class__.__name__} number of parameters = {trained_model.parameter_count:,}')

Model Class: ImgClassifier
Loaded pretrained weights for efficientnet-b0
Model ImgClassifier number of parameters = 4,008,253


In [7]:
import traceback
import torch
from torch.utils.tensorboard import SummaryWriter

from trainer.grad_cam import GradCAM, instantiate_gram_cam, write_cam_to_tensorboard

# Build the Grad-CAM helper around the network held by the loaded classifier.
grad_cam: GradCAM = instantiate_gram_cam(trained_model.model)

sample_size: int = 50  # upper bound on the number of images visualised per label
sample_seed: int = 42  # fixed seed so a re-run of the notebook picks the same images

# NOTE: according to the SummaryWriter documentation `comment` has no effect
# once `log_dir` is given explicitly; it is kept here for the reader only.
with SummaryWriter(log_dir=f'tensorboard.run/grad_cam_{run_id}', comment='gradient class activation maps') as writer:
    for label in df['label'].unique():
        df_label = df[df['label'] == label]

        # Clamp the sample size: DataFrame.sample raises a ValueError when asked
        # for more rows than exist (sampling is without replacement).
        df_sub = (
            df_label
            .sample(n=min(sample_size, len(df_label)), random_state=sample_seed)
            .reset_index(drop=True)
        )

        # NOTE(review): idx restarts at 0 for every label — confirm that
        # write_cam_to_tensorboard also keys its entries on the label.
        for idx, row in df_sub.iterrows():
            img_grey = row['img_grey']  # grayscale image data of this record
            row_label = row['label']    # label of this record (kept separate from the outer loop variable)

            # Add a leading dimension of size 1 in front of the image data
            # (presumably the channel axis, giving [1, H, W] — TODO confirm).
            img_tensor = torch.tensor(img_grey, dtype=torch.float32).unsqueeze(0)

            try:
                write_cam_to_tensorboard(writer, grad_cam, img_tensor, row_label, idx)
            except Exception:
                # Best effort: a failure on one image is reported but must not
                # abort the export of the remaining ones.
                traceback.print_exc(limit=10)