In [1]:
import pandas as pd

# Path of the coordinate annotation table (filename, source, x/y, level, relative x/y).
DATA = '/kaggle/input/lumbar-coordinate-pretraining-dataset/coords_pretrain.csv'

# Read the whole table and preview its first rows.
coords_df = pd.read_csv(DATA)
coords_df.head()

Unnamed: 0,filename,source,x,y,level,relative_x,relative_y
0,1_t2.jpg,spider,139,175,L5/S1,0.542969,0.683594
1,1_t2.jpg,spider,133,157,L4/L5,0.519531,0.613281
2,1_t2.jpg,spider,132,131,L3/L4,0.515625,0.511719
3,1_t2.jpg,spider,131,102,L2/L3,0.511719,0.398438
4,1_t2.jpg,spider,134,84,L1/L2,0.523438,0.328125


Let's do some EDA before we proceed. Do our different sources have comparable photos in them?

In [2]:
import warnings
from plotly import express

# Silence FutureWarning messages so they do not clutter the plot output.
warnings.filterwarnings(action='ignore', category=FutureWarning)

# Plot a seeded 25% sample of the relative coordinates, coloured by source.
relative_sample = coords_df.sample(frac=0.25, random_state=2024)
express.scatter(
    data_frame=relative_sample,
    x='relative_x',
    y='relative_y',
    color='source',
    hover_name='filename',
    height=800,
)

It doesn't look like the different sources contain very similar pictures; the TSEG pictures cluster tightly in the middle, while the LSD and SPIDER pictures are more spread out but in different directions.

In [3]:
# Same seeded 25% sample, but plotting the absolute x/y columns, coloured by source.
absolute_sample = coords_df.sample(frac=0.25, random_state=2024)
express.scatter(
    data_frame=absolute_sample,
    x='x',
    y='y',
    color='source',
    hover_name='filename',
    height=800,
)

We get essentially the same plot whether we use x/y or their relative equivalents; it looks like they contain the same information.

In [4]:
# Same seeded 25% sample of x/y, this time coloured by the annotated level.
level_sample = coords_df.sample(frac=0.25, random_state=2024)
express.scatter(
    data_frame=level_sample,
    x='x',
    y='y',
    color='level',
    hover_name='filename',
    height=800,
)

If we plot the levels we see that the X/Y values predict the level, but not perfectly. This is probably not surprising.

Let's load up our images and get embeddings from ResNet. We would expect that the embeddings we get from ResNet would tell us what the source of the data is but not the level, as it would probably encode brightness and colors rather than fine differences in vertebrae.

In [5]:
import torch
import torchvision.models as models
import torchvision.transforms as transforms
import numpy as np


# Device the model and every input tensor are moved to.
DEVICE = torch.device('cpu')
# Channel count of the captured avgpool output, i.e. the embedding length.
OUTPUT_SIZE = 512

model = models.resnet18(weights=models.ResNet18_Weights.IMAGENET1K_V1)
# Use the public attribute rather than the private ``_modules`` dict; this raises
# immediately if the layer is missing instead of silently yielding None.
extraction_layer = model.avgpool
model.to(DEVICE)
model.eval()

scaler = transforms.Resize((224, 224))
# The IMAGENET1K_V1 weights were trained on inputs normalised with the ImageNet
# channel statistics, so the same statistics are used here (not 0.5 / 0.2).
normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
to_tensor = transforms.ToTensor()

def get_vec(arg, model, extraction_layer):
    """Return the output of ``extraction_layer`` for a single image.

    Args:
        arg: image accepted by the module-level ``scaler`` and ``to_tensor``
            transforms (the caller in this notebook passes an RGB PIL image).
        model: network on which the forward pass is run.
        extraction_layer: submodule of ``model`` whose output is captured.

    Returns:
        A tensor of shape ``(1, OUTPUT_SIZE, 1, 1)`` holding a copy of the
        layer output.
    """
    image = normalize(to_tensor(scaler(arg))).unsqueeze(0).to(DEVICE)
    result = torch.zeros(1, OUTPUT_SIZE, 1, 1)

    def copy_data(m, i, o):
        # Forward-hook callback: copy the layer output into ``result``.
        result.copy_(o.detach())

    hooked = extraction_layer.register_forward_hook(copy_data)
    try:
        with torch.no_grad():
            model(image)
    finally:
        # Always remove the hook: if the forward pass raised, a leaked hook would
        # stay registered on the layer and keep firing on every later call.
        hooked.remove()
    return result


Downloading: "https://download.pytorch.org/models/resnet18-f37072fd.pth" to /root/.cache/torch/hub/checkpoints/resnet18-f37072fd.pth
100%|██████████| 44.7M/44.7M [00:00<00:00, 120MB/s]


In [6]:
import base64
import pandas as pd
from arrow import now
from glob import glob
from io import BytesIO
from os.path import basename
from PIL import Image

# Root directory whose immediate children are globbed for image files below.
# NOTE(review): this rebinds DATA, which earlier pointed at the coordinates CSV.
DATA = '/kaggle/input/lumbar-coordinate-pretraining-dataset/data'
# Cap passed as ``stop`` to get_picture_from_glob for every folder.
STOP = 1000
# Target size handed to Image.resize when building hover thumbnails in png().
THUMBNAIL_SIZE = (128, 128)


def embed(model, filename: str):
    """Open ``filename`` and return its embedding as a flat array of length OUTPUT_SIZE."""
    with Image.open(fp=filename, mode='r') as image:
        rgb_image = image.convert('RGB')
        features = get_vec(arg=rgb_image, model=model, extraction_layer=extraction_layer)
        return features.numpy().reshape(OUTPUT_SIZE,)


# https://stackoverflow.com/a/952952
def flatten(arg):
    """Concatenate the items of every iterable in ``arg`` into one flat list."""
    flat = []
    for inner in arg:
        flat.extend(inner)
    return flat

def png(filename: str) -> str:
    """Return the image at ``filename`` as a base64 PNG data URI, resized to THUMBNAIL_SIZE."""
    with Image.open(fp=filename, mode='r') as image:
        # our images are pretty big; let's shrink the hover images to thumbnail size
        thumbnail = image.resize(size=THUMBNAIL_SIZE).convert('RGB')
        buffer = BytesIO()
        thumbnail.save(buffer, format='png')
        encoded = base64.b64encode(buffer.getvalue()).decode()
        return 'data:image/png;base64,' + encoded

def get_picture_from_glob(arg: str, tag: str, stop: int) -> list:
    """Embed up to ``stop`` ``.jpg`` files matching the glob pattern ``arg``.

    Args:
        arg: glob pattern of candidate files.
        tag: label stored with every row (the caller passes the folder name).
        stop: maximum number of ``.jpg`` files to embed; values <= 0 yield no rows.

    Returns:
        A list of ``pd.Series`` indexed by ``['tag', 'name', 'value']``: the tag,
        the file's base name and its embedding.
    """
    time_get = now()
    # Filter to .jpg *before* applying the cap: counting every globbed file
    # against ``stop`` could return fewer than ``stop`` images even when more
    # exist. Sorting makes the selection independent of the listing order.
    jpg_files = sorted(path for path in glob(pathname=arg) if path.endswith('.jpg'))[:max(stop, 0)]
    result = [pd.Series(data=[tag, basename(input_file), embed(model=model, filename=input_file), ],
                        index=['tag', 'name', 'value', ])
              for input_file in jpg_files]
    print('encoded {} rows of {}  in {}'.format(len(result), tag, now() - time_get))
    return result

time_start = now()
# Map each folder name under DATA to a glob pattern covering the files inside it.
data_dict = {
    basename(folder): folder + '/*.*'
    for folder in glob(DATA + '/*')
}
# Encode folder by folder, then merge the per-folder row lists into one frame.
rows_per_folder = [
    get_picture_from_glob(arg=value, tag=key, stop=STOP)
    for key, value in data_dict.items()
]
df = pd.DataFrame(data=flatten(arg=rows_per_folder))
print('done in {}'.format(now() - time_start))

encoded 0 rows of processed_spider  in 0:00:00.107254
encoded 0 rows of processed_lsd  in 0:00:00.206992
encoded 210 rows of processed_spider_jpgs  in 0:00:13.605317
encoded 34 rows of processed_osf_jpgs  in 0:00:01.983980
encoded 0 rows of processed_osf  in 0:00:00.014759
encoded 0 rows of processed_tseg  in 0:00:00.184027
encoded 478 rows of processed_tseg_jpgs  in 0:00:29.043296
encoded 515 rows of processed_lsd_jpgs  in 0:00:32.357527
done in 0:01:17.632671


In [7]:
import arrow
from umap import UMAP

time_start = arrow.now()
umap = UMAP(n_epochs=201, low_memory=False, n_jobs=1, verbose=True, random_state=2024)
# Expand the per-row embedding arrays into one column per feature before fitting.
embedding_table = df['value'].apply(func=pd.Series)
# Store the two projected coordinates next to the embeddings they came from.
df[['x', 'y']] = umap.fit_transform(X=embedding_table)
print('done with UMAP in {}'.format(arrow.now() - time_start))

2024-08-15 16:39:15.260570: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-08-15 16:39:15.260789: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-08-15 16:39:15.446917: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


UMAP(low_memory=False, n_epochs=201, n_jobs=1, random_state=2024, verbose=True)
Thu Aug 15 16:39:27 2024 Construct fuzzy simplicial set
Thu Aug 15 16:39:30 2024 Finding Nearest Neighbors
Thu Aug 15 16:39:34 2024 Finished Nearest Neighbor Search
Thu Aug 15 16:39:38 2024 Construct embedding


Epochs completed:   0%|            0/201 [00:00]

	completed  0  /  201 epochs
	completed  20  /  201 epochs
	completed  40  /  201 epochs
	completed  60  /  201 epochs
	completed  80  /  201 epochs
	completed  100  /  201 epochs
	completed  120  /  201 epochs
	completed  140  /  201 epochs
	completed  160  /  201 epochs
	completed  180  /  201 epochs
	completed  200  /  201 epochs
Thu Aug 15 16:39:40 2024 Finished embedding
done with UMAP in 0:00:12.792502


In [8]:
from plotly import express

# Plot the UMAP projection of every encoded image, coloured by its source folder tag.
express.scatter(
    data_frame=df,
    x='x',
    y='y',
    color='tag',
    height=800,
)

This scatter plot makes it pretty clear that our embeddings have captured the source of the data pretty well. Let's try plotting a sample according to the level. To do that we need to join our two DataFrames.

In [9]:
# Join the embeddings to the coordinate rows on the image file name. Both frames
# carry 'x' and 'y' columns, so pandas suffixes them: *_x from df, *_y from coords_df.
t_df = pd.merge(left=df, right=coords_df, left_on='name', right_on='filename')
t_df.columns

Index(['tag', 'name', 'value', 'x_x', 'y_x', 'filename', 'source', 'x_y',
       'y_y', 'level', 'relative_x', 'relative_y'],
      dtype='object')

In [10]:
# Count how many joined rows carry each level label.
level_counts = t_df['level'].value_counts()
level_counts.to_dict()

{'L5/S1': 1121, 'L4/L5': 1121, 'L3/L4': 1121, 'L2/L3': 1121, 'L1/L2': 1121}

In [11]:
# Seed the sample (same seed as the earlier EDA plots) so that a re-run of the
# notebook draws the same 20% of rows and reproduces this figure.
express.scatter(data_frame=t_df.sample(frac=0.2, random_state=2024), x='x_x', y='y_x', color='level')

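Our embeddings do not capture the level, which tells us which vertebrae a coordinate refers to. This is expected: every picture contributes one row for each of the five levels (note the identical counts above), so all five levels are plotted at the same per-picture embedding coordinates.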