In [1]:
import torch
import torchvision.models as models
import torchvision.transforms as transforms
import numpy as np


DEVICE = torch.device('cpu')
OUTPUT_SIZE = 512

model = models.resnet18(weights=models.ResNet18_Weights.IMAGENET1K_V1)
# model = models.resnet50(weights=models.ResNet50_Weights.IMAGENET1K_V1)

extraction_layer = model._modules.get('avgpool')
model.to(DEVICE)
model.eval()

scaler = transforms.Resize((224, 224))
normalize = transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.2, 0.2, 0.2])
to_tensor = transforms.ToTensor()

def get_vec(arg, model, extraction_layer):
    image = normalize(to_tensor(scaler(arg))).unsqueeze(0).to(DEVICE)
    result = torch.zeros(1, OUTPUT_SIZE, 1, 1)
    def copy_data(m, i, o):
        result.copy_(o.data)
    hooked = extraction_layer.register_forward_hook(copy_data)
    with torch.no_grad():
        model(image)
    hooked.remove()
    return result

Downloading: "https://download.pytorch.org/models/resnet18-f37072fd.pth" to /root/.cache/torch/hub/checkpoints/resnet18-f37072fd.pth
100%|██████████| 44.7M/44.7M [00:00<00:00, 118MB/s]


In [2]:
import arrow
import base64
import pandas as pd
from glob import glob
from io import BytesIO
from os.path import basename
from PIL import Image

STOP = 1000
THUMBNAIL_SIZE = (128, 128)
TRAIN = '/kaggle/input/mri-for-brain-tumor-with-bounding-boxes/Train'
VAL = '/kaggle/input/mri-for-brain-tumor-with-bounding-boxes/Val'


def embed(model, filename: str):
    with Image.open(fp=filename, mode='r') as image:
        return get_vec(arg=image.convert('RGB'), model=model, extraction_layer=extraction_layer).numpy().reshape(OUTPUT_SIZE,)


# https://stackoverflow.com/a/952952
def flatten(arg):
    return [x for xs in arg for x in xs]

def png(filename: str) -> str:
    with Image.open(fp=filename, mode='r') as image:
        buffer = BytesIO()
        # our images are pretty big; let's shrink the hover images to thumbnail size
        image.resize(size=THUMBNAIL_SIZE).convert('RGB').save(buffer, format='png')
        return 'data:image/png;base64,' + base64.b64encode(buffer.getvalue()).decode()

def get_picture_from_glob(arg: str, tag: str, stop: int) -> list:
    time_get = arrow.now()
    result = [pd.Series(data=[tag, basename(input_file), embed(model=model, filename=input_file), png(filename=input_file)],
                        index=['tag', 'name', 'value', 'png'])
        for index, input_file in enumerate(glob(pathname=arg)) if index < stop and input_file.endswith('.jpg')]
    print('encoded {} rows of {}  in {}'.format(len(result), tag, arrow.now() - time_get))
    return result

time_start = arrow.now()
train_dict = {basename(folder) : folder + '/images/*.*' for folder in glob(TRAIN + '/*')}
val_dict = {basename(folder) : folder + '/images/*.*' for folder in glob(VAL + '/*')}
train_df = pd.DataFrame(data=flatten(arg=[get_picture_from_glob(arg=value, tag=key, stop=STOP) for key, value in train_dict.items()]))
val_df = pd.DataFrame(data=flatten(arg=[get_picture_from_glob(arg=value, tag=key, stop=STOP) for key, value in val_dict.items()]))
print('done in {}'.format(arrow.now() - time_start))

encoded 1000 rows of Pituitary  in 0:01:18.566897
encoded 711 rows of No Tumor  in 0:00:53.782628
encoded 1000 rows of Meningioma  in 0:01:15.157719
encoded 1000 rows of Glioma  in 0:01:15.242042
encoded 136 rows of Pituitary  in 0:00:10.407675
encoded 100 rows of No Tumor  in 0:00:07.791820
encoded 140 rows of Meningioma  in 0:00:10.511658
encoded 136 rows of Glioma  in 0:00:10.420127
done in 0:05:22.398230


In [3]:
import arrow
from umap import UMAP

time_start = arrow.now()
umap = UMAP(random_state=2024, verbose=True, n_jobs=1, low_memory=False, n_epochs=201)
train_df[['x', 'y']] = umap.fit_transform(X=train_df['value'].apply(func=pd.Series))
val_df[['x', 'y']] = umap.transform(X=val_df['value'].apply(func=pd.Series))
print('done with UMAP in {}'.format(arrow.now() - time_start))

2024-08-21 14:22:50.824038: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-08-21 14:22:50.824247: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-08-21 14:22:51.005135: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


UMAP(low_memory=False, n_epochs=201, n_jobs=1, random_state=2024, verbose=True)
Wed Aug 21 14:23:04 2024 Construct fuzzy simplicial set
Wed Aug 21 14:23:21 2024 Finding Nearest Neighbors
Wed Aug 21 14:23:26 2024 Finished Nearest Neighbor Search
Wed Aug 21 14:23:30 2024 Construct embedding


Epochs completed:   0%|            0/201 [00:00]

	completed  0  /  201 epochs
	completed  20  /  201 epochs
	completed  40  /  201 epochs
	completed  60  /  201 epochs
	completed  80  /  201 epochs
	completed  100  /  201 epochs
	completed  120  /  201 epochs
	completed  140  /  201 epochs
	completed  160  /  201 epochs
	completed  180  /  201 epochs
	completed  200  /  201 epochs
Wed Aug 21 14:23:35 2024 Finished embedding


Epochs completed:   0%|            0/67 [00:00]

	completed  0  /  67 epochs
	completed  6  /  67 epochs
	completed  12  /  67 epochs
	completed  18  /  67 epochs
	completed  24  /  67 epochs
	completed  30  /  67 epochs
	completed  36  /  67 epochs
	completed  42  /  67 epochs
	completed  48  /  67 epochs
	completed  54  /  67 epochs
	completed  60  /  67 epochs
	completed  66  /  67 epochs
done with UMAP in 0:00:42.511327


In [4]:
import warnings

warnings.filterwarnings(action='ignore', category=FutureWarning)

In [5]:
from plotly import express

express.scatter(data_frame=pd.concat(objs=[train_df, val_df]), x='x', y='y', color='tag')

In [6]:
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

logreg = LogisticRegression(max_iter=2000, tol=1e-12).fit(X=train_df['value'].apply(func=pd.Series), y=train_df['tag'])
print('model fit in {} iterations'.format(logreg.n_iter_[0]))

print('accuracy: {:5.4f}'.format(accuracy_score(y_true=val_df['tag'], y_pred=logreg.predict(X=val_df['value'].apply(func=pd.Series)))))

model fit in 1498 iterations
accuracy: 0.9219


In [7]:
from sklearn.metrics import classification_report

print(classification_report(y_true=val_df['tag'], y_pred=logreg.predict(X=val_df['value'].apply(func=pd.Series))))

              precision    recall  f1-score   support

      Glioma       0.92      0.90      0.91       136
  Meningioma       0.86      0.93      0.89       140
    No Tumor       0.98      0.91      0.94       100
   Pituitary       0.96      0.95      0.95       136

    accuracy                           0.92       512
   macro avg       0.93      0.92      0.92       512
weighted avg       0.92      0.92      0.92       512

