In [1]:
%load_ext rich
%load_ext ensurewd

In [2]:
from pathlib import Path

import pandas as pd
import polars as pl
import torch
from torchvision import models

from utils.config import get_conf
from utils.features_extract import (
    get_images_features,
    get_transformers,
)
from utils.func import get_path_list
from utils.models import VGG16Extractor

In [3]:
# Dataset inputs. All paths are relative, so they resolve against the
# kernel's current working directory.
TRAIN_PATH = Path('../data/raw/histopathologic-cancer-detection/train/')
TEST_PATH = Path('../data/raw/histopathologic-cancer-detection/test/')
TRAIN_LABELS_PATH_CSV = Path(
    '../data/raw/histopathologic-cancer-detection/train_labels.csv'
)
FEATURES_MATRIX_CSV = Path(
    '../data/raw/histopathologic-cancer-detection/feature_matrix_train.csv',
)

# Project configuration file and the section used by this notebook.
CONF_PATH = Path('../data/conf.toml')
dataset_conf = get_conf(conf_path=CONF_PATH)

# NOTE(review): the key is spelled 'histopatological' (no 'h' after the 't');
# presumably this matches the section name in conf.toml -- confirm before renaming.
histopathologic_cancer_conf = dataset_conf['histopatological_cancer_detection']

# Bare last expression: display the loaded section.
histopathologic_cancer_conf

[1m{[0m[32m'features_matrix'[0m: [32m' '[0m[1m}[0m

In [4]:
# Build the path lists for the train and test directories with get_path_list
# (train first, then test -- same call order as before).
train_images_path_list, test_images_path_list = (
    get_path_list(split_dir) for split_dir in (TRAIN_PATH, TEST_PATH)
)

### Using a custom model to extract image features

In [5]:
# Load torchvision's VGG16 with pretrained ImageNet weights.
# Passing a boolean to `weights` goes through torchvision's deprecated legacy
# path (it emits deprecation warnings and may be removed); the explicit enum
# below is the checkpoint that `weights=True` resolved to, so the loaded
# parameters are unchanged.
vgg16 = models.vgg16(weights=models.VGG16_Weights.IMAGENET1K_V1)



In [6]:
# Wrap the pretrained VGG16 in the project's extractor and move it to the
# compute device. Prefer the GPU, but fall back to the CPU so the notebook
# still runs on a machine without CUDA instead of failing on `.to('cuda')`.
device = 'cuda' if torch.cuda.is_available() else 'cpu'
features_extractor_model = VGG16Extractor(vgg16).to(device)

#### Building feature matrix

In [7]:
# Transform pipeline that is handed to get_images_features in the next cell.
# NOTE(review): the pipeline displayed below center-crops to 32x32, resizes to
# 448 and converts to a tensor, with no Normalize step. torchvision documents
# ImageNet mean/std normalization for its pretrained VGG16 weights -- confirm
# whether that is applied elsewhere or intentionally omitted.
transformeres = get_transformers()
transformeres  # bare last expression -> shows the composed pipeline


Compose(
    CenterCrop(size=(32, 32))
    Resize(size=448, interpolation=bilinear, max_size=None, antialias=True)
    ToTensor()
)

In [8]:
# Extract features for a small sample of the training images only: the first
# 10 paths, because running the extractor over everything takes a while.
sample_paths = train_images_path_list[:10]

img_features = get_images_features(
    features_extractor_model,
    sample_paths,
    transformeres,
)

# Show the first extracted feature entry together with its shape.
first_features = img_features[0]
first_features, first_features.shape


[1m([0m
    [1;35mtensor[0m[1m([0m[1m[[0m[1m[[0m [1;36m0.2443[0m, [1;36m-0.2892[0m, [1;36m-2.0232[0m,  [33m...[0m, [1;36m-1.1370[0m,  [1;36m0.0349[0m, [1;36m-1.2414[0m[1m][0m[1m][0m,
       [33mdevice[0m=[32m'cuda:0'[0m[1m)[0m,
    [1;35mtorch.Size[0m[1m([0m[1m[[0m[1;36m1[0m, [1;36m4096[0m[1m][0m[1m)[0m
[1m)[0m