In [9]:
import numpy as np
from prototypes.utility.data import DataLoader
from prototypes.utility.data import ProjectConfiguration
import matplotlib.pyplot as plt
from prototypes.classical.segmentation.transformers import BlackBarsRemover, OtsuThresholdingSegmentation
from prototypes.classical.descriptors.texture import LBPTransformer, GaborTransformer, HoGTransformer
from prototypes.classical.descriptors.vetorizer import PCAVectorizer
import cv2
import PIL
import pandas as pd
from prototypes.deeplearning.dataloader.IsicDataLoader import metadata_transform

%load_ext autoreload
%autoreload 2

config = ProjectConfiguration("../config.json")

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [10]:
config.get_value("TRAIN_METADATA")

'/home/matias/workspace/datasets/isic-2024-challenge/train-metadata.csv'

In [11]:
metadata_df = pd.read_csv(config.get_value("TRAIN_METADATA"), engine="python")

In [12]:
metadata_array = metadata_transform(df=metadata_df, extra_files_path = config.get_value("METADATA_FILES_PATH"))

In [13]:
metadata_array

{'ISIC_0015670': array([-0.5470537 , -0.5470537 , -0.5470537 , ..., -0.00697135,
         0.10501152, -0.5470537 ], dtype=float32),
 'ISIC_0015845': array([-0.92970616, -0.51713145, -0.7031564 , ..., -0.31696093,
         3.1748338 ,  0.8735382 ], dtype=float32),
 'ISIC_0015864': array([-0.3612966 , -0.6954847 , -0.3465459 , ..., -0.13937764,
         0.7611448 ,  0.90236354], dtype=float32),
 'ISIC_0015902': array([-0.39055046, -0.35792956, -0.06180392, ..., -0.08978106,
         1.6245862 ,  0.6150575 ], dtype=float32),
 'ISIC_0024200': array([ 2.1314888 , -0.73679036,  0.76159763, ..., -1.157558  ,
        -1.1815768 , -0.91098547], dtype=float32),
 'ISIC_0035502': array([-0.06586265, -0.06586265, -0.06586265, ..., -0.06564999,
        -0.06554567, -0.06562934], dtype=float32)}

In [13]:
m = np.mean(metadata_array, axis=1).reshape(-1, 1)

In [14]:
metadata_array -= m

In [15]:
metadata_array[:5]

array([[-3.8019714e-01, -3.8019714e-01, -3.8019714e-01, ...,
        -4.8450232e-03,  7.2982013e-02, -3.8019714e-01],
       [-3.5846379e+00, -1.9938869e+00, -2.7111373e+00, ...,
        -1.2220960e+00,  1.2241104e+01,  3.3680730e+00],
       [-1.2512994e+00, -2.4087124e+00, -1.2002125e+00, ...,
        -4.8271465e-01,  2.6361170e+00,  3.1252079e+00],
       [-1.5721912e+00, -1.4408731e+00, -2.4879646e-01, ...,
        -3.6142063e-01,  6.5398979e+00,  2.4759617e+00],
       [ 3.9234893e+00, -1.3562300e+00,  1.4018934e+00, ...,
        -2.1307483e+00, -2.1749606e+00, -1.6768756e+00]], dtype=float32)

In [5]:
colums = ["tbp_lv_area_perim_ratio",
          "tbp_lv_eccentricity",
          "tbp_lv_minorAxisMM",
          #Volume sphere
          "tbp_lv_x",
          "tbp_lv_y",
          "tbp_lv_z",
          "tbp_lv_deltaA",
          "tbp_lv_deltaB",
          "tbp_lv_deltaL",
          "tbp_lv_deltaLB",
          "tbp_lv_dnn_lesion_confidence",
          "age_approx",
          "sex"]

In [6]:
metadata_df.groupby(by=["target", "sex"]).sex.count()

target  sex   
0       female    123887
        male      265272
1       female       109
        male         274
Name: sex, dtype: int64

In [7]:
metadata_df.groupby(by=["target", "age_approx"]).sex.count()

target  age_approx
0       5.0               1
        15.0            624
        20.0           1741
        25.0           3431
        30.0          10343
        35.0          11034
        40.0          30465
        45.0          22943
        50.0          47429
        55.0          53751
        60.0          53334
        65.0          53713
        70.0          38207
        75.0          30354
        80.0          21072
        85.0           8832
1       20.0              1
        25.0              2
        30.0              3
        35.0              3
        40.0             15
        45.0             26
        50.0             26
        55.0             44
        60.0             83
        65.0             67
        70.0             48
        75.0             24
        80.0             24
        85.0             15
Name: sex, dtype: int64

In [8]:
metadata_df.query("tbp_lv_dnn_lesion_confidence>99").groupby(by=["target", "tbp_lv_dnn_lesion_confidence"]).sex.count()

target  tbp_lv_dnn_lesion_confidence
0       99.000019                        1
        99.000030                        1
        99.000070                        1
        99.000080                        1
        99.000084                        1
                                        ..
1       99.999976                        2
        99.999980                        2
        99.999988                        4
        99.999990                        4
        100.000000                      21
Name: sex, Length: 56142, dtype: int64

In [9]:
(metadata_df["age_approx"] * metadata_df["clin_size_long_diam_mm"] * metadata_df["tbp_lv_symm_2axis"]).values[:10]

array([107.70285714,  18.8571438 ,  73.8285792 ,  43.86526946,
        47.06193492,  98.84433962,  99.19597701,  57.74837545,
        55.20786517,  42.64716157])

In [10]:
metadata_df.sex.apply(lambda x: 0 if x == "male" else 1).values[:5]

array([0, 0, 0, 0, 0])

In [11]:
lesion_color_difference = np.sqrt((metadata_df['tbp_lv_deltaA']**2 + metadata_df["tbp_lv_deltaB"]**2 + metadata_df['tbp_lv_deltaL']**2).values)
border_complexity = (metadata_df['tbp_lv_norm_border'] + metadata_df['tbp_lv_symm_2axis']).values
color_uniformity = (metadata_df['tbp_lv_color_std_mean'] / (metadata_df['tbp_lv_radial_color_std_max'] + 1e-5)).values

features = np.vstack([lesion_color_difference, border_complexity, color_uniformity])

In [12]:
color_uniformity[:5]

array([0.        , 0.        , 0.        , 0.77406408, 0.        ])

In [13]:
features.shape

(3, 401059)

In [14]:
mean = np.mean(features, axis=1).reshape(-1, 1)
std = np.std(features, axis=1).reshape(-1, 1)

In [15]:
(features - mean)[:5]

array([[-1.57219066e+00, -1.44087257e+00, -2.48795913e-01, ...,
        -3.61419226e-01,  6.53989826e+00,  2.47596282e+00],
       [ 3.92348982e+00, -1.35622965e+00,  1.40189385e+00, ...,
        -2.13074791e+00, -2.17496025e+00, -1.67687525e+00],
       [-2.53965502e+02, -2.53965502e+02, -2.53965502e+02, ...,
        -2.53145512e+02, -2.52743227e+02, -2.53065877e+02]])

In [16]:
from prototypes.deeplearning.dataloader.IsicDataLoader import metadata_transform


In [17]:
metadata_dict, mean, std = metadata_transform(metadata_df)

UFuncTypeError: Cannot cast ufunc 'subtract' output from dtype('O') to dtype('float32') with casting rule 'same_kind'