# Attributes to split evaluation

This notebook converts metadata values fomr CMBs into categories to split evaluation analysis

In [73]:
import os
import sys
import argparse
import traceback


import logging
import numpy as np
import pandas as pd
import ast

In [85]:
gt_radiomics_metadata_csv = (
    "/storage/evo1/jorge/MicrobleedNet/data-misc/csv/CMB_radiomics_metadata.csv"
)
gt_cmb_metadata_csv = (
    "/storage/evo1/jorge/MicrobleedNet/data-misc/csv/CMB_metadata_all.csv"
)
all_studies_csv = "/storage/evo1/jorge/MicrobleedNet/data-misc/csv/ALL_studies.csv"

all_studies_df = pd.read_csv(all_studies_csv)
GT_metadata = pd.read_csv(gt_cmb_metadata_csv)
GT_metadata_radiomics = pd.read_csv(gt_radiomics_metadata_csv)

# Convert string representations of tuples to actual tuples
GT_metadata["CM"] = GT_metadata["CM"].apply(lambda x: tuple(ast.literal_eval(x)))
GT_metadata_radiomics["CM"] = GT_metadata_radiomics["CM"].apply(
    lambda x: tuple(ast.literal_eval(x))
)

GT_metadata_all = pd.merge(
    GT_metadata, GT_metadata_radiomics, on=["seriesUID", "CM"], how="inner"
).drop(
    columns=[
        "firstorder_10Percentile",
        "firstorder_90Percentile",
        "firstorder_Energy",
        "firstorder_Entropy",
        "firstorder_InterquartileRange",
        "firstorder_Kurtosis",
        "firstorder_Maximum",
        "firstorder_MeanAbsoluteDeviation",
        "firstorder_Mean",
        "firstorder_Median",
        "firstorder_Minimum",
        "firstorder_Range",
        "firstorder_RobustMeanAbsoluteDeviation",
        "firstorder_RootMeanSquared",
        "firstorder_Skewness",
        "firstorder_TotalEnergy",
        "firstorder_Uniformity",
        "firstorder_Variance",
    ]
)

  GT_metadata = pd.read_csv(gt_cmb_metadata_csv)


In [86]:
GT_metadata_all.columns

Index(['seriesUID', 'cmb_id', 'CM', 'size', 'radius', 'processed_id',
       'RB_label', 'Location', 'Multiple', '<5mm', 'Cause', 'Uncertain',
       'Other', 'seq_type', 'res_level', 'field_strength', 'TE', 'subject',
       'patientUID', 'Dataset', 'shape_Elongation', 'shape_Flatness',
       'shape_LeastAxisLength', 'shape_MajorAxisLength',
       'shape_Maximum2DDiameterColumn', 'shape_Maximum2DDiameterRow',
       'shape_Maximum2DDiameterSlice', 'shape_Maximum3DDiameter',
       'shape_MeshVolume', 'shape_MinorAxisLength', 'shape_Sphericity',
       'shape_SurfaceArea', 'shape_SurfaceVolumeRatio', 'shape_VoxelVolume',
       'count_dict', 'com_label'],
      dtype='object')

In [87]:
GT_metadata_all.columns

Index(['seriesUID', 'cmb_id', 'CM', 'size', 'radius', 'processed_id',
       'RB_label', 'Location', 'Multiple', '<5mm', 'Cause', 'Uncertain',
       'Other', 'seq_type', 'res_level', 'field_strength', 'TE', 'subject',
       'patientUID', 'Dataset', 'shape_Elongation', 'shape_Flatness',
       'shape_LeastAxisLength', 'shape_MajorAxisLength',
       'shape_Maximum2DDiameterColumn', 'shape_Maximum2DDiameterRow',
       'shape_Maximum2DDiameterSlice', 'shape_Maximum3DDiameter',
       'shape_MeshVolume', 'shape_MinorAxisLength', 'shape_Sphericity',
       'shape_SurfaceArea', 'shape_SurfaceVolumeRatio', 'shape_VoxelVolume',
       'count_dict', 'com_label'],
      dtype='object')

# Shape

In [12]:
GT_metadata_radiomics.columns

Index(['seriesUID', 'CM', 'shape_Elongation', 'shape_Flatness',
       'shape_LeastAxisLength', 'shape_MajorAxisLength',
       'shape_Maximum2DDiameterColumn', 'shape_Maximum2DDiameterRow',
       'shape_Maximum2DDiameterSlice', 'shape_Maximum3DDiameter',
       'shape_MeshVolume', 'shape_MinorAxisLength', 'shape_Sphericity',
       'shape_SurfaceArea', 'shape_SurfaceVolumeRatio', 'shape_VoxelVolume',
       'firstorder_10Percentile', 'firstorder_90Percentile',
       'firstorder_Energy', 'firstorder_Entropy',
       'firstorder_InterquartileRange', 'firstorder_Kurtosis',
       'firstorder_Maximum', 'firstorder_MeanAbsoluteDeviation',
       'firstorder_Mean', 'firstorder_Median', 'firstorder_Minimum',
       'firstorder_Range', 'firstorder_RobustMeanAbsoluteDeviation',
       'firstorder_RootMeanSquared', 'firstorder_Skewness',
       'firstorder_TotalEnergy', 'firstorder_Uniformity',
       'firstorder_Variance', 'count_dict', 'com_label'],
      dtype='object')

In [17]:
GT_metadata_radiomics['Sphericity_level'] = GT_metadata_radiomics['shape_Sphericity'].apply(lambda x: "high" if x > 0.8 else "low") 
GT_metadata_radiomics['Sphericity_level'].value_counts()

Sphericity_level
low     545
high    412
Name: count, dtype: int64

In [None]:
GT_metadata_radiomics['Sphericity_level'] = GT_metadata_radiomics['shape_Sphericity'].apply(lambda x: "high" if x > 0.8 else "low") 
GT_metadata_radiomics['Sphericity_level'].value_counts()

In [68]:
# They cannot be less than 2mm3 in diameter, cna be used to filter out FPs
GT_metadata_radiomics_minimum_size = GT_metadata_radiomics[
    (GT_metadata_radiomics['shape_MeshVolume']<4.3) |
    (GT_metadata_radiomics['shape_Maximum3DDiameter']<2)
    ]

In [72]:
# not too big not too small
GT_metadata_radiomics_minimum_size = GT_metadata_radiomics[
    ~((GT_metadata_radiomics["shape_MeshVolume"] < 4.3)
    | (GT_metadata_radiomics["shape_Maximum3DDiameter"] > 10))
]
GT_metadata_radiomics_minimum_size.shape


(0, 40)

In [27]:
GT_metadata_radiomics['volume_level'] = GT_metadata_radiomics['shape_VoxelVolume'].apply(lambda x: "high" if x > 10 else "low") 
GT_metadata_radiomics['volume_level'].value_counts()

volume_level
high    509
low     448
Name: count, dtype: int64

In [30]:
GT_metadata_radiomics['elongation_level'] = GT_metadata_radiomics['shape_Elongation'].apply(lambda x: "high" if x > 0.6 else "low") 
GT_metadata_radiomics['elongation_level'].value_counts()

elongation_level
high    490
low     467
Name: count, dtype: int64

In [53]:
GT_metadata_radiomics['3d_diameter'] = GT_metadata_radiomics['shape_Maximum3DDiameter'].apply(lambda x: "high" if x > 5 else "low") 
GT_metadata_radiomics['3d_diameter'].value_counts()

3d_diameter
low     628
high    329
Name: count, dtype: int64

In [67]:
GT_metadata_radiomics = GT_metadata_radiomics[GT_metadata_radiomics['shape_Maximum3DDiameter'] > 10 ] 
GT_metadata_radiomics.shape


(41, 40)

## Correlations analysis

In [92]:
GT_metadata_all['seriesUID'].nunique()

206

In [94]:
import pandas as pd

# Define the list of columns to average
cols2average = [
    'shape_Elongation', 'shape_Flatness', 'shape_LeastAxisLength', 'shape_MajorAxisLength',
    'shape_Maximum2DDiameterColumn', 'shape_Maximum2DDiameterRow', 'shape_Maximum2DDiameterSlice',
    'shape_Maximum3DDiameter', 'shape_MeshVolume', 'shape_MinorAxisLength', 'shape_Sphericity',
    'shape_SurfaceArea', 'shape_SurfaceVolumeRatio', 'shape_VoxelVolume'
]

if all(col in GT_metadata_all.columns for col in cols2average + ['seriesUID']):
    GT_metadata_shape_averaged = GT_metadata_all.groupby('seriesUID')[cols2average].mean()

    # keep the 'seriesUID' in the resulting DataFrame as a column:
    GT_metadata_shape_averaged.reset_index(inplace=True)

else:
    print("One or more columns are missing in the DataFrame.")

In [95]:
GT_metadata_shape_averaged

Unnamed: 0,seriesUID,shape_Elongation,shape_Flatness,shape_LeastAxisLength,shape_MajorAxisLength,shape_Maximum2DDiameterColumn,shape_Maximum2DDiameterRow,shape_Maximum2DDiameterSlice,shape_Maximum3DDiameter,shape_MeshVolume,shape_MinorAxisLength,shape_Sphericity,shape_SurfaceArea,shape_SurfaceVolumeRatio,shape_VoxelVolume
0,CRB-1.2.826.1.3680043.9.5282.150415.14832.1601...,0.407189,0.335430,2.128091,6.947582,7.255805,2.768069,7.320148,7.420816,23.609809,2.573052,0.732609,52.507282,2.496319,24.885417
1,CRB-1.2.826.1.3680043.9.5282.150415.178.178232...,0.568950,0.562332,3.878406,6.897007,7.158911,4.527693,7.382412,7.449832,69.453125,3.924050,0.800365,102.091695,1.469937,71.125000
2,CRB-1.2.826.1.3680043.9.5282.150415.24477.2447...,0.506075,0.496857,3.465689,6.976521,6.979823,4.265564,6.882465,7.193663,55.148438,3.530106,0.783664,89.520144,1.622376,56.687500
3,CRB-1.2.826.1.3680043.9.5282.150415.25794.2579...,0.303347,0.285929,1.980913,6.928442,6.184658,2.392789,6.254607,6.304635,19.164062,2.101440,0.773399,44.694905,2.343712,20.250000
4,CRB-1.2.826.1.3680043.9.5282.150415.34194.3419...,0.401419,0.374057,2.705514,7.248148,6.846874,3.328493,6.801250,6.983311,39.588542,2.903433,0.765543,70.186811,2.001287,40.881944
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
201,VALDO-323-CMB-1,0.531117,0.379409,1.776391,4.681997,4.743416,2.549510,4.609772,4.847680,11.239583,2.486689,0.711570,34.100942,3.034004,12.125000
202,VALDO-324-CMB-1,0.448336,0.409668,2.719653,6.638681,6.500000,3.354102,6.708204,6.873864,28.807292,2.976362,0.704826,64.476859,2.238213,30.125000
203,VALDO-325-CMB-3,0.482751,0.377214,2.793345,7.432325,7.429988,4.025686,7.547480,7.973855,48.866319,3.418583,0.662355,90.546700,2.396970,50.416667
204,VALDO-326-CMB-1,0.561686,0.449359,2.457290,5.468434,6.324555,3.354102,6.324555,6.403124,19.630208,3.071542,0.692844,50.792401,2.587461,21.000000


# Location