In [1]:
import os
import re
import nibabel as nib
import numpy as np
from itertools import combinations
from typing import List, Dict
import pandas as pd
from reidentification_utils import extract_Nyxus_features, NYXUS_ALL_FEATURES, NYXUS_SHAPE_FEATURES, NYXUS_TEXTURE_FEATURES, NYXUS_FIRSTORDER_FEATURES
image_dir = "/home/ubuntu/data/ADNI_dataset/BrainIAC_processed/images/"
features_dir = "/home/ubuntu/data/ADNI_dataset/Nyxus_features/"
info_csv = "/home/ubuntu/data/ADNI_dataset/BrainIAC_input_csv/brainiac_ADNI_info.csv"
output_dir = "./Nyxus_reidentification_analysis"


In [2]:

features = extract_Nyxus_features(
    features_dir=features_dir,
    features_group="All",
)

In [9]:
print(features.shape)
features.head()

(2154, 213)


Unnamed: 0,3COV,3COVERED_IMAGE_INTENSITY_RANGE,3ENERGY,3ENTROPY,3EXCESS_KURTOSIS,3HYPERFLATNESS,3HYPERSKEWNESS,3INTEGRATED_INTENSITY,3INTERQUARTILE_RANGE,3KURTOSIS,...,3GLRLM_RP_AVE,3GLRLM_GLV_AVE,3GLRLM_RV_AVE,3GLRLM_RE_AVE,3GLRLM_LGLRE_AVE,3GLRLM_HGLRE_AVE,3GLRLM_SRLGLE_AVE,3GLRLM_SRHGLE_AVE,3GLRLM_LRLGLE_AVE,3GLRLM_LRHGLE_AVE
0,0.400906,1.0,2301380000.0,3.546647,-0.660348,7.84037,-3.151001,51160326.0,24.021602,2.339652,...,0.865729,124.656749,92.492988,6.107536,0.048823,739.917288,0.006578,631.633038,97.373531,1626.862842
1,0.358455,1.0,98983070000.0,3.697837,-0.729154,7.006858,-2.490378,329857389.0,146.599836,2.270846,...,0.889835,139.463468,84.084943,6.171508,0.044354,956.798536,0.00415,835.263599,88.447246,1864.132667
2,0.325405,1.0,449207800000.0,3.824521,-0.420261,9.7072,-3.898365,761772722.0,246.60637,2.579739,...,0.902472,183.674668,98.940859,6.252429,0.041877,1419.528106,0.002807,1257.316712,103.717678,2535.312711
3,0.352849,1.0,581862500000.0,3.916081,-0.709736,7.570138,-2.976383,782879249.0,372.845157,2.290264,...,0.903361,210.496278,85.78381,6.354939,0.043486,1445.349678,0.00296,1272.909979,90.170565,2707.717574
4,0.404911,1.0,281014700000.0,4.019599,-0.972615,5.3462,-2.159163,552893437.0,291.346501,2.027385,...,0.906923,240.18043,100.859065,6.411892,0.044676,1413.006059,0.003065,1255.283586,105.940932,2517.603266


In [5]:
feature_stats = features.describe().T
feature_stats['range'] = feature_stats['max'] - feature_stats['min']
feature_stats = feature_stats.sort_values('max', ascending=False)

# Display features with largest values
print("Features sorted by maximum value (largest first):")
print(feature_stats[['min', 'max', 'mean', 'std', 'range']].head(20))

Features sorted by maximum value (largest first):
                                min           max          mean           std  \
3ENERGY                1.371705e+09  7.422024e+12  2.735574e+11  3.595565e+11   
3UNIFORMITY            3.971416e+09  6.061973e+10  1.528411e+10  7.464122e+09   
3INTEGRATED_INTENSITY  3.796785e+07  3.226548e+09  5.199919e+08  2.633022e+08   
3GLCM_CLUPROM          1.548014e+05  5.734879e+06  2.655908e+06  1.346736e+06   
3GLCM_CLUPROM_AVE      1.518259e+05  5.638995e+06  2.607038e+06  1.322809e+06   
3MESH_VOLUME           5.169236e+05  5.272790e+06  1.041551e+06  1.734010e+05   
3VOLUME_CONVEXHULL     5.169236e+05  5.272790e+06  1.041551e+06  1.734010e+05   
3GLSZM_LAHGLE          1.705860e+04  3.887262e+06  5.825651e+05  5.684904e+05   
3GLCM_HOM2             9.854199e+05  2.620377e+06  2.043079e+06  1.978216e+05   
3GLDZM_LDHGLE          3.558213e+05  2.378178e+06  1.405700e+06  4.309786e+05   
3VOXEL_VOLUME          7.549042e+05  1.808473e+06  1.383341

In [6]:
def get_feature_type(col_name):
    if col_name in NYXUS_SHAPE_FEATURES:
        return 'Shape'
    elif col_name in NYXUS_TEXTURE_FEATURES:
        return 'Texture'
    elif col_name in NYXUS_FIRSTORDER_FEATURES:
        return 'Firstorder'

feature_stats['feature_type'] = feature_stats.index.map(get_feature_type)
print(feature_stats.groupby('feature_type')[['min', 'max', 'mean', 'std', 'range']].agg(['mean', 'min', 'max']))

                       min                                       max  \
                      mean           min           max          mean   
feature_type                                                           
Firstorder    1.494747e+08 -5.448179e+00  3.971416e+09  2.079409e+11   
Shape         1.321478e+05  5.845454e-04  7.549042e+05  9.339433e+05   
Texture      -7.294390e+05 -6.182294e+07  9.854199e+05 -1.098471e+05   

                                                  mean                \
                       min           max          mean           min   
feature_type                                                           
Firstorder   -2.123458e-01  7.422024e+12  8.037822e+09 -2.759119e+00   
Shape         3.961125e-02  5.272790e+06  2.539321e+05  3.015115e-02   
Texture      -2.214803e+07  5.734879e+06 -5.030676e+05 -4.815299e+07   

                                     std                          \
                       max          mean       min           max  

In [3]:
from sklearn.preprocessing import StandardScaler
standard_features = StandardScaler().fit_transform(features)

In [7]:
feature_stats = standard_features.describe().T
feature_stats['range'] = feature_stats['max'] - feature_stats['min']
feature_stats = feature_stats.sort_values('max', ascending=False)

feature_stats['feature_type'] = feature_stats.index.map(get_feature_type)
print(feature_stats.groupby('feature_type')[['min', 'max', 'mean', 'std', 'range']].agg(['mean', 'min', 'max']))

AttributeError: 'numpy.ndarray' object has no attribute 'describe'