## Dataset Analysis

In [1]:
import os
import pandas as pd

Read extracted features dataset into a data frame

In [2]:
# Absolute path of the features CSV, resolved against the current working
# directory. NOTE: despite its name, `data_dir` holds a file path.
data_dir = os.path.join(
    os.getcwd(),
    "data",
    "extracted_features.csv",
)
data_df = pd.read_csv(data_dir)

# Report the (rows, columns) shape of the loaded frame
print('Data frame dimensions are:', data_df.shape)

Data frame dimensions are: (2734, 128)


### Preprocessing

Drop columns that have only 1 unique value

In [3]:
# Number of distinct non-null values in every column, computed in one pass
distinct_per_column = data_df.nunique()

# A column with exactly one distinct value is constant and carries no
# information, so it is collected for removal.
drop_columns = distinct_per_column[distinct_per_column == 1].index.tolist()
count = len(drop_columns)

# "unique values" in the message below means "a single distinct value"
print("There are", count, "columns with unique values.")
print("These columns are:", drop_columns)

There are 13 columns with unique values.
These columns are: ['diagnostics_Versions_PyRadiomics', 'diagnostics_Versions_Numpy', 'diagnostics_Versions_SimpleITK', 'diagnostics_Versions_PyWavelet', 'diagnostics_Versions_Python', 'diagnostics_Configuration_Settings', 'diagnostics_Configuration_EnabledImageTypes', 'diagnostics_Image-original_Dimensionality', 'diagnostics_Image-original_Spacing', 'diagnostics_Image-original_Size', 'diagnostics_Image-original_Minimum', 'diagnostics_Mask-original_Spacing', 'diagnostics_Mask-original_Size']


In [4]:
# Rebind instead of mutating in place, and tolerate columns that are already
# gone, so re-running this cell on its own does not raise a KeyError.
data_df = data_df.drop(columns=drop_columns, errors="ignore")

# Shape of data frame
print('New data frame dimensions are:', data_df.shape)

New data frame dimensions are: (2734, 115)


### Analysis

Count how many class-1 images there are per patient, then group the patients by their image count to show how frequently each count occurs

In [5]:
# Images per patient, restricted to the rows labelled class 1
filtered_df = data_df.loc[data_df['class'] == 1]
count_df = filtered_df['patient'].value_counts()

# How many patients share each image count, ordered by image count
val = (
    count_df
    .value_counts()
    .sort_index()
    .rename_axis('# Of Images')
    .reset_index(name='Frequency')
)

val

Unnamed: 0,# Of Images,Frequency
0,1,60
1,2,365
2,3,22
3,4,51
4,5,7
5,6,3
6,7,4
7,8,5
8,9,1
9,10,3


Remove all categorical columns, as they are not relevant for the numerical analysis that follows

In [6]:
# Columns stored with the generic "object" dtype are treated as categorical
object_dtype_mask = (data_df.dtypes == "object").to_numpy()
categorical_feats = data_df.columns[object_dtype_mask]
categorical_column_names = categorical_feats.tolist()

print("Number of categorical features:", len(categorical_column_names))
print("Categorical column names:", categorical_column_names)

Number of categorical features: 7
Categorical column names: ['diagnostics_Image-original_Hash', 'diagnostics_Mask-original_Hash', 'diagnostics_Mask-original_BoundingBox', 'diagnostics_Mask-original_CenterOfMassIndex', 'diagnostics_Mask-original_CenterOfMass', 'name', 'provider']


In [7]:
# Keep only the numeric columns for analysis: remove the categorical columns
# and the class label in a single, non-mutating drop.
df_analysis = data_df.drop(columns=[*categorical_column_names, "class"])

Split data frame into 2 smaller data frames based on the image class

In [8]:
# Render floats with three decimals in all following outputs
pd.set_option('display.float_format', lambda value: '%.3f' % value)

# The class label was removed from df_analysis, so the row masks are built
# from data_df; both frames share the same row index.
class_0_df = df_analysis.loc[data_df['class'].eq(0)]
class_1_df = df_analysis.loc[data_df['class'].eq(1)]

In [9]:
# Lift pandas' column-count and line-width limits so that wide describe()
# tables are displayed in full
for display_option in ('display.max_columns', 'display.width'):
    pd.set_option(display_option, None)

In [10]:
# Summary statistics (count, mean, std, min, quartiles, max) of every column
# for the class-0 data frame
class_0_df.describe()

Unnamed: 0,diagnostics_Image-original_Mean,diagnostics_Image-original_Maximum,diagnostics_Mask-original_VoxelNum,diagnostics_Mask-original_VolumeNum,original_shape2D_Elongation,original_shape2D_MajorAxisLength,original_shape2D_MaximumDiameter,original_shape2D_MeshSurface,original_shape2D_MinorAxisLength,original_shape2D_Perimeter,original_shape2D_PerimeterSurfaceRatio,original_shape2D_PixelSurface,original_shape2D_Sphericity,original_firstorder_10Percentile,original_firstorder_90Percentile,original_firstorder_Energy,original_firstorder_Entropy,original_firstorder_InterquartileRange,original_firstorder_Kurtosis,original_firstorder_Maximum,original_firstorder_MeanAbsoluteDeviation,original_firstorder_Mean,original_firstorder_Median,original_firstorder_Minimum,original_firstorder_Range,original_firstorder_RobustMeanAbsoluteDeviation,original_firstorder_RootMeanSquared,original_firstorder_Skewness,original_firstorder_TotalEnergy,original_firstorder_Uniformity,original_firstorder_Variance,original_glcm_Autocorrelation,original_glcm_ClusterProminence,original_glcm_ClusterShade,original_glcm_ClusterTendency,original_glcm_Contrast,original_glcm_Correlation,original_glcm_DifferenceAverage,original_glcm_DifferenceEntropy,original_glcm_DifferenceVariance,original_glcm_Id,original_glcm_Idm,original_glcm_Idmn,original_glcm_Idn,original_glcm_Imc1,original_glcm_Imc2,original_glcm_InverseVariance,original_glcm_JointAverage,original_glcm_JointEnergy,original_glcm_JointEntropy,original_glcm_MCC,original_glcm_MaximumProbability,original_glcm_SumAverage,original_glcm_SumEntropy,original_glcm_SumSquares,original_gldm_DependenceEntropy,original_gldm_DependenceNonUniformity,original_gldm_DependenceNonUniformityNormalized,original_gldm_DependenceVariance,original_gldm_GrayLevelNonUniformity,original_gldm_GrayLevelVariance,original_gldm_HighGrayLevelEmphasis,original_gldm_LargeDependenceEmphasis,original_gldm_LargeDependenceHighGrayLevelEmphasis,original_gldm_LargeDependenceLowGrayLevelEmph
asis,original_gldm_LowGrayLevelEmphasis,original_gldm_SmallDependenceEmphasis,original_gldm_SmallDependenceHighGrayLevelEmphasis,original_gldm_SmallDependenceLowGrayLevelEmphasis,original_glrlm_GrayLevelNonUniformity,original_glrlm_GrayLevelNonUniformityNormalized,original_glrlm_GrayLevelVariance,original_glrlm_HighGrayLevelRunEmphasis,original_glrlm_LongRunEmphasis,original_glrlm_LongRunHighGrayLevelEmphasis,original_glrlm_LongRunLowGrayLevelEmphasis,original_glrlm_LowGrayLevelRunEmphasis,original_glrlm_RunEntropy,original_glrlm_RunLengthNonUniformity,original_glrlm_RunLengthNonUniformityNormalized,original_glrlm_RunPercentage,original_glrlm_RunVariance,original_glrlm_ShortRunEmphasis,original_glrlm_ShortRunHighGrayLevelEmphasis,original_glrlm_ShortRunLowGrayLevelEmphasis,original_glszm_GrayLevelNonUniformity,original_glszm_GrayLevelNonUniformityNormalized,original_glszm_GrayLevelVariance,original_glszm_HighGrayLevelZoneEmphasis,original_glszm_LargeAreaEmphasis,original_glszm_LargeAreaHighGrayLevelEmphasis,original_glszm_LargeAreaLowGrayLevelEmphasis,original_glszm_LowGrayLevelZoneEmphasis,original_glszm_SizeZoneNonUniformity,original_glszm_SizeZoneNonUniformityNormalized,original_glszm_SmallAreaEmphasis,original_glszm_SmallAreaHighGrayLevelEmphasis,original_glszm_SmallAreaLowGrayLevelEmphasis,original_glszm_ZoneEntropy,original_glszm_ZonePercentage,original_glszm_ZoneVariance,original_ngtdm_Busyness,original_ngtdm_Coarseness,original_ngtdm_Complexity,original_ngtdm_Contrast,original_ngtdm_Strength,patient
count,1367.0,1367.0,1367.0,1367.0,1367.0,1367.0,1367.0,1367.0,1367.0,1367.0,1367.0,1367.0,1367.0,1367.0,1367.0,1367.0,1367.0,1367.0,1367.0,1367.0,1367.0,1367.0,1367.0,1367.0,1367.0,1366.0,1367.0,1367.0,1367.0,1367.0,1367.0,1364.0,1364.0,1364.0,1364.0,1364.0,1364.0,1364.0,1364.0,1364.0,1364.0,1364.0,1364.0,1364.0,1364.0,1364.0,1364.0,1364.0,1364.0,1364.0,1364.0,1364.0,1364.0,1364.0,1364.0,1367.0,1367.0,1367.0,1367.0,1367.0,1367.0,1367.0,1367.0,1367.0,1367.0,1367.0,1367.0,1367.0,1367.0,1364.0,1364.0,1364.0,1364.0,1364.0,1364.0,1364.0,1364.0,1367.0,1364.0,1364.0,1364.0,1367.0,1364.0,1364.0,1364.0,1367.0,1367.0,1367.0,1367.0,1367.0,1367.0,1367.0,1367.0,1367.0,1367.0,1367.0,1367.0,1367.0,1367.0,1367.0,1367.0,1367.0,1367.0,1367.0,1367.0,1367.0,1367.0
mean,94.752,255.0,50523.693,1.176,0.663,295.315,312.015,50523.141,190.61,923.095,0.049,50523.693,0.776,77.708,216.675,1255007627.518,2.897,78.614,2.752,249.79,43.438,148.174,149.763,14.126,235.664,33.653,158.165,-0.098,1255007627.518,0.163,2943.68,44.773,1070.999,-7.558,19.056,0.479,0.935,0.409,1.068,0.298,0.806,0.802,0.995,0.964,-0.525,0.965,0.359,6.151,0.087,4.253,0.939,0.172,12.302,3.746,4.884,4.266,19966.121,0.392,0.499,7338.563,4.907,44.73,5.487,255.977,0.46,0.077,0.328,14.075,0.023,2569.877,0.148,4.27,42.116,24.909,1575.364,2.99,0.065,5.309,4908.673,0.279,0.386,16.279,0.522,22.271,0.031,2564.24,0.149,4.262,42.033,24.856,1571.916,2.985,0.066,4897.907,0.281,0.524,22.231,0.032,5.312,0.387,16.279,48.492,2926.117,15.751,0.034,0.066,965.213
std,27.413,0.0,56971.959,0.57,0.191,161.714,175.185,56971.981,121.929,555.934,0.268,56971.959,0.136,39.2,42.877,1467677418.771,0.396,28.792,1.079,18.858,12.59,40.64,47.867,24.754,31.821,12.327,39.172,0.63,1467677418.771,0.07,1508.438,19.366,958.416,58.974,9.787,0.207,0.056,0.113,0.168,0.123,0.048,0.051,0.002,0.011,0.096,0.051,0.076,1.515,0.068,0.534,0.058,0.105,3.029,0.462,2.454,0.488,22806.956,0.053,0.058,8764.527,2.465,19.348,0.787,135.009,0.538,0.076,0.073,5.707,0.029,3099.66,0.041,1.641,15.237,52.514,4162.527,16.261,0.048,0.551,6024.007,0.07,0.089,43.649,0.08,8.469,0.022,3098.589,0.044,1.648,15.323,52.468,4158.605,16.244,0.053,6021.769,0.078,0.083,8.502,0.032,0.529,0.093,43.649,71.143,54034.149,4.776,0.014,0.12,809.069
min,20.228,255.0,2.0,1.0,0.0,4.619,3.606,1.0,0.0,5.657,0.008,2.0,0.219,0.0,67.0,17170.0,0.918,0.0,1.0,85.0,5.111,19.541,0.0,0.0,15.0,0.0,35.159,-3.298,17170.0,0.093,37.556,1.0,0.0,-601.415,0.0,0.0,0.402,0.0,-0.0,0.0,0.627,0.599,0.973,0.909,-0.826,0.0,0.0,1.0,0.031,-0.0,0.0,0.06,2.0,-0.0,0.0,0.918,2.0,0.336,0.0,1.0,0.222,2.5,1.0,2.5,0.066,0.013,0.134,0.866,0.004,10.565,0.094,0.239,3.087,1.913,4.0,0.075,0.015,0.0,13.261,0.049,0.065,0.0,0.162,0.765,0.007,1.0,0.094,0.222,2.5,1.0,2.5,0.075,0.015,2.0,0.049,0.162,0.765,0.007,0.918,0.065,0.0,0.0,0.0,0.0,0.0,0.0,1.0
25%,76.129,255.0,10580.0,1.0,0.546,158.527,164.508,10579.5,92.728,456.718,0.018,10580.0,0.711,49.0,188.0,246851961.0,2.722,59.0,2.201,255.0,34.868,119.389,114.0,0.0,230.0,25.013,130.823,-0.443,246851961.0,0.121,1883.729,29.767,430.616,-28.879,11.949,0.359,0.927,0.338,0.967,0.241,0.777,0.772,0.995,0.959,-0.588,0.964,0.318,5.093,0.055,4.032,0.933,0.11,10.185,3.57,3.118,4.101,4030.77,0.367,0.477,1707.034,3.125,29.814,5.016,153.152,0.177,0.035,0.283,10.043,0.011,631.724,0.121,3.097,30.778,9.931,309.565,0.4,0.036,5.036,1216.485,0.23,0.333,4.356,0.469,16.212,0.018,628.386,0.121,3.093,30.661,9.866,308.696,0.4,0.036,1204.343,0.23,0.469,16.164,0.018,5.036,0.333,4.356,10.567,0.0,13.137,0.023,0.016,75.0
50%,93.43,255.0,30920.0,1.0,0.69,280.814,294.267,30920.0,163.87,846.441,0.028,30920.0,0.823,73.0,230.0,769091772.0,2.99,77.0,2.501,255.0,43.658,150.397,150.0,0.0,251.0,32.579,161.71,-0.068,769091772.0,0.142,2830.465,43.607,902.082,-1.563,18.483,0.433,0.952,0.395,1.054,0.275,0.81,0.807,0.996,0.966,-0.54,0.979,0.359,6.224,0.068,4.353,0.955,0.14,12.448,3.86,4.728,4.388,12002.075,0.379,0.506,4550.0,4.749,43.584,5.53,232.228,0.284,0.053,0.317,14.138,0.017,1570.305,0.138,4.282,42.32,14.85,613.121,0.741,0.05,5.371,2923.797,0.27,0.38,7.225,0.519,22.357,0.025,1558.557,0.138,4.281,42.24,14.833,612.996,0.737,0.05,2916.013,0.27,0.519,22.308,0.025,5.371,0.38,7.225,24.93,0.001,15.551,0.033,0.033,692.0
75%,111.945,255.0,69037.0,1.0,0.804,409.267,427.293,69036.25,261.854,1272.645,0.046,69037.0,0.882,101.0,255.0,1817606612.0,3.18,94.0,2.949,255.0,51.051,177.944,184.0,19.5,255.0,40.5,187.076,0.295,1817606612.0,0.177,3738.309,58.313,1447.414,21.332,24.225,0.563,0.965,0.472,1.173,0.334,0.835,0.834,0.997,0.971,-0.48,0.987,0.408,7.262,0.09,4.597,0.968,0.189,14.524,4.07,6.176,4.592,26782.117,0.399,0.526,9440.437,6.209,58.238,5.943,332.936,0.52,0.089,0.367,18.072,0.026,3311.147,0.163,5.273,52.504,23.407,1429.081,1.682,0.075,5.653,6364.125,0.323,0.441,14.001,0.577,28.175,0.037,3308.748,0.163,5.271,52.47,23.388,1426.202,1.68,0.075,6348.897,0.323,0.577,28.165,0.037,5.653,0.441,14.001,55.388,0.002,18.331,0.042,0.075,1869.0
max,224.354,255.0,262143.0,8.0,1.0,748.147,723.371,262142.5,591.2,4339.164,5.657,262143.0,0.939,234.0,255.0,13491198707.0,3.442,255.0,15.285,255.0,119.356,248.25,255.0,221.0,255.0,119.356,248.529,2.477,13491198707.0,0.687,14828.451,107.745,10195.52,390.23,91.235,3.905,1.0,1.009,1.851,2.888,1.0,1.0,1.0,1.0,0.0,0.995,0.532,10.282,1.0,5.345,1.0,1.0,20.563,4.329,23.087,4.863,122985.979,1.0,0.778,74221.893,23.071,107.651,8.366,837.313,6.056,0.741,1.0,34.073,0.625,23352.421,0.529,12.538,88.925,951.858,92317.005,500.823,0.601,6.94,57450.257,0.597,0.767,847.99,0.787,49.458,0.372,23352.421,0.556,12.538,88.925,951.858,92317.005,500.823,0.625,57450.257,1.0,1.0,49.458,0.625,6.94,1.0,847.99,575.792,1000000.0,41.251,0.145,2.744,2182.0


In [11]:
# Summary statistics (count, mean, std, min, quartiles, max) of every column
# for the second (class-1) data frame
class_1_df.describe()

Unnamed: 0,diagnostics_Image-original_Mean,diagnostics_Image-original_Maximum,diagnostics_Mask-original_VoxelNum,diagnostics_Mask-original_VolumeNum,original_shape2D_Elongation,original_shape2D_MajorAxisLength,original_shape2D_MaximumDiameter,original_shape2D_MeshSurface,original_shape2D_MinorAxisLength,original_shape2D_Perimeter,original_shape2D_PerimeterSurfaceRatio,original_shape2D_PixelSurface,original_shape2D_Sphericity,original_firstorder_10Percentile,original_firstorder_90Percentile,original_firstorder_Energy,original_firstorder_Entropy,original_firstorder_InterquartileRange,original_firstorder_Kurtosis,original_firstorder_Maximum,original_firstorder_MeanAbsoluteDeviation,original_firstorder_Mean,original_firstorder_Median,original_firstorder_Minimum,original_firstorder_Range,original_firstorder_RobustMeanAbsoluteDeviation,original_firstorder_RootMeanSquared,original_firstorder_Skewness,original_firstorder_TotalEnergy,original_firstorder_Uniformity,original_firstorder_Variance,original_glcm_Autocorrelation,original_glcm_ClusterProminence,original_glcm_ClusterShade,original_glcm_ClusterTendency,original_glcm_Contrast,original_glcm_Correlation,original_glcm_DifferenceAverage,original_glcm_DifferenceEntropy,original_glcm_DifferenceVariance,original_glcm_Id,original_glcm_Idm,original_glcm_Idmn,original_glcm_Idn,original_glcm_Imc1,original_glcm_Imc2,original_glcm_InverseVariance,original_glcm_JointAverage,original_glcm_JointEnergy,original_glcm_JointEntropy,original_glcm_MCC,original_glcm_MaximumProbability,original_glcm_SumAverage,original_glcm_SumEntropy,original_glcm_SumSquares,original_gldm_DependenceEntropy,original_gldm_DependenceNonUniformity,original_gldm_DependenceNonUniformityNormalized,original_gldm_DependenceVariance,original_gldm_GrayLevelNonUniformity,original_gldm_GrayLevelVariance,original_gldm_HighGrayLevelEmphasis,original_gldm_LargeDependenceEmphasis,original_gldm_LargeDependenceHighGrayLevelEmphasis,original_gldm_LargeDependenceLowGrayLevelEmph
asis,original_gldm_LowGrayLevelEmphasis,original_gldm_SmallDependenceEmphasis,original_gldm_SmallDependenceHighGrayLevelEmphasis,original_gldm_SmallDependenceLowGrayLevelEmphasis,original_glrlm_GrayLevelNonUniformity,original_glrlm_GrayLevelNonUniformityNormalized,original_glrlm_GrayLevelVariance,original_glrlm_HighGrayLevelRunEmphasis,original_glrlm_LongRunEmphasis,original_glrlm_LongRunHighGrayLevelEmphasis,original_glrlm_LongRunLowGrayLevelEmphasis,original_glrlm_LowGrayLevelRunEmphasis,original_glrlm_RunEntropy,original_glrlm_RunLengthNonUniformity,original_glrlm_RunLengthNonUniformityNormalized,original_glrlm_RunPercentage,original_glrlm_RunVariance,original_glrlm_ShortRunEmphasis,original_glrlm_ShortRunHighGrayLevelEmphasis,original_glrlm_ShortRunLowGrayLevelEmphasis,original_glszm_GrayLevelNonUniformity,original_glszm_GrayLevelNonUniformityNormalized,original_glszm_GrayLevelVariance,original_glszm_HighGrayLevelZoneEmphasis,original_glszm_LargeAreaEmphasis,original_glszm_LargeAreaHighGrayLevelEmphasis,original_glszm_LargeAreaLowGrayLevelEmphasis,original_glszm_LowGrayLevelZoneEmphasis,original_glszm_SizeZoneNonUniformity,original_glszm_SizeZoneNonUniformityNormalized,original_glszm_SmallAreaEmphasis,original_glszm_SmallAreaHighGrayLevelEmphasis,original_glszm_SmallAreaLowGrayLevelEmphasis,original_glszm_ZoneEntropy,original_glszm_ZonePercentage,original_glszm_ZoneVariance,original_ngtdm_Busyness,original_ngtdm_Coarseness,original_ngtdm_Complexity,original_ngtdm_Contrast,original_ngtdm_Strength,patient
count,1367.0,1367.0,1367.0,1367.0,1367.0,1367.0,1367.0,1367.0,1367.0,1367.0,1367.0,1367.0,1367.0,1367.0,1367.0,1367.0,1367.0,1367.0,1367.0,1367.0,1367.0,1367.0,1367.0,1367.0,1367.0,1367.0,1367.0,1367.0,1367.0,1367.0,1367.0,1367.0,1367.0,1367.0,1367.0,1367.0,1367.0,1367.0,1367.0,1367.0,1367.0,1367.0,1367.0,1367.0,1367.0,1367.0,1367.0,1367.0,1367.0,1367.0,1367.0,1367.0,1367.0,1367.0,1367.0,1367.0,1367.0,1367.0,1367.0,1367.0,1367.0,1367.0,1367.0,1367.0,1367.0,1367.0,1367.0,1367.0,1367.0,1367.0,1367.0,1367.0,1367.0,1367.0,1367.0,1367.0,1367.0,1367.0,1367.0,1367.0,1367.0,1367.0,1367.0,1367.0,1367.0,1367.0,1367.0,1367.0,1367.0,1367.0,1367.0,1367.0,1367.0,1367.0,1367.0,1367.0,1367.0,1367.0,1367.0,1367.0,1367.0,1367.0,1367.0,1367.0,1367.0,1367.0,1367.0
mean,83.254,254.627,261121.0,1.0,1.0,590.051,721.956,261120.5,590.051,2042.828,0.008,261121.0,0.887,29.887,144.459,2660685951.203,2.631,60.775,4.377,254.566,36.212,83.229,77.266,0.014,254.552,26.076,95.723,0.778,2660685951.203,0.203,2257.671,19.983,836.758,36.126,14.148,0.386,0.928,0.35,0.966,0.247,0.831,0.829,0.997,0.971,-0.532,0.957,0.32,3.866,0.116,3.851,0.941,0.216,7.732,3.436,3.634,3.95,110381.324,0.423,0.466,52882.042,3.636,20.175,5.95,109.323,1.577,0.223,0.292,6.744,0.049,15331.911,0.179,3.55,23.01,485.771,7044.742,412.895,0.155,5.41,22987.371,0.244,0.334,82.65,0.483,12.013,0.066,15331.911,0.179,3.55,23.01,485.771,7044.742,412.895,0.155,22987.371,0.244,0.483,12.013,0.066,5.41,0.334,82.65,390.679,1463.058,18.539,0.021,0.007,965.213
std,31.695,9.75,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,25.395,44.238,1878009465.02,0.423,27.262,5.966,9.803,12.711,31.71,36.107,0.44,9.812,11.252,32.053,0.768,1878009465.02,0.079,1386.145,12.811,817.837,51.923,8.806,0.181,0.05,0.128,0.194,0.076,0.057,0.059,0.001,0.01,0.109,0.052,0.092,1.263,0.077,0.61,0.043,0.105,2.527,0.5,2.199,0.527,19250.413,0.074,0.07,20607.27,2.201,12.83,0.917,83.773,1.159,0.14,0.074,4.445,0.023,5812.443,0.056,1.777,12.098,10059.185,141619.852,9983.433,0.091,0.571,14087.335,0.075,0.106,1060.684,0.081,6.595,0.033,5812.443,0.056,1.777,12.098,10059.185,141619.852,9983.433,0.091,14087.335,0.075,0.081,6.595,0.033,0.571,0.106,1060.684,131.252,38235.935,5.964,0.013,0.009,809.069
min,0.0,0.0,261121.0,1.0,1.0,590.051,721.956,261120.5,590.051,2042.828,0.008,261121.0,0.887,0.0,0.0,0.0,-0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-12.407,0.0,0.098,0.0,1.0,0.0,-443.321,0.0,0.0,0.602,0.0,-0.0,0.0,0.62,0.59,0.988,0.926,-0.885,0.0,0.0,1.0,0.038,-0.0,0.697,0.072,2.0,-0.0,0.0,0.037,89172.127,0.341,0.004,25632.784,0.0,1.0,3.118,8.98,0.068,0.01,0.112,0.112,0.001,482.88,0.101,0.0,1.0,2.953,49.924,0.076,0.012,-0.0,337.291,0.046,0.002,0.0,0.0,0.0,0.0,482.88,0.101,0.0,1.0,2.953,49.924,0.076,0.012,337.291,0.046,0.0,0.0,0.0,-0.0,0.002,0.0,0.0,0.0,0.0,0.0,0.0,1.0
25%,60.414,255.0,261121.0,1.0,1.0,590.051,721.956,261120.5,590.051,2042.828,0.008,261121.0,0.887,13.0,111.0,1379494081.0,2.354,41.0,2.845,255.0,26.828,60.423,52.0,0.0,255.0,18.166,72.684,0.453,1379494081.0,0.151,1221.511,11.269,270.668,13.185,7.552,0.27,0.905,0.263,0.85,0.201,0.8,0.797,0.996,0.966,-0.612,0.944,0.258,2.953,0.072,3.482,0.922,0.144,5.906,3.15,1.985,3.675,97745.604,0.374,0.435,39456.758,1.986,11.43,5.398,61.565,0.671,0.11,0.24,3.479,0.031,10851.277,0.141,2.236,14.331,13.104,169.192,2.239,0.089,5.043,13254.432,0.193,0.261,6.346,0.428,7.083,0.041,10851.277,0.141,2.236,14.331,13.104,169.192,2.239,0.089,13254.432,0.193,0.428,7.083,0.041,5.043,0.261,6.346,304.319,0.0,14.195,0.01,0.004,75.0
50%,79.691,255.0,261121.0,1.0,1.0,590.051,721.956,261120.5,590.051,2042.828,0.008,261121.0,0.887,23.0,141.0,2212232846.0,2.649,55.0,3.705,255.0,34.139,79.524,72.0,0.0,255.0,23.502,92.044,0.8,2212232846.0,0.19,1923.497,16.979,586.989,28.764,12.134,0.348,0.941,0.332,0.959,0.236,0.836,0.835,0.997,0.972,-0.543,0.968,0.319,3.707,0.097,3.94,0.952,0.193,7.413,3.493,3.121,4.03,104416.785,0.4,0.481,49603.221,3.129,17.163,5.997,90.542,1.337,0.196,0.28,5.697,0.045,14541.346,0.172,3.208,20.26,23.434,263.04,8.392,0.136,5.449,19504.281,0.229,0.326,13.707,0.473,10.596,0.06,14541.346,0.172,3.208,20.26,23.434,263.04,8.392,0.136,19504.281,0.229,0.473,10.596,0.06,5.449,0.326,13.707,388.584,0.0,17.797,0.018,0.005,692.0
75%,100.869,255.0,261121.0,1.0,1.0,590.051,721.956,261120.5,590.051,2042.828,0.008,261121.0,0.887,42.0,177.0,3438241648.0,2.956,73.0,4.877,255.0,43.251,100.837,96.0,0.0,255.0,31.35,114.749,1.151,3438241648.0,0.237,2932.835,25.49,1141.038,54.966,18.448,0.449,0.965,0.412,1.074,0.281,0.87,0.869,0.998,0.978,-0.465,0.984,0.381,4.569,0.137,4.304,0.971,0.262,9.138,3.804,4.686,4.33,116949.813,0.448,0.515,61785.659,4.689,25.642,6.574,132.728,2.189,0.305,0.329,8.893,0.065,18909.3,0.21,4.448,29.171,46.779,417.585,26.345,0.203,5.796,28500.394,0.279,0.395,31.828,0.529,15.489,0.088,18909.3,0.21,4.448,29.171,46.779,417.585,26.345,0.203,28500.394,0.279,0.529,15.489,0.088,5.796,0.395,31.828,469.678,0.0,22.315,0.029,0.007,1869.0
max,248.045,255.0,261121.0,1.0,1.0,590.051,721.956,261120.5,590.051,2042.828,0.008,261121.0,0.887,255.0,255.0,16312464178.0,3.397,183.0,205.589,255.0,88.665,248.191,255.0,16.0,255.0,85.351,249.942,3.769,16312464178.0,1.0,8802.107,116.297,8535.038,501.823,58.314,1.525,1.0,0.936,1.669,0.807,1.0,1.0,1.0,1.0,0.0,0.996,0.521,10.712,1.0,5.055,1.0,1.0,21.424,4.293,14.62,4.845,259085.0,0.992,0.597,261121.0,14.623,116.284,8.98,1040.206,8.98,1.0,0.586,33.22,0.133,36899.197,1.0,12.161,109.069,261121.0,4280101.913,261121.0,1.0,7.725,93027.438,1.0,0.682,29501.387,0.749,44.928,0.232,36899.197,1.0,12.161,109.069,261121.0,4280101.913,261121.0,1.0,93027.438,1.0,0.749,44.928,0.232,7.725,0.682,29501.387,981.509,1000000.0,43.676,0.085,0.263,2182.0


Calculate means for every column of both data frames in order to compare them

In [12]:
# Per-column means for each class. Calling .mean() directly avoids building
# the whole describe() table again just to read its 'mean' row.
df0_mean = class_0_df.mean(numeric_only=True)
df1_mean = class_1_df.mean(numeric_only=True)

# Both Series are indexed by column name, so the names are taken from that
# index rather than paired positionally with df_analysis.columns (which would
# raise a length mismatch if any column were left out of the means).
mean_df = (
    pd.concat({'mean_0': df0_mean, 'mean_1': df1_mean}, axis=1)
    .rename_axis('Column Name')
    .reset_index()
)

In [13]:
# Express the gap between the two class means as the ratio of the larger
# magnitude to the smaller magnitude, in percent (100 = equal magnitudes).
# Magnitudes are compared because some means are negative; choosing the
# "larger" mean by signed value would give ratios below 100 for two negative
# means. The column is created directly as float instead of being initialised
# with integer zeros and overwritten with floats afterwards.
abs_means = mean_df[['mean_0', 'mean_1']].abs()

# skipna=False so that a missing mean yields NaN rather than a fake 100 %
larger_abs = abs_means.max(axis=1, skipna=False)
smaller_abs = abs_means.min(axis=1, skipna=False)

# A zero smaller mean gives inf; two zero means give NaN
mean_df['Difference_%'] = larger_abs / smaller_abs * 100

In [14]:
# Rank the columns from the largest to the smallest gap between class means
sorted_mean_df = mean_df.sort_values(by='Difference_%', ascending=False)

# Preview the 30 highest-ranked columns
sorted_mean_df.head(n=30)

Unnamed: 0,Column Name,mean_0,mean_1,Difference_%
23,original_firstorder_Minimum,14.126,0.014,101631.579
91,original_glszm_LargeAreaLowGrayLevelEmphasis,2.985,412.895,13833.479
75,original_glrlm_LongRunLowGrayLevelEmphasis,2.99,412.895,13808.408
89,original_glszm_LargeAreaEmphasis,24.856,485.771,1954.319
73,original_glrlm_LongRunEmphasis,24.909,485.771,1950.202
105,original_ngtdm_Strength,0.066,0.007,1011.617
101,original_ngtdm_Busyness,48.492,390.679,805.649
27,original_firstorder_Skewness,-0.098,0.778,791.064
59,original_gldm_GrayLevelNonUniformity,7338.563,52882.042,720.605
10,original_shape2D_PerimeterSurfaceRatio,0.049,0.008,630.271
