In [None]:
#!pip install numpy pyradiomics slicerio
!pip install seaborn

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os
import seaborn as sns

In [None]:
# Settings for the per-study box plots, followed by the feature table load.

# Label values that are plotted separately.
labels_list = [1,2,3]

# Feature-class and image-type name fragments; each "<image type>_<feature>"
# pair is used further down to select the matching columns.
features_list = [
    'shape', 'firstorder', 'glcm', 'glrlm', 'glszm', 'gldm', 'ngtdm',
]
image_types = [
    'original', 'exponential', 'gradient',
    'lbp-2D', 'lbp-3D-m1', 'lbp-3D-m2', 'lbp-3D-k',
    'logarithm', 'square', 'squareroot',
    'wavelet-LLH', 'wavelet-LHL', 'wavelet-LHH', 'wavelet-HLH',
    'wavelet-HLL', 'wavelet-HHL', 'wavelet-HHH', 'wavelet-LLL',
]
#image_types = ['original']

# The study name selects the CSV and is reused as the output root directory.
study = "Baseline"
df = pd.read_csv("allfeatures_notnorm_" + study + ".csv")

def plot_graph(df_label_features, dir_name, label):
    """Write one box plot (with the individual points overlaid) per column.

    Every column of ``df_label_features`` other than ``'image'`` gets its own
    5x5 figure, saved as ``<dir_name>/boxplot_<column>.png``. ``label`` only
    appears in the figure title. ``dir_name`` is not created here.
    """
    plotted_columns = [name for name in df_label_features.columns if name != 'image']
    for column in plotted_columns:
        print(column)
        fig = plt.figure(figsize=(5,5))

        sns.set(style="whitegrid")
        # Box drawn without outlier markers; the strip plot adds the points.
        sns.boxplot(y=column, data=df_label_features, showfliers=False, palette="Set3")
        ax = sns.stripplot(y=column, data=df_label_features, color=".25")

        # The column name is already in the title, so blank the y-axis label.
        ax.set_ylabel('')
        ax.set_title('BoxPlot of {} for {}'.format(column,label))
        out_path = dir_name + '/boxplot_{}.png'.format(column)
        fig.savefig(out_path, bbox_inches='tight')
        plt.close(fig)

# Extract features: one output directory per (label, image type, feature class).

for label in labels_list:
    print(label)
    df_label = df[df['label'] == label]
    # The loop variable is called image_type, not `type`, so that the builtin
    # type() is not overwritten for the rest of the notebook session.
    for image_type in image_types:
        print(image_type)
        for feature in features_list:
            print(feature)
            # Keep the columns whose name matches "<image_type>_<feature>".
            df_label_features = df_label.filter(regex=image_type + "_" + feature)
            dir_name = study + '/label_' + str(label) + '/' + image_type + '/' + feature
            os.makedirs(dir_name, exist_ok=True)
            plot_graph(df_label_features, dir_name, label)

In [None]:
# Combined box plots for Baseline and Controls.

# Load both feature tables and tag every row with the group it came from.
df_baseline = pd.read_csv("allfeatures_notnorm_Baseline.csv").assign(target="Baseline")
# Despite its name, df_followup holds the Controls table.
df_followup = pd.read_csv("allfeatures_notnorm_Controls.csv").assign(target="Controls")

# Stack the two groups into one frame with a fresh 0..n-1 index.
df = pd.concat([df_baseline, df_followup], ignore_index=True)

labels_list = [1,2,3]
features_list = [
    'shape', 'firstorder', 'glcm', 'glrlm', 'glszm', 'gldm', 'ngtdm',
]
image_types = [
    'original', 'exponential', 'gradient',
    'lbp-2D', 'lbp-3D-m1', 'lbp-3D-m2', 'lbp-3D-k',
    'logarithm', 'square', 'squareroot',
    'wavelet-LLH', 'wavelet-LHL', 'wavelet-LHH', 'wavelet-HLH',
    'wavelet-HLL', 'wavelet-HHL', 'wavelet-HHH', 'wavelet-LLL',
]
#image_types = ['original']

def plot_graph(df_label_features, dir_name, label):
    """Save one box plot per feature column, grouped by the ``target`` column.

    Parameters
    ----------
    df_label_features : pandas.DataFrame
        Feature columns plus a ``target`` column used as the x-axis grouping.
    dir_name : str
        Directory the figures are written to, as
        ``<dir_name>/boxplot_<column>.png``. It is not created here.
    label : object
        Only interpolated into the figure title.
    """
    # 'target' is the grouping variable, not a feature. Plotting it against
    # itself gives no usable figure and seaborn's box plot needs a numeric
    # axis, so it is skipped together with 'image'.
    non_feature_columns = ('image', 'target')

    # The theme is identical for every figure, so it is set once up front.
    sns.set(style="whitegrid")

    for column in df_label_features.columns:
        if column in non_feature_columns:
            continue
        print(column)
        fig = plt.figure(figsize=(5,5))
        try:
            # Box without outlier markers; the strip plot adds the points.
            sns.boxplot(x="target", y=column, data=df_label_features, showfliers=False, palette="Set3")
            ax = sns.stripplot(x="target", y=column, data=df_label_features, color=".25")

            # The column name is already in the title; drop the y-axis label.
            ax.set(ylabel='')
            plt.title('BoxPlot of {} for {}'.format(column,label))
            plt.savefig(dir_name + '/boxplot_{}.png'.format(column), bbox_inches='tight')
        finally:
            # Release the figure even when plotting or saving fails.
            plt.close(fig)

# Extract features: one output directory per (label, image type, feature class).

for label in labels_list:
    print(label)
    df_label = df[df['label'] == label]
    # The loop variable is called image_type, not `type`, so that the builtin
    # type() is not overwritten for the rest of the notebook session.
    for image_type in image_types:
        print(image_type)
        for feature in features_list:
            print(feature)
            # Keep the columns matching "<image_type>_<feature>" and attach the
            # group column. assign() builds a new frame instead of writing
            # into the result of filter(), which avoids SettingWithCopyWarning.
            df_label_features = df_label.filter(regex=image_type + "_" + feature).assign(
                target=df_label['target']
            )
            dir_name = "Both/" + 'label_' + str(label) + '/' + image_type + '/' + feature
            os.makedirs(dir_name, exist_ok=True)
            plot_graph(df_label_features, dir_name, label)

In [None]:
# Combined violin plots for Baseline and Controls (non-normalised features).

# Load both feature tables and tag every row with the group it came from.
df_baseline = pd.read_csv("allfeatures_notnorm_Baseline.csv").assign(target="Baseline")
# Despite its name, df_followup holds the Controls table.
df_followup = pd.read_csv("allfeatures_notnorm_Controls.csv").assign(target="Controls")

# Stack the two groups into one frame with a fresh 0..n-1 index.
df = pd.concat([df_baseline, df_followup], ignore_index=True)

labels_list = [1,2,3]
features_list = [
    'shape', 'firstorder', 'glcm', 'glrlm', 'glszm', 'gldm', 'ngtdm',
]
image_types = [
    'original', 'exponential', 'gradient',
    'lbp-2D', 'lbp-3D-m1', 'lbp-3D-m2', 'lbp-3D-k',
    'logarithm', 'square', 'squareroot',
    'wavelet-LLH', 'wavelet-LHL', 'wavelet-LHH', 'wavelet-HLH',
    'wavelet-HLL', 'wavelet-HHL', 'wavelet-HHH', 'wavelet-LLL',
]
#image_types = ['original']

def plot_graph(df_label_features, dir_name, label):
    """Save one violin plot per feature column, grouped by the ``target`` column.

    Parameters
    ----------
    df_label_features : pandas.DataFrame
        Feature columns plus a ``target`` column used as the x-axis grouping.
    dir_name : str
        Directory the figures are written to, as
        ``<dir_name>/violinplot_<column>.png``. It is not created here.
    label : object
        Only interpolated into the figure title.
    """
    # 'target' is the grouping variable, not a feature. Plotting it against
    # itself gives no usable figure, so it is skipped together with 'image'.
    non_feature_columns = ('image', 'target')

    # The theme is identical for every figure, so it is set once up front.
    sns.set(style="whitegrid")

    for column in df_label_features.columns:
        if column in non_feature_columns:
            continue
        print(column)
        fig = plt.figure(figsize=(5,5))
        try:
            # cut=0 stops the density at the observed minimum and maximum.
            sns.violinplot(x="target", y=column, data=df_label_features, palette="Set3", cut=0)
            ax = sns.stripplot(x="target", y=column, data=df_label_features, color=".25")

            # The column name is already in the title; drop the y-axis label.
            ax.set(ylabel='')
            plt.title('Violinplot of {} for {}'.format(column,label))
            plt.savefig(dir_name + '/violinplot_{}.png'.format(column), bbox_inches='tight')
        finally:
            # Release the figure even when plotting or saving fails.
            plt.close(fig)

# Extract features: one output directory per (label, image type, feature class).

for label in labels_list:
    print(label)
    df_label = df[df['label'] == label]
    # The loop variable is called image_type, not `type`, so that the builtin
    # type() is not overwritten for the rest of the notebook session.
    for image_type in image_types:
        print(image_type)
        for feature in features_list:
            print(feature)
            # Keep the columns matching "<image_type>_<feature>" and attach the
            # group column. assign() builds a new frame instead of writing
            # into the result of filter(), which avoids SettingWithCopyWarning.
            df_label_features = df_label.filter(regex=image_type + "_" + feature).assign(
                target=df_label['target']
            )
            dir_name = "Both_violinplot/" + 'label_' + str(label) + '/' + image_type + '/' + feature
            os.makedirs(dir_name, exist_ok=True)
            plot_graph(df_label_features, dir_name, label)

In [None]:
# Combined violin plots for the normalised feature tables (Patients vs.
# Controls). Only label 1 is processed here; per the original note, label 1 is
# normed while labels 2 and 3 are not.

# Load both feature tables and tag every row with the group it came from.
df_baseline = pd.read_csv("allfeatures_norm_Baseline.csv").assign(target="Patients")
# Despite its name, df_followup holds the Controls table.
df_followup = pd.read_csv("allfeatures_norm_Controls.csv").assign(target="Controls")

# Stack the two groups into one frame with a fresh 0..n-1 index.
df = pd.concat([df_baseline, df_followup], ignore_index=True)

labels_list = [1]
features_list = [
    'shape', 'firstorder', 'glcm', 'glrlm', 'glszm', 'gldm', 'ngtdm',
]
image_types = [
    'original', 'exponential', 'gradient',
    'lbp-2D', 'lbp-3D-m1', 'lbp-3D-m2', 'lbp-3D-k',
    'logarithm', 'square', 'squareroot',
    'wavelet-LLH', 'wavelet-LHL', 'wavelet-LHH', 'wavelet-HLH',
    'wavelet-HLL', 'wavelet-HHL', 'wavelet-HHH', 'wavelet-LLL',
]
#image_types = ['original']

def plot_graph(df_label_features, dir_name, label):
    """Save one violin plot per feature column, grouped by the ``target`` column.

    Parameters
    ----------
    df_label_features : pandas.DataFrame
        Feature columns plus a ``target`` column used as the x-axis grouping.
    dir_name : str
        Directory the figures are written to, as
        ``<dir_name>/violinplot_<column>.png``. It is not created here.
    label : object
        Kept for signature compatibility; the title always says "cistern" and
        does not use this value.
    """
    # 'target' is the grouping variable, not a feature. Plotting it against
    # itself gives no usable figure, so it is skipped together with 'image'.
    non_feature_columns = ('image', 'target')

    # The theme is identical for every figure, so it is set once up front.
    sns.set(style="whitegrid")

    for column in df_label_features.columns:
        if column in non_feature_columns:
            continue
        print(column)
        fig = plt.figure(figsize=(5,5))
        try:
            # cut=0 stops the density at the observed minimum and maximum.
            sns.violinplot(x="target", y=column, data=df_label_features, palette="Set3", cut=0)
            ax = sns.stripplot(x="target", y=column, data=df_label_features, color=".25")

            # The column name is already in the title; drop the y-axis label.
            ax.set(ylabel='')
            # The template has a single placeholder, so only the column is passed.
            plt.title('Violinplot of {} for cistern'.format(column))
            plt.savefig(dir_name + '/violinplot_{}.png'.format(column), bbox_inches='tight')
        finally:
            # Release the figure even when plotting or saving fails.
            plt.close(fig)

# Extract features: one output directory per (label, image type, feature class).
# NOTE(review): this uses the same "Both_violinplot/" root as the
# non-normalised violin plot cell, so files for the same label there are
# overwritten -- presumably intentional; confirm.

for label in labels_list:
    print(label)
    df_label = df[df['label'] == label]
    # The loop variable is called image_type, not `type`, so that the builtin
    # type() is not overwritten for the rest of the notebook session.
    for image_type in image_types:
        print(image_type)
        for feature in features_list:
            print(feature)
            # Keep the columns matching "<image_type>_<feature>" and attach the
            # group column. assign() builds a new frame instead of writing
            # into the result of filter(), which avoids SettingWithCopyWarning.
            df_label_features = df_label.filter(regex=image_type + "_" + feature).assign(
                target=df_label['target']
            )
            dir_name = "Both_violinplot/" + 'label_' + str(label) + '/' + image_type + '/' + feature
            os.makedirs(dir_name, exist_ok=True)
            plot_graph(df_label_features, dir_name, label)

In [None]:
# Combined histograms for Baseline and Controls (non-normalised features).

# Load both feature tables and tag every row with the group it came from.
df_baseline = pd.read_csv("allfeatures_notnorm_Baseline.csv").assign(target="Baseline")
# Despite its name, df_followup holds the Controls table.
df_followup = pd.read_csv("allfeatures_notnorm_Controls.csv").assign(target="Controls")

# Stack the two groups into one frame with a fresh 0..n-1 index.
df = pd.concat([df_baseline, df_followup], ignore_index=True)

labels_list = [1,2,3]
features_list = [
    'shape', 'firstorder', 'glcm', 'glrlm', 'glszm', 'gldm', 'ngtdm',
]
image_types = [
    'original', 'exponential', 'gradient',
    'lbp-2D', 'lbp-3D-m1', 'lbp-3D-m2', 'lbp-3D-k',
    'logarithm', 'square', 'squareroot',
    'wavelet-LLH', 'wavelet-LHL', 'wavelet-LHH', 'wavelet-HLH',
    'wavelet-HLL', 'wavelet-HHL', 'wavelet-HHH', 'wavelet-LLL',
]
#image_types = ['original']

def plot_graph(df_label_features, dir_name, label):
    """Save one histogram per feature column, coloured by the ``target`` column.

    Parameters
    ----------
    df_label_features : pandas.DataFrame
        Feature columns plus a ``target`` column used as the hue grouping.
    dir_name : str
        Directory the figures are written to, as
        ``<dir_name>/histplot_<column>.png``. It is not created here.
    label : object
        Only interpolated into the figure title.
    """
    # 'target' is the hue variable, not a feature; a histogram of it against
    # itself carries no information, so it is skipped together with 'image'.
    non_feature_columns = ('image', 'target')

    # The theme is identical for every figure, so it is set once up front.
    sns.set(style="whitegrid")

    for column in df_label_features.columns:
        if column in non_feature_columns:
            continue
        print(column)
        fig = plt.figure(figsize=(5,5))
        try:
            # common_norm=False normalises each target group's density on its
            # own, so groups of different size stay comparable.
            sns.histplot(data=df_label_features, x=column, hue="target", element="step", common_norm=False, stat="density", kde=False)

            plt.title('Histplot of {} for {}'.format(column,label))
            plt.savefig(dir_name + '/histplot_{}.png'.format(column), bbox_inches='tight')
        finally:
            # Release the figure even when plotting or saving fails.
            plt.close(fig)

# Extract features: one output directory per (label, image type, feature class).

for label in labels_list:
    print(label)
    df_label = df[df['label'] == label]
    # The loop variable is called image_type, not `type`, so that the builtin
    # type() is not overwritten for the rest of the notebook session.
    for image_type in image_types:
        print(image_type)
        for feature in features_list:
            print(feature)
            # Keep the columns matching "<image_type>_<feature>" and attach the
            # group column. assign() builds a new frame instead of writing
            # into the result of filter(), which avoids SettingWithCopyWarning.
            df_label_features = df_label.filter(regex=image_type + "_" + feature).assign(
                target=df_label['target']
            )
            dir_name = "Both_histplot/" + 'label_' + str(label) + '/' + image_type + '/' + feature
            os.makedirs(dir_name, exist_ok=True)
            plot_graph(df_label_features, dir_name, label)

In [None]:
# Reload the non-normalised feature tables and tag each row with its group.

df_baseline = pd.read_csv("allfeatures_notnorm_Baseline.csv").assign(target="Baseline")
# Despite its name, df_followup holds the Controls table.
df_followup = pd.read_csv("allfeatures_notnorm_Controls.csv").assign(target="Controls")

# Stack the two groups into one frame with a fresh 0..n-1 index.
df = pd.concat([df_baseline, df_followup], ignore_index=True)

In [None]:
# Run a two-sample Kolmogorov-Smirnov test on every feature column to see
# whether its distribution differs between df_baseline and df_followup, and
# print the names of the columns with p < alpha.
#
# NOTE(review): rows are not split by 'label' here, unlike the plotting cells,
# so all labels are pooled in each sample -- confirm this is intended.
# NOTE(review): no multiple-comparison correction is applied.

from scipy.stats import ks_2samp

features_list = ['shape', 'firstorder', 'glcm', 'glrlm', 'glszm', 'gldm', 'ngtdm']
# The lbp-3D entries use the same spelling as the plotting cells
# ('lbp-3D-m1', 'lbp-3D-m2', 'lbp-3D-k'). A bare 'lbp-3D' followed by '_'
# cannot match columns named that way, so those features would be skipped
# (assumes the CSV columns follow that naming -- TODO confirm).
image_types = [
    'original', 'exponential', 'gradient',
    'lbp-2D', 'lbp-3D-m1', 'lbp-3D-m2', 'lbp-3D-k',
    'logarithm', 'square', 'squareroot',
    'wavelet-LLH', 'wavelet-LHL', 'wavelet-LHH', 'wavelet-HLH',
    'wavelet-HLL', 'wavelet-HHL', 'wavelet-HHH', 'wavelet-LLL',
]

# Significance threshold for reporting a column.
alpha = 0.05

# The loop variable is called image_type, not `type`, so that the builtin
# type() is not overwritten for the rest of the notebook session.
for image_type in image_types:
    for feature in features_list:
        pattern = image_type + "_" + feature
        df_baseline_features = df_baseline.filter(regex=pattern)
        df_followup_features = df_followup.filter(regex=pattern)
        for column in df_baseline_features.columns:
            if column == 'image':
                continue
            result = ks_2samp(df_baseline_features[column], df_followup_features[column])
            # Report only the features that differ significantly.
            if result.pvalue < alpha:
                print(column)