In [4]:


from monai.inferers import sliding_window_inference
import matplotlib.pyplot as plt
import numpy as np
from monai.transforms import Compose, Invertd, SaveImaged
from monai.inferers import sliding_window_inference
from monai.data import decollate_batch
import torch
import json
from utils import PairFinder
from quant import masked_SUV_img

In [2]:
# JSON file that stores every directory location used by this notebook.
# NOTE(review): absolute, machine-specific path - confirm it exists on the
# host that runs this notebook.
config_file = '/students/2023-2024/master/Shahpouri/DL_PET/config.json'

# Parse the settings file into a plain dict.
with open(config_file, mode='r') as config_handle:
    config = json.load(config_handle)

# Expose each configured entry under its own notebook-level name.
# Input data locations
ga_data_dir = config["ga_data_dir"]
fdg_data_dir = config["fdg_data_dir"]
# Logging location
log_dir = config["log_dir"]
# Output / artifact locations
ga_output_dir = config["ga_output_dir"]
artifact_dir = config["artifacts"]
artifact_output = config["artifact_output"]
fdg_output_dir = config['fdg_output_dir']

In [3]:
from data_preparation import ExternalRadioSetHandling
from data_preparation import LoaderFactory

# Collect the file lists from the FDG data directory. test_ratio=1 is passed
# to the handler; the recorded output of this cell shows every file ending up
# in the test split.
data_handler = ExternalRadioSetHandling(
    data_dir=fdg_data_dir,
    test_ratio=1,
)
train_files, val_files, test_files = data_handler.get_split_data()

# Report how many files landed in each split.
print(f"Test data length: {len(test_files)}")
print(f"Validation data length: {len(val_files)}")
print(f"Train data length: {len(train_files)}")

# Only the test split is handed to the loader factory; train/val are left unset.
# Alternative values kept from the original cell, marked there as "for fdg data":
#   spacing      = [1.92, 1.92, 3.27]
#   spatial_size = (336, 336, 640)  or  (168, 168, 600)
loader_factory = LoaderFactory(
    train_files=None,
    val_files=None,
    test_files=test_files,
    patch_size=[168, 168, 16],
    spacing=[4.07, 4.07, 3.00],
    spatial_size=(168, 168, 400),
)

# Train/validation loaders are disabled in this notebook:
#   train_loader = loader_factory.get_loader('train', batch_size=4, num_workers=2, shuffle=True)
#   val_loader = loader_factory.get_loader('val', batch_size=1, num_workers=2, shuffle=False)
test_loader = loader_factory.get_loader(
    'test',
    batch_size=1,
    num_workers=2,
    shuffle=False,
)

Test data length: 98
Validation data length: 0
Train data length: 0


In [5]:
# Hint handed to the first PairFinder.
# (The original cell first assigned 'dl3_18' and replaced it on the next line.)
# NOTE(review): presumably the hint selects which prediction files under
# ga_output_dir are matched - confirm against utils.PairFinder.
hint1 = 'dl4_23'
pair_finder_dl3_18 = PairFinder(
    f'{ga_data_dir}/NAC',
    f'{ga_data_dir}/MAC',
    ga_output_dir,
    hint1,
)
# Only the second value returned by find_file_triples() is kept.
_, center_pairs_dyn = pair_finder_dl3_18.find_file_triples()


# Hint handed to the second PairFinder.
# (The original cell first assigned 'dl_final' and replaced it on the next line.)
hint2 = 'final_4_26'
pair_finder_adcm = PairFinder(
    f'{ga_data_dir}/NAC',
    f'{ga_data_dir}/MAC',
    ga_output_dir,
    hint2,
)
_, center_pairs_adcm = pair_finder_adcm.find_file_triples()

In [29]:
def load_and_prepare_data(pairs, nac_factor=2, mac_factor=5, mask_val=0.3):
    """Collect flattened masked predicted and reference values over image pairs.

    Every pair is passed through ``masked_SUV_img``; the masked predicted and
    masked reference images it returns are flattened with ``ravel()`` and
    appended, in pair order, to two running lists.

    Args:
        pairs: Iterable of mappings providing the keys ``'nac'``,
            ``'predicted'`` and ``'reference'``.
        nac_factor: Forwarded unchanged to ``masked_SUV_img``. Defaults to 2,
            the value that used to be hard-coded here.
        mac_factor: Forwarded unchanged to ``masked_SUV_img``. Defaults to 5,
            the value that used to be hard-coded here.
        mask_val: Forwarded unchanged to ``masked_SUV_img``. Defaults to 0.3,
            the value that used to be hard-coded here.

    Returns:
        tuple[list, list]: ``(concatenated_pred, concatenated_ref)``. Both
        lists are empty when ``pairs`` is empty.
    """
    concatenated_pred = []
    concatenated_ref = []
    for pair in pairs:
        # The first value returned by masked_SUV_img is not needed here.
        _, masked_predicted_img, masked_reference_img = masked_SUV_img(
            pair['nac'], pair['predicted'], pair['reference'],
            nac_factor=nac_factor, mac_factor=mac_factor, mask_val=mask_val)
        # Flatten each image and append its values element by element.
        concatenated_pred.extend(masked_predicted_img.ravel())
        concatenated_ref.extend(masked_reference_img.ravel())
    return concatenated_pred, concatenated_ref

# Center whose image pairs are analysed in the cells below.
center = 'C1'

# Pair lists for that center from each PairFinder lookup.
adcm_paths = center_pairs_adcm[center]
imcm_paths = center_pairs_dyn[center]

# Flattened masked values used by the statistical tests:
#   imcm_data / adcm_data -> predicted values from each pair list
#   mac_data              -> reference values, taken from the IMCM pair list only
# NOTE(review): the reference values of the ADCM pair list are discarded, so
# later MAC-vs-ADCM comparisons assume both lists share the same references - confirm.
imcm_data, mac_data = load_and_prepare_data(pairs=imcm_paths)
adcm_data, _ = load_and_prepare_data(pairs=adcm_paths)



In [25]:
import numpy as np
from scipy.stats import mannwhitneyu
from quant import load_nifti_image

# Two-sided Mann-Whitney U test between the ADCM and IMCM value lists.
stat, p_value = mannwhitneyu(x=adcm_data, y=imcm_data, alternative='two-sided')

print("U-statistic:", stat)
print("P-value:", p_value)

# Compare the p-value with the significance level and report the decision.
alpha = 0.05
print(
    "Reject the null hypothesis - suggest the distribution of the two samples are significantly different."
    if p_value < alpha
    else "Do not reject the null hypothesis - suggest the distribution of the two samples are not significantly different."
)


U-statistic: 187094849484524.5
P-value: 7.686508384218115e-23
Reject the null hypothesis - suggest the distribution of the two samples are significantly different.


In [31]:
# Two-sided Mann-Whitney U test between the MAC reference values and the IMCM values.
stat, p_value = mannwhitneyu(x=mac_data, y=imcm_data, alternative='two-sided')

print("U-statistic:", stat)
print("P-value:", p_value)

# Significance level for this comparison.
# NOTE(review): the other Mann-Whitney cells in this notebook use 0.05 -
# confirm that 0.02 is intended here.
alpha = 0.02
print(
    "Reject the null hypothesis - suggest the distribution of the two samples are significantly different."
    if p_value < alpha
    else "Do not reject the null hypothesis - suggest the distribution of the two samples are not significantly different."
)

U-statistic: 187998483815357.5
P-value: 1.2603516982102084e-58
Reject the null hypothesis - suggest the distribution of the two samples are significantly different.


In [None]:
# Two-sided Mann-Whitney U test between the MAC reference values and the ADCM values.
stat, p_value = mannwhitneyu(x=mac_data, y=adcm_data, alternative='two-sided')

print("U-statistic:", stat)
print("P-value:", p_value)

# Compare the p-value with the significance level and report the decision.
alpha = 0.05
print(
    "Reject the null hypothesis - suggest the distribution of the two samples are significantly different."
    if p_value < alpha
    else "Do not reject the null hypothesis - suggest the distribution of the two samples are not significantly different."
)

------------------
# Step 1: Mann-Whitney U Test

In [33]:
import pandas as pd


# Read the combined metrics table from disk and preview its first five rows.
file_path = 'Results/combined_data.csv'
combined_data = pd.read_csv(filepath_or_buffer=file_path)

combined_data.head(n=5)

Unnamed: 0,Mean Error (SUV),Mean Absolure Error (SUV),Relative Error (SUV%),Absolure Relative Error (SUV%),Root Mean Squared Error,Peak Signal-to-Noise Ratio,Structual Similarity Index,Center,Dataset
0,-0.391218,1.948693,-3.45576,47.764912,7.159046,47.464313,0.857655,C1,ADCM
1,0.121809,1.248694,19.078541,48.54764,3.226209,39.599713,0.91165,C1,ADCM
2,0.593124,2.198104,17.919191,63.56306,6.74468,37.347102,0.837137,C2,ADCM
3,1.273347,2.40849,26.668924,63.049635,8.056123,35.262026,0.813911,C2,ADCM
4,2.883905,3.928055,37.376147,64.617671,20.673146,36.070237,0.899456,C3,ADCM


In [32]:
from scipy.stats import mannwhitneyu

# Two-sided Mann-Whitney U tests of each model's values against the MAC
# reference values; the resulting p-values feed the FDR correction step.
stat_imcm, p_value_imcm = mannwhitneyu(x=imcm_data, y=mac_data, alternative='two-sided')
stat_adcm, p_value_adcm = mannwhitneyu(x=adcm_data, y=mac_data, alternative='two-sided')

# Report both comparisons, IMCM first.
print("IMCM vs MAC - U-statistic:", stat_imcm, "P-value:", p_value_imcm)
print("ADCM vs MAC - U-statistic:", stat_adcm, "P-value:", p_value_adcm)


IMCM vs MAC - U-statistic: 186875549033538.5 P-value: 1.2603516982102084e-58
ADCM vs MAC - U-statistic: 186529828166413.5 P-value: 5.639571034021006e-150


-------------
# Step 2: False Discovery Rate (FDR) Correction
We'll apply the Benjamini-Hochberg procedure to correct the p-values obtained from multiple comparisons to control the false discovery rate.

In [None]:
from statsmodels.stats.multitest import multipletests

# Raw p-values of the two comparisons against MAC, in the order IMCM, ADCM.
p_values = [p_value_imcm, p_value_adcm]

# Benjamini-Hochberg FDR correction. multipletests returns four values; only
# the second one (the corrected p-values) is kept.
_, p_adjusted, _, _ = multipletests(pvals=p_values, alpha=0.05, method='fdr_bh')

print("Adjusted P-values:", p_adjusted)


----------
# Step 3: Intraclass Correlation Coefficient (ICC)
To assess consistency, specifically the reproducibility between IMCM, ADCM, and MAC, we use the intraclass correlation coefficient (ICC). Python's `pingouin` library provides `intraclass_corr`, which computes the ICC from a long-format DataFrame (one row per target–rater rating).

In [None]:
import pingouin as pg
import pandas as pd

# Wide table: one row per value position, one column per method.
# pd.DataFrame requires the three sequences to have the same length.
df = pd.DataFrame({
    'MAC': mac_data,
    'IMCM': imcm_data,
    'ADCM': adcm_data
})

# pingouin.intraclass_corr takes long-format data and the *names* of the
# target, rater and rating columns. It has no `model`/`type` arguments and
# does not accept a list of rater columns, so the wide table is melted first:
# each row index becomes a target and each method column becomes a rater.
df_long = (
    df.rename_axis('target')
      .reset_index()
      .melt(id_vars='target', var_name='method', value_name='value')
)
# NOTE(review): with one target per voxel this table can be very large;
# consider subsampling if the computation is too slow or memory-hungry.

# Calculate ICC (the result table holds one row per ICC type).
icc_results = pg.intraclass_corr(
    data=df_long,
    targets='target',
    raters='method',
    ratings='value',
)
# Keep the single-rater one-way estimate, as selected by the original cell.
# NOTE(review): 'ICC2'/'ICC3' are the two-way variants - confirm which is intended.
icc_value = icc_results.set_index('Type').at['ICC1', 'ICC']

print("ICC value:", icc_value)
print(icc_results[['ICC', 'CI95%']])


---------------
# Step 4: Classify Reproducibility Based on ICC
Based on the ICC value, classify the reproducibility:

In [None]:
def classify_icc(icc_val):
    """Map an ICC value to a reproducibility label.

    The bands are contiguous, so every real number falls into exactly one:

    * ``icc_val < 0.40``          -> ``'Poor reproducibility'``
    * ``0.40 <= icc_val < 0.60``  -> ``'Fair reproducibility'``
    * ``0.60 <= icc_val < 0.75``  -> ``'Good reproducibility'``
    * ``icc_val >= 0.75``         -> ``'Excellent reproducibility'``

    Args:
        icc_val: ICC value as a real number.

    Returns:
        str: One of the four labels above.

    Raises:
        ValueError: If ``icc_val`` is NaN, since no band applies.
    """
    import math

    # NaN fails every comparison and would otherwise fall through to "Excellent".
    if math.isnan(icc_val):
        raise ValueError("ICC value is NaN; reproducibility cannot be classified.")

    # Upper bounds are 0.60 and 0.75 (not 0.59 / 0.74) so that values such as
    # 0.595 or 0.745 are not skipped by every branch.
    if icc_val < 0.40:
        return 'Poor reproducibility'
    if icc_val < 0.60:
        return 'Fair reproducibility'
    if icc_val < 0.75:
        return 'Good reproducibility'
    return 'Excellent reproducibility'

# Turn the ICC value computed above into its reproducibility label and show it.
reproducibility = classify_icc(icc_val=icc_value)
print("Reproducibility:", reproducibility)
