In [1]:
import pandas as pd
import re
import os

In [None]:
# Manifest CSV listing, per series, the image folder and segmentation file paths.
csv_path = r'/home/omen/Documents/VS_data/vs_paths.csv'
# Dataset root. Written with forward slashes: the previous backslash form was a
# relative path on POSIX and only resolved because downstream cells rewrite
# "\\" to "/" after joining. The joined paths are unchanged by this fix.
data_dir = r'/home/omen/Documents/VS_data'
df = pd.read_csv(csv_path)

In [3]:
# Join every manifest image path onto the dataset root and normalise any
# backslash separators to forward slashes in a single pass.
weight_folders = [
    os.path.join(data_dir, raw_path).replace("\\", "/")
    for raw_path in df['image_path']
]
len(weight_folders)

301

In [4]:
# Spot-check one resolved image folder path (index 6).
weight_folders[6]

'D:/VSdata/Vestibular-Schwannoma-MC-RC/VS-SEG-013/12-23-1991-NA-t1of10External Images for PACS-95314/2.000000-T2T2 AXIAL CISS-10876'

In [5]:
# Join every manifest segmentation path onto the dataset root and normalise any
# backslash separators to forward slashes in a single pass.
seg_paths = [
    os.path.join(data_dir, raw_path).replace("\\", "/")
    for raw_path in df['SegmentationPath']
]
len(seg_paths)

301

In [6]:
def extract_patient_name(path):
    """Return the patient identifier embedded in ``path``, or ``None``.

    The identifier is the path component made of digits, optionally prefixed
    with ``VS-SEG-`` or ``VS-MC-RC-``, that is immediately followed by a
    component starting with an ``NN-NN-NNNN`` date. ``None`` is returned when
    no such pair of components occurs in ``path``.
    """
    found = re.search(
        r"/(?P<patient_name>(?:VS-(?:SEG|MC-RC)-)?\d+)/(?P<date_of_mri>\d{2}-\d{2}-\d{4})",
        path,
    )
    return found.group("patient_name") if found else None

def extract_patient_year(path):
    """Return the study date string embedded in ``path``, or ``None``.

    Despite the name, the whole ``NN-NN-NNNN`` date is returned, not just the
    year. The date must start the path component that directly follows the
    patient-identifier component (digits, optionally prefixed with ``VS-SEG-``
    or ``VS-MC-RC-``). ``None`` is returned when the pattern is absent.
    """
    found = re.search(
        r"/(?P<patient_name>(?:VS-(?:SEG|MC-RC)-)?\d+)/(?P<date_of_mri>\d{2}-\d{2}-\d{4})",
        path,
    )
    return found.group("date_of_mri") if found else None

In [7]:
# Collect one "<patient id>_<study date>" label per image folder.
x = []

for folder in weight_folders:
    # Each extractor returns None when the folder path lacks the
    # "<patient>/<date>" components; such folders are reported and skipped.
    patient_id = extract_patient_name(folder)
    if patient_id is None:
        print(f"Warning: Could not extract patient ID from {folder}. Skipping.")
        continue

    patient_year = extract_patient_year(folder)
    if patient_year is None:
        print(f"Warning: Could not extract patient year from {folder}. Skipping.")
        continue

    x.append(f"{patient_id}_{patient_year}")

In [8]:
from datetime import datetime
from collections import defaultdict

# Group the "<patient>_<date>" labels by patient, storing each label next to
# its parsed date so the tuples sort chronologically.
id_to_dates = defaultdict(list)
for label in x:
    patient, date_text = label.split('_')
    parsed = datetime.strptime(date_text, "%m-%d-%Y")
    id_to_dates[patient].append((parsed, label))

# For every patient keep only the label belonging to the earliest study date.
unique_patient = []
for entries in id_to_dates.values():
    earliest = min(entries)
    unique_patient.append(earliest[1])

len(unique_patient)


124

In [None]:
# rf_csv = r"C:\Users\Acer\Desktop\3dVS\RFcsv\volume_data7.csv"
# rf_data = pd.read_csv(rf_csv, index_col=0)
# rf_data

In [None]:
import os
# Switch the working directory to the newDynUnet code directory; the `utils` and
# `model` imports in the next cell presumably resolve relative to it — TODO confirm.
os.chdir('/home/omen/Documents/Nafisha/VS/VS_refactored_code/newDynUnet')

In [None]:
from utils.utils import load_spacing, resample_pair, resample_segmentation_to_image
from utils.transformation import transform
from model import DynUNet

In [11]:
import SimpleITK as sitk
import numpy as np
# Shared SimpleITK series reader, reused for every image folder loaded in the
# feature-extraction loop.
reader = sitk.ImageSeriesReader()
# from utils.utils import load_spacing, resample_pair

In [14]:
import matplotlib.pyplot as plt

def plot_image_and_mask(image_data, mask_data, slice_num: int = None, cmap='gray'):
    """Show one slice of an image, its mask, and the mask overlaid on the image.

    Args:
        image_data: Array-like indexed as ``[slice, :, :]``.
        mask_data: Array-like indexed the same way as ``image_data``.
        slice_num: Index along the first axis to display. When ``None`` the
            middle slice (``image_data.shape[0] // 2``) is used.
        cmap: Colormap for the image and mask panels; the overlay always draws
            the mask with ``'Reds'`` at 40% opacity.
    """
    if slice_num is None:
        slice_num = image_data.shape[0] // 2

    image_slice = image_data[slice_num, :, :]
    mask_slice = mask_data[slice_num, :, :]

    fig, axs = plt.subplots(1, 3, figsize=(15, 5))
    image_ax, mask_ax, overlay_ax = axs

    image_ax.imshow(image_slice, cmap=cmap)
    mask_ax.imshow(mask_slice, cmap=cmap)
    # Overlay panel: image first, then the semi-transparent mask on top.
    overlay_ax.imshow(image_slice, cmap=cmap)
    overlay_ax.imshow(mask_slice, cmap='Reds', alpha=0.4)

    panels = (
        (image_ax, f"Image Slice {slice_num}"),
        (mask_ax, f"Mask Slice {slice_num}"),
        (overlay_ax, f"Overlay Slice {slice_num}"),
    )
    for panel_ax, panel_title in panels:
        panel_ax.set_title(panel_title)
        panel_ax.axis('off')

    plt.tight_layout()
    plt.show()

In [None]:
import torch

# Instantiate the project's DynUNet on the GPU. The hyper-parameters presumably
# mirror the configuration the checkpoint was trained with — TODO confirm.
model = DynUNet(
    spatial_dims=3,
    in_channels=1,
    out_channels=1,
    kernel_size=[3, 3, 3, 3, 3, 3],
    strides=[1, 2, 2, 2, 2, 2],
    upsample_kernel_size=[2, 2, 2, 2, 2],
    res_block=True,
).to('cuda')

# The network is used only for feature extraction, so put it in evaluation mode:
# torch.no_grad() alone does not change how dropout/normalisation layers behave.
# Called before loading so the cell still displays the load_state_dict result.
model.eval()

checkpointPath = "/home/omen/Documents/Nafisha/VS/DynUnetCheckpoints/version10.pth"
# NOTE(review): torch.load unpickles the file; only load checkpoints from a trusted source.
model.load_state_dict(torch.load(checkpointPath))

In [None]:
count = 0

# Maps patient ID -> flattened output of the last encoder stage.
# If several folders match for the same patient, the last one processed wins.
learnt_feature_dict = {}
for i, image_folder in enumerate(weight_folders):
    patient_id = extract_patient_name(image_folder)
    study_date = extract_patient_year(image_folder)
    id_date = f'{patient_id}_{study_date}'

    # Only folders whose "<patient>_<date>" label was selected as that
    # patient's earliest study are processed.
    if id_date in unique_patient:
        # Read the image series stored in this folder.
        dicom_series = reader.GetGDCMSeriesFileNames(image_folder)
        reader.SetFileNames(dicom_series)
        reference_image = reader.Execute()

        # Load the matching segmentation and resample it against the image.
        segmentation_image = sitk.ReadImage(seg_paths[i])
        segmentation_resampled = resample_segmentation_to_image(segmentation_image, reference_image)

        image_data = sitk.GetArrayFromImage(reference_image).astype(np.float32)
        mask_data = sitk.GetArrayFromImage(segmentation_resampled).astype(np.uint8)

        spacing = load_spacing(reference_image)
        image_data, mask_data = resample_pair(image_data, mask_data, spacing)

        # Prepend a channel axis before handing the pair to the transform.
        data_dict = {
            "image": image_data[np.newaxis, ...],
            "mask": mask_data[np.newaxis, ...],
        }
        transformed = transform(data_dict)

        # Add a batch axis; only the image is moved to the GPU.
        transformed_image = transformed["image"].unsqueeze(0).to('cuda')
        transformed_mask = transformed["mask"].unsqueeze(0)

        with torch.no_grad():
            encoder_outputs, _ = model(transformed_image)
        deepest_stage = encoder_outputs[-1]
        learnt_feature = deepest_stage.squeeze().flatten()

        learnt_feature_dict[patient_id] = learnt_feature

    # Progress indicator: report every tenth folder index.
    if i%10==0:
        print(i)


torch.Size([262144])


In [38]:
# Display the full patient-ID -> feature-vector mapping.
learnt_feature_dict

{'VS-SEG-003': metatensor([-0.0044, -0.0047, -0.0046,  ...,  0.1734, -0.0026,  0.8034],
        grad_fn=<AliasBackward0>),
 'VS-SEG-010': metatensor([-0.0044, -0.0047, -0.0046,  ...,  0.1734, -0.0026,  0.8034],
        grad_fn=<AliasBackward0>),
 'VS-SEG-013': metatensor([-0.0044, -0.0047, -0.0046,  ...,  0.1734, -0.0026,  0.8034],
        grad_fn=<AliasBackward0>),
 'VS-SEG-021': metatensor([-0.0044, -0.0047, -0.0046,  ...,  0.1734, -0.0026,  0.8034],
        grad_fn=<AliasBackward0>),
 'VS-SEG-025': metatensor([-0.0044, -0.0047, -0.0046,  ...,  0.1734, -0.0026,  0.8034],
        grad_fn=<AliasBackward0>),
 'VS-SEG-028': metatensor([-0.0044, -0.0047, -0.0046,  ...,  0.1734, -0.0026,  0.8034],
        grad_fn=<AliasBackward0>),
 'VS-SEG-030': metatensor([-0.0044, -0.0047, -0.0046,  ...,  0.1734, -0.0026,  0.8034],
        grad_fn=<AliasBackward0>),
 'VS-SEG-031': metatensor([-0.0044, -0.0047, -0.0046,  ...,  0.1734, -0.0026,  0.8034],
        grad_fn=<AliasBackward0>),
 'VS-SEG-032': m

In [None]:
# Stack the per-patient feature vectors into one array (one row per patient)
# and wrap it in a DataFrame indexed by patient ID.
# NOTE: this rebinds `df`, which earlier held the path manifest read from csv_path.
patient_ids = list(learnt_feature_dict.keys())
feature_tensor = torch.stack(list(learnt_feature_dict.values()))
deep_features = feature_tensor.detach().cpu().numpy()
df = pd.DataFrame(deep_features, index=patient_ids)

In [None]:
import pandas as pd
# Load the feature table (presumably the random-forest-selected features, going
# by the file name); the first CSV column becomes the index.
volume_data = pd.read_csv('/home/omen/Documents/Nafisha/VS/VS_refactored_code/RandomForest/CSVs/RFselected.csv', index_col=0)
# NOTE(review): the recorded output for this cell is a (rows, columns) tuple, which
# `len` does not produce — it probably came from an earlier `.shape` call; confirm.
len(volume_data)

(124, 50)

In [None]:
# Put the deep-feature columns beside the tabular features; rows are aligned on
# the index, and labels present in only one frame get NaN for the other's columns.
merged_df = pd.concat([volume_data, df], axis="columns")

Unnamed: 0_level_0,volume_diff,change,original_shape_MajorAxisLength,original_shape_Maximum2DDiameterRow,original_shape_Maximum3DDiameter,original_glcm_Imc1,original_glcm_MCC,original_gldm_DependenceNonUniformity,original_glrlm_GrayLevelNonUniformity,original_glrlm_RunLengthNonUniformity,original_glszm_LargeAreaHighGrayLevelEmphasis,original_ngtdm_Coarseness
Patient_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
VS-MC-RC-001,-51.0,0,11.098302,10.667506,11.824483,-0.055402,0.324320,53.124629,222.335594,281.082912,1.741270e+04,0.005272
VS-MC-RC-002,1069.0,1,12.050322,8.669793,14.842891,-0.147962,0.454266,380.436242,41.646779,1070.016494,2.363640e+03,0.005885
VS-MC-RC-004,-13391.0,0,32.442076,33.636311,38.487053,-0.402398,0.941602,16772.235332,18344.919598,94705.574083,3.764821e+08,0.000073
VS-MC-RC-005,2256.0,1,15.793601,15.119270,21.244994,-0.084973,0.817496,3718.285922,397.743860,11970.271692,1.321722e+04,0.000656
VS-MC-RC-006,205.0,1,21.052694,14.840822,24.568533,-0.100471,0.581540,815.913548,976.411879,4895.986528,5.086712e+05,0.000963
...,...,...,...,...,...,...,...,...,...,...,...,...
VS-SEG-195,,0,11.024049,6.088697,11.833204,-0.665496,0.974734,108.379310,3.299489,167.773191,5.449440e+03,0.023994
VS-SEG-197,1251.0,1,15.134468,13.550974,19.078826,-0.083603,0.490837,296.263580,136.934265,1598.800575,1.087350e+05,0.004280
VS-SEG-199,1102.0,1,20.328248,20.264196,30.923181,-0.092100,0.519013,4153.128225,6676.043156,23640.329189,2.321828e+06,0.000146
VS-SEG-218,1870.0,1,24.390526,27.083334,31.787093,-0.238635,0.774154,3271.633887,8545.320466,11915.651285,8.674691e+06,0.000149


In [None]:
# Write the merged feature table (index included) to CSV.
dest_path = r'/home/omen/Documents/Nafisha/VS/VS_refactored_code/RandomForest/CSVs/DFcsv.csv'
merged_df.to_csv(dest_path)