In [3]:
!pip install pydicom

Collecting pydicom
  Downloading pydicom-2.3.1-py3-none-any.whl (2.0 MB)
     |████████████████████████████████| 2.0 MB 9.1 MB/s            
[?25hInstalling collected packages: pydicom
Successfully installed pydicom-2.3.1


In [4]:
import numpy as np
import os
import pydicom
from pydicom.dataset import Dataset, FileMetaDataset
from pydicom.uid import ExplicitVRLittleEndian
from datetime import datetime

def create_synthetic_dicom_dataset(num_samples=10, oct_shape=(64, 64, 64), fundus_shape=(256, 256), save_dir='synthetic_dicom_dataset'):
    oct_dir = os.path.join(save_dir, 'OCT')
    fundus_dir = os.path.join(save_dir, 'Fundus')
    os.makedirs(oct_dir, exist_ok=True)
    os.makedirs(fundus_dir, exist_ok=True)

    for i in range(num_samples):
        oct_image = np.random.rand(*oct_shape) * 255
        oct_image = oct_image.astype(np.uint8)
        for j in range(oct_shape[0]):
            save_image_as_dicom(oct_image[j], os.path.join(oct_dir, f'oct_{i}_slice_{j}.dcm'), 'OCT')

        fundus_image = np.random.rand(*fundus_shape) * 255
        fundus_image = fundus_image.astype(np.uint8)
        save_image_as_dicom(fundus_image, os.path.join(fundus_dir, f'fundus_{i}.dcm'), 'Fundus')

def save_image_as_dicom(image, file_path, modality):
    file_meta = FileMetaDataset()
    file_meta.MediaStorageSOPClassUID = pydicom.uid.generate_uid()
    file_meta.MediaStorageSOPInstanceUID = pydicom.uid.generate_uid()
    file_meta.TransferSyntaxUID = ExplicitVRLittleEndian

    ds = Dataset()
    ds.file_meta = file_meta
    ds.is_little_endian = True
    ds.is_implicit_VR = False

    dt = datetime.now()
    ds.ContentDate = dt.strftime('%Y%m%d')
    ds.ContentTime = dt.strftime('%H%M%S.%f')

    ds.PatientName = "SyntheticData"
    ds.PatientID = "123456"
    ds.Modality = modality
    ds.SeriesInstanceUID = pydicom.uid.generate_uid()
    ds.StudyInstanceUID = pydicom.uid.generate_uid()
    ds.ImageType = ["ORIGINAL", "PRIMARY"]

    ds.PixelData = image.tobytes()
    ds.Rows, ds.Columns = image.shape
    ds.SamplesPerPixel = 1
    ds.PhotometricInterpretation = "MONOCHROME2"
    ds.BitsAllocated = 8
    ds.BitsStored = 8
    ds.HighBit = 7
    ds.PixelRepresentation = 0

    ds.save_as(file_path)

create_synthetic_dicom_dataset()




In [2]:
import pandas as pd

In [3]:
df = pd.read_csv("/projects/COMPXR/pranay/Eyes/Datasets/image_data_experiment_512_final.csv")

In [7]:
df

Unnamed: 0,Original Path,Class,Type,New Filename,Unnamed: 4,Unnamed: 5,Disease,Real Y/N
0,/projects/COMPXR/pranay/Eyes/Datasets/Diff_Gen...,DME,Diffusion,334773500395255107.png,334773500395255107,,CNV,Y
1,/projects/COMPXR/pranay/Eyes/Datasets/OCT/zipp...,DME,Original,367547604713575248.png,367547604713575248,,DME,Y
2,/projects/COMPXR/pranay/Eyes/Datasets/Diff_Gen...,DRUSEN,Diffusion,780657238357548120.png,780657238357548120,,DRUSEN,Y
3,/projects/COMPXR/pranay/Eyes/Datasets/OCT/zipp...,DME,Original,1015057264465140849.png,1015057264465140849,,CNV,N
4,/projects/COMPXR/pranay/Eyes/Datasets/Diff_Gen...,DRUSEN,Diffusion,1024381007564714710.png,1024381007564714710,,DRUSEN,N
...,...,...,...,...,...,...,...,...
75,/projects/COMPXR/pranay/Eyes/Datasets/OCT/zipp...,DRUSEN,Original,16777956987204134626.png,16777956987204134626,,DRUSEN,Y
76,/projects/COMPXR/pranay/Eyes/Datasets/Diff_Gen...,DME,Diffusion,17162727318726466690.png,17162727318726466690,,DME,N
77,/projects/COMPXR/pranay/Eyes/Datasets/Diff_Gen...,NORMAL,Diffusion,17744808273742457831.png,17744808273742457831,,NORMAL,N
78,/projects/COMPXR/pranay/Eyes/Datasets/Diff_Gen...,CNV,Diffusion,18253888542873269655.png,18253888542873269655,,CNV,Y


In [11]:


# Assuming 'df' is your DataFrame
# Step 1: Drop rows where either 'Class', 'Disease', or 'Real' is NaN
df_cleaned = df.dropna(subset=['Class', 'Disease', 'Real  Y/N'])

# Step 2: Group by 'Type' and calculate accuracies
for type in df_cleaned['Type'].unique():
    type_df = df_cleaned[df_cleaned['Type'] == type]

    # Accuracy for 'Class' vs 'Disease'
    class_disease_accuracy = type_df.apply(lambda row: row['Class'].lower() in row['Disease'].lower(), axis=1).mean()
    print(f"Accuracy of Disease Detection: {type}: {class_disease_accuracy * 100}%")

    # Accuracy for 'Real' Y/N
    if type in ['Diffusion']:
        correct_label = 'N'
    else:  # Assuming 'Original' should be 'Y'
        correct_label = 'Y'
    real_accuracy = (type_df['Real  Y/N'].str.strip() == correct_label).mean()
    print(f"Accuracy for Synthetic Detection: {type}: {real_accuracy * 100}%")



Accuracy of Disease Detection: Diffusion: 95.0%
Accuracy for Synthetic Detection: Diffusion: 65.0%
Accuracy of Disease Detection: Original: 87.5%
Accuracy for Synthetic Detection: Original: 40.0%


In [5]:

# Assuming 'df' is your DataFrame
# Step 1: Drop rows where 'Class', 'Disease', or 'Real' is NaN

# Assuming 'df' is your DataFrame
# Step 1: Drop rows where 'Class', 'Disease', or 'Real Y/N' is NaN
df_cleaned = df.dropna(subset=['Class', 'Disease', 'Real  Y/N'])

# Step 2: Group by 'Type' and then by each 'Class' within each 'Type'
for type in df_cleaned['Type'].unique():
    type_df = df_cleaned[df_cleaned['Type'] == type]
    print(f"\nType: {type}")

    for class_name in type_df['Class'].unique():
        class_df = type_df[type_df['Class'] == class_name]

        # Modify here: Check if 'Disease' contains 'Class' (case-insensitive)
        class_disease_accuracy = class_df.apply(lambda row: class_name.lower() in row['Disease'].lower(), axis=1).mean()
        print(f"  Accuracy of Class '{class_name}' vs Disease: {class_disease_accuracy * 100}%")

    # Accuracy for 'Real' Y/N within each type
        if type in ['Diffusion']:
            correct_label = 'N'
        else:  # Assuming 'Original' should be 'Y'
            correct_label = 'Y'
        real_accuracy = (class_df['Real  Y/N'] == correct_label).mean()
        print(f"  Accuracy for Real Y/N for '{class_name}' for Type {type}: {real_accuracy * 100}%")



Type: Diffusion
  Accuracy of Class 'DME' vs Disease: 90.0%
  Accuracy for Real Y/N for 'DME' for Type Diffusion: 50.0%
  Accuracy of Class 'DRUSEN' vs Disease: 100.0%
  Accuracy for Real Y/N for 'DRUSEN' for Type Diffusion: 80.0%
  Accuracy of Class 'CNV' vs Disease: 90.0%
  Accuracy for Real Y/N for 'CNV' for Type Diffusion: 80.0%
  Accuracy of Class 'NORMAL' vs Disease: 100.0%
  Accuracy for Real Y/N for 'NORMAL' for Type Diffusion: 30.0%

Type: Original
  Accuracy of Class 'DME' vs Disease: 90.0%
  Accuracy for Real Y/N for 'DME' for Type Original: 40.0%
  Accuracy of Class 'DRUSEN' vs Disease: 90.0%
  Accuracy for Real Y/N for 'DRUSEN' for Type Original: 50.0%
  Accuracy of Class 'NORMAL' vs Disease: 100.0%
  Accuracy for Real Y/N for 'NORMAL' for Type Original: 20.0%
  Accuracy of Class 'CNV' vs Disease: 70.0%
  Accuracy for Real Y/N for 'CNV' for Type Original: 50.0%


In [None]:
def custom_match(row):
    if row['column1'] == 'ORIGINAL' and row['column2'] == 'Y':
        return True
    elif row['column1'] in ['Stylegan3', 'diffusion'] and row['column2'] == 'N':
        return True
    else:
        return False