In [None]:
# Input spreadsheet selection: exactly one CT_EXCEL_FILE assignment should be active.
# The two commented-out alternatives below point at other spreadsheets in the same Inputs folder.
# CT_EXCEL_FILE = '/mnt/fast-data/mjc/AutoRECIST/Inputs/PDS_AMGEN_20020408_CIA-LAB_Image_And_Contour_2022-01-10.xlsx'
# CT_EXCEL_FILE = '/mnt/fast-data/mjc/AutoRECIST/Inputs/PDS_AUTO_RECIST CIA-LAB Image and Contour 2020-10-01.xlsx'
CT_EXCEL_FILE ='/mnt/fast-data/mjc/AutoRECIST/Inputs/PDS New 1033 CUIMC cases 2021-12-22.xlsx'
# NOTE(review): SAVE_PATH is not referenced anywhere in the visible part of this file.
SAVE_PATH = '/mnt/fast-disk1/refine_gt/'

import pandas as pd
# Load the spreadsheet into a DataFrame; later cells read its 'Image File Path' column.
df_all = pd.read_excel(CT_EXCEL_FILE)

# Dumps the whole frame as plain text.
print(df_all)



In [None]:
# NOTE(review): the original comment said "a liver lesions case", but no row filtering happens here;
# df starts out as the very same object as df_all.
df = df_all

# Keep one row per series folder: only the first occurrence of each 'Image File Path' survives.
# drop_duplicates returns a new frame, so df_all itself is left untouched.
df = df.drop_duplicates(subset=['Image File Path'], keep='first')
print(df)

In [None]:
# coding: utf-8



from __future__ import division
from __future__ import print_function

# In[110]:

import numpy as np
import pandas as pd
import pydicom
import os
import matplotlib.pyplot as plt
import collections
# from tqdm import tqdm_notebook as tqdm
from datetime import datetime

from math import ceil, floor
import cv2
import sys
# from sklearn.model_selection import ShuffleSplit

def window_image(img, window_center,window_width, intercept, slope):
    """Rescale raw pixel values and clamp them to an intensity window.

    The image is first mapped through ``img * slope + intercept`` (a new array,
    the input is not modified) and then every value outside
    ``[window_center - window_width//2, window_center + window_width//2]``
    is replaced by the nearest window bound.
    """
    half_width = window_width//2
    lower = window_center - half_width
    upper = window_center + half_width

    rescaled = img*slope + intercept
    # Clamp in place on the freshly created array.
    rescaled[rescaled < lower] = lower
    rescaled[rescaled > upper] = upper
    return rescaled


def get_first_of_dicom_field_as_int(x):
    """Convert a DICOM field value to ``int``.

    When ``x`` is exactly a ``pydicom.multival.MultiValue`` its first element is
    converted; any other value is converted directly with ``int(x)``.
    """
    holds_several_values = type(x) == pydicom.multival.MultiValue
    return int(x[0]) if holds_several_values else int(x)

def get_windowing(data):
    """Read the windowing/rescale fields of a DICOM dataset as integers.

    Returns ``[window_center, window_width, intercept, slope]`` read from tags
    (0028,1050), (0028,1051), (0028,1052) and (0028,1053). Every value goes
    through ``get_first_of_dicom_field_as_int``, so fractional values are
    truncated by ``int()``.
    """
    wanted_tags = (
        ('0028','1050'),  # window center
        ('0028','1051'),  # window width
        ('0028','1052'),  # intercept
        ('0028','1053'),  # slope
    )
    # Read all four raw values first, then convert them.
    raw_values = [data[tag].value for tag in wanted_tags]
    return [get_first_of_dicom_field_as_int(raw) for raw in raw_values]

def _normalize(img):
    if img.max() == img.min():
        return np.zeros(img.shape)-1
    return 2 * (img - img.min())/(img.max() - img.min()) - 1

def normalize_minmax(img):
    """Min-max scale ``img`` into the range [-1, 1].

    Returns an all -1 array of the same shape when every pixel has the same value.
    """
    low = img.min()
    high = img.max()
    if low != high:
        return 2*(img - low) / (high - low) - 1
    # Degenerate (constant) image: avoid dividing by zero.
    return np.zeros(img.shape)-1

def getName(s):
    """Return the part of ``s`` between its last '/' and its last '.'.

    The slice starts AT the last '/' (so the slash is included in the result)
    and stops just before the last '.'. When a character is absent ``rfind``
    yields -1 and that value is used as the slice bound unchanged.
    """
    last_slash, last_dot = s.rfind('/'), s.rfind('.')
    return s[last_slash:last_dot]


def _read(path, desired_size = (512,512)):
    """Load one DICOM slice as a windowed image normalised to [-1, 1].

    Will be used in DataGenerator.

    Args:
        path: path of the DICOM file.
        desired_size: target ``(rows, cols)`` of the returned array.

    Returns:
        A 2-D array of shape ``desired_size[:2]``. If the file cannot be read or
        decoded, a warning is printed and an array filled with -1 is returned
        (best-effort behaviour kept from the original).
    """
    target_shape = tuple(desired_size[:2])

    try:
        # dcmread is the reader already used by get_dicom_header_df in this file;
        # read_file is its deprecated alias.
        data = pydicom.dcmread(path)
        image = data.pixel_array
        window_center , window_width, intercept, slope = get_windowing(data)

        image_windowed = window_image(image, window_center, window_width, intercept, slope)
        img = normalize_minmax(image_windowed)

    except Exception as err:
        # Keep going with a blank slice, but say which file failed and why.
        print('Warning: could not read %s (%s); using a blank image.' % (path, err))
        img = np.zeros(target_shape)-1

    if tuple(img.shape[:2]) != target_shape:
        print("image shape is not desired size. Interpolation is done.")
        # cv2.resize expects dsize as (width, height), i.e. (cols, rows).
        img = cv2.resize(img, (target_shape[1], target_shape[0]), interpolation=cv2.INTER_LINEAR)

    return img



import os
import numpy as np
import pydicom

# Cache: series directory -> header DataFrame built by get_dicom_header_df.
D_dir2header_df = {}


def get_dicom_header_df(image_dir , labels = []):
    """Collect selected DICOM header fields of every slice in a folder.

    Args:
        image_dir: directory containing the ``.dcm`` files of one series.
        labels: column names to collect; when empty, a built-in default set is
            used. Multi-valued fields are stored as ``<Keyword>_<i>`` columns,
            except WindowCenter/WindowWidth, for which only the first value is kept.
            The list must contain 'ImageName' and 'InstanceNumber'.

    Returns:
        DataFrame indexed by InstanceNumber with one row per accepted file.
        A field that is absent from a particular file is stored as None/NaN in
        that row instead of breaking the construction of the whole table.

    Notes:
        Results are cached in ``D_dir2header_df`` keyed by ``image_dir`` only,
        so a later call with different ``labels`` returns the cached table.
        Files whose name lacks '.dcm' or that are smaller than 5 KiB are skipped.
    """
    global D_dir2header_df
    if image_dir in D_dir2header_df:
        return D_dir2header_df[image_dir]

    labels = ['ImageName','InstanceNumber',
            'BitsAllocated', 'BitsStored', 'Columns', 'HighBit', 
            'ImageOrientationPatient_0', 'ImageOrientationPatient_1', 'ImageOrientationPatient_2',
            'ImageOrientationPatient_3', 'ImageOrientationPatient_4', 'ImageOrientationPatient_5',
            'ImagePositionPatient_0', 'ImagePositionPatient_1', 'ImagePositionPatient_2',
            'Modality', 'PatientID', 'PhotometricInterpretation', 'PixelRepresentation',
            'PixelSpacing_0', 'PixelSpacing_1', 'RescaleIntercept', 'RescaleSlope', 'Rows', 'SOPInstanceUID',
            'SamplesPerPixel', 'SeriesInstanceUID', 'StudyID', 'StudyInstanceUID', 
            'WindowCenter', 'WindowWidth', 
        ] if not labels else labels

    # Fields that are expanded into numbered columns, and fields reduced to their first value.
    expanded_fields = ['ImageOrientationPatient','ImagePositionPatient','PixelSpacing']
    first_value_fields = ["WindowCenter", "WindowWidth"]

    data = {l: [] for l in labels}

    for image in sorted(os.listdir(image_dir)):
        if '.dcm' not in image:
            continue
        image_path = os.path.join(image_dir, image)
        if os.path.getsize(image_path) < 5*1024:
            print('%s size < 5kb skiped!'%image_path )
            continue
        data["ImageName"].append(image)

        ds = pydicom.dcmread(image_path)
        for metadata in ds.dir():
            if metadata == "PixelData":
                continue
            if metadata not in data and metadata not in expanded_fields:
                continue
            metadata_values = getattr(ds, metadata)
            if type(metadata_values) == pydicom.multival.MultiValue:
                if metadata in first_value_fields:
                    data[metadata].append(metadata_values[0])
                else:
                    for i, v in enumerate(metadata_values):
                        data[f"{metadata}_{i}"].append(v)
            else:
                data[metadata].append(metadata_values)

        # Pad every column this file did not fill, so all columns keep one entry
        # per file; otherwise a single missing tag makes pd.DataFrame raise
        # because the column lists have different lengths.
        n_rows = len(data["ImageName"])
        for column in data.values():
            if len(column) < n_rows:
                column.append(None)

    df_image = pd.DataFrame(data).set_index("InstanceNumber")
    D_dir2header_df[image_dir] = df_image
    return df_image


# In[2]:

def InstanceNumber2file_name(df_image, num):
    """Look up the 'ImageName' entry of the row whose index label is ``num``."""
    name_column = df_image['ImageName']
    return name_column.loc[num]

def InstanceNumber2data_element(df_image, num, label):
    """Return the value stored in column ``label`` for the row whose index label is ``num``."""
    selected_column = df_image[label]
    return selected_column.loc[num]

    
def get_SliceThickness(df_image):
    """Estimate the slice interval of a series from its z positions.

    The interval is the most frequent difference between consecutive
    'ImagePositionPatient_2' values in row order (absolute value). As a sanity
    check the same statistic is computed on the sorted positions; a warning is
    printed when the two disagree or when both are zero.

    Args:
        df_image: header table with an 'ImagePositionPatient_2' column.

    Returns:
        The interval as a float. A series with fewer than two slices has no
        consecutive difference, so 0.0 is returned with the zero-interval warning
        instead of raising.
    """
    positions = df_image['ImagePositionPatient_2'].tolist()

    if len(positions) < 2:
        print('Warning intv is 0')
        print(df_image['ImagePositionPatient_2'])
        return 0.0

    def most_common_step(values):
        # Counter gives the mode in one pass (list.count inside max() is quadratic).
        steps = np.diff(values).tolist()
        return float(collections.Counter(steps).most_common(1)[0][0])

    res = abs(most_common_step(positions))
    res2 = most_common_step(sorted(positions))

    if res2 == 0 and res == 0:
        print('Warning intv is 0')
        print(df_image['ImagePositionPatient_2'])
    elif res2 != res:
        print('Warning intv may wrong',res,res2)
        print(df_image['ImagePositionPatient_2'])

    return res

def InstanceNumber2windows_min_max(df_image,num):
    """Return the display window of slice ``num`` as ``[minHU, maxHU]``.

    The window is derived from the 'WindowCenter' and 'WindowWidth' columns of
    ``df_image`` for the row labelled ``num``. When either value is missing
    (absent column/row, None or NaN) or not numeric, a warning is printed and the
    default center 250 / width 1500 is used.

    Returns:
        ``[int(WL - WW/2), int(WL - WW/2) + int(WW)]``.
    """
    try:
        WL = df_image.loc[num, 'WindowCenter']
        WW = df_image.loc[num, 'WindowWidth']
        # A padded/empty cell must take the fallback too, not crash later in int().
        if pd.isna(WL) or pd.isna(WW):
            raise ValueError('window value is empty')
        WL , WW = float(WL) , float(WW)
    except (KeyError, TypeError, ValueError):
        print("Warning! Window Center or Width is empty! Now use default values")
        WL , WW = 250 , 1500

    minHU = int( WL-WW/2 )
    maxHU = minHU + int(WW)
    return [minHU , maxHU]


class ASerial:
    """Parse patient / date / series numbers out of a series path.

    The path is expected to contain ``DeepLesion_<digits>``, ``/D<digits>`` and
    ``/S<digits>``; the (by default six) characters following the last occurrence
    of each marker are converted to int and stored in ``P``, ``D`` and ``S``.
    ``name`` is then formatted as ``'%06d_%02d_%02d' % (P, D, S)``.

    Raises:
        ValueError: if a marker is absent from the path, or the characters after
            it are not an integer.
    """
    # Class-level defaults, overwritten per instance by __init__.
    P=-1
    D=-1
    S=-1
    name = ''
    def __init__(self, path_str):
        self.path = path_str
        self.getP()
        self.getD()
        self.getS()
        self.convert_path()

    def _int_after(self, target, L):
        """Return the int formed by the ``L`` characters after the last ``target`` in the path."""
        found_at = self.path.rfind(target)
        if found_at < 0:
            # Without this check -1 + len(target) becomes a bogus but valid offset
            # and unrelated characters may be parsed silently.
            raise ValueError('marker %r not found in path %r' % (target, self.path))
        start = found_at + len(target)
        return int(self.path[start:start+L])

    def getP(self, target = 'DeepLesion_', L=6):
        """Set ``self.P`` from the digits following ``target``."""
        self.P = self._int_after(target, L)

    def getD(self, target = '/D', L=6):
        """Set ``self.D`` from the digits following ``target``."""
        self.D = self._int_after(target, L)

    def getS(self, target = '/S', L=6):
        """Set ``self.S`` from the digits following ``target``."""
        self.S = self._int_after(target, L)

    def convert_path(self):
        """Build ``self.name`` from the parsed P, D and S numbers."""
        self.name = '%06d_%02d_%02d'%(self.P, self.D, self.S)




import os
import cv2
import json, yaml
import numpy as np
from PIL import Image
# from collections import OrderedDict
from pycocotools import mask as cocomask
from pycocotools import coco as cocoapi



def replacer(s, newstring, index, nofail=False):
    """Replace the single character of ``s`` at ``index`` with ``newstring``.

    With ``nofail`` left False an out-of-range index raises ValueError. With
    ``nofail`` True a negative index prepends ``newstring`` and an index beyond
    the end appends it.
    """
    length = len(s)
    if index not in range(length) and not nofail:
        raise ValueError("index outside given string")

    # Only reachable with nofail=True when the index is out of range.
    if index < 0:
        return newstring + s
    if index > length:
        return s + newstring

    head = s[:index]
    tail = s[index + 1:]
    return head + newstring + tail

def convert_file_name(name,S='/'):
    """Replace the last '_' in ``name`` with ``S``.

    Raises ValueError("index outside given string") when ``name`` has no '_'.
    """
    underscore_at = name.rfind('_')
    if underscore_at < 0:
        # Same failure the shared replacer() helper reports for an out-of-range index.
        raise ValueError("index outside given string")
    return name[:underscore_at] + S + name[underscore_at + 1:]

def file_name2id(name):
    """Turn a file name such as '000001_02_03.png' into an integer id.

    The '.png' suffix and all underscores are removed, a leading '1' is
    prepended (so leading zeros survive the conversion) and the result is
    converted with ``int``.

    Raises:
        ValueError: if non-digit characters remain after the clean-up.
    """
    # str.replace returns a new string; the results must be kept.
    digits = name.replace('.png','').replace('_','')
    return int('1' + digits)
    
def get_image_size( s ):
    """Parse a comma-separated string of integers and return its first two values as a tuple."""
    dims = [int(token) for token in s.split(',')]
    first, second = dims[0], dims[1]
    return first , second

def get_spacing( s ):
    """Parse a comma-separated string of floats and return its first three values as a tuple."""
    values = [float(token) for token in s.split(',')]
    return tuple(values[k] for k in (0, 1, 2))


def get_z_position( df ):
    """Return the third number of the comma-separated 'Normalized_lesion_location' entry."""
    raw = df.loc['Normalized_lesion_location']
    coords = [float(token) for token in raw.split(',')]
    return coords[2]
    
def get_slice_no( df ):
    """Return the 'Key_slice_index' entry converted to int."""
    return int(df.loc['Key_slice_index'])

def get_windows( df ):
    """Return the comma-separated 'DICOM_windows' entry as a list of floats."""
    raw = df.loc[ 'DICOM_windows']
    return [float(token) for token in raw.split(',')]


def get_segmentation():
    """Placeholder: always returns a new empty list."""
    return list()

def get_bbox( df ):
    """Parse 'Bounding_boxes' and convert it from corner form to width/height form.

    The entry is a comma-separated list of numbers; element 2 has element 0
    subtracted from it and element 3 has element 1 subtracted from it.
    """
    box = [float(token) for token in df.loc['Bounding_boxes'].split(',')]
    box[2] -= box[0]
    box[3] -= box[1]
    return box

def get_noise( df ):
    """Return the 'Possibly_noisy' entry converted to int."""
    return int(df.loc['Possibly_noisy'])

def get_area( df ):
    """Return the product of the first two numbers in 'Lesion_diameters_Pixel_'."""
    diameters = [float(token) for token in df.loc['Lesion_diameters_Pixel_'].split(',')]
    long_axis, short_axis = diameters[0], diameters[1]
    return long_axis*short_axis
    



# COCO-style category table: 21 lesion-location classes with ids 1..21.
# Entries whose name ends in 'LN' carry supercategory 'DeepLN'; all others 'DeepLesion'.
# Used as the "categories" section of the generated JSON and as the source of the name -> id map.
newcats = [{'supercategory': 'DeepLesion', 'id': 1, 'name': 'abdomen'},
           {'supercategory': 'DeepLN', 'id': 2, 'name': 'abdomen LN'},
           {'supercategory': 'DeepLesion', 'id': 3, 'name': 'adrenal'},
           {'supercategory': 'DeepLN', 'id': 4, 'name': 'axillary LN'},
           {'supercategory': 'DeepLesion', 'id': 5, 'name': 'bone'},
           {'supercategory': 'DeepLN', 'id': 6, 'name': 'inguinal LN'},
           {'supercategory': 'DeepLesion', 'id': 7, 'name': 'kidney'},
           {'supercategory': 'DeepLesion', 'id': 8, 'name': 'liver'},
           {'supercategory': 'DeepLesion', 'id': 9, 'name': 'lung'},
           {'supercategory': 'DeepLN', 'id': 10, 'name': 'mediastinum LN'},
           {'supercategory': 'DeepLN', 'id': 11, 'name': 'neck LN'},
           {'supercategory': 'DeepLesion', 'id': 12, 'name': 'ovary'},
           {'supercategory': 'DeepLesion', 'id': 13, 'name': 'pancreas'},
           {'supercategory': 'DeepLN', 'id': 14, 'name': 'pelvic LN'},
           {'supercategory': 'DeepLesion', 'id': 15, 'name': 'pelvis'},
           {'supercategory': 'DeepLesion', 'id': 16, 'name': 'pleural'},
           {'supercategory': 'DeepLN', 'id': 17, 'name': 'retroperitoneal LN'},
           {'supercategory': 'DeepLesion', 'id': 18, 'name': 'soft tissue'},
           {'supercategory': 'DeepLesion', 'id': 19, 'name': 'spleen'},
           {'supercategory': 'DeepLesion', 'id': 20, 'name': 'stomach'},
           {'supercategory': 'DeepLesion', 'id': 21, 'name': 'thyroid'} ]

def get_21_lesion_location_cls():
    """Build the category-name -> category-id dictionary from ``newcats``."""
    return {entry['name']: entry['id'] for entry in newcats}

# Module-level lookup table built once from the category list.
D_cls = get_21_lesion_location_cls()

def get_category_id( location , Dict ):
    """Return ``Dict[location]``; raises KeyError for an unknown location."""
    category_id = Dict[location]
    return category_id


# In[4]:

def replace_png_path(s):
    """Swap every 'AutoRecist/Inputs' in ``s`` for 'AutoRecist/Pngs'."""
    pieces = s.split('AutoRecist/Inputs')
    return 'AutoRecist/Pngs'.join(pieces)

In [None]:
# In[5]:

import os
import cv2
import json, yaml
import numpy as np
from PIL import Image
from collections import OrderedDict
from pycocotools import mask as cocomask
from pycocotools import coco as cocoapi


class DeepLesion():
    """
        DL class to convert annotations to COCO Json format

        For every row of ``df`` the DICOM headers found under the row's
        'Image File Path' are read and one COCO "images" entry is produced per
        slice. The "annotations" list is always empty in this implementation.
        The resulting JSON is written as soon as the object is constructed.

        Args:
            df: DataFrame with an 'Image File Path' column.
            image_id_start: image ids are assigned from ``image_id_start + 1`` upwards.
            annotation_id_start: stored on the instance; not used by the methods shown here.
            savename: path of the JSON file to write.
    """
    def __init__(self, df,image_id_start=0,annotation_id_start=0, savename='a.json'):
        self.image_id_start = image_id_start
        self.annotation_id_start = annotation_id_start
        self.df = df 
        self.info = {"year" : 2021,
                     "version" : "2.0",
                     "description" : "Covert Weasis to Json format",
                     "contributor" : "HY,JM,BZ,LS,FSA",
                     "url" : "http:// /",
                     "date_created" : "20211129"
                    }
        self.licenses = [{"id": 1,
                          "name": "Attribution-NonCommercial",
                          "url": "http:// /"
                         }]

        self.categories = newcats

        self.images, self.annotations = self.__get_image_annotation_pairs__(self.df)
        self._write_json(savename)

    def change_df(self , df , savename = 'temp.json'):
        """Rebuild images/annotations from a new ``df`` and write them to ``savename``.

        Image ids continue from the current ``self.image_id_start``; the
        file-name -> id map is reset by the rebuild.
        """
        self.df = df 

        self.images, self.annotations = self.__get_image_annotation_pairs__(self.df)
        self._write_json(savename)
        print( 'Saved %s'%savename )

    def _write_json(self, savename):
        """Serialise the current info/images/licenses/annotations/categories to ``savename``."""
        json_data = {"info" : self.info,
                     "images" : self.images,
                     "licenses" : self.licenses,
                     "annotations" : self.annotations,
                     "categories" : self.categories}

        with open(savename, "w") as jsonfile:
            json.dump(json_data, jsonfile, sort_keys=True, indent=4)

    def __get_image_annotation_pairs__(self,df):
        """Create one image record per DICOM slice of every series listed in ``df``.

        A series whose processing raises is reported with ``print`` and skipped
        from the point of failure; records appended before the failure are kept.

        Returns:
            ``(images, annotations)``; ``annotations`` is always an empty list.
        """
        images = []
        annotations = []
        self.file_name_dict = {}
        for i , row in df.iterrows():
            try:
                print(i)
                df_image = get_dicom_header_df( row['Image File Path'] )
                png_folder = replace_png_path(row['Image File Path'] )

                # The slice interval is a property of the whole series: compute it at
                # most once per series (lazily, at the point where it was computed
                # before) instead of once per slice, which also repeats its warnings.
                slice_intv = None

                for one in df_image.index.values.tolist():
                    file_name = os.path.join(png_folder, '%03d.png'%one)
                    file_name = file_name.replace('/mnt/fast-disk1/mjc/AutoRecist/','')

                    if file_name in self.file_name_dict:
                        # Already registered; its id is looked up but nothing new is recorded.
                        oneimageid = self.file_name_dict[file_name]
                    else:
                        oneimage = {}
                        oneimage['file_name'] = file_name
                        self.image_id_start += 1
                        oneimageid = self.image_id_start
                        oneimage['id'] = oneimageid

                        oneimage['height'] , oneimage['width'] = int(InstanceNumber2data_element(df_image,one,'Rows')), int( InstanceNumber2data_element(df_image,one,'Columns') )

                        oneimage['slice_no'] = int(one)
                        oneimage['spacing'] = float( InstanceNumber2data_element(df_image,one,'PixelSpacing_0') )
                        if slice_intv is None:
                            slice_intv = float( get_SliceThickness(df_image) )
                        oneimage['slice_intv'] = slice_intv
                        oneimage['z_position'] = 0.5  # fixed placeholder value
                        oneimage['windows'] = InstanceNumber2windows_min_max(df_image,one)

                        images.append(oneimage)
                        self.file_name_dict[file_name] = oneimageid


            except Exception as e: print(e)

        return images, annotations
            
    

In [None]:
def pd_str_replace(df , col, ori, new):
    """Case-insensitively replace a regex pattern in one or more string columns, in place.

    Args:
        df: DataFrame to modify.
        col: a column name, or a list of column names.
        ori: regular-expression pattern to search for (callers in this file pass
            ``r"\\"`` to match a single backslash).
        new: replacement string.

    Raises:
        TypeError: if ``col`` is neither a str nor a list.
    """
    if isinstance(col , str):
        # regex=True is spelled out because the pandas default for this flag
        # changed between major versions.
        df[col] = df[col].str.replace(ori, new, case=False, regex=True)
    elif isinstance(col, list):
        for one in col:
            pd_str_replace(df , one, ori, new)
    else:
        raise TypeError('col instance should be str or list')

# Rewrite the spreadsheet paths into local mount paths. Order matters: the drive letter
# and the backslashes are converted first, then the more specific FNIH_VOLPACK prefixes
# are mapped before the generic ClinicalTrials prefix.
pd_str_replace(df, ['Image File Path' ], "X:" , "/mnt/X-drive")
pd_str_replace(df, ['Image File Path' ], r"\\" , "/")
pd_str_replace(df, ['Image File Path'], "/mnt/X-drive/ClinicalTrials/FNIH_VOLPACK", "/mnt/fast-disk1/mjc/AutoRecist/Inputs")
pd_str_replace(df, ['Image File Path'], "/mnt/X-drive/ClinicalTrialDone/FNIH_VOLPACK", "/mnt/fast-disk1/mjc/AutoRecist/Inputs")
pd_str_replace(df, ['Image File Path'], "/mnt/X-drive/ClinicalTrials", "/mnt/fast-disk1/mjc/AutoRecist/Inputs")


print('Initial Image Process')
# Constructing DeepLesion reads the DICOM headers and writes the JSON file immediately.
# NOTE(review): the name `dataset` is rebound later in this file by `import dataset`.
dataset = DeepLesion(df,savename='/mnt/fast-data/mjc/AutoRECIST/Annotations/inference.json')
print('Image Process is Done')
print('Total of {} slice images was Processed.'.format(len(dataset.images)))

In [None]:
## This cell is only needed for the CUIMC 1033 dataset, because some of its DICOM files have reading issues.

# targets = [
#     'COU-AA-302_21460599/D2011_04_29/E5244/CT/S0002_4122',
# ]


# Series path fragments to exclude; further down in this cell each one is matched by
# substring against the 'file_name' of every image in the JSON.
targets = ['METNET0652/D2019_02_07/E5753/CT/S0002_2664',
'METNET3195/D2015_06_30/E6772/CT/S0007_6775',
'METNET3196/D2015_05_28/E0676/CT/S0006_0743',
'METNET3199/D2015_08_27/E8663/CT/S0007_8666',
'METNET3200/D2015_08_05/E0012/CT/S0009_0015',
'METNET3201/D2015_09_01/E0653/CT/S0007_0656',
'METNET2933/D2015_06_16/E7835/CT/S0004_7836',
'METNET2934/D2015_04_24/E0185/CT/S0007_0188',
'METNET2935/D2015_03_12/E1888/CT/S0007_2033',
'METNET2936/D2015_08_20/E6259/CT/S0009_6823',
'METNET2937/D2015_02_09/E2120/CT/S0010_2277',
'METNET2938/D2015_05_21/E0892/CT/S0004_0895',
'METNET2939/D2015_02_17/E0844/CT/S0004_0847',
'METNET5124/D2014_09_16/E9455/CT/S0003_9458',
'METNET5126/D2014_07_25/E5869/CT/S0003_5872',
'METNET5127/D2014_04_09/E6205/CT/S0005_6545',
'METNET5622/D2013_10_04/E2982/CT/S0010_2983',
'METNET5624/D2013_09_10/E0668/CT/S0008_0671',
'METNET5625/D2013_04_08/E7573/CT/S0003_7576',
'METNET5629/D2013_11_21/E4705/CT/S0008_4708',
'METNET5762/D2014_07_11/E2887/CT/S0004_2890',
'METNET5763/D2014_12_12/E2180/CT/S0004_2183',
'METNET5856/D2014_03_14/E2011/CT/S0002_2015',]

def compare_str(a,b):
    """Return True when either string is a substring of the other, else False."""
    return (a in b) or (b in a)



def del_items(coco_images, ids , name_str = 'id'):
    """Return the entries of ``coco_images`` whose ``name_str`` value is not in ``ids``.

    Args:
        coco_images: list of dicts (COCO images or annotations).
        ids: collection of id values to drop.
        name_str: key holding the id in each entry ('id' or 'image_id').

    Returns:
        A new list; the input list is not modified. The old and new lengths
        are printed.
    """
    try:
        # Constant-time membership instead of scanning a list for every entry.
        excluded = set(ids)
    except TypeError:
        # Unhashable ids: fall back to the plain collection.
        excluded = ids

    newimages = [image for image in coco_images if image[name_str] not in excluded]

    print ('previous length was {} but new length is {}'.format( len(coco_images) , len(newimages) ) )
    return newimages


import json
# Template for the annotation file path; %s is filled with the set name.
json1_path = '/mnt/fast-data/mjc/AutoRECIST/Annotations/%s.json'


cocos = []

# Load every requested annotation file (currently just 'inference').
# After the loop `coco` refers to the last file loaded; the code below works on that one.
for oneset in ['inference']:
    for path in [json1_path ]:
        annotation_path = path%oneset
        print(annotation_path)
        json_file = open(annotation_path)
        coco = json.load(json_file)
        json_file.close()
        print('images len: %d annotations len: %d' %( len(coco['images']) , len(coco['annotations']) ) )
        cocos.append(coco)

# Collect the ids of all images whose file_name matches one of the excluded series.
image_ids = []
for image in coco['images']:
    file_name = image['file_name']
    for t in targets:
        if compare_str(t,file_name):
            image_ids.append(image['id'])

print(image_ids)

# Drop the matched images and any annotation that points at them.
newimages = del_items(coco['images'] , image_ids , name_str='id')
newannos = del_items(coco['annotations'] , image_ids , name_str='image_id')

# NOTE: this is the same path the JSON was loaded from, so the file is overwritten in place.
savename = '/mnt/fast-data/mjc/AutoRECIST/Annotations/inference.json'
json_data = {"info" : coco['info'],
             "images" : newimages,
             "licenses" : coco['licenses'],
             "annotations" : newannos,
             "categories" : coco['categories']}
with open(savename, "w") as jsonfile:
    json.dump(json_data, jsonfile, sort_keys=True, indent=4)
    print( 'Saved %s'%savename )

In [None]:

# Remove the cached roidb pickle for the 'inference' set before the dataset is loaded below.
get_ipython().system('rm ./cache/inference_gt_roidb.pkl')

# Equivalent of `%reload_ext autoreload` / `%autoreload 2`.
# run_line_magic replaces the deprecated get_ipython().magic(...) call.
get_ipython().run_line_magic('reload_ext', 'autoreload')
get_ipython().run_line_magic('autoreload', '2')

import warnings
warnings.filterwarnings('ignore')

import argparse
import os

import logging
import numpy as np

import torch
import torch.nn as nn
import torch.backends.cudnn as cudnn
from tensorboardX import SummaryWriter

import _init_paths
import models
import dataset
from config import cfg
from config import update_config
from core.seg_function import validate_seg_wo_loss as validate
from core.oneshot_function import calib_bn_seg as calib_bn
from utils.utils import get_model_summary
from utils.utils import create_logger, FullModel

from dataset.roidb import combined_roidb_for_training
from roi_data.loader import RoiDataLoader

from PIL import Image
import torch.nn.functional as F
from utils.utils import get_confusion_matrix

import time
# Start of the wall-clock timer; the elapsed-time printouts later in this cell are measured from here.
t = time.time()


def convert_name(name):
    """Flatten a path-like name by turning every '/' into '_'."""
    return '_'.join(name.split('/'))

def get_palette(n):
    """Build a flat ``[r0, g0, b0, r1, g1, b1, ...]`` colour table for ``n`` labels.

    For each label the bits are consumed three at a time (one per channel) and
    written into the channel bytes starting from the most significant bit.
    """
    palette = []
    for label in range(0, n):
        channels = [0, 0, 0]
        remaining = label
        bit_position = 7
        while remaining:
            # Bit k of the current 3-bit group goes to channel k.
            for channel in range(3):
                channels[channel] |= (((remaining >> channel) & 1) << bit_position)
            bit_position -= 1
            remaining >>= 3
        palette.extend(channels)
    return palette
    
# def save_pred(preds, sv_path, name):

#     preds = preds.cpu().numpy().copy()
#     preds = np.asarray(np.argmax(preds, axis=1), dtype=np.uint8)
#     for i in range(preds.shape[0]):
#         cv2.imwrite(os.path.join(sv_path, convert_name(name[i])) , preds[i])

        
def save_pred( preds, sv_path, name):
    """Save the per-pixel argmax of ``preds`` as palettised images.

    ``preds`` is moved to the CPU, reduced with argmax over axis 1 and cast to
    uint8; item ``i`` is written to ``sv_path`` under ``convert_name(name[i])``
    using the 256-entry palette from ``get_palette``.
    """
    colour_table = get_palette(256)
    scores = preds.cpu().numpy().copy()
    class_maps = np.asarray(np.argmax(scores, axis=1), dtype=np.uint8)
    for idx, class_map in enumerate(class_maps):
        indexed_image = Image.fromarray(class_map)
        indexed_image.putpalette(colour_table)
        target_file = os.path.join(sv_path, convert_name(name[idx]) )
        indexed_image.save(target_file)
        

def testval_lesion(config, test_dataset, testloader, model,
            sv_dir='', sv_pred=True, device = None):
    """Run the model over ``testloader`` and accumulate segmentation metrics.

    Args:
        config: configuration providing DATASET.NUM_CLASSES and TRAIN.IGNORE_LABEL.
        test_dataset: not used by this function (kept for interface compatibility).
        testloader: iterable yielding ``(image, label, _, name)`` batches.
        model: network called as ``model(image)``.
        sv_dir: parent directory; predictions go to ``<sv_dir>/test_val_results``.
        sv_pred: when True, every prediction is written with ``save_pred``.
        device: target device; ``None`` means ``.cuda()``.

    Returns:
        ``(mean_IoU, IoU_array, pixel_acc, mean_acc)`` computed from the
        confusion matrix summed over all batches.
    """
    model.eval()
    num_classes = config.DATASET.NUM_CLASSES
    confusion_matrix = np.zeros((num_classes, num_classes))

    if sv_pred:
        # Create the output folder once instead of re-checking it for every batch.
        sv_path = os.path.join(sv_dir, 'test_val_results')
        os.makedirs(sv_path, exist_ok=True)

    with torch.no_grad():
        for index, batch in enumerate(testloader):
            image, label, _, name = batch
            size = label.size()
            if device is None:
                image = image.cuda()
                label = label.long().cuda()
            else:
                image = image.to(device)
                label = label.long().to(device)

            pred = model(image)
            if pred.size()[-2] != size[-2] or pred.size()[-1] != size[-1]:
                # F.interpolate is what the deprecated F.upsample forwards to.
                pred = F.interpolate(pred, size=(size[-2], size[-1]),
                                     mode='bilinear')

            confusion_matrix += get_confusion_matrix(
                label,
                pred,
                size,
                num_classes,
                config.TRAIN.IGNORE_LABEL)

            if sv_pred:
                save_pred(pred, sv_path, name)

            if index % 100 == 0:
                logging.info('processing: %d batches' % index)

    # Rows summed give `pos`, columns summed give `res`; the diagonal is the agreement count.
    pos = confusion_matrix.sum(1)
    res = confusion_matrix.sum(0)
    tp = np.diag(confusion_matrix)
    pixel_acc = tp.sum() / pos.sum()
    mean_acc = (tp / np.maximum(1.0, pos)).mean()
    IoU_array = (tp / np.maximum(1.0, pos + res - tp))
    mean_IoU = IoU_array.mean()

    return mean_IoU, IoU_array, pixel_acc, mean_acc


def parse_args(l):
    """Parse the argument list ``l`` and apply it to the global ``cfg``.

    Recognised arguments: ``--cfg`` (required), ``--bn_calib`` (flag),
    ``--mask_path`` and the trailing ``opts`` remainder. ``update_config`` is
    called with the parsed namespace before it is returned.
    """
    argument_specs = [
        (('--cfg',), dict(help='experiment configure file name',
                          required=True,
                          type=str)),
        (('opts',), dict(help="Modify config options using the command-line",
                         default=None,
                         nargs=argparse.REMAINDER)),
        (('--bn_calib',), dict(action='store_true')),
        (('--mask_path',), dict(help='the path of a mask.npy',
                                default=None,
                                type=str)),
    ]

    parser = argparse.ArgumentParser(description='Test segmentation network')
    for flags, options in argument_specs:
        parser.add_argument(*flags, **options)

    parsed = parser.parse_args(l)
    update_config(cfg, parsed)
    return parsed



# experiment_name = 'Lesion_Q5_9Slices_scalenet_seg_test'
# mask_name = 'mask_1988'
# arglist = ['--cfg', '../experiments/lesion_Q5/%s.yaml'%experiment_name ,  
#            '--mask_path', '../evo_files/masks/%s.npy'%mask_name,  
#            'TEST.MODEL_FILE', '../output/Lesion/superscalenet_seg/Lesion_Q5_9Slices_superscalenet/data_patch_train/best.pth',
#            'DATASET.ROOT','',
#            'TRAIN.USE_FLIPPED',False]

# Active experiment: the yaml config name and the sub-network mask file to use.
experiment_name = 'Lesion_Q5_scalenet_seg_test'
mask_name = 'mask_1514'
# Argument list handed to parse_args in place of a real command line; everything after the
# named options is collected by the 'opts' remainder as KEY, VALUE pairs.
# NOTE(review): the last value is the bool False rather than a string — confirm update_config accepts it.
arglist = ['--cfg', '../experiments/lesion_Q5/%s.yaml'%experiment_name ,  
           '--mask_path', '../evo_files/masks/%s.npy'%mask_name,  
           'TEST.MODEL_FILE', '../output/Lesion/superscalenet_seg/Lesion_Q5_superscalenet_base/data_patch_train/best.pth',
           'DATASET.ROOT','../abababab/',
           'TRAIN.USE_FLIPPED',False]

# parse_args also pushes the values into the global cfg through update_config.
args = parse_args(arglist)

logger, final_output_dir, tb_log_dir = create_logger(
    cfg, args.cfg, 'valtest')

# NOTE(review): writer_dict is not used again in the visible part of this file.
writer_dict = {
    'writer': SummaryWriter(tb_log_dir),
    'train_global_steps': 0,
    'valid_global_steps': 0,
}
# cudnn related setting
cudnn.benchmark = cfg.CUDNN.BENCHMARK
cudnn.deterministic = cfg.CUDNN.DETERMINISTIC
cudnn.enabled = cfg.CUDNN.ENABLED

# build model
# NOTE(review): eval() on a config-supplied name executes whatever string the config holds;
# only run this with trusted config files.
model = eval('models.' + cfg.MODEL.NAME +
             '.get_seg_model')(cfg)


# A checkpoint path is mandatory: without cfg.TEST.MODEL_FILE the cell aborts.
if cfg.TEST.MODEL_FILE:
    model_state_file = cfg.TEST.MODEL_FILE
else:
    raise NotImplementedError
    # Unreachable: the raise above always fires first.
    model_state_file = os.path.join(final_output_dir,
                                    'final_state.pth')
# logger.info('=> loading model from {}'.format(model_state_file))

# NOTE(review): torch.load unpickles the file; only load checkpoints from trusted sources.
pretrained_dict = torch.load(model_state_file)

# Strip a leading 'model.' from checkpoint keys; all other keys are copied unchanged.
D2= {}
for key in pretrained_dict.keys():
    if key[:6] == 'model.':
        new_key = key[6:]
        D2[new_key] = pretrained_dict[key]
    else:
        # print(key)
        D2[key] = pretrained_dict[key]

pretrained_dict = D2      
model_dict = model.state_dict()

# missing_keys is computed but only consumed by the commented-out warning below.
model_keys = set(model_dict.keys())
pretrained_keys = set(pretrained_dict.keys())
missing_keys = model_keys - pretrained_keys
# logger.warn('Missing keys in pretrained_dict: {}'.format(missing_keys))

# Overlay the checkpoint on the model's own state; strict=False tolerates key mismatches.
model_dict.update(pretrained_dict)
model.load_state_dict(model_dict, strict=False)

elapsed = time.time() - t
print('Current time cost is {} sec'.format(elapsed) )


# NOTE(review): test_size is not used again in the visible part of this file.
test_size = (cfg.TEST.IMAGE_SIZE[1], cfg.TEST.IMAGE_SIZE[0])

# Manually select the dataset name from the options below.
# ('PDS_AMGEN_20020408_22Cat_test',)
# ('PDS_Q2_A&C_22Cat_train',)
# ('PDS_CUIMC_22Cat_test',)

# Build the roidb for the 'inference' set.
test_roidb, test_ratio_list, test_ratio_index = combined_roidb_for_training(
        ('inference',) , cfg.VAL.PROPOSAL_FILES)

# NOTE(review): the loader is created with training=True although it is used for testing — confirm intended.
test_dataset = RoiDataLoader(
    test_roidb,
    cfg.MODEL.NUM_CLASSES,
    training=True)


testloader = torch.utils.data.DataLoader(
    test_dataset,
    batch_size=cfg.TEST.BATCH_SIZE_PER_GPU,
    shuffle=False,
    num_workers=cfg.WORKERS,
    pin_memory=True,
    sampler=None)



gpus = list(cfg.GPUS)
# logger.info('GPU list is {}'.format(gpus))

model = nn.DataParallel(model, device_ids=gpus).cuda()

# Activate the sub-network described by the mask file when it exists; otherwise run without a mask.
if args.mask_path and os.path.exists(args.mask_path):
    # NOTE(review): allow_pickle=True lets np.load unpickle objects; use trusted mask files only.
    masks = np.load(args.mask_path, allow_pickle=True)
    model.module.set_active_subnet(masks)
    # logger.info('=> setting mask from {}'.format(args.mask_path))
    # logger.info(masks)
else:
    masks=None
    logger.info('No model mask')


# Predictions are written under '<mask_name>/test_val_results' because sv_dir is the mask name.
mean_IoU, IoU_array, pixel_acc, mean_acc = testval_lesion(cfg, 
                                                  test_dataset, 
                                                  testloader, 
                                                  model.cuda(),
                                                  sv_dir=mask_name, 
                                                  device=None)


elapsed = time.time() - t
print('Current time cost is {} sec'.format(elapsed) )



In [None]:
# Display the elapsed seconds last stored in `elapsed` by the inference cell.
elapsed

In [None]:
def pd_str_replace(df , col, ori, new):
    """Case-insensitively replace a regex pattern in one or more string columns, in place.

    Args:
        df: DataFrame to modify.
        col: a column name, or a list of column names.
        ori: regular-expression pattern to search for (callers in this file pass
            ``r"\\"`` to match a single backslash).
        new: replacement string.

    Raises:
        TypeError: if ``col`` is neither a str nor a list.
    """
    if isinstance(col , str):
        # regex=True is spelled out because the pandas default for this flag
        # changed between major versions.
        df[col] = df[col].str.replace(ori, new, case=False, regex=True)
    elif isinstance(col, list):
        for one in col:
            pd_str_replace(df , one, ori, new)
    else:
        raise TypeError('col instance should be str or list')
# Same path normalisation as for the image table, applied to the full sheet and to both
# the image and the contour path columns: drive letter first, then backslashes.
pd_str_replace(df_all, ['Image File Path' , 'Contour File Path'], "X:" , "/mnt/X-drive")
pd_str_replace(df_all, ['Image File Path' , 'Contour File Path'], r"\\" , "/")
# Image folders: specific FNIH_VOLPACK prefixes before the generic ClinicalTrials prefix.
pd_str_replace(df_all, ['Image File Path'], "/mnt/X-drive/ClinicalTrials/FNIH_VOLPACK", "/mnt/fast-disk1/mjc/AutoRecist/Inputs")
pd_str_replace(df_all, ['Image File Path'], "/mnt/X-drive/ClinicalTrialDone/FNIH_VOLPACK", "/mnt/fast-disk1/mjc/AutoRecist/Inputs")
pd_str_replace(df_all, ['Image File Path'], "/mnt/X-drive/ClinicalTrials", "/mnt/fast-disk1/mjc/AutoRecist/Inputs")

# Contour folders: the PDS_AUTO_RECIST prefix is handled before the generic ConvWeasisToRaw one;
# ConvWeasisToMatlab paths are redirected to the ConvWeasisToRaw folder as well.
pd_str_replace(df_all, ['Contour File Path'], "/mnt/X-drive/ConvWeasisToRaw/PDS_AUTO_RECIST", "/mnt/fast-disk1/mjc/AutoRecist/Inputs/ConvWeasisToRaw/PDS_AUTO_RECIST_RAW")
pd_str_replace(df_all, ['Contour File Path'], "/mnt/X-drive/ConvWeasisToRaw", "/mnt/fast-disk1/mjc/AutoRecist/Inputs/ConvWeasisToRaw")
pd_str_replace(df_all, ['Contour File Path'], "/mnt/X-drive/ConvWeasisToMatlab", "/mnt/fast-disk1/mjc/AutoRecist/Inputs/ConvWeasisToRaw")


In [None]:
# Equivalent of `%reload_ext autoreload` / `%autoreload 2`.
# run_line_magic replaces the deprecated get_ipython().magic(...) call.
get_ipython().run_line_magic('reload_ext', 'autoreload')
get_ipython().run_line_magic('autoreload', '2')

import os
import logging
import numpy as np
import _init_paths

from config import cfg
from config import update_config
from utils_test import *
from utils_test import __get_annotation__
from utils_metrics_3d import *
import cv2
from PIL import Image

import sys
sys.path.append('/mnt/fast-disk1/mjc/utils_codes/read_weasis_raw_v0.96/')
import weasis_raw_data_api as wr


def convert_name(name):
    """Flatten a path-like name by turning every '/' into '_'."""
    return '_'.join(name.split('/'))

HEIGHT , WIDTH = 512, 512
def get_pred_vol(oneCT , site_list , D_z_index, union_mask = True):
    """Rasterise the predicted contours of one CT into a boolean volume.

    Args:
        oneCT: dict mapping slice number -> ``(aroidb, bboxes, segmentations)``;
            only ``segmentations`` (indexable by class id) is used here.
        site_list: class ids whose predictions are drawn.
        D_z_index: dict mapping slice number -> z index in the volume; the depth
            of the volume is ``max(D_z_index.values()) + 1``.
        union_mask: when True the contours of a class on a slice are first merged
            with ``union_ploys``; the contours must then be numpy arrays because
            they are flattened with ``.flatten()``.

    Returns:
        Boolean array of shape ``(depth, HEIGHT, WIDTH)``. It is all False when
        ``oneCT`` is empty (previously this case raised UnboundLocalError).
    """
    shape_z = np.max(list(D_z_index.values())) + 1
    height , width = HEIGHT , WIDTH

    # Allocate up front so the function has a result even when there are no slices.
    # The ground-truth volume that used to be filled alongside was never returned
    # or read, so it is no longer built.
    vol_pred = np.zeros((shape_z , height , width), dtype = bool)

    for s in sorted(oneCT.keys()):
        aroidb , bboxes , segmentations = oneCT[s]

        for j in site_list:
            contours = segmentations[j]
            if union_mask:
                #contour should be numpy.array here. list cause error of no attribute 'flatten'
                cc = [ contour.flatten().tolist() for contour in contours if len(contour)!=0]
                contours = union_ploys(cc , height, width)

            for c in contours:#union pred
                if len(c)>=6:
                    # At least three (x, y) points are needed to draw a polygon.
                    new = polys_to_mask([c] , height , width) 
                    vol_pred[D_z_index[s]][new>0] = 1 
                elif len(c):
                    print('len pred contour is %d'%len(c))
    return vol_pred

def seperate_vol(vol_pred , reduceFP = False):
    """Split a mask volume into its connected components.

    The volume is labelled with ``skimage.measure.label`` (connectivity 2);
    label 0 is background and is left out.

    Parameters
    ----------
    vol_pred : numpy.ndarray
        Mask volume to label.
    reduceFP : bool
        When True, components whose voxels all share a single index along
        axis 0 are discarded.

    Returns
    -------
    dict
        Maps each kept label to a boolean volume that is True exactly where
        the labelled volume equals that label.
    """
    from skimage import measure

    labelled = measure.label(vol_pred, connectivity=2)

    component_ids = np.unique(labelled)
    component_ids = component_ids[component_ids > 0]  # 0 is background

    if reduceFP:
        # Keep a component only if it touches more than one index on axis 0.
        multi_slice = np.array(
            [np.unique(np.nonzero(labelled == cid)[0]).size > 1 for cid in component_ids],
            dtype=bool,
        )
        component_ids = component_ids[multi_slice]

    return {cid: labelled == cid for cid in component_ids}



# Class-id selections; `site_list` is the one used by the rest of the notebook.
site_list_liver = [8]
site_list_liver_lung_LNs = [2,4,6,8,9,10,11,14,17]
site_list_LNs = [2,4,6,10,11,14,17]

site_list = site_list_liver
user_id = 'jm4669'

cache_path = './cache/'
name = 'inference'
# name = 'lesion_train'

cache_filepath = os.path.join(cache_path, name+'_gt_roidb.pkl')
# print('Loading cached gt_roidb from %s', cache_filepath)
# NOTE(security): pickle.load can execute arbitrary code; only point this at a
# cache file produced by this project.
with open(cache_filepath, 'rb') as fp:
    cached_roidb = pickle.load(fp)

roidb = cached_roidb



# Folder holding one predicted label image per roidb entry.
sv_dir = mask_name
sv_path = os.path.join(sv_dir, 'test_val_results')

# Both tables are indexed [class][roidb entry]. An entry keeps its empty
# default when that class has no usable prediction on that slice.
all_boxes = [ [ np.zeros((0,5),dtype="float32") for _ in range(len(roidb)) ] for _ in range( cfg.DATASET.NUM_CLASSES) ]
all_segms = [ [ [] for _ in range(len(roidb)) ] for _ in range( cfg.DATASET.NUM_CLASSES) ]

for i in range(len(roidb)):

    one = roidb[i]
    onename = one['image']
    pred_path = os.path.join( sv_path, convert_name(onename) )
    if not os.path.exists(pred_path):
        # Report and skip. Opening the missing file would abort the whole run
        # right after the warning; the empty defaults already mean
        # "no prediction for this slice".
        print(pred_path , 'not exists!')
        continue
    # The context manager closes the file handle once the pixels are copied.
    with Image.open(pred_path) as pred_im:
        pred = np.array(pred_im)
    for j in range(cfg.DATASET.NUM_CLASSES):
        # Pixels whose value equals the class index form that class' mask.
        mask = np.asarray( pred==j , dtype=np.uint8)
        if np.sum(mask > 0) <= 3 :
            # 3 pixels or fewer: no contour is extracted for this class.
            continue
        segmentation, bbox, area = __get_annotation__(mask , xywh = False , bbox_score=True)
        if segmentation and bbox:
            all_segms[j][i] = segmentation
            all_boxes[j][i] = bbox


# Group the per-slice records by series folder:
#   D_CT[series folder][slice number] = [roidb entry, boxes, segmentations]
# where boxes / segmentations are dicts keyed by the class ids in site_list.
D_CT = {}
for i, aroidb in enumerate(roidb):
    dicom_path, png_name = os.path.split(aroidb['image'])
    # The PNG file stem is the slice number.
    slice_no = int(os.path.splitext(png_name)[0])
    if slice_no != aroidb['slice_no']:
        print('following slice numbers are not consistence.')
        print(dicom_path,slice_no,aroidb['slice_no'])

    segmentations = {j: all_segms[j][i] for j in site_list}
    bboxes = {j: all_boxes[j][i] for j in site_list}

    # Create the per-series dict on first sight, then store this slice.
    D_CT.setdefault(dicom_path, {})[slice_no] = [aroidb , bboxes , segmentations]

In [None]:
        
def initialize_mask_vol( D_z_index , height , width):
    """Allocate an all-zero uint8 volume for one series.

    The depth is the largest value in ``D_z_index`` plus one; the other two
    dimensions are ``height`` and ``width``.
    """
    z_indices = list(D_z_index.values())
    depth = np.max(z_indices) + 1
    return np.zeros(shape=(depth, height, width), dtype=np.uint8)

Metrics_vol = []

# Every exported contour file goes into this one folder; create it once
# instead of re-checking for every tumor.
file_folder = os.path.join(SAVE_PATH , 'RawToWeasis')
if not os.path.exists(file_folder):
    os.makedirs(file_folder)

keys = list(D_CT.keys())
for k in keys:
#     if 'COU-AA-302' in k:
#         site_list = site_list_LNs
#     else:
#         site_list = site_list_liver_lung_LNs

    # The series path is the PNG folder path with '/Pngs/' replaced by '/Inputs/'.
    image_series_path = k.replace('/Pngs/' , '/Inputs/')

    df_image = get_dicom_header_df( image_series_path )
    instanceNumber_list = df_image.index.to_list()
    D_z_index = instanceNumber2Matrix_z_index(instanceNumber_list)


    oneCT = remove_single_slice_segms(D_CT[k])
    vol_pred = get_pred_vol(oneCT , site_list , D_z_index, union_mask = False)

    image_series = wr.dicom_header(image_series_path)
    if len(image_series):
        height = image_series[0].Rows
        width = image_series[0].Columns
    else:
        print('ERROR image_series has no len' , image_series_path)
    # Also stops the run when image_series is empty (vol_pred has >= 1 slice),
    # so stale height/width from an earlier series are never used.
    assert(len(image_series) == vol_pred.shape[0])

    mask_vol = initialize_mask_vol( D_z_index, height , width)

    # Radiologist contours recorded for this series with Location 'liver'.
    df_radiologist = df_all[(df_all['Image File Path'] == image_series_path) & (df_all['Location'].isin(['liver'])) ]
    for _ , row in df_radiologist.iterrows():
        radiologist_raw = wr.read(row['Contour File Path'])
        slice_list = radiologist_raw.get_instance_number_array()
        for j, one in enumerate(slice_list):
            mask = radiologist_raw.get_mask_image(j)
            # Mark voxels instead of adding: uint8 '+=' can wrap around (or
            # fail to cast) depending on the mask's values and dtype.
            mask_vol[D_z_index[one]][np.asarray(mask) > 0] = 1

    # Union of AI prediction and radiologist masks, as a 0/1 uint8 volume.
    # A logical OR cannot overflow, unlike bool + uint8 addition.
    vol2 = np.logical_or(vol_pred, mask_vol).astype(np.uint8)
    vol_dict = seperate_vol(vol2)

    # One output file per connected component of the union.
    for tumor_index in vol_dict:
        mask_volume = vol_dict[tumor_index]
        weasis_raw_data = wr.create(image_series, mask_volume)

        file_name = wr.unique(image_series, tumor_index, user_id)
        file_name = os.path.join(file_folder,file_name)
        wr.write(weasis_raw_data, file_name)

        Metrics_vol.append( [image_series_path , file_name , user_id ])



    # The CSV is rewritten after every series, so the file on disk always
    # lists the contours of all series finished so far.
    df_metrics = pd.DataFrame(Metrics_vol,
                              columns = ['Image File Path','Contour File Path','Uni'])
    df_metrics.to_csv('RawToWeasisUnionAIRadiologist_1033.csv' , index=False)
    print('finished ', k )

In [5]:
whos

Interactive namespace is empty.


In [3]:
!ls -lht

total 46M
-rw-r--r--. 1 jm4669 domain users  17K Apr 16 13:55 Display Weasis Raw.ipynb
drwxr-x---. 2 jm4669 domain users  20K Apr 16 13:53 DisplayWeasisRaw
-rw-r--r--. 1 jm4669 domain users 167K Mar 19 23:35 ScaleNAS_3Slices to Weasis Raw Union of AI and Rad Amgen.ipynb
drwxr-xr-x. 2 jm4669 domain users  112 Mar 18 14:22 __pycache__
-rw-r--r--. 1 jm4669 domain users  14K Mar 18 14:22 utils_metrics_3d.py
-rw-r--r--. 1 jm4669 domain users  28K Mar 16 22:25 Weasis cvs change str to X Drive.ipynb
-rw-r--r--. 1 jm4669 domain users 406K Mar 16 20:15 RawToWeasisUnionAIRadiologist_AmgenatXdrive.csv
-rw-r--r--. 1 jm4669 domain users 410K Mar 16 19:27 RawToWeasisUnionAIRadiologist_Amgen20020408.csv
drwxrwxr-x. 2 jm4669 domain users  232 Mar 16 19:05 cache
-rw-r--r--. 1 jm4669 domain users  49K Mar 16 18:58 ScaleNAS_3Slices to Weasis Raw Union Contour of AI and Radiologist CUIMC1033.ipynb
-rw-r--r--. 1 jm4669 domain users 853K Mar 16 18:29 RawToWeasisUnionAIRadiologist_1033atXdrive.csv