In [12]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os

from imea.measure_2d.micro import  fractal_dimension_boxcounting as fract_dim
import multiprocessing as mp

import time
import cv2
import nibabel as nb

import warnings
warnings.filterwarnings('ignore')

In [13]:
# Sanity check: run the box-counting routine on an all-zero 100x100 image
# (the recorded output of this cell is 0).
fract_dim(np.zeros((100,100)))

0

## Fractal Dimension Calculation using Box Counting Algorithm

In [14]:
def fract(x):
    '''Box-counting fractal dimension of each labelled sub-region in one 2-D slice.

    The slice is cropped to the bounding rectangle of its largest non-zero
    contour. Inside that crop one binary mask per label value (1, 2 and 4)
    is built and handed to ``fract_dim``.

    Parameters
    ----------
    x : 2-D array
        One segmentation slice. Values are cast to ``uint8`` and 0 is treated
        as background. Labels 1 / 2 / 4 presumably follow the BraTS convention
        (NCR/NET, ED, ET) -- TODO confirm against the segmentation source.

    Returns
    -------
    numpy.ndarray of shape (3,)
        ``[ncrnet, ed, et]`` fractal dimensions. A slice without any
        foreground yields three zeros, so callers can always stack the
        per-slice results into an ``(n_slices, 3)`` array.
    '''
    labels = x.astype(np.uint8)
    mask = (labels != 0).astype(np.uint8)

    # Nothing to measure: keep the return shape consistent with the normal path.
    if not mask.any():
        return np.zeros(3)

    # [-2] picks the contour list on OpenCV 3.x (image, contours, hierarchy)
    # as well as on 2.x / 4.x (contours, hierarchy).
    contours = cv2.findContours(image=mask, mode=cv2.RETR_TREE,
                                method=cv2.CHAIN_APPROX_SIMPLE)[-2]
    if len(contours) == 0:
        return np.zeros(3)

    # Only the largest contour is needed, so a full sort is unnecessary.
    largest = max(contours, key=cv2.contourArea)
    xx, yy, w, h = cv2.boundingRect(largest)
    crop = labels[yy:yy + h, xx:xx + w]

    # One 0/1 float mask per label, in the order ncrnet (1), ed (2), et (4).
    return np.array([fract_dim((crop == label).astype(float)) for label in (1, 2, 4)])

## Mean and Median FD of each subcomponent

In [15]:
def fd_(X):
    '''Mean and median per-slice fractal dimension, slicing along axis 2.

    Slices whose voxel sum is below 10 are left out; ``fract`` is evaluated
    on each remaining slice.

    Returns a length-6 array ordered as
    [ncr/net mean, ncr/net median, ed mean, ed median, et mean, et median].
    '''
    volume = X.astype(np.uint8)

    # Indices of the slices that carry enough label mass to be analysed
    kept = [k for k in range(volume.shape[2]) if volume[:, :, k].sum() >= 10]
    per_slice = np.array([fract(volume[:, :, k]) for k in kept])

    summary = []
    for column in range(3):
        values = per_slice[:, column]
        summary.append(np.mean(values))
        summary.append(np.median(values))

    return np.array(summary)

In [16]:
def fd_cor(X):
    '''Mean and median per-slice fractal dimension, slicing along axis 1.

    This is the variant the notebook uses for its "coronal" features.
    Slices whose voxel sum is below 10 are left out; ``fract`` is evaluated
    on each remaining slice.

    Returns a length-6 array ordered as
    [ncr/net mean, ncr/net median, ed mean, ed median, et mean, et median].
    '''
    volume = X.astype(np.uint8)

    # Bring axis 1 to the front so that iterating yields volume[:, i, :]
    planes = np.moveaxis(volume, 1, 0)
    informative = [plane for plane in planes if plane.sum() >= 10]
    per_slice = np.array([fract(plane) for plane in informative])

    return np.array([stat(per_slice[:, column])
                     for column in range(3)
                     for stat in (np.mean, np.median)])

In [17]:
def fd_sag(X):
    '''Mean and median per-slice fractal dimension, slicing along axis 0.

    This is the variant the notebook uses for its "sagittal" features.
    Slices whose voxel sum is below 10 are left out; ``fract`` is evaluated
    on each remaining slice.

    Returns a length-6 array ordered as
    [ncr/net mean, ncr/net median, ed mean, ed median, et mean, et median].
    '''
    volume = X.astype(np.uint8)

    # Iterating a 3-D array walks its first axis, i.e. volume[i, :, :]
    per_slice = np.array([fract(plane) for plane in volume if plane.sum() >= 10])

    ncr_col, ed_col, et_col = per_slice[:, 0], per_slice[:, 1], per_slice[:, 2]
    return np.array([
        np.mean(ncr_col), np.median(ncr_col),
        np.mean(ed_col), np.median(ed_col),
        np.mean(et_col), np.median(et_col),
    ])

## Running the defined functions on text files containing the paths to the .nii.gz files

### GBM

In [18]:
# Fractal-dimension features (slices along axis 2) for every GBM case listed
# in gbm_input.txt, saved to gbm_frac_dim.csv.
gbm_fd = []
case_id = []
t0 = time.time()

# Features and case ids are collected in the SAME pass: a volume that fails to
# load or process is dropped from both lists, so DataFrame rows stay aligned.
with open('gbm_input.txt') as file:      # text file containing one file path per line
    for fl in file:
        path = fl.rstrip()
        if not path:
            continue    # ignore blank lines
        subject = path.split('\\')[5]    # number is the position of the subject code in the path
        try:
            features = fd_(nb.load(path).get_fdata())
        except Exception as err:
            # Best-effort run: report the skipped case instead of hiding it.
            print('Skipping', path, '->', repr(err))
            continue
        gbm_fd.append(features)
        case_id.append(subject)


print('Runtime:' , time.time() - t0 , 'seconds')
x = np.array(gbm_fd)

print(case_id[0])    # to check what is being printed


gbm_fract_dim = pd.DataFrame({'case_id':case_id,
                          'ncr_net_meanfd': x[:, 0], 'ncr_net_medfd': x[:, 1],
                         'ed_meanfd': x[:, 2], 'ed_medfd':x[:, 3],
                         'et_meanfd': x[:, 4], 'et_medfd': x[:, 5]})

gbm_fract_dim.to_csv('gbm_frac_dim.csv', index=False)

Runtime: 20.677095890045166 seconds
TCGA-02-0006


### LGG

In [19]:
# Fractal-dimension features (slices along axis 2) for every LGG case listed
# in lgg_input.txt, saved to lgg_frac_dim.csv.
lgg_fd = []
t0 = time.time()

with open('lgg_input.txt') as file:
    for fl in file:
        volume_path = fl.rstrip()
        lgg_fd.append(fd_(nb.load(volume_path).get_fdata()))

print('Runtime:', time.time() - t0 , 'seconds')
x = np.array(lgg_fd)

# Second pass over the list: element 5 of the backslash-split path is the subject code
with open('lgg_input.txt') as file:
    case_id2 = [line.split('\\')[5] for line in file]

print(case_id2[0])

lgg_fract_dim = pd.DataFrame({
    'case_id': case_id2,
    **{name: x[:, k] for k, name in enumerate(
        ['ncr_net_meanfd', 'ncr_net_medfd',
         'ed_meanfd', 'ed_medfd',
         'et_meanfd', 'et_medfd'])},
})

lgg_fract_dim.to_csv('lgg_frac_dim.csv', index=False)

Runtime: 11.820613861083984 seconds
TCGA-CS-4942


## Coronal FD

In [20]:
# Coronal (axis-1) fractal-dimension features for every GBM case listed in
# gbm_input.txt, saved to gbm_frac_dim_cor.csv.
gbm_fd = []
case_id = []
t0 = time.time()

# Features and case ids are collected in the SAME pass: a volume that fails to
# load or process is dropped from both lists, so DataFrame rows stay aligned.
with open('gbm_input.txt') as file:      # text file containing one file path per line
    for fl in file:
        path = fl.rstrip()
        if not path:
            continue    # ignore blank lines
        subject = path.split('\\')[5]    # number is the position of the subject code in the path
        try:
            features = fd_cor(nb.load(path).get_fdata())
        except Exception as err:
            # Best-effort run: report the skipped case instead of hiding it.
            print('Skipping', path, '->', repr(err))
            continue
        gbm_fd.append(features)
        case_id.append(subject)


print('Runtime:', time.time() - t0 , 'seconds')
x = np.array(gbm_fd)

print(case_id[0])    # to check what is being printed


gbm_fract_dim_cor = pd.DataFrame({'case_id':case_id,
                          'ncr_net_meanfd_cor': x[:, 0], 'ncr_net_medfd_cor': x[:, 1],
                         'ed_meanfd_cor': x[:, 2], 'ed_medfd_cor':x[:, 3],
                         'et_meanfd_cor': x[:, 4], 'et_medfd_cor': x[:, 5]})

# Save the coronal frame built above (previously the axis-2 frame
# `gbm_fract_dim` was written to this file by mistake).
gbm_fract_dim_cor.to_csv('gbm_frac_dim_cor.csv', index=False)

Runtime: 20.746455192565918 seconds
TCGA-02-0006


In [21]:
# Coronal (axis-1) fractal-dimension features for every LGG case listed in
# lgg_input.txt, saved to lgg_frac_dim_cor.csv.
lgg_fd = []
case_id2 = []
t0 = time.time()

# One pass collects both the case id and the features, so they cannot drift apart.
with open('lgg_input.txt') as file:
    for fl in file:
        path = fl.rstrip()
        if not path:
            continue    # ignore blank lines
        case_id2.append(path.split('\\')[5])    # subject code position in the path
        lgg_fd.append(fd_cor(nb.load(path).get_fdata()))

print('Runtime:', time.time() - t0 , 'seconds')
x = np.array(lgg_fd)

print(case_id2[0])

lgg_fract_dim_cor = pd.DataFrame({'case_id':case_id2,
                          'ncr_net_meanfd_cor': x[:, 0], 'ncr_net_medfd_cor': x[:, 1],
                         'ed_meanfd_cor': x[:, 2], 'ed_medfd_cor':x[:, 3],
                         'et_meanfd_cor': x[:, 4], 'et_medfd_cor': x[:, 5]})

# Save the coronal frame built above (previously the axis-2 frame
# `lgg_fract_dim` was written to this file by mistake).
lgg_fract_dim_cor.to_csv('lgg_frac_dim_cor.csv', index=False)

Runtime: 12.124009370803833 seconds
TCGA-CS-4942


## Sagittal FD

In [22]:
# Sagittal (axis-0) fractal-dimension features for every GBM case listed in
# gbm_input.txt, saved to gbm_frac_dim_sag.csv.
gbm_fd = []
case_id = []
t0 = time.time()

# Features and case ids are collected in the SAME pass: a volume that fails to
# load or process is dropped from both lists, so DataFrame rows stay aligned.
with open('gbm_input.txt') as file:      # text file containing one file path per line
    for fl in file:
        path = fl.rstrip()
        if not path:
            continue    # ignore blank lines
        subject = path.split('\\')[5]    # number is the position of the subject code in the path
        try:
            features = fd_sag(nb.load(path).get_fdata())
        except Exception as err:
            # Best-effort run: report the skipped case instead of hiding it.
            print('Skipping', path, '->', repr(err))
            continue
        gbm_fd.append(features)
        case_id.append(subject)


print('Runtime:', time.time() - t0 , 'seconds')
x = np.array(gbm_fd)

print(case_id[0])    # to check what is being printed


gbm_fract_dim_sag = pd.DataFrame({'case_id':case_id,
                          'ncr_net_meanfd_sag': x[:, 0], 'ncr_net_medfd_sag': x[:, 1],
                         'ed_meanfd_sag': x[:, 2], 'ed_medfd_sag':x[:, 3],
                         'et_meanfd_sag': x[:, 4], 'et_medfd_sag': x[:, 5]})

# Save the sagittal frame built above (previously the axis-2 frame
# `gbm_fract_dim` was written to this file by mistake).
gbm_fract_dim_sag.to_csv('gbm_frac_dim_sag.csv', index=False)

Runtime: 20.586206912994385 seconds
TCGA-02-0006


In [23]:
# Sagittal (axis-0) fractal-dimension features for every LGG case listed in
# lgg_input.txt, saved to lgg_frac_dim_sag.csv.
lgg_fd = []
case_id2 = []
t0 = time.time()

# One pass collects both the case id and the features, so they cannot drift apart.
with open('lgg_input.txt') as file:
    for fl in file:
        path = fl.rstrip()
        if not path:
            continue    # ignore blank lines
        case_id2.append(path.split('\\')[5])    # subject code position in the path
        lgg_fd.append(fd_sag(nb.load(path).get_fdata()))

print('Runtime: ', time.time() - t0 , 'seconds')
x = np.array(lgg_fd)

print(case_id2[0])

lgg_fract_dim_sag = pd.DataFrame({'case_id':case_id2,
                          'ncr_net_meanfd_sag': x[:, 0], 'ncr_net_medfd_sag': x[:, 1],
                         'ed_meanfd_sag': x[:, 2], 'ed_medfd_sag':x[:, 3],
                         'et_meanfd_sag': x[:, 4], 'et_medfd_sag': x[:, 5]})

# Save the sagittal frame built above (previously the axis-2 frame
# `lgg_fract_dim` was written to this file by mistake).
lgg_fract_dim_sag.to_csv('lgg_frac_dim_sag.csv', index=False)

Runtime:  13.099388122558594 seconds
TCGA-CS-4942


In [29]:
# Stack the LGG rows on top of the GBM rows, once per slicing direction
df_merged, df_merged_cor, df_merged_sag = (
    pd.concat([lgg_part, gbm_part])
    for lgg_part, gbm_part in (
        (lgg_fract_dim, gbm_fract_dim),
        (lgg_fract_dim_cor, gbm_fract_dim_cor),
        (lgg_fract_dim_sag, gbm_fract_dim_sag),
    )
)

# Join the three tables on the subject code; only cases present in all of them survive
frac_temp = df_merged.merge(df_merged_cor, how='inner', on='case_id')
frac_merged = frac_temp.merge(df_merged_sag, how='inner', on='case_id')

In [30]:
# For each sub-region, average the mean FD over the three slicing directions.
# Insertion order (ncr_net, et, ed) fixes the order of the new columns.
mean3d_sources = {
    'ncr_net_mean3dfd': ['ncr_net_meanfd', 'ncr_net_meanfd_cor', 'ncr_net_meanfd_sag'],
    'et_mean3dfd': ['et_meanfd', 'et_meanfd_cor', 'et_meanfd_sag'],
    'ed_mean3dfd': ['ed_meanfd', 'ed_meanfd_cor', 'ed_meanfd_sag'],
}

for target_column, view_columns in mean3d_sources.items():
    frac_merged[target_column] = frac_merged[view_columns].mean(axis=1)

In [32]:
# Write the merged table (per-direction features plus the mean3dfd columns)
# to CSV, leaving out the DataFrame index.
frac_merged.to_csv('3Dfrac.csv', index=False)