In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import os
import matplotlib.pyplot as plt
import pydicom

# You can write up to 5GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
cd ../input/second-annual-data-science-bowl/

# **Dataset visualisation & understanding**

In [None]:
# Patient folders of the validation split, ordered by numeric patient ID.
validate = sorted(os.listdir("validate/validate"), key=int)
print(validate)
print("\n length :", len(validate))

In [None]:
# Patient folders of the test split, ordered by numeric patient ID.
# Stored under its own name so the validation listing is not silently
# overwritten by a list that actually describes a different split.
test_patients = sorted(os.listdir("test/test"), key=int)
print(test_patients)
print("\n length :", len(test_patients))

In [None]:
# Patient folders of the training split, ordered by numeric patient ID.
# Stored under its own name so the variable says which split it describes.
train_patients = sorted(os.listdir("train/train"), key=int)
print(train_patients)
print("\n length :", len(train_patients))

In [None]:

# Pie chart of how the patients are distributed over the three dataset splits.
# The slice sizes are counted from the folders on disk instead of being typed
# in by hand, so the chart cannot drift out of sync with the data.
labels = 'Train', 'test', 'validate'
split_dirs = ("train/train", "test/test", "validate/validate")  # same order as `labels`
sizes = [len(os.listdir(split_dir)) for split_dir in split_dirs]
colors = ['yellowgreen', 'lightcoral', 'lightskyblue']
explode = (0, 0, 0)  # all zeros: no slice is pulled out of the pie

# Plot
plt.pie(sizes, explode=explode, labels=labels, colors=colors,
        autopct='%1.1f%%', shadow=True, startangle=140)

plt.axis('equal')  # equal aspect ratio so the pie is drawn as a circle
plt.show()

In [None]:

# Read train.csv from the dataset root into a DataFrame and print it.
df = pd.read_csv("train.csv")
print(df)
 

In [None]:
# For every training patient, record how many entries the "study" folder holds
# (num_files) and how many of those are short-axis "sax" folders (sax_files).
path="train/train/"

# os.listdir returns entries in arbitrary order; sort numerically (the patient
# folders are named by integer ID) so the per-patient lists and every plot
# built from them are reproducible across runs.
patients=sorted(os.listdir(path),key=int)

num_files=[]
sax_files=[]
for patient in patients:
    # os.path.join works whether or not `path` ends with a separator
    patient_files=os.listdir(os.path.join(path,patient,"study"))
    num_files.append(len(patient_files))
    sax_files.append(sum(1 for file in patient_files if file.startswith("sax")))

In [None]:
# Bar chart: number of study entries for the first 50 patients in `patients`.
fig = plt.figure(figsize=(20, 9))
ax = fig.add_axes([0, 0, 1, 1])  # one axes spanning the whole figure
ax.set_title('number of files per patient')
ax.bar(x=patients[:50], height=num_files[:50])
plt.show()

In [None]:
# Gather the short-axis folder names of every patient, then tally how often
# each folder name (sax_<position>) occurs across all patients.
all_sax=[]
for patient in patients:
    # os.path.join works whether or not `path` ends with a separator
    files=os.listdir(os.path.join(path,patient,"study"))
    sax_files=[ss for ss in files if ss[0]=="s"]   # keep folders whose name starts with "s"
    all_sax.extend(sax_files)                      # in-place, avoids re-copying the list each time

# Single-pass tally: counting with list.count() per distinct name rescans the
# whole list every time, which is quadratic in the number of folders.
sax=dict()
for s in all_sax:
    sax[s]=sax.get(s,0)+1

# Distinct folder names ordered by the integer that follows the "sax_" prefix.
all_sax2=sorted(sax,key=lambda x:int(x[4:]))
print(all_sax2)

In [None]:
# Bar chart: how many times each short-axis position occurs in the training set.
pos=[int(s[4:]) for s in all_sax2]
# Look the counts up in the same (sorted) order as `pos`. Taking sax.values()
# directly yields them in dict insertion order, which does not match the
# sorted positions and therefore pairs bars with the wrong heights.
val=[sax[s] for s in all_sax2]
fig = plt.figure()
ax = fig.add_axes([0,0,1,1])
ax.bar(pos,val)
ax.set_title('number of slices by position')
plt.show()

In [None]:
# Display the first listed frame of the first listed study folder of one patient.
# Pick the patient explicitly (the last entry of `patients`) rather than relying
# on a loop variable leaked from an earlier cell.
patient=patients[-1]
# os.path.join works whether or not `path` ends with a separator, so this cell
# keeps working if `path` is later reassigned without the trailing slash.
study_dir=os.path.join(path,patient,"study")
files=os.listdir(study_dir)
slices_paths=os.listdir(os.path.join(study_dir,files[0]))
a_slice=pydicom.dcmread(os.path.join(study_dir,files[0],slices_paths[0]))
plt.imshow(a_slice.pixel_array,cmap=plt.cm.bone)

In [None]:
# Show up to rows*columns frames of the selected folder in a grid.
fig=plt.figure(figsize=(20, 20))
columns=6
rows=5

# Order the frames once, up front, by the number parsed from characters
# [-8:-4] of each file name (sorting inside the loop repeated the same work).
slices_paths=sorted(slices_paths,key=lambda x: int(x[-8:-4]))

# Slicing caps the grid at rows*columns frames and avoids an IndexError when
# the folder holds fewer frames than the grid has cells.
for i,slice_name in enumerate(slices_paths[:columns*rows],start=1):
    fig.add_subplot(rows,columns,i)
    slice_path=os.path.join(path,patient,"study",files[0],slice_name)
    a_slice=pydicom.dcmread(slice_path)
    print(slice_path)
    plt.imshow(a_slice.pixel_array,cmap=plt.cm.bone)
plt.show()

# **ROI extraction**

In [None]:
import cv2
import pydicom
import matplotlib.pyplot as plt



# Root folder of the training studies. No trailing slash here: patient_folders
# inserts the "/" separator itself when it builds paths from this value.
path = "train/train"

# Entries found directly under the training root (one folder per patient).
patients_list = os.listdir(path)


def patient_folders(patient):
    """Return a patient's short-axis folder names, ordered by slice position.

    Lists ``<path>/<patient>/study`` (``path`` is the module-level root),
    keeps the entries whose name starts with "s" (which drops the 2- and
    4-chamber views) and sorts them by the integer after the first "_".
    """
    study_dir = path+"/"+patient+"/study"

    # keep only the short-axis folders
    sax_only = [name for name in os.listdir(study_dir) if name[0]=="s"]

    # order on spatial position: sax_MIN ... sax_MAX
    sax_only.sort(key=lambda name: int(name.split("_")[1]))
    return sax_only




def normalize_image(dcm_slice):
    """Return the slice as an 8-bit image resampled so that one pixel covers one mm.

    Parameters
    ----------
    dcm_slice : object exposing ``pixel_array``, ``PixelSpacing``, ``Rows``
        and ``Columns`` (e.g. what ``pydicom.dcmread`` returns).

    Returns
    -------
    numpy.ndarray
        uint8 image with intensities rescaled to 0-255 and shape
        ``(int(Rows * PixelSpacing[0]), int(Columns * PixelSpacing[1]))``.
    """
    scale = dcm_slice.PixelSpacing

    # target size after resampling: rows scale with the first spacing value,
    # columns with the second
    new_height = int(dcm_slice.Rows * scale[0])
    new_width = int(dcm_slice.Columns * scale[1])

    # normalizing the image: clip negatives, then stretch to 0-255
    pixels = np.maximum(dcm_slice.pixel_array.astype(float), 0)
    peak = pixels.max()
    if peak > 0:
        # a blank frame (peak == 0) stays all zeros instead of dividing by zero
        pixels = (pixels / peak) * 255.0
    image = np.uint8(pixels)

    # resizing the image to real dimensions (mm).
    # cv2.resize takes dsize as (width, height) -- the reverse of NumPy's
    # (rows, columns) shape order -- so the width must come first, otherwise
    # non-square images end up with their dimensions swapped.
    image = cv2.resize(image, dsize=(new_width, new_height), interpolation=cv2.INTER_CUBIC)

    return image



def create_stack(patient,folder):
    """Stack every frame of one short-axis folder into a single 3D array.

    The frames are read from ``<path>/<patient>/study/<folder>`` (``path`` is
    the module-level root, the same one patient_folders validates against),
    ordered by the number parsed from characters [-8:-4] of each file name,
    normalized with normalize_image and stacked along a third axis.

    Parameters
    ----------
    patient : str
        Name of the patient folder.
    folder : str
        Name of one of the patient's short-axis folders.

    Returns
    -------
    numpy.ndarray or None
        Array with one layer per frame file found in the folder, used to
        define the ROI. None (after printing a hint) when ``folder`` is not
        one of the patient's short-axis folders.
    """
    if folder not in patient_folders(patient):
        print("error, folder and patient don't match, consider calling print(patient_folders('{}'))".format(patient))
        return None

    # read from the same root that the membership check above used
    slices_path = os.path.join(path, patient, "study", folder)

    # order the frames through time
    slices_names = sorted(os.listdir(slices_path), key=lambda x: int(x[-8:-4]))

    stack = [normalize_image(pydicom.dcmread(os.path.join(slices_path, s)))
             for s in slices_names]

    return np.dstack(stack)
        
        


In [None]:
def crop_ROI(patient, file, offset=35):
    """Find the region of interest (the heart) in one short-axis series.

    The frames of the series are stacked, and the per-pixel standard
    deviation over the frames highlights what moves (the heart muscle).
    Edges of that movement image are fed to a Hough circle transform and the
    first circle it reports is padded by ``offset`` pixels on every side.

    Parameters
    ----------
    patient : str
        Name of the patient folder.
    file : str
        Name of the short-axis folder to analyse.
    offset : int, optional
        Margin, in pixels, added around the detected circle.

    Returns
    -------
    tuple of int
        ``(x_1, x_2, y_1, y_2)``, clamped to the image so that
        ``image[y_1:y_2, x_1:x_2]`` is always a valid crop. When no circle is
        detected a message is printed and the full frame is returned.

    Raises
    ------
    ValueError
        If create_stack could not build a stack for this patient/folder.
    """

    #create a stack of the frames of the same section

    patient_stack = create_stack(patient,file)
    if patient_stack is None:
        raise ValueError("no image stack for patient {!r}, folder {!r}".format(patient, file))

    height, width = patient_stack.shape[:2]

    #standard deviation over the frames: bright where pixels change between frames

    std_image = np.std(patient_stack, axis=2)

    #normalizing the image again to have a range of 0-255

    img_2d = std_image.astype(float)
    peak = img_2d.max()
    if peak > 0:
        # a completely static stack (peak == 0) stays all zeros instead of dividing by zero
        img_2d = (np.maximum(img_2d,0) / peak) * 255.0
    movement_image = np.uint8(img_2d)

    #applying a 6x6 box (mean) filter for noise reduction

    movement_image = cv2.blur(movement_image,(6,6))

    #applying Canny edge detection to find the edges of the heart

    edge = cv2.Canny(movement_image,50,100)

    #blurring again
    # NOTE(review): the third positional argument of cv2.GaussianBlur is sigmaX,
    # so BORDER_DEFAULT is used as a sigma value here, not as a border mode.
    # Kept unchanged so the detection behaves as before -- confirm the intent.

    edge = cv2.GaussianBlur(edge,(3,3),cv2.BORDER_DEFAULT)

    #using Hough transform to approximate the position and radius of circular patterns (left ventricle)

    circles = cv2.HoughCircles(edge,cv2.HOUGH_GRADIENT,1,100,param1=100,param2=34,minRadius=0,maxRadius=0)

    if circles is None:
        # nothing detected: fall back to the whole frame rather than crashing
        print("warning, no circle found for patient {} in {}, returning the full frame".format(patient, file))
        return (0, width, 0, height)

    #unpacking the first circle as plain Python ints; unsigned 16-bit
    #arithmetic would wrap around when the circle lies close to the border

    best = np.around(circles[0][0])
    x, y, r = int(best[0]), int(best[1]), int(best[2])

    #defining the ROI box, clamped to the image: a negative start index would
    #otherwise be read as "count from the end" when slicing

    x_1 = max(x-r-offset, 0)
    y_1 = max(y-r-offset, 0)

    x_2 = min(x+r+offset, width)
    y_2 = min(y+r+offset, height)

    return (x_1, x_2, y_1, y_2)
    

In [None]:
def display_cropped(patient, file):
    """Plot every frame of one series twice: the full frame, then its ROI crop.

    Prints a message and returns 1 when ``file`` is not listed in the
    patient's study directory; otherwise shows the plots and returns None.
    """
    study_dir = "train/train/"+patient+"/study/"

    # bail out early when the requested folder does not exist for this patient
    if file not in os.listdir(study_dir):
        print("Error: {} file not found in this directory : {}".format(file,study_dir))
        return 1

    series_dir = "train/train/"+patient+"/study/"+file

    # frame files ordered through time, using the number parsed from
    # characters [-8:-4] of each file name
    frame_names = sorted(os.listdir(series_dir), key=lambda name: int(name[-8:-4]))

    # relative path of every frame
    frame_paths = [series_dir+"/"+name for name in frame_names]

    # bounding box of the region of interest, shared by all frames
    x1, x2, y1, y2 = crop_ROI(patient, file)

    for frame_path in frame_paths:

        # normalized frame, pixels in the range 0-255
        image = normalize_image(pydicom.dcmread(frame_path))

        # the same frame restricted to the ROI
        cropped = image[y1:y2, x1:x2]

        # one figure for the full frame, one for the crop
        for picture, caption in ((image, "original"), (cropped, "cropped image")):
            plt.imshow(picture,cmap="bone")
            plt.title(caption)
            plt.show()
        
        
    


In [None]:
# Short-axis folders available for patient 85, ordered by position.
patient_folders(patient="85")

In [None]:
# Show every frame of folder sax_10 for patient 85, full frame then ROI crop.
display_cropped(patient="85", file="sax_10")