<a href="https://colab.research.google.com/github/mbagci06/lung_covid/blob/main/functions.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import os
import numpy as np
import torch
from PIL import Image
# from pydrive.drive import GoogleDrive
# from google.colab import drive
# from pydrive.auth import GoogleAuth
# from google.colab import auth
import torchvision.transforms as transforms
# from oauth2client.client import GoogleCredentials
from torch.utils.data import DataLoader,Dataset
# drive.mount('/content/drive')
# Shuffle 
import random

# Evaluation metrics: iou
def iou(input, targets, smooth = 0.000001):
    """Intersection over union (Jaccard index) of a prediction and a mask.

    The prediction is binarised at 0.5 before the overlap is measured; the
    reference mask is used exactly as it is passed in.

    :param input  : the output of the model; values >= 0.5 become 1, the rest 0.
    :param targets: the reference mask.
    :param smooth : small constant added to both numerator and denominator.

    :returns : (intersection + smooth) / (union + smooth), where
               union = sum(input) + sum(targets) - intersection
    :rtype   : scalar torch.Tensor
    """
    # Binarise the prediction: anything at or above 0.5 counts as foreground.
    binary = torch.where(input >= 0.5, 1, 0)
    # Overlap: positions where the binarised prediction and the mask are both set.
    overlap = (binary * targets).sum()
    # Union by inclusion-exclusion: |A| + |B| - |A and B|.
    union = binary.sum() + targets.sum() - overlap
    return (overlap + smooth) / (union + smooth)


def binary_acc(y_true, y_pred, smooth = 0.0001):
    """Element-wise binary accuracy between a thresholded tensor and a mask.

    Note that it is ``y_true`` that gets binarised at 0.5; ``y_pred`` is
    compared as given. Both tensors are flattened before the comparison.

    :param y_true : tensor that is thresholded (values >= 0.5 become 1, else 0).
    :param y_pred : tensor it is compared against, element by element.
    :param smooth : accepted for signature compatibility; not used.

    :returns : number of equal elements divided by the element count of y_pred
    :rtype   : scalar torch.Tensor
    """
    # Binarise and flatten the first argument in one go.
    binarised = torch.where(y_true >= 0.5, 1, 0).flatten()
    reference = y_pred.flatten()
    # Count the positions on which both tensors agree.
    matches = (binarised == reference).sum()
    # After flattening, shape[0] is the total number of elements.
    return matches / reference.shape[0]


def dice( inputs, targets, smooth=1e-5):
    """Dice coefficient between a thresholded prediction and a reference mask.

        (2 * intersection + smooth) / (sum(inputs) + sum(targets) + smooth)

    The smoothing term is applied to the denominator as well as the numerator,
    so two empty masks yield 1.0 instead of a division by zero (``inf``).

    :param inputs : the output of the model; values >= 0.5 become 1, the rest 0.
    :param targets: the reference mask.
    :param smooth : small constant that keeps the ratio finite, default 1e-5.

    :returns : dice coefficient.
    :rtype   : float
    """
    # The prediction may hold arbitrary floats, so binarise it at 0.5 first.
    inputs = torch.where(inputs >= 0.5, 1, 0)
    inputs = torch.flatten(inputs)
    targets = torch.flatten(targets)

    intersection = torch.sum(inputs * targets)
    total = torch.sum(targets) + torch.sum(inputs)
    # smooth on both sides: finite for empty masks, exactly 1.0 on a perfect match.
    score = (2.0 * intersection + smooth) / (total + smooth)
    return float(score)

def Negative_mask(mask,batch_size):
    """Build a two-channel mask: the mask itself plus ``|mask - 1|``.

    The input is reshaped to (batch_size, 1, 256, 256), so it must hold
    exactly batch_size * 256 * 256 elements. For a binary 0/1 mask the second
    channel is its complement.

    :param mask       : tensor reshaped to batch_size x 1 x 256 x 256
    :param batch_size : number of masks in the batch

    :returns :  tensor : batch_size x 2 x 256 x 256 (channel 0 = mask, channel 1 = negative)
    :rtype   :  tensor
    """
    positive = mask.reshape((batch_size, 1, 256, 256))
    # |mask - 1| maps 1 -> 0 and 0 -> 1.
    inverse = (positive - 1).abs()
    # Stack along the channel axis; the result already has shape (batch_size, 2, 256, 256).
    return torch.cat((positive, inverse), dim=1)
class Lung_Dataset_Gray(Dataset): # inherit from torch.utils.data.Dataset
    """Lung and covid-19 segmentation dataset yielding single-channel images.

    Files are discovered under

        root_dir + class_folder + split + label + ('/images/' | mask folder)

    for each label in ``label_array``. Each directory listing is sorted, and
    images are paired with masks by position within a label.
    NOTE(review): positional pairing assumes image and mask file names sort in
    the same order (presumably identical names) - confirm against the dataset.
    """

    def __init__(self,root_dir = ("archive"),class_folder='lung',split = "Train", transforms = None , shuffle = True,batch_size=1,
                 sample_size=500,seed=228 ,norm=True):
        """
        Args:
        :param root_dir (str): the location of the dataset folders
        :param class_folder (str): 'lung' selects the lung segmentation data, any
                                   other value the infection segmentation data
        :param split (str): sub-folder to read; "Test" keeps every file, unshuffled
        :param transforms (optional): callable applied to the sample dict, default None
        :param shuffle (bool): shuffle the pairs with ``seed`` (ignored for "Test")
        :param batch_size (int, optional) : stored on the instance only, default 1
        :param sample_size (int) : maximum number of pairs kept for non-"Test"
                                   splits, default to 500
        :param seed (int) : seed of the shuffle, default to 228
        :param norm (bool) : normalise the image tensor in __getitem__, default True
        """
        self.norm = norm
        self.seed = seed
        self.root_dir = root_dir
        self.split = split  # Test Train Val
        self.transforms = transforms
        self.batch_size = batch_size
        self.sample_size = sample_size

        # Label array used to reach folders
        self.label_array = ["/COVID-19", "/Non-COVID", "/Normal"]
        self.image_folder = '/images/'
        # Pick the data folder and mask folder for the requested task.
        if class_folder == 'lung':
            self.class_folder = '/Lung Segmentation Data/Lung Segmentation Data/'
            self.mask_folder = '/lung masks/'
        else:
            self.class_folder = '/Infection Segmentation Data/Infection Segmentation Data/'
            self.mask_folder = '/infection masks/'

        # Collect (image, mask) relative paths, e.g. /COVID-19/images/covid123.png
        # Pairing per label keeps a count mismatch in one label from shifting
        # the pairs of the labels that follow.
        pairs = []
        for label in self.label_array:
            images = self._list_folder(label, self.image_folder)
            masks = self._list_folder(label, self.mask_folder)
            pairs.extend(zip(images, masks))

        # "Test" keeps everything in order. Other splits are shuffled across
        # the whole file list first and only then cut to sample_size, so the
        # sample is not limited to the first label folder(s).
        if split != "Test":
            if shuffle:
                random.Random(self.seed).shuffle(pairs)
            pairs = pairs[:sample_size]

        self.image_file = [image for image, _ in pairs]
        self.mask_file = [mask for _, mask in pairs]
        # Storing the data to the dictionary
        self.data_file = {"Data": {"image": self.image_file, "mask": self.mask_file}}
        # Sized from the pairs actually found, so __len__ never over-reports.
        self.mask_idx = {"idx": list(range(len(pairs)))}

    def _list_folder(self, label, folder):
        """Return sorted 'label + folder + filename' strings for one directory."""
        directory = self.root_dir + self.class_folder + self.split + label + folder
        # os.listdir gives no ordering guarantee; sort for reproducible pairing.
        return ['{}{}{}'.format(label, folder, name) for name in sorted(os.listdir(directory))]

    def __len__(self):
        return len(self.mask_idx['idx'])

    def __getitem__(self, idx):
        """Load one sample as ``{'image': tensor, 'mask': tensor}``.

        The image is converted to mode 'L' and, when ``norm`` is set, normalised
        with mean 0.456 / std 0.224. The mask is converted to a tensor in
        whatever mode it is stored. ``self.transforms``, if given, receives the
        whole sample dict and its result is returned.
        """
        position = self.mask_idx['idx'][idx]
        base = self.root_dir + self.class_folder + self.split
        img_path = base + self.data_file['Data']['image'][position]
        mask_path = base + self.data_file['Data']['mask'][position]

        to_tensor = transforms.ToTensor()
        # Context managers close the underlying files once the pixels are read.
        with Image.open(img_path) as raw_image:
            img = to_tensor(raw_image.convert('L'))
        with Image.open(mask_path) as raw_mask:
            mask = to_tensor(raw_mask)

        if self.norm:
            img = transforms.Normalize(mean=[0.456], std=[0.224])(img)

        # The return dictionary
        sample = {'image': img, 'mask': mask}
        if self.transforms:
            sample = self.transforms(sample)
        return sample

class Lung_Dataset_RGB(Dataset): # inherit from torch.utils.data.Dataset
    """Lung and covid-19 segmentation dataset yielding three-channel RGB images.

    Files are discovered under

        root_dir + class_folder + split + label + ('/images/' | mask folder)

    for each label in ``label_array``. Each directory listing is sorted, and
    images are paired with masks by position within a label.
    NOTE(review): positional pairing assumes image and mask file names sort in
    the same order (presumably identical names) - confirm against the dataset.
    """

    def __init__(self,root_dir = ("archive"),class_folder='lung',split = "Train", transforms = None , shuffle = True,batch_size=1,
                 sample_size=500,seed=228,norm=True):
        """
        Args:
        :param root_dir (str): the location of the dataset folders
        :param class_folder (str): 'lung' selects the lung segmentation data, any
                                   other value the infection segmentation data
        :param split (str): sub-folder to read; "Test" keeps every file, unshuffled
        :param transforms (optional): callable applied to the sample dict, default None
        :param shuffle (bool): shuffle the pairs with ``seed`` (ignored for "Test")
        :param batch_size (int, optional) : stored on the instance only, default 1
        :param sample_size (int) : maximum number of pairs kept for non-"Test"
                                   splits, default to 500
        :param seed (int) : seed of the shuffle, default to 228
        :param norm (bool) : normalise the image tensor in __getitem__, default True
        """
        self.norm = norm
        self.root_dir = root_dir
        self.split = split  # Test Train Val
        self.transforms = transforms
        self.batch_size = batch_size
        self.sample_size = sample_size
        self.seed = seed

        # Label array used to reach folders
        self.label_array = ["/COVID-19", "/Non-COVID", "/Normal"]
        self.image_folder = '/images/'
        # Pick the data folder and mask folder for the requested task.
        if class_folder == 'lung':
            self.class_folder = '/Lung Segmentation Data/Lung Segmentation Data/'
            self.mask_folder = '/lung masks/'
        else:
            self.class_folder = '/Infection Segmentation Data/Infection Segmentation Data/'
            self.mask_folder = '/infection masks/'

        # Collect (image, mask) relative paths, e.g. /COVID-19/images/covid123.png
        # Pairing per label keeps a count mismatch in one label from shifting
        # the pairs of the labels that follow.
        pairs = []
        for label in self.label_array:
            images = self._list_folder(label, self.image_folder)
            masks = self._list_folder(label, self.mask_folder)
            pairs.extend(zip(images, masks))

        # "Test" keeps everything in order. Other splits are shuffled across
        # the whole file list first and only then cut to sample_size, so the
        # sample is not limited to the first label folder(s).
        if split != "Test":
            if shuffle:
                random.Random(self.seed).shuffle(pairs)
            pairs = pairs[:sample_size]

        self.image_file = [image for image, _ in pairs]
        self.mask_file = [mask for _, mask in pairs]
        # Storing the data to the dictionary
        self.data_file = {"Data": {"image": self.image_file, "mask": self.mask_file}}
        # Sized from the pairs actually found, so __len__ never over-reports.
        self.mask_idx = {"idx": list(range(len(pairs)))}

    def _list_folder(self, label, folder):
        """Return sorted 'label + folder + filename' strings for one directory."""
        directory = self.root_dir + self.class_folder + self.split + label + folder
        # os.listdir gives no ordering guarantee; sort for reproducible pairing.
        return ['{}{}{}'.format(label, folder, name) for name in sorted(os.listdir(directory))]

    def __len__(self):
        return len(self.mask_idx['idx'])

    def __getitem__(self, idx):
        """Load one sample as ``{'image': tensor, 'mask': tensor}``.

        The image is converted to mode 'RGB' and, when ``norm`` is set,
        normalised per channel with mean (0.485, 0.456, 0.406) and std
        (0.229, 0.224, 0.225) - presumably the ImageNet statistics; confirm.
        The mask is converted to a tensor in whatever mode it is stored.
        ``self.transforms``, if given, receives the whole sample dict and its
        result is returned.
        """
        position = self.mask_idx['idx'][idx]
        base = self.root_dir + self.class_folder + self.split
        img_path = base + self.data_file['Data']['image'][position]
        mask_path = base + self.data_file['Data']['mask'][position]

        to_tensor = transforms.ToTensor()
        # Context managers close the underlying files once the pixels are read.
        with Image.open(img_path) as raw_image:
            img = to_tensor(raw_image.convert('RGB'))
        with Image.open(mask_path) as raw_mask:
            mask = to_tensor(raw_mask)

        if self.norm:
            normalise = transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
            img = normalise(img)

        # The return dictionary
        sample = {'image': img, 'mask': mask}
        if self.transforms:
            sample = self.transforms(sample)
        return sample



Mounted at /content/drive
