Import libraries

In [25]:
import cv2
import os
from tqdm import tqdm
import matplotlib.pyplot as plt 
import torchxrayvision as xrv

Initial Data Loader 

In [30]:
def test_dataloader(path, imgpath='images'):
    
    d_covid19 = xrv.datasets.COVID19_Dataset(
        views= ["PA", "AP", "AP Supine"],
        imgpath= imgpath,
        csvpath= path + "metadata.csv"
    )

    for i in tqdm(range(len(d_covid19))):
        # start from the most recent
        a = d_covid19[len(d_covid19)-i-1]
    
    return d_covid19

Data Instance

In [7]:
# Local root folder of the dataset (machine-specific -- adjust before running).
path_origin = 'C:/Users/frank/covid-chestxray-dataset/'

In [8]:
# `path_origin` is the only path variable defined in this notebook; the
# previous argument `path` only existed as leftover kernel state.
data = test_dataloader(path_origin)

100%|████████████████████████████████████████████████████████████████████████████████| 697/697 [01:05<00:00, 10.57it/s]


Metadata DataFrame Overview

In [10]:
# Preview the first rows of the metadata table attached to the dataset object.
data.csv.head()

Unnamed: 0,index,patientid,offset,sex,age,finding,RT_PCR_positive,survival,intubated,intubation_present,...,date,location,folder,filename,doi,url,license,clinical_notes,other_notes,Unnamed: 29
0,0,2,0.0,M,65.0,Pneumonia/Viral/COVID-19,Y,Y,N,N,...,"January 22, 2020","Cho Ray Hospital, Ho Chi Minh City, Vietnam",images,auntminnie-a-2020_01_28_23_51_6665_2020_01_28_...,10.1056/nejmc2001272,https://www.nejm.org/doi/full/10.1056/NEJMc200...,,"On January 22, 2020, a 65-year-old man with a ...",,
1,1,2,3.0,M,65.0,Pneumonia/Viral/COVID-19,Y,Y,N,N,...,"January 25, 2020","Cho Ray Hospital, Ho Chi Minh City, Vietnam",images,auntminnie-b-2020_01_28_23_51_6665_2020_01_28_...,10.1056/nejmc2001272,https://www.nejm.org/doi/full/10.1056/NEJMc200...,,"On January 22, 2020, a 65-year-old man with a ...",,
2,2,2,5.0,M,65.0,Pneumonia/Viral/COVID-19,Y,Y,N,N,...,"January 27, 2020","Cho Ray Hospital, Ho Chi Minh City, Vietnam",images,auntminnie-c-2020_01_28_23_51_6665_2020_01_28_...,10.1056/nejmc2001272,https://www.nejm.org/doi/full/10.1056/NEJMc200...,,"On January 22, 2020, a 65-year-old man with a ...",,
3,3,2,6.0,M,65.0,Pneumonia/Viral/COVID-19,Y,Y,N,N,...,"January 28, 2020","Cho Ray Hospital, Ho Chi Minh City, Vietnam",images,auntminnie-d-2020_01_28_23_51_6665_2020_01_28_...,10.1056/nejmc2001272,https://www.nejm.org/doi/full/10.1056/NEJMc200...,,"On January 22, 2020, a 65-year-old man with a ...",,
4,4,4,0.0,F,52.0,Pneumonia/Viral/COVID-19,Y,,N,N,...,"January 25, 2020","Changhua Christian Hospital, Changhua City, Ta...",images,nejmc2001573_f1a.jpeg,10.1056/NEJMc2001573,https://www.nejm.org/doi/full/10.1056/NEJMc200...,,diffuse infiltrates in the bilateral lower lungs,,


In [11]:
# Data labels: distinct values of the `finding` column of the metadata.
data.csv.finding.unique()

array(['Pneumonia/Viral/COVID-19', 'Pneumonia', 'Pneumonia/Viral/SARS',
       'Pneumonia/Fungal/Pneumocystis',
       'Pneumonia/Bacterial/Streptococcus', 'No Finding',
       'Pneumonia/Bacterial/Chlamydophila', 'Pneumonia/Bacterial/E.Coli',
       'Pneumonia/Bacterial/Klebsiella', 'Pneumonia/Bacterial/Legionella',
       'Pneumonia/Lipoid', 'Pneumonia/Viral/Varicella',
       'Pneumonia/Bacterial', 'Pneumonia/Bacterial/Mycoplasma',
       'Pneumonia/Viral/Influenza', 'Tuberculosis',
       'Pneumonia/Viral/Influenza/H1N1', 'Pneumonia/Fungal/Aspergillosis',
       'Pneumonia/Viral/Herpes ', 'Pneumonia/Aspiration',
       'Pneumonia/Bacterial/Nocardia', 'Pneumonia/Viral/MERS-CoV',
       'Pneumonia/Bacterial/Staphylococcus/MRSA'], dtype=object)

In [18]:
# Example of a filename as stored in the metadata (entry with index label 3).
data.csv.filename[3]

'auntminnie-d-2020_01_28_23_51_6665_2020_01_28_Vietnam_coronavirus.jpeg'

Image Naive Processing

In [28]:
class Transformation(object):
    """
    Class that executes specific transformations over the pictures
    listed in a previously loaded xrv.datasets.COVID19_Dataset and
    saves the results in a separate folder.

    Inputs:
    ------
    data_instance:
        torchxrayvision object containing picture data; only its
        ``csv.filename`` column is read by this class.
    order:
        string containing processing order. Only 'gray_transformation'
        is implemented; with any other value ``get_information`` does
        nothing.
    path_origin:
        local dataset root; pictures are read from its 'images'
        subfolder.
    path_saving:
        local path to save pictures transformed (created if missing).
    save:
        when False, nothing is read or written.

    Output:
    ------
    No returns.

    Important:
    --------
    Pictures are saved under their original metadata filenames, so
    ``path_saving`` must differ from ``path_origin`` and from its
    'images' subfolder.
    """
    def __init__(self,
                 data_instance,
                 order,
                 path_origin,
                 path_saving,
                 save=True):

        self.data_instance = data_instance
        self.path_origin = path_origin
        self.path_saving = path_saving
        self.order = order
        self.save = save

    @staticmethod
    def _canonical(path):
        """Return `path` in a normalized form suitable for equality checks."""
        return os.path.normcase(os.path.abspath(path))

    def _source_path(self, filename):
        """Return the location of `filename` inside the origin 'images' folder."""
        return os.path.join(self.path_origin, 'images', filename)

    def __savingpicture__(self, img, filename):
        """
        Write `img` to `filename`.

        Raises OSError when OpenCV reports that nothing was written;
        cv2.imwrite signals failure only through its return value.
        """
        if not cv2.imwrite(filename, img):
            raise OSError("could not write picture: {}".format(filename))

    def __naiveGrayScale__(self):
        """
        Read every listed picture as gray-scale and return them as a list.

        An entry is None when OpenCV could not read the corresponding file.
        """
        return [
            cv2.imread(self._source_path(filename), cv2.IMREAD_GRAYSCALE)
            for filename in self.data_instance.csv.filename
        ]

    def get_information(self):
        """
        Run the requested transformation and save its results.

        Raises:
        ------
        ValueError:
            if ``path_saving`` is ``path_origin`` or its 'images' folder.
        OSError:
            if a picture cannot be read or written.
        """
        origin = self._canonical(self.path_origin)
        target = self._canonical(self.path_saving)

        if target == origin:
            raise ValueError(
                "'origin' path and 'saving' path should be different"
            )
        # Output files reuse the input filenames, so writing into the
        # source folder would overwrite the original pictures.
        if target == self._canonical(os.path.join(self.path_origin, 'images')):
            raise ValueError(
                "'saving' path should not be the 'images' folder of the "
                "'origin' path"
            )

        if not self.save or self.order != 'gray_transformation':
            return

        # Write through explicit paths instead of os.chdir so the working
        # directory of the notebook is left untouched.
        os.makedirs(self.path_saving, exist_ok=True)

        # One picture at a time: read, check, save. This avoids holding
        # the whole dataset in memory and keeps each output paired with
        # the filename it was read from.
        filenames = list(self.data_instance.csv.filename)
        for filename in tqdm(filenames):
            source = self._source_path(filename)
            picture = cv2.imread(source, cv2.IMREAD_GRAYSCALE)
            if picture is None:
                raise OSError("could not read picture: {}".format(source))
            self.__savingpicture__(
                picture,
                os.path.join(self.path_saving, filename)
            )

In [29]:
# Reuse the dataset root defined earlier in the notebook instead of
# repeating the absolute path; the gray-scale copies go to a sibling
# 'images_filtered' folder under that root.
Transformation(
    data,
    'gray_transformation',
    path_origin,
    os.path.join(path_origin, 'images_filtered'),
).get_information()

100%|████████████████████████████████████████████████████████████████████████████████| 697/697 [00:21<00:00, 33.12it/s]
