In [1]:
import pandas as pd
import os
import shutil

## Create Train-Test dataset for our Model

- This dataset will contain two classes of images:
  - COVID-positive X-ray cases
  - Normal X-ray cases

In [2]:
# Create the root folder of the training split.
# The capitalised 'Train' matches the './Dataset/Train' paths used further down
# in this notebook, so the same folder is addressed on case-sensitive
# filesystems too. makedirs() also creates './Dataset' when it is missing.
if not os.path.exists('./Dataset/Train'):
    os.makedirs('./Dataset/Train', exist_ok=True)
    print('Directory created')
else:
    print('Already Created')

Directory created


In [3]:
# Create the root folder of the test split.
# The capitalised 'Test' matches the './Dataset/Test' paths used further down
# in this notebook, so the same folder is addressed on case-sensitive
# filesystems too. makedirs() also creates './Dataset' when it is missing.
if not os.path.exists('./Dataset/Test'):
    os.makedirs('./Dataset/Test', exist_ok=True)
    print('Directory created')
else:
    print('Already Created')

Directory created


#### Train Dataset


- Create data for `Covid Positive Xray Cases`

In [4]:
# Locations of the COVID X-ray collection: the image folder and its metadata table.
covid_img_path = './Covid_XRAY/images'
covid_img_metadata = './Covid_XRAY/metadata.csv'

# Read the metadata into a DataFrame, report its (rows, columns) shape and
# preview the first two records.
df = pd.read_csv(covid_img_metadata)
print(df.shape)
df.head(2)

(372, 29)


Unnamed: 0,patientid,offset,sex,age,finding,survival,intubated,intubation_present,went_icu,in_icu,...,date,location,folder,filename,doi,url,license,clinical_notes,other_notes,Unnamed: 28
0,2,0.0,M,65.0,COVID-19,Y,,,,,...,"January 22, 2020","Cho Ray Hospital, Ho Chi Minh City, Vietnam",images,auntminnie-a-2020_01_28_23_51_6665_2020_01_28_...,10.1056/nejmc2001272,https://www.nejm.org/doi/full/10.1056/NEJMc200...,,"On January 22, 2020, a 65-year-old man with a ...",,
1,2,3.0,M,65.0,COVID-19,Y,,,,,...,"January 25, 2020","Cho Ray Hospital, Ho Chi Minh City, Vietnam",images,auntminnie-b-2020_01_28_23_51_6665_2020_01_28_...,10.1056/nejmc2001272,https://www.nejm.org/doi/full/10.1056/NEJMc200...,,"On January 22, 2020, a 65-year-old man with a ...",,


In [5]:
# Show the column labels of the metadata table loaded into `df`.
df.columns

Index(['patientid', 'offset', 'sex', 'age', 'finding', 'survival', 'intubated',
       'intubation_present', 'went_icu', 'in_icu', 'needed_supplemental_O2',
       'extubated', 'temperature', 'pO2_saturation', 'leukocyte_count',
       'neutrophil_count', 'lymphocyte_count', 'view', 'modality', 'date',
       'location', 'folder', 'filename', 'doi', 'url', 'license',
       'clinical_notes', 'other_notes', 'Unnamed: 28'],
      dtype='object')

In [6]:
# Destination folder for the COVID-positive training images.
target_dir = './Dataset/Train/Covid'

# makedirs() creates any missing parent folder as well, so this cell does not
# depend on './Dataset/Train' already existing with exactly this spelling.
if not os.path.exists(target_dir):
    os.makedirs(target_dir, exist_ok=True)
    print('Directory Created')
else:
    print('Already Present')

Directory Created


In [7]:
""" Select those covid data, those 
have a frontview of Xray images."""

# Rows whose finding is COVID-19 and whose X-ray view is 'PA'.
# Missing values compare unequal, so such rows are excluded.
is_covid_pa = (df['finding'] == 'COVID-19') & (df['view'] == 'PA')

count = 0
for img_name in df.loc[is_covid_pa, 'filename']:
    img_path = os.path.join(covid_img_path, img_name)
    target_path = os.path.join(target_dir, img_name)

    # Test the destination file directly instead of re-listing the whole
    # target folder on every iteration; already-copied images are skipped.
    if not os.path.exists(target_path):
        shutil.copy2(img_path, target_path)
        print(f'Image moving {img_name}')
    # Counts every selected image, whether or not it had to be copied.
    count += 1

print(f'Total Images : {count}')

Image moving auntminnie-a-2020_01_28_23_51_6665_2020_01_28_Vietnam_coronavirus.jpeg
Image moving auntminnie-b-2020_01_28_23_51_6665_2020_01_28_Vietnam_coronavirus.jpeg
Image moving auntminnie-c-2020_01_28_23_51_6665_2020_01_28_Vietnam_coronavirus.jpeg
Image moving auntminnie-d-2020_01_28_23_51_6665_2020_01_28_Vietnam_coronavirus.jpeg
Image moving nejmc2001573_f1a.jpeg
Image moving nejmc2001573_f1b.jpeg
Image moving lancet-case2a.jpg
Image moving lancet-case2b.jpg
Image moving 1-s2.0-S0140673620303706-fx1_lrg.jpg
Image moving nCoV-radiol.2020200269.fig1-day7.jpeg
Image moving nejmoa2001191_f1-PA.jpeg
Image moving nejmoa2001191_f3-PA.jpeg
Image moving nejmoa2001191_f4.jpeg
Image moving nejmoa2001191_f5-PA.jpeg
Image moving ryct.2020200034.fig2.jpeg
Image moving ryct.2020200034.fig5-day0.jpeg
Image moving ryct.2020200034.fig5-day4.jpeg
Image moving ryct.2020200034.fig5-day7.jpeg
Image moving ryct.2020200028.fig1a.jpeg
Image moving radiol.2020200490.fig3.jpeg
Image moving covid-19-pneumoni

- Create data for `Normal Xray Cases`.

In [8]:
import random

In [9]:
# Destination folder for the normal (non-COVID) training images.
# The capitalised 'Train' matches the './Dataset/Train' root that the split
# step of this notebook lists, so both classes end up under the same folder on
# case-sensitive filesystems.
target_dir_norm = './Dataset/Train/Normal'

# makedirs() creates any missing parent folder as well.
if not os.path.exists(target_dir_norm):
    os.makedirs(target_dir_norm, exist_ok=True)
    print('Directory Created')
else:
    print('Already Present')

Directory Created


In [10]:
# Folder holding the normal chest X-ray images.
normal_xray_path = './Chest_XRAY/train/NORMAL'

# os.listdir() returns entries in arbitrary order; sorting makes any later
# selection by position reproducible across runs and machines.
img_names = sorted(os.listdir(normal_xray_path))

In [11]:
# Copy up to the first 200 normal X-ray images into the training folder.
# Slicing (instead of indexing with range(200)) avoids an IndexError when fewer
# than 200 images are available.
count = 0  # reset: otherwise the total printed below is the COVID cell's count
for i, img_name in enumerate(img_names[:200]):
    img_path = os.path.join(normal_xray_path, img_name)
    target_path = os.path.join(target_dir_norm, img_name)

    # Test the destination file directly instead of re-listing the whole
    # target folder on every iteration; already-copied images are skipped.
    if not os.path.exists(target_path):
        shutil.copy2(img_path, target_path)
        print(f'Image moving {img_name} : {i}')
    # Counts every selected image, whether or not it had to be copied.
    count += 1

print(f'Total Images : {count}')

Image moving IM-0115-0001.jpeg : 0
Image moving IM-0117-0001.jpeg : 1
Image moving IM-0119-0001.jpeg : 2
Image moving IM-0122-0001.jpeg : 3
Image moving IM-0125-0001.jpeg : 4
Image moving IM-0127-0001.jpeg : 5
Image moving IM-0128-0001.jpeg : 6
Image moving IM-0129-0001.jpeg : 7
Image moving IM-0131-0001.jpeg : 8
Image moving IM-0133-0001.jpeg : 9
Image moving IM-0135-0001.jpeg : 10
Image moving IM-0137-0001.jpeg : 11
Image moving IM-0140-0001.jpeg : 12
Image moving IM-0141-0001.jpeg : 13
Image moving IM-0143-0001.jpeg : 14
Image moving IM-0145-0001.jpeg : 15
Image moving IM-0147-0001.jpeg : 16
Image moving IM-0149-0001.jpeg : 17
Image moving IM-0151-0001.jpeg : 18
Image moving IM-0152-0001.jpeg : 19
Image moving IM-0154-0001.jpeg : 20
Image moving IM-0156-0001.jpeg : 21
Image moving IM-0158-0001.jpeg : 22
Image moving IM-0160-0001.jpeg : 23
Image moving IM-0162-0001.jpeg : 24
Image moving IM-0164-0001.jpeg : 25
Image moving IM-0166-0001.jpeg : 26
Image moving IM-0168-0001.jpeg : 27
Im

#### Test Dataset

In [12]:
# Destination folder for the COVID-positive test images.
# NOTE: this rebinds `target_dir`, which earlier pointed at the training folder.
target_dir = './Dataset/Test/Covid'

# makedirs() creates any missing parent folder as well, so this cell does not
# depend on './Dataset/Test' already existing with exactly this spelling.
if not os.path.exists(target_dir):
    os.makedirs(target_dir, exist_ok=True)
    print('Directory Created')
else:
    print('Already Present')

Directory Created


In [13]:
# Destination folder for the normal (non-COVID) test images.
# NOTE: this rebinds `target_dir` once more.
target_dir = './Dataset/Test/Normal'

# makedirs() creates any missing parent folder as well, so this cell does not
# depend on './Dataset/Test' already existing with exactly this spelling.
if not os.path.exists(target_dir):
    os.makedirs(target_dir, exist_ok=True)
    print('Directory Created')
else:
    print('Already Present')

Directory Created


In [14]:
train_root = './Dataset/Train'
test_root = './Dataset/Test'

# Share of every class folder that is moved from the training to the test split.
test_fraction = 0.2

# NOTE: this cell is not idempotent - each run moves a further 20% of whatever
# is currently left in the training folders.
# Listings are sorted because os.listdir() returns entries in arbitrary order;
# sorting makes the split reproducible.
for class_name in sorted(os.listdir(train_root)):
    class_dir = os.path.join(train_root, class_name)
    # Ignore stray files in the training root; only folders are classes.
    if not os.path.isdir(class_dir):
        continue
    print(class_name)

    # A dedicated name is used so the `img_names` list built earlier in the
    # notebook is not overwritten.
    class_images = sorted(os.listdir(class_dir))
    test_size = int(len(class_images) * test_fraction)

    # Make sure the destination is an existing directory: shutil.move() would
    # otherwise rename the first image to the directory's path.
    target_path = os.path.join(test_root, class_name)
    os.makedirs(target_path, exist_ok=True)

    # Move the last `test_size` images of the listing. An explicit start index
    # is used because a slice starting at -0 would select every image.
    for img in class_images[len(class_images) - test_size:]:
        shutil.move(os.path.join(class_dir, img), target_path)

    print(test_size)

Covid
28
Normal
40
