In [1]:
cd /content/drive/My Drive/Projects/CoronaVirus detection /Features on CXR using Limited Training DataSet/data

/content/drive/My Drive/Projects/CoronaVirus detection /Features on CXR using Limited Training DataSet/data


In [0]:
import os
import shutil
import math
import cv2
from PIL import Image
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

#1. Images

Processing JSRT IMG files

In [0]:
# Convert every raw JSRT .IMG scan into an 8-bit, intensity-inverted PNG.
shape = (2048, 2048)  # default shape of an IMG file
dtype = np.dtype('>u2')  # big-endian unsigned integer (16 bit)
output_filename = './Images/JSRT SCR/JSRT_PNG/'  # output folder prefix (loop-invariant)

# The folder must exist before writing; exist_ok keeps the cell re-runnable.
os.makedirs(output_filename, exist_ok=True)

for f in os.listdir('./Images/JSRT SCR/JSRT'):
    # Reading.
    with open('./Images/JSRT SCR/JSRT/' + f, 'rb') as fid:
        data = np.fromfile(fid, dtype)

    # A file that does not hold exactly shape[0] * shape[1] samples cannot be
    # reshaped below (e.g. a stray non-image file in the folder) - skip it.
    if data.size != shape[0] * shape[1]:
        print('skipping (unexpected size):', f)
        continue

    # Processing: scale to [0, 1] by the brightest sample, invert, stretch to [0, 255].
    peak = np.max(data)
    if peak > 0:
        data = data * 1.0 / peak
    else:
        # All-zero scan: avoid the 0/0 division; it inverts to a uniform 255 image.
        data = np.zeros(data.shape)
    data = (1.0 - data) * 255.0
    image = data.astype('uint8').reshape(shape)

    # Saving. NOTE(review): files are written with a lowercase '.png' extension,
    # while the metadata cell below builds names ending in '.PNG' - confirm the
    # target filesystem is case-insensitive.
    print(np.max(image), np.min(image), f)
    if not cv2.imwrite(output_filename + os.path.splitext(f)[0] + '.png', image):
        # cv2.imwrite reports failure through its return value.
        raise IOError('could not write PNG for ' + f)

## 1.1 Reading the image metadata

### 1.1.1 JSRT/SCR files

In [0]:
# Collect the JSRT image names listed in the two clinical-data text files.
# The first field of every line is the original .IMG name; it is stored here
# with a '.PNG' extension instead.
# NOTE(review): the conversion cell above writes '.png' (lowercase) files -
# confirm the filesystem is case-insensitive or align the two spellings.
nodule_files, non_nodule_files = [], []

with open('./Images/JSRT SCR/CLNDAT_EN.txt') as file:
  for line in file:
    # Skip blank lines here instead of removing a '\n.PNG' entry afterwards:
    # list.remove raised ValueError when no blank line was present and only
    # dropped the first one when there were several.
    if not line.strip():
      continue
    # Adding the PNG extension instead of IMG (fields are tab-separated)
    file_name = line.split('\t')[0]
    new_file_name = os.path.splitext(file_name)[0] + '.PNG'
    nodule_files.append(new_file_name)

with open('./Images/JSRT SCR/CNNDAT_EN.TXT') as file:
  for line in file:
    if not line.strip():
      continue
    # Adding the PNG extension instead of IMG (fields are space-separated)
    file_name = line.split(' ')[0]
    new_file_name = os.path.splitext(file_name)[0] + '.PNG'
    non_nodule_files.append(new_file_name)

In [0]:
len(nodule_files), len(non_nodule_files)

(154, 93)

In [0]:
nodule_files[-5:], non_nodule_files[-5:]

(['JPCLN150.PNG',
  'JPCLN151.PNG',
  'JPCLN152.PNG',
  'JPCLN153.PNG',
  'JPCLN154.PNG'],
 ['JPCNN089.PNG',
  'JPCNN090.PNG',
  'JPCNN091.PNG',
  'JPCNN092.PNG',
  'JPCNN093.PNG'])

###1.1.2 Montgomery

In [0]:
# Sort the Montgomery PNG files into two lists based on the last
# underscore-separated token of the file name: '0' goes to montgomery_normal,
# every other value goes to tb.
montgomery_normal, tb = [], []

for cxr_name in os.listdir('./Images/Montgomery/CXR'):
  extension = os.path.splitext(cxr_name)[1]
  if extension == '.png':
    suffix = cxr_name.split('.')[0].split('_')[-1]
    target = montgomery_normal if suffix == '0' else tb
    target.append(cxr_name)

In [0]:
len(tb), len(montgomery_normal)

(58, 80)

In [0]:
tb[-5:]

['MCUCXR_0383_1.png',
 'MCUCXR_0387_1.png',
 'MCUCXR_0390_1.png',
 'MCUCXR_0393_1.png',
 'MCUCXR_0399_1.png']

###1.1.3 Coronahack

In [0]:
# Load the Coronahack metadata table and split the image names by label.
meta_data = pd.read_csv('./Images/Coronahack/Chest_xray_Corona_Metadata.csv')

# 'Pnemonia' is matched verbatim against the 'Label' column, so the spelling must stay as is.
# coronahack_pnemonia keeps all columns; coronahack_normal is a Series of image names.
coronahack_pnemonia = meta_data[meta_data['Label'] == 'Pnemonia']
coronahack_normal = meta_data[meta_data['Label'] == 'Normal']['X_ray_image_name']

# Second level: 'Virus' vs 'bacteria' (capitalisation as compared here).
# coronahack_pnemonia_bacteria is a Series of image names.
coronahack_pnemonia_virus = coronahack_pnemonia[coronahack_pnemonia['Label_1_Virus_category'] == 'Virus']
coronahack_pnemonia_bacteria = coronahack_pnemonia[coronahack_pnemonia['Label_1_Virus_category'] == 'bacteria']['X_ray_image_name']

# Third level (viral cases only): a missing 'Label_2_Virus_category' is filed as "other" virus.
# These three are plain Python lists, unlike the Series above.
# NOTE(review): the SARS list is not referenced by the combining cell visible in this notebook - confirm that is intended.
coronahack_pnemonia_virus_other = coronahack_pnemonia_virus[coronahack_pnemonia_virus['Label_2_Virus_category'].isnull()]['X_ray_image_name'].values.tolist()
coronahack_pnemonia_virus_covid = coronahack_pnemonia_virus[coronahack_pnemonia_virus['Label_2_Virus_category'] == 'COVID-19']['X_ray_image_name'].values.tolist()
coronahack_pnemonia_virus_sars = coronahack_pnemonia_virus[coronahack_pnemonia_virus['Label_2_Virus_category'] == 'SARS']['X_ray_image_name'].values.tolist()

In [0]:
len(coronahack_normal), len(coronahack_pnemonia_bacteria), len(coronahack_pnemonia_virus_other), len(coronahack_pnemonia_virus_covid), len(coronahack_pnemonia_virus_sars)

(1576, 2777, 1493, 58, 4)

In [0]:
coronahack_normal[-5:]

5515    NORMAL2-IM-0341-0001.jpeg
5516    NORMAL2-IM-0340-0001.jpeg
5517    NORMAL2-IM-0339-0001.jpeg
5518    NORMAL2-IM-0338-0001.jpeg
5519    NORMAL2-IM-0337-0001.jpeg
Name: X_ray_image_name, dtype: object

###1.1.4 CovidGithub

In [0]:
# Load the Covid-github metadata, keep only the PA/AP views, and collect the
# file names of each finding group as plain lists.
metadata = pd.read_csv('./Images/Covid-github/metadata.csv')
metadata = metadata.loc[metadata['view'].isin(['PA', 'AP'])]

github_pnemonia_covid = metadata.loc[metadata['finding'] == 'COVID-19', 'filename'].tolist()
github_pnemonia_virus = metadata.loc[metadata['finding'].isin(['SARS', 'ARDS']), 'filename'].tolist()
github_pnemonia_bacteria = metadata.loc[
    metadata['finding'].isin(['Streptococcus', 'Chlamydophila', 'Klebsiella', 'Legionella', 'Pneumocystis']),
    'filename',
].tolist()
github_normal = metadata.loc[metadata['finding'] == 'No Finding', 'filename'].tolist()

In [0]:
len(github_pnemonia_bacteria), len(github_pnemonia_virus), len(github_pnemonia_covid), len(github_normal)

(29, 20, 198, 5)

In [0]:
github_pnemonia_covid[-5:]

['16747_3_1.jpg',
 '16745_4_1.png',
 '16745_5_1.png',
 '16745_7_1.png',
 '16744_1_1.jpg']

##1.2 Combining the image metadata

Combining the image files and adding labels as mentioned in the paper

In [0]:
# Build one metadata table with a row per image: file name, folder (relative
# to ./Images), source dataset and condition label.
#
# The rows are collected first and the DataFrame is created once.
# DataFrame.append copied the whole frame on every call (quadratic in the
# number of rows) and was removed in pandas 2.0.
image_groups = [
    # (file names, folder, dataset, condition) - same order as the rows are emitted
    (nodule_files, 'JSRT SCR/JSRT_PNG', 'JSRT/SCR', 'Nodule'),
    (non_nodule_files, 'JSRT SCR/JSRT_PNG', 'JSRT/SCR', 'Normal'),
    (montgomery_normal, 'Montgomery/CXR', 'NLM(MG)', 'Normal'),
    (tb, 'Montgomery/CXR', 'NLM(MG)', 'Tb'),
    (coronahack_normal, 'Coronahack/CXR', 'Coronahack', 'Normal'),
    (coronahack_pnemonia_bacteria, 'Coronahack/CXR', 'Coronahack', 'Bacterial Pneumonia'),
    (coronahack_pnemonia_virus_covid, 'Coronahack/CXR', 'Coronahack', 'COVID-19 Pneumonia'),
    (coronahack_pnemonia_virus_other, 'Coronahack/CXR', 'Coronahack', 'Viral Pneumonia'),
    (github_pnemonia_covid, 'Covid-github/images', 'Github', 'COVID-19 Pneumonia'),
    (github_pnemonia_bacteria, 'Covid-github/images', 'Github', 'Bacterial Pneumonia'),
    (github_pnemonia_virus, 'Covid-github/images', 'Github', 'Viral Pneumonia'),
    (github_normal, 'Covid-github/images', 'Github', 'Normal'),
]

# Iterating a list or a pandas Series both yield the file names themselves.
records = [
    {'filename': name, 'folder': folder, 'dataset': dataset, 'condition': condition}
    for names, folder, dataset, condition in image_groups
    for name in names
]

# Passing `columns` keeps the four columns even if no rows were collected.
df = pd.DataFrame(records, columns=['filename', 'folder', 'dataset', 'condition'])

In [0]:
# Use the file name as the row index; every keyword except inplace is spelled
# out at its pandas default value.
# NOTE: because of inplace=True this cell is not idempotent - running it a
# second time raises KeyError, since 'filename' is no longer a column.
df.set_index('filename', drop=True, append=False, inplace=True, verify_integrity=False)
len(df)

6541

In [0]:
df.head()

Unnamed: 0_level_0,folder,dataset,condition
filename,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
JPCLN001.PNG,JSRT SCR/JSRT_PNG,JSRT/SCR,Nodule
JPCLN002.PNG,JSRT SCR/JSRT_PNG,JSRT/SCR,Nodule
JPCLN003.PNG,JSRT SCR/JSRT_PNG,JSRT/SCR,Nodule
JPCLN004.PNG,JSRT SCR/JSRT_PNG,JSRT/SCR,Nodule
JPCLN005.PNG,JSRT SCR/JSRT_PNG,JSRT/SCR,Nodule


In [0]:
df.to_csv('Images_metadata.csv')

#2. Masks

##2.1 Reading and processing masks 

###2.1.1 JSRT/SCR

Moving all the files in the same folder

In [0]:
# Move every SCR annotation file from the fold1 tree into the matching fold2
# sub-folder, so that all files end up under fold2.
scr_root = './Images/JSRT SCR/SCR/'
scr_subfolders = (
    'masks/right lung/',
    'masks/left lung/',
    'masks/right clavicle/',
    'masks/left clavicle/',
    'masks/heart/',
    'landmarks/',
    'points/',
)

for subfolder in scr_subfolders:
  source_dir = scr_root + 'fold1/' + subfolder
  target_dir = scr_root + 'fold2/' + subfolder
  for entry in os.listdir(source_dir):
    os.rename(source_dir + entry, target_dir + entry)

Left lung Segmentation + Right lung Segmentation 

In [0]:
# Build one whole-lung mask per JSRT image by merging its left- and
# right-lung masks (looked up by identical file name in both folders).
scr_mask_dir = './Images/JSRT SCR/SCR/fold2/masks/'
scr_complete_dir = './Images/JSRT SCR/SCR/Complete Lung Mask'

# exist_ok keeps the cell re-runnable; os.mkdir raised FileExistsError the second time.
os.makedirs(scr_complete_dir, exist_ok=True)

for f in os.listdir(scr_mask_dir + 'right lung/'):
  # Context managers close both image files as soon as their pixels are read.
  with Image.open(scr_mask_dir + 'left lung/' + f) as left_lung, \
       Image.open(scr_mask_dir + 'right lung/' + f) as right_lung:
    # Element-wise maximum is the union of the two masks. It equals the plain
    # sum wherever at most one mask is set, but cannot wrap around in uint8
    # where both are set (255 + 255 would overflow to 254).
    # Assumes background pixels are 0 in both masks - TODO confirm.
    merged = np.maximum(np.asarray(left_lung), np.asarray(right_lung))

  complete = Image.fromarray(merged)
  complete.save(scr_complete_dir + '/' + os.path.splitext(f)[0] + '.PNG')

In [0]:
# Names of all merged JSRT lung masks (os.listdir already returns a list).
SCR_lung_seg = os.listdir('./Images/JSRT SCR/SCR/Complete Lung Mask/')

In [0]:
len(SCR_lung_seg)

247

In [0]:
SCR_lung_seg[-5:]

['JPCNN086.PNG',
 'JPCNN084.PNG',
 'JPCNN088.PNG',
 'JPCNN090.PNG',
 'JPCNN092.PNG']

###2.1.2 Montgomery

In [0]:
# Merge the left and right lung masks of every Montgomery image into one
# whole-lung mask saved under the same file name.
mg_mask_dir = './Images/Montgomery/Mask/'
mg_complete_dir = mg_mask_dir + 'Complete Lung Mask/'

# Image.save does not create missing folders; exist_ok keeps the cell re-runnable.
os.makedirs(mg_complete_dir, exist_ok=True)

for f in os.listdir(mg_mask_dir + 'leftMask/'):
  # Skip '.db' entries found next to the masks (not images).
  if(os.path.splitext(f)[-1] == '.db'):
    continue

  # Context managers close both image files as soon as their pixels are read.
  with Image.open(mg_mask_dir + 'leftMask/' + f) as left_lung, \
       Image.open(mg_mask_dir + 'rightMask/' + f) as right_lung:
    # Element-wise maximum is the union of the two masks. It equals the plain
    # sum wherever at most one mask is set, but cannot wrap around in uint8
    # where both are set. Assumes background pixels are 0 - TODO confirm.
    merged = np.maximum(np.asarray(left_lung), np.asarray(right_lung))

  complete = Image.fromarray(merged)
  complete.save(mg_complete_dir + f)

In [0]:
# Names of all merged Montgomery lung masks (os.listdir already returns a list).
MG_lung_seg = os.listdir('./Images/Montgomery/Mask/Complete Lung Mask/')

In [0]:
len(MG_lung_seg)

138

In [0]:
MG_lung_seg[:5]

['MCUCXR_0001_0.png',
 'MCUCXR_0003_0.png',
 'MCUCXR_0002_0.png',
 'MCUCXR_0004_0.png',
 'MCUCXR_0006_0.png']

##2.2 Combining metadata

The mask metadata is the same as for the image files; only the folder differs.

In [0]:
# Build the mask metadata table: one row per JSRT/SCR and Montgomery image,
# pointing at the folder that holds its merged lung mask.
#
# The rows are collected first and the DataFrame is created once.
# DataFrame.append copied the whole frame on every call (quadratic in the
# number of rows) and was removed in pandas 2.0.
mask_groups = [
    # (file names, folder, dataset, condition) - same order as the rows are emitted
    (nodule_files, 'JSRT SCR/SCR/Complete Lung Mask', 'JSRT/SCR', 'Nodule'),
    (non_nodule_files, 'JSRT SCR/SCR/Complete Lung Mask', 'JSRT/SCR', 'Normal'),
    (montgomery_normal, 'Montgomery/Mask/Complete Lung Mask', 'NLM(MG)', 'Normal'),
    (tb, 'Montgomery/Mask/Complete Lung Mask', 'NLM(MG)', 'Tb'),
]

records = [
    {'filename': name, 'folder': folder, 'dataset': dataset, 'condition': condition}
    for names, folder, dataset, condition in mask_groups
    for name in names
]

# Passing `columns` keeps the four columns even if no rows were collected.
df = pd.DataFrame(records, columns=['filename', 'folder', 'dataset', 'condition'])

In [0]:
# Use the file name as the row index; every keyword except inplace is spelled
# out at its pandas default value.
# NOTE: because of inplace=True this cell is not idempotent - running it a
# second time raises KeyError, since 'filename' is no longer a column.
df.set_index('filename', drop=True, append=False, inplace=True, verify_integrity=False)
len(df)

385

In [0]:
df.to_csv('Masks_metadata.csv')

#3. Make train, dev, test split

#### Read data

In [0]:
# Reload the two metadata tables from the CSV files written earlier in this
# notebook. No index_col is given, so 'filename' comes back as an ordinary
# column and the rows get a fresh 0..n-1 index.
mask_df = pd.read_csv('Masks_metadata.csv')
image_df = pd.read_csv('Images_metadata.csv')

##3.1 Segmentation data split

###3.1.1 JSRT SCR

In [0]:
# Rows of the mask table and of the image table that belong to the JSRT/SCR dataset.
JSRT_mask = mask_df[mask_df['dataset'] == 'JSRT/SCR']
JSRT_image = image_df[image_df['dataset'] == 'JSRT/SCR']

In [0]:
# 80/20 train/validation split of the JSRT images.
# A fixed random_state makes the split reproducible: without it every kernel
# restart drew a different sample, so the saved split files could not be
# regenerated.
train_jsrt_image = JSRT_image.sample(frac=0.8, replace=False, random_state=42)
# Validation = every JSRT row whose index label was not sampled for training.
val_jsrt_image = JSRT_image.drop(train_jsrt_image.index)

len(train_jsrt_image), len(val_jsrt_image)

(198, 49)

In [0]:
print(train_jsrt_image.head())
print(val_jsrt_image.head())

         filename             folder   dataset condition
99   JPCLN100.PNG  JSRT SCR/JSRT_PNG  JSRT/SCR    Nodule
93   JPCLN094.PNG  JSRT SCR/JSRT_PNG  JSRT/SCR    Nodule
182  JPCNN029.PNG  JSRT SCR/JSRT_PNG  JSRT/SCR    Normal
55   JPCLN056.PNG  JSRT SCR/JSRT_PNG  JSRT/SCR    Nodule
46   JPCLN047.PNG  JSRT SCR/JSRT_PNG  JSRT/SCR    Nodule
        filename             folder   dataset condition
17  JPCLN018.PNG  JSRT SCR/JSRT_PNG  JSRT/SCR    Nodule
24  JPCLN025.PNG  JSRT SCR/JSRT_PNG  JSRT/SCR    Nodule
26  JPCLN027.PNG  JSRT SCR/JSRT_PNG  JSRT/SCR    Nodule
31  JPCLN032.PNG  JSRT SCR/JSRT_PNG  JSRT/SCR    Nodule
34  JPCLN035.PNG  JSRT SCR/JSRT_PNG  JSRT/SCR    Nodule


In [0]:
# Mirror the image split on the mask table: a mask row goes to training when
# its file name is among the training images, otherwise to validation.
in_train = JSRT_mask['filename'].isin(train_jsrt_image['filename'])
train_jsrt_mask = JSRT_mask[in_train]
val_jsrt_mask = JSRT_mask[~in_train]

len(train_jsrt_mask), len(val_jsrt_mask)

(198, 49)

In [0]:
print(train_jsrt_mask.head())
print(val_jsrt_mask.head())

       filename                           folder   dataset condition
0  JPCLN001.PNG  JSRT SCR/SCR/Complete Lung Mask  JSRT/SCR    Nodule
1  JPCLN002.PNG  JSRT SCR/SCR/Complete Lung Mask  JSRT/SCR    Nodule
2  JPCLN003.PNG  JSRT SCR/SCR/Complete Lung Mask  JSRT/SCR    Nodule
3  JPCLN004.PNG  JSRT SCR/SCR/Complete Lung Mask  JSRT/SCR    Nodule
4  JPCLN005.PNG  JSRT SCR/SCR/Complete Lung Mask  JSRT/SCR    Nodule
        filename                           folder   dataset condition
17  JPCLN018.PNG  JSRT SCR/SCR/Complete Lung Mask  JSRT/SCR    Nodule
24  JPCLN025.PNG  JSRT SCR/SCR/Complete Lung Mask  JSRT/SCR    Nodule
26  JPCLN027.PNG  JSRT SCR/SCR/Complete Lung Mask  JSRT/SCR    Nodule
31  JPCLN032.PNG  JSRT SCR/SCR/Complete Lung Mask  JSRT/SCR    Nodule
34  JPCLN035.PNG  JSRT SCR/SCR/Complete Lung Mask  JSRT/SCR    Nodule


### 3.1.2 Montgomery 

In [0]:
# Rows of the mask table and of the image table that belong to the Montgomery
# dataset ('NLM(MG)') and are labelled 'Normal'.
MG_mask = mask_df[(mask_df['dataset'] == 'NLM(MG)') & (mask_df['condition'] == 'Normal')]
MG_image = image_df[(image_df['dataset'] == 'NLM(MG)') & (image_df['condition'] == 'Normal')]

In [0]:
# Draw 91% of the normal Montgomery images for the validation set and pick the
# mask rows with the same file names. The rows that are not drawn are not used
# by the segmentation cells visible in this notebook.
# A fixed random_state makes the draw reproducible across kernel restarts.
val_mg_image = MG_image.sample(frac=0.91, replace=False, random_state=42)
val_mg_mask = MG_mask[MG_mask['filename'].isin(val_mg_image['filename'])]

len(val_mg_image), len(val_mg_mask)

(73, 73)

### 3.1.3 Combining data

In [0]:
# Pair every image row with its mask row. The two 'folder' columns come out of
# the merge as 'folder_x' (image side) and 'folder_y' (mask side) and are
# renamed accordingly.
merge_keys = ['filename', 'dataset', 'condition']
folder_names = {'folder_x': 'image_folder', 'folder_y': 'mask_folder'}

train = train_jsrt_image.merge(train_jsrt_mask, on=merge_keys, how='inner')
train = train.rename(columns=folder_names)

# Validation set = JSRT validation rows followed by the Montgomery rows.
df_jsrt_val = val_jsrt_image.merge(val_jsrt_mask, on=merge_keys, how='inner')
df_mg_val = val_mg_image.merge(val_mg_mask, on=merge_keys, how='inner')
val = pd.concat([df_jsrt_val, df_mg_val], ignore_index=True)
val = val.rename(columns=folder_names)

print(len(train), len(val))

198 122


In [0]:
val.tail()

Unnamed: 0,filename,image_folder,dataset,condition,mask_folder
117,MCUCXR_0097_0.png,Montgomery/CXR,NLM(MG),Normal,Montgomery/Mask/Complete Lung Mask
118,MCUCXR_0028_0.png,Montgomery/CXR,NLM(MG),Normal,Montgomery/Mask/Complete Lung Mask
119,MCUCXR_0027_0.png,Montgomery/CXR,NLM(MG),Normal,Montgomery/Mask/Complete Lung Mask
120,MCUCXR_0005_0.png,Montgomery/CXR,NLM(MG),Normal,Montgomery/Mask/Complete Lung Mask
121,MCUCXR_0024_0.png,Montgomery/CXR,NLM(MG),Normal,Montgomery/Mask/Complete Lung Mask


In [0]:
train.to_csv('Seg_train.csv')
val.to_csv('Seg_val.csv')

In [0]:
pd.concat([train, val], ignore_index=True).to_csv('combined_seg.csv')

##3.2 Classification data split

Select adult patients from Coronahack dataset

In [0]:
# Slice the image metadata into the per-dataset / per-condition groups used
# for the classification split.
# Note: coronahack_pnemonia_bacteria and github_normal are rebound here from
# the name collections built earlier to DataFrames with all metadata columns.
dataset = image_df["dataset"]
condition = image_df["condition"]

is_nlm = dataset == 'NLM(MG)'
is_github = dataset == 'Github'
is_normal = condition == 'Normal'
is_bacterial = condition == 'Bacterial Pneumonia'

coronahack_pnemonia_bacteria = image_df[(dataset == "Coronahack") & is_bacterial]

JSRT_normal = image_df[(dataset == 'JSRT/SCR') & is_normal]

NLM_normal = image_df[is_nlm & is_normal]
NLM_tb = image_df[is_nlm & (condition == 'Tb')]

github_bacteria = image_df[is_github & is_bacterial]
github_virus = image_df[is_github & (condition == 'Viral Pneumonia')]
github_covid = image_df[is_github & (condition == 'COVID-19 Pneumonia')]
github_normal = image_df[is_github & is_normal]

In [0]:
# Manual review: show each Coronahack bacterial-pneumonia image in random
# order and keep the rows for which the reviewer answers 'y' at the prompt.
# NOTE: the result depends on interactive input (and on an unseeded shuffle),
# so this cell cannot be reproduced by "Restart & Run All".
adult_patients = []
# Copy before shuffling: .values may share memory with the DataFrame, and an
# in-place shuffle would then silently reorder the frame's rows as well.
data = coronahack_pnemonia_bacteria.values.copy()
np.random.shuffle(data)
for patient in data:
  # Row layout is (filename, folder, dataset, condition).
  path = os.path.join('./Images', patient[1], patient[0])
  print(path)
  img = cv2.imread(path)
  if img is None:
    # cv2.imread returns None instead of raising when the file is missing or
    # unreadable; img.shape would then fail with AttributeError.
    print('could not read image, skipping')
    continue
  print(img.shape)
  plt.imshow(img)
  plt.show()

  add = input("add")

  if(add == 'y'):
    adult_patients.append(patient)

In [0]:
adult_patients = np.asarray(adult_patients)

In [0]:
coronahack_adult_bacteria = image_df[(image_df["filename"].isin(adult_patients[:, 0])) & (image_df["dataset"] == 'Coronahack')]

In [0]:
len(coronahack_adult_bacteria), len(JSRT_normal), len(NLM_normal), len(NLM_tb), len(github_bacteria), len(github_virus), len(github_covid), len(github_normal) 

(29, 93, 80, 58, 29, 20, 198, 5)

In [0]:
normal_combined = pd.concat([JSRT_normal, NLM_normal, github_normal], ignore_index=True)
bacterial_combined = pd.concat([coronahack_adult_bacteria, github_bacteria], ignore_index=True)

In [0]:
def _split_70_10_20(frame, seed=42):
  """Shuffle `frame` and cut it into consecutive 70% / 10% / 20% parts.

  Args:
    frame: DataFrame holding the rows of one class.
    seed: random_state for the shuffle, so the split is reproducible.

  Returns:
    (train, val, test): the first int(0.7 * n) rows of the shuffled frame, the
    rows up to int(0.8 * n), and the remaining rows.
  """
  shuffled = frame.sample(frac=1, random_state=seed)
  first_cut = int(.7 * len(shuffled))
  second_cut = int(.8 * len(shuffled))
  # Positional slicing gives the same cuts as np.split(shuffled, [first_cut, second_cut])
  # without relying on DataFrame.swapaxes, which newer pandas versions deprecate.
  return shuffled.iloc[:first_cut], shuffled.iloc[first_cut:second_cut], shuffled.iloc[second_cut:]


# Split every class separately so each one is represented in all three sets.
train_normal, val_normal, test_normal = _split_70_10_20(normal_combined)

train_tb, val_tb, test_tb = _split_70_10_20(NLM_tb)

train_bacteria, val_bacteria, test_bacteria = _split_70_10_20(bacterial_combined)

train_virus, val_virus, test_virus = _split_70_10_20(github_virus)

train_covid, val_covid, test_covid = _split_70_10_20(github_covid)

In [0]:
train = pd.concat([train_normal, train_tb, train_bacteria, train_virus, train_covid], ignore_index=True)
val = pd.concat([val_normal, val_tb, val_bacteria, val_virus, val_covid], ignore_index=True)
test = pd.concat([test_normal, test_tb, test_bacteria, test_virus, test_covid], ignore_index=True)

In [0]:
train.columns

Index(['filename', 'folder', 'dataset', 'condition'], dtype='object')

In [0]:
# Fold COVID-19 into the viral class, then add one 0/1 indicator column per
# class (1 when the class name occurs in the row's condition string).
label_columns = ['Normal', 'Bacterial Pneumonia', 'Viral Pneumonia', 'Tb']

for split_df in (train, val, test):
  # Viral pneumonia and COVID in same category
  split_df['condition'] = [
      finding.replace('COVID-19 Pneumonia', 'Viral Pneumonia')
      for finding in split_df['condition']
  ]

  for c_label in label_columns:
    split_df[c_label] = [1 if c_label in finding else 0 for finding in split_df['condition']]

In [0]:
# Give the three classification tables the same folder column names as the
# segmentation split ('image_folder' / 'mask_folder').
for df in [train, val, test]:
  df.rename(columns={'folder': 'image_folder'}, inplace=True)
  # Every row gets the same constant mask folder name.
  # NOTE(review): no cell visible in this notebook creates an 'All masks' folder - confirm where it comes from.
  df['mask_folder'] = 'All masks'

In [0]:
train.to_csv('Class_train.csv')
val.to_csv('Class_val.csv')
test.to_csv('Class_test.csv')

In [0]:
pd.concat([train, test, val], ignore_index=True).to_csv('combined.csv')

In [0]:
# Gather every image listed in the combined classification table into one flat folder.
classification_dir = './Images/classification images'

# exist_ok keeps the cell re-runnable; plain os.makedirs raised FileExistsError
# once the folder existed.
os.makedirs(classification_dir, exist_ok=True)

# NOTE(review): this reads ../splits/Classification/combined.csv, whereas the
# cell that builds the combined table writes 'combined.csv' to the working
# directory - presumably the file is moved by hand in between; confirm.
dat = pd.read_csv('../splits/Classification/combined.csv')
for filename, src in zip(dat['filename'].values, dat['image_folder'].values):
  # Files land in a flat folder, so two source images with the same file name
  # would overwrite each other.
  shutil.copyfile(os.path.join('./Images', src, filename), os.path.join(classification_dir, filename))

In [4]:
# Sanity check: the flat image folder and the mask folder hold the same number of entries.
# NOTE(review): './Images/classification masks' is not created by any cell visible
# in this notebook - presumably it is produced elsewhere; confirm.
len(os.listdir('./Images/classification images')) == len(os.listdir('./Images/classification masks'))

True