In [3]:
import pandas as pd
import numpy as np
# Column layout shared by every exported CSV in this notebook: the image
# file name first, then one indicator column per finding.
columns = [
    'Image',
    'Atelectasis',
    'Cardiomegaly',
    'Effusion',
    'Pneumothorax',
    'Edema',
]


In [4]:
# Turn vin_label.csv (one row per image/class pair) into one row per image
# with a 0/1 indicator column per finding, then write data_vin.csv.
labels_col = ['Atelectasis', 'Cardiomegaly', 'Effusion', 'Pneumothorax', 'No Finding']

df = pd.read_csv('vin_label.csv')
df = df.drop(['Unnamed: 0'], axis=1)

# One output row per distinct image id.
new = pd.DataFrame()
new['Image'] = df['image_id'].unique()

for col in labels_col:
    # Flag an image when at least one row pairs it with this class.
    # A membership test keeps exactly one row per image; merging the raw rows
    # instead would repeat an image (and collide on column names) if the label
    # file carries columns besides image_id/class_name.
    # NOTE(review): the full column set of vin_label.csv is not visible here.
    positive_ids = df.loc[df['class_name'] == col, 'image_id']
    new[col] = new['Image'].isin(positive_ids).astype(float)

# 'No Finding' is computed above but discarded by the column selection below.
new['Image'] = new['Image'] + '.dcm'
# This cell assigns the same Edema value to every image; it is written as a
# float so that all label columns in the exported file share one dtype.
new['Edema'] = 0.0
new = new[columns]

new.to_csv('data_vin.csv', index=False)
new.head()

Unnamed: 0,Image,Atelectasis,Cardiomegaly,Effusion,Pneumothorax,Edema
0,80caa435b6ab5edaff4a0a758ffaec6e.dcm,1.0,0.0,0.0,0.0,0
1,0622cd29e4e0e4f198abf15614819ae8.dcm,0.0,1.0,0.0,0.0,0
2,bd6eb525438d6da1ced0ed1810857772.dcm,0.0,0.0,1.0,0.0,0
3,25f2c7b53a6ed09a9aaf73c30357aaf6.dcm,0.0,1.0,0.0,0.0,0
4,f769eea17a2e7678f481f386c3c6261c.dcm,0.0,1.0,0.0,0.0,0


In [5]:
# Reshape mimic_label.csv into the shared column layout and write
# data_mimic.csv.
df = (
    pd.read_csv('mimic_label.csv')
    .drop(columns=['Unnamed: 0', 'study_id', 'ViewPosition', 'subject_id'])
    .rename(columns={'dicom_id': 'Image'})
    # Image names in the output carry a '.jpg' extension.
    .assign(Image=lambda frame: frame['Image'] + '.jpg')
    # Every 0.5 in the frame becomes 0.
    # NOTE(review): presumably 0.5 marks an uncertain label - confirm.
    .replace(0.5, 0)
)
df = df[columns]

df.to_csv('data_mimic.csv', index=False)
df.head()

Unnamed: 0,Image,Atelectasis,Cardiomegaly,Effusion,Pneumothorax,Edema
0,02aa804e-bde0afdd-112c0b34-7bc16630-4e384014.jpg,0.0,0.0,0.0,0.0,0.0
1,68b5c4b1-227d0485-9cc38c3f-7b84ab51-4b472714.jpg,0.0,0.0,0.0,0.0,0.0
2,ea030e7a-2e3b1346-bc518786-7a8fd698-f673b44c.jpg,0.0,0.0,0.0,0.0,0.0
3,b75df1bd-0f22d631-52d73526-2ae7b85a-d843b39d.jpg,0.0,0.0,0.0,0.0,0.0
4,cfb03587-782edf6c-1bf392e1-98196cd5-365d69e8.jpg,0.0,0.0,0.0,0.0,0.0


In [6]:
# Load nih_label.csv and add one empty (NaN) indicator column per finding.
df = pd.read_csv('nih_label.csv')
# .copy() makes the two-column selection an independent frame, so adding
# columns to it below does not raise pandas' SettingWithCopyWarning.
df = df[['Image Index', 'Finding Labels']].copy()

# Each 'Finding Labels' entry is a '|'-separated list of finding names;
# collect the distinct names across all entries.
arr = df['Finding Labels'].unique()
labels_col = set()
for i in arr:
    labels_col.update(i.split('|'))

# Sorted so the intermediate column order is the same on every run
# (iteration order of a set of strings is not stable across interpreters).
df[sorted(labels_col)] = np.nan

def transform(row):
    """Flag every finding listed in a row's 'Finding Labels' field.

    Args:
        row: one DataFrame row (a pandas Series) whose 'Finding Labels'
            entry is a '|'-separated string of finding names.

    Returns:
        A copy of ``row`` in which the entry keyed by each listed finding
        is set to 1. All other entries are left unchanged.
    """
    # DataFrame.apply does not support functions that mutate the object they
    # are handed, so the flags are written to a copy of the row.
    flagged = row.copy()
    for finding in row['Finding Labels'].split('|'):
        flagged[finding] = 1
    return flagged
# Flag the findings row by row, zero-fill what is left, and export the
# shared column layout to data_nih.csv.
df = df.apply(transform, axis=1)

# Indicator cells that transform did not set to 1 are still NaN; replace
# them with 0, one column at a time.
df = df.assign(**{finding: df[finding].fillna(0) for finding in labels_col})
df = (
    df
    .drop(columns=['Finding Labels'])
    .rename(columns={'Image Index': 'Image'})
)[columns]

df.to_csv('data_nih.csv', index=False)
df.head()

Unnamed: 0,Image,Atelectasis,Cardiomegaly,Effusion,Pneumothorax,Edema
0,00000001_001.png,0.0,1.0,0.0,0.0,0.0
1,00000011_000.png,0.0,0.0,1.0,0.0,0.0
2,00000011_006.png,1.0,0.0,0.0,0.0,0.0
3,00000013_002.png,0.0,0.0,0.0,1.0,0.0
4,00000013_004.png,0.0,0.0,1.0,1.0,0.0


In [7]:
import ast
# Load padchest_label.csv and add one empty (NaN) indicator column per label.
df = pd.read_csv('padchest_label.csv')
# .copy() makes the two-column selection an independent frame, so adding
# columns to it below does not raise pandas' SettingWithCopyWarning.
df = df[['ImageID', 'Labels']].copy()

# Each 'Labels' entry is the text of a Python literal holding label names
# (it is parsed with ast.literal_eval); collect the distinct names.
arr = df['Labels'].unique()
labels_col = set()
for i in arr:
    labels_col.update(ast.literal_eval(i))

# Sorted so the intermediate column order is the same on every run
# (iteration order of a set of strings is not stable across interpreters).
df[sorted(labels_col)] = np.nan


def transform(row):
    """Flag every label listed in a row's 'Labels' field.

    Note: this definition replaces the earlier ``transform`` defined in this
    notebook once the cell has run.

    Args:
        row: one DataFrame row (a pandas Series) whose 'Labels' entry is the
            text of a Python literal containing label names.

    Returns:
        A copy of ``row`` in which 'Labels' holds the parsed labels as a
        tuple and the entry keyed by each of those labels is set to 1.
    """
    labels = tuple(ast.literal_eval(row['Labels']))
    # DataFrame.apply does not support functions that mutate the object they
    # are handed, so all writes go to a copy of the row.
    flagged = row.copy()
    flagged['Labels'] = labels
    for label in labels:
        flagged[label] = 1
    return flagged
# Flag the labels row by row, zero-fill what is left, and export the shared
# column layout to data_padchest.csv.
df = df.apply(transform, axis=1)

# Indicator cells that transform did not set to 1 are still NaN; replace
# them with 0, one column at a time.
df = df.assign(**{label: df[label].fillna(0) for label in labels_col})
df = (
    df
    .drop(columns=['Labels'])
    .rename(columns={'ImageID': 'Image'})
)[columns]

df.to_csv('data_padchest.csv', index=False)
df.head()

Unnamed: 0,Image,Atelectasis,Cardiomegaly,Effusion,Pneumothorax,Edema
0,20536686640136348236148679891455886468_k6ga29.png,0.0,0.0,0.0,0.0,0.0
1,3137231742710829928-247610802266403640553_kine...,1.0,0.0,1.0,0.0,0.0
2,238285621348398466668514178112618553012_a7k6dv...,0.0,0.0,0.0,0.0,0.0
3,152191969602076825998375638267191596461_ck9qkz...,0.0,0.0,0.0,0.0,0.0
4,84289138814897824414670894017743137660_xjsw7k.png,0.0,0.0,0.0,0.0,0.0


In [8]:
# Reshape chula_label.csv into the shared column layout and write
# data_chula.csv.
df = pd.read_csv('chula_label.csv')
df = df.drop(['Unnamed: 0'], axis=1)
df = df.rename(columns={'Image Index': 'Image'})

# Zero-fill only the label columns. Filling every column would also turn a
# missing 'Image' value into the number 0, which is not a file name.
# Columns outside `columns` are dropped below, so they need no filling.
label_fill = {name: 0 for name in columns if name != 'Image'}
df = df.fillna(label_fill)
df = df[columns]

df.to_csv('data_chula.csv', index=False)
df.head()

Unnamed: 0,Image,Atelectasis,Cardiomegaly,Effusion,Pneumothorax,Edema
0,20210102CR0064.png,0.0,1.0,1.0,0.0,0.0
1,20210102CR0073.png,0.0,1.0,0.0,0.0,0.0
2,20210102CR0206.png,0.0,1.0,1.0,0.0,0.0
3,20210102CR0154.png,0.0,1.0,1.0,0.0,1.0
4,20210102CR0185.png,0.0,0.0,0.0,0.0,0.0
