In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the whole /kaggle/input tree and print the full path of every file found.
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
import numpy as np
import random
import tensorflow as tf
import tensorflow.keras.layers as L
import tensorflow.keras.backend as K
import joblib
import seaborn as sns
import pandas as pd
import cv2
import os
import matplotlib.pyplot as plt
from utilities_x_ray import read_xray,showXray
from tqdm import tqdm
import pydicom
from sklearn.model_selection import KFold
import pydicom as dicom
import warnings
# Suppress every Python warning for the rest of the session.
# NOTE(review): this also hides deprecation warnings raised by the libraries used below.
warnings.filterwarnings("ignore")

In [None]:
def seedAll(seed=355):
    """Seed every random number source this notebook uses.

    seed: value applied to Python's ``random`` module, NumPy's global
        generator and TensorFlow, and exported as ``PYTHONHASHSEED``.
    """
    random.seed(seed)
    np.random.seed(seed)
    tf.random.set_seed(seed)
    # Exported as a string because environment variables only hold text.
    os.environ["PYTHONHASHSEED"] = f"{seed}"


seedAll()

In [None]:
# Load the competition's train.csv and sample_submission.csv into DataFrames.
train = pd.read_csv('../input/vinbigdata-chest-xray-abnormalities-detection/train.csv')
ss = pd.read_csv('../input/vinbigdata-chest-xray-abnormalities-detection/sample_submission.csv')

In [None]:
# Preview the first rows of train.csv.
train.head()

In [None]:
# Preview the first rows of the sample submission.
ss.head()

In [None]:
# Display one training DICOM with the "bone" colormap.
# read_xray comes from the local utilities_x_ray module; presumably it returns the
# image as a 2-D pixel array -- verify against that module.
plt.figure(figsize=(8,10))
plt.imshow(read_xray('../input/vinbigdata-chest-xray-abnormalities-detection/train/0108949daa13dc94634a7d650a05c0bb.dicom'),cmap=plt.cm.bone)

In [None]:
# Same image again via showXray (local utilities_x_ray helper), passing the
# train table and with_boxes=True; presumably this overlays the annotated
# boxes -- verify against that module.
showXray('../input/vinbigdata-chest-xray-abnormalities-detection/train/0108949daa13dc94634a7d650a05c0bb.dicom',train,with_boxes=True)

In [None]:
# Headline figures for the train table: row count, distinct images, distinct classes.
summary_lines = (
    ("Number of rows in train dataframe: {}", train.shape[0]),
    ("Number of Unique images in train set: {}", train.image_id.nunique()),
    ("Number of Classes: {}\n", train.class_name.nunique()),
    ("Class Names: {}", list(train.class_name.unique())),
)
for template, value in summary_lines:
    print(template.format(value))

In [None]:
# Per-column count of missing values, shown as a colour-graded table.
print("Null Values:")
null_counts = train.isna().sum().to_frame(name='Null Value count')
null_counts.style.background_gradient(cmap='viridis')

In [None]:
# Bar chart of how many rows of train.csv carry each class_id.
# The column is passed as `x=` explicitly: seaborn deprecated positional data
# vectors, and newer releases interpret the first positional argument as `data`.
fig, ax = plt.subplots(figsize=(9,6))
sns.countplot(x=train["class_id"], ax=ax)
ax.set_title("Class Distributions");

In [None]:
# Bar chart of how many rows of train.csv carry each rad_id.
# The column is passed as `x=` explicitly: seaborn deprecated positional data
# vectors, and newer releases interpret the first positional argument as `data`.
fig, ax = plt.subplots(figsize=(9,6))
sns.countplot(x=train["rad_id"], ax=ax)
ax.set_title("rad_id Distributions");

In [None]:
# Alphabetically sorted class names with 'No finding' moved to the last position.
# NOTE(review): downstream code uses the position in this list as the class_id;
# presumably alphabetical order matches the ids in train.csv -- verify.
class_names = sorted(train.class_name.unique())
class_names.remove('No finding')
class_names.append('No finding')
# id -> name lookup, sized from the data instead of a hard-coded 15 so no class
# is silently dropped if the label set changes.
classes = dict(enumerate(class_names))

In [None]:
def prepareDataFrame(train_df= train):
    """Collapse the per-annotation table into one row per image.

    train_df: annotation table with ``image_id``, ``class_id`` and the four
        box columns ``x_min``, ``y_min``, ``x_max``, ``y_max``.  Missing values
        are replaced by 0 before anything else happens.

    Returns a DataFrame with one row per image, in order of first appearance:
      * ``image_id``
      * ``label`` -- multi-hot vector with one slot per entry of ``class_names``
      * integer columns ``0 .. 4*(n_classes-1)-1`` -- for every class except the
        last one, the rounded mean of that class's boxes in the image, stored
        at ``4*class_id .. 4*class_id+3``; 0 where the class is absent.
    """
    n_classes = len(class_names)
    n_box_values = 4*(n_classes-1)   # the last class gets no box columns
    box_cols = ['x_min','y_min','x_max','y_max']
    cols = ['image_id','label']+list(range(n_box_values))

    train_df = train_df.fillna(0)

    # Rows are collected in a list and turned into a frame once; growing a
    # DataFrame row by row (and re-filtering the full table per image) is
    # quadratic in the number of images.
    records = []
    for image, df in tqdm(train_df.groupby('image_id', sort=False)):
        label = np.zeros(n_classes)
        label[df.class_id.unique().astype(int)] = 1

        # Several boxes of the same class in one image are averaged into one.
        bboxes_df = df.groupby('class_id')[box_cols].mean().round()

        bboxes_list = [0]*(4*n_classes)
        for ind in bboxes_df.index:
            start = 4*int(ind)
            bboxes_list[start:start+4] = list(bboxes_df.loc[ind,:].values)
        records.append([image, label]+bboxes_list[:n_box_values])
    return pd.DataFrame(records, columns=cols)
train_df = prepareDataFrame()

In [None]:
# Preview the first two rows of the per-image frame built by prepareDataFrame.
train_df.head(2)

In [None]:
def generateFolds(n_splits = 5):
    """Add an integer ``kfold`` column to the global ``train_df`` (in place).

    n_splits: number of folds handed to ``KFold``.  The previous default of
        None was rejected by ``KFold``; 5 is used instead.

    Every row receives the id of the fold in which it is a validation sample.
    ``KFold`` is used without shuffling, so the folds are contiguous row ranges.
    """
    kf = KFold(n_splits= n_splits)
    # kf.split yields positional indices, so they are written into a plain
    # array instead of through ``.loc`` (which would need a default index).
    # The array also makes the column an int dtype directly; the earlier
    # ``astype(int)`` call discarded its result and left the column as float.
    folds = np.zeros(len(train_df), dtype=int)
    for fold_id,(_,val_) in enumerate(kf.split(train_df["image_id"],train_df["label"])):
        folds[val_] = fold_id
    train_df["kfold"] = folds

generateFolds(n_splits=5)

In [None]:
class DataLoader:
    """Serve pre-saved ``.npy`` images together with their targets.

    Every image is read from ``<path>/<image_id>.npy``.  Targets are read
    positionally from the frames: column 0 must be ``image_id``, column 1 the
    class label vector, and every remaining column a bounding-box value.

    path: directory that holds the ``.npy`` files.
    train_df: frame describing the training images.
    val_df: frame describing the validation images; may be None when only
        ``flow`` is going to be used.
    """
    def __init__(self,path = None,train_df=train_df,val_df=None):
        self.path = path
        self.df = train_df
        self.val_df = val_df
        # File names are shuffled once here; the order is then reused on every pass.
        self.train_list = [f'{img}.npy' for img in train_df["image_id"].unique()]
        np.random.shuffle(self.train_list)
        if val_df is None:
            self.test_list = []
        else:
            self.test_list = [f'{img}.npy' for img in val_df["image_id"].unique()]
        np.random.shuffle(self.test_list)

    def _load_sample(self,frame,img):
        """Return (image, class_label, bbox_vector) for file ``img`` described in ``frame``."""
        im_name = img.split('.npy')[0]
        # os.path.join also works when ``path`` has no trailing separator.
        image = np.load(os.path.join(self.path,img))
        temp = frame[frame.image_id==im_name]
        c_label,bb = temp.iloc[0,1],temp.iloc[0,2:].values.astype('float')
        return image,c_label,bb

    def read_image(self):
        """Yield (image, class_label, bbox_vector) once for every training file."""
        for img in self.train_list:
            yield self._load_sample(self.df,img)

    def batch_generator(self,items,batch_size):
        """Group ``items`` into lists of ``batch_size``; the last list may be shorter.

        Raises ValueError when ``batch_size`` is smaller than 1.
        """
        if batch_size < 1:
            raise ValueError("batch_size must be at least 1")
        a=[]
        for item in items:
            a.append(item)
            if len(a)==batch_size:
                yield a
                a=[]
        # Truthiness test replaces ``len(a) is not 0``, which compared identity
        # with an int literal instead of value.
        if a:
            yield a

    def flow(self,batch_size):
        """
        flow from given directory in batches
        ==========================================
        batch_size: size of the batch

        Loops over the training files forever, yielding
        ``(images, (class_labels, bboxes))`` with each part stacked on axis 0.
        Raises ValueError when there are no training files, because the
        endless loop would otherwise never yield.
        """
        if not self.train_list:
            raise ValueError("DataLoader has no training images to flow")
        while True:
            for bat in self.batch_generator(self.read_image(),batch_size):
                batch_images = []
                batch_c_labels = []
                batch_bb = []
                for im,im_c_label,im_bb in bat:
                    batch_images.append(im)
                    batch_c_labels.append(im_c_label)
                    batch_bb.append(im_bb)
                batch_images = np.stack(batch_images,axis=0)
                batch_labels =  (np.stack(batch_c_labels,axis=0),np.stack(batch_bb,axis=0))
                yield batch_images,batch_labels

    def getVal(self):
        """Load the whole validation set into memory.

        Returns ``(images, (class_labels, bboxes))`` with each part stacked on
        axis 0.  Raises ValueError when the loader has no validation images.
        """
        if not self.test_list:
            raise ValueError("DataLoader was created without validation images")
        images = []
        c_labels = []
        bb_labels = []
        for img in self.test_list:
            image,c_label,bb = self._load_sample(self.val_df,img)
            images.append(image)
            c_labels.append(c_label)
            bb_labels.append(bb)
        return np.stack(images,axis=0),(np.stack(c_labels,axis=0),np.stack(bb_labels,axis=0))

In [None]:
def build(input_shape=(256,256,1), n_classes=15, n_bbox=56):
    """Build and compile the two-headed CNN.

    input_shape: shape of one input image (height, width, channels).
    n_classes: width of the sigmoid classification head ``class_out``.
    n_bbox: width of the ReLU regression head ``bb_out``.

    The trunk is three blocks of two 3x3 ReLU convolutions followed by 2x2
    max-pooling (32, 64 and 128 filters), then a flatten.  Both heads are
    50 -> 30 -> output dense stacks on top of the flattened features.

    Returns the compiled ``tf.keras.Model`` with outputs
    ``[class_out, bb_out]``.
    """
    in1 = L.Input(shape=input_shape)

    out1 = in1
    for filters in (32,64,128):
        out1 = L.Conv2D(filters,(3,3),activation="relu")(out1)
        out1 = L.Conv2D(filters,(3,3),activation="relu")(out1)
        out1 = L.MaxPooling2D((2,2))(out1)
    out1 = L.Flatten()(out1)

    out2 = L.Dense(50,activation="relu",kernel_initializer="lecun_normal")(out1)
    out2 = L.Dense(30,activation="relu",kernel_initializer="lecun_normal")(out2)
    out2 = L.Dense(n_classes,activation="sigmoid",kernel_initializer="lecun_normal",name='class_out')(out2)

    out3 = L.Dense(50,activation="relu",kernel_initializer="lecun_normal")(out1)
    out3 = L.Dense(30,activation="relu",kernel_initializer="lecun_normal")(out3)
    out3 = L.Dense(n_bbox,activation="relu",kernel_initializer="lecun_normal",name="bb_out")(out3)

    model = tf.keras.Model(inputs=in1,outputs=[out2,out3])
    # The class head has independent sigmoid units and is trained on multi-hot
    # targets (an image can carry several findings), so each unit needs its own
    # binary cross-entropy.  Categorical cross-entropy assumes a single
    # distribution over classes and does not penalise false positives here.
    model.compile(loss={'class_out':'binary_crossentropy','bb_out':'mse'},optimizer="adam")
    return model

In [None]:
# Instantiate the network once so its architecture can be inspected below.
model = build()

In [None]:
# Render the layer graph of the model.
tf.keras.utils.plot_model(model)

In [None]:
def getTest(path=None):
    """Load every file in ``path`` with ``np.load`` and stack them on axis 0.

    path: directory containing the test arrays.

    Returns one array whose first axis follows the order of
    ``os.listdir(path)``.  That order is arbitrary, so keep the same listing
    if the rows must later be matched back to file names.
    """
    # os.path.join keeps this working when ``path`` has no trailing separator.
    images = [np.load(os.path.join(path,img)) for img in tqdm(os.listdir(path))]
    return np.stack(images,axis=0)

X_test = getTest('../input/xraynumpy/images/test/')

In [None]:
# Cross-validated training: one model per fold, test predictions averaged over folds.
N_FOLDS = 5       # must equal the n_splits used to fill train_df.kfold
BATCH_SIZE = 32
EPOCHS = 1

# Running sums of the per-fold test predictions (class head / bbox head).
class_label = np.zeros((len(X_test),15))
bb_label = np.zeros((len(X_test),56))

for fold in range(N_FOLDS):
    print(f'\nFold: {fold}\n')

    # Rows of the current fold are held out for validation; the helper column
    # is dropped so only image_id, label and the bbox columns remain.
    X_train = train_df[train_df.kfold!=fold].drop('kfold',axis=1)
    X_val = train_df[train_df.kfold==fold].drop('kfold',axis=1)

    dl = DataLoader('../input/xraynumpy/images/train/',X_train,X_val)
    train_set = dl.flow(batch_size=BATCH_SIZE)
    X_eval,Y_eval = dl.getVal()

    chckpt = tf.keras.callbacks.ModelCheckpoint(f'./model_f{fold}.hdf5',monitor='val_loss',mode='min',save_best_only=True)

    # Start every fold from a fresh graph and freshly initialised weights.
    K.clear_session()
    model = build()

    # One epoch is exactly one pass over this fold's training images.  The
    # earlier hard-coded 15000/32 also counted the held-out fold, so each
    # "epoch" wrapped around and revisited part of the training data.
    steps_per_epoch = int(np.ceil(len(dl.train_list)/BATCH_SIZE))

    model.fit(train_set,
              epochs=EPOCHS,
              steps_per_epoch=steps_per_epoch,
              validation_data = (X_eval,Y_eval),
              callbacks = [chckpt]
             )

    c,b = model.predict(X_test)
    class_label+=c
    bb_label+=b
class_label = class_label/N_FOLDS
bb_label = bb_label/N_FOLDS
np.save('./class_label.npy',class_label)
np.save('./bb_label.npy',bb_label)

In [None]:
# Predict the test set with `model`, which at this point is the network left over
# from the final iteration of the fold loop -- not the fold average that was saved
# to class_label.npy / bb_label.npy.
# NOTE(review): confirm that a single-fold prediction is what is wanted here.
Y_test = model.predict(X_test)

In [None]:
# Persist the raw predictions to the file 'y_test' in the working directory.
joblib.dump(Y_test, 'y_test')

In [None]:
# Split the two model outputs; the order follows outputs=[out2,out3] in build(),
# i.e. class scores first, bounding-box values second.
cls, b = Y_test

In [None]:
# Index of the highest class score for every test image.
pred_labels = [np.argmax(scores) for scores in cls]