In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input directory and print the full path of every file found.
input_file_paths = (
    os.path.join(dirname, filename)
    for dirname, _, filenames in os.walk('/kaggle/input')
    for filename in filenames
)
for input_file_path in input_file_paths:
    print(input_file_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

### Here we try everything on the test dataset only

In [None]:
test = pd.read_csv('../input/vinbigdata-chest-xray-abnormalities-detection/sample_submission.csv')

In [None]:
test.head()

In [None]:
test.shape

In [None]:
test.image_id.describe()

### load model

In [None]:
# Import Densenet from Keras
from keras.applications.densenet import DenseNet121
from keras.layers import Dense, GlobalAveragePooling2D
from keras.models import Model
from keras import backend as K

In [None]:
def build_densenet_model(
    n_classes=15,
    image_size=(256, 256),
    weights='../input/densenet-weights-nih-coursera-ai4m/densenet.hdf5',
):
    """Build a DenseNet121-based classifier for single-channel images.

    The network takes an ``image_size + (1,)`` input, repeats that channel
    three times, runs the result through a DenseNet121 base without its top,
    applies global average pooling and ends in a sigmoid ``Dense`` layer.

    NOTE: this function uses the module-level name ``tf``, so ``tensorflow``
    must be imported before the function is *called*.

    Parameters
    ----------
    n_classes : int, optional
        Number of units in the final sigmoid layer (default 15).
    image_size : tuple of int, optional
        ``(height, width)`` of the single-channel input (default ``(256, 256)``).
    weights : str or None, optional
        Value forwarded to ``DenseNet121(weights=...)``; by default the path
        of the pre-trained weight file used by this notebook.

    Returns
    -------
    Model
        Model mapping the single-channel input to ``n_classes`` sigmoid outputs.
    """
    # Single-channel input layer.
    img_input = tf.keras.layers.Input(shape=(*image_size, 1))

    # Change shape for compatibility: the base model is fed three channels,
    # so the one input channel is concatenated with itself three times.
    img_conc = tf.keras.layers.Concatenate()([img_input, img_input, img_input])

    # Load the base model (transfer learning) on top of the widened tensor.
    base_model = DenseNet121(weights=weights, include_top=False, input_tensor=img_conc)

    # Collapse the spatial dimensions of the base model output.
    x_pool = GlobalAveragePooling2D()(base_model.output)

    # Logistic layer with one unit per class to predict.
    predictions = Dense(n_classes, activation="sigmoid")(x_pool)
    print(f"Predictions have {n_classes} units, one for each class")

    # The model is exposed through the original single-channel input.
    return Model(inputs=img_input, outputs=predictions)


In [None]:
import tensorflow as tf
# import tensorflow.keras.layers as L
import tensorflow.keras.backend as K


In [None]:
model2 = build_densenet_model()

In [None]:
! ls ../input/chest-x-ray-abnormalities-densenet-pipeline


In [None]:
model2.load_weights('../input/chest-x-ray-abnormalities-densenet-pipeline/model2.1_march28.hdf5')


In [None]:
model2

### now ready to predict on test images

In [None]:
test.head(20)

In [None]:
# filename = '004f33259ee4aef671c2b95d54e4be68'
# filename = '008bdde2af2462e86fd373a445d0f4cd'
# filename = '009bc039326338823ca3aa84381f17f1'
# filename = '013c169f9dad6f1f6485da961b9f7bf2'
filename = '01431a2618c0ace741e4e270a37e20b9'





In [None]:
# ! ls ../input/xraynumpy/images/test

In [None]:
path = '../input/xraynumpy/images/test/'

In [None]:
image = np.load(path + filename + '.npy')


In [None]:
image.shape

In [None]:
# model = model2

In [None]:
images = [image]

X = np.stack(images,axis=0)



In [None]:
# `model` is never assigned in this notebook (its only assignment is commented
# out), so predict with `model2`, the network whose weights were loaded above.
op = model2.predict(X)


In [None]:
op = op[0]

In [None]:
# Human-readable label for each model output position: classes[i] names output
# unit i (it is looked up as classes[ix] when predictions are printed below).
# The list has 15 entries; 'No finding' is the last one (index 14).
classes = ['Aortic enlargement',
 'Atelectasis',
 'Calcification',
 'Cardiomegaly',
 'Consolidation',
 'ILD',
 'Infiltration',
 'Lung Opacity',
 'Nodule/Mass',
 'Other lesion',
 'Pleural effusion',
 'Pleural thickening',
 'Pneumothorax',
 'Pulmonary fibrosis',
 'No finding']

In [None]:
# Build one "<index> <score> 0 0 1 1" tag for every output scoring above 0.5,
# printing each selected class along the way.
op_list = []

for ix, y_pred in enumerate(op):
    if not (y_pred > 0.5):
        continue
    y_pred = round(y_pred, 2)
    print(ix, y_pred, classes[ix])
    op_tag = ' '.join([str(ix), str(y_pred), '0 0 1 1'])
    print('op_tag=', op_tag)
    op_list.append(op_tag)

In [None]:
op_list

In [None]:
op_str = ' '.join(op_list)

In [None]:
op_str

In [None]:
# 0 0.576 1150 703 1419 1019 14 0 0 0 1 1


### now run for each case

In [None]:
def my_predict(row, predictor=None, threshold=0.5,
               image_dir='../input/xraynumpy/images/test/'):
    """Predict one image and format the confident classes as a string.

    Loads ``<image_dir>/<row.image_id>.npy``, runs it through the model as a
    batch of one, and emits ``"<index> <score> 0 0 1 1"`` for every output
    whose score exceeds ``threshold`` (score rounded to 2 decimals). The
    trailing ``0 0 1 1`` is a fixed placeholder (presumably a dummy bounding
    box -- confirm against the expected submission format).

    Parameters
    ----------
    row : pandas.Series
        Row with an ``image_id`` field; ``row.name`` is used for progress output.
    predictor : object with ``predict(batch)``, optional
        Model to use. Defaults to the notebook-level ``model2``.
    threshold : float, optional
        Scores must be strictly greater than this to be reported (default 0.5).
    image_dir : str, optional
        Directory holding the ``.npy`` image files.

    Returns
    -------
    str
        Space-joined tags, or ``''`` when no score exceeds ``threshold``.
    """
    row_ix = row.name
    # Lightweight progress report every 100 rows.
    if row_ix % 100 == 0:
        print('done for', row_ix)

    if predictor is None:
        # The original body called a global `model` that the notebook never
        # assigns; the network it actually builds and loads is `model2`.
        predictor = model2

    image = np.load(os.path.join(image_dir, row.image_id + '.npy'))

    # Add a leading batch axis so the model receives a batch of one image.
    X = np.stack([image], axis=0)

    # Keep the scores of the single image in the batch.
    op = predictor.predict(X)[0]

    op_list = []
    for ix, y_pred in enumerate(op):
        if y_pred > threshold:
            y_pred = round(y_pred, 2)
            op_list.append(str(ix) + ' ' + str(y_pred) + ' 0 0 1 1')

    return ' '.join(op_list)
    
    

In [None]:
my_predict(test.iloc[0])

In [None]:
test.head(20)

In [None]:
# Run the per-image prediction on every row; each row is handed to my_predict directly.
test['op_str'] = test.apply(my_predict, axis=1)

In [None]:
test.head(20)

In [None]:
test.columns

In [None]:
my_cols = ['image_id', 'op_str']

In [None]:
submission_df = test[my_cols]

In [None]:
submission_df.head(20)

In [None]:
submission_df.columns = ['image_id', 'PredictionString']

In [None]:
submission_df.head(20)

In [None]:
submission_filepath = str("submission.csv")

In [None]:
submission_df.to_csv(submission_filepath, index=False)

### merge v0 and v1

In [None]:
# submission_df

In [None]:
! ls ../input/bams-xray-results-v0-and-v1/

In [None]:
import pandas as pd

In [None]:
submission_v0 = pd.read_csv('../input/bams-xray-results-v0-and-v1/submission v0.csv')

In [None]:
submission_v1 = pd.read_csv('../input/bams-xray-results-v0-and-v1/submission v1.csv')

In [None]:
submission_v0.head()

In [None]:
submission_v1.head()

### clean some v0 predictions and see scores

In [None]:
def clean_row(row, threshold=0.5):
    """Drop low-confidence objects from a row's ``PredictionString``.

    The string is read as whitespace-separated groups of six tokens; the
    second token of each group is the confidence. A string holding exactly
    one group is returned unchanged, whatever its confidence. Otherwise only
    groups whose confidence is strictly greater than ``threshold`` are kept.

    Parameters
    ----------
    row : object with a ``PredictionString`` attribute
        Typically a ``pandas.Series`` row of a submission frame.
    threshold : float, optional
        Minimum (exclusive) confidence for a group to be kept (default 0.5).

    Returns
    -------
    str
        The filtered prediction string. ``''`` when nothing survives, or when
        the input is empty or missing (e.g. NaN read from an empty CSV cell).

    Raises
    ------
    AssertionError
        If the number of tokens is not a multiple of six.
    """
    original = row.PredictionString

    # A missing value is not a string and has nothing to filter.
    if not isinstance(original, str):
        return ''

    # split() without an argument tolerates repeated, leading and trailing
    # whitespace, which split(' ') would turn into empty tokens.
    tokens = original.split()

    assert len(tokens) % 6 == 0, (
        'PredictionString must contain groups of 6 tokens, got %d tokens' % len(tokens)
    )

    # A single prediction is always kept as-is.
    if len(tokens) == 6:
        return original

    kept = [
        ' '.join(tokens[start:start + 6])
        for start in range(0, len(tokens), 6)
        if float(tokens[start + 1]) > threshold
    ]
    return ' '.join(kept)
    

In [None]:
row = submission_v0.iloc[2]

In [None]:
row

In [None]:
clean_row(row)

In [None]:
# Clean every row's prediction string; clean_row receives each row directly.
submission_v0['clean_PredictionString'] = submission_v0.apply(clean_row, axis=1)

In [None]:
submission_v0.head()

In [None]:
submission_filepath = str("submission_v0_clean.csv")

In [None]:
# Write the cleaned predictions under the `PredictionString` column name.
# Saving the frame as-is would keep the raw `PredictionString` column next to
# `clean_PredictionString`, so the file would still carry the uncleaned values
# under the standard column name. `submission_v0` itself is left unchanged.
(
    submission_v0
    .drop(columns=['PredictionString'])
    .rename(columns={'clean_PredictionString': 'PredictionString'})
    .to_csv(submission_filepath, index=False)
)