In [None]:
import numpy as np
import pandas as pd
import os
import random, re, math
import tensorflow as tf, tensorflow.keras.backend as K
import tensorflow_addons as tfa
from tensorflow.keras.layers import Dense
from tensorflow.keras.models import Model
from tensorflow.keras import optimizers
from kaggle_datasets import KaggleDatasets
from tensorflow.keras.models import Sequential
import tensorflow.keras.layers as L
from tensorflow.keras.applications import ResNet152V2, InceptionResNetV2, InceptionV3, Xception, VGG19
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D,GlobalMaxPooling2D
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from sklearn.model_selection import train_test_split
from tensorflow.keras.callbacks import ReduceLROnPlateau , EarlyStopping , ModelCheckpoint , LearningRateScheduler
from keras import regularizers

import matplotlib.pyplot as plt

!pip install efficientnet
import efficientnet.tfkeras as efn

In [None]:
# Let tf.data choose parallelism / prefetch depth on its own.
AUTO = tf.data.experimental.AUTOTUNE

# Probe for a TPU. A ValueError from the resolver is treated as "no TPU".
try:
    tpu = tf.distribute.cluster_resolver.TPUClusterResolver()
    print('Running on TPU ', tpu.master())
except ValueError:
    tpu = None

if not tpu:
    # No TPU: fall back to the default distribution strategy.
    strategy = tf.distribute.get_strategy()
else:
    # Connect to the TPU cluster and initialise it before building the strategy.
    tf.config.experimental_connect_to_cluster(tpu)
    tf.tpu.experimental.initialize_tpu_system(tpu)
    strategy = tf.distribute.experimental.TPUStrategy(tpu)

print("REPLICAS: ", strategy.num_replicas_in_sync)

In [None]:
#GCS_DS_PATH = KaggleDatasets().get_gcs_path('vinbigdata-512-image-dataset')
# GCS location of the 'chest-xray-new' Kaggle dataset; every image path in this
# notebook is built by appending '/train/<id>.jpg' to it.
GCS_DS_PATH = KaggleDatasets().get_gcs_path('chest-xray-new')

In [None]:
#train = pd.read_csv('../input/final-csv-ving-big/vinbig.csv')
# Load the annotation table for the chest X-ray dataset.
train = pd.read_csv('../input/chest-xray-csv/final.csv')
# Image files are keyed by `image_id` (the column `preprocess` also uses), so the
# paths are built from it rather than from the diagnosis text in `class_name`.
train_paths = train.image_id.apply(lambda x: GCS_DS_PATH+ '/train/' + x +'.jpg')
# Raw class names, one per annotation row.
train_labels = train.class_name.values

In [None]:
# Peek at the raw class-name labels taken from the `class_name` column.
train_labels

In [None]:
# --- Run configuration -------------------------------------------------------
nb_classes = 2

# Square input resolution; IMG is the same size expressed as [height, width].
# (An earlier experiment used img_size = 75.)
img_size = 512
IMG = [img_size, img_size]

# Global batch size: two samples per replica of the active strategy.
BATCH_SIZE = 2 * strategy.num_replicas_in_sync

# NOTE(review): the fit call visible in this notebook hard-codes epochs=50 and
# does not read EPOCHS.
EPOCHS = 20
SEED = 123

In [None]:
# Preview the first 50 annotation rows.
train.head(50)

In [None]:
# Drop the CSV index artefacts plus the radiologist / bounding-box columns, none of
# which are read later in this notebook. `train` is reassigned in place, so without
# errors='ignore' a second run of this cell would raise KeyError on the
# already-removed columns.
train = train.drop(
    columns=['Unnamed: 0','Unnamed: 0.1', 'Unnamed: 0.1.1','rad_id','x_min','y_min','x_max','y_max'],
    errors='ignore',
)

In [None]:
#train=train[((train['No finding']== 1) | (train['Cardiomegaly'] == 1) | (train['Pulmonary fibrosis'] == 1) | (train['Pleural effusion'] == 1) | (train['Atelectasis'] == 1) | (train['ILD'] == 1) | (train['Other lesion'] == 1) | (train['Nodule/Mass'] == 1) )]

In [None]:
# Inspect the frame after the auxiliary columns have been dropped.
train

In [None]:
# Remove the indicator columns of every finding other than the two kept as
# labels ('No finding' and 'Edema').
train = train.drop(
    labels=[
        'Pleural thickening', 'Pneumothorax', 'Aortic enlargement', 'Atelectasis',
        'Calcification', 'Consolidation', 'ILD', 'Pulmonary fibrosis',
        'Cardiomegaly', 'Infiltration', 'Lung Opacity', 'Nodule/Mass',
        'Other lesion', 'Pleural effusion',
    ],
    axis=1,
)

In [None]:
# Keep only rows flagged as 'No finding' or as 'Edema'.
train = train.loc[train['No finding'].eq(1) | train['Edema'].eq(1)]

In [None]:
# Inspect the frame after restricting it to the 'No finding' / 'Edema' rows.
train

In [None]:
# Discard rows whose image_id is the literal 'desktop'.
# NOTE(review): presumably a stray non-image entry (e.g. a desktop.ini file) — confirm.
train = train.loc[train['image_id'].ne('desktop')]

In [None]:
# Hold out 20% of the rows (fixed random_state for a repeatable split); the
# hold-out is divided into validation and test in the next cell.
train,valid = train_test_split(train,test_size = 0.2,random_state = 42)

In [None]:
# Split the hold-out in half: one half stays as validation, the other becomes test.
valid,test = train_test_split(valid,test_size = 0.5,random_state = 42)

In [None]:
def get_mat(rotation, shear, height_zoom, width_zoom, height_shift, width_shift):
    """Return the 3x3 transform matrix that maps pixel indices.

    The result is the product (rotation . shear) . (zoom . shift).

    Args:
        rotation: rotation angle in degrees, as a float32 tensor of shape [1].
        shear: shear angle in degrees, as a float32 tensor of shape [1].
        height_zoom, width_zoom: zoom factors (shape [1]); the matrix stores
            their reciprocals.
        height_shift, width_shift: translations (shape [1]).

    Returns:
        A [3, 3] float32 tensor.
    """
    def _as_3x3(*cells):
        # Nine shape-[1] tensors, row-major -> one [3, 3] matrix.
        return tf.reshape(tf.concat(list(cells), axis=0), [3, 3])

    one = tf.constant([1], dtype='float32')
    zero = tf.constant([0], dtype='float32')

    # Degrees -> radians.
    rotation = math.pi * rotation / 180.
    shear = math.pi * shear / 180.

    # Rotation.
    cos_r = tf.math.cos(rotation)
    sin_r = tf.math.sin(rotation)
    rotation_matrix = _as_3x3(cos_r, sin_r, zero,
                              -sin_r, cos_r, zero,
                              zero, zero, one)

    # Shear.
    cos_s = tf.math.cos(shear)
    sin_s = tf.math.sin(shear)
    shear_matrix = _as_3x3(one, sin_s, zero,
                           zero, cos_s, zero,
                           zero, zero, one)

    # Zoom (reciprocals on the diagonal).
    zoom_matrix = _as_3x3(one/height_zoom, zero, zero,
                          zero, one/width_zoom, zero,
                          zero, zero, one)

    # Shift (translation in the last column).
    shift_matrix = _as_3x3(one, zero, height_shift,
                           zero, one, width_shift,
                           zero, zero, one)

    rotate_then_shear = K.dot(rotation_matrix, shear_matrix)
    zoom_then_shift = K.dot(zoom_matrix, shift_matrix)
    return K.dot(rotate_then_shear, zoom_then_shift)

def transform(image, label):
    """Apply a random rotation, shear, zoom and shift to a single image.

    Args:
        image: one image of size [dim, dim, 3] (not a batch [b, dim, dim, 3]);
            dim is taken from the module-level IMG[0].
        label: passed through unchanged.

    Returns:
        (augmented_image, label), where augmented_image has shape [DIM, DIM, 3].

    NOTE(review): no dataset pipeline visible in this notebook maps this
    function — confirm whether it is meant to be applied to train_dataset.
    """
    # input image - is one image of size [dim,dim,3] not a batch of [b,dim,dim,3]
    # output - image randomly rotated, sheared, zoomed, and shifted
    DIM = IMG[0]
    XDIM = DIM%2 #fix for size 331 (1 for odd DIM, 0 for even; shifts the lower clip bound)
    
    # Random parameters, each a shape-[1] tensor drawn from a standard normal and scaled:
    # rotation/shear are in degrees (get_mat converts them to radians).
    rot = 15. * tf.random.normal([1],dtype='float32')
    shr = 5. * tf.random.normal([1],dtype='float32') 
    h_zoom = 1.0 + tf.random.normal([1],dtype='float32')/10.
    w_zoom = 1.0 + tf.random.normal([1],dtype='float32')/10.
    h_shift = 16. * tf.random.normal([1],dtype='float32') 
    w_shift = 16. * tf.random.normal([1],dtype='float32') 
  
    # GET TRANSFORMATION MATRIX
    m = get_mat(rot,shr,h_zoom,w_zoom,h_shift,w_shift) 
    # LIST DESTINATION PIXEL INDICES
    # x/y enumerate every destination pixel in coordinates centred on the image;
    # z is a row of ones so the 3x3 matrix can also apply the shift.
    x = tf.repeat( tf.range(DIM//2,-DIM//2,-1), DIM )
    y = tf.tile( tf.range(-DIM//2,DIM//2),[DIM] )
    z = tf.ones([DIM*DIM],dtype='int32')
    idx = tf.stack( [x,y,z] )
    
    # ROTATE DESTINATION PIXELS ONTO ORIGIN PIXELS
    # Transform in float, cast back to integer indices, then clip so every
    # index stays inside the image.
    idx2 = K.dot(m,tf.cast(idx,dtype='float32'))
    idx2 = K.cast(idx2,dtype='int32')
    idx2 = K.clip(idx2,-DIM//2+XDIM+1,DIM//2)
    
    # FIND ORIGIN PIXEL VALUES           
    # Convert the centred coordinates back to (row, col) array indices and gather.
    idx3 = tf.stack( [DIM//2-idx2[0,], DIM//2-1+idx2[1,]] )
    d = tf.gather_nd(image,tf.transpose(idx3))
        
    return tf.reshape(d,[DIM,DIM,3]), label

In [None]:
def decode_image(filename, label=None, image_size=(img_size,img_size)):
    """Read a JPEG file and return it as a standardized float32 image.

    Args:
        filename: path of the JPEG file to read.
        label: optional label, returned untouched alongside the image.
        image_size: (height, width) to resize to; defaults to the notebook's
            square `img_size`.

    Returns:
        The image alone when `label` is None, otherwise `(image, label)`.
    """
    raw_bytes = tf.io.read_file(filename)
    # Always decode to 3 channels.
    pixels = tf.image.decode_jpeg(raw_bytes, channels=3)
    pixels = tf.cast(tf.image.resize(pixels, image_size), tf.float32)
    # Per-image standardization.
    pixels = tf.image.per_image_standardization(pixels)
    return pixels if label is None else (pixels, label)
    
def preprocess(df,test=False):
    """Build the image paths (and, unless `test`, the label matrix) for a split.

    Args:
        df: DataFrame with an `image_id` column. When `test` is False it must
            also contain the 'No finding' and 'Edema' columns.
        test: when not equal to False, only the paths are returned and the
            label columns are never read, so a label-less frame is accepted.

    Returns:
        `(paths, labels)` as NumPy arrays when `test == False`, otherwise
        `paths` alone. Paths have the form '<GCS_DS_PATH>/train/<image_id>.jpg'.
    """
    paths = df.image_id.apply(lambda x: GCS_DS_PATH+'/train/'+x+'.jpg').values
    if test==False:
        # Labels are looked up only when they are actually returned.
        labels = df.loc[:, ['No finding','Edema']].values
        return paths,labels
    return paths
    
def data_augment(image, label=None, seed=SEED):
    """Randomly flip an image left/right and then up/down.

    Args:
        image: image tensor to augment.
        label: optional label, returned untouched alongside the image.
        seed: seed passed to both random flip ops.

    Returns:
        The flipped image alone when `label` is None, otherwise `(image, label)`.
    """
    flipped = tf.image.random_flip_up_down(
        tf.image.random_flip_left_right(image, seed=seed),
        seed=seed,
    )
    if label is not None:
        return flipped, label
    return flipped

In [None]:
# Training input pipeline.
# Order matters:
#   * shuffle BEFORE map, so the 512-element buffer holds (path, label) pairs
#     rather than 512 decoded img_size x img_size x 3 float32 images;
#   * shuffle BEFORE repeat, so each pass over the data is reshuffled on its own
#     and samples from consecutive epochs are not mixed in one buffer.
train_dataset = (tf.data.Dataset
    .from_tensor_slices(preprocess(train))
    .shuffle(512)
    .map(decode_image, num_parallel_calls=AUTO)
    .repeat()
    .batch(BATCH_SIZE)
    .prefetch(AUTO)
    )

In [None]:
# Validation input pipeline: decode, batch, then cache the batched images so
# later passes reuse them.
valid_dataset = tf.data.Dataset.from_tensor_slices(preprocess(valid))
valid_dataset = valid_dataset.map(decode_image, num_parallel_calls=AUTO)
valid_dataset = valid_dataset.batch(BATCH_SIZE).cache().prefetch(AUTO)

In [None]:
# Test input pipeline: same steps as validation, but caching is left disabled.
# No shuffling, so batches follow the row order of `test`.
test_dataset = tf.data.Dataset.from_tensor_slices(preprocess(test))
test_dataset = test_dataset.map(decode_image, num_parallel_calls=AUTO)
test_dataset = test_dataset.batch(BATCH_SIZE).prefetch(AUTO)

In [None]:
# Inspect the validation split.
valid

In [None]:
# Inspect the test split.
test

In [None]:
with strategy.scope():
    # DenseNet201 without its classification head, ImageNet weights; the input
    # size is left unspecified here.
    desnet = tf.keras.applications.DenseNet201(include_top=False, weights='imagenet')

    # Backbone -> global average pooling -> 2-unit sigmoid head.
    model1 = tf.keras.Sequential()
    model1.add(desnet)
    model1.add(tf.keras.layers.GlobalAveragePooling2D())
    model1.add(tf.keras.layers.Dense(2, activation='sigmoid'))

    model1.compile(
        loss='binary_crossentropy',
        optimizer=tf.optimizers.Adam(),
        metrics=[
            'binary_accuracy',
            tf.keras.metrics.Recall(),
            tf.keras.metrics.Precision(),
            tf.keras.metrics.AUC(),
            tfa.metrics.F1Score(num_classes=2, average="macro"),
        ],
    )

In [None]:
with strategy.scope():
    # EfficientNetB4 without its classification head, 'noisy-student' weights,
    # fixed to the notebook's square input size.
    enet = efn.EfficientNetB4(
        include_top=False,
        weights='noisy-student',
        input_shape=(img_size, img_size, 3),
    )

    # Backbone -> global average pooling -> 2-unit sigmoid head.
    model2 = tf.keras.Sequential()
    model2.add(enet)
    model2.add(tf.keras.layers.GlobalAveragePooling2D())
    model2.add(tf.keras.layers.Dense(2, activation='sigmoid'))

    model2.compile(
        loss='binary_crossentropy',
        optimizer=tf.optimizers.Adam(),
        metrics=[
            'binary_accuracy',
            tf.keras.metrics.Recall(),
            tf.keras.metrics.Precision(),
            tf.keras.metrics.AUC(),
            tfa.metrics.F1Score(num_classes=2, average="macro"),
        ],
    )


In [None]:
with strategy.scope():
    # EfficientNetB5 without its classification head, 'noisy-student' weights,
    # fixed to the notebook's square input size. Note that `enet` is rebound here.
    enet = efn.EfficientNetB5(
        include_top=False,
        weights='noisy-student',
        input_shape=(img_size, img_size, 3),
    )

    # Backbone -> global average pooling -> 2-unit sigmoid head.
    model3 = tf.keras.Sequential()
    model3.add(enet)
    model3.add(tf.keras.layers.GlobalAveragePooling2D())
    model3.add(tf.keras.layers.Dense(2, activation='sigmoid'))

    model3.compile(
        loss='binary_crossentropy',
        optimizer=tf.optimizers.Adam(),
        metrics=[
            'binary_accuracy',
            tf.keras.metrics.Recall(),
            tf.keras.metrics.Precision(),
            tf.keras.metrics.AUC(),
            tfa.metrics.F1Score(num_classes=2, average="macro"),
        ],
    )

In [None]:
def _frozen_backbone(full_model):
    """Copy every layer but the last two of `full_model` into a new Sequential and freeze them.

    The layer objects are shared with `full_model`, so they become
    non-trainable there as well.
    """
    backbone = tf.keras.Sequential()
    for kept_layer in full_model.layers[:-2]:
        backbone.add(kept_layer)
    for kept_layer in backbone.layers:
        kept_layer.trainable = False
    return backbone


# Strip the pooling + dense head from each classifier, keeping frozen feature extractors.
model11 = _frozen_backbone(model1)
model22 = _frozen_backbone(model2)
model33 = _frozen_backbone(model3)

In [None]:
with strategy.scope():
    # Shared input, sized from `img_size` so it follows the resolution used by
    # decode_image and the EfficientNet backbones instead of a hard-coded 512.
    x = tf.keras.Input(shape = (img_size, img_size, 3))

    # Feature maps from the three frozen backbones.
    x1 = model11(x)
    x2 = model22(x)
    x3 = model33(x)

    # Concatenate along the channel axis, pool, regularise, classify.
    x5 = tf.keras.layers.concatenate([x1, x2, x3], axis = 3)
    x6 = tf.keras.layers.GlobalAveragePooling2D()(x5)
    x6 = tf.keras.layers.Dropout(0.75)(x6)
    x6 = tf.keras.layers.Dense(2, activation='sigmoid')(x6)
    out = tf.keras.Model(inputs = x, outputs = x6)

    out.compile(
        # `learning_rate` replaces the deprecated `lr` alias.
        optimizer=tf.keras.optimizers.Adam(learning_rate=0.0001),
        loss = 'binary_crossentropy',
        metrics=[tfa.metrics.F1Score(num_classes=2, average="macro")]
    )
out.summary()


In [None]:
#%%time
# Train the ensemble head. train_dataset repeats indefinitely, so each epoch is
# bounded by steps_per_epoch rather than by the dataset length.
h7 = out.fit(
    x=train_dataset,
    epochs=50,
    steps_per_epoch=200,
    validation_data=valid_dataset,
)

In [None]:
'''import seaborn as sns
sns.set()
fig = plt.figure(0, (12, 4))

ax = plt.subplot(1, 2, 1)
sns.lineplot(h7.epoch,h7.history['accuracy'], label = 'train')
plt.title('Accuracy')
plt.tight_layout()

ax = plt.subplot(1, 2, 2)
sns.lineplot(h7.epoch,h7.history['loss'], label = 'train')
plt.title('Loss')
plt.tight_layout()
plt.show()'''

In [None]:
# Evaluate the trained ensemble on the held-out test split.
# The previous version called an undefined name (`ef7`) inside a bare
# `except: pass`, so the NameError was swallowed and nothing was evaluated.
# The call stays best-effort, but any failure is now reported.
try:
    out.evaluate(test_dataset)
except Exception as err:
    print('Evaluation on the test split failed:', repr(err))

In [None]:
'''class_accuracies = []
for class_ in np.unique(y_true):
    class_acc = np.mean(y_pred[y_true == class_] == class_)
    class_acuracies.append(class_acc)'''

In [None]:
from sklearn.metrics import confusion_matrix
classes=['No finding','Edema']
# Predict with the trained ensemble `out` (the previous name `ef7` is not defined
# anywhere in this notebook). test_dataset is not shuffled, so the predictions
# line up row-for-row with `test`.
Y_pred = out.predict(test_dataset)
true_classes = test.loc[:, ['No finding','Edema']].values
print('Confusion Matrix')
# argmax turns the two-column indicator / score rows into class indices
# (0 = 'No finding', 1 = 'Edema').
cm=confusion_matrix(true_classes.argmax(axis=1),Y_pred.argmax(axis=1))
cm

In [None]:
from sklearn.metrics import classification_report
# Per-class precision / recall / F1 on the test split, from argmax class indices.
target_names = classes
y_true, y_pred = true_classes.argmax(axis=1), Y_pred.argmax(axis=1)
print(classification_report(y_true, y_pred, target_names=target_names))

In [None]:
import seaborn as sns
sns.set_style("darkgrid")
import itertools
def plot_confusion_matrix(cm, classes, normalize=False, title='Confusion matrix', cmap=plt.cm.Oranges):
    """Draw a confusion matrix as an annotated heat-map.

    Args:
        cm: 2-D NumPy array; rows are true labels, columns are predicted labels.
        classes: tick labels, one per class, in the same order as `cm`.
        normalize: when True, each row is divided by its sum (rounded to two
            decimals, empty rows shown as 0.0) before plotting.
        title: figure title.
        cmap: matplotlib colormap for the heat-map.

    Returns:
        None. The figure is drawn on a new 6x6 inch matplotlib figure.
    """
    # Normalise BEFORE drawing, so the heat-map colours and the cell text show
    # the same values (previously the image was drawn from the raw counts).
    if normalize:
        row_totals = cm.sum(axis=1)[:, np.newaxis]
        # A row with no samples gives 0/0; silence the warning and zero it below.
        with np.errstate(divide='ignore', invalid='ignore'):
            cm = cm.astype('float') / row_totals
        cm = np.around(cm, decimals=2)
        cm[np.isnan(cm)] = 0.0
        print("Normalized confusion matrix")
    else:
        print('Confusion matrix, without normalization')

    plt.figure(figsize=(6,6))
    plt.imshow(cm, interpolation='nearest', cmap=cmap)
    plt.title(title)
    plt.colorbar()
    tick_marks = np.arange(len(classes))
    plt.xticks(tick_marks, classes, rotation=45)
    plt.yticks(tick_marks, classes)

    # White text on dark cells, black text on light cells.
    thresh = cm.max() / 2.
    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        plt.text(j, i, cm[i, j],
                 horizontalalignment="center",
                 color="white" if cm[i, j] > thresh else "black")
    plt.tight_layout()
    plt.ylabel('True label')
    plt.xlabel('Predicted label')

In [None]:
# Draw the raw-count (non-normalised) confusion matrix for the two classes.
plot_confusion_matrix(cm,classes)

In [None]:
def calculate_sensitivity_specificity(y_test, y_pred_test):
    """Compute sensitivity and specificity for binary 0/1 labels.

    Args:
        y_test: true labels (array-like of 0/1); 1 is the positive class.
        y_pred_test: predicted labels (array-like of 0/1), same length.

    Returns:
        (sensitivity, specificity) where
        sensitivity = TP / (number of actual positives) and
        specificity = TN / (number of actual negatives).
        A value is NaN when its class is absent from `y_test`.
    """
    # Accept plain lists as well as arrays: `list == 1` would be a single
    # False rather than an element-wise comparison.
    y_test = np.asarray(y_test)
    y_pred_test = np.asarray(y_pred_test)

    actual_pos = y_test == 1
    actual_neg = y_test == 0

    true_pos = np.sum((y_pred_test == 1) & actual_pos)
    true_neg = np.sum((y_pred_test == 0) & actual_neg)
    n_pos = np.sum(actual_pos)
    n_neg = np.sum(actual_neg)

    # Explicit NaN instead of a 0/0 division warning when a class is missing.
    sensitivity = true_pos / n_pos if n_pos else float('nan')
    specificity = true_neg / n_neg if n_neg else float('nan')

    return sensitivity, specificity

In [None]:
# Class index 1 ('Edema') is the positive class, index 0 ('No finding') the negative.
sensitivity, specificity = calculate_sensitivity_specificity(
    y_test=true_classes.argmax(axis=1),
    y_pred_test=Y_pred.argmax(axis=1),
)
print ('Sensitivity:', sensitivity)
print ('Specificity:', specificity)