In [None]:
import os
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.utils import shuffle
from sklearn.model_selection import train_test_split

In [None]:
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.applications.vgg16 import VGG16, preprocess_input
from tensorflow.keras.applications.vgg19 import VGG19, preprocess_input
from tensorflow.keras.layers import Dense, Flatten, BatchNormalization
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.layers import Input

In [None]:
def pre_process_dataset_train_n_valid(file_name):
    """Load a labelled CSV and return cleaned training and validation frames.

    The CSV is reduced to the ``Path`` column plus the 14 label columns.
    Missing values and ``-1`` entries are replaced with ``0``, the labels are
    cast to ``int`` and label names containing spaces are rewritten with
    underscores. The rows are then split 80/20 with a fixed random state.

    Parameters
    ----------
    file_name : str or file-like
        Anything accepted by ``pandas.read_csv``.

    Returns
    -------
    tuple of (pandas.DataFrame, pandas.DataFrame)
        ``(train_df, validation_df)``, each with a fresh ``0..n-1`` index.

    Raises
    ------
    Exception
        Any error raised while reading or transforming the data is printed
        and then re-raised, so the caller never receives ``None`` in place of
        the expected pair of frames.
    """
    # Single source of truth for the label columns and their output order.
    label_columns = ["Atelectasis", "Cardiomegaly", "Consolidation", "Edema", "Pleural Effusion",
                     "Pleural Other", "Pneumonia", "Pneumothorax", "Enlarged Cardiomediastinum",
                     "Lung Opacity", "Lung Lesion", "Fracture", "Support Devices", "No Finding"]
    # Only names that contain a space need renaming (space -> underscore).
    renamed_columns = {name: name.replace(" ", "_") for name in label_columns if " " in name}

    try:
        # reading input csv file
        train_full_df = pd.read_csv(file_name, index_col=False)

        # Every cleaning step is element-wise, so it is applied once before the
        # split. Chaining also avoids assigning into a slice of another frame.
        cleaned_df = (
            train_full_df[["Path"] + label_columns]
            .fillna(0)          # missing label -> 0
            .replace(-1, 0)     # -1 label -> 0
            .astype({name: "int" for name in label_columns})
            .rename(columns=renamed_columns)
        )

        # Splitting the data into training and validation
        train_df, validation_df = train_test_split(
            cleaned_df, test_size=0.2, random_state=420, shuffle=True
        )

        # reset_index returns a new frame; its result has to be kept.
        return train_df.reset_index(drop=True), validation_df.reset_index(drop=True)

    except Exception as err:
        print('Info:', err)
        raise

In [None]:
def pre_process_dataset_test(file_name):
    """Load a labelled CSV and return the cleaned test frame.

    The CSV is reduced to the ``Path`` column plus the 14 label columns.
    Missing values and ``-1`` entries are replaced with ``0``, the labels are
    cast to ``int`` and label names containing spaces are rewritten with
    underscores. Row order and index are left as read from the file.

    Parameters
    ----------
    file_name : str or file-like
        Anything accepted by ``pandas.read_csv``.

    Returns
    -------
    pandas.DataFrame
        The cleaned frame with columns ``Path`` followed by the 14 labels.

    Raises
    ------
    Exception
        Any error raised while reading or transforming the data is printed
        and then re-raised, so the caller never silently receives ``None``.
    """
    # Single source of truth for the label columns and their output order.
    label_columns = ["Atelectasis", "Cardiomegaly", "Consolidation", "Edema", "Pleural Effusion",
                     "Pleural Other", "Pneumonia", "Pneumothorax", "Enlarged Cardiomediastinum",
                     "Lung Opacity", "Lung Lesion", "Fracture", "Support Devices", "No Finding"]
    # Only names that contain a space need renaming (space -> underscore).
    renamed_columns = {name: name.replace(" ", "_") for name in label_columns if " " in name}

    try:
        test_full_df = pd.read_csv(file_name, index_col=False)

        # One chained expression: each step returns a new frame, so nothing is
        # assigned into a slice of another frame.
        test_df = (
            test_full_df[["Path"] + label_columns]
            .fillna(0)          # missing label -> 0
            .replace(-1, 0)     # -1 label -> 0
            .astype({name: "int" for name in label_columns})
            .rename(columns=renamed_columns)
        )
        return test_df
    except Exception as err:
        print('Info:', err)
        raise

In [None]:
# CSV that is split into the training and validation frames.
train_file_name = "/kaggle/input/chexpert-filtered-data/train_filtered.csv"

In [None]:
# CSV used as the held-out test set (the file itself is named "valid_filtered").
test_file_name = "/kaggle/input/chexpert-filtered-data/valid_filtered.csv"

In [None]:
# Build the cleaned training / validation frames from the training CSV.
train_df, validation_df = pre_process_dataset_train_n_valid(file_name=train_file_name)

In [None]:
# Build the cleaned test frame from the test CSV.
test_df = pre_process_dataset_test(file_name=test_file_name)

In [None]:
# Label columns (post-rename, underscores instead of spaces) used as the
# generators' `y_col` and as the class names in the ROC plots.
columns = [
    'Atelectasis',
    'Cardiomegaly',
    'Consolidation',
    'Edema',
    'Pleural_Effusion',
    'Pleural_Other',
    'Pneumonia',
    'Pneumothorax',
    'Enlarged_Cardiomediastinum',
    'Lung_Opacity',
    'Lung_Lesion',
    'Fracture',
    'Support_Devices',
    'No_Finding',
]

In [None]:
# Image generator that multiplies pixel values by 1/255.
datagen = ImageDataGenerator(rescale=1.0 / 255.0)

In [None]:
# Separate generator object for test images; same 1/255 rescaling.
test_datagen = ImageDataGenerator(rescale=1.0 / 255.0)

In [None]:
# Shuffled training batches built from the first 20000 rows of train_df.
# class_mode="raw" passes the label columns through unchanged as the target array.
train_generator = datagen.flow_from_dataframe(
    dataframe=train_df[:20000],
    directory='/kaggle/input/chexpert',
    x_col="Path",
    y_col=columns,
    batch_size=64,
    seed=42,
    shuffle=True,
    class_mode="raw",
    target_size=(224, 224),
)

In [None]:
# Validation batches built from the first 5000 rows of validation_df,
# configured the same way as the training generator.
valid_generator = datagen.flow_from_dataframe(
    dataframe=validation_df[:5000],
    directory='/kaggle/input/chexpert',
    x_col="Path",
    y_col=columns,
    batch_size=64,
    seed=42,
    shuffle=True,
    class_mode="raw",
    target_size=(224, 224),
)

In [None]:
# Test batches over the whole test frame.
# shuffle=False is required: later cells compare `model.predict(test_generator)`
# row-by-row against `test_df[columns]`, which only works when the generator
# yields images in dataframe order.
# The dedicated `test_datagen` is used here (same 1/255 rescaling as `datagen`).
test_generator = test_datagen.flow_from_dataframe(
    dataframe=test_df,
    directory='/kaggle/input/chexpert',
    x_col="Path",
    y_col=columns,
    batch_size=64,
    seed=42,
    shuffle=False,
    class_mode="raw",
    target_size=(224, 224),
)

Model Architecture Begins - DenseNet

In [None]:
# Number of *full* batches per pass for each generator
# (floor division: a trailing partial batch is not counted).
STEP_SIZE_TRAIN, STEP_SIZE_VALID, STEP_SIZE_TEST = (
    gen.n // gen.batch_size
    for gen in (train_generator, valid_generator, test_generator)
)

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten
from tensorflow.keras.applications import DenseNet121
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.preprocessing.image import ImageDataGenerator

In [None]:
from tensorflow.keras.applications import DenseNet121
from tensorflow.keras.layers import Conv2D, Flatten, MaxPooling2D, Dense, Dropout
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import Adam

# DenseNet121 backbone: ImageNet weights, no classifier head, 224x224 RGB input.
densenet_base = DenseNet121(include_top=False, weights='imagenet', input_shape=(224, 224, 3))

# Fine-tune the whole backbone: every layer is left trainable (nothing is frozen).
for layer in densenet_base.layers:
    layer.trainable = True

# Classification head stacked on top of the backbone's feature maps.
model = Sequential([
    densenet_base,
    Conv2D(32, (3, 3), activation='relu', padding='same'),
    MaxPooling2D(pool_size=(2, 2), strides=(2, 2), padding='same'),
    Conv2D(64, (3, 3), activation='relu', padding='same'),
    MaxPooling2D(pool_size=(2, 2), strides=(2, 2), padding='same'),
    Conv2D(128, (3, 3), activation='relu', padding='same'),
    MaxPooling2D(pool_size=(2, 2), strides=(2, 2), padding='same'),
    Dropout(rate=0.4),
    Flatten(),
    Dense(128, activation='relu'),
    # One unit per label column. The labels are independent 0/1 flags (several
    # can be 1 for the same image), so each unit gets its own sigmoid
    # probability; softmax would force the 14 outputs to compete and sum to 1.
    Dense(14, activation='sigmoid')
])

# The model is compiled in the training cell, where the optimizer and its
# learning rate are chosen.

In [None]:
from sklearn.metrics import roc_curve, roc_auc_score
import numpy as np

# Learning-rate sweep: lr1 .. lr2 (inclusive) in steps of j.
#Change the learning rate accordingly
lr1=0.001
j=0.001
lr2=0.007

# range() only accepts integers, so the float grid is built with linspace
# (endpoint included, no drift from repeated float addition).
n_rates = int(round((lr2 - lr1) / j)) + 1
learning_rates = np.linspace(lr1, lr2, n_rates)

# Snapshot of the starting weights, restored before every run so that each
# learning rate is trained from the same state rather than continuing from
# the previous run.
initial_weights = model.get_weights()

for lr in learning_rates:
    lr = float(lr)
    model.set_weights(initial_weights)

    # Compile the model with the learning rate under test.
    # binary_crossentropy scores each of the label columns independently,
    # which matches the multi-hot 0/1 targets produced by class_mode="raw".
    optim = Adam(learning_rate=lr)
    model.compile(optimizer=optim, loss="binary_crossentropy", metrics=["accuracy"])

    # Train the model
    history = model.fit(
        train_generator,
        epochs=2,
        validation_data=valid_generator,
    )
    plt.plot(history.history['accuracy'], label='Training Accuracy')
    plt.plot(history.history['val_accuracy'], label='Validation Accuracy')
    plt.xlabel('Epoch')
    plt.ylabel('Accuracy')
    plt.title('Accuracy (learning rate = {0:g})'.format(lr))
    plt.legend()
    plt.show()

    # evaluate() returns the loss followed by the compiled metrics
    # (accuracy here) - it is not an AUC.
    test_metrics = model.evaluate(test_generator)
    test_auc = test_metrics  # legacy name kept for compatibility; holds [loss, accuracy]
    print("Test [loss, accuracy]:", test_metrics)

    # Predictions for the test set: one row per image, one column per label.
    # NOTE(review): the rows only line up with test_df when test_generator
    # yields images in dataframe order - verify its `shuffle` setting.
    y_pred = model.predict(test_generator)
    y_true = np.asarray(test_df[columns])

    # ROC curve and AUC for each label, taken column-wise from the 2-D arrays.
    fpr = dict()
    tpr = dict()
    roc_auc = dict()
    for k, name in enumerate(columns):
        # ROC/AUC is undefined when a label has only one class in the test set.
        if np.unique(y_true[:, k]).size < 2:
            print("Skipping ROC for {0}: only one class present in the test labels".format(name))
            continue
        fpr[k], tpr[k], _ = roc_curve(y_true[:, k], y_pred[:, k])
        roc_auc[k] = roc_auc_score(y_true[:, k], y_pred[:, k])

    # Micro-average: pool every (image, label) decision into one binary problem.
    # Both arrays are flattened the same way so the pairs stay aligned.
    fpr["micro"], tpr["micro"], _ = roc_curve(y_true.ravel(), y_pred.ravel())
    roc_auc["micro"] = roc_auc_score(y_true.ravel(), y_pred.ravel())

    # Plot ROC curve
    plt.figure(figsize=(10, 6))
    plt.plot(fpr["micro"], tpr["micro"], label='Micro-average ROC curve (AUC = {0:0.2f})'.format(roc_auc["micro"]), color='deeppink', linestyle=':', linewidth=4)

    for k, name in enumerate(columns):
        if k in roc_auc:
            plt.plot(fpr[k], tpr[k], label='{0} (AUC = {1:0.2f})'.format(name, roc_auc[k]))

    plt.plot([0, 1], [0, 1], color='navy', linestyle='--')
    plt.xlim([0.0, 1.0])
    plt.ylim([0.0, 1.05])
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.title('Receiver Operating Characteristic (ROC) Curve (learning rate = {0:g})'.format(lr))
    plt.legend(loc="lower right")
    plt.show()


In [None]:
# Train the model
# Fit the current model for two epochs on the training generator,
# validating on the validation generator after each epoch.
history = model.fit(x=train_generator, validation_data=valid_generator, epochs=2)

In [None]:
# Plot training history
# Training vs. validation accuracy per epoch for the most recent `fit` call.
for _metric, _legend in (('accuracy', 'Training Accuracy'), ('val_accuracy', 'Validation Accuracy')):
    plt.plot(history.history[_metric], label=_legend)
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.legend()
plt.show()

In [None]:
# evaluate() returns the loss followed by the compiled metrics (accuracy in
# this notebook) - it is not an AUC, so the printed label says what it is.
test_metrics = model.evaluate(test_generator)
test_auc = test_metrics  # legacy name kept for compatibility; holds [loss, accuracy]
print("Test [loss, accuracy]:", test_metrics)

In [None]:
from sklearn.metrics import roc_curve, roc_auc_score
import numpy as np

# Predictions for the test set: one row per image, one column per label.
# NOTE(review): the rows only line up with test_df when test_generator yields
# images in dataframe order - verify its `shuffle` setting.
y_pred = model.predict(test_generator)
y_true = np.asarray(test_df[columns])

# ROC curve and AUC for each label, taken column-wise from the 2-D arrays.
# (Slicing a row-major flattened prediction array in chunks of len(test_df)
# mixes labels together, so the columns are indexed directly instead.)
fpr = dict()
tpr = dict()
roc_auc = dict()
for k, name in enumerate(columns):
    # ROC/AUC is undefined when a label has only one class in the test set.
    if np.unique(y_true[:, k]).size < 2:
        print("Skipping ROC for {0}: only one class present in the test labels".format(name))
        continue
    fpr[k], tpr[k], _ = roc_curve(y_true[:, k], y_pred[:, k])
    roc_auc[k] = roc_auc_score(y_true[:, k], y_pred[:, k])

# Micro-average: pool every (image, label) decision into one binary problem.
# Both arrays are flattened the same way so the pairs stay aligned.
fpr["micro"], tpr["micro"], _ = roc_curve(y_true.ravel(), y_pred.ravel())
roc_auc["micro"] = roc_auc_score(y_true.ravel(), y_pred.ravel())

# Plot ROC curve
plt.figure(figsize=(10, 6))
plt.plot(fpr["micro"], tpr["micro"], label='Micro-average ROC curve (AUC = {0:0.2f})'.format(roc_auc["micro"]), color='deeppink', linestyle=':', linewidth=4)

for k, name in enumerate(columns):
    if k in roc_auc:
        plt.plot(fpr[k], tpr[k], label='{0} (AUC = {1:0.2f})'.format(name, roc_auc[k]))

plt.plot([0, 1], [0, 1], color='navy', linestyle='--')
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.05])
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('Receiver Operating Characteristic (ROC) Curve')
plt.legend(loc="lower right")
plt.show()
