In [1]:
import math
import numpy as np
import pandas as pd
import os
from tensorflow import keras
from keras.applications import ResNet50
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from keras import Model
from keras.layers import GlobalAveragePooling2D, Dense, AveragePooling2D, LeakyReLU
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam,SGD
from tensorflow.keras.metrics import Precision, Recall, AUC
from tensorflow_addons.metrics import F1Score
from tensorflow.keras.applications.resnet import preprocess_input
from sklearn.preprocessing import OneHotEncoder, LabelEncoder


2024-05-02 23:42:27.693648: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  SSE4.1 SSE4.2
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.

TensorFlow Addons (TFA) has ended development and introduction of new features.
TFA has entered a minimal maintenance and release mode until a planned end of life in May 2024.
Please modify downstream libraries to take dependencies from other repositories in our TensorFlow community (e.g. Keras, Keras-CV, and Keras-NLP). 

For more information see: https://github.com/tensorflow/addons/issues/2807 

 The versions of TensorFlow you are currently using is 2.11.0 and is not supported. 
Some things might work, some things might not.
If you were to encounter a bug, do not file an issue.
If you want to make sure you're using a tested and supported con

In [5]:
data = pd.read_csv("../Data_Entry_2017.csv")
filenames = []
directories = []
with open("../selected_png_list.txt", "r") as fr:
    for line in fr.readlines():
        start = line.find("images_0")
        directory = "images/" + line[start:]
        filename_reverse = line[::-1]
        filename_reverse = filename_reverse[:filename_reverse.find("/")]
        filename = filename_reverse[::-1]
        filenames.append(filename[:-1])
        directories.append(directory)
data = data[data["Image Index"].isin(filenames)]

In [6]:
for index, row in data.iterrows():
    if row["Finding Labels"].find("|") != -1:
        data.drop(index, inplace=True)

In [7]:
classifications = np.unique(data.loc[:,"Finding Labels"])
print(len(classifications))
data.to_csv("full_data.csv")

15


In [23]:
train_full_set = []
test_full_set = []
data = pd.read_csv("full_data.csv")
for classification in classifications:
    class_filter = data[data["Finding Labels"] == classification]
    print(classification, len(class_filter))
    indices_len = len(class_filter)
    if indices_len > 400:
        indices_len = 400
    train_set_size = math.floor(0.8 * indices_len)
    permutation = np.random.permutation(np.arange(0,indices_len))
    permutation = [int(x) for x in permutation]
    
    train_filenames = class_filter.iloc[permutation[:train_set_size], 1]
    test_filenames = class_filter.iloc[permutation[train_set_size:], 1]

    for filename in train_filenames:
        train_full_set.append(filename)
    for filename in test_filenames:
        test_full_set.append(filename)

train_data = data[data["Image Index"].isin(train_full_set)]
test_data = data[data["Image Index"].isin(test_full_set)]


Atelectasis 377
Cardiomegaly 82
Consolidation 102
Edema 63
Effusion 321
Emphysema 71
Fibrosis 56
Hernia 9
Infiltration 807
Mass 176
No Finding 5210
Nodule 214
Pleural_Thickening 118
Pneumonia 28
Pneumothorax 202


In [26]:


classifications_observed = classifications[classifications != "No Finding"]
classifications_observed = ["Cardiomegaly"]
model_predictions = []
histories = []
for classification in classifications_observed:
        # assemble ResNet50 model
        base_model = ResNet50(include_top=False, 
                                    classes=1,
                                    input_shape=(1024,1024,3))
        x = base_model.output
        x = GlobalAveragePooling2D()(x)
        x = LeakyReLU(alpha = 0.4)(x)
        predictions = Dense(1, activation='sigmoid')(x)
        model = Model(inputs=base_model.input, outputs=predictions)
        model.compile(optimizer=Adam(learning_rate=0.0001), loss='binary_crossentropy', metrics=['accuracy', Precision(), Recall(), F1Score(num_classes=1, average = "weighted"), AUC(curve = "ROC")])

        print(f"Training ResNet50 model on {classification}")
        possible_classes = [classification, "No Finding"]
        
        train_data_subset = train_data[train_data["Finding Labels"].isin(possible_classes)]
        print(np.unique(train_data_subset.loc[:,"Finding Labels"]))

        print(np.unique(train_data.loc[:,"Finding Labels"]))
        train_datagen = ImageDataGenerator(rescale = 1./255, preprocessing_function=preprocess_input)
        train_generator = train_datagen.flow_from_dataframe(
                train_data_subset,
                x_col = "Image Index",
                y_col = "Finding Labels",
                directory = "images",
                target_size=(224,224),
                batch_size=16,
                class_mode='binary',
                shuffle=True)


        history = model.fit(
                train_generator,
                steps_per_epoch=len(train_generator),
                epochs=10,
                verbose=1)
        
        
        print(f"Testing ResNet50 model on {classification}")
        test_data_subset = test_data[test_data["Finding Labels"].isin(possible_classes)]
        # test_data_y_cols = list(test_data_subset.columns[16:])
        test_datagen = ImageDataGenerator(rescale=1./255,preprocessing_function=preprocess_input)
        test_generator = test_datagen.flow_from_dataframe(
                test_data_subset,
                x_col = "Image Index",
                y_col = "Finding Labels",
                directory = "images",
                target_size=(224,224),
                batch_size=16,
                class_mode='binary',
                shuffle=True)
        
        histories.append(history)
        model_predictions.append(model.predict(test_generator))
        model.evaluate(test_generator)
        

Training ResNet50 model on Cardiomegaly
['Cardiomegaly' 'No Finding']
['Atelectasis' 'Cardiomegaly' 'Consolidation' 'Edema' 'Effusion'
 'Emphysema' 'Fibrosis' 'Hernia' 'Infiltration' 'Mass' 'No Finding'
 'Nodule' 'Pleural_Thickening' 'Pneumonia' 'Pneumothorax']
Found 385 validated image filenames belonging to 2 classes.
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Testing ResNet50 model on Cardiomegaly
Found 97 validated image filenames belonging to 2 classes.
