In [1]:
import torch
import pandas as pd
import os
import sys
from torchvision import transforms
from torch.utils.data import DataLoader
import torch
from torchvision.models import efficientnet_b1
import torch.nn as nn

2024-05-02 09:53:10.937541: I tensorflow/core/util/port.cc:111] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-05-02 09:53:16.069905: E tensorflow/compiler/xla/stream_executor/cuda/cuda_dnn.cc:9342] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-05-02 09:53:16.070201: E tensorflow/compiler/xla/stream_executor/cuda/cuda_fft.cc:609] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-05-02 09:53:16.083454: E tensorflow/compiler/xla/stream_executor/cuda/cuda_blas.cc:1518] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-05-02 09:53:17.890833: I tensorflow/core/platform/cpu_feature_g

In [2]:
notebook_dir = os.getcwd()
project_dir = os.path.dirname(notebook_dir)

# Add the src directory to sys.path
src_dir = os.path.join(project_dir, 'src')
if src_dir not in sys.path:
    sys.path.append(src_dir)

from utils import generate_class_weights, show_batch_images
from data import ChestXray14HFDataset
from trainers import MultiLabelLightningModule
from models import set_model

## load dataframes

In [3]:
data_path = "/cluster/home/taheeraa/datasets/chestxray-14"

In [4]:
labels = [
    "Atelectasis", 
    "Cardiomegaly",
    "Effusion", 
    "Infiltration", 
    "Mass",
    "Nodule",
    "Pneumonia",
    "Pneumothorax",  
    "Consolidation",
    "Edema",
    "Emphysema",
    "Fibrosis",
    "Pleural_Thickening",
    "Hernia"
]
num_labels = len(labels)

file_path_train = data_path + '/train_official.txt'
file_path_val = data_path + '/val_official.txt'
file_path_test = data_path + '/test_official.txt'

columns = ['Image Filename'] + labels

df_train = pd.read_csv(file_path_train, sep='\s+', names=columns)
df_val = pd.read_csv(file_path_val, sep='\s+', names=columns)
df_test = pd.read_csv(file_path_test, sep='\s+', names=columns)

# Finding all image paths, and mapping them to the DataFrame
subfolders = [f"images_{i:03}/images" for i in range(1, 13)]  # Generates 'images_001' to 'images_012'
path_mapping = {}
for subfolder in subfolders:
    full_folder_path = os.path.join(data_path, subfolder)
    for img_file in os.listdir(full_folder_path):
        path_mapping[img_file] = os.path.join(full_folder_path, img_file)

# Update the DataFrame using the mapping
df_train['Full Image Path'] = df_train['Image Filename'].map(path_mapping)
df_val['Full Image Path'] = df_val['Image Filename'].map(path_mapping)
df_test['Full Image Path'] = df_test['Image Filename'].map(path_mapping)

# Move 'Full Image Path' to the front of the DataFrame
cols_train = ['Full Image Path'] + [col for col in df_train.columns if col != 'Full Image Path']
cols_val = ['Full Image Path'] + [col for col in df_val.columns if col != 'Full Image Path']
cols_test = ['Full Image Path'] + [col for col in df_test.columns if col != 'Full Image Path']
df_train = df_train[cols_train]
df_val = df_val[cols_val]
df_test = df_test[cols_test]

# Drop 'Image Filename' column
train_df = df_train.drop(columns=['Image Filename'])
val_df = df_val.drop(columns=['Image Filename'])
test_df = df_test.drop(columns=['Image Filename'])

# Create class weights
df_train_calculate_weights = df_train.drop(columns=['Full Image Path']).to_numpy()
class_weights_dict = generate_class_weights(df_train_calculate_weights, multi_class=False, one_hot_encoded=True)
class_weights_list = [class_weights_dict[i] for i in class_weights_dict]
class_weights_tensor = torch.tensor(class_weights_list, dtype=torch.float32)

label_weights_dict = {labels[i]: class_weights_dict[i] for i in range(len(labels))}
print(f"Class weights: {label_weights_dict}")

print(f"Train dataframe shape: {df_train.shape} (1 size larger than expected due to 'Full Image Path')")
print(f"Train columns: {df_train.columns}")
print(f"Labels: {labels}")
print(f"Number of labels: {len(labels)}")


Class weights: {'Atelectasis': 0.06666666666666667, 'Cardiomegaly': 0.6968494101318529, 'Effusion': 3.3449700199866754, 'Infiltration': 0.6563995293502418, 'Mass': 0.41714855433698905, 'Nodule': 1.4048125349748182, 'Pneumonia': 1.2366502463054188, 'Pneumothorax': 6.52051948051948, 'Consolidation': 2.237433155080214, 'Edema': 2.024516129032258, 'Emphysema': 4.1494214876033055, 'Fibrosis': 4.01664, 'Pleural_Thickening': 4.63601108033241, 'Hernia': 2.6055007784120394}
Train dataframe shape: (75312, 16) (1 size larger than expected due to 'Full Image Path')
Train columns: Index(['Full Image Path', 'Image Filename', 'Atelectasis', 'Cardiomegaly',
       'Effusion', 'Infiltration', 'Mass', 'Nodule', 'Pneumonia',
       'Pneumothorax', 'Consolidation', 'Edema', 'Emphysema', 'Fibrosis',
       'Pleural_Thickening', 'Hernia'],
      dtype='object')
Labels: ['Atelectasis', 'Cardiomegaly', 'Effusion', 'Infiltration', 'Mass', 'Nodule', 'Pneumonia', 'Pneumothorax', 'Consolidation', 'Edema', 'Emphys

## load dataset and transforms

In [5]:
img_size = 224
num_workers = 4
pin_memory = False
batch_size = 32

In [6]:
normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])

train_transforms = transforms.Compose([
    transforms.Resize((256, 256), interpolation=transforms.InterpolationMode.BILINEAR, antialias=True),
    transforms.CenterCrop(img_size),
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.RandomRotation(degrees=7),
    transforms.ToTensor(),
    normalize,
])

val_transforms = transforms.Compose([
    transforms.Resize((256, 256), interpolation=transforms.InterpolationMode.BILINEAR, antialias=True),
    transforms.CenterCrop(img_size),
    transforms.ToTensor(),
    normalize,
])

In [7]:
train_dataset = ChestXray14HFDataset(
    dataframe=train_df, transform=train_transforms)
val_dataset = ChestXray14HFDataset(
    dataframe=val_df, transform=val_transforms)
test_dataset = ChestXray14HFDataset(
    dataframe=test_df, transform=val_transforms)

In [8]:
train_loader = DataLoader(
    train_dataset, 
    batch_size=batch_size, 
    shuffle=True, 
    num_workers=num_workers, 
    pin_memory=pin_memory
)
val_loader = DataLoader(
    val_dataset, 
    batch_size=batch_size, 
    shuffle=False, 
    num_workers=num_workers, 
    pin_memory=pin_memory
)
test_loader = DataLoader(
    test_dataset, 
    batch_size=batch_size, 
    shuffle=False, 
    num_workers=num_workers, 
    pin_memory=pin_memory
)

## load model

In [22]:
model_file="/cluster/home/taheeraa/code/master-thesis/01-multi-label/checkpoints/DenseNet121_aug4_pretrain_WeightBelow1_1_0.829766922537.pkl"

In [63]:
model, img_size = set_model('resnet50', num_labels, labels)

criterion = nn.BCEWithLogitsLoss()
learning_rate = 0.001
optimizer_func = torch.optim.Adam(model.parameters(), lr=learning_rate)
scheduler_func = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer_func, mode='max', factor=0.1, patience=3, verbose=True)
model_ckpts_folder = "/cluster/home/taheeraa/code/master-thesis/01-multi-label/notebooks/checkpoints"
logger = None
root_path = "/cluster/home/taheeraa/code/master-thesis/01-multi-label/output"

before: The model has 7794184 trainable parameters
after: The model has 7370030 trainable parameters


In [None]:

training_module = MultiLabelLightningModule(
    model=model,
    criterion=criterion,
    learning_rate=learning_rate,
    num_labels=num_labels,
    labels=labels,
    optimizer_func=optimizer_func,
    scheduler_func=scheduler_func,
    model_ckpts_folder=model_ckpts_folder,
    logger=logger,
    root_path=root_path,
)