## Import Libraries

## Load Data - Create Train Generator - Train Model

In [4]:
import numpy as np
import pandas as pd
import os
import tensorflow as tf
from tensorflow.keras.preprocessing import image
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications.efficientnet import EfficientNetB3, preprocess_input
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D, BatchNormalization, Dropout, LeakyReLU
from tensorflow.keras.optimizers import Adam, SGD
from tensorflow.keras.callbacks import ReduceLROnPlateau, EarlyStopping
from tensorflow.keras.utils import Sequence, to_categorical
from sklearn.model_selection import train_test_split
from PIL import Image

# Locations of the metadata table, the bounding-box table and the image folder
data_path = r"C:\Users\kelly\Desktop\New folder\Data_Entry_2017_v2020.csv"
bbox_path = r"C:\Users\kelly\Desktop\New folder\BBox_List_2017.csv"
image_dir = r"C:\Users\kelly\Desktop\New folder\images\image_com"

# Read both CSV files into DataFrames
bbox_df = pd.read_csv(bbox_path)
df = pd.read_csv(data_path)

# Binary target: 0 only when the label is exactly 'No Finding', 1 for anything else
df['binary_label'] = df['Finding Labels'].ne('No Finding').astype('int64')

# Stratified 80/20 train/validation split with a fixed seed.
# NOTE(review): the split is row-wise; if the table holds several rows per
# patient, the same patient can land on both sides - confirm this is intended.
train_df, val_df = train_test_split(
    df,
    test_size=0.2,
    random_state=42,
    stratify=df['binary_label'],
)

# Random geometric augmentations applied to training images only
augmentation_datagen = ImageDataGenerator(
    fill_mode='nearest',      # how pixels created by a transform are filled
    horizontal_flip=False,    # flipping is disabled
    zoom_range=0.1,           # random zoom range
    shear_range=0.1,          # shear intensity
    height_shift_range=0.1,   # vertical shift, as a fraction of total height
    width_shift_range=0.1,    # horizontal shift, as a fraction of total width
    rotation_range=10,        # rotation range in degrees
)

class BBoxDataGenerator(Sequence):
    """Keras ``Sequence`` yielding batches of preprocessed images and one-hot labels.

    Each image is opened as grayscale, cropped to its first bounding box when
    ``bbox_df`` lists one for that file name, resized to 224x224, expanded to
    three identical channels and passed through ``preprocess_input``.

    Args:
        dataframe: Rows to serve; needs the 'Image Index' (file name) and
            'binary_label' columns.
        bbox_df: Bounding-box table with the 'Image Index', 'Bbox [x', 'y',
            'w' and 'h]' columns. It is indexed once at construction time, so
            later changes to the table are not picked up.
        image_dir: Folder that contains the image files.
        batch_size: Rows per batch; the final batch may be smaller.
        augmentations: Optional object exposing ``random_transform(img)``
            (e.g. an ``ImageDataGenerator``). Applied to every image when set.

    NOTE(review): only images that appear in ``bbox_df`` are cropped. If that
    table lists abnormal images only, "was cropped" becomes a proxy for the
    label - confirm this is intended.
    """

    def __init__(self, dataframe, bbox_df, image_dir, batch_size=16, augmentations=None):
        # Let the Keras base class set up its own state (newer Keras versions
        # warn when this call is missing).
        super().__init__()
        self.dataframe = dataframe
        self.bbox_df = bbox_df
        self.image_dir = image_dir
        self.batch_size = batch_size
        self.augmentations = augmentations
        # file name -> (left, upper, right, lower); built once so that each
        # image costs a dict lookup instead of two scans over bbox_df.
        self._bbox_by_image = self._build_bbox_lookup(bbox_df)

    @staticmethod
    def _build_bbox_lookup(bbox_df):
        """Map each file name to the crop box of its first row in ``bbox_df``."""
        lookup = {}
        columns = zip(bbox_df['Image Index'], bbox_df['Bbox [x'],
                      bbox_df['y'], bbox_df['w'], bbox_df['h]'])
        for name, x, y, w, h in columns:
            # setdefault keeps the first box when a file has several rows
            lookup.setdefault(name, (x, y, x + w, y + h))
        return lookup

    def __len__(self):
        """Return the number of batches, counting a trailing partial batch."""
        return int(np.ceil(len(self.dataframe) / self.batch_size))

    def __getitem__(self, idx):
        """Return batch ``idx`` as ``(images, one_hot_labels)``."""
        start = idx * self.batch_size
        batch = self.dataframe.iloc[start:start + self.batch_size]
        imgs, labels = [], []
        for img_name, label in zip(batch['Image Index'], batch['binary_label']):
            img = self.crop_and_process_image(os.path.join(self.image_dir, img_name), img_name)
            if self.augmentations:
                img = self.augmentations.random_transform(img)
            imgs.append(img)
            labels.append(label)
        return np.array(imgs), to_categorical(labels, num_classes=2)

    def crop_and_process_image(self, img_path, img_name):
        """Load one image and return it as a preprocessed 224x224x3 array.

        Args:
            img_path: Path of the image file to open.
            img_name: File name used to look up a bounding box.

        Returns:
            The result of ``preprocess_input`` on the 3-channel image array.
        """
        # convert() returns an in-memory copy, so the file can be closed here
        with Image.open(img_path) as source:
            img = source.convert('L')
        box = self._bbox_by_image.get(img_name)
        if box is not None:
            img = img.crop(box)
        img = img.resize((224, 224))
        img = image.img_to_array(img)
        # Replicate the single grayscale channel three times -> (224, 224, 3)
        img = np.repeat(img, 3, axis=-1)
        return preprocess_input(img)


# Batch generators: augmentation is applied to the training split only.
train_gen = BBoxDataGenerator(train_df, bbox_df, image_dir, batch_size=16, augmentations=augmentation_datagen)
val_gen = BBoxDataGenerator(val_df, bbox_df, image_dir, batch_size=16)  # No augmentations for validation data

# Model: EfficientNetB3 backbone (ImageNet weights, no top) followed by
# global average pooling -> batch norm -> dropout -> Dense(1024, relu)
# -> dropout -> 2-way softmax.
base_model = EfficientNetB3(include_top=False, weights='imagenet', input_shape=(224, 224, 3))
x = base_model.output
x = GlobalAveragePooling2D()(x)
x = BatchNormalization()(x)
x = Dropout(0.3)(x)
x = Dense(1024, activation ='relu')(x)
x = Dropout(0.3)(x) 
# NOTE(review): the name `predictions` is reassigned in a later cell to the
# array returned by model.predict; here it is the model's output tensor.
predictions = Dense(2, activation='softmax')(x)

model = Model(inputs=base_model.input, outputs=predictions)

# Fine-tune the whole network: every backbone layer is marked trainable.
for layer in base_model.layers:
    layer.trainable = True


# Compile with Adam (lr 1e-4); categorical cross-entropy matches the
# one-hot labels produced by the generators.
optimizer=Adam(learning_rate=1e-4)
model.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=['accuracy'])


# Callbacks, both watching val_loss.
reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=4, min_lr=1e-6, verbose=1)
# NOTE(review): patience (10) equals the number of epochs requested below,
# so early stopping does not appear able to trigger in this run - confirm.
early_stopping = EarlyStopping(monitor='val_loss', patience=10, verbose=1)

# Train the model; `history` keeps the object returned by fit().
history = model.fit(
    train_gen,
    epochs=10,
    validation_data=val_gen,
    callbacks=[reduce_lr, early_stopping]
)



Downloading data from https://storage.googleapis.com/keras-applications/efficientnetb3_notop.h5
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 7: ReduceLROnPlateau reducing learning rate to 1.9999999494757503e-05.
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [5]:
# Save the trained weights, explicitly requesting the 'tf' save format.
model.save_weights("C://Users//kelly//Desktop//New folder//efficientnet1_bbox", save_format = 'tf')

In [6]:
# Save the same weights a second time under a .h5 file name
# (no save_format given; presumably inferred from the suffix - confirm).
model.save_weights("C://Users//kelly//Desktop//New folder//efficientnet1_bbox.h5")

## Create Test Generator

In [7]:
class TestDataGenerator(Sequence):
    """Keras ``Sequence`` yielding batches of preprocessed images without labels.

    Each image is opened as grayscale, cropped to its first bounding box when
    ``bbox_df`` lists one for its file name, resized to 224x224, expanded to
    three identical channels and passed through ``preprocess_input``.

    Args:
        image_paths: Sequence of image entries, either bare file names or
            full paths. Each entry is joined onto ``image_dir``; an absolute
            entry therefore overrides ``image_dir``.
        bbox_df: Bounding-box table with the 'Image Index', 'Bbox [x', 'y',
            'w' and 'h]' columns. It is indexed once at construction time, so
            later changes to the table are not picked up.
        image_dir: Folder that relative entries of ``image_paths`` live in.
        batch_size: Images per batch; the final batch may be smaller.
    """

    def __init__(self, image_paths, bbox_df, image_dir, batch_size=16):
        # Let the Keras base class set up its own state (newer Keras versions
        # warn when this call is missing).
        super().__init__()
        self.image_paths = image_paths
        self.bbox_df = bbox_df
        self.image_dir = image_dir
        self.batch_size = batch_size
        # file name -> (left, upper, right, lower); built once so that each
        # image costs a dict lookup instead of two scans over bbox_df.
        self._bbox_by_image = self._build_bbox_lookup(bbox_df)

    @staticmethod
    def _build_bbox_lookup(bbox_df):
        """Map each file name to the crop box of its first row in ``bbox_df``."""
        lookup = {}
        columns = zip(bbox_df['Image Index'], bbox_df['Bbox [x'],
                      bbox_df['y'], bbox_df['w'], bbox_df['h]'])
        for name, x, y, w, h in columns:
            # setdefault keeps the first box when a file has several rows
            lookup.setdefault(name, (x, y, x + w, y + h))
        return lookup

    def __len__(self):
        """Return the number of batches, counting a trailing partial batch."""
        return int(np.ceil(len(self.image_paths) / self.batch_size))

    def __getitem__(self, idx):
        """Return batch ``idx`` as an array of preprocessed images."""
        start = idx * self.batch_size
        batch_paths = self.image_paths[start:start + self.batch_size]
        imgs = [
            self.crop_and_process_image(os.path.join(self.image_dir, img_path), img_path)
            for img_path in batch_paths
        ]
        return np.array(imgs)

    def crop_and_process_image(self, img_path, img_name):
        """Load one image and return it as a preprocessed 224x224x3 array.

        Args:
            img_path: Path of the image file to open.
            img_name: File name or path of the image; only its final path
                component is used for the bounding-box lookup.

        Returns:
            The result of ``preprocess_input`` on the 3-channel image array.
        """
        # convert() returns an in-memory copy, so the file can be closed here
        with Image.open(img_path) as source:
            img = source.convert('L')
        # bbox_df is keyed by bare file names; strip any directory part so a
        # full path passed as img_name can still match.
        box = self._bbox_by_image.get(os.path.basename(img_name))
        if box is not None:
            img = img.crop(box)
        img = img.resize((224, 224))
        img = image.img_to_array(img)
        # Replicate the single grayscale channel three times -> (224, 224, 3)
        img = np.repeat(img, 3, axis=-1)
        return preprocess_input(img)


## Load Test Images

In [8]:
test_image_dir = r"C:\Users\kelly\Desktop\New folder\eval_xray_im"

# Full path of every entry in the test folder, sorted so that prediction rows
# line up with a stable, reproducible file order.
test_image_paths = sorted(os.path.join(test_image_dir, img) for img in os.listdir(test_image_dir))

# Hand the generator the folder the test images actually live in. The paths
# above are already absolute, so the directory argument is only a fallback,
# but it should still name the right folder rather than the training one.
test_generator = TestDataGenerator(test_image_paths, bbox_df, test_image_dir)

## Make Predictions

In [9]:
# Class probabilities for the test generator, then the arg-max class per row
predictions = model.predict(test_generator)
predicted_classes = np.argmax(predictions, axis=1)

# np.save does not create missing parent folders, and nothing earlier in the
# notebook creates this one, so make sure it exists before writing.
predictions_path = "C://Users//kelly//Desktop//New folder//efficientnet1_bbox//predictions_efficientnet1.npy"
os.makedirs(os.path.dirname(predictions_path), exist_ok=True)
np.save(predictions_path, predictions)



In [10]:
# Build the submission table: one row per test image, keyed by its file name
submission_df = pd.DataFrame(
    {
        'Id': list(map(os.path.basename, test_image_paths)),
        'Label': predicted_classes,
    }
)

In [11]:
# Show the submission table as this cell's output
submission_df

Unnamed: 0,Id,Label
0,00000.jpg,0
1,00001.jpg,1
2,00002.jpg,1
3,00003.jpg,0
4,00004.jpg,1
...,...,...
5995,05995.jpg,1
5996,05996.jpg,1
5997,05997.jpg,0
5998,05998.jpg,1


In [12]:
# Write the submission table to a relative CSV path, leaving out the index column
submission_csv_path = 'submission.csv'
submission_df.to_csv(submission_csv_path, index=False)
print(f"Submission file saved to {submission_csv_path}")

Submission file saved to submission.csv
