In [1]:
import tensorflow as tf
import os

# Define constants
IMG_SIZE = (224, 224)
BATCH_SIZE = 32
DATA_DIR = 'chest_xray'

# Create training and validation datasets from the directory structure
train_dataset = tf.keras.utils.image_dataset_from_directory(
    os.path.join(DATA_DIR, 'train'),
    image_size=IMG_SIZE,
    batch_size=BATCH_SIZE,
    label_mode='binary',
    shuffle=True
)

validation_dataset = tf.keras.utils.image_dataset_from_directory(
    os.path.join(DATA_DIR, 'val'),
    image_size=IMG_SIZE,
    batch_size=BATCH_SIZE,
    label_mode='binary',
    shuffle=False
)

# Normalize the image data to be between 0 and 1
normalization_layer = tf.keras.layers.Rescaling(1./255)
train_dataset = train_dataset.map(lambda x, y: (normalization_layer(x), y))
validation_dataset = validation_dataset.map(lambda x, y: (normalization_layer(x), y))

print("Datasets created and preprocessed successfully.")

Found 5216 files belonging to 2 classes.
Found 16 files belonging to 2 classes.
Datasets created and preprocessed successfully.


In [2]:
# Load the pre-trained MobileNetV2 model without its top classification layer
base_model = tf.keras.applications.MobileNetV2(
    input_shape=(224, 224, 3),
    include_top=False,
    weights='imagenet'
)

# Freeze the layers of the base model so they don't get retrained
base_model.trainable = False

# Create the final model by adding our custom classification layers on top
model = tf.keras.Sequential([
    base_model,
    tf.keras.layers.GlobalAveragePooling2D(),
    tf.keras.layers.Dropout(0.2),
    tf.keras.layers.Dense(1, activation='sigmoid') # Sigmoid for binary (Normal/Pneumonia) classification
])

# Compile the model with an optimizer, loss function, and metrics
model.compile(optimizer='adam',
              loss='binary_crossentropy',
              metrics=['accuracy'])

# Print a summary of the model architecture
model.summary()

In [3]:
# Train the model for a few epochs
# Note: This step will take some time to run
history = model.fit(
    train_dataset,
    epochs=5, # 5 epochs is a good balance of speed and performance for a hackathon
    validation_data=validation_dataset
)

# Save the final trained model to the backend folder
model.save('../backend/pneumonia_model.h5')

print("\n--- Training complete! ---")
print("Model saved as 'pneumonia_model.h5' in the 'backend' folder.")

Epoch 1/5
[1m163/163[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m89s[0m 513ms/step - accuracy: 0.8500 - loss: 0.3487 - val_accuracy: 0.9375 - val_loss: 0.2861
Epoch 2/5
[1m163/163[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m74s[0m 452ms/step - accuracy: 0.9509 - loss: 0.1476 - val_accuracy: 0.8750 - val_loss: 0.2910
Epoch 3/5
[1m163/163[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m75s[0m 455ms/step - accuracy: 0.9579 - loss: 0.1198 - val_accuracy: 0.8750 - val_loss: 0.3073
Epoch 4/5
[1m163/163[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m75s[0m 456ms/step - accuracy: 0.9591 - loss: 0.1112 - val_accuracy: 0.8750 - val_loss: 0.2724
Epoch 5/5
[1m163/163[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m75s[0m 459ms/step - accuracy: 0.9642 - loss: 0.1016 - val_accuracy: 0.8750 - val_loss: 0.2931





--- Training complete! ---
Model saved as 'pneumonia_model.h5' in the 'backend' folder.
