In [8]:
import tensorflow as tf
import os

# Define constants
IMG_SIZE = (224, 224)
BATCH_SIZE = 32
DATA_DIR = 'chest_xray'

# Create training and validation datasets from the directory structure
train_dataset = tf.keras.utils.image_dataset_from_directory(
    os.path.join(DATA_DIR, 'train'),
    image_size=IMG_SIZE,
    batch_size=BATCH_SIZE,
    label_mode='binary',
    shuffle=True
)

validation_dataset = tf.keras.utils.image_dataset_from_directory(
    os.path.join(DATA_DIR, 'val'),
    image_size=IMG_SIZE,
    batch_size=BATCH_SIZE,
    label_mode='binary',
    shuffle=False
)

# Normalize the image data to be between 0 and 1
normalization_layer = tf.keras.layers.Rescaling(1./255)
train_dataset = train_dataset.map(lambda x, y: (normalization_layer(x), y))
validation_dataset = validation_dataset.map(lambda x, y: (normalization_layer(x), y))

print("Datasets created and preprocessed successfully.")

Found 5216 files belonging to 2 classes.
Found 16 files belonging to 2 classes.
Datasets created and preprocessed successfully.


In [9]:
# Load the pre-trained MobileNetV2 model without its top classification layer
base_model = tf.keras.applications.MobileNetV2(
    input_shape=(224, 224, 3),
    include_top=False,
    weights='imagenet'
)

# Freeze the layers of the base model so they don't get retrained
base_model.trainable = False

# Create the final model by adding our custom classification layers on top
model = tf.keras.Sequential([
    base_model,
    tf.keras.layers.GlobalAveragePooling2D(),
    tf.keras.layers.Dropout(0.2),
    tf.keras.layers.Dense(1, activation='sigmoid') # Sigmoid for binary (Normal/Pneumonia) classification
])

# Compile the model with an optimizer, loss function, and metrics
model.compile(optimizer='adam',
              loss='binary_crossentropy',
              metrics=['accuracy'])

# Print a summary of the model architecture
model.summary()

In [10]:
# Train the model for a few epochs
# Note: This step will take some time to run
history = model.fit(
    train_dataset,
    epochs=5, # 5 epochs is a good balance of speed and performance for a hackathon
    validation_data=validation_dataset
)

# Save the final trained model to the backend folder
model.save('../backend/pneumonia_model.h5')

print("\n--- Training complete! ---")
print("Model saved as 'pneumonia_model.h5' in the 'backend' folder.")

Epoch 1/5


[1m163/163[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m96s[0m 543ms/step - accuracy: 0.8465 - loss: 0.3585 - val_accuracy: 0.8750 - val_loss: 0.3983
Epoch 2/5
[1m163/163[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m79s[0m 483ms/step - accuracy: 0.9508 - loss: 0.1442 - val_accuracy: 0.8750 - val_loss: 0.4213
Epoch 3/5
[1m163/163[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m78s[0m 475ms/step - accuracy: 0.9535 - loss: 0.1165 - val_accuracy: 0.8750 - val_loss: 0.3889
Epoch 4/5
[1m163/163[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m79s[0m 480ms/step - accuracy: 0.9583 - loss: 0.1097 - val_accuracy: 0.8750 - val_loss: 0.3438
Epoch 5/5
[1m163/163[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m80s[0m 490ms/step - accuracy: 0.9661 - loss: 0.0983 - val_accuracy: 0.8750 - val_loss: 0.3282





--- Training complete! ---
Model saved as 'pneumonia_model.h5' in the 'backend' folder.
