# BONE FRACTURE DETECTION 

In [3]:
# -----------------------------
# 0. Import Libraries
# -----------------------------
import os
import shutil
import random
import numpy as np
from tqdm import tqdm
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications import VGG16
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D, Dropout, Flatten
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
from sklearn.metrics import confusion_matrix, classification_report

# -----------------------------
# 1. Create a Balanced Dataset
# -----------------------------
original_dataset_dir = r"/kaggle/input/bone-fracture-dataset/Bone fracture dataset/Bone fracture dataset/Dataset"
balanced_dataset_dir = r"./balanced_dataset"
os.makedirs(balanced_dataset_dir, exist_ok=True)

classes = ["fracture", "normal"]
for cls in classes:
    os.makedirs(os.path.join(balanced_dataset_dir, cls), exist_ok=True)

# Count images and balance
majority_class = max(classes, key=lambda x: len(os.listdir(os.path.join(original_dataset_dir, x))))
minority_class = min(classes, key=lambda x: len(os.listdir(os.path.join(original_dataset_dir, x))))
majority_count = len(os.listdir(os.path.join(original_dataset_dir, majority_class)))
minority_count = len(os.listdir(os.path.join(original_dataset_dir, minority_class)))

print(f"Majority ({majority_class}): {majority_count}")
print(f"Minority ({minority_class}): {minority_count}")

# Copy images
for cls in classes:
    src_dir = os.path.join(original_dataset_dir, cls)
    dst_dir = os.path.join(balanced_dataset_dir, cls)
    for img_name in os.listdir(src_dir):
        shutil.copy(os.path.join(src_dir, img_name), dst_dir)

# Downscale majority class
majority_dir = os.path.join(balanced_dataset_dir, majority_class)
all_majority_imgs = os.listdir(majority_dir)
if len(all_majority_imgs) > minority_count:
    to_remove = random.sample(all_majority_imgs, len(all_majority_imgs) - minority_count)
    for img_name in to_remove:
        os.remove(os.path.join(majority_dir, img_name))

print(f"Balanced dataset created at '{balanced_dataset_dir}'")

# -----------------------------
# 2. Split Dataset (Train, Val, Test)
# -----------------------------
split_ratios = {'train': 0.7, 'val': 0.15, 'test': 0.15}
for split in split_ratios:
    for cls in classes:
        os.makedirs(os.path.join("dataset", split, cls), exist_ok=True)

for cls in classes:
    imgs = os.listdir(os.path.join(balanced_dataset_dir, cls))
    random.shuffle(imgs)
    n_total = len(imgs)
    n_train = int(split_ratios['train'] * n_total)
    n_val = int(split_ratios['val'] * n_total)

    # Train
    for img_name in imgs[:n_train]:
        shutil.copy(os.path.join(balanced_dataset_dir, cls, img_name), os.path.join("dataset/train", cls))
    # Validation
    for img_name in imgs[n_train:n_train+n_val]:
        shutil.copy(os.path.join(balanced_dataset_dir, cls, img_name), os.path.join("dataset/val", cls))
    # Test
    for img_name in imgs[n_train+n_val:]:
        shutil.copy(os.path.join(balanced_dataset_dir, cls, img_name), os.path.join("dataset/test", cls))

# -----------------------------
# 3. Prepare ImageDataGenerators
# -----------------------------
IMG_SIZE = (224, 224)
BATCH_SIZE = 16

train_datagen = ImageDataGenerator(
    rescale=1./255,
    rotation_range=20,
    width_shift_range=0.1,
    height_shift_range=0.1,
    zoom_range=0.1,
    horizontal_flip=True,
    fill_mode='nearest'
)

val_test_datagen = ImageDataGenerator(rescale=1./255)

train_generator = train_datagen.flow_from_directory(
    "dataset/train",
    target_size=IMG_SIZE,
    batch_size=BATCH_SIZE,
    class_mode='binary'
)

val_generator = val_test_datagen.flow_from_directory(
    "dataset/val",
    target_size=IMG_SIZE,
    batch_size=BATCH_SIZE,
    class_mode='binary'
)

# -----------------------------
# 4. Build Model (VGG16 Transfer Learning)
# -----------------------------
base_model = VGG16(weights='imagenet', include_top=False, input_shape=(IMG_SIZE[0], IMG_SIZE[1], 3))
base_model.trainable = False  # Freeze base model initially

x = base_model.output
x = Flatten()(x)  # VGG16 works better with Flatten
x = Dense(512, activation='relu')(x)
x = Dropout(0.5)(x)
output = Dense(1, activation='sigmoid')(x)

model = Model(inputs=base_model.input, outputs=output)

model.compile(optimizer=Adam(learning_rate=1e-4),
              loss='binary_crossentropy',
              metrics=['accuracy'])

model.summary()

# -----------------------------
# 5. Callbacks
# -----------------------------
early_stop = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)
checkpoint = ModelCheckpoint("best_vgg16_model.h5", monitor='val_loss', save_best_only=True)

# -----------------------------
# 6. Train Model
# -----------------------------
history = model.fit(
    train_generator,
    validation_data=val_generator,
    epochs=20,
    callbacks=[early_stop, checkpoint]
)

# -----------------------------
# 7. Fine-Tune Base Model (Optional)
# -----------------------------
# Unfreeze some layers for better performance
base_model.trainable = True
for layer in base_model.layers[:-4]:  # Freeze first layers, train last 4 conv blocks
    layer.trainable = False

model.compile(optimizer=Adam(1e-5),
              loss='binary_crossentropy',
              metrics=['accuracy'])

history_finetune = model.fit(
    train_generator,
    validation_data=val_generator,
    epochs=10,
    callbacks=[early_stop, checkpoint]
)

# -----------------------------
# 8. Evaluate on Test Set
# -----------------------------
test_generator = val_test_datagen.flow_from_directory(
    "dataset/test",
    target_size=IMG_SIZE,
    batch_size=BATCH_SIZE,
    class_mode='binary',
    shuffle=False
)

loss, accuracy = model.evaluate(test_generator)
print(f"Test Accuracy: {accuracy*100:.2f}%")

# Confusion Matrix and Classification Report
y_pred = model.predict(test_generator)
y_pred_classes = np.where(y_pred > 0.5, 1, 0)
y_true = test_generator.classes

print("Confusion Matrix:")
print(confusion_matrix(y_true, y_pred_classes))
print("\nClassification Report:")
print(classification_report(y_true, y_pred_classes, target_names=classes))


Majority (fracture): 2000
Minority (normal): 127
Balanced dataset created at './balanced_dataset'
Found 288 images belonging to 2 classes.
Found 72 images belonging to 2 classes.
Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/vgg16/vgg16_weights_tf_dim_ordering_tf_kernels_notop.h5
[1m58889256/58889256[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step


  self._warn_if_super_not_called()


Epoch 1/20
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m27s[0m 966ms/step - accuracy: 0.5673 - loss: 0.7471 - val_accuracy: 0.6389 - val_loss: 0.5266
Epoch 2/20
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 685ms/step - accuracy: 0.6203 - loss: 0.6700 - val_accuracy: 0.8889 - val_loss: 0.3990
Epoch 3/20
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 668ms/step - accuracy: 0.7525 - loss: 0.4859 - val_accuracy: 0.8750 - val_loss: 0.3418
Epoch 4/20
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 620ms/step - accuracy: 0.7200 - loss: 0.5290 - val_accuracy: 0.7778 - val_loss: 0.5000
Epoch 5/20
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 651ms/step - accuracy: 0.8049 - loss: 0.4248 - val_accuracy: 0.9167 - val_loss: 0.3137
Epoch 6/20
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 713ms/step - accuracy: 0.7642 - loss: 0.4293 - val_accuracy: 0.9306 - val_loss: 0.2964
Epoch 7/20
[1m18/18[