In [None]:
import shutil

# Remove folders left behind by an earlier session.
# ignore_errors=True: on a fresh runtime these folders do not exist yet, and a
# plain rmtree would raise FileNotFoundError and stop "Restart & Run All" here.
shutil.rmtree('images_data', ignore_errors=True)
shutil.rmtree('labels_data', ignore_errors=True)

In [None]:
# --- Required Libraries ---
import os
import zipfile
import shutil
import cv2
import numpy as np
import pandas as pd
from pathlib import Path
from sklearn.model_selection import train_test_split
from sklearn.utils.class_weight import compute_class_weight
from sklearn.metrics import classification_report
import tensorflow as tf
from tensorflow.keras import layers, models
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.applications.resnet import preprocess_input

# --- Upload images.zip and labels.zip in Colab ---
# Prompts for files through Colab's upload helper. The extraction step further
# down opens "images.zip" and "labels.zip" by those exact relative names, so
# both archives must end up in the working directory under those names.
# `uploaded` holds the value returned by files.upload(); it is not referenced
# again in the visible part of this cell.
# NOTE(review): presumably Colab saves a re-upload under a new name such as
# "images (1).zip" when the file already exists, which would leave a stale
# archive in use — confirm.
from google.colab import files
uploaded = files.upload()

# --- Unzip Data and Flatten Directories ---
def unzip_and_flatten(zip_path, target_dir):
    """Extract a zip archive and move every extracted file to the top of ``target_dir``.

    After the call ``target_dir`` contains only regular files: files found in
    nested folders are moved up one by one and the (now redundant) folders are
    deleted.

    Args:
        zip_path: Path of the archive to extract (str or path-like).
        target_dir: Destination folder (str or path-like).

    Warns:
        UserWarning: when a nested file has the same name as an entry that is
            already at the top level. The move still happens (the later file
            replaces the earlier one, as before), but the collision is no
            longer silent.
    """
    import warnings  # local import so the function does not depend on a new top-of-cell import

    target_dir = os.fspath(target_dir)

    with zipfile.ZipFile(zip_path, 'r') as zip_ref:
        zip_ref.extractall(target_dir)

    # Archives built with the macOS Finder carry a "__MACOSX" folder of "._name"
    # metadata stubs. Flattening them would drop undecodable "._foo.jpg" /
    # "._foo.txt" files next to the real data, so discard the folder first.
    shutil.rmtree(os.path.join(target_dir, "__MACOSX"), ignore_errors=True)

    # `filenames` (not `files`) so the loop does not reuse the name of the
    # google.colab `files` helper imported at the top of the cell.
    for root, _, filenames in os.walk(target_dir):
        for filename in filenames:
            src = os.path.join(root, filename)
            dst = os.path.join(target_dir, filename)
            if src == dst:
                # Already at the top level.
                continue
            if os.path.lexists(dst):
                warnings.warn(
                    f"unzip_and_flatten: '{src}' replaces an existing entry "
                    f"named '{filename}' in '{target_dir}'",
                    stacklevel=2,
                )
            shutil.move(src, dst)

    # Every file has been moved up; whatever folders remain are leftovers.
    for entry in os.listdir(target_dir):
        entry_path = os.path.join(target_dir, entry)
        if os.path.isdir(entry_path):
            shutil.rmtree(entry_path)

# Prepare directories: start from an empty "dataset" tree on every run so files
# from an earlier extraction cannot mix with the new upload.
base_dir = Path("dataset")
img_dir, lbl_dir = base_dir / "images", base_dir / "labels"
shutil.rmtree(base_dir, ignore_errors=True)
for _folder in (img_dir, lbl_dir):
    _folder.mkdir(parents=True)

# Extract: each archive is unpacked and flattened into its own folder.
for _archive, _destination in (("images.zip", img_dir), ("labels.zip", lbl_dir)):
    unzip_and_flatten(_archive, _destination)

# --- Data Preparation ---
# Builds two parallel lists: `inputs` (IMG_DIM x IMG_DIM x 3 float32 arrays) and
# `labels` (1 = the annotation file has a class-'1' row, 0 = it does not).
IMG_DIM = 224
inputs, labels = [], []

# sorted(): os.listdir returns entries in arbitrary order; sorting fixes the
# sample order so the seeded train/validation split is the same on every run.
for img_file in sorted(os.listdir(img_dir)):
    if not img_file.lower().endswith((".jpg", ".jpeg", ".png")):
        continue

    image_name = Path(img_file).stem
    label_file = lbl_dir / f"{image_name}.txt"
    image_path = img_dir / img_file

    # An image without a matching "<stem>.txt" is deleted from the extracted
    # copy and skipped.
    if not label_file.exists():
        os.remove(image_path)
        continue

    # Decode the image BEFORE touching the label lists. Previously the label was
    # appended first, so an unreadable image left `labels` one entry longer than
    # `inputs` and shifted every later label onto the wrong image.
    img = cv2.imread(str(image_path), cv2.IMREAD_GRAYSCALE)
    if img is None:
        continue

    with open(label_file, "r") as fh:
        lines = [l.strip() for l in fh.readlines() if l.strip()]

    # Positive when any row has exactly five fields and starts with class '1'.
    # NOTE(review): presumably YOLO-style "class cx cy w h" rows — confirm.
    has_tumor = any(
        len(parts) == 5 and parts[0] == '1'
        for parts in (line.split() for line in lines)
    )

    # Letterbox: scale the longer side to IMG_DIM, keep the aspect ratio, then
    # pad the shorter side with black so the result is IMG_DIM x IMG_DIM.
    h, w = img.shape
    scale = IMG_DIM / max(h, w)
    # max(1, ...): int() truncation can give 0 for extremely elongated images,
    # which cv2.resize rejects.
    new_h, new_w = max(1, int(h * scale)), max(1, int(w * scale))
    resized = cv2.resize(img, (new_w, new_h))
    pad_top = (IMG_DIM - new_h) // 2
    pad_bottom = IMG_DIM - new_h - pad_top
    pad_left = (IMG_DIM - new_w) // 2
    pad_right = IMG_DIM - new_w - pad_left
    padded_img = cv2.copyMakeBorder(resized, pad_top, pad_bottom, pad_left, pad_right, cv2.BORDER_CONSTANT, value=0)

    # Grayscale -> 3 identical channels (the ResNet50 input below is declared
    # with 3 channels), then the ResNet preprocessing function.
    padded_img = np.expand_dims(padded_img, axis=-1)
    padded_img = np.repeat(padded_img, 3, axis=-1)
    padded_img = preprocess_input(padded_img.astype(np.float32))

    # Append image and label together so the two lists can never drift apart.
    inputs.append(padded_img)
    labels.append(1 if has_tumor else 0)

if not inputs:
    raise ValueError(f"No usable image/label pairs found in {img_dir} and {lbl_dir}")

X = np.array(inputs)
y = np.array(labels)

# --- Train/Validation Split ---
# 80/20 split, stratified on y and seeded with random_state=42.
X_train, X_val, y_train, y_val = train_test_split(X, y, stratify=y, test_size=0.2, random_state=42)

# --- Data Augmentation ---
# Small random flips / rotations / zooms; this Sequential block is called on the
# model input when the network is assembled.
augment = tf.keras.Sequential([
    layers.RandomFlip("horizontal"),
    layers.RandomRotation(0.05),
    layers.RandomZoom(0.05),
])

# --- Class Weights ---
# compute_class_weight returns one weight per entry of `classes`, in that order.
# Key the dict by the actual class value instead of by position (enumerate):
# the two only coincide when the labels present are exactly 0..k-1.
train_classes = np.unique(y_train)
weights = compute_class_weight(class_weight='balanced', classes=train_classes, y=y_train)
weights = {int(cls): float(wt) for cls, wt in zip(train_classes, weights)}

# --- Build ResNet50 Model ---
# ImageNet-initialised ResNet50 without its classification top, frozen for the
# first training phase.
base = ResNet50(include_top=False, input_shape=(IMG_DIM, IMG_DIM, 3), weights="imagenet")
base.trainable = False  # Initially freeze

# Classification head stacked on the backbone features, applied in list order.
inp = layers.Input(shape=(IMG_DIM, IMG_DIM, 3))
x = base(augment(inp), training=False)
for head_layer in (
    layers.GlobalAveragePooling2D(),
    layers.BatchNormalization(),
    layers.Dense(128, activation='relu'),
    layers.Dropout(0.3),
):
    x = head_layer(x)
out = layers.Dense(1, activation='sigmoid')(x)

model = models.Model(inputs=inp, outputs=out)

# Compile Model (Initial Phase)
initial_optimizer = tf.keras.optimizers.Adam(learning_rate=1e-4)
initial_metrics = ['accuracy', tf.keras.metrics.Precision(), tf.keras.metrics.Recall()]
model.compile(
    optimizer=initial_optimizer,
    loss='binary_crossentropy',
    metrics=initial_metrics,
)

# --- Callbacks ---
# Both callbacks watch val_loss: one stops training after 5 epochs without
# improvement (restoring the best weights), the other halves the learning rate
# after 2 such epochs.
early_stopping = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss',
    patience=5,
    restore_best_weights=True,
)
lr_on_plateau = tf.keras.callbacks.ReduceLROnPlateau(
    monitor='val_loss',
    factor=0.5,
    patience=2,
    verbose=1,
)
callbacks = [early_stopping, lr_on_plateau]

# --- Train (Initial with Frozen Base) ---
history = model.fit(
    X_train,
    y_train,
    batch_size=16,
    epochs=10,
    validation_data=(X_val, y_val),
    class_weight=weights,
    callbacks=callbacks,
)

# --- Fine-tune: Unfreeze Last 40 Layers ---
# Mark the whole backbone trainable, then re-freeze everything except its last
# 40 layers.
base.trainable = True
layers_kept_frozen = base.layers[:-40]
for layer in layers_kept_frozen:
    layer.trainable = False

# Compile again with a 10x smaller learning rate than the initial phase.
fine_tune_optimizer = tf.keras.optimizers.Adam(learning_rate=1e-5)
fine_tune_metrics = ['accuracy', tf.keras.metrics.Precision(), tf.keras.metrics.Recall()]
model.compile(
    optimizer=fine_tune_optimizer,
    loss='binary_crossentropy',
    metrics=fine_tune_metrics,
)

# --- Continue Training ---
fine_tune_history = model.fit(
    X_train,
    y_train,
    batch_size=16,
    epochs=20,
    validation_data=(X_val, y_val),
    class_weight=weights,
    callbacks=callbacks,
)

# --- Evaluate ---
# Threshold the sigmoid outputs at 0.5 to get hard 0/1 predictions for the
# validation split, then print the per-class report with 4 decimal places.
val_probabilities = model.predict(X_val)
y_pred = (val_probabilities > 0.5).astype(int)
print("\n Classification Report:")
report_text = classification_report(y_val, y_pred, digits=4)
print(report_text)


Saving labels.zip to labels.zip
Saving images.zip to images.zip
Epoch 1/10
[1m44/44[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 102ms/step - accuracy: 0.5194 - loss: 0.9568 - precision_1: 0.5773 - recall_1: 0.2933 - val_accuracy: 0.5170 - val_loss: 0.7234 - val_precision_1: 0.5522 - val_recall_1: 0.4022 - learning_rate: 1.0000e-04
Epoch 2/10
[1m44/44[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 28ms/step - accuracy: 0.6416 - loss: 0.6950 - precision_1: 0.7206 - recall_1: 0.5352 - val_accuracy: 0.5852 - val_loss: 0.6950 - val_precision_1: 0.6092 - val_recall_1: 0.5761 - learning_rate: 1.0000e-04
Epoch 3/10
[1m44/44[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 27ms/step - accuracy: 0.6778 - loss: 0.6064 - precision_1: 0.7250 - recall_1: 0.6043 - val_accuracy: 0.5682 - val_loss: 0.6717 - val_precision_1: 0.5784 - val_recall_1: 0.6413 - learning_rate: 1.0000e-04
Epoch 4/10
[1m44/44[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 27ms/step - accuracy: 