In [None]:
# ✅ Mount Drive
# Mount Google Drive at /content/drive so the dataset path below, which lives
# under that mount point, can be read like a local directory.
from google.colab import drive
drive.mount('/content/drive')

# ✅ Dataset path
# Root folder handed to load_dataset() and plot_sample_images(); each
# sub-directory is read as one class and its name is used as the label.
DATASET_DIR = '/content/drive/MyDrive/dataset_cancer_v1/classificacao_binaria/100X'

# ✅ Imports
import os
import random

import numpy as np
# import matplotlib.pyplot as plt
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout, Input, BatchNormalization, Activation, Concatenate, AveragePooling2D
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.preprocessing.image import load_img, img_to_array

# === CONFIG ===
IMG_SIZE = (224, 224)  # Standard for GoogLeNet
EPOCHS = 10
BATCH_SIZE = 32
LEARNING_RATE = 0.0001
SEED = 42

# Seed every random number generator the notebook touches (Python's `random`
# for image sampling, NumPy, and TensorFlow for weight initialisation and
# shuffling) so that Restart & Run All repeats the same run.
# NOTE(review): some GPU kernels may still be non-deterministic - confirm if
# bit-exact reproducibility is required.
random.seed(SEED)
np.random.seed(SEED)
tf.random.set_seed(SEED)

In [None]:
# === GoogLeNet Building Blocks ===
def inception_block(x, filters_1x1, filters_3x3_reduce, filters_3x3, filters_5x5_reduce, filters_5x5, filters_pool_proj, name):
    """Build one Inception module: four parallel branches joined on the last axis.

    Branches (all convolutions use 'same' padding and ReLU):
      * 1x1 convolution,
      * 1x1 reduction followed by a 3x3 convolution,
      * 1x1 reduction followed by a 5x5 convolution,
      * 3x3 max-pooling (stride 1) followed by a 1x1 projection.

    Args:
        x: Input tensor fed to every branch.
        filters_1x1: Filters of the 1x1 branch.
        filters_3x3_reduce: Filters of the 1x1 layer in front of the 3x3 conv.
        filters_3x3: Filters of the 3x3 convolution.
        filters_5x5_reduce: Filters of the 1x1 layer in front of the 5x5 conv.
        filters_5x5: Filters of the 5x5 convolution.
        filters_pool_proj: Filters of the 1x1 projection after pooling.
        name: Prefix used for the name of every layer created here.

    Returns:
        The concatenation of the four branch outputs.
    """
    def relu_conv(inputs, n_filters, kernel, layer_name):
        # Shared recipe for every convolution in the module.
        return Conv2D(n_filters, kernel, padding='same', activation='relu', name=layer_name)(inputs)

    # Branch 1: plain 1x1 convolution
    branch_1x1 = relu_conv(x, filters_1x1, 1, f'{name}_1x1')

    # Branch 2: 1x1 bottleneck, then 3x3
    branch_3x3 = relu_conv(x, filters_3x3_reduce, 1, f'{name}_3x3_reduce')
    branch_3x3 = relu_conv(branch_3x3, filters_3x3, 3, f'{name}_3x3')

    # Branch 3: 1x1 bottleneck, then 5x5
    branch_5x5 = relu_conv(x, filters_5x5_reduce, 1, f'{name}_5x5_reduce')
    branch_5x5 = relu_conv(branch_5x5, filters_5x5, 5, f'{name}_5x5')

    # Branch 4: size-preserving max-pool, then 1x1 projection
    pooled = MaxPooling2D(3, strides=1, padding='same', name=f'{name}_pool')(x)
    branch_pool = relu_conv(pooled, filters_pool_proj, 1, f'{name}_pool_proj')

    branches = [branch_1x1, branch_3x3, branch_5x5, branch_pool]
    return Concatenate(axis=-1, name=f'{name}_concat')(branches)

def inception_block_with_reduction(x, filters_1x1, filters_3x3_reduce, filters_3x3, filters_5x5_reduce, filters_5x5, filters_pool_proj, name):
    """Inception block that also downsamples height and width with stride 2.

    It has the same four-branch layout as ``inception_block``. Every branch
    reduces the spatial size with stride 2 and 'same' padding, so all branch
    outputs share one height/width and can be concatenated on the last axis.

    Args:
        x: Input tensor fed to every branch.
        filters_1x1: Filters of the strided 1x1 branch.
        filters_3x3_reduce: Filters of the 1x1 layer in front of the 3x3 conv.
        filters_3x3: Filters of the strided 3x3 convolution.
        filters_5x5_reduce: Filters of the 1x1 layer in front of the 5x5 conv.
        filters_5x5: Filters of the strided 5x5 convolution.
        filters_pool_proj: Filters of the 1x1 projection after the strided
            max-pool.
        name: Prefix used for the name of every layer created here.

    Returns:
        The concatenation of the four downsampled branch outputs; its channel
        count is ``filters_1x1 + filters_3x3 + filters_5x5 + filters_pool_proj``.
    """

    # 1x1 convolution branch. It must be strided like the other branches:
    # at stride 1 it keeps the full resolution and Concatenate rejects the
    # mismatched height/width.
    conv1x1 = Conv2D(filters_1x1, 1, strides=2, padding='same', activation='relu', name=f'{name}_1x1')(x)

    # 3x3 convolution branch (the reduction stays at stride 1, the 3x3 downsamples)
    conv3x3_reduce = Conv2D(filters_3x3_reduce, 1, padding='same', activation='relu', name=f'{name}_3x3_reduce')(x)
    conv3x3 = Conv2D(filters_3x3, 3, strides=2, padding='same', activation='relu', name=f'{name}_3x3')(conv3x3_reduce)

    # 5x5 convolution branch (the reduction stays at stride 1, the 5x5 downsamples)
    conv5x5_reduce = Conv2D(filters_5x5_reduce, 1, padding='same', activation='relu', name=f'{name}_5x5_reduce')(x)
    conv5x5 = Conv2D(filters_5x5, 5, strides=2, padding='same', activation='relu', name=f'{name}_5x5')(conv5x5_reduce)

    # Pooling branch: strided max-pool, then the 1x1 projection sized by
    # filters_pool_proj (previously accepted but never applied).
    pool_proj = MaxPooling2D(3, strides=2, padding='same', name=f'{name}_pool')(x)
    pool_proj = Conv2D(filters_pool_proj, 1, padding='same', activation='relu', name=f'{name}_pool_proj')(pool_proj)

    # Concatenate all branches
    output = Concatenate(axis=-1, name=f'{name}_concat')([conv1x1, conv3x3, conv5x5, pool_proj])

    return output

In [None]:
# === Visualize Sample Images ===
def plot_sample_images(dataset_path, num_images_per_class=5):
    """Load a random sample of images from every class folder for preview.

    Each sub-directory of ``dataset_path`` is treated as one class. Entries
    that are not directories are skipped, as ``load_dataset`` does, so a
    stray file in the dataset root no longer raises ``NotADirectoryError``.

    The matplotlib calls are currently commented out, so at the moment the
    function only loads the sampled images and draws nothing.

    Args:
        dataset_path: Directory containing one sub-directory per class.
        num_images_per_class: Maximum number of images sampled per class;
            classes with fewer images contribute all of them.

    Returns:
        None.
    """
    image_exts = ('.png', '.jpg', '.jpeg')
    classes = sorted(
        entry for entry in os.listdir(dataset_path)
        if os.path.isdir(os.path.join(dataset_path, entry))
    )
    # plt.figure(figsize=(15, len(classes) * 2.5))
    for class_idx, class_name in enumerate(classes):
        class_path = os.path.join(dataset_path, class_name)
        # Sorted so that, for a given random seed, the same files are sampled
        # regardless of the order the filesystem lists them in.
        images = sorted(img for img in os.listdir(class_path) if img.lower().endswith(image_exts))
        sample_images = random.sample(images, min(num_images_per_class, len(images)))
        for i, image_name in enumerate(sample_images):
            img_path = os.path.join(class_path, image_name)
            img = load_img(img_path, target_size=IMG_SIZE)
            # plt.subplot(len(classes), num_images_per_class, class_idx * num_images_per_class + i + 1)
            # plt.imshow(img)
            # plt.axis('off')
            # plt.title(class_name)
    # plt.tight_layout()
    # plt.show()

# === Load Dataset ===
def load_dataset(path):
    """Read every image under ``path`` into memory, labelled by folder name.

    Each sub-directory of ``path`` is one class. Files whose name ends in
    .png, .jpg or .jpeg (case-insensitive) are loaded, resized to
    ``IMG_SIZE`` and scaled by 1/255. Other files, and non-directory entries
    in ``path`` itself, are ignored.

    Folders and files are visited in sorted order. ``os.listdir`` gives no
    ordering guarantee, so without sorting the sample order, and with it any
    seeded shuffle or split done afterwards, could change between runs or
    machines.

    Args:
        path: Dataset root directory containing one sub-directory per class.

    Returns:
        ``(images, labels)``: a NumPy array stacking the loaded images and a
        NumPy array with the folder name of each image, in the same order.
    """
    image_exts = ('.png', '.jpg', '.jpeg')
    images = []
    labels = []
    for label_name in sorted(os.listdir(path)):
        label_folder = os.path.join(path, label_name)
        if not os.path.isdir(label_folder):
            continue
        for fname in sorted(os.listdir(label_folder)):
            if not fname.lower().endswith(image_exts):
                continue
            img_path = os.path.join(label_folder, fname)
            img = load_img(img_path, target_size=IMG_SIZE)
            images.append(img_to_array(img) / 255.0)
            labels.append(label_name)
    return np.array(images), np.array(labels)

In [None]:
# === Load and Prepare Data ===
# Read the whole dataset into memory: X holds the images, y the folder-name labels.
print("🔄 Loading dataset...")
X, y = load_dataset(path=DATASET_DIR)
print(f"✅ Loaded {len(X)} images.")

# === Visualize ===
# Sample a few images per class from the same dataset folder.
print("🖼 Displaying sample images...")
plot_sample_images(dataset_path=DATASET_DIR)

In [None]:
# === Encode Labels ===
# LabelEncoder maps the sorted class names to 0..n-1.
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)

# A single sigmoid unit trained with binary cross-entropy is only meaningful
# for exactly two classes. Fail loudly if the dataset folder yielded anything
# else (e.g. an extra sub-folder containing images) instead of silently
# training on labels outside {0, 1}.
if len(label_encoder.classes_) != 2:
    raise ValueError(
        "Binary classification expects exactly 2 class folders, found "
        f"{len(label_encoder.classes_)}: {list(label_encoder.classes_)}"
    )

loss_fn = 'binary_crossentropy'
activation_fn = 'sigmoid'
output_units = 1

# === Split Dataset ===
# Stratified 80/20 split with a fixed random_state, so both sets keep the
# class ratio and the split is repeatable for the same sample order.
X_train, X_test, y_train, y_test = train_test_split(
    X, y_encoded, test_size=0.2, stratify=y_encoded, random_state=42
)

In [None]:
# === Build GoogLeNet Model ===
def build_googlenet_model(input_shape=(224, 224, 3), num_classes=1):
    """Assemble a GoogLeNet-style classifier as an uncompiled Keras ``Model``.

    Layout: stem (7x7 stride-2 conv, max-pool, 1x1 conv, 3x3 conv, max-pool),
    inception 3a-3b, reducing block 3c, inception 4a-4d, reducing block 4e,
    inception 5a-5b, average pooling over the whole remaining feature map,
    then Dense(512) and Dense(256) with dropout 0.5 and the output layer.

    The final pooling window is derived from the feature map's static shape
    rather than fixed at 7x7. For a 224x224 input this is still 7x7, so the
    model is unchanged, while other input sizes now build as well.

    Args:
        input_shape: ``(height, width, channels)`` of one input image. Height
            and width must be known (not ``None``).
        num_classes: Number of units in the output layer.

    Returns:
        A functional ``Model`` named ``'GoogLeNet'``.

    Raises:
        ValueError: If the spatial size of the last feature map is unknown.

    Note:
        The output activation is read from the module-level ``activation_fn``,
        which must be defined before this function is called.
    """
    inputs = Input(shape=input_shape)

    # Initial convolution
    x = Conv2D(64, 7, strides=2, padding='same', activation='relu', name='conv1')(inputs)
    x = MaxPooling2D(3, strides=2, padding='same', name='maxpool1')(x)

    # Second convolution
    x = Conv2D(64, 1, padding='same', activation='relu', name='conv2_1x1')(x)
    x = Conv2D(192, 3, padding='same', activation='relu', name='conv2_3x3')(x)
    x = MaxPooling2D(3, strides=2, padding='same', name='maxpool2')(x)

    # Inception blocks
    # Inception 3a
    x = inception_block(x, 64, 96, 128, 16, 32, 32, 'inception_3a')

    # Inception 3b
    x = inception_block(x, 128, 128, 192, 32, 96, 64, 'inception_3b')

    # Inception 3c (with reduction)
    x = inception_block_with_reduction(x, 192, 96, 208, 16, 48, 64, 'inception_3c')

    # Inception 4a
    x = inception_block(x, 192, 96, 208, 16, 48, 64, 'inception_4a')

    # Inception 4b
    x = inception_block(x, 160, 112, 224, 24, 64, 64, 'inception_4b')

    # Inception 4c
    x = inception_block(x, 128, 128, 256, 24, 64, 64, 'inception_4c')

    # Inception 4d
    x = inception_block(x, 112, 144, 288, 32, 64, 64, 'inception_4d')

    # Inception 4e (with reduction)
    x = inception_block_with_reduction(x, 256, 160, 320, 32, 128, 128, 'inception_4e')

    # Inception 5a
    x = inception_block(x, 256, 160, 320, 32, 128, 128, 'inception_5a')

    # Inception 5b
    x = inception_block(x, 384, 192, 384, 48, 128, 128, 'inception_5b')

    # Global average pooling: the window covers the entire remaining feature
    # map. A hard-coded (7, 7) window only fits a 224x224 input and fails for
    # smaller images.
    pool_height, pool_width = x.shape[1], x.shape[2]
    if pool_height is None or pool_width is None:
        raise ValueError(
            "build_googlenet_model needs a fixed input height and width; "
            f"got input_shape={input_shape}"
        )
    x = AveragePooling2D(pool_size=(int(pool_height), int(pool_width)), name='avgpool')(x)
    x = Flatten(name='flatten')(x)

    # Fully connected layers
    x = Dense(512, activation='relu', name='fc1')(x)
    x = Dropout(0.5, name='dropout1')(x)
    x = Dense(256, activation='relu', name='fc2')(x)
    x = Dropout(0.5, name='dropout2')(x)
    outputs = Dense(num_classes, activation=activation_fn, name='predictions')(x)

    model = Model(inputs, outputs, name='GoogLeNet')
    return model

# Instantiate the network for 3-channel images of the configured size.
model = build_googlenet_model(
    num_classes=output_units,
    input_shape=(*IMG_SIZE, 3),
)

# Attach optimizer, loss and metric, then print the layer-by-layer summary.
model.compile(
    loss=loss_fn,
    metrics=['accuracy'],
    optimizer=Adam(learning_rate=LEARNING_RATE),
)
model.summary()

In [None]:
# === Train Model ===
# Fit on the training split, with 20% of it set aside by Keras for
# validation; `history` keeps the metrics returned by fit().
history = model.fit(
    x=X_train,
    y=y_train,
    batch_size=BATCH_SIZE,
    epochs=EPOCHS,
    validation_split=0.2,
)

In [None]:
# === Evaluate ===
# Score the trained model on the held-out test split; evaluate() returns the
# loss followed by the compiled metric (accuracy).
test_loss, test_acc = model.evaluate(x=X_test, y=y_test)
print(f"📊 Test Accuracy: {test_acc:.2f}")

In [None]:
# === Prediction Function ===
def predict_image(img_path):
    """Classify a single image file with the trained model and print the result.

    The image is preprocessed as in ``load_dataset`` (resized to ``IMG_SIZE``,
    scaled by 1/255) and passed to the module-level ``model``. The sigmoid
    output is thresholded at 0.5 to pick class index 0 or 1. The class name
    is looked up in the fitted module-level ``label_encoder`` instead of being
    hard-coded, so it always matches the encoding used for training.

    Args:
        img_path: Path of the image file to classify.

    Returns:
        ``(label, confidence)``: the capitalised class name and the
        probability assigned to that class, as a float.
    """
    img = load_img(img_path, target_size=IMG_SIZE)
    img_array = img_to_array(img) / 255.0
    # The model expects a batch, so add a leading batch axis of size 1.
    img_array = np.expand_dims(img_array, axis=0)

    prediction = model.predict(img_array)
    probability = float(prediction[0][0])  # sigmoid output = P(class index 1)
    class_index = int(probability > 0.5)
    label = str(label_encoder.classes_[class_index]).capitalize()
    confidence = probability if class_index == 1 else 1.0 - probability

    print(f"Prediction: {label} ({confidence:.2f})")
    # plt.imshow(load_img(img_path))
    # plt.title(f"{label} ({confidence:.2f})")
    # plt.axis('off')
    # plt.show()
    return label, confidence

# Example:
# predict_image('/content/drive/MyDrive/dataset_cancer_v1/classificacao_binaria/100X/benign/img001.png')