In [None]:
import pandas as pd
import numpy as np
import os
import tensorflow as tf
from sklearn.model_selection import train_test_split


In [None]:
import pandas as pd

# Read the annotation table for the training set from Google Drive.
csv_path = "/content/drive/My Drive/MIDOG25_Atypical_Classification_Train_Set.csv"
df = pd.read_csv(filepath_or_buffer=csv_path)

# Show which columns the CSV provides before later cells rely on them.
print(df.columns)


In [None]:
image_dir = "/content/drive/My Drive/MIDOG25_Binary_Classification_Train_Set/"

# Swap the ".tiff" extension for ".png" (the images are decoded as PNG later on).
# The pattern is anchored at the end of the name so that only the extension is
# rewritten; a plain str.replace would also rewrite ".tiff" occurring elsewhere
# in a filename. The vectorized .str accessor replaces the per-row lambda.
df['filename'] = df['filename'].str.replace(r"\.tiff$", ".png", regex=True)

# image_dir ends with "/", so plain concatenation yields a full path per row.
df['filepath'] = image_dir + df['filename']


In [None]:
# Numeric encoding of the 'majority' column: NMF -> 0, AMF -> 1.
label_map = dict(NMF=0, AMF=1)
df['label'] = df['majority'].map(label_map)

# Keep only the rows whose image file is actually present on disk (important!)
import os
df = df[df['filepath'].map(os.path.exists)]


In [None]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for validation. Stratifying on the label keeps the
# class ratio the same in both parts, and the fixed seed makes the split
# reproducible across runs.
all_filepaths = df['filepath'].values
all_labels = df['label'].values

train_paths, val_paths, train_labels, val_labels = train_test_split(
    all_filepaths, all_labels, test_size=0.2, stratify=all_labels, random_state=42
)


In [None]:
import tensorflow as tf

# Number of examples per batch; used by the .batch() calls on train_ds / val_ds.
BATCH_SIZE = 32
# Target size handed to tf.image.resize in process_image.
IMG_SIZE = (224, 224)

def process_image(file_path, label):
    """Load one image file and return it, resized and rescaled, with its label.

    Args:
        file_path: Path of the PNG file to read.
        label: Label associated with the image; returned unchanged.

    Returns:
        A ``(image, label)`` pair. The image is decoded with 3 channels,
        resized to ``IMG_SIZE`` and divided by 255.
    """
    raw_bytes = tf.io.read_file(file_path)
    decoded = tf.image.decode_png(raw_bytes, channels=3)
    resized = tf.image.resize(decoded, IMG_SIZE)
    return resized / 255.0, label

# Start from (path, label) pairs; images are only read inside .map() below.
train_ds = tf.data.Dataset.from_tensor_slices((train_paths, train_labels))
val_ds = tf.data.Dataset.from_tensor_slices((val_paths, val_labels))

# Shuffle BEFORE decoding: at this point each element is just a path string and
# a label, so a buffer covering the whole training set costs almost nothing and
# gives a uniform reshuffle every epoch. A fixed buffer of 1000 only mixes
# elements locally once the training set is larger than that.
# max(..., 1) keeps the buffer size valid even for an empty split.
train_ds = (train_ds
    .shuffle(max(len(train_paths), 1))
    .map(process_image, num_parallel_calls=tf.data.AUTOTUNE)
    .batch(BATCH_SIZE)
    .prefetch(tf.data.AUTOTUNE)
)

# Validation data is never shuffled, so its order matches val_paths / val_labels.
val_ds = (val_ds
    .map(process_image, num_parallel_calls=tf.data.AUTOTUNE)
    .batch(BATCH_SIZE)
    .prefetch(tf.data.AUTOTUNE)
)


In [None]:
import matplotlib.pyplot as plt

# Index in this list == numeric label (0 -> NMF, 1 -> AMF).
class_names = ['NMF', 'AMF']

# Draw the first nine images of one training batch on a 3x3 grid, each titled
# with its class name.
for batch_images, batch_labels in train_ds.take(1):
    plt.figure(figsize=(10, 10))
    for position in range(1, 10):
        sample = position - 1
        ax = plt.subplot(3, 3, position)
        ax.imshow(batch_images[sample].numpy())
        ax.set_title(class_names[batch_labels[sample].numpy()])
        ax.axis("off")


CNN


In [None]:
# `model` was not defined anywhere earlier in the notebook, so this cell raised
# NameError on a fresh kernel (Restart & Run All). Define a small baseline CNN,
# trained from scratch, before fitting it.
# NOTE(review): the architecture below is a generic baseline; replace it if a
# specific CNN was intended for this section.
model = tf.keras.Sequential([
    tf.keras.Input(shape=IMG_SIZE + (3,)),
    tf.keras.layers.Conv2D(32, 3, activation='relu'),
    tf.keras.layers.MaxPooling2D(),
    tf.keras.layers.Conv2D(64, 3, activation='relu'),
    tf.keras.layers.MaxPooling2D(),
    tf.keras.layers.Conv2D(128, 3, activation='relu'),
    tf.keras.layers.MaxPooling2D(),
    tf.keras.layers.GlobalAveragePooling2D(),
    tf.keras.layers.Dropout(0.3),
    tf.keras.layers.Dense(1, activation='sigmoid'),  # Binary classification
])
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

model.fit(train_ds, validation_data=val_ds, epochs=5)


In [None]:
from collections import Counter

# train_ds is built from (train_paths, train_labels) and process_image passes
# the label through untouched, so the labels it yields are exactly
# train_labels. Count them directly instead of iterating train_ds.unbatch(),
# which would read, decode and resize every training image just to look at
# its label.
label_counter = Counter(int(label) for label in train_labels)

print("Label distribution in train_ds:", label_counter)



Compute Class Weights

In [None]:
# Step 1a: Turn the 'majority' column into numeric labels (NMF -> 0, AMF -> 1)
label_map = dict(NMF=0, AMF=1)
df['label'] = df['majority'].map(label_map)

# Step 1b: Full image path per row (extension switched from .tiff to .png)
image_dir = '/content/drive/My Drive/MIDOG25_Binary_Classification_Train_Set'
df['filepath'] = df['filename'].map(
    lambda name: os.path.join(image_dir, name.replace('.tiff', '.png'))
)

# Step 1c: Lookup table {filepath: label}
path_to_label = {
    path: label for path, label in zip(df['filepath'], df['label'])
}


In [None]:
from sklearn.utils.class_weight import compute_class_weight
import numpy as np

# 'balanced' weights are computed from the training labels for classes 0 and 1.
class_weights = compute_class_weight(class_weight='balanced', classes=np.array([0, 1]), y=train_labels)

# Keras expects {class_index: weight}; the position in class_weights is the
# class index because `classes` above is [0, 1].
class_weight_dict = {
    class_index: weight for class_index, weight in enumerate(class_weights)
}
print("Class weights:", class_weight_dict)


Common Setup (for both models)



In [None]:
import tensorflow as tf
from tensorflow.keras import layers, models
from tensorflow.keras.applications import MobileNetV2, ResNet50

IMG_SIZE = (224, 224)
# Image size plus three colour channels.
INPUT_SHAPE = (*IMG_SIZE, 3)

# Shared input tensor definition; identical for both models.
base_input = tf.keras.Input(shape=INPUT_SHAPE)

MobileNetV2 Model



In [None]:
mobilenet_base = MobileNetV2(input_shape=INPUT_SHAPE, include_top=False, weights='imagenet')
mobilenet_base.trainable = False  # Freeze base

# The input pipeline (process_image) divides pixels by 255, i.e. it feeds
# values in [0, 1]. The ImageNet MobileNetV2 weights expect inputs scaled to
# [-1, 1] (what mobilenet_v2.preprocess_input produces), so map
# [0, 1] -> [-1, 1] inside the model: x * 2 - 1. Keeping this in the model
# leaves the datasets unchanged for plotting and for the other model.
mobilenet_model = models.Sequential([
    layers.Input(shape=INPUT_SHAPE),
    layers.Rescaling(scale=2.0, offset=-1.0),
    mobilenet_base,
    layers.GlobalAveragePooling2D(),
    layers.Dropout(0.3),
    layers.Dense(1, activation='sigmoid')  # Binary classification
])

mobilenet_model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
mobilenet_model.summary()


In [None]:
# Train the MobileNetV2 classifier head for 5 epochs; class_weight_dict weights
# the loss per class.
mobilenet_model.fit(train_ds, validation_data=val_ds, epochs=5, class_weight=class_weight_dict)


ResNet50 Model

In [None]:
resnet_base = ResNet50(input_shape=INPUT_SHAPE, include_top=False, weights='imagenet')
resnet_base.trainable = False  # Freeze base

# The input pipeline (process_image) feeds RGB values divided by 255, i.e. in
# [0, 1]. The ImageNet ResNet50 weights expect inputs that went through
# resnet50.preprocess_input (RGB -> BGR and per-channel mean subtraction on the
# 0-255 scale). Undo the 1/255 scaling and apply that preprocessing inside the
# model so the frozen backbone sees the input distribution it was trained on.
# The functional API is used because preprocess_input is a function, not a layer.
resnet_inputs = tf.keras.Input(shape=INPUT_SHAPE)
resnet_features = layers.Rescaling(255.0)(resnet_inputs)
resnet_features = tf.keras.applications.resnet50.preprocess_input(resnet_features)
# training=False keeps the frozen backbone (incl. BatchNorm) in inference mode.
resnet_features = resnet_base(resnet_features, training=False)
resnet_features = layers.GlobalAveragePooling2D()(resnet_features)
resnet_features = layers.Dropout(0.3)(resnet_features)
resnet_outputs = layers.Dense(1, activation='sigmoid')(resnet_features)  # Binary classification

resnet_model = models.Model(resnet_inputs, resnet_outputs)

resnet_model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
resnet_model.summary()


In [None]:
# Train the ResNet50 classifier head for 5 epochs; class_weight_dict weights
# the loss per class.
resnet_model.fit(train_ds, validation_data=val_ds, epochs=5, class_weight=class_weight_dict)


In [None]:
from collections import Counter
# val_ds is built from (val_paths, val_labels), is never shuffled, and
# process_image returns the label unchanged, so its labels are exactly
# val_labels in the same order. Read them directly instead of iterating
# val_ds.unbatch(), which would decode and resize every validation image.
y_val = [int(label) for label in val_labels]
print(Counter(y_val))


In [None]:
# Numeric encoding of the 'majority' column.
majority_to_label = {'NMF': 0, 'AMF': 1}  # or reverse if needed
df['label'] = df['majority'].map(majority_to_label)


In [None]:
def preprocess_image(file_path, label):
    """Load one image file and apply ResNet50 input preprocessing.

    Args:
        file_path: Path of the PNG file to read.
        label: Label associated with the image; returned unchanged.

    Returns:
        A ``(image, label)`` pair. The image is decoded with 3 channels,
        resized to 224x224 and passed through
        ``tf.keras.applications.resnet50.preprocess_input``.
    """
    raw_bytes = tf.io.read_file(file_path)
    decoded = tf.image.decode_png(raw_bytes, channels=3)
    resized = tf.image.resize(decoded, [224, 224])
    return tf.keras.applications.resnet50.preprocess_input(resized), label


In [None]:
# `base_model` is not defined anywhere in this notebook (NameError on a fresh
# kernel). The backbone of resnet_model, which the next cell trains, is
# resnet_base, so freeze that one.
# NOTE(review): resnet_base is already frozen where it is created, and a change
# to `trainable` only takes effect after the model is compiled again.
resnet_base.trainable = False  # freeze base


In [None]:
# Run 5 more epochs on the ResNet50 model, this time without class weights.
resnet_model.fit(
    train_ds,
    validation_data=val_ds,
    epochs=5,
)
