<a href="https://colab.research.google.com/github/keelinarseneault/ML-Engineering/blob/main/Exploring_CNN_Models_7.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import pandas as pd
import os

In [2]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
train_path = '/Users/karseneault/Desktop/train_data/'
test_path = '/Users/karseneault/Desktop/test_data_v2/'

In [4]:
train = pd.read_csv('drive/MyDrive/train.csv')
test = pd.read_csv('drive/MyDrive/test.csv')

In [5]:
train = train[['file_name', 'label']]
train.columns = ['id', 'label']

In [6]:
print(train.shape)

(79950, 2)


In [7]:
print(train.value_counts('label'))

label
0    39975
1    39975
Name: count, dtype: int64


# **Compare CNN Architectures**

## **ConvNeXT:**

In [41]:
# Use PyTorch

import pandas as pd
import os
from sklearn.model_selection import train_test_split
from PIL import Image
import torch
from torch.utils.data import Dataset, DataLoader
import torchvision.transforms as transforms
import torchvision.models as models
import torch.nn as nn
from torch.optim.lr_scheduler import StepLR
from tqdm import tqdm

In [42]:
path = 'drive/MyDrive/Images'

In [10]:
def image_exists(id):
    filepath = f"drive/MyDrive/Images/{id}"
    return os.path.isfile(filepath)

In [11]:
train = train[train["id"].apply(image_exists)]

In [12]:
train.shape

(11040, 2)

**Take a random sample of the training set in order to train on a smaller set of images, while maintaining the balanced ratio between the two classes:**

In [13]:
train_sample = train.groupby("label", group_keys=False).apply(lambda x:x.sample(frac=0.5))

  train_sample = train.groupby("label", group_keys=False).apply(lambda x:x.sample(frac=0.5))


In [14]:
train_df, val_df = train_test_split(
    train_sample,
    test_size=0.05,
    random_state=42,
    stratify=train_sample['label']
)

In [15]:
# Print shapes of the splits
print(f'Train shape: {train_df.shape}')
print(f'Validation shape: {val_df.shape}')

Train shape: (5244, 2)
Validation shape: (276, 2)


In [16]:
class AIImageDataset(Dataset):
    def __init__(self, dataframe, root_dir, transform=None):
        self.dataframe = dataframe
        self.root_dir = root_dir
        self.transform = transform

    def __len__(self):
        return len(self.dataframe)

    def __getitem__(self, idx):
        img_name = os.path.join(self.root_dir, self.dataframe.iloc[idx, 0])
        image = Image.open(img_name).convert('RGB')

        if self.transform:
            image = self.transform(image)

        label = self.dataframe.iloc[idx, 1]
        return image, label

In [43]:
train_transforms = transforms.Compose([
    transforms.Resize(232),  # Resize to match ConvNeXt preprocessing
    transforms.RandomResizedCrop(224),
    transforms.RandomHorizontalFlip(),
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

In [44]:
# Set the device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

train_dataset = AIImageDataset(train_df, root_dir=path, transform=train_transforms)

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True, pin_memory=True)

# Validation dataset and loader
val_dataset = AIImageDataset(val_df, root_dir=path, transform=train_transforms)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False, num_workers=2, pin_memory=True)


In [45]:
# Load pretrained ConvNeXt Base model
model = models.convnext_base(weights="DEFAULT")

# Freeze all layers initially
for param in model.features.parameters():
    param.requires_grad = False

# Unfreeze the last two stages
for param in model.features[-2:].parameters():
    param.requires_grad = True

# Replace the classifier head with a custom one
model.classifier = nn.Sequential(
    nn.AdaptiveAvgPool2d((1, 1)),  # Global average pooling
    nn.Flatten(),                  # Flatten the tensor
    nn.BatchNorm1d(1024),          # Add BatchNorm here
    nn.Linear(1024, 512),          # First fully connected layer
    nn.ReLU(),                     # Activation function
    nn.Dropout(0.4),               # Dropout for regularization
    nn.Linear(512, 2)              # Output layer (binary classification)
)

optimizer = torch.optim.AdamW([
    {'params': model.features[-2:].parameters(), 'lr': 1e-5},  # Lower LR for backbone
    {'params': model.classifier.parameters(), 'lr': 1e-4}      # Higher LR for classifier
])

criterion = nn.CrossEntropyLoss()
scheduler = StepLR(optimizer, step_size=5, gamma=0.7)

In [47]:
# Ensure model and criterion are on the GPU
model.to(device)
criterion.to(device)

# Training Loop
epochs = 10
train_losses, train_accuracies, val_losses, val_accuracies = [], [], [], []  # Removed val_f1s

# Use Gradient Accumulation to simulate larger batch size without increasing memory usage
grad_accum_steps = 2  # Accumulate gradients over 2 batches

for epoch in range(epochs):
    # Training
    model.train()
    epoch_loss = 0.0
    epoch_accuracy = 0.0
    optimizer.zero_grad()

    for i, (data, label) in enumerate(tqdm(train_loader, desc=f"Training Epoch {epoch+1}")):
        # Move data and label to the GPU
        data, label = data.to(device), label.to(device)

        output = model(data)
        loss = criterion(output, label)
        loss.backward()

        # Gradient Accumulation: update weights only every grad_accum_steps
        if (i + 1) % grad_accum_steps == 0:
            optimizer.step()
            optimizer.zero_grad()

        epoch_loss += loss.item()
        preds = output.argmax(dim=1)
        acc = (preds == label).float().mean().item()
        epoch_accuracy += acc

    epoch_loss /= len(train_loader)
    epoch_accuracy /= len(train_loader)

    train_losses.append(epoch_loss)
    train_accuracies.append(epoch_accuracy)

    print(f"Epoch [{epoch+1}/{epochs}] Train Loss: {epoch_loss:.4f} | Train Acc: {epoch_accuracy:.4f}")

    # Validation
    model.eval()
    val_loss = 0.0
    val_accuracy = 0.0

    with torch.no_grad():  # No need to calculate gradients during validation
        for data, label in val_loader:
            # Move data and label to the GPU
            data, label = data.to(device), label.to(device)

            output = model(data)
            loss = criterion(output, label)

            val_loss += loss.item()
            preds = output.argmax(dim=1)
            acc = (preds == label).float().mean().item()
            val_accuracy += acc

    val_loss /= len(val_loader)
    val_accuracy /= len(val_loader)

    val_losses.append(val_loss)
    val_accuracies.append(val_accuracy)

    print(f"Epoch [{epoch+1}/{epochs}] Val Loss: {val_loss:.4f} | Val Acc: {val_accuracy:.4f}")

    scheduler.step()

Training Epoch 1: 100%|██████████| 164/164 [01:33<00:00,  1.76it/s]

Epoch [1/10] Train Loss: 0.4544 | Train Acc: 0.8197





Epoch [1/10] Val Loss: 0.3206 | Val Acc: 0.8958


Training Epoch 2: 100%|██████████| 164/164 [01:33<00:00,  1.76it/s]

Epoch [2/10] Train Loss: 0.2998 | Train Acc: 0.8845





Epoch [2/10] Val Loss: 0.2609 | Val Acc: 0.9042


Training Epoch 3: 100%|██████████| 164/164 [01:32<00:00,  1.78it/s]

Epoch [3/10] Train Loss: 0.2404 | Train Acc: 0.9071





Epoch [3/10] Val Loss: 0.2131 | Val Acc: 0.8951


Training Epoch 4: 100%|██████████| 164/164 [01:32<00:00,  1.78it/s]

Epoch [4/10] Train Loss: 0.2148 | Train Acc: 0.9167





Epoch [4/10] Val Loss: 0.2073 | Val Acc: 0.9271


Training Epoch 5: 100%|██████████| 164/164 [01:34<00:00,  1.74it/s]

Epoch [5/10] Train Loss: 0.2011 | Train Acc: 0.9205





Epoch [5/10] Val Loss: 0.1730 | Val Acc: 0.9285


Training Epoch 6: 100%|██████████| 164/164 [01:32<00:00,  1.77it/s]

Epoch [6/10] Train Loss: 0.1902 | Train Acc: 0.9243





Epoch [6/10] Val Loss: 0.1755 | Val Acc: 0.9299


Training Epoch 7: 100%|██████████| 164/164 [01:32<00:00,  1.76it/s]

Epoch [7/10] Train Loss: 0.1770 | Train Acc: 0.9296





Epoch [7/10] Val Loss: 0.1742 | Val Acc: 0.9319


Training Epoch 8: 100%|██████████| 164/164 [01:32<00:00,  1.77it/s]

Epoch [8/10] Train Loss: 0.1788 | Train Acc: 0.9264





Epoch [8/10] Val Loss: 0.1364 | Val Acc: 0.9528


Training Epoch 9: 100%|██████████| 164/164 [01:32<00:00,  1.78it/s]

Epoch [9/10] Train Loss: 0.1786 | Train Acc: 0.9313





Epoch [9/10] Val Loss: 0.1616 | Val Acc: 0.9319


Training Epoch 10: 100%|██████████| 164/164 [01:32<00:00,  1.77it/s]

Epoch [10/10] Train Loss: 0.1594 | Train Acc: 0.9408





Epoch [10/10] Val Loss: 0.1914 | Val Acc: 0.9250


## **InceptionV3:**

In [24]:
# Try TensorFlow

import tensorflow as tf
from tensorflow import keras
import pandas as pd
import numpy as np
from tensorflow.keras.preprocessing.image import load_img, img_to_array
import matplotlib.pyplot as plt
import tensorflow_hub as hub
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications import EfficientNetV2B0
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D, Dropout, Input
from tensorflow.keras.applications import InceptionV3
from tensorflow.keras.layers import Dense, Flatten, Dropout, GlobalAveragePooling2D
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam

In [25]:
path = 'drive/MyDrive/Images/'

In [28]:
import tensorflow as tf # Make sure you have tensorflow installed
from tensorflow.keras.preprocessing.image import load_img, img_to_array
import numpy as np
from tqdm import tqdm

def preprocess_image(image_path):
    img = load_img(image_path, target_size=(299, 299))  # Adjust target_size if needed
    img = img_to_array(img)
    img = img / 255.0  # Normalize pixel values
    # No need to add batch dimension, done later
    return img

# Preprocess training data with tqdm progress bar
train_images = []
train_labels = []
for index, row in tqdm(train_df.iterrows(), total=train_df.shape[0], desc="Preprocessing images"):
    image_path = os.path.join(path, row['id'])
    train_images.append(preprocess_image(image_path))
    train_labels.append(row['label'])

# Convert to NumPy arrays outside the loop
train_images = np.array(train_images)
train_labels = np.array(train_labels)


Preprocessing images: 100%|██████████| 5244/5244 [54:15<00:00,  1.61it/s]


In [30]:
# Preprocess validation data using list comprehensions and pre-allocation
val_image_paths = [os.path.join(path, image_id) for image_id in val_df['id']]
val_images = np.empty((len(val_df), 299, 299, 3), dtype=np.float32)

# Wrap the loop with tqdm for a progress bar
for i, image_path in enumerate(tqdm(val_image_paths, desc="Preprocessing validation images")):
    val_images[i] = preprocess_image(image_path)[0]

val_labels = val_df['label'].to_numpy()

Preprocessing validation images: 100%|██████████| 276/276 [03:47<00:00,  1.21it/s]


In [31]:
base_model = InceptionV3(weights='imagenet', include_top=False, input_shape=(299, 299, 3))

base_model.trainable = False

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/inception_v3/inception_v3_weights_tf_dim_ordering_tf_kernels_notop.h5
[1m87910968/87910968[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 0us/step


In [32]:
# Add layers

x = base_model.output
x = GlobalAveragePooling2D()(x)
x = Dense(512, activation='relu')(x)
x = Dropout(0.3)(x)  # Prevent overfitting
x = Dense(256, activation='relu')(x)
x = Dropout(0.3)(x)
output = Dense(1, activation='sigmoid')(x)

# Create final model
model = Model(inputs=base_model.input, outputs=output)

In [33]:
model.compile(optimizer = 'adam',
                    loss='binary_crossentropy',
                    metrics=['accuracy'])

In [36]:
model_history = model.fit(train_images, train_labels, epochs=8, validation_data=(val_images, val_labels))

Epoch 1/8
[1m164/164[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 30ms/step - accuracy: 0.9449 - loss: 0.1381 - val_accuracy: 0.5652 - val_loss: 3.2416
Epoch 2/8
[1m164/164[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 27ms/step - accuracy: 0.9325 - loss: 0.1530 - val_accuracy: 0.5870 - val_loss: 3.6289
Epoch 3/8
[1m164/164[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 27ms/step - accuracy: 0.9236 - loss: 0.1801 - val_accuracy: 0.5507 - val_loss: 3.1137
Epoch 4/8
[1m164/164[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 27ms/step - accuracy: 0.9448 - loss: 0.1380 - val_accuracy: 0.5833 - val_loss: 4.0561
Epoch 5/8
[1m164/164[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 27ms/step - accuracy: 0.9418 - loss: 0.1431 - val_accuracy: 0.5797 - val_loss: 3.9135
Epoch 6/8
[1m164/164[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 28ms/step - accuracy: 0.9403 - loss: 0.1505 - val_accuracy: 0.5616 - val_loss: 3.5651
Epoch 7/8
[1m164/164[0m [

In [38]:
# Evaluate the model on the training data
train_loss, train_accuracy = model.evaluate(train_images, train_labels)

print(f"Accuracy on train data: {train_accuracy:.2%} | Loss: {train_loss:.4f}")

[1m164/164[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 54ms/step - accuracy: 0.9596 - loss: 0.1085
Accuracy on train data: 96.45% | Loss: 0.0976
