In [1]:
import pandas as pd
# Load the training metadata table and preview its first five rows.
train = pd.read_csv(filepath_or_buffer="train-no-tma.csv")
train.head(n=5)

Unnamed: 0,image_id,label,image_width,image_height,is_tma
0,38366,LGSC,31951,21718,False
1,63298,HGSC,26067,20341,False
2,54928,CC,36166,31487,False
3,18813,CC,54671,32443,False
4,63429,EC,67783,29066,False


In [2]:
# Load the validation metadata table and preview its first five rows.
validation = pd.read_csv(filepath_or_buffer="validation-no-tma.csv")
validation.head(n=5)

Unnamed: 0,image_id,label,image_width,image_height,is_tma
0,9658,CC,52900,45380,False
1,12522,EC,46605,45511,False
2,34845,HGSC,42908,25840,False
3,38585,LGSC,64822,30320,False
4,23523,MC,74723,45387,False


In [3]:
import os

def get_image_path(image_id:int):
    """Return the relative folder that holds the tiles of ``image_id``.

    The folder is ``tiles/<image_id>``, joined with the platform's path
    separator. Nothing is checked on disk.
    """
    folder_name = f"{image_id}"
    return os.path.join('tiles', folder_name)

# Attach each training row's tile folder, derived from its image_id.
train['tile_path'] = train['image_id'].apply(get_image_path)
train.head(n=5)

Unnamed: 0,image_id,label,image_width,image_height,is_tma,tile_path
0,38366,LGSC,31951,21718,False,tiles/38366
1,63298,HGSC,26067,20341,False,tiles/63298
2,54928,CC,36166,31487,False,tiles/54928
3,18813,CC,54671,32443,False,tiles/18813
4,63429,EC,67783,29066,False,tiles/63429


In [4]:
# Attach each validation row's tile folder, derived from its image_id.
validation['tile_path'] = validation['image_id'].apply(get_image_path)
validation.head(n=5)

Unnamed: 0,image_id,label,image_width,image_height,is_tma,tile_path
0,9658,CC,52900,45380,False,tiles/9658
1,12522,EC,46605,45511,False,tiles/12522
2,34845,HGSC,42908,25840,False,tiles/34845
3,38585,LGSC,64822,30320,False,tiles/38585
4,23523,MC,74723,45387,False,tiles/23523


In [5]:
import timm
import torch
from torch import nn

# Prefer the GPU when PyTorch can see one; otherwise run on the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"

# Build EfficientNetV2-S through timm; pretrained=False, so no pretrained weights are requested.
model_name = 'efficientnetv2_s'  # You can choose from different versions of EfficientNetV2 like 's', 'm', 'l'
model = timm.create_model(model_name, pretrained=False)

# NOTE(review): this edits the config dict attached to the model. Presumably it does not
# resize any layer and only matters to code that reads default_cfg - confirm against timm.
model.default_cfg['input_size'] = (3, 224, 224)

# Number of target classes (HGSC, CC, EC, LGSC, MC).
num_classes = 5

# Swap the final linear layer for a fresh `num_classes`-way head.
# Depending on the architecture the layer lives at `classifier` or `head.fc`.
if hasattr(model, 'classifier') and isinstance(model.classifier, nn.Linear):
    model.classifier = nn.Linear(model.classifier.in_features, num_classes)
elif hasattr(model, 'head') and hasattr(model.head, 'fc'):
    model.head.fc = nn.Linear(model.head.fc.in_features, num_classes)
else:
    # Fail loudly: continuing would keep the original head and train it against
    # num_classes-way labels without any error being reported.
    raise RuntimeError(
        "The model doesn't have a single linear classifier layer as expected "
        f"(model_name={model_name!r}); cannot attach a {num_classes}-class head"
    )

model = model.to(device)


In [6]:
import os
from PIL import Image, ImageOps
from torch.utils.data import Dataset
import torchvision.transforms as transforms

# Class index -> subtype code. The position in the tuple is the class index
# used for model outputs and targets.
integer_to_label = dict(enumerate(('HGSC', 'CC', 'EC', 'LGSC', 'MC')))

# Subtype code -> class index: the exact inverse of integer_to_label.
label_to_integer = {name: index for index, name in integer_to_label.items()}

class ImageDataset(Dataset):
    """Dataset of PNG tiles collected from per-row tile folders.

    Every row of ``dataframe`` must provide a ``tile_path`` (a directory) and a
    ``label``. Each file directly inside that directory whose name ends in
    ``.png`` (case-insensitive) becomes one sample carrying the row's label.
    Rows whose ``tile_path`` is not an existing directory contribute no samples;
    a single warning summarises how many were skipped.

    Args:
        dataframe: table with ``tile_path`` and ``label`` columns.
        transform: optional callable applied to the RGB PIL image of a sample.

    Attributes set by the constructor: ``dataframe``, ``transform``,
    ``image_paths`` (list of file paths) and ``labels`` (parallel list of labels).
    """

    def __init__(self, dataframe, transform=None):
        self.dataframe = dataframe
        self.transform = transform
        self.image_paths = []
        self.labels = []
        missing_dirs = []
        for folder_path, label in zip(dataframe['tile_path'], dataframe['label']):
            if not os.path.isdir(folder_path):
                missing_dirs.append(folder_path)
                continue
            # os.listdir returns entries in arbitrary order; sorting keeps the
            # sample index -> file mapping stable between runs.
            for image_name in sorted(os.listdir(folder_path)):
                if image_name.lower().endswith('.png'):
                    self.image_paths.append(os.path.join(folder_path, image_name))
                    self.labels.append(label)
        if missing_dirs:
            import warnings
            warnings.warn(
                f"ImageDataset: {len(missing_dirs)} tile_path entries are not "
                f"directories and were skipped (first: {missing_dirs[0]!r})"
            )

    def __len__(self):
        """Return the number of tile images found."""
        return len(self.image_paths)

    def __getitem__(self, idx):
        """Return ``(image, class_index)`` for sample ``idx``.

        The image is loaded as RGB and passed through ``transform`` when one
        was given. The label is mapped through ``label_to_integer``, so an
        unknown label raises ``KeyError``.
        """
        # The context manager closes the file. convert() loads the pixel data
        # and guarantees three channels, so RGBA / palette / grayscale PNGs do
        # not break a three-channel normalisation downstream.
        with Image.open(self.image_paths[idx]) as tile:
            image = tile.convert('RGB')
        label = self.labels[idx]
        if self.transform:
            image = self.transform(image)
        return image, label_to_integer[label]


In [7]:
from torch.utils.data import DataLoader
from torch.utils.data import random_split
from torchvision.transforms import autoaugment

# Preprocessing shared by both splits: resize each tile to 224x224, turn it into a
# float tensor, then normalise every channel with mean 0.5 and std 0.5.
transform = transforms.Compose(
    [
        transforms.Resize(size=(224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.5] * 3, std=[0.5] * 3),
    ]
)

# One dataset per split, both built from the tile folders listed in the metadata frames.
train_dataset = ImageDataset(train, transform=transform)
val_dataset = ImageDataset(validation, transform=transform)

# Both loaders shuffle, use batches of 32 and 8 worker processes.
train_dataloader = DataLoader(dataset=train_dataset, shuffle=True, batch_size=32, num_workers=8)
val_dataloader = DataLoader(dataset=val_dataset, shuffle=True, batch_size=32, num_workers=8)

In [8]:
import logging
import sys

# Configure the root logger so that every module-level logging.* call is captured.
logger = logging.getLogger()

# Drop handlers left over from an earlier run of this cell. Each one is also
# closed: removeHandler() alone would leave a previous FileHandler's file open.
for handler in logger.handlers[:]:
    logger.removeHandler(handler)
    handler.close()

# Let INFO and above through the logger itself; handlers filter further.
logger.setLevel(logging.INFO)

# Everything at INFO and above goes to the training log file.
file_handler = logging.FileHandler('training_log_scratch_effnetv2_non_tma.txt')
file_handler.setLevel(logging.INFO)
logger.addHandler(file_handler)

# Only ERROR and CRITICAL messages are echoed to stderr.
stream_handler = logging.StreamHandler(sys.stderr)
stream_handler.setLevel(logging.ERROR)
logger.addHandler(stream_handler)

In [None]:
import torch
import torch.optim as optim
import logging
import numpy as np
import math
from sklearn.metrics import accuracy_score

# Base learning rate; the schedule below multiplies it by a factor in [0, 1].
initial_lr = 2e-3

def warmup_linear(step, warmup_steps=10000, total_steps=None):
    """Learning-rate multiplier: linear warmup followed by cosine decay.

    Args:
        step: number of optimisation steps taken so far.
        warmup_steps: steps over which the multiplier rises linearly from 0 to 1.
        total_steps: step at which the cosine decay reaches 0. Defaults to
            ``len(train_dataloader)``, i.e. the number of batches in one epoch.

    Returns:
        A float in [0, 1]. Once ``step`` reaches the end of the schedule the
        multiplier stays at 0 instead of following the cosine back up.
    """
    if step < warmup_steps:
        return float(step) / float(max(1, warmup_steps))
    if total_steps is None:
        total_steps = len(train_dataloader)
    decay_steps = max(1, total_steps - warmup_steps)
    # Clamp so that steps past the schedule end cannot raise the rate again.
    progress = min(1.0, float(step - warmup_steps) / float(decay_steps))
    return 0.5 * (1.0 + math.cos(math.pi * progress))

# Adam over all model parameters, with a small weight decay.
optimizer = optim.Adam(params=model.parameters(), lr=initial_lr, weight_decay=1e-5)

# The scheduler multiplies initial_lr by warmup_linear(<scheduler step count>).
scheduler = optim.lr_scheduler.LambdaLR(optimizer=optimizer, lr_lambda=warmup_linear)

# Per-class counts, derived by looking at the number of files in tile_path for each label.
# NOTE(review): presumably ordered by class index (HGSC, CC, EC, LGSC, MC) - confirm.
class_counts = np.array(
    [3521456, 1876772, 2126428, 589002, 1053114],
    dtype=np.float32,
)
# Alternative counts that were tried at some point: [703, 690, 631, 581, 706].

# Inverse-frequency weights, rescaled so that they sum to one.
class_weights = np.reciprocal(class_counts)
class_weights = class_weights / class_weights.sum()

# Move the weights onto the training device as a float tensor.
class_weights = torch.tensor(class_weights, dtype=torch.float, device=device)

# Weighted cross-entropy: rarer classes contribute more per sample.
criterion = torch.nn.CrossEntropyLoss(weight=class_weights)

# Run length and bookkeeping counters used by the training loop.
num_epochs, best_val_accuracy, step = 1, 0.0, 0

# Checkpoints are written to this folder. It is created up front because
# torch.save does not create missing parent directories.
checkpoint_dir = 'effnetv2-scratch-non-tma-models'
os.makedirs(checkpoint_dir, exist_ok=True)

eval_every = 1000        # validate and checkpoint every this many training batches
max_val_samples = 10000  # a validation pass stops once more than this many samples are scored


def _validation_accuracy(model, dataloader, device, max_samples):
    """Return the accuracy of ``model`` on (a prefix of) ``dataloader``.

    Batches are scored until more than ``max_samples`` predictions have been
    collected or the loader is exhausted. The model is put in eval mode for
    the pass and its previous train/eval mode is restored afterwards.
    """
    was_training = model.training
    model.eval()
    all_preds = []
    all_labels = []
    try:
        with torch.no_grad():
            for val_images, val_labels in dataloader:
                logits = model(val_images.to(device))
                # argmax over the logits equals argmax over their softmax.
                all_preds.extend(torch.argmax(logits, dim=1).cpu().numpy())
                all_labels.extend(val_labels.numpy())
                if len(all_preds) > max_samples:
                    break
    finally:
        model.train(was_training)
    return accuracy_score(all_labels, all_preds)


for epoch in range(num_epochs):
    model.train()  # set the model to training mode

    for i, (images, labels) in enumerate(train_dataloader):
        # Move the batch to the training device.
        images = images.to(device)
        labels = labels.to(device)

        # Forward pass, loss, backward pass, parameter update.
        optimizer.zero_grad()
        loss = criterion(model(images), labels)
        loss.backward()
        optimizer.step()

        # The LambdaLR scheduler applies initial_lr * warmup_linear(...) for the
        # next step, so the learning rate is not also written by hand here.
        scheduler.step()
        step += 1

        logging.info('[%d, %5d] loss: %.3f', epoch + 1, step, loss.item())

        if i % eval_every == 0:
            accuracy = _validation_accuracy(model, val_dataloader, device, max_val_samples)
            logging.info("Validation Accuracy: %s", accuracy)

            # Intermediate checkpoint, taken at the same cadence as validation.
            torch.save(model.state_dict(), os.path.join(checkpoint_dir, f'epoch_{epoch}_batch_{i}.pth'))

    # Save model after each epoch
    torch.save(model.state_dict(), os.path.join(checkpoint_dir, f'model_epoch_{epoch+1}.pth'))
    logging.info(f'Model saved after epoch {epoch+1}')

logging.info('Finished Training')
