In [1]:
# Experiment tag: reused below in the log file name, the checkpoint directory
# name and the file name of the archived notebook copy.
MODEL_NAME = 'tma_from_start'

In [2]:
import pandas as pd
import os

def get_image_path(image_id:int):
    """Return the tile folder of ``image_id``, i.e. ``../tiles_768/<image_id>``."""
    tiles_root = '../tiles_768'
    folder_name = str(image_id)
    return os.path.join(tiles_root, folder_name)

# Load the training metadata and attach the tile folder of every image.
train = pd.read_csv("../data/train.csv")
train['tile_path'] = train['image_id'].apply(get_image_path)

train.head()

Unnamed: 0,image_id,label,image_width,image_height,is_tma,tile_path
0,4,HGSC,23785,20008,False,../tiles_768/4
1,66,LGSC,48871,48195,False,../tiles_768/66
2,91,HGSC,3388,3388,True,../tiles_768/91
3,281,LGSC,42309,15545,False,../tiles_768/281
4,286,EC,37204,30020,False,../tiles_768/286


In [3]:
from PIL import Image
import torch
import torch.nn as nn
import timm
from timm.models.layers import DropPath
import copy
from itertools import cycle

device = "cuda" if torch.cuda.is_available() else "cpu"
model_name = "timm/eva02_base_patch14_448.mim_in22k_ft_in22k_in1k"

print(f"Using device {device} and model {model_name}")

# Pretrained backbone; its regularisation layers and classifier head are swapped out below.
model = timm.create_model(model_name, pretrained=True)

# 25000 cutmix_mixup was 0.3 and 0.1
drop_path_rate = 0.5
dropout_rate = 0.
head_dropout_rate = 0.3

# One drop-path rate per block, evenly spaced from 0 up to drop_path_rate.
drop_path_rates = torch.linspace(0, drop_path_rate, len(model.blocks)).tolist()

# Install the per-block drop-path modules and reset every block-level dropout.
for block, block_rate in zip(model.blocks, drop_path_rates):
    block.drop_path1 = DropPath(drop_prob=block_rate)
    block.drop_path2 = DropPath(drop_prob=block_rate)
    block.attn.attn_drop = nn.Dropout(p=dropout_rate, inplace=False)
    block.attn.proj_drop = nn.Dropout(p=dropout_rate, inplace=False)
    block.mlp.drop1 = nn.Dropout(p=dropout_rate, inplace=False)
    block.mlp.drop2 = nn.Dropout(p=dropout_rate, inplace=False)

# New 5-way classification head plus the model-level dropout layers.
model.head = nn.Linear(model.head.in_features, 5)
model.pos_drop = nn.Dropout(dropout_rate)
model.head_drop = nn.Dropout(head_dropout_rate)

# Optional warm start from an earlier run (disabled):
# model_location = 'eva02_base_models_tma_special_pt_2/ema_0.9999_step_50000.pth' # chosen because this is 1 epoch's worth of data
# state_dict = torch.load(model_location, map_location=device)
# model.load_state_dict(state_dict, strict=False)
model = model.to(device)

# One frozen-in-eval-mode copy of the model per EMA decay; the training loop
# blends the live weights into each copy.
ema_decays = [0.999, 0.9995, 0.9998, 0.9999, 0.99995, 0.99998, 0.99999]
ema_models = []
for _ in ema_decays:
    ema_model = copy.deepcopy(model).to(device)
    ema_model.eval()
    ema_models.append(ema_model)

Using device cuda and model timm/eva02_base_patch14_448.mim_in22k_ft_in22k_in1k


  return _VF.meshgrid(tensors, **kwargs)  # type: ignore[attr-defined]


In [4]:
# Class index -> label string, in the index order used for the training targets.
integer_to_label = dict(enumerate(['HGSC', 'CC', 'EC', 'LGSC', 'MC']))

# Inverse lookup, derived from the mapping above so the two cannot drift apart.
label_to_integer = {label: index for index, label in integer_to_label.items()}

In [5]:
import os
from PIL import Image
from torch.utils.data import Dataset
import random

class ImageDataset(Dataset):
    """Tile dataset that interleaves TMA and WSI tiles of all five classes.

    Every row of ``dataframe`` points (via ``tile_path``) at a folder of PNG
    tiles. The tiles are grouped by (is_tma, label), shuffled inside each
    group, and then interleaved round-robin so that consecutive samples cycle
    through the classes and through both image types. Smaller groups are
    repeated until the largest group has been consumed once.

    Parameters:
    dataframe: frame with the columns ``tile_path``, ``label``, ``image_id``
        and ``is_tma``. Rows whose ``tile_path`` is not a directory are ignored.
    transform: optional callable applied to each opened PIL image.

    The reported length is a nominal 1_000_000_000 so that a sequential loader
    never runs out of samples; indices wrap around the interleaved list.
    """

    def __init__(self, dataframe, transform=None):
        self.dataframe = dataframe
        self.transform = transform
        self.all_images = []  # Store all images in an interlaced fashion
        # One bucket per class index, separately for whole-slide and TMA tiles.
        self.wsi_label_images = [[] for _ in range(5)]
        self.tma_label_images = [[] for _ in range(5)]

        # Step 1: Collect all images from each folder
        for index, row in dataframe.iterrows():
            folder_path = row['tile_path']
            label = row['label']
            image_id = row['image_id']
            is_tma = row['is_tma']
            if not os.path.isdir(folder_path):
                continue
            records = [
                (os.path.join(folder_path, f), label, image_id, is_tma)
                for f in os.listdir(folder_path)
                if f.lower().endswith('.png')
            ]
            buckets = self.tma_label_images if is_tma else self.wsi_label_images
            buckets[label_to_integer[label]].extend(records)

        # Step 2: Shuffle inside every (type, class) bucket
        for i in range(5):
            random.shuffle(self.tma_label_images[i])
            random.shuffle(self.wsi_label_images[i])

        # Step 3: Interlace the images, repeating data as needed
        max_length = max(max(len(tma) for tma in self.tma_label_images), max(len(wsi) for wsi in self.wsi_label_images))
        for i in range(max_length):
            for label in range(5):
                if len(self.tma_label_images[label]) > 0:
                    tma_index = i % len(self.tma_label_images[label])  # Repeat TMA data
                    self.all_images.append(self.tma_label_images[label][tma_index])
                if len(self.wsi_label_images[label]) > 0:
                    wsi_index = i % len(self.wsi_label_images[label])  # Repeat WSI data
                    self.all_images.append(self.wsi_label_images[label][wsi_index])

    def __len__(self):
        # Nominal "endless" length; __getitem__ wraps indices to the real list.
        return 1_000_000_000

    def __getitem__(self, idx):
        """Return ``(image, class_index)`` for position ``idx`` (wrapping around).

        Raises:
        IndexError: if no tile was found while building the dataset.
        """
        if not self.all_images:
            raise IndexError("ImageDataset contains no tiles")

        # __len__ promises far more items than were collected, so wrap the
        # index instead of running off the end of the list.
        image_path, label, image_id, is_tma = self.all_images[idx % len(self.all_images)]
        image = Image.open(image_path)

        if self.transform:
            image = self.transform(image)

        return image, label_to_integer[label]

In [6]:
from torch.utils.data import DataLoader, WeightedRandomSampler
import torchvision.transforms as transforms
import torchvision.transforms.v2 as v2
from torch.utils.data import default_collate

BATCH_SIZE = 16

# Per-channel normalisation statistics shared by the training and validation pipelines.
NORM_MEAN = [0.48145466, 0.4578275, 0.40821073]
NORM_STD = [0.26862954, 0.26130258, 0.27577711]

# Training pipeline: random crop to 448, RandAugment, then tensor conversion and normalisation.
train_transform = transforms.Compose([
    transforms.RandomResizedCrop(size=448, scale=(0.5, 1.0), ratio=(0.75, 1.33)),
    transforms.RandAugment(9, 15, 31),
    transforms.Resize(448),
    transforms.ToTensor(),
    transforms.Normalize(mean=NORM_MEAN, std=NORM_STD),
    # Disabled experiment: up to three stacked
    # transforms.RandomErasing(p=0.5, scale=(0.02, 0.33), ratio=(0.3, 3.3), value=0, inplace=False),
])

# Validation pipeline: deterministic resize, tensor conversion and normalisation only.
val_transform = transforms.Compose([
    transforms.Resize(448),
    transforms.ToTensor(),
    transforms.Normalize(mean=NORM_MEAN, std=NORM_STD),
])

train_dataset = ImageDataset(dataframe=train, transform=train_transform)

# Batch-level augmentation: every batch gets either CutMix or MixUp.
cutmix = v2.CutMix(num_classes=5)
mixup = v2.MixUp(num_classes=5)
cutmix_or_mixup = v2.RandomChoice([cutmix, mixup])

def collate_fn(batch):
    """Collate samples with the default collate, then apply CutMix or MixUp to the batch."""
    images, targets = default_collate(batch)
    return cutmix_or_mixup(images, targets)

train_dataloader = DataLoader(
    train_dataset,
    batch_size=BATCH_SIZE,
    num_workers=7,
    collate_fn=collate_fn,
)

In [7]:
import logging
import sys

# Configure the root logger: INFO and above go to a per-experiment file,
# ERROR and above are additionally echoed to stderr.
logger = logging.getLogger()

# Drop handlers left over from a previous run of this cell and close them, so
# re-running the cell neither duplicates log lines nor leaks open log files.
for handler in logger.handlers[:]:
    logger.removeHandler(handler)
    handler.close()

# Set the logging level
logger.setLevel(logging.INFO)

# FileHandler opens its file immediately, which fails when the directory is
# missing, so make sure it exists first.
os.makedirs('logs', exist_ok=True)
file_handler = logging.FileHandler(f'logs/eva02_base_train_{MODEL_NAME}.txt')
file_handler.setLevel(logging.INFO)
logger.addHandler(file_handler)

# Create a StreamHandler for stderr and add it to the logger
stream_handler = logging.StreamHandler(sys.stderr)
stream_handler.setLevel(logging.ERROR)  # Only log ERROR and CRITICAL messages to stderr
logger.addHandler(stream_handler)

In [None]:
import torch
import torch.optim as optim
import logging
import numpy as np
import math
from sklearn.metrics import balanced_accuracy_score
import random
from torch.cuda.amp import GradScaler, autocast

initial_lr = 3e-6
final_lr = 3e-8
num_epochs = 10000

def learning_rate(step, warmup_steps=500, max_steps=50000):
    """Learning rate for ``step``: linear warmup followed by cosine decay.

    Below ``warmup_steps`` the rate rises linearly from 0 towards
    ``initial_lr``. From there until ``max_steps`` it follows half a cosine
    from ``initial_lr`` down to ``final_lr``. Afterwards it stays at
    ``final_lr``.
    """
    if step < warmup_steps:
        return initial_lr * (float(step) / float(max(1, warmup_steps)))

    if not step < max_steps:
        return final_lr

    decay_span = float(max(1, max_steps - warmup_steps))
    progress = float(step - warmup_steps) / decay_span
    cosine_weight = 0.5 * (1 + math.cos(math.pi * progress))
    return final_lr + (initial_lr - final_lr) * cosine_weight

def update_ema_variables(model, ema_model, alpha, global_step):
    """Blend the parameters of ``model`` into ``ema_model`` in place.

    Each EMA parameter becomes ``alpha * ema + (1 - alpha) * current``. Only
    the tensors yielded by ``parameters()`` are touched; ``global_step`` is
    accepted but not used.
    """
    blend = 1 - alpha
    with torch.no_grad():
        for averaged, current in zip(ema_model.parameters(), model.parameters()):
            averaged.data.mul_(alpha).add_(current.data, alpha=blend)

# Training budget and checkpoint cadence (previously repeated as bare literals).
MAX_TRAIN_STEPS = 50000   # last optimizer step; also the end of the LR schedule
CHECKPOINT_EVERY = 1000   # save every EMA model each time step hits a multiple of this
CHECKPOINT_DIR = f'eva02_base_models_{MODEL_NAME}'

scaler = GradScaler()
optimizer = optim.AdamW(model.parameters(), lr=initial_lr, weight_decay=1e-7)

# Plain (unweighted) cross entropy.
# NOTE(review): presumably the targets are the soft label vectors produced by
# the CutMix/MixUp collate function — confirm against the dataloader.
criterion = torch.nn.CrossEntropyLoss()

best_val_accuracy = 0.0
step = 0

# torch.save does not create missing directories; without this the very first
# checkpoint (step 0) fails on a fresh checkout.
os.makedirs(CHECKPOINT_DIR, exist_ok=True)

for epoch in range(num_epochs):
    model.train()  # set the model to training mode

    for i, (images, labels) in enumerate(train_dataloader, 0):
        # Move the batch to the training device
        images = images.to(device)
        labels = labels.to(device)

        # Apply the warmup + cosine schedule by overwriting the lr of every group
        lr = learning_rate(step, max_steps=MAX_TRAIN_STEPS)
        for g in optimizer.param_groups:
            g['lr'] = lr

        # Zero the parameter gradients
        optimizer.zero_grad()

        # Forward pass with autocast
        with autocast():
            outputs = model(images)
            loss = criterion(outputs, labels)

        # Backward pass and optimizer step through the gradient scaler
        scaler.scale(loss).backward()
        scaler.step(optimizer)
        scaler.update()

        # Fold the fresh weights into every EMA copy
        for ema_decay, ema_model in zip(ema_decays, ema_models):
            update_ema_variables(model, ema_model, ema_decay, step)

        logging.info('[%d, %5d] loss: %.3f' % (epoch + 1, step, loss.item()))

        if step % CHECKPOINT_EVERY == 0:
            for ema_decay, ema_model in zip(ema_decays, ema_models):
                torch.save(ema_model.state_dict(), f'{CHECKPOINT_DIR}/ema_{ema_decay}_step_{step}.pth')
            logging.info(f'Models saved after epoch {epoch} and step {step}')

        # Stop after the step-MAX_TRAIN_STEPS update; `step` is deliberately
        # not incremented past it so the outer check below fires as well.
        if step == MAX_TRAIN_STEPS:
            break

        step += 1

    if step >= MAX_TRAIN_STEPS:
        break

In [None]:
import shutil
import os

def duplicate_ipynb_with_new_name(src_file_path, dest_dir, new_name):
    """
    Duplicate an IPython notebook file to a new directory with a new file name.

    The destination directory is created when it does not exist yet, so the
    copy also works on a fresh checkout.

    Parameters:
    src_file_path (str): The path of the source IPython notebook file.
    dest_dir (str): The destination directory where the file will be copied.
    new_name (str): The new file name for the duplicated notebook; '.ipynb'
        is appended when missing.

    Returns:
    str: The path of the duplicated file with the new name.

    Raises:
    FileNotFoundError: If the source file does not exist.
    """
    # Check if the new name contains the '.ipynb' extension, add if not
    if not new_name.endswith('.ipynb'):
        new_name += '.ipynb'

    # shutil.copy does not create missing directories. An empty dest_dir means
    # the current directory, which os.makedirs would reject.
    if dest_dir:
        os.makedirs(dest_dir, exist_ok=True)

    # Creating the destination file path with the new name
    dest_file_path = os.path.join(dest_dir, new_name)

    # Copying the file to the new directory
    shutil.copy(src_file_path, dest_file_path)

    return dest_file_path

# Archive a copy of this notebook under the current experiment name.
src_file = "eva02-base-finetune.ipynb"
dest_directory = "notebook_history"
new_filename = f"{MODEL_NAME}.ipynb"
duplicate_ipynb_with_new_name(
    src_file_path=src_file,
    dest_dir=dest_directory,
    new_name=new_filename,
)


In [7]:
# Ask PyTorch to release its cached, currently unused GPU memory before the
# evaluation cells below run.
torch.cuda.empty_cache()

In [12]:
import random
from sklearn.metrics import balanced_accuracy_score

# NOTE: `ema_model` is an alias of `model`, not a copy, so loading the
# checkpoint below also overwrites the weights held by `model`.
ema_model = model
# state_dict = torch.load('eva02_base_models_tma_special/ema_0.9999_step_10000.pth', map_location=device)
state_dict = torch.load('eva02_base_models_tma_special_pt_2/ema_0.9999_step_50000.pth', map_location=device)
ema_model.load_state_dict(state_dict, strict=False)
ema_model = ema_model.to(device)
ema_model.eval()

# Maximum number of tiles per image
MAX_TILES_PER_IMAGE = 1

# Maximum number of images per batch
MAX_IMAGES_PER_BATCH = 8  # Adjust based on model capacity and memory constraints

# Per-image results, all in the same order.
image_ids = []
logits = []
labels = []
is_tmas = []


def _run_eval_batch(batch_tiles, batch_image_ids, batch_labels, batch_is_tmas):
    """Run one batch of per-image tile stacks through `ema_model` and record the outputs."""
    # Concatenate tiles from each image in the batch
    batch_input = torch.concat(batch_tiles, dim=0).to(device)

    # Run the batch through the model
    batch_output = ema_model(batch_input)

    # Split the outputs back into per-image groups and store them
    start = 0
    for tiles in batch_tiles:
        end = start + tiles.shape[0]
        logits.append(batch_output[start:end])
        start = end

    image_ids.extend(batch_image_ids)
    labels.extend(batch_labels)
    is_tmas.extend(batch_is_tmas)


with torch.no_grad():
    # Temporary storage for the current batch
    batch_tiles = []
    batch_image_ids = []
    batch_labels = []
    batch_is_tmas = []

    for _, row in train.iterrows():
        print(row['image_id'])
        path = row['tile_path']

        # Images without a tile folder or without PNG tiles are skipped;
        # an empty tile list would otherwise crash torch.concat.
        if not os.path.isdir(path):
            continue
        all_files = [f for f in os.listdir(path) if f.lower().endswith('.png')]
        if not all_files:
            continue

        # Randomly sample tiles from this image
        sample_size = min(MAX_TILES_PER_IMAGE, len(all_files))
        sampled_files = random.sample(all_files, sample_size)

        image_tiles = []
        for image_name in sampled_files:
            image_path = os.path.join(path, image_name)
            sub_image = Image.open(image_path)
            tile = val_transform(sub_image).unsqueeze(0)
            image_tiles.append(tile)

        # Add this image's tiles to the batch
        batch_tiles.append(torch.concat(image_tiles, dim=0))
        batch_image_ids.append(row['image_id'])
        batch_labels.append(row['label'])
        batch_is_tmas.append(row['is_tma'])

        # Process the batch as soon as it is full
        if len(batch_tiles) == MAX_IMAGES_PER_BATCH:
            _run_eval_batch(batch_tiles, batch_image_ids, batch_labels, batch_is_tmas)

            # Reset the batch
            batch_tiles = []
            batch_image_ids = []
            batch_labels = []
            batch_is_tmas = []

    # Flush the final, partially filled batch. Doing this after the loop
    # replaces the per-row `row.equals(train.iloc[-1])` comparison and still
    # works when the last row was skipped.
    if batch_tiles:
        _run_eval_batch(batch_tiles, batch_image_ids, batch_labels, batch_is_tmas)
        

4
66
91
281
286
431
706
970
1020
1080
1101
1252
1289
1295
1660
1666
1774
1925
1943
1952
2097
2227
2391
2666
2706
2906
3055
3084
3092
3098
3191
3222
3264
3511
3672
3881
3997
4134
4211
4608
4797
4827
4877
4963
5015
5114
5251
5264
5265
5307
5456
5851
5852
5970
5992
6140
6175
6281
6359
6363
6449
6558
6582
6793
6843
6898
6951
7204
7329
7482
7490
7955
8130
8213
8240
8279
8280
8531
8713
8805
8985
9154
9183
9200
9254
9341
9509
9658
9697
10077
10143
10246
10252
10469
10548
10642
10800
10896
11263
11417
11431
11557
11559
12159
12222
12244
12442
12522
12902
13364
13387
13526
13568
13987
14039
14051
14127
14312
14401
14424
14532
14542
14617
15139
15188
15209
15221
15231
15293
15470
15486
15583
15671
15742
15871
15912
16042
16064
16209
16325
16494
16876
16986
17067
17174
17291
17365
17416
17487
17637
17738
17854
18014
18138
18196
18547
18568
18607
18810
18813
18896
18914
18981
19030
19157
19255
19512
19569
20205
20312
20316
20329
20463
20670
20858
20882
21020
21232
21260
21303
21373
21432
21445
219

In [11]:
# 50000 tma pt 2 0.9999 16

# Sum each image's tile logits and take the arg-max class as its prediction.
predictions = [
    integer_to_label[image_logits.sum(dim=0).argmax().cpu().item()]
    for image_logits in logits
]

# Split ground truth and predictions by the TMA flag so both subsets are scored separately.
tma_true = [label for label, flag in zip(labels, is_tmas) if flag]
tma_pred = [prediction for prediction, flag in zip(predictions, is_tmas) if flag]
wsi_true = [label for label, flag in zip(labels, is_tmas) if not flag]
wsi_pred = [prediction for prediction, flag in zip(predictions, is_tmas) if not flag]

tma_accuracy = balanced_accuracy_score(tma_true, tma_pred)
non_tma_accuracy = balanced_accuracy_score(wsi_true, wsi_pred)

# Displayed as (TMA, non-TMA, mean of the two).
tma_accuracy, non_tma_accuracy, (tma_accuracy + non_tma_accuracy) / 2

(1.0, 0.9574833130600616, 0.9787416565300309)

In [9]:
# 50000 tma pt 2 0.9999 8

# Sum each image's tile logits and take the arg-max class as its prediction.
predictions = [
    integer_to_label[image_logits.sum(dim=0).argmax().cpu().item()]
    for image_logits in logits
]

# Split ground truth and predictions by the TMA flag so both subsets are scored separately.
tma_true = [label for label, flag in zip(labels, is_tmas) if flag]
tma_pred = [prediction for prediction, flag in zip(predictions, is_tmas) if flag]
wsi_true = [label for label, flag in zip(labels, is_tmas) if not flag]
wsi_pred = [prediction for prediction, flag in zip(predictions, is_tmas) if not flag]

tma_accuracy = balanced_accuracy_score(tma_true, tma_pred)
non_tma_accuracy = balanced_accuracy_score(wsi_true, wsi_pred)

# Displayed as (TMA, non-TMA, mean of the two).
tma_accuracy, non_tma_accuracy, (tma_accuracy + non_tma_accuracy) / 2

(1.0, 0.9579579847406763, 0.9789789923703381)

In [13]:
# 50000 tma pt 2 0.9999 1

# Sum each image's tile logits and take the arg-max class as its prediction.
predictions = [
    integer_to_label[image_logits.sum(dim=0).argmax().cpu().item()]
    for image_logits in logits
]

# Split ground truth and predictions by the TMA flag so both subsets are scored separately.
tma_true = [label for label, flag in zip(labels, is_tmas) if flag]
tma_pred = [prediction for prediction, flag in zip(predictions, is_tmas) if flag]
wsi_true = [label for label, flag in zip(labels, is_tmas) if not flag]
wsi_pred = [prediction for prediction, flag in zip(predictions, is_tmas) if not flag]

tma_accuracy = balanced_accuracy_score(tma_true, tma_pred)
non_tma_accuracy = balanced_accuracy_score(wsi_true, wsi_pred)

# Displayed as (TMA, non-TMA, mean of the two).
tma_accuracy, non_tma_accuracy, (tma_accuracy + non_tma_accuracy) / 2

(1.0, 0.8604749089484631, 0.9302374544742316)

In [9]:
# 50000 tma from start 0.9999

# Sum each image's tile logits and take the arg-max class as its prediction.
predictions = [
    integer_to_label[image_logits.sum(dim=0).argmax().cpu().item()]
    for image_logits in logits
]

# Split ground truth and predictions by the TMA flag so both subsets are scored separately.
tma_true = [label for label, flag in zip(labels, is_tmas) if flag]
tma_pred = [prediction for prediction, flag in zip(predictions, is_tmas) if flag]
wsi_true = [label for label, flag in zip(labels, is_tmas) if not flag]
wsi_pred = [prediction for prediction, flag in zip(predictions, is_tmas) if not flag]

tma_accuracy = balanced_accuracy_score(tma_true, tma_pred)
non_tma_accuracy = balanced_accuracy_score(wsi_true, wsi_pred)

# Displayed as (TMA, non-TMA, mean of the two).
tma_accuracy, non_tma_accuracy, (tma_accuracy + non_tma_accuracy) / 2

(1.0, 0.8632343340268964, 0.9316171670134482)

In [36]:
import os
from PIL import Image, ImageFile
import torch
import timm
from timm.models.layers import DropPath
import pandas as pd
import torchvision.transforms as transforms
import random
import torch.nn as nn
import numpy as np
import time
import torch.nn.functional as F
from timm.layers import RotaryEmbeddingCat

# Inference setup: build one EVA02 model per checkpoint in `model_locations`,
# load its weights, and resize its positional embeddings so it accepts
# NEW_IMG_SIZE x NEW_IMG_SIZE inputs.

# Lift PIL's pixel-count limit and tolerate truncated image files.
Image.MAX_IMAGE_PIXELS = None
ImageFile.LOAD_TRUNCATED_IMAGES = True

# Set up the device and the model
device = "cuda" if torch.cuda.is_available() else "cpu"

# Load the CSV file
# NOTE(review): despite its name, `test` is read from train.csv and
# `image_dir` points at the training images.
test = pd.read_csv("../data/train.csv")

# Define the paths
image_dir = "../data/train_images"
# Maps a fold name to the checkpoint loaded for that fold's model.
model_locations = {
    'all_the_data': "eva02_base_models_tma_special_pt_2/ema_0.9999_step_50000.pth",
#     'all_the_data': "/kaggle/input/eva02-cutmix/eva02-cutmix/cutmix_mixup_all_the_data/ema_0.9999_step_115000.pth",
}

model_name = "timm/eva02_base_patch14_448.mim_in22k_ft_in22k_in1k"
# One freshly initialised (pretrained=False) model per fold; weights are loaded in the loop below.
models = { fold: timm.create_model(model_name, pretrained=False) for fold in model_locations }
# Target input resolution; divided by the patch size 14 below to get the patch grid.
NEW_IMG_SIZE = 672

for fold in models:
    
    # Mirror the layer replacements made for training.
    # NOTE(review): presumably this only keeps the module structure aligned
    # with the checkpoint, since the model is put in eval mode below — confirm.
    drop_path_rate = 0.5
    dropout_rate = 0.
    head_dropout_rate = 0.3
    drop_path_rates = [x.item() for x in torch.linspace(0, drop_path_rate, len(models[fold].blocks))]

    # Assign drop path rates
    for i, block in enumerate(models[fold].blocks):
        block.drop_path1 = DropPath(drop_prob=drop_path_rates[i])
        block.drop_path2 = DropPath(drop_prob=drop_path_rates[i])
        block.attn.attn_drop = nn.Dropout(p=dropout_rate, inplace=False)
        block.attn.proj_drop = nn.Dropout(p=dropout_rate, inplace=False)
        block.mlp.drop1 = nn.Dropout(p=dropout_rate, inplace=False)
        block.mlp.drop2 = nn.Dropout(p=dropout_rate, inplace=False)

    # 5-way classification head, matching the five entries of the label maps.
    models[fold].head = nn.Linear(models[fold] .head.in_features, 5)
    models[fold].pos_drop = nn.Dropout(dropout_rate)
    models[fold].head_drop = nn.Dropout(head_dropout_rate)

    # Load the weights
    # NOTE(review): strict=False means key mismatches between checkpoint and
    # model do not raise — verify the checkpoint really matches.
    state_dict = torch.load(model_locations[fold], map_location=device)
    models[fold].load_state_dict(state_dict, strict=False)
    
    ########################################################################################################################################

    # Calculate the size of the grid of patches
    num_patches_side = NEW_IMG_SIZE // 14
    # NOTE(review): num_patches is not referenced again in the visible code.
    num_patches = num_patches_side ** 2

    # Extract the original positional embeddings, excluding the class token
    # (the first position is treated as the class token; the rest is assumed
    # to form a square grid).
    pos_embed = models[fold].pos_embed
    old_num_patches_side = int((pos_embed.size(1) - 1) ** 0.5)
    pos_grid = pos_embed[:, 1:].reshape(1, old_num_patches_side, old_num_patches_side, -1)
    # Channels-first layout (1, dim, side, side) as required by F.interpolate.
    pos_grid = pos_grid.permute(0, 3, 1, 2).contiguous()

    # Resize using bilinear interpolation (make sure to keep the embedding dimension unchanged)
    new_pos_grid = F.interpolate(pos_grid, size=(num_patches_side, num_patches_side), mode='bilinear', align_corners=False)

    # Flatten the grid back to a sequence and re-add the class token
    new_pos_embed = torch.cat([pos_embed[:, :1], new_pos_grid.permute(0, 2, 3, 1).contiguous().view(1, num_patches_side * num_patches_side, -1)], dim=1)

    # Update the positional embeddings
    models[fold].pos_embed = nn.Parameter(new_pos_embed)
    models[fold].patch_embed.img_size = (NEW_IMG_SIZE, NEW_IMG_SIZE)
    models[fold].patch_embed.grid_size = (NEW_IMG_SIZE // 14, NEW_IMG_SIZE // 14)
    
    # Rebuild the rotary embedding for the new patch grid.
    # NOTE(review): 768 // 12 is presumably embed_dim // num_heads, and
    # ref_feat_shape=(16, 16) presumably the model's original setting — confirm
    # both against the timm model definition.
    models[fold].rope = RotaryEmbeddingCat(
        768 // 12,
        in_pixels=False,
        feat_shape=models[fold].patch_embed.grid_size,
        ref_feat_shape=(16, 16),
    )

    ########################################################################################################################################
    
    # Move to the target device and switch to inference mode.
    models[fold] = models[fold].to(device)
    models[fold] = models[fold].eval()

    

def calculate_entropy(tile):
    """Return the Shannon entropy, in bits, of a tile's intensity histogram.

    Parameters:
    tile: image object with a ``mode`` attribute that NumPy can convert to an
        array (a PIL image in this notebook). RGB tiles are converted to
        greyscale first; other modes are histogrammed as they are.

    Returns:
    float: entropy of the distribution over the 256 intensity values
        0..255, between 0 and 8. An empty tile yields 0.0.
    """
    # Convert to grayscale if the image is RGB
    if tile.mode == 'RGB':
        tile = tile.convert('L')

    # One bin per intensity value 0..255. The previous `bins=range(256)` was
    # read as 256 bin *edges*, i.e. only 255 bins, which merged the
    # intensities 254 and 255 and so under-estimated the entropy of
    # near-white tiles.
    pixel_counts = np.histogram(tile, bins=256, range=(0, 256))[0]

    total = pixel_counts.sum()
    if total == 0:
        # No pixels: avoid a 0 / 0 division and report "no information".
        return 0.0

    # Normalize to get probabilities
    probabilities = pixel_counts / total

    # Filter out zero probabilities and calculate entropy
    probabilities = probabilities[probabilities > 0]
    entropy = -np.sum(probabilities * np.log2(probabilities))

    return entropy

# Class index -> label string, in the index order of the model outputs.
integer_to_label = dict(enumerate(['HGSC', 'CC', 'EC', 'LGSC', 'MC']))

# Inverse lookup, derived from the mapping above so the two cannot drift apart.
label_to_integer = {label: index for index, label in integer_to_label.items()}

# Inference preprocessing: resize to the enlarged model resolution, convert to
# a tensor, then normalise each channel.
_norm_mean = [0.48145466, 0.4578275, 0.40821073]
_norm_std = [0.26862954, 0.26130258, 0.27577711]

transform = transforms.Compose([
    transforms.Resize(NEW_IMG_SIZE),
    transforms.ToTensor(),
    transforms.Normalize(mean=_norm_mean, std=_norm_std),
])

def process_image(image_path, batch_size=16, total_samples=64):
    """Score one image by aggregating model outputs over sampled 768x768 tiles.

    A roughly uniform grid of tile origins (about 512 candidates) is laid over
    the image, shuffled with a fixed seed and visited in that order. Tiles
    whose `calculate_entropy` score is not above 3 are skipped. The remaining
    tiles are run through every model in `models` in batches, until
    `total_samples` tiles have been scored or the grid is exhausted.

    Parameters:
    image_path (str): Path of the image file to open.
    batch_size (int): Number of accepted tiles per forward pass.
    total_samples (int): Maximum number of tiles to score.

    Returns:
    dict: 5-element tensors on `device`, keyed by 'average_logits',
        'average_probabilities', 'average_log_probabilities',
        'average_log_neg_probabilities', 'max_logits' and 'max_probabilities'.
        Sums are averaged over the models and over the scored tiles. If no
        tile qualifies, the averages are NaN (0 / 0) and the maxima stay -inf.

    Note:
    `random.seed(0)` resets the global `random` state on every call so the
    tile selection is reproducible.
    """
    tile_size = 768
    min_entropy = 3
    total_tiles_to_choose_from = 512

    def _grid_step(extent):
        # Same spacing formula as before, clamped to >= 1: range() raises
        # ValueError on a zero step, which happened for images only a few
        # pixels larger than one tile.
        return max(1, int((((extent - tile_size) ** 2) / total_tiles_to_choose_from) ** 0.5))

    random.seed(0)
    with Image.open(image_path) as img:
        width, height = img.size

        # Define uniform tiles (top-left corners; every tile fits inside the image)
        step_x = _grid_step(width)
        step_y = _grid_step(height)
        uniform_tiles = [
            (j, i)
            for j in range(0, width - tile_size, step_x)
            for i in range(0, height - tile_size, step_y)
        ]
        random.shuffle(uniform_tiles)

        # Initialize statistics tensors
        sum_logits = torch.zeros(5).to(device)
        sum_probabilities = torch.zeros(5).to(device)
        sum_log_probabilities = torch.zeros(5).to(device)
        sum_log_neg_probabilities = torch.zeros(5).to(device)
        max_logits = torch.full((5,), float('-inf')).to(device)
        max_probabilities = torch.full((5,), float('-inf')).to(device)
        total_tiles_processed = 0

        # Tiles accepted for the batch currently being filled
        batch_tiles = []

        tiles_left = len(uniform_tiles)
        for j, i in uniform_tiles:
            if total_tiles_processed >= total_samples:
                break
            tiles_left -= 1

            tile = img.crop((j, i, j + tile_size, i + tile_size))
            if tile.size[0] == tile_size and tile.size[1] == tile_size and calculate_entropy(tile) > min_entropy:
                batch_tiles.append(transform(tile).unsqueeze(0))

            batch_ready = (
                len(batch_tiles) == batch_size
                or total_tiles_processed + len(batch_tiles) == total_samples
                or tiles_left == 0
            )
            # Never run an empty batch: torch.cat([]) raises, which used to
            # happen when the grid ran out right after a rejected tile.
            if not (batch_tiles and batch_ready):
                continue

            # Process tiles in batches
            batch_tiles_tensor = torch.cat(batch_tiles, dim=0).to(device)

            for fold in models:
                logits = models[fold](batch_tiles_tensor)
                probs = logits.softmax(dim=1)
                sum_logits += logits.sum(dim=0) / len(models)
                sum_probabilities += probs.sum(dim=0) / len(models)
                sum_log_probabilities += torch.log(probs).sum(dim=0) / len(models)
                sum_log_neg_probabilities += torch.log(1 - probs).sum(dim=0) / len(models)
                max_logits = torch.max(max_logits, logits.max(dim=0)[0])
                max_probabilities = torch.max(max_probabilities, probs.max(dim=0)[0])

            # Reset the batch
            total_tiles_processed += len(batch_tiles)
            batch_tiles = []

    return {
        'average_logits': sum_logits / total_tiles_processed, 
        'average_probabilities': sum_probabilities / total_tiles_processed, 
        'average_log_probabilities': sum_log_probabilities / total_tiles_processed, 
        'average_log_neg_probabilities': sum_log_neg_probabilities / total_tiles_processed, 
        'max_logits': max_logits, 
        'max_probabilities': max_probabilities,
    }


# Initialize a DataFrame to store the results (replaced by the real one below)
submission = pd.DataFrame(columns=['image_id', 'label'])

total = 0
total_correct = 0

# Visit the images in a fixed pseudo-random order
shuffled_image_ids = test['image_id'].tolist()

random.seed(0)
random.shuffle(shuffled_image_ids)

# One indexed lookup table instead of a boolean-mask scan per image.
# assumes image_id is unique in `test` — TODO confirm
test_by_id = test.set_index('image_id')

# Per-image confidence (largest average logit) and predicted label
probabilities_dict = {}
labels_dict = {}

with torch.no_grad():
    # Process images and store their confidence and predicted label
    for image_id in shuffled_image_ids:
        image_path = os.path.join(image_dir, f"{image_id}.png")
        results = process_image(image_path)

        # Z-score every statistic and average them into one ensemble score.
        # NOTE(review): `average_result` is computed but the prediction below
        # only uses 'average_logits'.
        average_result = torch.zeros(5)
        for method in results:
            results[method] = results[method].cpu()
            if method == 'average_log_neg_probabilities':
                # Negated so that, like the other statistics, larger means more likely.
                mean = (-results[method]).mean()
                std = (-results[method]).std()
                average_result += (-results[method] - mean) / std
            else:
                mean = results[method].mean()
                std = results[method].std()
                average_result += (results[method] - mean) / std

        average_result = average_result / len(results)

        probabilities_dict[image_id] = (results['average_logits']).max().item()
        labels_dict[image_id] = integer_to_label[(results['average_logits']).argmax().item()]
        print(f"{image_id}_{test_by_id.at[image_id, 'is_tma']}: Guessed {labels_dict[image_id]} was really {test_by_id.at[image_id, 'label']} | {results['average_logits']}")

sorted_probabilities = sorted(probabilities_dict.items(), key=lambda x: x[1])

# The least confident quarter of the images is relabelled 'Other'. With no
# processed image there is nothing to index, so fall back to a threshold that
# relabels nothing.
if sorted_probabilities:
    threshold_prob = sorted_probabilities[int(len(sorted_probabilities) * 0.25)][1]
else:
    threshold_prob = float('-inf')

# (image_id, label) pairs in order of increasing confidence
modified_labels = [
    (image_id, 'Other' if probability < threshold_prob else labels_dict[image_id])
    for image_id, probability in sorted_probabilities
]

# Build the submission frame directly: concatenating onto the empty
# placeholder frame is deprecated in pandas and degrades the id column to
# object dtype.
new_submission = pd.DataFrame(modified_labels, columns=['image_id', 'label'])

# Ensure there are no duplicate entries and reset the index
submission = new_submission.drop_duplicates(subset='image_id').reset_index(drop=True)

# submission.to_csv("/kaggle/working/submission.csv", index=False)

12902_False: Guessed EC was really EC | tensor([-0.4551, -1.7309,  3.3703, -2.5687,  1.3749])
18014_False: Guessed CC was really CC | tensor([-1.4731,  5.9430,  1.7641, -3.1101, -2.2189])
24617_False: Guessed HGSC was really HGSC | tensor([ 5.7943, -0.0595, -1.1560, -2.6102, -0.0116])
39990_False: Guessed HGSC was really HGSC | tensor([ 4.9722, -0.5116, -0.9595, -0.1688, -0.7827])
1925_False: Guessed HGSC was really HGSC | tensor([ 2.7392e+00,  1.9131e-01, -5.7958e-04, -1.4853e+00, -1.9090e-02])
21232_False: Guessed HGSC was really EC | tensor([ 1.3298, -1.8386,  1.1597,  0.8315, -0.3904])
6175_False: Guessed HGSC was really HGSC | tensor([ 5.6998,  1.1837, -2.0748, -1.7332, -0.4406])
47960_False: Guessed CC was really CC | tensor([ 0.8279,  5.6552,  1.1812, -3.1329, -3.1216])
36063_False: Guessed CC was really CC | tensor([-1.2599,  6.5254,  1.7078, -3.0790, -3.0787])
53688_False: Guessed HGSC was really HGSC | tensor([ 4.3264e+00, -1.5734e-03, -2.6865e-01, -8.7101e-01, -1.1869e+00])


KeyboardInterrupt: 

In [31]:
# Sanity check of the tile grid used for inference: for every image size from
# 1000 to 9900 px (step 100) report grids with fewer candidates than requested
# and grids whose last tile would reach the image border.
total_tiles_to_choose_from = 512
for width in range(1000, 10000, 100):
    step_x = int((((width - 768) ** 2) / total_tiles_to_choose_from) ** 0.5)
    for height in range(1000, 10000, 100):
        step_y = int((((height - 768) ** 2) / total_tiles_to_choose_from) ** 0.5)
        uniform_tiles = [
            (j, i)
            for j in range(0, width - 768, step_x)
            for i in range(0, height - 768, step_y)
        ]

        # Largest x / y origin in the grid (0 when the grid is empty).
        max_0 = max((j for j, _ in uniform_tiles), default=0)
        max_1 = max((i for _, i in uniform_tiles), default=0)

        if len(uniform_tiles) < total_tiles_to_choose_from:
            print(len(uniform_tiles), width, height)
        if max_0 + 768 >= width or max_1 + 768 > height:
            print(max_0, max_1, width, height)

In [6]:
import difflib

def compare_strings(string1, string2):
    """Return the unified diff between two multi-line strings as a list of lines."""
    before, after = (text.splitlines() for text in (string1, string2))
    return list(difflib.unified_diff(before, after, lineterm=''))

# NOTE(review): string_a / string_b are not defined in any visible cell; they
# presumably held the two script versions being compared — define them before
# running this cell.
differences = compare_strings(string_a, string_b)
for diff_line in differences:
    print(diff_line)


--- 
+++ 
@@ -25,7 +25,8 @@
 # Define the paths
 image_dir = "/kaggle/input/UBC-OCEAN/test_images"
 model_locations = {
-    'all_the_data': "/kaggle/input/eva02-tma-special/eva02-tma-special/ema_0.9999_step_10000.pth",
+    'all_the_data': "/kaggle/input/eva02-tma-special/eva02-tma-special/pt_2/ema_0.9999_step_50000.pth",
+#     'all_the_data': "/kaggle/input/eva02-cutmix/eva02-cutmix/cutmix_mixup_all_the_data/ema_0.9999_step_115000.pth",
 }
 
 model_name = "timm/eva02_base_patch14_448.mim_in22k_ft_in22k_in1k"
@@ -144,9 +145,9 @@
     random.seed(0)
     with Image.open(image_path) as img:
         width, height = img.size
-        
+
         # Define uniform tiles
-        total_tiles_to_choose_from = 128
+        total_tiles_to_choose_from = 512
         uniform_tiles = [(j, i) for j in range(0, width, int((((width - 768) ** 2) / total_tiles_to_choose_from) ** 0.5)) for i in range(0, (height - 768), int(((width ** 2) / total_tiles_to_choose_from) ** 0.5))]
         random.shuffle(u