In [1]:
import pandas as pd
import os

def get_image_path(image_id: int) -> str:
    """Return the tile folder path for one image.

    The result is the relative tile root ``../tiles_768`` joined with the
    string form of ``image_id``.
    """
    tiles_root = '../tiles_768'
    folder_name = str(image_id)
    return os.path.join(tiles_root, folder_name)

# Load the training metadata table from the relative data directory.
train = pd.read_csv("../data/train.csv")

# Derive each row's tile folder from its image_id and store it as a new column.
train['tile_path'] = train['image_id'].apply(get_image_path)
train.head()

Unnamed: 0,image_id,label,image_width,image_height,is_tma,tile_path
0,4,HGSC,23785,20008,False,../tiles_768/4
1,66,LGSC,48871,48195,False,../tiles_768/66
2,91,HGSC,3388,3388,True,../tiles_768/91
3,281,LGSC,42309,15545,False,../tiles_768/281
4,286,EC,37204,30020,False,../tiles_768/286


In [2]:
from PIL import Image
import torch
import torch.nn as nn
import timm
from timm.models.layers import DropPath
import copy

# Prefer the GPU when one is available; otherwise run on the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"
model_name = "timm/eva02_base_patch14_448.mim_in22k_ft_in22k_in1k"

print(f"Using device {device} and model {model_name}")

model = timm.create_model(model_name, pretrained=True)

# Define the maximum drop path rate and dropout rate.
# Settings for folds 0, 1, 3, 4:
max_drop_path_rate = 0.3
dropout_rate = 0.1
# Settings for fold 2 (swap in when loading a fold-2 checkpoint):
# max_drop_path_rate = 0.15
# dropout_rate = 0.0

# Drop path rate grows linearly from 0 at the first block to the maximum
# at the last block.
drop_path_rates = [x.item() for x in torch.linspace(0, max_drop_path_rate, len(model.blocks))]

# Assign drop path and dropout modules to every transformer block.
for i, block in enumerate(model.blocks):
    block.drop_path1 = DropPath(drop_prob=drop_path_rates[i])
    block.drop_path2 = DropPath(drop_prob=drop_path_rates[i])
    block.attn.attn_drop = nn.Dropout(p=dropout_rate, inplace=False)
    block.attn.proj_drop = nn.Dropout(p=dropout_rate, inplace=False)
    block.mlp.drop1 = nn.Dropout(p=dropout_rate, inplace=False)
    block.mlp.drop2 = nn.Dropout(p=dropout_rate, inplace=False)

# Replace the classification head with a 5-way linear layer so the module
# layout lines up with the checkpoint loaded below.
model.head = nn.Linear(model.head.in_features, 5)

state_dict = torch.load('eva02_base_models/fold_1/epoch_0_step_16000.pth', map_location=device)
# strict=False tolerates key mismatches, so report them explicitly instead of
# silently continuing with weights that were never loaded.
load_result = model.load_state_dict(state_dict, strict=False)
if load_result.missing_keys or load_result.unexpected_keys:
    print(
        "Warning: checkpoint keys did not fully match the model. "
        f"Missing keys: {load_result.missing_keys}; "
        f"unexpected keys: {load_result.unexpected_keys}"
    )

# model.head == nn.Linear(768, 5)
# Drop the classifier so the forward pass returns the features that fed it.
model.head = nn.Identity()

model = model.to(device)

Using device cuda and model timm/eva02_base_patch14_448.mim_in22k_ft_in22k_in1k


  return _VF.meshgrid(tensors, **kwargs)  # type: ignore[attr-defined]


In [3]:
import os
from PIL import Image
from torch.utils.data import Dataset
import random

# Label names in index order: the position of a name is its integer code.
_LABEL_NAMES = ['HGSC', 'CC', 'EC', 'LGSC', 'MC']

# Integer class index -> label name.
integer_to_label = dict(enumerate(_LABEL_NAMES))

# Label name -> integer class index (exact inverse of integer_to_label).
label_to_integer = {name: code for code, name in integer_to_label.items()}

class ImageDataset(Dataset):
    """Dataset over the PNG files located directly inside one folder.

    Args:
        folder_path: Directory to scan (non-recursively) for files whose name
            ends with ``.png``, case-insensitively.
        transform: Optional callable applied to each loaded PIL image before
            it is returned.

    Indexing is deterministic: item ``i`` is always the ``i``-th file in
    sorted path order, so one pass over the dataset visits every file exactly
    once.
    """

    def __init__(self, folder_path, transform=None):
        self.folder_path = folder_path
        self.transform = transform
        # Sort because os.listdir returns entries in arbitrary order; a stable
        # order makes the index -> file mapping reproducible.
        self.image_files = sorted(
            os.path.join(folder_path, f)
            for f in os.listdir(folder_path)
            if f.lower().endswith('.png')
        )

    def __len__(self):
        """Return the number of PNG files found in the folder."""
        return len(self.image_files)

    def __getitem__(self, idx):
        """Load the ``idx``-th image and apply the transform, if any."""
        # Use the requested index rather than a random pick so that iterating
        # the dataset covers each file once, without duplicates or omissions.
        image_path = self.image_files[idx]
        # Force three channels so palette/alpha/greyscale PNGs do not break a
        # downstream 3-channel normalisation.
        image = Image.open(image_path).convert("RGB")

        if self.transform:
            image = self.transform(image)

        return image
            

In [4]:
import torchvision.transforms as transforms

# Normalisation statistics, one value per image channel.
_NORM_MEAN = [0.48145466, 0.4578275, 0.40821073]
_NORM_STD = [0.26862954, 0.26130258, 0.27577711]

# Preprocessing applied to each tile, in order: resize, convert to a tensor,
# then normalise channel-wise.
_preprocess_steps = [
    transforms.Resize(448),
    transforms.ToTensor(),
    transforms.Normalize(mean=_NORM_MEAN, std=_NORM_STD),
]
train_transform = transforms.Compose(_preprocess_steps)

In [None]:
from torch.utils.data import DataLoader
import torch
import os

BATCH_SIZE = 256
save_folder = '../image_tensors'

# Make sure the save directory exists
os.makedirs(save_folder, exist_ok=True)

# Run every tile folder through the model and cache one feature tensor per
# image_id. Existing files are skipped, so the loop can be resumed.
model.eval()
with torch.no_grad():
    for _, row in train.iterrows():
        image_id = row['image_id']
        save_path = os.path.join(save_folder, f"{image_id}.pt")

        # Check if the tensor for this image_id already exists
        if os.path.exists(save_path):
            print(f"Tensor for image_id {image_id} already exists at {save_path}. Skipping computation.")
            continue

        folder_path = row['tile_path']
        dataset = ImageDataset(folder_path=folder_path, transform=train_transform)

        # torch.cat raises on an empty list, so skip folders with no tiles
        # instead of aborting the whole run.
        if len(dataset) == 0:
            print(f"No tiles found for image_id {image_id} in {folder_path}. Skipping.")
            continue

        dataloader = DataLoader(dataset, batch_size=BATCH_SIZE, num_workers=8)
        all_tensors = []
        for images in dataloader:
            images = images.to(device)
            output = model(images)
            # Move each batch off the accelerator right away: this bounds GPU
            # memory use and makes the saved file loadable on CPU-only hosts.
            all_tensors.append(output.cpu())
        concatenated_tensor = torch.cat(all_tensors, dim=0)

        # Save the concatenated tensor. Write to a temporary file and rename
        # it so an interrupted save never leaves a partial file that the
        # existence check above would mistake for a finished result.
        tmp_path = save_path + ".tmp"
        torch.save(concatenated_tensor, tmp_path)
        os.replace(tmp_path, save_path)
        print(f"Saved tensor for image_id {image_id} at {save_path} with shape {concatenated_tensor.shape}")


Tensor for image_id 4 already exists at ../image_tensors/4.pt. Skipping computation.
Tensor for image_id 66 already exists at ../image_tensors/66.pt. Skipping computation.
Tensor for image_id 91 already exists at ../image_tensors/91.pt. Skipping computation.
Tensor for image_id 281 already exists at ../image_tensors/281.pt. Skipping computation.
Tensor for image_id 286 already exists at ../image_tensors/286.pt. Skipping computation.
Tensor for image_id 431 already exists at ../image_tensors/431.pt. Skipping computation.
Tensor for image_id 706 already exists at ../image_tensors/706.pt. Skipping computation.
Tensor for image_id 970 already exists at ../image_tensors/970.pt. Skipping computation.
Tensor for image_id 1020 already exists at ../image_tensors/1020.pt. Skipping computation.
Saved tensor for image_id 1080 at ../image_tensors/1080.pt with shape torch.Size([620, 768])
Saved tensor for image_id 1101 at ../image_tensors/1101.pt with shape torch.Size([454, 768])
Saved tensor for im

In [None]:
# import torch
# import os

# save_folder = '../image_tensors'
# image_id = 'your_image_id'  # replace with the actual image_id you want to load

# # Construct the path to the file
# load_path = os.path.join(save_folder, f"{image_id}.pt")

# # Check if the file exists
# if os.path.exists(load_path):
#     # Load the tensor from the file
#     loaded_tensor = torch.load(load_path)
#     print(f"Loaded tensor for image_id {image_id} from {load_path}")

#     # You can now use loaded_tensor as needed in your code
#     # For example, print its shape
#     print("Shape of the loaded tensor:", loaded_tensor.shape)
# else:
#     print(f"No saved tensor found for image_id {image_id} at {load_path}")
