In [11]:
import pandas as pd
from pathlib import Path
import os
import torch
from PIL import Image
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
import pytorch_lightning as pl
import albumentations as A
from albumentations.pytorch import ToTensorV2
import cv2 as cv
import math
import numpy as np

Image.MAX_IMAGE_PIXELS = None

In [12]:
root = Path('/home/cqwu/lw/UBC')
test_ann = root / "test.csv"
test_dir = root / "test_thumbnails"
tma_test_dir = root / "test_images"

In [13]:



def crop_image_ratio(image):
    height, width, _ = image.shape
    aspect_ratio = float(width) / height
    inverse_aspect_ratio = float(height) / width
    if aspect_ratio < 1.5 and inverse_aspect_ratio < 1.5:
        new_width = width // 2
        new_height = height // 2
        cropped_images = [
            image[:new_height, :new_width],
            image[:new_height, new_width:],
            image[new_height:, :new_width],
            image[new_height:, new_width:]
        ]
    elif aspect_ratio >= 1.5:
        num_crops = math.ceil(aspect_ratio)
        crop_width = width // num_crops
        cropped_images = []
        for i in range(num_crops):
            start_col = i * crop_width
            end_col = start_col + crop_width
            cropped_image = image[:, start_col:end_col]
            cropped_images.append(cropped_image)
    elif inverse_aspect_ratio >= 1.5:
        num_crops = math.ceil(inverse_aspect_ratio)
        crop_height = height // num_crops
        cropped_images = []
        for i in range(num_crops):
            start_row = i * crop_height
            end_row = start_row + crop_height
            cropped_image = image[start_row:end_row, :]
            cropped_images.append(cropped_image)

    return cropped_images


# def crop_image(image):
#     min_area = image.shape[0] * image.shape[1] * 0.07
#     gray = cv.cvtColor(image, cv.COLOR_BGR2GRAY)
#     _, binary = cv.threshold(gray, 127, 255, cv.THRESH_BINARY)
#     contours, _ = cv.findContours(binary, cv.RETR_EXTERNAL, cv.CHAIN_APPROX_SIMPLE)
#     del gray, binary
#     output_images = []
#     for contour in contours:
#         x, y, w, h = cv.boundingRect(contour)
#         area = w * h
#         if area >= min_area:
#             output_images.append(image[y:y + h, x:x + w].copy())
#     del image
#     return output_images

def crop_image(image, min_factor=0.1):
    cropped_images = []
    gray = cv.cvtColor(image, cv.COLOR_BGR2GRAY)
    _, binary = cv.threshold(gray, 127, 255, cv.THRESH_BINARY)
    horizontal_projection = np.sum(binary, axis=1)
    vertical_projection = np.sum(binary, axis=0)
    horizontal_index = np.nonzero(horizontal_projection)[0]
    vertical_index = np.nonzero(vertical_projection)[0]

    if horizontal_index.size != horizontal_projection.size or vertical_index.size != vertical_projection.size:
        y_ranges = find_discontinuous_ranges(horizontal_index)
        x_ranges = find_discontinuous_ranges(vertical_index)
        for y_range in y_ranges:
            if y_range[1] - y_range[0] + 1 >= min_factor * image.shape[0]:
                for x_range in x_ranges:
                    if x_range[1] - x_range[0] + 1 >= min_factor * image.shape[1]:
                        cropped_images.append(image[y_range[0]:y_range[1] + 1, x_range[0]:x_range[1] + 1].copy())
    else:
        cropped_images.append(image)
    return cropped_images


def find_discontinuous_ranges(lst):
    # input [1, 2, 3, 6, 7, 9, 10, 13, 14]
    # output [(1, 3), (6, 7), (9, 10), (13, 14)]
    diff = np.diff(lst)
    boundaries = np.where(diff != 1)[0] + 1
    start_values = np.insert(lst[boundaries], 0, lst[0])
    end_values = np.append(lst[boundaries - 1], lst[-1])
    ranges = list(zip(start_values, end_values))
    return ranges


def get_img_path(image_id):
    path = test_dir / f"{image_id}_thumbnail.png"
    if path.exists():
        return path
    else:
        return tma_test_dir / f"{image_id}.png"


def resize_short_edge(image, target_size=512):
    height, width = image.shape[:2]
    aspect_ratio = float(target_size) / min(height, width)
    new_height = int(round(height * aspect_ratio))
    new_width = int(round(width * aspect_ratio))
    resized_image = cv.resize(image, (new_width, new_height))
    return resized_image


class CancerThumbnailDataset(Dataset):

    def __init__(self, df_data, img_root_dir, transforms=None, split: float = 0.90):
        self.split = split
        self.img_root_dir = img_root_dir
        self.transforms = None
        self.data = df_data
        self.transforms = transforms

        self.imgs = [get_img_path(id) for id in self.data["image_id"]]

    def __getitem__(self, idx: int) -> tuple:
        img = cv.imread(str(self.imgs[idx]))
        # img = cv.resize(img, None, fx=1 / 3, fy=1 / 3)
        resize_short_edge(img, target_size=512)
        ims = crop_image(img)
        imms = []
        images = []
        for m in ims:
            imms += crop_image_ratio(m)

        for img in imms:
            img = cv.cvtColor(img, cv.COLOR_BGR2RGB)
            if self.transforms:
                c_img = self.transforms(image=img)['image']
                images.append(c_img)

        return images

    def __len__(self) -> int:
        return len(self.data)



In [18]:
from torch import nn
import torchvision.models as models


class Net(nn.Module):
    def __init__(self, num_classes=5):
        super(Net, self).__init__()
        self.model = models.resnext50_32x4d()
        self.model.fc = nn.Linear(self.model.fc.in_features, 500)
        self.fc = nn.Linear(500, 100)
        self.dropout = nn.Dropout(p=0.2)
        self.fc1 = nn.Linear(100, num_classes)
        self.sf = nn.Softmax(dim=1)

    def forward(self, x):
        x = self.model(x)
        x = self.dropout(x)
        x = self.fc(x)
        x = self.fc1(x)
        # x = self.sf(x)
        return x


In [20]:
from torchinfo import summary
mol=Net()
summary(mol, input_size=(1,3, 224, 224))

Layer (type:depth-idx)                        Output Shape              Param #
Net                                           [1, 5]                    --
├─ResNet: 1-1                                 [1, 500]                  --
│    └─Conv2d: 2-1                            [1, 64, 112, 112]         9,408
│    └─BatchNorm2d: 2-2                       [1, 64, 112, 112]         128
│    └─ReLU: 2-3                              [1, 64, 112, 112]         --
│    └─MaxPool2d: 2-4                         [1, 64, 56, 56]           --
│    └─Sequential: 2-5                        [1, 256, 56, 56]          --
│    │    └─Bottleneck: 3-1                   [1, 256, 56, 56]          63,488
│    │    └─Bottleneck: 3-2                   [1, 256, 56, 56]          71,168
│    │    └─Bottleneck: 3-3                   [1, 256, 56, 56]          71,168
│    └─Sequential: 2-6                        [1, 512, 28, 28]          --
│    │    └─Bottleneck: 3-4                   [1, 512, 28, 28]          349,184

In [5]:
bs = 1
num_workers = 4
df_test_data = pd.read_csv(test_ann)
test_transforms = A.Compose([
    A.Resize(224, 224),
    A.Normalize(mean=[0.485, 0.456, 0.406],
                std=[0.229, 0.224, 0.225]),
    ToTensorV2(),
])
test_dataset = CancerThumbnailDataset(df_test_data, img_root_dir=test_dir, transforms=test_transforms)
test_dataloader = DataLoader(test_dataset, batch_size=bs, shuffle=False)

In [6]:
for imgs in test_dataloader:
    print(f'image size: {torch.cat(imgs).shape}')

image size: torch.Size([2, 3, 224, 224])
image size: torch.Size([4, 3, 224, 224])
image size: torch.Size([4, 3, 224, 224])
image size: torch.Size([4, 3, 224, 224])
image size: torch.Size([8, 3, 224, 224])
image size: torch.Size([4, 3, 224, 224])
image size: torch.Size([4, 3, 224, 224])
image size: torch.Size([12, 3, 224, 224])
image size: torch.Size([2, 3, 224, 224])


In [7]:

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
model = Net()
model = model.to(device)

model_path = r'/home/cqwu/lw/UBC/logs/ubc/version_10/checkpoints/best-model-epoch=142-val_acc=0.93.ckpt'
state = torch.load(model_path)
model.load_state_dict(state['state_dict'], strict=True)


<All keys matched successfully>

In [8]:
# @torch.no_grad()
# def predict(model, dl):
#     model.eval()
#     pp = []
#     for i, t in enumerate(dl):
#         p = model(t.to(device))
#         pp.append(p)
#         print("\r{}/{}".format(i + 1, len(dl) // bs), end="")
#     print()
#     result = torch.cat(pp)
#     conf, cls = result.topk(1, 1)
#     conf = conf.squeeze(1).cpu().numpy().tolist()
#     cls = cls.squeeze(1).cpu().numpy().tolist()
#     # 可选
#     cls = [cs if c > 0.6 else 5 for cs, c in zip(cls, conf)]
#     return cls
@torch.no_grad()
def predict(model, dl):
    model.eval()
    pp = []
    for i, t in enumerate(dl):
        p = model(torch.cat(t).to(device))
        sp = torch.sum(p, dim=0)
        pp.append(sp)
        print("\r{}/{}".format(i + 1, len(dl) // bs), end="")
    print()
    result = torch.stack(pp, dim=0)
    print(result.shape)
    conf, cls = result.topk(1, 1)
    conf = conf.squeeze(1).cpu().numpy().tolist()
    cls = cls.squeeze(1).cpu().numpy().tolist()
    # 可选
    cls = [cs if c > 0.6 else 5 for cs, c in zip(cls, conf)]
    return cls


res = predict(model, test_dataloader)
# 2 2 3 2 3 1 2 2 2
display(res)

9/9
torch.Size([9, 5])


[2, 2, 3, 2, 2, 1, 2, 2, 2]

In [9]:
labels = ['CC', 'EC', 'HGSC', 'LGSC', 'MC', 'Other']
label_list = list(map(lambda x: labels[x], res))
df_test_data['label'] = label_list
display(df_test_data)

Unnamed: 0,image_id,image_width,image_height,label
0,41,28469,16987,HGSC
1,4,23785,20008,HGSC
2,66,48871,48195,LGSC
3,91,3388,3388,HGSC
4,281,42309,15545,HGSC
5,286,37204,30020,EC
6,431,39991,40943,HGSC
7,706,75606,25965,HGSC
8,970,32131,18935,HGSC


In [10]:
df_test_data[["image_id", "label"]].to_csv("submission.csv", index=False)