In [1]:
# !pip uninstall lightning-bolts -y
# !pip install git+https://github.com/PytorchLightning/lightning-bolts.git@master --upgrade


In [2]:
import numpy as np
import glob
import pandas as pd
import matplotlib.pyplot as plt
import json
import os
import pytorch_lightning as pl
import cv2
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision.transforms as transforms
from torchvision.utils import make_grid
from torchvision.io import read_image, ImageReadMode
from PIL import Image
import pdb

In [3]:
from pytorch_lightning.callbacks import early_stopping, model_checkpoint, ProgressBar, LearningRateMonitor
from pl_bolts.models.self_supervised import Moco_v2, BYOL
from pl_bolts.transforms.dataset_normalizations import imagenet_normalization
from pl_bolts.datamodules import AsynchronousLoader
from pytorch_lightning.loggers import TensorBoardLogger

In [4]:
import warnings
# Silences every warning category for the rest of the kernel session,
# including deprecation notices from the imported libraries.
warnings.filterwarnings('ignore')

In [5]:
# dataset_path="processed_data/"

# dfs= []

# for filepath in glob.glob(dataset_path+"/*/*.json"):
#     with open(filepath, "r") as file:
        
#         data = json.load(file)
#         df = pd.DataFrame(data["result"])
#         slide = os.path.basename(data["slide"]).split(".")[0]
#         df["local_class"] = df["class"] == "tumor"
#         df["global_class"] = slide.split("_")[0] == "tumor"
#         df["path"] = os.path.join(os.path.dirname(filepath), "patches") + "/" + df["class"] + "/" + df["x"].astype(str).str.zfill(6)+"_"+df["y"].astype(str).str.zfill(6)+".png"
#         df = df.drop(columns=["w", "h", "class"])
#         dfs.append(df)

# dataset = pd.concat(dfs)

# dataset_path="processed_data/"
# dataset.set_index(["path"])
# dataset.to_csv(dataset_path+"data.csv")

# dataset["path"].values

In [6]:
device = "cuda"
class Camelyon16PreprocessedDataset(torch.utils.data.Dataset):
    """
    Map-style dataset over a table of pre-extracted image patches.

    ``data`` is indexed positionally with ``.iloc`` and every row must expose
    a ``"path"`` entry (image file to read) and a ``"local_class"`` entry
    (returned as the label).

    Each item is ``((image, image), label)``. Both views are the very same
    tensor: no augmentation happens here, so it has to be applied elsewhere.
    """

    def __init__(self, data, transforms=None):
        # `transforms` is only stored; __getitem__ never applies it.
        self.transforms = transforms
        self.data = data

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        row = self.data.iloc[idx]

        # Read as 3-channel RGB and divide the pixel values by 255.
        raw = read_image(row["path"], mode=ImageReadMode.RGB)
        image = raw / 255

        return (image, image), row["local_class"]
    
    

In [7]:
# dataset = Camelyon16PreprocessedDataset("./processed_data/data.csv",
#                                        transforms=transforms.Compose([transforms.ToTensor()]))

# dataloader = torch.utils.data.DataLoader(dataset=dataset,
#                                          batch_size=1024,
#                                          shuffle=True,
#                                          prefetch_factor=3,
#                                          num_workers=8)

# for imgs, labels in dataloader:

#     plt.figure(figsize=(10, 10))
#     plt.title((1.0*labels).mean(), fontsize=18)
#     plt.imshow(make_grid(imgs, 32).numpy().transpose(1, 2, 0))
#     plt.show()

In [8]:
device = "cuda"

# Augmentation pipeline built from nn.Module transforms so it can be moved to
# `device` and applied to whole batches of image tensors.
_moco_augmentations = [
    transforms.ColorJitter(0.8, 0.8, 0.8, 0.2),
    transforms.GaussianBlur(23, sigma=(0.1, 2.0)),
    transforms.RandomHorizontalFlip(),
    transforms.RandomVerticalFlip(),
    transforms.RandomResizedCrop(224),
    transforms.RandomGrayscale(),
    imagenet_normalization(),
]

moco_transform = nn.Sequential(*_moco_augmentations).to(device).eval()

# TorchScript-compiled copy of the same pipeline.
moco_transform_scripted = torch.jit.script(moco_transform).cuda()

In [9]:
  
class Camelyon16Preprocessed(pl.LightningDataModule):
    """LightningDataModule serving the patches listed in ``<data_path>/data.csv``.

    The rows of the CSV are shuffled once at construction time, optionally
    truncated to a fraction (``warmup``), and then split into a validation
    part (the first ``valid_portion`` of the kept rows) and a training part
    (the remainder).

    Args:
        data_path: Directory that contains ``data.csv``.
        valid_portion: Fraction of the kept rows reserved for validation.
        warmup: Fraction of the shuffled rows to keep at all (1 keeps all).
        seed: Optional seed for the shuffle. ``None`` (default) uses NumPy's
            global random state, exactly as before. Pass a fixed value to get
            the same train/validation split on every run, e.g. when resuming
            from a checkpoint.
    """

    def __init__(self, data_path="processed_data", valid_portion=0.2, warmup=1, seed=None):
        super().__init__()
        self.data_path = data_path
        self.data = pd.read_csv(os.path.join(self.data_path, "data.csv"))

        idx = np.arange(len(self.data))
        if seed is None:
            np.random.shuffle(idx)
        else:
            np.random.default_rng(seed).shuffle(idx)

        idx = idx[:int(len(idx) * warmup)]
        n_valid = int(len(idx) * valid_portion)
        self.train_idx = idx[n_valid:]
        self.valid_idx = idx[:n_valid]

    def _make_loader(self, indices, batch_size, prefetch_factor, shuffle=False):
        """Build an AsynchronousLoader over the rows selected by ``indices``."""
        dataset = Camelyon16PreprocessedDataset(self.data.iloc[indices],
                                                transforms=[transforms.ToTensor(), transforms.ToTensor()])

        loader = torch.utils.data.DataLoader(dataset=dataset,
                                             batch_size=batch_size,
                                             shuffle=shuffle,
                                             prefetch_factor=prefetch_factor,
                                             num_workers=8,
                                             pin_memory=True,
                                             drop_last=True)

        return AsynchronousLoader(loader, device=device)

    def val_dataloader(self):
        """Loader over the validation split (batch size 128, not shuffled).

        The hook must be spelled ``val_dataloader`` for Lightning to pick it
        up; the previous spelling ``vale_dataloader`` was never called.
        """
        return self._make_loader(self.valid_idx, batch_size=128, prefetch_factor=4)

    # Backward-compatible alias for the old, misspelled method name.
    vale_dataloader = val_dataloader

    def train_dataloader(self):
        """Loader over the training split (batch size 256, shuffled)."""
        return self._make_loader(self.train_idx, batch_size=256, prefetch_factor=8, shuffle=True)
    

In [10]:
class Mocov2_gpu_transform(Moco_v2):
    """Moco_v2 subclass that runs ``moco_transform`` on both views of every
    batch inside the training/validation step, before delegating to the
    parent implementation."""

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # Dummy inputs (image, image, queue) exposed as example_input_array.
        dummy_img = torch.zeros((1, 3, 224, 224), dtype=torch.float32)
        dummy_queue = torch.zeros((128, 8192), dtype=torch.float32)
        self.example_input_array = (dummy_img, dummy_img, dummy_queue)

    def _augment_batch(self, batch):
        """Return ``batch`` with both views passed through ``moco_transform``."""
        (view_q, view_k), target = batch
        # The augmentation itself is excluded from autograd.
        with torch.no_grad():
            view_q = moco_transform(view_q)
            view_k = moco_transform(view_k)
        return (view_q, view_k), target

    def training_step(self, batch, batch_idx):
        return super().training_step(self._augment_batch(batch), batch_idx)

    def validation_step(self, batch, batch_idx):
        return super().validation_step(self._augment_batch(batch), batch_idx)

    
class Byol_gpu_transform(BYOL):
    """BYOL subclass that runs ``moco_transform`` on both views of every
    batch inside the training/validation step, before delegating to the
    parent implementation."""

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # Single dummy image exposed as example_input_array.
        self.example_input_array = torch.zeros((1, 3, 224, 224), dtype=torch.float32)

    def _augment_batch(self, batch):
        """Return ``batch`` with both views passed through ``moco_transform``."""
        (view_q, view_k), target = batch
        # The augmentation itself is excluded from autograd.
        with torch.no_grad():
            view_q = moco_transform(view_q)
            view_k = moco_transform(view_k)
        return (view_q, view_k), target

    def training_step(self, batch, batch_idx):
        return super().training_step(self._augment_batch(batch), batch_idx)

    def validation_step(self, batch, batch_idx):
        return super().validation_step(self._augment_batch(batch), batch_idx)
    

In [11]:
# Checkpoints are written to models/encoder, ranked by the logged "train_loss".
checkpoint_callback = model_checkpoint.ModelCheckpoint(dirpath='models/encoder',
                                                       monitor="train_loss")

# NOTE(review): the two callbacks below are created but are not passed to the
# Trainer further down, so they currently have no effect.
earlystop_callback = early_stopping.EarlyStopping(monitor="train_loss")

lrmonitor_callback = LearningRateMonitor(logging_interval='step')


datamodule = Camelyon16Preprocessed()


# NOTE(review): `embd_dim` looks like a typo for `emb_dim`. The model summary
# printed by `trainer.fit` reports 128-dimensional encoder outputs, so the
# value 2048 does not appear to take effect. It is left untouched here because
# changing the embedding size would no longer match the existing checkpoint or
# the (128, 8192) example queue -- confirm the intended size before fixing.
model = Mocov2_gpu_transform("resnet50",
                embd_dim=2048,
                #num_negatives=8192,
                use_mlp=True,
                batch_size=256,
                learning_rate=0.03)

# model = Byol_gpu_transform(num_classes=1,
#              input_height=224,
#              batch_size=128,
#              warmup_epochs=1,
#              max_epochs=20,
#              num_workers=8,
#              base_encoder="resnet50")

logger = TensorBoardLogger("lightning_logs", default_hp_metric=False, name="MOCO", log_graph=True)

# Resume only if the checkpoint is actually on disk, so that a fresh
# "Restart & Run All" (no previous run) still trains instead of failing.
resume_checkpoint = "models/encoder/moco.ckpt"
if not os.path.exists(resume_checkpoint):
    print(f"Checkpoint {resume_checkpoint} not found; training starts from scratch.")
    resume_checkpoint = None

trainer = pl.trainer.Trainer(gpus=1, callbacks=[checkpoint_callback],
                             max_epochs=10,resume_from_checkpoint=resume_checkpoint,
                             precision=16,accumulate_grad_batches=8,
                             logger=logger)

Using native 16bit precision.
GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs


In [12]:
# Debug run: quick check that everything works end to end
# debug_trainer = pl.trainer.Trainer(gpus=1,fast_dev_run=True,callbacks=[checkpoint_callback],
#                                  profiler="pytorch",
#                                  max_epochs=10,
#                                  precision=16)
# debug_trainer.fit(model,datamodule=datamodule)

In [None]:
# Start training `model` with the loaders provided by `datamodule`.
trainer.fit(model,datamodule=datamodule)

Restoring states from the checkpoint file at models/encoder/moco.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Restored all states from the checkpoint file at models/encoder/moco.ckpt

  | Name      | Type   | Params | In sizes         | Out sizes
--------------------------------------------------------------------
0 | encoder_q | ResNet | 28.0 M | [1, 3, 224, 224] | [1, 128] 
1 | encoder_k | ResNet | 28.0 M | [1, 3, 224, 224] | [1, 128] 
--------------------------------------------------------------------
28.0 M    Trainable params
28.0 M    Non-trainable params
55.9 M    Total params
223.733   Total estimated model params size (MB)


Validation sanity check: 0it [00:00, ?it/s]

Training: -1it [00:00, ?it/s]



In [None]:
model.encoder_q

In [None]:
# Re-wrap every child module of the query encoder except the last one in a
# Sequential, moved to CPU and set to eval mode.
# NOTE(review): presumably the dropped last child is the projection head -- confirm.
# The child modules are shared with `model.encoder_q`, so `.cpu()`/`.eval()`
# also affect them inside `model`.
feature = nn.Sequential(*list(model.encoder_q.children())[:-1]).cpu().eval()

In [None]:
# Persist only the weights of the truncated encoder.
# NOTE(review): the file is named "mocov3" although the model above is a
# Moco_v2 subclass -- confirm the intended name.
torch.save(feature.state_dict(), "models/encoder/mocov3.pth")

In [None]:
torch.cuda.is_available()