In [1]:
import numpy as np
import pandas as pd
import os
import glob
import h5py
import wandb

import string
import random

import torch 
from torch.utils.data import Dataset, DataLoader
import torch.nn as nn
import torch.nn.functional as F

import lightning as L
from lightning.pytorch.loggers import WandbLogger
from lightning.pytorch.callbacks import ModelCheckpoint


from torch import optim, utils, Tensor

import matplotlib.pyplot as plt
from sklearn.metrics import RocCurveDisplay, PrecisionRecallDisplay
import sklearn.linear_model


from itables import show
from src.model.SimpleMILModels import Attention, MaxMIL, AttentionResNet
from src.dataloaders.DataLoaders import RetCCLFeatureLoader, RetCCLFeatureLoaderMem


import zarr
import seaborn as sns

# Notebook-level switch; presumably gates recomputation of the attention maps.
# NOTE(review): it is not read by any of the cells visible here — confirm where it is consumed.
recompute_attn = False

In [2]:
def sigmoid_array(x):
    """Apply the logistic sigmoid ``1 / (1 + exp(-x))`` element-wise.

    ``x`` may be a scalar or a NumPy array; the result has the same shape
    as ``np.exp(-x)``.
    """
    decay = np.exp(-x)
    return 1 / (decay + 1)

In [3]:


# Route both HTTP and HTTPS traffic through the same DKFZ proxy URL.
import os
for proxy_var in ('HTTP_PROXY', 'HTTPS_PROXY'):
    os.environ[proxy_var] = "http://www-int.dkfz-heidelberg.de:80"


In [4]:
# W&B run group to analyse; it filters the runs query and names the attention-map output sub-directory.
group = "BGUAJ0"

In [5]:

# Weights & Biases API client used to query the runs of the selected group.
api = wandb.Api()


In [6]:
# Runs of the project, filtered so that their "group" field equals `group`.
runs = api.runs(path="psmirnov/UKHD_RetCLL_299_CT", filters={"group": group})

In [7]:
runs

<Runs psmirnov/UKHD_RetCLL_299_CT>

Load in the features (not too much memory needed)

In [8]:
# Directory holding one "<slide_id>.h5" feature file per slide (absolute, machine-specific path).
# NOTE(review): the value already ends with "/" and later cells append another "/",
# which yields a double slash in the joined path — harmless on POSIX, but worth tidying.
path_to_extracted_features = '/home/p163v/histopathology/TCGA/ffpe/299/'

In [9]:
# Slide-level metadata table; later cells use its project, slide_id and labels columns.
slide_meta = pd.read_csv("../metadata/tcga_labeled_data.csv")


In [10]:
slide_meta


Unnamed: 0.2,Unnamed: 0.1,Unnamed: 0,CT_Status,TCGA_Case,0,project,PatientID,slide_id,patches,labels
0,0,/omics/groups/OE0585/internal/p163v/TCGA_downl...,Chromothripsis,TCGA-DC-4745,TCGA-DC-4745-01Z-00-DX1.f5aa77b0-aa9b-4f0c-a00...,READ,TCGA-DC-4745,TCGA-DC-4745-01Z-00-DX1,3779,1
1,2,/omics/groups/OE0585/internal/p163v/TCGA_downl...,No Chromothripsis,TCGA-02-0003,TCGA-02-0003-01Z-00-DX1.6171b175-0972-4e84-999...,GBM,TCGA-02-0003,TCGA-02-0003-01Z-00-DX1,11146,0
2,3,/omics/groups/OE0585/internal/p163v/TCGA_downl...,No Chromothripsis,TCGA-02-0003,TCGA-02-0003-01Z-00-DX2.c7652d8d-d78f-49ae-825...,GBM,TCGA-02-0003,TCGA-02-0003-01Z-00-DX2,8501,0
3,4,/omics/groups/OE0585/internal/p163v/TCGA_downl...,No Chromothripsis,TCGA-02-0003,TCGA-02-0003-01Z-00-DX3.995C2924-E298-4517-82A...,GBM,TCGA-02-0003,TCGA-02-0003-01Z-00-DX3,2998,0
4,6,/omics/groups/OE0585/internal/p163v/TCGA_downl...,Chromothripsis,TCGA-CH-5767,TCGA-CH-5767-01Z-00-DX1.5a8d8c09-2837-4981-9c4...,PRAD,TCGA-CH-5767,TCGA-CH-5767-01Z-00-DX1,998,1
...,...,...,...,...,...,...,...,...,...,...
3155,12587,/omics/groups/OE0585/internal/p163v/TCGA_downl...,Chromothripsis,TCGA-66-2769,TCGA-66-2769-01Z-00-DX1.101b6493-a7b3-42cf-ae2...,LUSC,TCGA-66-2769,TCGA-66-2769-01Z-00-DX1,1796,1
3156,12594,/omics/groups/OE0585/internal/p163v/TCGA_downl...,No Chromothripsis,TCGA-D1-A17S,TCGA-D1-A17S-01Z-00-DX1.8EBE62E2-B0D1-480D-8A1...,UCEC,TCGA-D1-A17S,TCGA-D1-A17S-01Z-00-DX1,8390,0
3157,12607,/omics/groups/OE0585/internal/p163v/TCGA_downl...,No Chromothripsis,TCGA-BB-4217,TCGA-BB-4217-01Z-00-DX1.E351C8DB-F6CB-4327-9FE...,HNSC,TCGA-BB-4217,TCGA-BB-4217-01Z-00-DX1,6599,0
3158,12614,/omics/groups/OE0585/internal/p163v/TCGA_downl...,No Chromothripsis,TCGA-QR-A70E,TCGA-QR-A70E-01Z-00-DX1.1BF4CFF5-C43B-4145-B5A...,PCPG,TCGA-QR-A70E,TCGA-QR-A70E-01Z-00-DX1,4592,0


In [23]:



# Re-read the slide-level metadata table (same file as loaded earlier in the notebook).
slide_meta = pd.read_csv("../metadata/tcga_labeled_data.csv")

# Select every GBM slide together with its label (no random subsampling happens here).
is_gbm = slide_meta["project"] == "GBM"
gbm_slides = slide_meta.loc[is_gbm, "slide_id"]
test_labels = slide_meta.loc[is_gbm, "labels"]

#all_files = [x for x in slide_annots.file if os.path.isfile(path_to_extracted_features + "/" + x)]
#    all_features = {file: h5py.File(path_to_extracted_features + "/" + file, 'r')['feats'][:] for file in all_files}

In [24]:
# Keep only the GBM slides (and their labels) whose feature file is present on disk.
feature_paths = [path_to_extracted_features + "/" + slide + ".h5" for slide in gbm_slides]
file_exists = np.array([os.path.exists(feature_path) for feature_path in feature_paths])
gbm_slides = gbm_slides[file_exists]
test_labels = test_labels[file_exists]

In [25]:
# Mean label of the retained GBM slides; with the 0/1 labels shown in the metadata preview
# this is the fraction of slides labelled 1.
test_labels.mean()

0.7183908045977011

In [26]:
# Copy each slide's 'feats' dataset fully into memory. The `with` block closes every
# HDF5 file as soon as its data has been read, instead of leaving one open handle per slide.
test_features = []
for slide_id in gbm_slides:
    with h5py.File(path_to_extracted_features + "/" + slide_id + ".h5", 'r') as feature_file:
        test_features.append(feature_file['feats'][:])

# Loss

We use the loss as the early-stopping criterion, so the `best_loss*` checkpoint of each run is the one loaded below.


In [30]:
# Accumulators filled by the per-run loop: loaded models and their per-slide attention arrays.
model_list = []
attention_list = []


def cv(x):
    """Return the coefficient of variation of ``x``: standard deviation divided by mean."""
    return np.std(x) / np.mean(x)

In [31]:
# Wrap the in-memory GBM feature arrays and their labels in the project dataset class.
# NOTE(review): patches_per_iter='all' presumably returns every patch of a slide per item — confirm in RetCCLFeatureLoaderMem.
test_data = RetCCLFeatureLoaderMem(test_features, np.array(test_labels), patches_per_iter='all')

# batch_size=1 yields one dataset item per batch; no sampler is passed, so items come in dataset order.
RetCCLTest = DataLoader(test_data, batch_size=1, num_workers=1)#, sampler=valid_Sampler)


In [32]:
# For every run in the group: locate its best-loss checkpoint, load the model on CPU and
# collect the attention output for each test slide.
# NOTE: model_list / attention_list are appended to, so re-running this cell without
# re-creating them first duplicates their entries.
for ii in range(len(runs)):
    run = runs[ii]

    # Map artifact type prefix (the text before the first '-') to the artifact itself.
    arts = run.logged_artifacts()
    arts_dict = {a.name.removesuffix(':'+a.version).split('-')[0]: a for a in arts}
    # The 'model' artifact name has the form "model-<folder>:<version>"; keep only <folder>.
    checkpoint_folder_name = arts_dict['model'].name.split('-')[1].removesuffix(':'+arts_dict['model'].version)
    # Hyperparameters of THIS run (previously always read from runs[0]).
    args = run.config

    chkpt_pattern = 'lightning_logs/'+checkpoint_folder_name+'/checkpoints/best_loss*'
    # Sorted so the choice is deterministic if several files match the pattern.
    chkpt_matches = sorted(glob.glob(chkpt_pattern))
    if not chkpt_matches:
        raise FileNotFoundError("No checkpoint found matching " + chkpt_pattern)
    chkpt_file = chkpt_matches[0]

    # load_from_checkpoint is a classmethod that returns a new instance, so it is called on
    # the class; the instance previously built from `args` was discarded by this call anyway.
    model = Attention.load_from_checkpoint(chkpt_file, map_location=torch.device('cpu'))
    model.eval()
    model_list.append(model)

    # Inference only: skip building the autograd graph.
    with torch.no_grad():
        model_attention = [model.attention_forward(torch.tensor(x).to(model.device)).detach().cpu().numpy() for x, y in iter(RetCCLTest)]

    attention_list.append(model_attention)
    

In [21]:
# Number of per-run attention lists collected so far (the loading loop appends one per run).
len(attention_list)

5

In [20]:
# Regroup the attention arrays from "per run -> per slide" to "per slide -> all runs"
# and horizontally stack the runs' arrays for each slide.
attention_combined = []
for per_slide_arrays in zip(*attention_list):
    attention_combined.append(np.hstack(per_slide_arrays))


In [27]:
# Shape check on the first slide after averaging over axis 1, the axis np.hstack joined the
# per-run arrays along. NOTE(review): presumably one entry per run on that axis — confirm.
np.mean(attention_combined[0], axis=1).shape



(1, 11146)

In [30]:
# Write one zarr store per GBM slide containing the tile coordinates ('coords') and the
# attention averaged over axis 1 of the stacked per-run arrays ('attn').
# attention_combined is indexed by position, so it must line up one-to-one with gbm_slides.
assert len(attention_combined) == len(gbm_slides), "attention maps and slides are out of sync"
for jj, slidename in enumerate(gbm_slides):
    print('Writing Attention Map ' + slidename)
    # The context manager closes the HDF5 file once the coordinates are copied into memory.
    with h5py.File(path_to_extracted_features + "/" + slidename + ".h5", 'r') as feature_file:
        coords = feature_file['coords'][:]
    # mode='w' replaces any existing store for this slide.
    outarray_root = zarr.open("/home/p163v/histopathology/attention_maps/tcga/"+ group + "/"+ slidename + "_per_tile_attention.zarr", mode='w')
    outarray_root['coords'] = coords
    outarray_root['attn'] = np.mean(attention_combined[jj], axis=1)[:]


Writing Attention Map TCGA-02-0003-01Z-00-DX1
Writing Attention Map TCGA-02-0003-01Z-00-DX2
Writing Attention Map TCGA-02-0003-01Z-00-DX3
Writing Attention Map TCGA-06-0129-01Z-00-DX1
Writing Attention Map TCGA-06-0129-01Z-00-DX2
Writing Attention Map TCGA-06-0129-01Z-00-DX3
Writing Attention Map TCGA-19-0957-01Z-00-DX1
Writing Attention Map TCGA-06-0143-01Z-00-DX1
Writing Attention Map TCGA-06-0143-01Z-00-DX2
Writing Attention Map TCGA-06-0143-01Z-00-DX3
Writing Attention Map TCGA-4W-AA9R-01Z-00-DX1
Writing Attention Map TCGA-06-0138-01Z-00-DX1
Writing Attention Map TCGA-06-0138-01Z-00-DX2
Writing Attention Map TCGA-06-0138-01Z-00-DX3
Writing Attention Map TCGA-06-0138-01Z-00-DX4
Writing Attention Map TCGA-06-5858-01Z-00-DX1
Writing Attention Map TCGA-06-0646-01Z-00-DX1
Writing Attention Map TCGA-06-0646-01Z-00-DX2
Writing Attention Map TCGA-06-0646-01Z-00-DX3
Writing Attention Map TCGA-41-3915-01Z-00-DX1
Writing Attention Map TCGA-19-2624-01Z-00-DX1
Writing Attention Map TCGA-26-5139