In [67]:
import numpy as np
import pandas as pd
import torch
from torchvision import transforms
from torch.utils.data import Dataset, DataLoader
from skimage import io as image_io
from torch import optim
import timm
from tqdm import tqdm
# from CLAM.models.model_clam import CLAM_SB

In [50]:
df = pd.read_csv('/mnt/d/uczelnia/magister/TCGA_breast_patch/metadata.csv', index_col=0)
df['patient_id'] = df['file_name'].apply(lambda x: x.split('/')[6])
df.set_index(['patient_id', 'file_name'], inplace=True)

In [72]:
df.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,label
patient_id,file_name,Unnamed: 2_level_1
TCGA-3C-AALI-01A-01-T_R1,/mnt/d/uczelnia/magister/TCGA_breast_patch/TCGA-3C-AALI-01A-01-T_R1/patch_10_10.png,hr+her2+
TCGA-3C-AALI-01A-01-T_R1,/mnt/d/uczelnia/magister/TCGA_breast_patch/TCGA-3C-AALI-01A-01-T_R1/patch_10_11.png,hr+her2+
TCGA-3C-AALI-01A-01-T_R1,/mnt/d/uczelnia/magister/TCGA_breast_patch/TCGA-3C-AALI-01A-01-T_R1/patch_10_12.png,hr+her2+
TCGA-3C-AALI-01A-01-T_R1,/mnt/d/uczelnia/magister/TCGA_breast_patch/TCGA-3C-AALI-01A-01-T_R1/patch_10_13.png,hr+her2+
TCGA-3C-AALI-01A-01-T_R1,/mnt/d/uczelnia/magister/TCGA_breast_patch/TCGA-3C-AALI-01A-01-T_R1/patch_10_14.png,hr+her2+


In [115]:
def get_train_test_split(df: pd.DataFrame, group_sample_size: int=50, target_column: str="label", same_train_classes_count: bool = False):
    test_df = pd.DataFrame(columns=df.columns)
    for c in df[target_column].unique():
        selected_ids = np.random.choice([elem[0] for elem in df.index[df['label'] == 'hr+her2+']], group_sample_size, replace=False)
        
        test_df = pd.concat([df.loc[selected_ids], test_df.loc[:]])
    train_df = df.loc[~df.index.isin(test_df.index)]
    
    if same_train_classes_count:
        train_df2 = pd.DataFrame(columns=df.columns)
        max_sample = np.min(df[target_column].value_counts().iloc[0:3].to_numpy())
        for c in df[target_column].unique():
            train_df2 = pd.concat([df[df[target_column] == c].sample(max_sample, random_state=420).loc[:], train_df2.loc[:]])
        train_df = train_df2

    return train_df, test_df

train_df, test_df = get_train_test_split(df)

In [112]:
selected_ids = np.random.choice([elem[0] for elem in df.index[df['label'] == 'hr+her2+']], 50, replace=False)
df.loc[selected_ids]

Unnamed: 0_level_0,Unnamed: 1_level_0,label
patient_id,file_name,Unnamed: 2_level_1
TCGA-AO-A12G-01A-01-B_R2,/mnt/d/uczelnia/magister/TCGA_breast_patch/TCGA-AO-A12G-01A-01-B_R2/patch_10_10.png,hr+her2+
TCGA-AO-A12G-01A-01-B_R2,/mnt/d/uczelnia/magister/TCGA_breast_patch/TCGA-AO-A12G-01A-01-B_R2/patch_10_11.png,hr+her2+
TCGA-AO-A12G-01A-01-B_R2,/mnt/d/uczelnia/magister/TCGA_breast_patch/TCGA-AO-A12G-01A-01-B_R2/patch_10_12.png,hr+her2+
TCGA-AO-A12G-01A-01-B_R2,/mnt/d/uczelnia/magister/TCGA_breast_patch/TCGA-AO-A12G-01A-01-B_R2/patch_10_13.png,hr+her2+
TCGA-AO-A12G-01A-01-B_R2,/mnt/d/uczelnia/magister/TCGA_breast_patch/TCGA-AO-A12G-01A-01-B_R2/patch_10_14.png,hr+her2+
...,...,...
TCGA-BH-A18P-11A-01-T_R2,/mnt/d/uczelnia/magister/TCGA_breast_patch/TCGA-BH-A18P-11A-01-T_R2/patch_9_5.png,hr+her2+
TCGA-BH-A18P-11A-01-T_R2,/mnt/d/uczelnia/magister/TCGA_breast_patch/TCGA-BH-A18P-11A-01-T_R2/patch_9_6.png,hr+her2+
TCGA-BH-A18P-11A-01-T_R2,/mnt/d/uczelnia/magister/TCGA_breast_patch/TCGA-BH-A18P-11A-01-T_R2/patch_9_7.png,hr+her2+
TCGA-BH-A18P-11A-01-T_R2,/mnt/d/uczelnia/magister/TCGA_breast_patch/TCGA-BH-A18P-11A-01-T_R2/patch_9_8.png,hr+her2+


In [113]:
selected_ids = np.random.choice([elem[0] for elem in df.index[df['label'] == 'hr-']], 50, replace=False)
df.loc[selected_ids]

Unnamed: 0_level_0,Unnamed: 1_level_0,label
patient_id,file_name,Unnamed: 2_level_1
TCGA-BH-A0AV-01A-03-B_R2,/mnt/d/uczelnia/magister/TCGA_breast_patch/TCGA-BH-A0AV-01A-03-B_R2/patch_10_1.png,hr-
TCGA-BH-A0AV-01A-03-B_R2,/mnt/d/uczelnia/magister/TCGA_breast_patch/TCGA-BH-A0AV-01A-03-B_R2/patch_10_2.png,hr-
TCGA-BH-A0AV-01A-03-B_R2,/mnt/d/uczelnia/magister/TCGA_breast_patch/TCGA-BH-A0AV-01A-03-B_R2/patch_10_3.png,hr-
TCGA-BH-A0AV-01A-03-B_R2,/mnt/d/uczelnia/magister/TCGA_breast_patch/TCGA-BH-A0AV-01A-03-B_R2/patch_10_4.png,hr-
TCGA-BH-A0AV-01A-03-B_R2,/mnt/d/uczelnia/magister/TCGA_breast_patch/TCGA-BH-A0AV-01A-03-B_R2/patch_10_5.png,hr-
...,...,...
TCGA-A7-A0CE-01A-01-T_R1,/mnt/d/uczelnia/magister/TCGA_breast_patch/TCGA-A7-A0CE-01A-01-T_R1/patch_9_50.png,hr-
TCGA-A7-A0CE-01A-01-T_R1,/mnt/d/uczelnia/magister/TCGA_breast_patch/TCGA-A7-A0CE-01A-01-T_R1/patch_9_51.png,hr-
TCGA-A7-A0CE-01A-01-T_R1,/mnt/d/uczelnia/magister/TCGA_breast_patch/TCGA-A7-A0CE-01A-01-T_R1/patch_9_52.png,hr-
TCGA-A7-A0CE-01A-01-T_R1,/mnt/d/uczelnia/magister/TCGA_breast_patch/TCGA-A7-A0CE-01A-01-T_R1/patch_9_53.png,hr-


In [102]:
len(set([elem[0] for elem in df.index[df['label'] == 'hr-'].to_list()]))

532

In [116]:
data_transforms = transforms.Compose([
        transforms.ToTensor(),
        transforms.Resize((224, 224)),
        # transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ])
class TCGABags(Dataset):
    def __init__(self, df, labels, bag_len=10, mean_bag_length=10, var_bag_length=1, num_bag=1000, seed=7, train=True, transform = data_transforms):
        self.transform = transform
        self.mean_bag_length = mean_bag_length
        self.var_bag_length = var_bag_length
        self.num_bag = num_bag
        self.seed = seed
        self.train = train
        self.df = df
        self.idx = df.index.get_level_values(0).unique()
        self.labels = labels
        self.bag_len = bag_len

        self.r = np.random.RandomState(seed)
        
    def _form_bags(self, df: pd.Series):
        bags_list = []
        labels_list = []

        for i in df.index.get_level_values(0).unique():
            df.loc[i].sample()
            for j in df.loc[i].index:
                img = image_io.imread(j)

        return bags_list, labels_list

    def __len__(self):
        return len(self.idx)

    def __getitem__(self, index):
        batch = self.df.loc[self.idx[index]].sample(self.bag_len, replace=True)
        label = self.labels.index(batch.iloc[0, 0])
        res = torch.empty((10, 3, 224, 224))
        for i, fname in enumerate(batch.index):
            img = image_io.imread(fname)
            if self.transform:
                res[i] = self.transform(img)
        perm = res
        return perm, label
    
dl = DataLoader(TCGABags(df, ['hr+her2-', 'hr-', 'hr+her2+']), batch_size=1)
img, label = next(iter(dl))
img.shape

torch.Size([1, 10, 3, 224, 224])

In [58]:
class TimmCNNEncoder(torch.nn.Module):
    def __init__(self, model_name: str = 'resnet50.tv_in1k', 
                 kwargs: dict = {'features_only': True, 'out_indices': (3,), 'pretrained': True, 'num_classes': 3}, 
                 pool: bool = True):
        super().__init__()
        assert kwargs.get('pretrained', False), 'only pretrained models are supported'
        self.model = timm.create_model(model_name, **kwargs)
        self.model_name = model_name
        if pool:
            self.pool = torch.nn.AdaptiveAvgPool2d(1)
        else:
            self.pool = None
    
    def forward(self, x):
        out = self.model(x)
        if isinstance(out, list):
            assert len(out) == 1
            out = out[0]
        if self.pool:
            out = self.pool(out).squeeze(-1).squeeze(-1)
        return out

In [63]:
class Attn_Net(nn.Module):

    def __init__(self, L = 1024, D = 256, dropout = False, n_classes = 1):
        super(Attn_Net, self).__init__()
        self.module = [
            nn.Linear(L, D),
            nn.Tanh()]

        if dropout:
            self.module.append(nn.Dropout(0.25))

        self.module.append(nn.Linear(D, n_classes))
        
        self.module = nn.Sequential(*self.module)
    
    def forward(self, x):
        return self.module(x), x # N x n_classes

"""
Attention Network with Sigmoid Gating (3 fc layers)
args:
    L: input feature dimension
    D: hidden layer dimension
    dropout: whether to use dropout (p = 0.25)
    n_classes: number of classes 
"""
class Attn_Net_Gated(nn.Module):
    def __init__(self, L = 1024, D = 256, dropout = False, n_classes = 1):
        super(Attn_Net_Gated, self).__init__()
        self.attention_a = [
            nn.Linear(L, D),
            nn.Tanh()]
        
        self.attention_b = [nn.Linear(L, D),
                            nn.Sigmoid()]
        if dropout:
            self.attention_a.append(nn.Dropout(0.25))
            self.attention_b.append(nn.Dropout(0.25))

        self.attention_a = nn.Sequential(*self.attention_a)
        self.attention_b = nn.Sequential(*self.attention_b)
        
        self.attention_c = nn.Linear(D, n_classes)

    def forward(self, x):
        a = self.attention_a(x)
        b = self.attention_b(x)
        A = a.mul(b)
        A = self.attention_c(A)  # N x n_classes
        return A, x

"""
args:
    gate: whether to use gated attention network
    size_arg: config for network size
    dropout: whether to use dropout
    k_sample: number of positive/neg patches to sample for instance-level training
    dropout: whether to use dropout (p = 0.25)
    n_classes: number of classes 
    instance_loss_fn: loss function to supervise instance-level training
    subtyping: whether it's a subtyping problem
"""
class CLAM_SB(nn.Module):
    def __init__(self, gate = True, size_arg = "small", dropout = 0., k_sample=3, n_classes=2,
        instance_loss_fn=nn.CrossEntropyLoss(), subtyping=True, embed_dim=1024):
        super().__init__()
        # self.size_dict = {"small": [embed_dim, 512, 256], "big": [embed_dim, 512, 384]}
        self.size_dict = {"small": [embed_dim, 224, 224], "big": [embed_dim, 512, 384]}
        size = self.size_dict[size_arg]
        fc = [nn.Linear(size[0], size[1]), nn.ReLU(), nn.Dropout(dropout)]
        if gate:
            attention_net = Attn_Net_Gated(L = size[1], D = size[2], dropout = dropout, n_classes = 1)
        else:
            attention_net = Attn_Net(L = size[1], D = size[2], dropout = dropout, n_classes = 1)
        fc.append(attention_net)
        self.attention_net = nn.Sequential(*fc)
        self.classifiers = nn.Linear(size[1], n_classes)
        instance_classifiers = [nn.Linear(size[1], 2) for i in range(n_classes)]
        self.instance_classifiers = nn.ModuleList(instance_classifiers)
        self.k_sample = k_sample
        self.instance_loss_fn = instance_loss_fn
        self.n_classes = n_classes
        self.subtyping = subtyping
    
    @staticmethod
    def create_positive_targets(length, device):
        return torch.full((length, ), 1, device=device).long()
    
    @staticmethod
    def create_negative_targets(length, device):
        return torch.full((length, ), 0, device=device).long()
    
    #instance-level evaluation for in-the-class attention branch
    def inst_eval(self, A, h, classifier): 
        device=h.device
        if len(A.shape) == 1:
            A = A.view(1, -1)
        top_p_ids = torch.topk(A, self.k_sample)[1][-1]
        top_p = torch.index_select(h, dim=0, index=top_p_ids)
        top_n_ids = torch.topk(-A, self.k_sample, dim=1)[1][-1]
        top_n = torch.index_select(h, dim=0, index=top_n_ids)
        p_targets = self.create_positive_targets(self.k_sample, device)
        n_targets = self.create_negative_targets(self.k_sample, device)

        all_targets = torch.cat([p_targets, n_targets], dim=0)
        all_instances = torch.cat([top_p, top_n], dim=0)
        logits = classifier(all_instances)
        all_preds = torch.topk(logits, 1, dim = 1)[1].squeeze(1)
        instance_loss = self.instance_loss_fn(logits, all_targets)
        return instance_loss, all_preds, all_targets
    
    #instance-level evaluation for out-of-the-class attention branch
    def inst_eval_out(self, A, h, classifier):
        device=h.device
        if len(A.shape) == 1:
            A = A.view(1, -1)
        top_p_ids = torch.topk(A, self.k_sample)[1][-1]
        # print(top_p_ids)
        top_p = torch.index_select(h, dim=0, index=top_p_ids)
        p_targets = self.create_negative_targets(self.k_sample, device)
        logits = classifier(top_p)
        p_preds = torch.topk(logits, 1, dim = 1)[1].squeeze(1)
        instance_loss = self.instance_loss_fn(logits, p_targets)
        return instance_loss, p_preds, p_targets

    def forward(self, h, label=None, instance_eval=False, return_features=False, attention_only=False):
        A, h = self.attention_net(h)  # NxK        
        A = torch.transpose(A, 1, 0)  # KxN
        if attention_only:
            return A
        A_raw = A
        A = F.softmax(A, dim=1)  # softmax over N

        if instance_eval:
            total_inst_loss = 0.0
            all_preds = []
            all_targets = []
            inst_labels = F.one_hot(label, num_classes=self.n_classes).squeeze() #binarize label
            for i in range(len(self.instance_classifiers)):
                inst_label = inst_labels[i].item()
                classifier = self.instance_classifiers[i]
                if inst_label == 1: #in-the-class:
                    instance_loss, preds, targets = self.inst_eval(A, h, classifier)
                    all_preds.extend(preds.cpu().numpy())
                    all_targets.extend(targets.cpu().numpy())
                else: #out-of-the-class
                    if self.subtyping:
                        instance_loss, preds, targets = self.inst_eval_out(A, h, classifier)
                        all_preds.extend(preds.cpu().numpy())
                        all_targets.extend(targets.cpu().numpy())
                    else:
                        continue
                total_inst_loss += instance_loss

            if self.subtyping:
                total_inst_loss /= len(self.instance_classifiers)
                
        M = torch.mm(A, h) 
        logits = self.classifiers(M)
        Y_hat = torch.topk(logits, 1, dim = 1)[1]
        Y_prob = F.softmax(logits, dim = 1)
        if instance_eval:
            results_dict = {'instance_loss': total_inst_loss, 'inst_labels': np.array(all_targets), 
            'inst_preds': np.array(all_preds)}
        else:
            results_dict = {}
        if return_features:
            results_dict.update({'features': M})
        return logits, Y_prob, Y_hat, A_raw, results_dict

In [118]:
list(set(train_df.index.levels[0]) & set(test_df.index.levels[0]))


['TCGA-A7-A13F-01A-01-T_R2',
 'TCGA-E9-A1N5-11A-03-T_R2',
 'TCGA-BH-A18P-11A-01-T_R2',
 'TCGA-AC-A62Y-01A-01-T_R4',
 'TCGA-A7-A0CJ-01A-02-T_R2',
 'TCGA-AO-A0JF-01A-01-B_R1',
 'TCGA-AN-A0AL-01A-01-B_R3',
 'TCGA-AO-A0JL-01A-01-T_R2',
 'TCGA-A1-A0SO-01A-02-T_R2',
 'TCGA-S3-AA0Z-01A-01-T_R2',
 'TCGA-A8-A09X-01A-01-B_R1',
 'TCGA-A2-A0T1-01A-02-T_R1',
 'TCGA-LL-A7T0-01A-03-T_R1',
 'TCGA-AC-A2FF-01A-01-T_R1',
 'TCGA-BH-A18J-11A-02-T_R1',
 'TCGA-A8-A09Q-01A-01-B_R2',
 'TCGA-A8-A07L-01A-01-B_R1',
 'TCGA-BH-A0BZ-01A-03-B_R1',
 'TCGA-BH-A0H7-01A-01-T_R2',
 'TCGA-AO-A0JC-01A-01-B_R1',
 'TCGA-BH-A0C3-01A-02-T_R1',
 'TCGA-AO-A12E-01A-01-B_R1',
 'TCGA-E2-A1BC-11A-03-T_R1',
 'TCGA-A7-A6VY-01A-01-T_R1',
 'TCGA-AN-A0XO-01A-01-T_R1',
 'TCGA-BH-A0E6-01A-01-T_R1',
 'TCGA-E9-A1NI-01A-01-T_R1',
 'TCGA-E2-A15P-01A-01-T_R1',
 'TCGA-A8-A096-01A-01-B_R1',
 'TCGA-AC-A62Y-01A-01-T_R3',
 'TCGA-D8-A1XS-01A-01-T_R1',
 'TCGA-A7-A4SA-11A-01-T_R2',
 'TCGA-A8-A07R-01A-01-B_R2',
 'TCGA-AQ-A1H3-01A-03-T_R1',
 'TCGA-E2-A15M

In [126]:
np.sum(np.array([1, 2, 3]) == np.array([1, 3, 3]))

2

In [1]:
train_dataset = TCGABags(train_df, ['hr+her2-', 'hr-', 'hr+her2+'])
test_dataset = TCGABags(test_df, ['hr+her2-', 'hr-', 'hr+her2+'])
train_dl = DataLoader(train_dataset, batch_size=1)
test_dl = DataLoader(test_dataset, batch_size=1)
instance_loss_fn = nn.CrossEntropyLoss()
global_loss_fn = nn.CrossEntropyLoss()
model = CLAM_SB(n_classes=3, instance_loss_fn=instance_loss_fn).to('cpu')
optimizer = optim.Adam(model.parameters(), lr=1e-3,
                       betas=(0.9, 0.999))

device = 'cuda:0'
bag_weight = 0.7

timm_model = TimmCNNEncoder().to(device)
model = model.to(device)
train_losses = []
valid_losses = []
valid_accs = []
for epoch in tqdm(range(15)):
    train_loss = 0.
    val_loss = 0.
    train_error = 0.
    train_inst_loss = 0.
    val_inst_loss = 0
    model.train()
    for i, (data, label) in enumerate(train_dl):
        data, label = data.to(device), label.to(device)
        logits, Y_prob, Y_hat, _, instance_dict = model(timm_model(data[0]), label=label, instance_eval=True)
        loss = global_loss_fn(logits, label)
        loss_value = loss.item()

        instance_loss = instance_dict['instance_loss']
        instance_loss_value = instance_loss.item()
        train_inst_loss += instance_loss_value
        
        total_loss = bag_weight * loss + (1-bag_weight) * instance_loss 

        inst_preds = instance_dict['inst_preds']
        inst_labels = instance_dict['inst_labels']

        train_loss += loss_value
        
        total_loss.backward()
        # step
        optimizer.step()
        optimizer.zero_grad()
    
    train_losses.append(train_loss/len(train_dl))
    actual_labels = []
    predicted_labels = []
    model.eval()
    with torch.inference_mode():
        for batch_idx, (data, label) in enumerate(test_dl):
            data, label = data.to(device), label.to(device)      
            logits, Y_prob, Y_hat, _, instance_dict = model(timm_model(data[0]), label=label, instance_eval=True)
            
            actual_labels.append(label.cpu()[0])
            predicted_labels.append(Y_hat.cpu()[0][0])
            loss = global_loss_fn(logits, label)

            val_loss += loss.item()

            instance_loss = instance_dict['instance_loss']
            
            instance_loss_value = instance_loss.item()
            val_inst_loss += instance_loss_value
        
        correct_sum = np.sum(np.array(predicted_labels) == np.array(actual_labels))
        valid_accs.append(correct_sum/len(predicted_labels))
        valid_losses.append(val_loss/len(test_dl))   

NameError: name 'TCGABags' is not defined

In [64]:
dl = DataLoader(TCGABags(df, ['hr+her2-', 'hr-', 'hr+her2+']), batch_size=1)
emb_model = Attention().to('cuda')
optimizer = optim.Adam(emb_model.parameters(), lr=1e-3,
                       betas=(0.9, 0.999))

emb_model.train()
train_loss = 0.
train_error = 0.
for i, (data, bag_label) in enumerate(dl):
    print(data.shape)
    bag_label = bag_label.to('cuda')
    p = data.permute(0, 1, 4, 2, 3).to('cuda')
    optimizer.zero_grad()
    # calculate loss and metrics
    loss, _ = emb_model.calculate_objective(p, bag_label)
    train_loss += loss.data[0]
    print(bag_label)
    error, _ = emb_model.calculate_classification_error(p, bag_label)
    train_error += error
    
    loss.backward()
        # step
    optimizer.step()
    # backward pass

torch.Size([1, 10, 3, 224, 224])


RuntimeError: Given groups=1, weight of size [20, 3, 5, 5], expected input[10, 224, 3, 224] to have 3 channels, but got 224 channels instead

In [None]:
df.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,label
patient_id,file_name,Unnamed: 2_level_1
TCGA-3C-AALI-01A-01-T_R1,/mnt/d/uczelnia/magister/TCGA_breast_patch/TCGA-3C-AALI-01A-01-T_R1/patch_10_10.png,hr+her2+
TCGA-3C-AALI-01A-01-T_R1,/mnt/d/uczelnia/magister/TCGA_breast_patch/TCGA-3C-AALI-01A-01-T_R1/patch_10_11.png,hr+her2+
TCGA-3C-AALI-01A-01-T_R1,/mnt/d/uczelnia/magister/TCGA_breast_patch/TCGA-3C-AALI-01A-01-T_R1/patch_10_12.png,hr+her2+
TCGA-3C-AALI-01A-01-T_R1,/mnt/d/uczelnia/magister/TCGA_breast_patch/TCGA-3C-AALI-01A-01-T_R1/patch_10_13.png,hr+her2+
TCGA-3C-AALI-01A-01-T_R1,/mnt/d/uczelnia/magister/TCGA_breast_patch/TCGA-3C-AALI-01A-01-T_R1/patch_10_14.png,hr+her2+


label    hr+her2+
Name: /mnt/d/uczelnia/magister/TCGA_breast_patch/TCGA-3C-AALI-01A-01-T_R1/patch_10_10.png, dtype: object
/mnt/d/uczelnia/magister/TCGA_breast_patch/TCGA-3C-AALI-01A-01-T_R1/patch_10_10.png
/mnt/d/uczelnia/magister/TCGA_breast_patch/TCGA-3C-AALI-01A-01-T_R1/patch_10_11.png
/mnt/d/uczelnia/magister/TCGA_breast_patch/TCGA-3C-AALI-01A-01-T_R1/patch_10_12.png
/mnt/d/uczelnia/magister/TCGA_breast_patch/TCGA-3C-AALI-01A-01-T_R1/patch_10_13.png
/mnt/d/uczelnia/magister/TCGA_breast_patch/TCGA-3C-AALI-01A-01-T_R1/patch_10_14.png
/mnt/d/uczelnia/magister/TCGA_breast_patch/TCGA-3C-AALI-01A-01-T_R1/patch_10_15.png
/mnt/d/uczelnia/magister/TCGA_breast_patch/TCGA-3C-AALI-01A-01-T_R1/patch_10_16.png
/mnt/d/uczelnia/magister/TCGA_breast_patch/TCGA-3C-AALI-01A-01-T_R1/patch_10_17.png
/mnt/d/uczelnia/magister/TCGA_breast_patch/TCGA-3C-AALI-01A-01-T_R1/patch_10_18.png
/mnt/d/uczelnia/magister/TCGA_breast_patch/TCGA-3C-AALI-01A-01-T_R1/patch_10_19.png
/mnt/d/uczelnia/magister/TCGA_breast_

KeyboardInterrupt: 

In [None]:
df.loc['TCGA-3C-AALI-01A-01-T_R1'].sample(10, random_state=10)#.loc['/mnt/d/uczelnia/magister/TCGA_breast_patch/TCGA-3C-AALI-01A-01-T_R1/patch_10_10.png']

Unnamed: 0_level_0,label
file_name,Unnamed: 1_level_1
/mnt/d/uczelnia/magister/TCGA_breast_patch/TCGA-3C-AALI-01A-01-T_R1/patch_5_14.png,hr+her2+
/mnt/d/uczelnia/magister/TCGA_breast_patch/TCGA-3C-AALI-01A-01-T_R1/patch_13_15.png,hr+her2+
/mnt/d/uczelnia/magister/TCGA_breast_patch/TCGA-3C-AALI-01A-01-T_R1/patch_7_6.png,hr+her2+
/mnt/d/uczelnia/magister/TCGA_breast_patch/TCGA-3C-AALI-01A-01-T_R1/patch_5_18.png,hr+her2+
/mnt/d/uczelnia/magister/TCGA_breast_patch/TCGA-3C-AALI-01A-01-T_R1/patch_3_16.png,hr+her2+
/mnt/d/uczelnia/magister/TCGA_breast_patch/TCGA-3C-AALI-01A-01-T_R1/patch_8_11.png,hr+her2+
/mnt/d/uczelnia/magister/TCGA_breast_patch/TCGA-3C-AALI-01A-01-T_R1/patch_10_20.png,hr+her2+
/mnt/d/uczelnia/magister/TCGA_breast_patch/TCGA-3C-AALI-01A-01-T_R1/patch_11_17.png,hr+her2+
/mnt/d/uczelnia/magister/TCGA_breast_patch/TCGA-3C-AALI-01A-01-T_R1/patch_9_15.png,hr+her2+
/mnt/d/uczelnia/magister/TCGA_breast_patch/TCGA-3C-AALI-01A-01-T_R1/patch_13_13.png,hr+her2+


In [None]:
df.loc['TCGA-3C-AALI-01A-01-T_R2'].sample(10, random_state=10)

Unnamed: 0_level_0,label
file_name,Unnamed: 1_level_1
/mnt/d/uczelnia/magister/TCGA_breast_patch/TCGA-3C-AALI-01A-01-T_R2/patch_5_11.png,hr+her2+
/mnt/d/uczelnia/magister/TCGA_breast_patch/TCGA-3C-AALI-01A-01-T_R2/patch_9_6.png,hr+her2+
/mnt/d/uczelnia/magister/TCGA_breast_patch/TCGA-3C-AALI-01A-01-T_R2/patch_12_7.png,hr+her2+
/mnt/d/uczelnia/magister/TCGA_breast_patch/TCGA-3C-AALI-01A-01-T_R2/patch_7_15.png,hr+her2+
/mnt/d/uczelnia/magister/TCGA_breast_patch/TCGA-3C-AALI-01A-01-T_R2/patch_9_17.png,hr+her2+
/mnt/d/uczelnia/magister/TCGA_breast_patch/TCGA-3C-AALI-01A-01-T_R2/patch_4_15.png,hr+her2+
/mnt/d/uczelnia/magister/TCGA_breast_patch/TCGA-3C-AALI-01A-01-T_R2/patch_6_17.png,hr+her2+
/mnt/d/uczelnia/magister/TCGA_breast_patch/TCGA-3C-AALI-01A-01-T_R2/patch_3_12.png,hr+her2+
/mnt/d/uczelnia/magister/TCGA_breast_patch/TCGA-3C-AALI-01A-01-T_R2/patch_5_15.png,hr+her2+
/mnt/d/uczelnia/magister/TCGA_breast_patch/TCGA-3C-AALI-01A-01-T_R2/patch_7_11.png,hr+her2+


In [None]:
df.index.get_level_values(0).unique()

Unnamed: 0_level_0,Unnamed: 1_level_0,label
patient_id,file_name,Unnamed: 2_level_1
TCGA-3C-AALI-01A-01-T_R1,/mnt/d/uczelnia/magister/TCGA_breast_patch/TCGA-3C-AALI-01A-01-T_R1/patch_10_10.png,hr+her2+
TCGA-3C-AALI-01A-01-T_R1,/mnt/d/uczelnia/magister/TCGA_breast_patch/TCGA-3C-AALI-01A-01-T_R1/patch_10_11.png,hr+her2+
TCGA-3C-AALI-01A-01-T_R1,/mnt/d/uczelnia/magister/TCGA_breast_patch/TCGA-3C-AALI-01A-01-T_R1/patch_10_12.png,hr+her2+
TCGA-3C-AALI-01A-01-T_R1,/mnt/d/uczelnia/magister/TCGA_breast_patch/TCGA-3C-AALI-01A-01-T_R1/patch_10_13.png,hr+her2+
TCGA-3C-AALI-01A-01-T_R1,/mnt/d/uczelnia/magister/TCGA_breast_patch/TCGA-3C-AALI-01A-01-T_R1/patch_10_14.png,hr+her2+
...,...,...
TCGA-S3-AA17-01A-01-T_R2,/mnt/d/uczelnia/magister/TCGA_breast_patch/TCGA-S3-AA17-01A-01-T_R2/patch_9_5.png,hr+her2-
TCGA-S3-AA17-01A-01-T_R2,/mnt/d/uczelnia/magister/TCGA_breast_patch/TCGA-S3-AA17-01A-01-T_R2/patch_9_6.png,hr+her2-
TCGA-S3-AA17-01A-01-T_R2,/mnt/d/uczelnia/magister/TCGA_breast_patch/TCGA-S3-AA17-01A-01-T_R2/patch_9_7.png,hr+her2-
TCGA-S3-AA17-01A-01-T_R2,/mnt/d/uczelnia/magister/TCGA_breast_patch/TCGA-S3-AA17-01A-01-T_R2/patch_9_8.png,hr+her2-
