In [31]:
import pandas as pd
from torch.utils.data import Dataset
from typing import Optional, List, TypeVar, Union
from pathlib import Path
from torch.utils.data import Dataset
import rasterio
from tqdm import tqdm
import numpy as np
import cv2
import warnings
from segmentation.config import Configs as CFG
from segmentation.scr.rle_coding import *
from segmentation.scr.tilling_loader import random_sub_df
import matplotlib.pyplot as plt


PandasDataFrame = TypeVar('pandas.core.frame.DataFrame')

In [2]:
df = pd.read_csv(CFG.path_df_kidney_1_til)

In [29]:
class Tilling_loader(Dataset):
    """Creating a dataloader for image tiling
    """

    def __init__(self,
                 name_data: str,
                 path_to_df: str,
                 use_random_sub: bool = False,
                 empty_tile_pct: int = 0,
                 sample_limit :Optional[int] = None,
                 transform=None

                 ):
        super().__init__()
        self.name_data = name_data
        self.path_to_df = Path(path_to_df)
        self.use_random_sub = use_random_sub
        self.empty_tile_pct = empty_tile_pct
        self.sample_limit = sample_limit
        self.transform = transform

        df = pd.read_csv(self.path_to_df)
        if self.use_random_sub:
            self.df = random_sub_df(df=df, sample_limit=self.sample_limit,
                                    empty_tile_pct=self.empty_tile_pct)
        else:
            self.df = df

    def __len__(self) -> int:
        return self.df.shape[0]

    def __getitem__(self, idx) -> tuple:
        img_path, lb_path, is_empty, bbx, px_stats, size = self.df.iloc[idx, :].values

        return img_path, lb_path, is_empty, bbx, px_stats, size


data_loader = Tilling_loader(
    name_data='kidney_1_tilling',
    path_to_df='data\\kidney_1_tilling.csv',
    use_random_sub=True,
    empty_tile_pct=10,
    sample_limit=20
)

20
Dataset contains 1528 empty and 12146 non-empty tiles.
Sample 2 empty and 18 non-empty tiles.


In [27]:
data_loader.df['is_empty'].value_counts(normalize=True)

is_empty
False    0.9
True     0.1
Name: proportion, dtype: float64

In [30]:
for i in range(len(data_loader)):
    x = data_loader[i]

In [28]:
data_loader.df

Unnamed: 0,path_img,path_lb,is_empty,bbx,px_stats,size
478,data\kidney_1_tilling\images\0079_0_791_512_51...,data\kidney_1_tilling\labels\0079_0_791_512_51...,True,"(0, 791, 512, 512)","[18597, 55056]","(1303, 912)"
847,data\kidney_1_tilling\images\0141_400_0_512_51...,data\kidney_1_tilling\labels\0141_400_0_512_51...,False,"(400, 0, 512, 512)","[19180, 48425]","(1303, 912)"
848,data\kidney_1_tilling\images\0141_0_395_512_51...,data\kidney_1_tilling\labels\0141_0_395_512_51...,False,"(0, 395, 512, 512)","[19180, 48425]","(1303, 912)"
1079,data\kidney_1_tilling\images\0179_400_791_512_...,data\kidney_1_tilling\labels\0179_400_791_512_...,True,"(400, 791, 512, 512)","[19040, 32293]","(1303, 912)"
2339,data\kidney_1_tilling\images\0389_400_791_512_...,data\kidney_1_tilling\labels\0389_400_791_512_...,False,"(400, 791, 512, 512)","[18557, 34849]","(1303, 912)"
2390,data\kidney_1_tilling\images\0398_0_395_512_51...,data\kidney_1_tilling\labels\0398_0_395_512_51...,False,"(0, 395, 512, 512)","[18595, 33713]","(1303, 912)"
2703,data\kidney_1_tilling\images\0450_400_395_512_...,data\kidney_1_tilling\labels\0450_400_395_512_...,False,"(400, 395, 512, 512)","[18478, 34685]","(1303, 912)"
3412,data\kidney_1_tilling\images\0568_0_791_512_51...,data\kidney_1_tilling\labels\0568_0_791_512_51...,False,"(0, 791, 512, 512)","[18166, 65440]","(1303, 912)"
3867,data\kidney_1_tilling\images\0644_400_395_512_...,data\kidney_1_tilling\labels\0644_400_395_512_...,False,"(400, 395, 512, 512)","[18649, 42467]","(1303, 912)"
4928,data\kidney_1_tilling\images\0821_0_395_512_51...,data\kidney_1_tilling\labels\0821_0_395_512_51...,False,"(0, 395, 512, 512)","[18772, 32847]","(1303, 912)"


In [32]:
len(data_loader)

13674

In [None]:
    name_data='kidney_1_tilling',
    strong_empty=False,
    path_img_dir=CFG.path_img_kidney1,
                         path_lb_dir=CFG.path_lb_kidney1,
                         cache_dir=CFG.cache_dir 

In [9]:
type(pd.DataFrame())

pandas.core.frame.DataFrame

In [16]:
new_df['is_empty'].value_counts(normalize=True)

is_empty
False    0.960006
True     0.039994
Name: proportion, dtype: float64

In [105]:
df_empty = df[df['is_empty'] == True].sample(1000)
df_no_empty = df[df['is_empty'] == False].sample(6000)
frames = [df_empty, df_no_empty]
pd.concat(frames).sort_index()

Unnamed: 0,path_img,path_lb,is_empty,bbx,px_stats,size
2,data\kidney_1_tilling\images\0000_0_395_512_51...,data\kidney_1_tilling\labels\0000_0_395_512_51...,True,"(0, 395, 512, 512)","[18515, 36640]","(1303, 912)"
4,data\kidney_1_tilling\images\0000_0_791_512_51...,data\kidney_1_tilling\labels\0000_0_791_512_51...,True,"(0, 791, 512, 512)","[18515, 36640]","(1303, 912)"
5,data\kidney_1_tilling\images\0000_400_791_512_...,data\kidney_1_tilling\labels\0000_400_791_512_...,True,"(400, 791, 512, 512)","[18515, 36640]","(1303, 912)"
6,data\kidney_1_tilling\images\0001_0_0_512_512.png,data\kidney_1_tilling\labels\0001_0_0_512_512.png,True,"(0, 0, 512, 512)","[18320, 37358]","(1303, 912)"
8,data\kidney_1_tilling\images\0001_0_395_512_51...,data\kidney_1_tilling\labels\0001_0_395_512_51...,True,"(0, 395, 512, 512)","[18320, 37358]","(1303, 912)"
...,...,...,...,...,...,...
13668,data\kidney_1_tilling\images\2278_0_0_512_512.png,data\kidney_1_tilling\labels\2278_0_0_512_512.png,True,"(0, 0, 512, 512)","[10069, 18379]","(1303, 912)"
13669,data\kidney_1_tilling\images\2278_400_0_512_51...,data\kidney_1_tilling\labels\2278_400_0_512_51...,True,"(400, 0, 512, 512)","[10069, 18379]","(1303, 912)"
13671,data\kidney_1_tilling\images\2278_400_395_512_...,data\kidney_1_tilling\labels\2278_400_395_512_...,True,"(400, 395, 512, 512)","[10069, 18379]","(1303, 912)"
13672,data\kidney_1_tilling\images\2278_0_791_512_51...,data\kidney_1_tilling\labels\2278_0_791_512_51...,True,"(0, 791, 512, 512)","[10069, 18379]","(1303, 912)"


In [89]:
800 / 7200

0.1111111111111111

In [82]:
1528/ (1528+3565)

0.30001963479285293

In [55]:
empty_tiles = df[df['is_empty'] == True].index.values

In [None]:
num_empty_tiles_to_sample 

In [None]:
np.random.choice(empty_tiles, min(num_empty_tiles_to_sample, len(empty_tiles)), replace=False)

In [54]:
df[df['is_empty'] == True].index.values

array([    0,     1,     2, ..., 13671, 13672, 13673], dtype=int64)

In [37]:
df = pd.read_csv(CFG.path_df_kidney_1_til)

In [39]:
df['is_empty'].value_counts(normalize=True)[False]

0.8882550826385842

In [16]:
num_tiles = df.shape[0]
num_tiles 

13674

In [18]:
empty_tile_pct = 20
num_empty_tiles_to_sample = int(num_tiles * empty_tile_pct / 100)
num_pos_tiles_to_sample = int(num_tiles * (1 - empty_tile_pct / 100))

In [19]:
num_empty_tiles_to_sample

2734

In [20]:
num_pos_tiles_to_sample

10939

In [21]:
1367 + 12306

13673

In [31]:
np.random.choice(10, min(5, 4), replace=False)

array([3, 2, 7, 6])

In [None]:
num_empty_tiles_to_sample = int(self.sample_limit * self.empty_tile_pct)
            num_pos_tiles_to_sample = int(self.sample_limit * (1 - self.empty_tile_pct))

            empty_idxs_to_sample = np.random.choice(len(empty_tiles), min(num_empty_tiles_to_sample, len(empty_tiles)), replace=False)
            pos_idxs_to_sample = np.random.choice(len(populated_tiles), min(num_pos_tiles_to_sample, len(populated_tiles)), replace=False)

            neg_samples = list(map(empty_tiles.__getitem__, empty_idxs_to_sample))
            pos_samples = list(map(populated_tiles.__getitem__, pos_idxs_to_sample))

            new_samples = pos_samples + neg_samples

            self.samples = new_samples
            if self.empty_tile_pct == 0.0:
                print(f'Dropped {empty} empty tiles.')
            print(f'Dataset contains {len(neg_samples)} empty and {len(pos_samples)} non-empty tile

In [8]:
df

Unnamed: 0,path_img,path_lb,is_empty,bbx,px_stats,size
0,data\kidney_1_tilling\images\0000_0_0_512_512.png,data\kidney_1_tilling\labels\0000_0_0_512_512.png,True,"(0, 0, 512, 512)","[18515, 36640]","(1303, 912)"
1,data\kidney_1_tilling\images\0000_400_0_512_51...,data\kidney_1_tilling\labels\0000_400_0_512_51...,True,"(400, 0, 512, 512)","[18515, 36640]","(1303, 912)"
2,data\kidney_1_tilling\images\0000_0_395_512_51...,data\kidney_1_tilling\labels\0000_0_395_512_51...,True,"(0, 395, 512, 512)","[18515, 36640]","(1303, 912)"
3,data\kidney_1_tilling\images\0000_400_395_512_...,data\kidney_1_tilling\labels\0000_400_395_512_...,True,"(400, 395, 512, 512)","[18515, 36640]","(1303, 912)"
4,data\kidney_1_tilling\images\0000_0_791_512_51...,data\kidney_1_tilling\labels\0000_0_791_512_51...,True,"(0, 791, 512, 512)","[18515, 36640]","(1303, 912)"
...,...,...,...,...,...,...
13669,data\kidney_1_tilling\images\2278_400_0_512_51...,data\kidney_1_tilling\labels\2278_400_0_512_51...,True,"(400, 0, 512, 512)","[10069, 18379]","(1303, 912)"
13670,data\kidney_1_tilling\images\2278_0_395_512_51...,data\kidney_1_tilling\labels\2278_0_395_512_51...,True,"(0, 395, 512, 512)","[10069, 18379]","(1303, 912)"
13671,data\kidney_1_tilling\images\2278_400_395_512_...,data\kidney_1_tilling\labels\2278_400_395_512_...,True,"(400, 395, 512, 512)","[10069, 18379]","(1303, 912)"
13672,data\kidney_1_tilling\images\2278_0_791_512_51...,data\kidney_1_tilling\labels\2278_0_791_512_51...,True,"(0, 791, 512, 512)","[10069, 18379]","(1303, 912)"
