In [1]:
import os
#os.environ["CUDA_VISIBLE_DEVICES"] = "1"

In [2]:
import os
from pathlib import Path

#import cv2

import pandas as pd
import numpy as np

#from sklearn.model_selection import train_test_split
#from pdb import set_trace as st

#import pretrainedmodels as pm

import torch
import torch.nn as nn
from torch.utils.data import DataLoader
from torchvision import transforms

#from albumentations import Compose, JpegCompression, CLAHE, RandomRotate90, Transpose, ShiftScaleRotate, \
#        Blur, OpticalDistortion, GridDistortion, HueSaturationValue, Flip, VerticalFlip

from kekas import Keker, DataOwner, DataKek
from kekas.transformations import Transformer, to_torch, normalize
from kekas.metrics import accuracy
from kekas.modules import Flatten, AdaptiveConcatPool2d
from kekas.callbacks import Callback, Callbacks, DebuggerCallback



In [3]:
import fastai_sparse # 3D 

In [4]:
import sparseconvnet as scn

from fastai_sparse import utils, visualize
from fastai_sparse.utils import log
#from fastai_sparse.data import DataSourceConfig, MeshesDataset, SparseDataBunch
#from fastai_sparse.learner import SparseModelConfig, Learner
#from fastai_sparse.callbacks import TimeLogger, SaveModelCallback, CSVLogger
from fastai_sparse.transforms import Transform, Compose

from data import merge_fn

# Experiment environment and system metrics

In [5]:
import neptune
from neptune_callbacks import NeptuneMonitor

ModuleNotFoundError: No module named 'neptune_callbacks'

In [5]:
# Hyper-parameters recorded for the experiment.
# NOTE(review): in the cells visible in this notebook these values are only passed to
# neptune.create_experiment(params=params); the Keker calls further down use literal
# lr / weight_decay values instead - confirm which set is authoritative.
params={'n_epoch': 256+128,
        'max_lr': 0.5,
        'wd':0.000003
        }
params

{'n_epoch': 384, 'max_lr': 0.5, 'wd': 3e-06}

In [None]:
# Read the Neptune API token from the first line of a local text file.
# NOTE: `splitlines()[0]` raises IndexError if that file is empty.
with open('NEPTUNE_API_TOKEN.txt','r') as f:
    NEPTUNE_API_TOKEN = f.readline().splitlines()[0]
    
neptune.init(api_token=NEPTUNE_API_TOKEN,
             project_qualified_name='roma-goodok/fastai-sparse-scannet')



# create experiment in the project defined above
exp = neptune.create_experiment(params=params)
print(exp.id)
# tags attached to this experiment
exp.append_tag('study')
exp.append_tag('kekas')
exp.append_tag('unet24')
exp.append_tag('1cycle')

In [6]:
# Use the Neptune experiment id as the run name when an experiment object exists;
# otherwise fall back to a fixed name so the rest of the notebook still runs.
try:
    experiment_name = exp.id
except (NameError, AttributeError):
    # `exp` is undefined when the Neptune cell was not executed (NameError),
    # or it is not an experiment object exposing `.id` (AttributeError).
    experiment_name = "kekas"
experiment_name

'kekas'

In [7]:
utils.watermark(pandas=True, kekas=True)

virtualenv:     (aseg_torch1) 
python:         3.6.8
nvidia driver:  b'384.130'
nvidia cuda:    9.0, V9.0.176
cudnn:          7.1.4
torch:          1.0.0
pandas:         0.24.2
kekas:          0.1.17
fastai:         1.0.48
fastai_sparse:  0.0.4.dev0


In [8]:
!git log1 -n3

[33ma24855f[m Updates
[33m87b37dc[m Initial commit
[33m87cb41a[m Initial commit


In [9]:
!nvidia-smi

Thu Apr 18 22:48:30 2019       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 384.130                Driver Version: 384.130                   |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|   0  GeForce GTX 108...  Off  | 00000000:28:00.0  On |                  N/A |
| 35%   56C    P0    69W / 250W |   1431MiB / 11163MiB |      4%      Default |
+-------------------------------+----------------------+----------------------+
|   1  GeForce GTX 108...  Off  | 00000000:29:00.0 Off |                  N/A |
|  0%   52C    P8    18W / 250W |     11MiB / 11172MiB |      0%      Default |
+-------------------------------+----------------------+----------------------+
                                                                            

In [10]:
!lscpu | grep "Model"

Model name:            AMD Ryzen 7 1700 Eight-Core Processor


## Jupyter notebook display options

In [11]:
utils.wide_notebook()
# Change `False` to `True` below to switch off interactive mode and save visualisations as screenshots.
# For rendering, run this command in a terminal:    `chromium-browser --remote-debugging-port=9222`
if  False:
    visualize.options.interactive = False
    visualize.options.save_images = True
    visualize.options.verbose = True
    # images are written under images/<experiment_name>/fig_<fig_number>
    visualize.options.filename_pattern_image = Path('images', experiment_name, 'fig_{fig_number}')

# Dataset creation

## Downloading

See how to download and preprocess the data in the fastai_sparse library's ScanNet example: https://github.com/goodok/fastai_sparse/tree/master/examples/scannet/data

In [12]:
# Directory with the preprocessed meshes (e.g. scene0000_01/scene0000_01.merged.ply).
SOURCE_DIR = Path('data', 'scannet_merged_ply')
assert SOURCE_DIR.exists(), "Run prepare_data.ipynb"

# Directory holding the train / val / test split list files read by get_df_list().
definition_of_spliting_dir = Path('data', 'ScanNet_Tasks_Benchmark')
assert definition_of_spliting_dir.exists()


# Peek at one scene directory to check the expected file layout.
os.listdir(SOURCE_DIR / 'scene0000_01')

['scene0000_01.merged.ply']

In [13]:
def find_files(path, ext='merged.ply'):
    """Return the files one directory level below `path` whose names end with `ext`.

    Args:
        path: root directory (str or pathlib.Path); its immediate sub-directories are searched.
        ext: filename suffix to match; the glob pattern used is `<path>/*/*<ext>`.

    Returns:
        Sorted list of matching file paths as strings (empty if nothing matches).
    """
    # `glob` is not imported by the notebook's import cells, so import it here;
    # without it this function raised NameError on its first call.
    import glob

    pattern = str(Path(path) / '*' / ('*' + ext))
    # sort so the result does not depend on filesystem enumeration order
    return sorted(glob.glob(pattern))

def get_df_list(verbose=0):
    """Load the train / valid / test split lists into DataFrames.

    Each split file under `definition_of_spliting_dir` is read as a single
    `example_id` column; `datatype` (the split name) and `subdir` (a copy of
    `example_id`) columns are added and the rows are sorted by `example_id`.

    Args:
        verbose: when truthy, print the row count of every split and the total.

    Returns:
        dict mapping 'train', 'valid' and 'test' to their DataFrame.
    """
    datatypes = ('train', 'valid', 'test')
    split_files = {
        'train': definition_of_spliting_dir / 'scannetv1_train.txt',
        'valid': definition_of_spliting_dir / 'scannetv1_val.txt',
        'test': definition_of_spliting_dir / 'scannetv1_test.txt',
    }

    # fail early, naming the split, if any list file is missing
    for datatype in datatypes:
        assert split_files[datatype].exists(), datatype

    frames = {}
    for datatype in datatypes:
        df = (
            pd.read_csv(split_files[datatype], header=None, names=['example_id'])
            .assign(datatype=datatype)
            .assign(subdir=lambda frame: frame.example_id)
            .sort_values('example_id')
        )
        frames[datatype] = df

        if verbose:
            print(f"{datatype:5} counts: {len(df):>4}")

    if verbose:
        total = sum(len(df) for df in frames.values())
        print(f"total:     {total}")
    return frames

In [14]:
df_list = get_df_list(verbose=1)

train counts: 1045
valid counts:  156
test  counts:  312
total:     1513


In [15]:
df_list['train'].head()

Unnamed: 0,example_id,datatype,subdir
827,scene0000_00,train,scene0000_00
828,scene0000_01,train,scene0000_01
829,scene0000_02,train,scene0000_02
496,scene0001_00,train,scene0001_00
497,scene0001_01,train,scene0001_01


In [16]:
os.listdir(os.path.join(SOURCE_DIR, 'scene0000_00'))

['scene0000_00.merged.ply']

# Augmentation

In [17]:
from fastai_sparse.data_items  import MeshItem, PointsItem
from fastai_sparse.learner import SparseModelConfig
import transforms as T

In [18]:
# first we need a reader function that defines how the mesh for a dataframe row is loaded
def reader_fn(i, row):
    """Load the mesh that belongs to one dataframe row.

    Args:
        i: index of the row in the dataframe (not used here).
        row: dataframe row providing 'subdir' and 'example_id'.

    Returns:
        The MeshItem read from SOURCE_DIR/<subdir>/<example_id>.merged.ply,
        with labels taken from the 'label' field.
    """
    mesh_path = SOURCE_DIR / row['subdir'] / f'{row["example_id"]}.merged.ply'
    return MeshItem.from_file(mesh_path, label_field='label')

In [19]:
m = reader_fn(0, df_list['train'].iloc[0])
m.describe()

MeshItem (scene0000_00.merged.ply)
vertices:                shape: (81369, 3)            dtype: float64        min:   -0.01657,  max:    8.74040,  mean:    3.19051
faces:                   shape: (153587, 3)           dtype: int64          min:          0,  max:      81368,  mean: 40549.68796
colors:                  shape: (81369, 4)            dtype: uint8          min:    1.00000,  max:  255.00000,  mean:  145.80430
labels:                  shape: (81369,)              dtype: uint16         min:    0.00000,  max:  230.00000,  mean:   12.97057
Colors from vertices
Labels from vertices


In [20]:
m.is_colors_from_vertices, m.is_labels_from_vertices

(True, True)

In [21]:
# Lookup table from raw label id to training class index: the 20 relevant ids map to
# {0,1,...,19} in the order listed, every other id (0..2999) maps to -100 (ignored).
remapper = np.full(3000, -100, dtype=np.int32)
remapper[[1,2,3,4,5,6,7,8,9,10,11,12,14,16,24,28,33,34,36,39]] = np.arange(20)

In [22]:
# Transforms applied first in both pipelines (see get_transforms): convert the mesh to a
# point cloud, remap labels through `remapper`, then normalize colors and coordinates.
PRE_TFMS = [T.to_points_cloud(method='vertices', normals=False), 
            T.remap_labels(remapper=remapper, inplace=False),
            T.colors_normalize(),
            T.normalize_spatial(),
           ]

# Scale factor passed to T.scale; the elastic parameters below are derived from it.
_scale = 20

# Training-time augmentations.
AUGS_TRAIN = [
    T.noise_affine(amplitude=0.1),
    T.flip_x(p=0.5),
    T.scale(scale=_scale),
    T.rotate_XY(),
    
    # with _scale = 20: gran = 2, mag = 16.0 and gran = 8, mag = 64.0
    T.elastic(gran=6 * _scale // 50, mag=40 * _scale / 50),
    T.elastic(gran=20 * _scale // 50, mag=160 * _scale / 50),
    
    # 4096 is the same value used as spatial_size in the model config
    T.specific_translate(full_scale=4096),
    T.crop_points(low=0, high=4096),
    T.colors_noise(amplitude=0.1),
]

# Validation-time transforms: no elastic step, but a fixed + random translation instead.
# NOTE(review): this list also contains noise / flip / rotate / colors_noise steps -
# presumably randomized; confirm that augmenting the validation set is intended.
AUGS_VALID = [
    T.noise_affine(amplitude=0.1),
    T.flip_x(p=0.5),
    T.scale(scale=_scale),
    T.rotate_XY(),

    T.translate(offset=4096 / 2),
    T.rand_translate(offset=(-2, 2, 3)),  # low, high, dimension
    
    T.specific_translate(full_scale=4096),
    T.crop_points(low=0, high=4096),
    T.colors_noise(amplitude=0.1),
        
    ]

# Final steps of both pipelines: build the feature matrix from colors only
# (ones=False, normals=False) and convert to sparse voxels.
SPARSE_TFMS = [
    T.merge_features(ones=False, colors=True, normals=False),
    T.to_sparse_voxels(),
]


# reimplement to_torch
def _to_torch(x):
    x.coords 
    x.features
    x.labels
    
    return x

# to_torch = Transform(_to_torch)



In [23]:
from data import merge_fn
merge_fn

functools.partial(<function SparseDataBunch.merge_fn at 0x7f3b939e4598>, keys_lists=['id', 'labels_raw', 'filtred_mask', 'random_seed', 'num_points'])

In [24]:
def get_transforms(dataset_key):
    """Build the training and validation transform pipelines.

    Args:
        dataset_key: accepted for interface compatibility; not used by this function.

    Returns:
        (train, valid) tuple of Compose objects: PRE_TFMS, then AUGS_TRAIN or
        AUGS_VALID respectively, then SPARSE_TFMS.
    """
    train_pipeline = Compose(PRE_TFMS + AUGS_TRAIN + SPARSE_TFMS)
    valid_pipeline = Compose(PRE_TFMS + AUGS_VALID + SPARSE_TFMS)
    return train_pipeline, valid_pipeline

## DataKeks creation

In [25]:
train_df = df_list['train']
val_df = df_list['valid']

# now let's create DataKeks
# NOTE: the get_transforms() defined in the previous cell does not use its argument,
# so "mesh" is informational only.
train_tfms, val_tfms = get_transforms("mesh")

# DataKek wraps a dataframe: each row is loaded with reader_fn and passed through the transforms
train_dk = DataKek(df=train_df, reader_fn=reader_fn, transforms=train_tfms)
val_dk = DataKek(df=val_df, reader_fn=reader_fn, transforms=val_tfms)

In [26]:
b = train_dk[0]
b.describe()
b.show()

id: scene0000_00.merged
coords                   shape: (81369, 3)            dtype: int64          min:        349,  max:       4031,  mean: 1669.08843
features                 shape: (81369, 3)            dtype: float32        min:   -1.09001,  max:    1.06099,  mean:   -0.19566
x                        shape: (81369,)              dtype: int64          min:        603,  max:        765,  mean:  679.72172
y                        shape: (81369,)              dtype: int64          min:       3863,  max:       4031,  mean: 3943.69753
z                        shape: (81369,)              dtype: int64          min:        349,  max:        447,  mean:  383.84605
labels                   shape: (81369,)              dtype: int64          min:       -100,  max:         17,  mean:  -48.51506
voxels: 55154
points / voxels: 1.4753055082133661


VBox(children=(Figure(camera=PerspectiveCamera(fov=46.0, position=(0.0, 0.0, 2.0), quaternion=(0.0, 0.0, 0.0, …

## DataLoaders

In [28]:
# and DataLoaders
batch_size = 4
workers = 4

# merge_fn (imported from the local `data` module) is used as the collate function;
# the resulting batch is a dict (see the batch inspection cells below)
train_dl = DataLoader(train_dk, batch_size=batch_size, num_workers=workers, shuffle=True, drop_last=True, collate_fn=merge_fn)
val_dl = DataLoader(val_dk, batch_size=batch_size, num_workers=workers, shuffle=False, collate_fn=merge_fn)

In [29]:
for i, batch in enumerate(train_dl):
    break

In [30]:
type(batch)

dict

In [31]:
batch

{'coords': tensor([[3861,  173, 3632,    0],
         [3861,  173, 3632,    0],
         [3860,  174, 3632,    0],
         ...,
         [2904, 1623,  485,    3],
         [2905, 1622,  484,    3],
         [2904, 1622,  484,    3]]),
 'features': tensor([[-0.4175, -0.4925, -0.2817],
         [-0.4331, -0.5239, -0.2895],
         [-0.4253, -0.5161, -0.2895],
         ...,
         [ 0.5567,  0.5725,  0.0537],
         [ 0.6116,  0.6195,  0.1086],
         [ 0.5881,  0.5725,  0.1008]]),
 'labels': tensor([-100, -100, -100,  ..., -100, -100, -100]),
 'id': ['scene0232_00.merged',
  'scene0468_02.merged',
  'scene0601_00.merged',
  'scene0279_02.merged'],
 'labels_raw': [array([-100, -100, -100, -100, ...,    6, -100, -100,    0], dtype=int32),
  array([-100, -100, -100, -100, ...,    0,    0,    0,    0], dtype=int32),
  array([-100, -100, -100, -100, ...,    0,    0,    0,    0], dtype=int32),
  array([-100, -100, -100, -100, ..., -100, -100, -100, -100], dtype=int32)],
 'filtred_mask'

In [32]:
len(batch['coords']), sum(batch['num_points'])

(694404, 694404)

# Model

In [33]:
# spatial_size  is full_scale
# 4096 matches the full_scale / crop bounds used in the augmentation lists,
# num_classes=20 matches the 20 remapped label ids, and num_input_features=3
# matches the 3-column `features` array produced by the transforms.
model_config = SparseModelConfig(spatial_size=4096, num_classes=20, num_input_features=3, mode=4,
                                 m=16, num_planes_coeffs=[1, 2, 3, 4, 5, 6, 7])
model_config

SparseModelConfig;
   spatial_size: 4096
   dimension: 3
   block_reps: 1
   m: 16
   num_planes: [16, 32, 48, 64, 80, 96, 112]
   residual_blocks: False
   num_classes: 20
   num_input_features: 3
   mode: 4
   downsample: [2, 2]
   bias: False

In [34]:
class Model(nn.Module):
    """Sparse-convolution U-Net followed by a linear classification layer.

    Args:
        cfg: configuration object providing `dimension`, `spatial_size`, `mode`,
            `num_input_features`, `m`, `bias`, `block_reps`, `num_planes`,
            `residual_blocks`, `downsample` and `num_classes`.
    """

    def __init__(self, cfg):
        super().__init__()
        dim = cfg.dimension
        layers = [
            scn.InputLayer(dim, cfg.spatial_size, mode=cfg.mode),
            scn.SubmanifoldConvolution(dim, nIn=cfg.num_input_features, nOut=cfg.m, filter_size=3, bias=cfg.bias),
            scn.UNet(dim, cfg.block_reps, cfg.num_planes, residual_blocks=cfg.residual_blocks, downsample=cfg.downsample),
            scn.BatchNormReLU(cfg.m),
            scn.OutputLayer(dim),
        ]
        self.sparseModel = scn.Sequential(*layers)
        # maps the cfg.m output features to cfg.num_classes scores
        self.linear = nn.Linear(cfg.m, cfg.num_classes)

    def forward(self, xb):
        """Run the network on a batch dict holding 'coords' and 'features'."""
        sparse_input = [xb['coords'], xb['features']]
        return self.linear(self.sparseModel(sparse_input))

model = Model(model_config)

## Initialization

In [35]:
# the three whales of your pipeline are: the data, the model and the loss (hi, Jeremy)

# the data is represented in Kekas by DataOwner. It is a namedtuple with three fields:
# 'train_dl', 'val_dl', 'test_dl'
# For the training process we need at least two of them, and we can skip 'test_dl' for now,
# so we initialize it with `None`.
dataowner = DataOwner(train_dl, val_dl, None)

# model is just a pytorch nn.Module, which we created before
#model = Net(num_classes=2)

# loss or criterion is also a pytorch nn.Module. For multiloss scenarios it can be a list of nn.Modules
# for our simple example let's use the standard cross entropy criterion
# NOTE: ignored classes are remapped to -100, which is the default `ignore_index` of nn.CrossEntropyLoss
criterion = nn.CrossEntropyLoss()

In [36]:
# Also we need to specify, what model will do with each batch of data on each iteration
# We should define a `step_fn` function
# The code below repeats a `keker.default_step_fn` code to provide you with a concept of step function

def step_fn(model: torch.nn.Module,
            batch: dict) -> torch.Tensor:
    """Determine what your model will do with your data.

    Args:
        model: the pytorch module to pass input in
        batch: the batch dict produced by the DataLoader; it is handed to the
            model unchanged (the sparse `Model.forward` reads its 'coords' and
            'features' entries itself)

    Returns:
        The models forward pass results
    """
    # you could define here whatever logic you want;
    # here the whole batch dict is the model input, so nothing is extracted
    return model(batch)

In [37]:
# the previous preparations were mostly out of scope of the Kekas library (except DataKeks creation)
# Now let's dive into kekas a little bit

# firstly, we create a Keker - the core Kekas class, that provides all the keks for your pipeline
# NOTE(review): weight_decay below (1e-5) differs from params['wd'] (3e-06) defined at the top - confirm
keker = Keker(model=model,
              dataowner=dataowner,
              criterion=criterion,
              step_fn=step_fn,                    # previously defined step function
              target_key="labels",                 # key of the batch dict that holds the targets
              opt=torch.optim.Adam,               # optimizer class. if not specified,
                                                  # SGD is used by default
              opt_params={"weight_decay": 1e-5})  # optimizer kwargs in dict format (optional too)

# Actually, there are a lot of params for kekers, but this is out of scope of this example
# you can read about them in Keker's docstring (but who really reads the docs, huh?)

In [38]:
# before starting the finetuning procedure, let's freeze all the layers except the last one - the head
# the `freeze` method is mostly inspired by (or stolen from) fastai
# but you should specify the model attribute to operate on
# for example, in the kekas tutorial the model is actually model.net, so the 'net' attr is specified
# also this method does not freeze batchnorm layers by default. To change this, set `freeze_bn=True`

#keker.freeze(model_attr="net")

## Learning Rate Find

In [39]:
# let's find an 'optimal' learning rate with learning rate find procedure
# for details please see the fastai course and this articles:
# https://arxiv.org/abs/1803.09820
# https://sgugger.github.io/how-do-you-find-a-good-learning-rate.html

# NOTE: this is an optional step and you can skip it and use your favorite learning rate

# you MUST specify the logdir to see graphics
# keker will write a tensorboard logs into this folder
# to see them start a tensorboard with `--logdir /path/to/logdir`
# OR you can use keker.plot_kek_lr method (see cell below)
keker.kek_lr(final_lr=0.1, logdir="logdir")

Epoch 1/1: 100% 261/261 [03:22<00:00,  1.83it/s, loss=1.0766]
End of LRFinder



In [40]:
keker.plot_kek_lr(logdir="logdir")

In [42]:
# Ok, now let's start training!
# It's as simple as:
keker.kek(lr=1e-5, epochs=3)

Epoch 1/3: 100% 261/261 [03:38<00:00,  1.19it/s, loss=2.8771, val_loss=2.8088]
Epoch 2/3:   7% 17/261 [00:16<03:09,  1.28it/s, loss=2.8607]


Traceback (most recent call last):
Traceback (most recent call last):
  File "/usr/lib/python3.6/multiprocessing/queues.py", line 240, in _feed
    send_bytes(obj)
  File "/usr/lib/python3.6/multiprocessing/connection.py", line 200, in send_bytes
    self._send_bytes(m[offset:offset + size])
  File "/usr/lib/python3.6/multiprocessing/connection.py", line 404, in _send_bytes
    self._send(header + buf)
  File "/usr/lib/python3.6/multiprocessing/connection.py", line 368, in _send
    n = write(self._handle, buf)
BrokenPipeError: [Errno 32] Broken pipe
Traceback (most recent call last):
Traceback (most recent call last):
  File "/usr/lib/python3.6/multiprocessing/queues.py", line 240, in _feed
    send_bytes(obj)
  File "/usr/lib/python3.6/multiprocessing/connection.py", line 200, in send_bytes
    self._send_bytes(m[offset:offset + size])
  File "/usr/lib/python3.6/multiprocessing/connection.py", line 404, in _send_bytes
    self._send(header + buf)
  File "/usr/lib/python3.6/multiproce

KeyboardInterrupt: 

# from kekas Tutorial

## Dataframe creation and train/val split

In [41]:
# Let's create a pandas DataFrame to help us with data handling
# NOTE(review): this cell is left over from the kekas image tutorial. `cv2` is used below,
# but its import is commented out in this notebook's import cell, so cv2.imread would raise
# NameError once the directory exists.
root_dir = Path("PetImages/")  # path to Cats and Dogs dataset root directory

fpaths = []
labels = []
# one sub-directory per class; the directory name is used as the label
for d in root_dir.iterdir():
    for f in d.iterdir():
        img = cv2.imread(str(f))  # some files there are corrupted, so add only good ones
        if img is not None:
            labels.append(d.name)
            fpaths.append(str(f))

df = pd.DataFrame(data={"fpath": fpaths, "label": labels})
df.head()

FileNotFoundError: [Errno 2] No such file or directory: 'PetImages'

In [None]:
# split dataset to train and val parts
# NOTE(review): `train_test_split` is not in scope - its sklearn import is commented out in the
# import cell. Running this cell would also rebind train_df / val_df used by the ScanNet cells.
train_df, val_df = train_test_split(df, test_size=2000)
train_df.shape, val_df.shape

## Augmentations

In [None]:
# create train and val datasets using DataKek class - a pytorch Dataset that uses pandas DataFrame as data source

# at first we need to create a reader function that will define how image will be opened
def reader_fn(i, row):
    """Read one image sample for a DataKek.

    Args:
        i: index of the row in the dataframe (not used here).
        row: dataframe row providing 'fpath' (image path) and 'label'.

    Returns:
        dict with the RGB image under "image" and the integer class under
        "label" (0 for "Dog", 1 otherwise).
    """
    bgr = cv2.imread(row["fpath"])
    image = bgr[:,:,::-1]  # BGR -> RGB
    label = 0 if row["label"] == "Dog" else 1
    return {"image": image, "label": label}


# Then we should create transformations/augmentations
# We will use awesome https://github.com/albu/albumentations library
def augs(p=0.5):
    """Build the image augmentation pipeline; `p` is forwarded to `Compose`.

    NOTE(review): in this notebook `Compose` is imported from fastai_sparse.transforms
    and the albumentations import (CLAHE, RandomRotate90, ...) is commented out, so
    this function cannot run as-is - restore the albumentations import first.
    """
    return Compose([
        CLAHE(),
        RandomRotate90(),
        Transpose(),
        ShiftScaleRotate(shift_limit=0.0625, scale_limit=0.50, rotate_limit=45, p=.75),
        Blur(blur_limit=3),
        OpticalDistortion(),
        GridDistortion(),
        HueSaturationValue()
    ], p=p)

def get_transforms(dataset_key, size, p):
    """Build the train and validation transform pipelines for image DataKeks.

    Args:
        dataset_key: the image key in the dict returned by reader_fn
        size: images are resized to (size, size)
        p: value passed to `augs(p)` when building the augmentation pipeline

    Returns:
        (train_tfms, val_tfms): train applies resize, augmentations and
        normalization; val applies resize and normalization only.
    """
    # we need to use a Transformer class to apply transformations to DataKeks elements
    PRE_TFMS = Transformer(dataset_key, lambda x: cv2.resize(x, (size, size)))

    # Build the augmentation pipeline once and honour `p`: previously `augs()` was
    # called with no argument for every sample, so `p` was silently ignored.
    augmenter = augs(p)
    AUGS = Transformer(dataset_key, lambda x: augmenter(image=x)["image"])

    NRM_TFMS = transforms.Compose([
        Transformer(dataset_key, to_torch()),
        Transformer(dataset_key, normalize())
    ])

    train_tfms = transforms.Compose([PRE_TFMS, AUGS, NRM_TFMS])
    val_tfms = transforms.Compose([PRE_TFMS, NRM_TFMS])  # because we don't want to augment val set yet

    return train_tfms, val_tfms

## DataKeks creation

In [None]:
# now let's create DataKeks
train_tfms, val_tfms = get_transforms("image", 224, 0.5)

train_dk = DataKek(df=train_df, reader_fn=reader_fn, transforms=train_tfms)
val_dk = DataKek(df=val_df, reader_fn=reader_fn, transforms=val_tfms)

## DataLoaders

In [None]:
# and DataLoaders
batch_size = 32
workers = 8

train_dl = DataLoader(train_dk, batch_size=batch_size, num_workers=workers, shuffle=True, drop_last=True)
val_dl = DataLoader(val_dk, batch_size=batch_size, num_workers=workers, shuffle=False)

# Model

In [None]:
# create a simple neural network using pretrainedmodels library
# https://github.com/Cadene/pretrained-models.pytorch

class Net(nn.Module):
    """Pretrained backbone (minus its last two children) with a custom classification head."""

    def __init__(
            self,
            num_classes: int,
            p: float = 0.5,
            pooling_size: int = 2,
            last_conv_size: int = 2048,
            arch: str = "se_resnext50_32x4d",
            pretrained: str = "imagenet") -> None:
        """A simple model to finetune.

        Args:
            num_classes: the number of target classes, the size of the last layer's output
            p: dropout probability
            pooling_size: the size of the result feature map after adaptive pooling layer
            last_conv_size: size of the flatten last backbone conv layer
            arch: the name of the architecture from pretrainedmodels
            pretrained: the mode for pretrained model from pretrainedmodels
        """
        super().__init__()
        backbone = pm.__dict__[arch](pretrained=pretrained)
        body = list(backbone.children())[:-2]  # delete last layers: pooling and linear

        # width of the flattened features entering the head: the concat pooling
        # doubles the channels, each pooled to pooling_size x pooling_size
        head_width = 2 * pooling_size * pooling_size * last_conv_size

        # custom head
        head = nn.Sequential(
            # AdaptiveConcatPool2d is a concat of AdaptiveMaxPooling and AdaptiveAveragePooling
            AdaptiveConcatPool2d(size=pooling_size),
            Flatten(),
            nn.BatchNorm1d(head_width),
            nn.Dropout(p),
            nn.Linear(head_width, num_classes)
        )
        self.net = nn.Sequential(*body, head)

    def forward(self, x):
        """Return the class logits for input `x`."""
        return self.net(x)

# Keker

## Initialization

In [None]:
# the three whales of your pipelane are: the data, the model and the loss (hi, Jeremy)

# the data is represented in Kekas by DataOwner. It is a namedtuple with three fields:
# 'train_dl', 'val_dl', 'test_dl'
# For training process we will need at least two of them, and we can skip 'test_dl' for now
# so we will initialize it with `None` value.
dataowner = DataOwner(train_dl, val_dl, None)

# model is just a pytorch nn.Module, that we created vefore
model = Net(num_classes=2)

# loss or criterion is also a pytorch nn.Module. For multiloss scenarios it can be a list of nn.Modules
# for our simple example let's use the standart cross entopy criterion
criterion = nn.CrossEntropyLoss()

In [None]:
# Also we need to specify, what model will do with each batch of data on each iteration
# We should define a `step_fn` function
# The code below repeats a `keker.default_step_fn` code to provide you with a concept of step function

def step_fn(model: torch.nn.Module,
            batch: dict) -> torch.Tensor:
    """Determine what your model will do with your data.

    Args:
        model: the pytorch module to pass input in
        batch: the batch dict from the DataLoader; its "image" entry is the model input

    Returns:
        The models forward pass results
    """
    # you could define here whatever logic you want;
    # here we take the "image" entry of the batch and feed it to the model
    return model(batch["image"])

In [None]:
# previous preparations was mostly out of scope of Kekas library (except DataKeks creation)
# Now let's dive into kekas a little bit

# firstly, we create a Keker - the core Kekas class, that provides all the keks for your pipeline
keker = Keker(model=model,
              dataowner=dataowner,
              criterion=criterion,
              step_fn=step_fn,                    # previosly defined step function
              target_key="label",                 # remember, we defined it in the reader_fn for DataKek?
              metrics={"acc": accuracy},          # optional, you can not specify any metrics at all
              opt=torch.optim.Adam,               # optimizer class. if note specifiyng, 
                                                  # an SGD is using by default
              opt_params={"weight_decay": 1e-5})  # optimizer kwargs in dict format (optional too)

# Actually, there are a lot of params for kekers, but this out of scope of this example
# you can read about them in Keker's docstring (but who really reads the docs, huh?)

In [None]:
# before the start of the finetuning procedure let's freeeze all the layers except the last one - the head
# the `freeze` method is mostly inspired (or stolen) from fastai
# but you should define a model's attribute to deal with
# for example, our model is actually model.net, so we need to specify the 'net' attr
# also this method does not freezes batchnorm layers by default. To change this set `freeze_bn=True`
keker.freeze(model_attr="net")

## Learning Rate Find

In [None]:
# let's find an 'optimal' learning rate with learning rate find procedure
# for details please see the fastai course and this articles:
# https://arxiv.org/abs/1803.09820
# https://sgugger.github.io/how-do-you-find-a-good-learning-rate.html

# NOTE: this is an optional step and you can skip it and use your favorite learning rate

# you MUST specify the logdir to see graphics
# keker will write a tensorboard logs into this folder
# to see them start a tensorboard with `--logdir /path/to/logdir`
# OR you can use keker.plot_kek_lr method (see cell below)
keker.kek_lr(final_lr=0.1, logdir="/path/to/logdir")

## Plot Learning Rate find results

In [None]:
# Zoom in plot to see on which step the loss was still decreasing
# and choose LR from this step
keker.plot_kek_lr(logdir="/path/to/logdir")

## Simple Kek

In [None]:
# Ok, now let's start training!
# It's as simple as:
keker.kek(lr=1e-5, epochs=3)  

## Kek with different optimizer

In [None]:
# SomeKekasUser: Wait, and what if I want to train with the different optimizer?
#
# Me:
keker.kek(lr=1e-5, 
          epochs=1,
          opt=torch.optim.RMSprop,            # optimizer class
          opt_params={"weight_decay": 1e-5})  # optimizer kwargs in dict format (if you want)

# by default, the optimizer specified on Keker initialization is used

## Kek with scheduler

In [None]:
# SomeKekasUser: OK, and what if I want to use a pytorch scheduler?
#
# Me:
keker.kek(lr=1e-5,
          epochs=2,
          sched=torch.optim.lr_scheduler.StepLR,       # pytorch lr scheduler class
          sched_params={"step_size":1, "gamma": 0.9})  # schedulres kwargas in dict format

# by default, no scheduler is using

## Log your keks

In [None]:
# SomeKekasUser: How about logging?
#
# Me:
keker.kek(lr=1e-5,
          epochs=1,
          logdir="/mnt/hdd3_4/belskikh/keks/forplot")

# It will create a `train` and `val` subfolders in logdir, and will write tensorboard logs into them
# to see them start a tensorboard with `--logdir /path/to/logdir`
# OR you can use keker.plot_kek method! (see cell below)

## Plot your keks

In [None]:
# kekas uses plotly lib and tensorboard logs to plot inside NB
keker.plot_kek(logdir="/path/to/logdir",  # path to logdir with logs to plot
               step="batch",              # (optional) default is "step". another option is "epoch"
                                          # It determines discreteness of ploting
               metrics=["loss",           # (optional) list of metrics names
                        "acc",            # by default ["loss", "lr"] is using
                        "lr"],            # the order of the names determines the order of the plot
                                          # NOTE: names of metrics must match names in metrics dict
                                          # which was specified on Keker init step
               height=1200,               # (optional) height of the total plot 
               width=800)                 # (optional) width of the total plot

## Checkpoints saving

In [None]:
# SomeKekasUser: Also I want to save best checkpoints to later use them for SWA or ensembling!
#                And I want to measure them by custom metric, control their number, specify their name prefix,
#                and control what I need - minimize or maximize metric!
# Me: Here it is:
keker.kek(lr=1e-5,
          epochs=1,
          cp_saver_params={
              "savedir": "/path/to/save/dir",  # a directory for checkpoints
              "metric": "acc",  # (optional) from `metrics` dict on Keker init. 
                                # default is validation loss
              "n_best": 3,      # (optional) default is 3
              "prefix": "kek",  # (optional) default prefix is `checkpoint`
              "mode": "max"     # (optional) default is 'min'
          })   

# It will create a `savedir` directory, and will save best checkpoints there
# with naming `{prefix}.{epoch_num}.h5`. The best checkpoint will be dublicated with `{prefix}.best.h5` name
# look at the report down here

## Early stopping

In [None]:
# SomeKekasUser: Allright, and I don't want to train model, if validation loss doesn't improve for several epochs.
# 
# Me: You mean, early stopping? Here:
keker.kek(lr=1e-5,
          epochs=1, 
          early_stop_params={
              "patience": 3,   # number of bad epochs to wait before stopping
              "metric": "acc", # (optional) metric name from 'metric' dict. default is val loss
              "mode": "max",   # (optional) max or min? default is 'min'; accuracy must be maximized,
                               # with "min" a falling accuracy would count as an improvement
              "min_delta": 0   # (optional) a minimum delta to count an epoch as 'bad'
          })

## Just do it

In [None]:
# SomeAdvancedKekasUser: I WANT IT ALL!
# 
# Me: Well, okay then...
keker.kek(lr=1e-5,
          epochs=5,
          opt=torch.optim.RMSprop,
          opt_params={"weight_decay": 1e-5},
          sched=torch.optim.lr_scheduler.StepLR,
          sched_params={"step_size":1, "gamma": 0.9},
          logdir="/path/to/logdir",
          cp_saver_params={
              "savedir": "/path/to/save/dir",  
              "metric": "acc",  
              "n_best": 3,      
              "prefix": "kek",  
              "mode": "max"},     
          early_stop_params={
              "patience": 3,   
              "metric": "acc", 
              # accuracy is maximized, same direction as the checkpoint saver above;
              # with "min" a falling accuracy would count as an improvement
              "mode": "max",   
              "min_delta": 0
          })

## One Cycle Kek!

In [None]:
# SomeFastaiFan: Did you stole something else from fastai?
#
# Me: Yes! One Cycle Policy!
keker.kek_one_cycle(max_lr=1e-5,                  # the maximum learning rate
                    cycle_len=5,                  # number of epochs, actually, but not exactly
                    momentum_range=(0.95, 0.85),  # range of momentum changes
                    div_factor=25,                # max_lr / min_lr
                    increase_fraction=0.3)        # the part of cycle when learning rate increases

# If you don't understand these parameters, read this - https://sgugger.github.io/the-1cycle-policy.html
# NOTE: you cannot use schedulers and early stopping with one cycle!
# another options are the same as for `kek` method

## Other Keker features

### Freezing / unfreezing

In [None]:
# We've already talk about freezing. But what if I want to unfreeze?
# It has the same interface:
keker.unfreeze(model_attr="net")

# If you want to freeze till some layer:
layer_num = -2
keker.freeze_to(layer_num, model_attr="net")

### Saving / Loading

In [None]:
# saving
keker.save("/path/to/file")

# loading
keker.load("/path/to/file")

### Device and DataParallel

In [None]:
# Keker is using all avialable GPUs by default
# To limit it, use 'CUDA_VISIBLE_DEVICES' environment variable (available in os.environ dict)

# if you want to specify cuda device for your model, specify `device` parameter on Keker initialization

### Inference

In [None]:
# there are 4 (yes, four) ways to get a predictions with keker

# 1st
keker.predict(savepath="/path/to/save/dir")
# it will makes predicts on your 'test_dl' dataloader (remember, we initialized it with 'None'), if it specified,
# and saves models output in numpy.ndarray format to 'savepath'

# 2nd
loader = val_dl
keker.predict_loader(loader=loader, savepath="/path/to/save/dir")
# it will do the same as `predict()` but on any custom loader you want

# 3rd
tensor = torch.zeros(4, 224, 224, 3)
preds = keker.predict_tensor(tensor=tensor, to_numpy=False)
# it will return a predictions of the model in numpy format if `'to_numpy==True', else - torch.Tensor

# 4th
array = np.zeros((4, 224, 224, 3))
preds = keker.predict_array(array=array, to_numpy=False)
# it will do the same as `predict_tensor()` but with np.ndarra as input

### TTA

In [None]:
# I am sure that it is not very convinient way for test time augmentations,
# but here is how you can do it with Kekas

# first, specify several augmentations for TTA
flip_ = Flip(always_apply=True)
vertical_flip_ = VerticalFlip(always_apply=True)
transpose_ = Transpose(always_apply=True)

# second, build the full transform pipeline around each of these augmentations
def insert_aug(aug, dataset_key="image", size=224):
    """Wrap a single augmentation into a resize -> augment -> normalize pipeline.

    Args:
        aug: callable invoked as ``aug(image=x)``; the ``"image"`` entry of
            its result is used as the augmented sample.
        dataset_key: key passed to every ``Transformer`` in the pipeline.
        size: side length; samples are resized to ``(size, size)`` with
            ``cv2.resize`` before the augmentation is applied.

    Returns:
        A ``transforms.Compose`` of the three stages, in that order.

    NOTE(review): ``import cv2`` is commented out in the import cell at the
    top of this notebook — confirm it is imported before using the pipeline.
    """
    def _resize(img):
        return cv2.resize(img, (size, size))

    def _augment(img):
        return aug(image=img)["image"]

    resize_stage = Transformer(dataset_key, _resize)
    augment_stage = Transformer(dataset_key, _augment)

    # tensor conversion and normalization stay grouped in their own Compose
    normalize_stage = transforms.Compose([
        Transformer(dataset_key, to_torch()),
        Transformer(dataset_key, normalize()),
    ])

    return transforms.Compose([resize_stage, augment_stage, normalize_stage])


# wrap each augmentation into its own full transform pipeline
flip = insert_aug(flip_)
vertical_flip = insert_aug(vertical_flip_)
transpose = insert_aug(transpose_)

# the dict keys are used in the names of the saved files (see the naming note below)
tta_tfms = {"flip": flip, "v_flip": vertical_flip, "transpose": transpose}

# third, run TTA
keker.TTA(loader=val_dl,                # loader to predict on 
          tfms=tta_tfms,                # list or dict of always-applied transforms
          savedir="/path/to/save/dir",  # directory to save predictions to
          prefix="preds")               # (optional) name prefix. default is 'preds'

# it saves the predictions for each augmentation to savedir with the name
#  - {prefix}_{name_from_dict}.npy if tfms is a dict
#  - {prefix}_{index}.npy          if tfms is a list

# Callbacks

## Adding callbacks

In [None]:
# Callbacks are the way in which Kekas customizes its pipeline.
# Each callback implements six methods, whose names tell when they are applied:
# on_train_begin()
#     on_epoch_begin()
#         on_batch_begin()
#             >>>... step here ...<<<
#         on_batch_end()
#     on_epoch_end()
# on_train_end()

# Callbacks are widely used under the hood of Kekas.
# For example, loss, optimizer, progress bar, lr scheduling, checkpoint saving, early stopping etc.
# are implemented as callbacks.

# A callback has access to the `state` attr of a keker. Here are the docs from Keker about state:

        # The state is an object that stores many variables and represents
        # the state of your train-val-predict pipeline. _state is passed to every
        # callback call.
        # You can use it as a container for your custom variables, but
        # DO NOT USE the following ones:
        #
        # loss, batch, model, dataowner, criterion, opt, parallel, checkpoint,
        # stop_iter, stop_epoch, stop_train, out, sched, mode, loader, pbar,
        # metrics, epoch_metrics

# You can write your own callback, or use something useful from kekas.callbacks

# Callbacks should be passed as a list at Keker initialization.
# For example, let's use a DebuggerCallback, which just inserts a pdb.set_trace() call into the pipeline.
# For more info, please see the DebuggerCallback docs and source code.
# Both `when` and `modes` are passed by keyword; a positional argument after a keyword
# argument is a SyntaxError.
debugger = DebuggerCallback(when=["on_epoch_begin"], modes=["train"])

keker = Keker(model=model, dataowner=dataowner, criterion=criterion, callbacks=[debugger])

# there is also a method to add callbacks to an existing Keker

keker.add_callbacks([debugger])

## Custom loss and optimizer callbacks

In [None]:
# As was said, loss and optimizer behavior is implemented as Callbacks.
# If you use some tricky loss or optimizer logic, you can create your own Callback
# and specify it during Keker initialization

# here are the callbacks that are used by default
class LossCallback(Callback):
    """Default loss callback: evaluates the criterion at the end of every batch.

    ``DotDict`` is referenced as a string annotation so that defining this
    class does not require ``DotDict`` to be imported in the notebook.
    """

    def __init__(self, target_key: str, preds_key: str) -> None:
        # target_key and preds_key are the parameters of Keker
        self.target_key = target_key
        self.preds_key = preds_key

    def on_batch_end(self, i: int, state: "DotDict") -> None:
        """Compute ``state.criterion(preds, target)`` and store it in ``state.loss``.

        ``target`` is read from ``state.batch[self.target_key]`` and ``preds``
        from ``state.out[self.preds_key]``. The batch index ``i`` is unused.
        """
        target = state.batch[self.target_key]
        preds = state.out[self.preds_key]

        state.loss = state.criterion(preds, target)

class OptimizerCallback(Callback):
    """Default optimizer callback: one optimization step per training batch.

    ``DotDict`` is referenced as a string annotation so that defining this
    class does not require ``DotDict`` to be imported in the notebook.
    """

    def on_batch_end(self, i: int, state: "DotDict") -> None:
        """Zero the gradients, backpropagate ``state.loss`` and step ``state.opt``.

        Does nothing unless ``state.mode == "train"``. The batch index ``i``
        is unused.
        """
        if state.mode == "train":
            state.opt.zero_grad()
            state.loss.backward()
            state.opt.step()
            
# and here is how you should specify them during Keker initialization
# (note that the callback classes themselves are passed here, not instances)
keker = Keker(model=model, 
              dataowner=dataowner,
              criterion=criterion,
              loss_cb=LossCallback,
              opt_cb=OptimizerCallback)

# Notes

I hope you now have an idea of how to use Kekas.

I will be happy to get feedback about my library and this tutorial.

You can find me in the [OpenDataScience](http://ods.ai) community under the nickname @belskikh, or create an issue on GitHub.

Have a good keks!