# Neural network to detect horizons

Welcome! It is about time to create our ML model to automatically detect horizons.

In [None]:
%env CUDA_VISIBLE_DEVICES=0

import os
import sys

import segyio

import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
import dill

sys.path.append('..')
from seismiqb.batchflow import Dataset, Pipeline, FilesIndex
from seismiqb.batchflow import B, V, C, L, F, D, P, R
from seismiqb.batchflow.models.tf import UNet, TFModel, DenseNet
from seismiqb.batchflow.models.tf.layers import conv_block
from seismiqb import SeismicCropBatch, SeismicGeometry, SeismicCubeset

from glob import glob
from tqdm import tqdm

import warnings
# Suppress every Python warning for the rest of the session.
warnings.filterwarnings("ignore")
# Presumably intended to mute TensorFlow's native (C++) log output.
# NOTE(review): this is assigned after `import tensorflow` above — confirm it still takes effect.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

In [None]:
# Locations of the three HDF5 cubes used in this notebook.
path_data_0 = '/notebooks/SEISMIC_DATA/CUBE_1/E_anon.hdf5'
path_data_1 = '/notebooks/SEISMIC_DATA/CUBE_3/P_cube.hdf5'
path_data_2 = '/notebooks/SEISMIC_DATA/CUBE_VUONGMK/Repaired_cube.hdf5'

# Build an index over the cube files (`no_ext=True`) and wrap it into a cubeset.
dsi = FilesIndex(path=[path_data_0, path_data_1, path_data_2], no_ext=True)
ds = SeismicCubeset(dsi)

# Loading steps, one per statement, in the order they must run.
ds = ds.load_geometries()
# NOTE(review): `path_pc_saved` is not defined in any cell visible above;
# it has to be assigned before this cell is executed.
ds = ds.load_point_clouds(path=path_pc_saved)
ds = ds.load_labels()
# Three weights, one per cube — presumably in the order the paths are listed above.
ds = ds.load_samplers(p=[0.4, 0.2, 0.4])

# ~80 seconds

### Define all of the constants in one place
**Note:** to use the Dice coefficient as the loss function, we need to add an axis to the masks.

In [None]:
# SHAPES
# Number of training epochs and number of points requested from the sampler.
EPOCHS = 100
NUM_CROPS = 16

# Shape of a crop cut from a cube, axes ordered as (i, x, h).
CROP_SHAPE = [2, 256, 256]

# Shape fed to the model: the first (i) axis of the crop moved to the end -> (x, h, i).
MODEL_SHAPE = (*CROP_SHAPE[-2:], CROP_SHAPE[0])

# Mask shape: the model shape with one extra trailing axis of size 1.
MODEL_SHAPE_DICE = (*MODEL_SHAPE, 1)

### Configuration

We use a smaller version of the [One Hundred Layers Tiramisu](https://arxiv.org/abs/1611.09326) as our base model.

As noted before, we add an axis to the output of the neural network in order to correctly compute the Dice coefficient.

In [None]:
def predictions(x):
    """Return `x` with one extra axis appended as the last dimension.

    The resulting tensorflow op is named 'expand'.
    """
    expanded = tf.expand_dims(x, axis=-1, name='expand')
    return expanded

# DenseNet config
model_config_dense = {
    # Two model inputs: `cubes` of MODEL_SHAPE and `masks` (named 'targets') of MODEL_SHAPE_DICE.
    'inputs': dict(cubes={'shape': MODEL_SHAPE},
                   masks={'name': 'targets', 'shape': MODEL_SHAPE_DICE}),
    # The network itself is fed with the `cubes` input.
    'initial_block/inputs': 'cubes',
    # Three entries of 2 layers each, with growth rate 8.
    'body': {'num_layers': [2]*3,
             'block/growth_rate': 8},
    'loss': 'dice',
    'optimizer': 'Adam',
    # `predictions` appends a trailing axis to the network output.
    'predictions': predictions,
    'output': 'sigmoid',
    'common': {'data_format': 'channels_last'},
}

# `DenseNetFC` is not among the model classes imported at the top of the notebook
# (only UNet, TFModel and DenseNet are), so bring it into scope right where it is used.
from seismiqb.batchflow.models.tf import DenseNetFC

# Read by the training pipeline through the 'model' and 'model_config' keys.
pipeline_config = {'model': DenseNetFC,
                   'model_config': model_config_dense}

### Training pipeline:
* create positions to cut data from. Note that we use the `truncate` method of the sampler to sample only from the first 80% of ilines
* load data from the cubes and create segmentation masks
* some quality-of-life augmentations: rotate the axes (so that we can think of each crop as an image) and scale the data so that all cubes have values in the same range
* start training!

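**Note:** the pipeline below relies on the helper functions `rotate_axis` and `add_axis`, which must be defined before this cell is run.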

In [None]:
# Build the training pipeline from `pipeline_config`; the trailing `<< ds` applies it to the dataset `ds`.
# NOTE(review): `rotate_axis` and `add_axis` are not defined in any cell visible above — confirm they
# are defined before this cell runs.
train_pipeline = (Pipeline(config=pipeline_config)
                  # Take `geometries` and `labels` from the dataset (D-expressions) into components of the same names.
                  .load_component(src=[D('geometries'), D('labels')],
                                  dst=['geometries', 'labels'])
                  # Sample NUM_CROPS points from the dataset sampler truncated at high=0.8 on column 1 of each point
                  # (presumably the iline coordinate, matching the "first 80% of ilines" remark), crop shape CROP_SHAPE.
                  .crop(points=L(ds.sampler.truncate(high=0.8, expr=lambda p: p[:, 1]).sample, NUM_CROPS), shape=CROP_SHAPE)
                  # Cube data goes to `data_crops`, masks go to `mask_crops`.
                  .load_cubes(dst='data_crops')
                  .load_masks(dst='mask_crops')
                  # Apply `rotate_axis` to both components, writing the results back in place.
                  .apply_transform(rotate_axis, src=['data_crops', 'mask_crops'], dst=['data_crops', 'mask_crops'])
                  # Scale only the cube data, in 'normalize' mode.
                  .scale(mode='normalize', src='data_crops')
                  # Training
                  # Apply `add_axis` to the masks only (see MODEL_SHAPE_DICE for the expected mask shape).
                  .apply_transform(add_axis, src='mask_crops', dst='mask_crops')
                  # `loss_history` is created with `list` via `init_on_each_run`; `current_loss` gets no initial value here.
                  .init_variable('loss_history', init_on_each_run=list)
                  .init_variable('current_loss')
                  # Model named 'Dense'; its class and config are referenced through C('model') and C('model_config'),
                  # matching the keys of `pipeline_config`.
                  .init_model('dynamic', C('model'), 'Dense', C('model_config'))
                  # Feed `data_crops`/`mask_crops` as the `cubes`/`masks` inputs, fetch the loss and store it
                  # in `loss_history` with mode 'a' (presumably append).
                  .train_model('Dense', 
                               fetches='loss',
                               make_data={'cubes': B('data_crops'), 'masks': B('mask_crops')},
                               save_to=V('loss_history'), mode='a')) << ds

In [None]:
# Run the training pipeline for EPOCHS epochs with a progress bar.
# The positional 3 is presumably the batch size — TODO confirm against the Pipeline.run signature.
train_pipeline.run(3, n_epochs=EPOCHS, bar=True)

## Check how our model performs on the training data

In [None]:
# Draw the values stored in the pipeline variable `loss_history` and label both axes.
plt.plot(train_pipeline.get_variable('loss_history'))
plt.xlabel("Iterations")
plt.ylabel("Loss")
plt.show()

## Check performance on unseen part of the cube

Due to slow changes in the data along ilines in any given cube, it might be a good idea to test our model against a completely new cube.