In [1]:
import os
import sys
import numpy as np
import pandas as pd
import glob
import matplotlib.pyplot as plt
import shutil

sys.path.insert(0, os.path.join("..", "..", ".."))

from petroflow import Well, WellBatch, WS, WellDataset
from petroflow.batchflow.models.torch import UNet
from petroflow.batchflow import Dataset, DatasetIndex, FilesIndex, Pipeline, V, B, action, inbatch_parallel, I, W, F, L, ImagesBatch, R, P

In [2]:
BATCH_SIZE = 16
N_CROPS = 4
N_EPOCH = 500
LENGTH = 0.1
SHAPE = (3, int(2500 * LENGTH), 250)

In [3]:
import torch
from torch import nn
import torch.nn.functional as F

class MyWellBatch(WellBatch):
    def __init__(self, *args, **kwargs):
        super().__init__(*args, pixels_per_cm=25, **kwargs)
    
    @action
    def create_images_batch(self, core_dl, core_uv, targets):
        index = DatasetIndex(len(core_dl))
        batch = ImagesBatch(index)
        batch = batch.add_components(('core_dl', 'core_uv', 'targets'),
                                     (np.array(core_dl).astype(np.uint8),
                                      np.array(core_uv).astype(np.uint8),
                                      np.array(targets)))
        return batch

In [4]:
index = FilesIndex(path='/notebooks/data/september_dataset/core_photo/*/*', dirs=True)
ds = Dataset(index=index, batch_class=MyWellBatch)

filter_ppl = (ds.p
              .init_variable('wells', default=[])
              .has_attr('core_lithology')
              .update(V('wells', mode='e'), B().indices)
              .run(10, n_epochs=1, shuffle=False, bar=True))

filtered_index = index.create_subset(filter_ppl.v('wells'))
ds = Dataset(index=filtered_index, batch_class=MyWellBatch)
ds.split()

 88%|████████▊ | 30/34 [00:00<00:00, 75.46it/s]


In [5]:
classes_ppl = (ds.p
       .init_variable('classes', default=[])
       .update(V('classes', mode='a'), WS('core_lithology')['FORMATION'].values.ravel())
)

(classes_ppl.after
    .add_namespace(np)
    .concatenate(L(sum)(V('classes'), []), save_to=V('classes', mode='w'))
    .unique(V('classes'), save_to=V('classes'))
)

<petroflow.batchflow.batchflow.once_pipeline.OncePipeline at 0x7fca49af3588>

In [6]:
classes_ppl.run(BATCH_SIZE)

<petroflow.batchflow.batchflow.pipeline.Pipeline at 0x7fca49af3ba8>

In [7]:
reverse_mapping = dict(enumerate(classes_ppl.v('classes')))
mapping = {value: key for key, value in reverse_mapping.items()}

In [8]:
crop_template = (Pipeline()
       .add_namespace(np)
       .create_segments(src='samples', connected=True)
       .create_segments(src='core_lithology', connected=True)
       .random_crop(length=LENGTH, n_crops=N_CROPS)
       .create_mask(src='core_lithology', column='FORMATION', mapping=mapping, mode='core')
       .update(B('core'), WS('core_dl').ravel())
       .update(B('masks'), WS('mask').ravel())
       .array(B('core'), save_to=B('core'))
       .array(B('masks'), save_to=B('masks'))
       .transpose(B('core'), axes=(0, 3, 1, 2), save_to=B('core'))
)

In [9]:
# batch = ppl.next_batch(BATCH_SIZE)

In [10]:
# def add_lithology_position(well, segment=0):
#     segment = well.iter_level()[segment]
#     core_lithology = segment.core_lithology
#     image = segment.core_dl
#     factor = image.shape[0] / segment.length
#     positions = []
#     for (depth_from, depth_to), formation in core_lithology.iterrows():
#         positions.append(
#             (max(0, depth_from - segment.depth_from) * factor,
#              min(segment.length, depth_to - segment.depth_from) * factor)
#         )
#     return positions

In [11]:
# for well in batch.wells:
#     for i, segment in enumerate(well.iter_level()):
#         if len(segment.core_lithology) > 1:
#             plt.figure(figsize=(4, 20))
#             print(segment.core_lithology)
#             img = segment.core_dl / 255
#             mask = segment.mask
#             lithology = segment.core_lithology
#             plt.imshow(img)
#             for a, b in add_lithology_position(well, segment=i):
#                 plt.hlines(a, 0, img.shape[1], colors='r')
#                 plt.hlines(b, 0, img.shape[1], colors='r')
#             plt.show()
#             break

In [12]:
model_config = {'initial_block/inputs': 'images',
                'inputs/images/shape': SHAPE,
                'inputs/masks/shape': (1, 1, SHAPE[1]),
                'body/filters': ((2 ** np.arange(4)) * 4).tolist(),
                'head': {
                    'num_classes': len(mapping),
                    'layout': 'c',
                    'kernel_size': (1, SHAPE[2]),
                    'padding': 'valid'
                },
                'optimizer': 'Adam',
                'device': 'gpu:7',
                'output': 'proba',
                'loss': 'ce'
}
        
train_template = (Pipeline()
    .init_variable('loss_history', default=[])
    .init_model('dynamic', UNet, 'model', model_config)
    .train_model('model', B('core'), B('masks').reshape(-1, SHAPE[1], 1), fetches=['loss', 'proba'],
                 save_to=[V('loss_history', mode='a'), B('proba')])
    
)

train_ppl = (crop_template + train_template) << ds.train

In [13]:
train_ppl.run(BATCH_SIZE, n_epochs=N_EPOCHS, bar=True, bar_desc=W(V('loss_history')[-1]))

NameError: name 'N_EPOCHS' is not defined

In [None]:
train_ppl.v('loss_history')

In [None]:
plt.plot(batch.proba[0].argmax(axis=0))
plt.plot(batch.masks[0])