In [1]:
import os
import sys
import numpy as np
import pandas as pd
import pickle
import collections
import matplotlib.pyplot as plt
import torch
import time
import collections
from functools import reduce

sys.path.insert(0, os.path.join("..", "..", ".."))

from petroflow import CoreBatch, CoreIndex
from petroflow.batchflow.models.torch import ResNet18
from petroflow.batchflow import Dataset, Pipeline, V, B, W, F, C
from petroflow.batchflow.research import Research, Option, KV

import utils

In [2]:
# Root directory of the input data; it is handed to utils.get_input_data in init_research.
# The location can be overridden with the PETROFLOW_DATA_PATH environment variable so the
# notebook is not tied to one machine's directory layout. When the variable is not set,
# the original hard-coded location is used.
PATH = os.environ.get('PETROFLOW_DATA_PATH', '/notebooks/data/august_dataset/cropped_wells')

## Pipelines

In [3]:
# NOTE(review): BATCH_SIZE and N_EPOCH are not referenced anywhere in the visible notebook;
# the pipelines pass 64 and n_epochs=100 (train) / n_epochs=1 (test) to run_later directly.
# Confirm which values are intended before wiring these constants in.
BATCH_SIZE = 8
N_EPOCH = 50
# Model input shape. SHAPE[1:] is also used as the crop shape in the pipelines, and the
# leading 3 matches the channels-first layout produced by their transpose step.
SHAPE = (3, 500, 250)

# Configuration handed to init_model for ResNet18 in the training pipeline.
# The original literal listed 'initial_block/inputs' twice with the same value; the
# redundant second entry is dropped (the resulting dict is unchanged).
model_config = {
    'initial_block/inputs': 'images',
    'inputs/images/shape': SHAPE,
    # The number of classes is not known up front: it is computed lazily as the size of
    # the 'labels_mapping' entry of the pipeline config (x presumably is the batch -- confirm).
    'inputs/labels/classes': F(lambda x: len(x.pipeline.config['labels_mapping'])),
    'optimizer': 'Adam',
    'output': 'proba',
    # The device is taken from the pipeline config entry 'device'.
    'device': C('device'),
    # Plain cross-entropy. A class-weighted variant (dict(weight=V('weight'))) was sketched
    # in a trailing comment of the original cell but is not enabled.
    'loss': 'ce',
}

# Training pipeline, assembled in two stages. Nothing experiment-specific is fixed here:
# the dataset, annotation, label mapping and device are read from the pipeline config
# through C(...) expressions when the pipeline is executed.

# Stage 1 -- batch preparation.
train_ppl = (
    Pipeline()
    .add_namespace(utils)
    .set_dataset(C('dataset'))
    # load into the 'dl' component with uv disabled, then normalize it in place
    .load(uv=False, dst=['dl'])
    .normalize(src='dl', dst='dl')
    # np is registered before the concatenate/transpose steps are added
    # (presumably they resolve to numpy functions through this namespace -- confirm)
    .add_namespace(np)
    .create_labels(C('annotation').LITHOLOGY.loc)
    .update(B('labels'), B('labels').tolist())
    .to_array(src='dl', dst='images', dtype='float32')
    # one random crop of shape SHAPE[1:] per image, channels-last layout
    .make_random_crops(src=['images'], dst=['images'], channels='last',
                       shape=SHAPE[1:], n_crops=1)
    .concatenate(B('images'), axis=0, save_to=B('images'))
)

# Stage 2 -- model definition and training step.
train_ppl = (
    train_ppl
    .init_variable('loss', default=None)
    # move the last axis to position 1 (channels-last -> channels-first, as in SHAPE)
    .transpose(B('images'), axes=(0, 3, 1, 2), save_to=B('images'))
    .encode(B('labels'), C('labels_mapping'), save_to=B('labels'))
    .init_model('dynamic', ResNet18, 'model', model_config)
    # the fetched loss overwrites the 'loss' pipeline variable (mode='w')
    .train_model('model', B('images'), B('labels'),
                 use_lock=True, fetches='loss', save_to=V('loss', mode='w'))
    # deferred run: 64 is the first positional argument of run_later, for 100 epochs;
    # shuffle=42 presumably acts as a shuffling seed -- confirm
    .run_later(64, n_epochs=100, shuffle=42)
)

def init_research(iteration, experiment, ppl_train, ppl_test):
    """Prepare the inputs of one experiment and inject them into both pipelines.

    The 'lithology' option is read from the train unit's config, the annotation and the
    train/test datasets are obtained from ``utils.get_input_data(PATH, lithology)``, and
    label statistics come from ``utils.get_statistics``. The resulting config is set on
    the train and test pipelines, and all inputs are pickled to ``inputs.pkl`` inside
    the train unit's directory so that a run can be re-evaluated later.

    Parameters
    ----------
    iteration
        Not used by this function.
    experiment
        Mapping from unit name to a research unit exposing ``pipeline``, ``config``
        and ``path``.
    ppl_train
        Key of the training unit in ``experiment``.
    ppl_test
        Key of the test unit in ``experiment``.

    Returns
    -------
    None
        NOTE(review): the research cell registers this function with
        ``returns='mapping'`` although nothing is returned -- confirm whether
        ``labels_mapping`` was meant to be returned.
    """
    train_unit = experiment[ppl_train]
    cfg = train_unit.config.config()

    # The fourth value returned by get_input_data is not needed here.
    ann, ds_train, ds_test, _ = utils.get_input_data(PATH, cfg['lithology'])
    labels_mapping, reverse_mapping, counter, weights = utils.get_statistics(ds_train, ann)

    # The same config object goes to both pipelines, so train and test share the
    # annotation, the datasets and the label encoding.
    new_cfg = {
        'lithology': cfg['lithology'],
        'annotation': ann,
        'dataset': ds_train,
        'dataset_test': ds_test,
        'labels_mapping': labels_mapping
    }
    train_unit.pipeline.set_config(new_cfg)
    experiment[ppl_test].pipeline.set_config(new_cfg)

    # Persist the inputs together with the statistics that are not part of the config.
    with open(os.path.join(train_unit.path, 'inputs.pkl'), 'wb') as f:
        pickle.dump({
            'annotation': ann,
            'dataset': ds_train,
            'dataset_test': ds_test,
            'labels_mapping': labels_mapping,
            'reverse_mapping': reverse_mapping,
            'counter': counter,
            'weights': weights
        }, f)


def get_model(iteration, experiment, pipeline):
    """Save the model of one research unit into that unit's directory.

    Parameters
    ----------
    iteration
        Not used by this function.
    experiment
        Mapping from unit name to a research unit exposing ``pipeline`` and ``path``.
    pipeline
        Key of the unit in ``experiment`` whose model is saved.

    The model registered in the unit's pipeline under the name 'model' is written to
    the file ``model.torch`` inside ``path`` of that unit. Nothing is returned.
    """
    unit = experiment[pipeline]
    trained_model = unit.pipeline.get_model_by_name('model')
    target_file = os.path.join(unit.path, 'model.torch')
    trained_model.save(target_file)

In [4]:
# Evaluation pipeline used inside the research, assembled in two stages. The batch
# preparation repeats the training pipeline's steps but reads the 'dataset_test' config
# entry instead of 'dataset'.

# Stage 1 -- batch preparation.
test_ppl = (
    Pipeline()
    .add_namespace(utils)
    .set_dataset(C('dataset_test'))
    # load into the 'dl' component with uv disabled, then normalize it in place
    .load(uv=False, dst=['dl'])
    .normalize(src='dl', dst='dl')
    # np is registered before the concatenate/transpose steps are added
    .add_namespace(np)
    .create_labels(C('annotation').LITHOLOGY.loc)
    .update(B('labels'), B('labels').tolist())
    .to_array(src='dl', dst='images', dtype='float32')
    # one random crop of shape SHAPE[1:] per image, channels-last layout
    .make_random_crops(src=['images'], dst=['images'], channels='last',
                       shape=SHAPE[1:], n_crops=1)
    .concatenate(B('images'), axis=0, save_to=B('images'))
)

# Stage 2 -- prediction and metric accumulation.
test_ppl = (
    test_ppl
    .init_variable('metrics', default=None)
    # move the last axis to position 1 (channels-last -> channels-first)
    .transpose(B('images'), axes=(0, 3, 1, 2), save_to=B('images'))
    .encode(B('labels'), C('labels_mapping'), save_to=B('labels'))
    # the model comes from whatever the 'import_from' config entry refers to
    # (the research cell passes import_from='train')
    .import_model('model', C('import_from'))
    .predict_model('model', B('images'), fetches='proba', save_to=B('proba'))
    # classification metrics are stored in the 'metrics' variable with mode='u'
    .gather_metrics('class', targets=B('labels'), predictions=B('proba'),
                    fmt='proba', axis=-1, save_to=V('metrics', mode='u'))
    # deferred run: a single unshuffled epoch that keeps the last incomplete batch
    .run_later(64, shuffle=False, n_epochs=1, bar=False, drop_last=False)
)

# Research

In [5]:
! rm -r research

In [None]:
# Grid of experiments: one per lithology subset. KV pairs each list of class names with
# an alias made by joining the names with '_'.
options = Option('lithology', [
    KV(item, '_'.join(item))
    for item in (
        ['Песчаник', 'Аргиллит'],
        ['Песчаник', 'Алевролит'],
        ['Аргиллит', 'Алевролит'],
        ['Аргиллит', 'Алевролит', 'Песчаник'],
    )
])

research = Research().add_grid(options)

# 'init' is executed with execute='#0' and fills the configs of both pipelines.
research = research.add_function(
    init_research, execute='#0', ppl_train='train', ppl_test='test',
    returns='mapping', name='init', logging=True,
)

# Training unit; the 'loss' pipeline variable is collected as a result.
research = research.add_pipeline(train_ppl, variables='loss', name='train')

# Test unit: run in full (run=True) with execute='last', importing the model from 'train'.
research = research.add_pipeline(
    test_ppl, name='test', run=True, execute='last',
    import_from='train', variables='metrics', logging=True,
)

# f1_score is evaluated from the 'metrics' variable of the test unit.
research = research.get_metrics(
    'test',
    metrics_var='metrics',
    metrics_name='f1_score',
    returns='f1_score',
    execute='last',
)

# Finally the trained model of the 'train' unit is saved next to the results.
research = research.add_function(get_model, execute='last', pipeline='train')

research.run(1, None, workers=4, gpu=[0, 1, 2, 3], bar=True)

Research research is starting...


  0%|          | 0/4 [00:00<?, ?it/s]

Distributor has 4 jobs


In [None]:
# Display the results collected by the research run (the 'loss', 'metrics', 'f1_score'
# and 'mapping' outputs registered above). Left as a bare expression so the notebook
# renders the returned object -- presumably a table; confirm.
research.load_results()

In [None]:
# Directory of the experiment to re-evaluate outside of the research.
path = 'research/results/lithology_Аргиллит_Алевролит/0/'

# Standalone evaluation pipeline. It rebinds the name test_ppl and differs from the
# research version only in how the model is obtained: it is loaded from the
# 'model.torch' file under `path` rather than imported from a training pipeline.

# Stage 1 -- batch preparation.
test_ppl = (
    Pipeline()
    .add_namespace(utils)
    .set_dataset(C('dataset_test'))
    # load into the 'dl' component with uv disabled, then normalize it in place
    .load(uv=False, dst=['dl'])
    .normalize(src='dl', dst='dl')
    # np is registered before the concatenate/transpose steps are added
    .add_namespace(np)
    .create_labels(C('annotation').LITHOLOGY.loc)
    .update(B('labels'), B('labels').tolist())
    .to_array(src='dl', dst='images', dtype='float32')
    # one random crop of shape SHAPE[1:] per image, channels-last layout
    .make_random_crops(src=['images'], dst=['images'], channels='last',
                       shape=SHAPE[1:], n_crops=1)
    .concatenate(B('images'), axis=0, save_to=B('images'))
)

# Stage 2 -- model loading, prediction and metric accumulation.
test_ppl = (
    test_ppl
    .init_variable('metrics', default=None)
    # move the last axis to position 1 (channels-last -> channels-first)
    .transpose(B('images'), axes=(0, 3, 1, 2), save_to=B('images'))
    .encode(B('labels'), C('labels_mapping'), save_to=B('labels'))
    # the device is fixed to 'gpu:0' here instead of being read from the config
    .init_model('dynamic', ResNet18, 'model', config={
        'device': 'gpu:0',
        'load/path': os.path.join(path, 'model.torch'),
    })
    .predict_model('model', B('images'), fetches='proba', save_to=B('proba'))
    # classification metrics are stored in the 'metrics' variable with mode='u'
    .gather_metrics('class', targets=B('labels'), predictions=B('proba'),
                    fmt='proba', axis=-1, save_to=V('metrics', mode='u'))
    # deferred run: a single unshuffled epoch that keeps the last incomplete batch
    .run_later(64, shuffle=False, n_epochs=1, drop_last=False)
)

In [None]:
# Restore the inputs that init_research pickled into the experiment directory
# (annotation, datasets, label mappings, class counter and weights).
# NOTE(review): pickle.load executes arbitrary code from the file; only use it on
# 'inputs.pkl' files produced by this notebook's own research runs.
with open(os.path.join(path, 'inputs.pkl'), 'rb') as f:
    inputs = pickle.load(f)

In [None]:
# Pick the three entries of the restored inputs that the evaluation pipeline needs:
# the annotation, the held-out dataset and the label encoding.
annotation, ds, labels_mapping = (
    inputs['annotation'],
    inputs['dataset_test'],
    inputs['labels_mapping'],
)

In [None]:
# Supply the config entries that the evaluation pipeline reads through C(...):
# 'annotation', 'dataset_test' and 'labels_mapping'.
test_ppl.set_config(dict(
    annotation=annotation,
    dataset_test=ds,
    labels_mapping=labels_mapping,
))

In [None]:
# Execute the evaluation pipeline with the parameters stored by its run_later(...) call
# (one unshuffled epoch); the metrics accumulate in the 'metrics' pipeline variable.
test_ppl.run()

In [None]:
# Metrics accumulated by the evaluation run.
val_metrics = test_ppl.get_variable('metrics')
# NOTE(review): _confusion_matrix is a private attribute of the metrics object.
print(val_metrics._confusion_matrix)

# Report each summary metric on its own line as "<name> : <value>".
for metric_name in ('specificity', 'sensitivity', 'accuracy', 'f1_score'):
    score = val_metrics.evaluate(metric_name)
    print(metric_name, ':', score)

In [None]:
# Show the label encoding restored from inputs.pkl, so the class indices in the
# confusion matrix above can be matched to lithology names.
labels_mapping