# Исправление разметки

In [1]:
import os
import sys
import glob

sys.path.insert(0, os.path.join("..", "..", ".."))

import pickle
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import PIL
from PIL import Image

from well_logs import CoreBatch
from well_logs.batchflow import Dataset, Pipeline, B, V, FilesIndex
from well_logs.batchflow.models.torch import ResNet18
from utils import plot_pair, make_data, get_bounds, assemble, plot_images_predictions, plot_crops_predictions, fix_annotation

Загрузим разметку для отложенной скважины

In [14]:
# Root directory of the processed data for one well (hard-coded absolute path;
# presumably the held-out well "509" — adjust when running elsewhere).
PATH = '/notebooks/data/processed_dataset/509'

# Index built over the PNG files matching PATH/samples_dl/*.png.
index = FilesIndex(path=os.path.join(PATH, 'samples_dl/*.png'))

## Предобработка

In [15]:
# Read the annotation table stored in PATH/samples.feather, replace the QC
# column with 1 - QC, and index the rows by the SAMPLE column.
df = (
    pd.concat([
        pd.read_feather(feather_path)
        for feather_path in glob.glob(PATH + '/samples.feather')
    ])
    .assign(QC=lambda frame: 1 - frame['QC'])
    .set_index('SAMPLE')
)

In [16]:
# Restrict the file index to the samples that are present both in the
# annotation table (df.index) and among the indexed image files.
index = index.create_subset(np.intersect1d(df.index.values, index.indices))
# Dataset that yields CoreBatch batches over the restricted index.
ds = Dataset(index, CoreBatch)

In [17]:
# SHAPE[1:] == (400, 150) is the size passed to find_short_cores() and crop()
# in the cells below. The leading 2 is presumably the number of image
# channels (dl + uv) — TODO confirm.
SHAPE = (2, 400, 150)

In [18]:
# Lazy pipeline over `ds`: loads each batch, runs check_shapes() and collects
# the resulting per-batch 'check' component into the pipeline variable
# 'quality' (mode='e'), in dataset order (shuffle=False).
# NOTE(review): the 'shape1' variable is initialised but never updated here.
shape_ppl = (
    ds.p
    .load()
    .check_shapes(dst='check')
    .init_variable('quality', init_on_each_run=list)
    .update_variable('quality', B('check'), mode='e')
    .init_variable('shape1', init_on_each_run=list)
    .run(
        batch_size=10,
        n_epochs=1,
        shuffle=False,
        drop_last=False,
        lazy=True,
        bar=False,
    )
)

# Execute the lazy pipeline; it is the last expression, so its repr is shown.
shape_ppl.run()

<well_logs.batchflow.batchflow.pipeline.Pipeline at 0x7f07c4d074a8>

In [19]:
# Lazy pipeline over `ds`: loads each batch, runs find_short_cores() against
# the crop size SHAPE[1:] and collects the per-batch 'short' component into
# the pipeline variable 'short' (mode='e'), in dataset order (shuffle=False).
short_ppl = (ds.p
     .load()
     .find_short_cores(SHAPE[1:], dst='short')
     .init_variable('short', init_on_each_run=list)
     .update_variable('short', B('short'), mode='e')
     .run(batch_size=10, n_epochs=1, shuffle=False, drop_last=False, lazy=True, bar=False)
    )

short_ppl.run()

# Combine the two per-sample flag lists ELEMENTWISE. The previous
# `short or quality` was Python's short-circuit `or` on two lists, which
# returns the first list whenever it is non-empty, so the 'quality' flags
# from shape_ppl were silently ignored.
# Both pipelines iterate over the same `ds` with shuffle=False, so the flags
# are expected to line up one-to-one with ds.indices.
# NOTE(review): assumes a truthy flag in either list marks a sample to drop,
# as the original expression implies — confirm against check_shapes().
rejected_mask = np.logical_or(
    short_ppl.get_variable('short'),
    shape_ppl.get_variable('quality'),
)

# Keep only the samples that were flagged by neither pipeline.
filtered_index = ds.index.create_subset(
    ds.indices[np.logical_not(rejected_mask)]
)

In [20]:
# Dataset of CoreBatch batches over the filtered index.
# Note: despite the `_df` suffix this is a Dataset, not a DataFrame.
filtered_df = Dataset(filtered_index, CoreBatch)

In [21]:
# Bounds computed by the project helper get_bounds() from PATH; they are
# passed to normalize() in the loading pipeline below.
bounds = get_bounds(PATH)

In [22]:
# Dataset-independent loading template: load the images in grayscale together
# with the annotation table `df`, then normalize using `bounds`.
load_ppl = (
    Pipeline()
    .load(grayscale=True, df=df)
    .normalize(bounds=bounds)
)

## Валидация

In [23]:
# Dataset-independent inference template. It crops the images, runs a
# pre-trained ResNet18 on the crops, assembles the predictions and
# accumulates classification metrics plus per-batch data for later inspection.
test_template = (Pipeline()
    # Convert the 'uv' and 'dl' components to arrays with channels first.
    .to_array(src='uv', dst='uv', channels='first')
    .to_array(src='dl', dst='dl', channels='first')
    # Cut crops of size SHAPE[1:]; the meaning of 200 is not visible here
    # (presumably a step or a crop count — TODO confirm).
    .crop(SHAPE[1:], 200, dst=('dl_crops', 'uv_crops', 'labels_crops'))
    # NOTE(review): the device 'gpu:2' is hard-coded, and the weights file is
    # named 'resnet16.torch' although the model class is ResNet18 — confirm
    # that this is the intended checkpoint.
    .init_model('dynamic', ResNet18, 'model', config={
                    'device': 'gpu:2', 'load/path': 'resnet16.torch'
                })
    # NOTE(review): 'loss' is initialised but never updated in this template.
    .init_variable('loss', init_on_each_run=list)
    # make_data (project helper) fills the 'crops_conc' and
    # 'labels_crops_conc' batch components used as model inputs/targets.
    .call(make_data, save_to=(B('crops_conc'), B('labels_crops_conc')))
    .init_variable('proba', init_on_each_run=None)
    .init_variable('metrics', init_on_each_run=None)
    # Per-crop predictions are written to the batch component 'proba'...
    .predict_model('model', B('crops_conc'), targets=B('labels_crops_conc'), fetches='proba',
                 save_to=B('proba', mode='w'))
    # ...and then overwritten by the output of assemble (project helper).
    .call(assemble, save_to=B('proba', mode='w'))
    # Update the pipeline variable 'metrics' with classification metrics
    # computed from the probabilities at threshold 0.5.
    .gather_metrics('class', targets=B('labels'), predictions=B('proba'),
                            fmt='proba', axis=-1, save_to=V('metrics', mode='u'), threshold=0.5)
    # Keep (dl, uv, proba, labels) of every batch in the 'stat' variable.
    .init_variable('stat', init_on_each_run=list)
    .update_variable('stat', (B('dl'), B('uv'), B('proba'), B('labels')), mode='a')
    # lazy=True: nothing is executed until .run() is called on the pipeline
    # after it has been bound to a dataset.
    .run(
        batch_size=8,
        n_epochs=1,
        shuffle=False,
        drop_last=False,
        lazy=True,
        bar=True
    )
)

In [24]:
# Chain the loading and inference templates and bind them to the filtered
# dataset (`filtered_df` is a Dataset, see its definition).
test_ppl = (load_ppl + test_template) << filtered_df

In [25]:
# Reset the iterator before running so that re-executing this cell starts
# again from the first batch (presumed purpose of reset_iter — TODO confirm),
# then execute the lazy pipeline with the parameters given in the template.
test_ppl.reset_iter()
test_ppl.run()

 95%|█████████▌| 21/22 [00:14<00:00,  2.03it/s]


<well_logs.batchflow.batchflow.pipeline.Pipeline at 0x7f07c4d07e10>

In [26]:
# Fetch the metrics object accumulated by the validation run and print its
# confusion matrix (read from a private attribute) followed by the selected
# summary metrics.
metrics = test_ppl.get_variable('metrics')
print(metrics._confusion_matrix)

for metric_name in ('specificity', 'sensitivity', 'accuracy'):
    print(metric_name, ':', metrics.evaluate(metric_name))

[[[97 63]
  [ 0 11]]]
specificity : 1.0
sensitivity : 0.14864864864864866
accuracy : 0.631578947368421


In [None]:
# fix_annotation is a project helper from utils; it receives the executed
# pipeline and the annotation table. Presumably it returns a corrected
# annotation table — TODO confirm against utils.fix_annotation.
new_df = fix_annotation(test_ppl, df)

In [17]:
# Flip QC back (the table was loaded with QC replaced by 1 - QC) before saving.
# WARNING: this mutates new_df in place, so the cell is not idempotent —
# running it twice without re-creating new_df writes inverted labels.
new_df['QC'] = 1 - new_df['QC']
# Feather output goes next to the original table, with SAMPLE restored as a
# regular column by reset_index().
new_df.reset_index().to_feather(os.path.join(PATH, 'new_samples.feather'))