In [1]:
import sys
import torch
import pandas as pd
import numpy as np

import matplotlib.pyplot as plt
import segyio

sys.path.append('../..')

from seismicpro.batchflow import Pipeline, Dataset, B
from seismicpro.src import SeismicBatch, FieldIndex, TraceIndex, merge_picking_files, seismic_plot, KNNIndex

In [2]:
sys.path.append('/notebooks/kalashnikov/')
from TCN.mnist_pixel.model import TCN
input_channels = 1
n_classes = 2
nhid = 20
levels = 8
channel_sizes = [nhid] * levels
kernel_size = 3
dropout = 0
model = TCN(input_channels, n_classes, channel_sizes, kernel_size=kernel_size, dropout=dropout)
model.load_state_dict(torch.load('/notebooks/kalashnikov/TCN/mnist_pixel/model300', map_location='cpu'))

path_raw = '/notebooks/egor/FB/Pal/Pal_Flatiron_1k.sgy'
index = (FieldIndex(name='raw', path=path_raw, 
                    markup_path='/notebooks/egor/FB/Pal/Pal_Flatiron_1k_picking.csv'))

In [3]:
from seismicpro.batchflow import action, inbatch_parallel
import pandas as pd

predict = lambda x: np.argmax(np.argmax(model(x).cpu().detach().numpy(), axis=1), axis=1)

class InvBatch(SeismicBatch):
    @action
    @inbatch_parallel(init='_init_component')
    def check_inv(self, index, model, src, dst):
        pos = self.get_pos(None, src, index)
        field = getattr(self, src)[pos]
        t = torch.Tensor(field).reshape(field.shape[0], 1, field.shape[1])
        inv_t = torch.Tensor(-field).reshape(field.shape[0], 1, field.shape[1])
        pred = np.array([predict(t), predict(inv_t)]).T
        traces_pred = pred.copy()
        data = []
        for i, (trace, inv_trace, norm_predict) in enumerate(zip(field, -field, pred)):
            for j, (tr, pr) in enumerate([(trace, norm_predict[0]), (inv_trace, norm_predict[1])]):
                amp_val = tr[pr]
                if i > 0 and i < len(norm_predict):
                    amp_left = field[i-1][pred[i-1][j]]
                    amp_right = field[i+1][pred[i+1][j]]
                    diff_pred_left = np.abs(pred[i][j] - pred[i-1][j])
                    diff_pred_right = np.abs(pred[i][j] - pred[i+1][j])
                elif i == 0:
                    amp_left, diff_pred_left = 0, 0
                    amp_right = field[i+1][pred[i+1][j]]
                    diff_pred_right = np.abs(pred[i][j] - pred[i+1][j])
                else:
                    amp_right, diff_pred_right = 0, 0
                    amp_left = field[i-1][pred[i-1][j]]
                    diff_pred_left = np.abs(pred[i][j] - pred[i-1][j])

                data.append([amp_val, amp_left, amp_right, diff_pred_left, diff_pred_right, j])
        getattr(self, dst)[pos] = data
        return self

In [4]:
def f(a):
    return (a - a.mean())/a.std()

ppl = (Dataset(index, InvBatch).p
         .load(components='raw', fmt='segy')
         .load(components='markup', fmt='picks')
         .init_variable('diff', init_on_each_run=list)
         .apply_transform(f, src='raw')
         .check_inv(model, src='raw', dst='diff')
         .update_variable('diff', B('diff'), mode='e')
        )

In [5]:
z = ppl.next_batch(15)

In [6]:
diff = ppl.get_variable('diff')

In [7]:
data = np.concatenate(diff)

In [8]:
np.random.shuffle(data)
dataset, target = data[:,:-1], data[:,-1]

In [13]:
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

X_train, X_test, y_train, y_test = train_test_split(dataset, target)

In [14]:
tree = LogisticRegression()
tree.fit(X_train, y_train)

LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
          intercept_scaling=1, max_iter=100, multi_class='ovr', n_jobs=1,
          penalty='l2', random_state=None, solver='liblinear', tol=0.0001,
          verbose=0, warm_start=False)

In [15]:
pred = tree.predict(X_test)

In [17]:
accuracy_score(y_test, pred)

0.9719923615531508