# Валидация метрики для оценки разницы между сейсмограммами

In [1]:
import sys
sys.path.append('..')

from functools import reduce

import pandas as pd

from seismicpro.batchflow import Dataset
from seismicpro.src import SeismicBatch, FieldIndex #, seismic_plot

from utils import get_cv, draw_modifications_dist, get_modifications_list, validate_all

## Процедура получения метрики

**Расстояние между двумя сейсмограммами** с равным количеством трасс и отсчетов будем вычислять по следующей процедуре:

* Выбрать область из 10% трасс с наименьшим офсетом и ограничить вычисления только этой областью
* Получить спектральную плотность мощности с помощью оконного преобразования Фурье для каждой трассы обеих сейсмограмм
* Для соответствующих значений сдвигов окна по времени и соответствующих трасс вычислить разницу спектров в разных спектрограммах по формуле:
$$
Dist(A_1, A_2) = \sum_{f}\left|{A_{1}[f]-A_{2}[f]}\right|
$$
* Усреднить полученные значения по всем трассам и всем сдвигам окна по времени

![Seismogram difference metrics](Seismogram_difference_metrics.png)

## Загрузка данных

In [2]:
# Earlier per-dataset configuration, kept commented out (not executed).
# datasets = {'ds1': {'path_raw': '/data/NA/noise_dataset_1/DN02A_LIFT_AMPSCAL.sgy',
#                     'path_lift': '/data/NA/noise_dataset_1/DN02B_SHOTS_LIFT1.sgy',
#                     'tsize': 3000,
#                     'offset_lim': None,
#                     'scale_coef': 1.},
#             'ds2': {'path_raw': '/data/NA/noise_dataset_2/D01_for_Lift.sgy',
#                     'path_lift': '/data/NA/noise_dataset_2/D02_Lift_SP.sgy',
#                     'tsize': 2000,
#                     'offset_lim': 2000,
#                     'scale_coef': 3.},
#             'ds3': {'path_raw': '/data/NA/noise_dataset_3/5_NA-gr_input_raw.sgy',
#                     'path_lift': '/data/NA/noise_dataset_3/5_NA-gr_output_lift5.sgy',
#                     'tsize': 3000,
#                     'offset_lim': None,
#                     'scale_coef': 0.00045}}

# The scaling constants are 1 for dataset 1, 2.3182 for dataset 2 and 0.000558 for dataset 3.
# The LIFT data must be multiplied by these constants.
# NOTE(review): the 'scale_coef' values in the commented-out dict above (3. and 0.00045)
# differ from these constants for datasets 2 and 3 - confirm which ones are current.

# Directory prefix that the `paths` dicts prepend to the 'm_1'..'m_3' file names.
base_path = '/notebooks/egor/geolog/notebooks/processed/'  

In [2]:
# List the contents of the directory that base_path points to.
!ls /notebooks/egor/geolog/notebooks/processed

DN01_shots_for_lift_well_ML_out.sgy	   ds2_ds1.sgy	ds3_ds3.sgy
DN01_shots_for_lift_well_ML_out_check.sgy  ds2_ds2.sgy	merged.sgy
ds1_ds1.sgy				   ds2_ds3.sgy	merged_cv.sgy
ds1_ds2.sgy				   ds3_ds1.sgy
ds1_ds3.sgy				   ds3_ds2.sgy


# Датасет 1

In [8]:
# Dataset 1: SEG-Y file for every component used in this section.
# Key order is kept as lift, raw, m_1, m_2, m_3 because the keys are later
# passed on as the component names to load.
paths = dict(
    lift='/data/NA/noise_dataset_1/DN02B_SHOTS_LIFT1.sgy',
    raw='/data/NA/noise_dataset_1/DN02A_LIFT_AMPSCAL.sgy',
)
# 'm_<k>' is read from '<base_path>ds<k>_ds1.sgy' for k = 1, 2, 3.
paths.update(('m_{}'.format(k), base_path + 'ds{}_ds1.sgy'.format(k)) for k in (1, 2, 3))

# Value passed to validate_all as scale_lift for this dataset.
scale_lift = 1

### Количество филдов в данных

In [9]:
# Lazily create one FieldIndex per entry of `paths` (requesting the 'offset'
# header) and fold them, left to right, into a single merged index.
component_indices = (FieldIndex(name=component, path=sgy_path, extra_headers=['offset'])
                     for component, sgy_path in paths.items())
field_index = next(component_indices)
for component_index in component_indices:
    field_index = field_index.merge(component_index)

# Print the length of the merged index.
print(len(field_index))

51


In [10]:
# Wrap the merged index in a Dataset, request one batch of size
# len(field_index) and load every component named in `paths` as SEG-Y.
dataset = Dataset(field_index, SeismicBatch)
batch = dataset.next_batch(len(field_index))
batch = batch.load(fmt='segy', components=tuple(paths))



## Расчет метрики относительно LIFT

In [11]:
# Run the validation helper on the loaded batch with this dataset's scale,
# then show its result as a transposed DataFrame (last expression of the cell).
res = validate_all(batch, scale_lift=scale_lift)
pd.DataFrame(data=res).transpose()

Unnamed: 0,LIFT,M_1,M_2,M_3,RAW
0,0.0,0.356027,0.552936,0.484863,1.219106
1,0.0,1.365553,1.365553,1.365553,1.365553
2,0.0,0.893977,0.893977,0.893977,0.893977
3,0.0,0.238321,0.238321,0.238321,0.238321
4,0.0,0.390361,0.810882,0.650259,2.750705
5,0.0,0.84498,1.242914,1.138457,1.959987
6,0.0,0.207162,0.207162,0.207161,0.207162
7,0.0,0.791287,7.152665,6.865291,29.309875
8,0.0,0.556167,0.556167,0.556167,0.556167
9,0.0,0.775813,1.269367,1.079043,2.196694


# Датасет 2

In [3]:
# Dataset 2: SEG-Y file for every component used in this section.
# Key order is kept as lift, raw, m_1, m_2, m_3 because the keys are later
# passed on as the component names to load.
paths = dict(
    lift='/data/NA/noise_dataset_2/D02_Lift_SP.sgy',
    raw='/data/NA/noise_dataset_2/D01_for_Lift.sgy',
)
# 'm_<k>' is read from '<base_path>ds<k>_ds2.sgy' for k = 1, 2, 3.
paths.update(('m_{}'.format(k), base_path + 'ds{}_ds2.sgy'.format(k)) for k in (1, 2, 3))

# Value passed to validate_all as scale_lift for this dataset.
scale_lift = 2.3182

### Количество филдов в данных

In [4]:
# Lazily create one FieldIndex per entry of `paths` (requesting the 'offset'
# header) and fold them, left to right, into a single merged index.
component_indices = (FieldIndex(name=component, path=sgy_path, extra_headers=['offset'])
                     for component, sgy_path in paths.items())
field_index = next(component_indices)
for component_index in component_indices:
    field_index = field_index.merge(component_index)

# Print the length of the merged index.
print(len(field_index))

91


In [5]:
# Wrap the merged index in a Dataset, request one batch of size
# len(field_index) and load every component named in `paths` as SEG-Y.
dataset = Dataset(field_index, SeismicBatch)
batch = dataset.next_batch(len(field_index))
batch = batch.load(fmt='segy', components=tuple(paths))



## Расчет метрики относительно LIFT

In [17]:
# Run the validation helper on the loaded batch with this dataset's scale,
# then show its result as a transposed DataFrame (last expression of the cell).
res = validate_all(batch, scale_lift=scale_lift)
pd.DataFrame(data=res).transpose()

Unnamed: 0,LIFT,M_1,M_2,M_3,RAW
0,0.552936,0.224071,0.0,0.2710073,0.8053297
1,1.365553,0.0,0.0,0.0,0.0
2,0.893977,0.0,0.0,0.0,0.0
3,0.238321,0.0,0.0,4.723899e-08,0.0
4,0.810882,0.465904,0.0,0.3606248,2.391758
5,1.242914,0.41761,0.0,0.2351488,1.225567
6,0.207162,0.0,0.0,1.255846e-06,0.0
7,7.152665,6.850034,0.0,1.680976,34.4679
8,0.556167,0.0,0.0,0.0,0.0
9,1.269367,0.524509,0.0,0.3935935,1.558648


# Датасет 3

In [6]:
# Dataset 3: SEG-Y file for every component used in this section.
# Key order is kept as lift, raw, m_1, m_2, m_3 because the keys are later
# passed on as the component names to load.
# NOTE(review): 'lift' and 'raw' are read from /notebooks/egor/noise_dataset_3/,
# whereas datasets 1 and 2 read them from /data/NA/noise_dataset_<n>/ - confirm
# these files exist at this location.
paths = dict(
    lift='/notebooks/egor/noise_dataset_3/5_NA-gr_output_lift5.sgy',
    raw='/notebooks/egor/noise_dataset_3/5_NA-gr_input_raw.sgy',
)
# 'm_<k>' is read from '<base_path>ds<k>_ds3.sgy' for k = 1, 2, 3.
paths.update(('m_{}'.format(k), base_path + 'ds{}_ds3.sgy'.format(k)) for k in (1, 2, 3))

# Value passed to validate_all as scale_lift for this dataset.
scale_lift = 0.000558

### Количество филдов в данных

In [4]:
# Lazily create one FieldIndex per entry of `paths` (requesting the 'offset'
# header) and fold them, left to right, into a single merged index.
component_indices = (FieldIndex(name=component, path=sgy_path, extra_headers=['offset'])
                     for component, sgy_path in paths.items())
field_index = next(component_indices)
for component_index in component_indices:
    field_index = field_index.merge(component_index)

# Print the length of the merged index.
print(len(field_index))

ValueError: No objects to concatenate

In [16]:
# Wrap the merged index in a Dataset, request one batch of size
# len(field_index) and load every component named in `paths` as SEG-Y.
dataset = Dataset(field_index, SeismicBatch)
batch = dataset.next_batch(len(field_index))
batch = batch.load(fmt='segy', components=tuple(paths))

  return np.abs(np.quantile(np.stack(item for item in arrs), q))


ValueError: all input arrays must have the same shape

## Расчет метрики относительно LIFT

In [17]:
# Run the validation helper on the loaded batch with this dataset's scale,
# then show its result as a transposed DataFrame (last expression of the cell).
res = validate_all(batch, scale_lift=scale_lift)
pd.DataFrame(data=res).transpose()

Unnamed: 0,LIFT,M_1,M_2,M_3,RAW
0,0.552936,0.224071,0.0,0.2710073,0.8053297
1,1.365553,0.0,0.0,0.0,0.0
2,0.893977,0.0,0.0,0.0,0.0
3,0.238321,0.0,0.0,4.723899e-08,0.0
4,0.810882,0.465904,0.0,0.3606248,2.391758
5,1.242914,0.41761,0.0,0.2351488,1.225567
6,0.207162,0.0,0.0,1.255846e-06,0.0
7,7.152665,6.850034,0.0,1.680976,34.4679
8,0.556167,0.0,0.0,0.0,0.0
9,1.269367,0.524509,0.0,0.3935935,1.558648
