In [None]:
import warnings
warnings.filterwarnings(action='ignore')

from segysak.segy import segy_loader
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import cv2 as cv
import segyio
import os

In [None]:
def plot_faults(file_path, cube_path, cmap='gray_r'):
    """Plot every interpreted seismic section with its fault sticks on top.

    One subplot is created per section (inline or crossline) listed in the
    fault file. The amplitude section is drawn first; a NaN-filled mask on
    which the fault sticks have been rasterised is then drawn over it.

    Parameters
    ----------
    file_path : str
        Whitespace-delimited fault file. The columns read here are 'Line'
        (whose values name the section column, 'Inline' or 'Xline'),
        'Inline', 'Xline', 'Time' and 'ID'.
    cube_path : str
        Path of the SEG-Y file, opened with ``segyio.open``.
    cmap : str, optional
        Matplotlib colormap used for the amplitude section.

    Returns
    -------
    None
        The figure is created through pyplot as a side effect. Nothing is
        drawn when the fault file lists no section.

    Notes
    -----
    NOTE(review): 'Time' values are used directly as row indices of the
    section, and line numbers become column indices by subtracting the first
    line number of the cube. That presumes unit line spacing and a 'Time'
    column expressed in samples -- confirm against the survey geometry.
    """
    # sep=r'\s+' is the supported spelling of the deprecated delim_whitespace=True.
    df = pd.read_csv(file_path, sep=r'\s+')

    # The context manager guarantees the SEG-Y handle is released.
    with segyio.open(cube_path) as cube:
        iline0 = cube.ilines[0]
        xline0 = cube.xlines[0]

        print(f'Initial Values:\n   Inline: {iline0} | Crossline: {xline0}')

        # Every (orientation, line number) pair that carries interpreted faults.
        sections = [
            (line_str, line_number)
            for line_str in df['Line'].unique()
            for line_number in df.loc[df['Line'] == line_str, line_str].unique()
        ]
        if not sections:
            return

        rows, cols = len(sections), 1
        # squeeze=False keeps `ax` two-dimensional even for a single section,
        # so indexing works regardless of how many subplots there are.
        fig, ax = plt.subplots(rows, cols, figsize=(15 * cols, 7 * rows), squeeze=False)

        for panel, (line_str, line_number) in zip(ax[:, 0], sections):
            on_inline = line_str == 'Inline'
            img = cube.iline[line_number].T if on_inline else cube.xline[line_number].T
            # NaN background, so only the drawn fault pixels carry a value.
            mask = np.full(img.shape, np.nan)

            # The horizontal axis of a section is the *other* line direction,
            # and it must be shifted by that direction's first line number.
            if on_inline:
                horiz_col, origin = 'Xline', xline0
            else:
                horiz_col, origin = 'Inline', iline0

            # Only picks belonging to this section are used, so an ID that is
            # reused on another section cannot leak into this mask.
            picks = df[(df['Line'] == line_str) & (df[line_str] == line_number)]
            for _, stick in picks.groupby('ID', sort=False):
                points = [
                    (int(h - origin), int(t))
                    for h, t in zip(stick[horiz_col], stick['Time'])
                ]
                # Join consecutive picks of the stick with 3-pixel-wide segments.
                for start, end in zip(points, points[1:]):
                    cv.line(mask, start, end, (1, 0, 0), 3)

            panel.imshow(img, cmap=cmap)
            panel.set_title(f'{line_str}: {line_number}')
            panel.imshow(mask)
            panel.set_xlabel(horiz_col)
            panel.set_ylabel('Time')
            
def get_amplitude_mask_dataset(file_path, cube_path, width, height):
    """Build a sliding-window dataset of amplitudes and fault masks.

    For every section (inline or crossline) listed in the fault file, the
    fault sticks are rasterised onto a zero mask of the section's shape. A
    ``height`` x ``width`` window is then moved over the section one pixel at
    a time; each position yields one row holding the min-max normalised
    amplitudes followed by the matching mask values.

    Parameters
    ----------
    file_path : str
        Whitespace-delimited fault file. The columns read here are 'Line'
        (whose values name the section column, 'Inline' or 'Xline'),
        'Inline', 'Xline', 'Time' and 'ID'.
    cube_path : str
        Path of the SEG-Y file, opened with ``segyio.open``.
    width : int
        Window size along the section's second axis (columns).
    height : int
        Window size along the section's first axis (rows).

    Returns
    -------
    pandas.DataFrame
        One row per window, indexed 'W_0', 'W_1', ...; columns
        'X0'..'X{width*height-1}' hold the normalised amplitudes and
        'y0'..'y{width*height-1}' the int8 mask values, both flattened
        row by row.

    Raises
    ------
    ValueError
        If ``width`` or ``height`` is smaller than 1.

    Notes
    -----
    NOTE(review): 'Time' values are used directly as row indices of the
    section, and line numbers become column indices by subtracting the first
    line number of the cube. That presumes unit line spacing and a 'Time'
    column expressed in samples -- confirm against the survey geometry.
    """
    if width < 1 or height < 1:
        raise ValueError('width and height must be positive integers')

    # sep=r'\s+' is the supported spelling of the deprecated delim_whitespace=True.
    df = pd.read_csv(file_path, sep=r'\s+')

    # Rows are gathered in plain lists and turned into arrays once at the end;
    # this avoids building a DataFrame with one column per window.
    feature_rows = []
    label_rows = []

    # The context manager guarantees the SEG-Y handle is released.
    with segyio.open(cube_path) as cube:
        iline0 = cube.ilines[0]
        xline0 = cube.xlines[0]

        for line_str in df['Line'].unique():
            on_inline = line_str == 'Inline'
            # The horizontal axis of a section is the *other* line direction,
            # and it must be shifted by that direction's first line number.
            if on_inline:
                horiz_col, origin = 'Xline', xline0
            else:
                horiz_col, origin = 'Inline', iline0

            for line_number in df.loc[df['Line'] == line_str, line_str].unique():
                img = cube.iline[line_number].T if on_inline else cube.xline[line_number].T
                mask = np.zeros(img.shape)

                # Only picks belonging to this section are used, so an ID that
                # is reused on another section cannot leak into this mask.
                picks = df[(df['Line'] == line_str) & (df[line_str] == line_number)]
                for _, stick in picks.groupby('ID', sort=False):
                    points = [
                        (int(h - origin), int(t))
                        for h, t in zip(stick[horiz_col], stick['Time'])
                    ]
                    # Join consecutive picks with 3-pixel-wide segments of value 1.
                    for start, end in zip(points, points[1:]):
                        cv.line(mask, start, end, (1, 0, 0), 3)

                # Slide the window over every position where it fits entirely.
                for i in range(img.shape[0] - (height - 1)):
                    for j in range(img.shape[1] - (width - 1)):
                        window_amplitude = img[i : i + height, j : j + width]
                        # Each window is normalised with its own minimum and maximum.
                        window_amplitude = norm_data(
                            window_amplitude,
                            np.min(window_amplitude),
                            np.max(window_amplitude),
                        )
                        window_mask = mask[i : i + height, j : j + width]

                        feature_rows.append(window_amplitude.reshape(-1))
                        label_rows.append(window_mask.reshape(-1).astype('int8'))

    n_values = width * height
    index = [f'W_{k}' for k in range(len(feature_rows))]
    # reshape(-1, n_values) also gives a well-formed (0, n_values) array when
    # no window was produced, so an empty result still has the right columns.
    features = np.array(feature_rows).reshape(-1, n_values)
    labels = np.array(label_rows, dtype='int8').reshape(-1, n_values)

    dataset = pd.concat(
        [pd.DataFrame(features, index=index), pd.DataFrame(labels, index=index)],
        axis=1,
    )
    dataset.columns = [f'X{i}' for i in range(n_values)] + [f'y{i}' for i in range(n_values)]

    return dataset

def norm_data(X, x_min, x_max):
    """Min-max scale ``X`` so that ``x_min`` maps to 0 and ``x_max`` to 1.

    Parameters
    ----------
    X : scalar or numpy.ndarray
        Values to scale.
    x_min, x_max : scalar or numpy.ndarray
        Lower and upper reference values; arrays are broadcast against ``X``.

    Returns
    -------
    scalar or numpy.ndarray
        ``(X - x_min) / (x_max - x_min)``. When the range is a scalar equal
        to zero (for example a window of constant amplitude) zeros are
        returned instead of dividing by zero.
    """
    span = x_max - x_min
    # A scalar zero range would give NaN for arrays (or ZeroDivisionError for
    # plain Python numbers); multiplying by 0.0 yields zeros with the same
    # shape and dtype promotion as the division would have produced.
    if np.ndim(span) == 0 and span == 0:
        return (X - x_min) * 0.0
    return (X - x_min) / span

In [None]:
# Folder holding the input files. Set the SEISMIC_DATA_DIR environment
# variable to point somewhere else instead of editing this cell; the default
# is the location that used to be hard-coded here.
DATA_DIR = os.environ.get('SEISMIC_DATA_DIR', r'C:\Users\jpg\Desktop\code')

faults_path = os.path.join(DATA_DIR, 'Faults.dat')
cube_path = os.path.join(DATA_DIR, 'Seismic_data_w_null.sgy')

In [None]:
# plot_faults(faults_path, cube_path, cmap='gray_r')

In [None]:
# Window size (columns x rows) of the patches cut from each section.
width, height = 15, 15

dataset = get_amplitude_mask_dataset(
    file_path=faults_path,
    cube_path=cube_path,
    width=width,
    height=height,
)

In [None]:
# Print a summary of the generated dataset (index, columns, dtypes, memory use).
dataset.info()