### Load specific sections, apply preprocessing, extract patches, and save them to disk

In [1]:
import os
import numpy as np
from tqdm import trange
import pandas as pd
from patch_extractor import PatchExtractor as PE
from utils import normalize_only, compute_edges
from tensorflow.keras.utils import to_categorical
import glob

In [2]:
# Names of the facies classes.
# NOTE(review): list position is presumably the zero-based class index used in
# the label volume — confirm against the challenge label definition.
CLASS_NAMES = [
    "Basement",
    "SlopeMudA",
    "Deposit",
    "SlopeMudB",
    "SlopeValley",
    "Canyon",
]
# Number of distinct classes, derived from the list so the two cannot drift apart.
num_classes = len(CLASS_NAMES)

In [3]:
# Input and output locations.
# The defaults are the original cluster paths; set the FACIES_DATASET_DIR and/or
# FACIES_OUT_ROOT environment variables to run the notebook on another machine
# without editing this cell.
dataset = os.environ.get(
    "FACIES_DATASET_DIR",
    "/nas/public/dataset/geophysics/2020_aicrowd_facies_segmentation_challenge/",
)
out_root = os.environ.get(
    "FACIES_OUT_ROOT",
    "/nas/home/fpicetti/datasets/seismic_facies/",
)
# Create the output root up front; exist_ok makes re-running the cell harmless.
os.makedirs(out_root, exist_ok=True)

### Train Dataset

In [4]:
# Load the training data volume. The archive is opened in a `with` block so its
# file handle is released as soon as the array has been read out of it.
# NOTE(review): mmap_mode is documented for .npy files and likely has no effect
# on a .npz archive — confirm, and drop the argument if so.
with np.load(os.path.join(dataset, "data_train.npz"), allow_pickle=True, mmap_mode='r') as archive:
    image = archive['data']

# Load the matching label volume and shift every label down by one.
# NOTE(review): assumes the stored labels start at 1; a stored 0 would wrap to
# 255 after the uint8 cast — confirm against the dataset description.
with np.load(os.path.join(dataset, "labels_train.npz"), allow_pickle=True, mmap_mode='r') as archive:
    label = archive['labels'].astype(np.uint8) - 1

In [5]:
# Show the size of the volume along its first three axes, tab-separated under
# a z / x / y header.
print('z\tx\ty')
print('%d\t%d\t%d' % tuple(image.shape[:3]))

z	x	y
1006	782	590


In [6]:
# Volume-wide value range, computed ONCE. The previous lambda evaluated
# image.min() and image.max() over the entire volume on every call, i.e. once
# per section inside the extraction loops further down.
image_min = image.min()
image_max = image.max()


def normalize(x):
    """Normalize `x` with `normalize_only`, using the range of the whole volume.

    Parameters
    ----------
    x : array-like
        Data to normalize (the extraction loops pass a batch of patches).

    Returns
    -------
    Whatever `normalize_only` returns for `x` with `image_min` / `image_max`.

    Notes
    -----
    The trailing `True` is forwarded unchanged from the original call.
    NOTE(review): its meaning is defined in `utils.normalize_only` — confirm there.
    The range is captured when this cell runs; re-run the cell if `image` is
    reloaded.
    """
    return normalize_only(x, image_min, image_max, True)

#### vertical $992\times128$ patches

In [7]:
# Patch size and stride handed to the patch extractor, named so the numbers
# are tunable in one place.
PATCH_DIM = (992, 128)
PATCH_STRIDE = (2, 32)
pe = PE(dim=PATCH_DIM, stride=PATCH_STRIDE)

Saving patches extracted from XZ (inline) and YZ (crossline) sections

In [8]:
# The output folder is named after the extractor geometry: the patch dims
# followed by the strides, joined with underscores.
geometry_tag = "_".join(map(str, pe.dim + pe.stride))
out_path = os.path.join(out_root, geometry_tag)
os.makedirs(out_path, exist_ok=True)

In [9]:
# Extract, normalize and save patches for every section taken at a fixed index
# along the last axis of the volume (files are prefixed "XZ").
for i in trange(image.shape[2], ncols=90, desc="Inline section"):
    # Patches of the section, reshaped to (n_patches,) + pe.dim + (1,).
    patches_img = normalize(pe.extract(image[:,:,i]).reshape((-1,)+pe.dim+(1,)))
    # num_classes is passed explicitly: without it to_categorical infers the
    # one-hot depth from the largest label present, so a section lacking the
    # highest class would be saved with fewer mask channels than the others.
    patches_msk = to_categorical(pe.extract(label[:,:,i]).reshape((-1,)+pe.dim+(1,)),
                                 num_classes=num_classes)

    # Fixed zero-padding width for the patch index, computed once per section.
    pad = len(str(patches_img.shape[0]))
    for p in range(patches_img.shape[0]):
        outname = os.path.join(out_path, "XZ%s_p%s.npy" % (str(i).zfill(3), str(p).zfill(pad)))

        # One file per patch: a pickled dict with the image patch and its one-hot mask.
        np.save(outname, dict(image=patches_img[p], mask=patches_msk[p]), allow_pickle=True)


Inline section: 100%|███████████████████████████████████| 590/590 [29:01<00:00,  2.95s/it]


In [None]:
# Extract, normalize and save patches for every section taken at a fixed index
# along the second axis of the volume (files are prefixed "YZ").
for i in trange(image.shape[1], ncols=90, desc="Crossline section"):
    # Patches of the section, reshaped to (n_patches,) + pe.dim + (1,).
    patches_img = normalize(pe.extract(image[:,i]).reshape((-1,)+pe.dim+(1,)))
    # num_classes is passed explicitly: without it to_categorical infers the
    # one-hot depth from the largest label present, so a section lacking the
    # highest class would be saved with fewer mask channels than the others.
    patches_msk = to_categorical(pe.extract(label[:,i]).reshape((-1,)+pe.dim+(1,)),
                                 num_classes=num_classes)

    # Fixed zero-padding width for the patch index, computed once per section.
    pad = len(str(patches_img.shape[0]))
    for p in range(patches_img.shape[0]):
        outname = os.path.join(out_path, "YZ%s_p%s.npy" % (str(i).zfill(3), str(p).zfill(pad)))

        # One file per patch: a pickled dict with the image patch and its one-hot mask.
        np.save(outname, dict(image=patches_img[p], mask=patches_msk[p]), allow_pickle=True)


Crossline section:  15%|████▋                           | 114/782 [04:23<26:55,  2.42s/it]

## Create different pandas dataframes for train/validation

In [36]:
# Train/validation split, expressed in whole sections.
num_sections_train = 128
num_sections_val = 64
train_start_section = 0
val_start_section = 300

# Patches per section, measured on section 000; every other section is assumed
# to have produced the same number of files.
num_patches_per_section = len(glob.glob(os.path.join(out_path, "XZ000_*.npy")))
# Fail loudly instead of silently writing an empty split.
assert num_patches_per_section > 0, "no XZ000_*.npy patches found in %s" % out_path

# Convert section ranges into [start, stop) indices into the sorted file list.
train_idx0 = train_start_section*num_patches_per_section
train_idx1 = train_idx0 + num_sections_train*num_patches_per_section

val_idx0 = val_start_section*num_patches_per_section
val_idx1 = val_idx0 + num_sections_val*num_patches_per_section

# Overlapping ranges would put the same patches in both train and validation.
assert train_idx1 <= val_idx0 or val_idx1 <= train_idx0, \
    "train and validation section ranges overlap"

In [37]:
# Sorted list of every XZ patch file; the training slice is then taken by index.
xz_patch_files = sorted(glob.glob(os.path.join(out_path, "XZ*.npy")))
train_paths = xz_patch_files[train_idx0:train_idx1]

In [38]:
# Sorted list of every XZ patch file; the validation slice is then taken by index.
xz_patch_files = sorted(glob.glob(os.path.join(out_path, "XZ*.npy")))
val_paths = xz_patch_files[val_idx0:val_idx1]

In [39]:
# One row per patch file: its path and the split ('train' or 'val') it belongs to.
# Training rows come first, followed by the validation rows.
df = pd.DataFrame({
    'datapath': train_paths + val_paths,
    'mode': ['train'] * len(train_paths) + ['val'] * len(val_paths),
})

In [44]:
# Write the split table next to the patch folders, without the index column.
split_csv = os.path.join(out_root, 'train1.csv')
df.to_csv(split_csv, index=False)