In [1]:
%load_ext autoreload
%autoreload 2
from labeling import dataload,ind2sub
from itertools import product
from collections import OrderedDict
from scipy.ndimage import gaussian_filter
from sklearn.neighbors import NearestNeighbors

import pandas as pd
import numpy as np
import json
import torch
import matplotlib.pyplot as plt

In [23]:
# Read the project path configuration. Only two lines of the file are used,
# each of the form `<name>=<path>`:
#   line index 1  -> dir_co2 (root directory of the seismic data)
#   line index 13 -> outpath (directory the training patches are written to)
fn = '../define_path.txt'
with open(fn) as f:
    lines = f.readlines()
for idx, line in enumerate(lines):
    # Split on the first '=' only so a path containing '=' stays intact, and
    # strip surrounding whitespace instead of blindly dropping the last
    # character: the final line of a file may not end with a newline, and
    # CRLF files would otherwise leave a stray '\r' in the path.
    if idx == 1:
        dir_co2 = line.split('=', 1)[1].strip()
    if idx == 13:
        outpath = line.split('=', 1)[1].strip()

# Training dataset

## Basic paths for baseline data and time-lapse data

In [7]:
# Input file paths; every seismic volume lives under dir_co2.
# Baseline survey (1994): blfn was processed in 2010, blfn2 in 2001.
blfn, blfn2 = (
    f'{dir_co2}/94p10/2010 processing/data/94p10nea.sgy',
    f'{dir_co2}/94p01/2001 processing/data/94p01nea.sgy',
)
# Time-lapse survey (2010): tlfn was processed in 2010, tlfn2 in 2011.
# The 2011-processed file name carries no '.sgy' extension.
tlfn, tlfn2 = (
    f'{dir_co2}/10p10/2010 processing/data/10p10nea.sgy',
    f'{dir_co2}/10p11/2011 processing/data/10p11nea',
)
# CO2 masks interpreted from the 1994 and 2010 data processed in 2010
mkfn = '../resources/label/masks.dat'

## Load data

In [8]:
# Open the four surveys in a fixed order: baseline (2010 processing),
# baseline (2001 processing), time-lapse (2010 processing), time-lapse (2011 processing).
D0, D0n, Dt, Dtn = [dataload(fn=path) for path in (blfn, blfn2, tlfn, tlfn2)]

Total trace number: 150909
Sample number along each trace: 1001
Sampling interval along each trace: 0.002 s
data arrangement: 269 (number of ensembles) x 561 (trace number per ensemble) x 1001 (sample number per trace)
Total trace number: 166747
Sample number along each trace: 1001
Sampling interval along each trace: 0.002 s
data arrangement: 287 (number of ensembles) x 581 (trace number per ensemble) x 1001 (sample number per trace)
Total trace number: 150909
Sample number along each trace: 1001
Sampling interval along each trace: 0.002 s
data arrangement: 269 (number of ensembles) x 561 (trace number per ensemble) x 1001 (sample number per trace)
Total trace number: 242136
Sample number along each trace: 1001
Sampling interval along each trace: 0.002 s
data arrangement: 216 (number of ensembles) x 1121 (trace number per ensemble) x 1001 (sample number per trace)


In [9]:
# (nx, ny, nt) of the 2010 time-lapse volume; used as the mask/target shape
DD = Dt.nx, Dt.ny, Dt.nt

In [10]:
# Get the entire 3D data volume of each of the four surveys together with the
# axes returned by getdata(). Judging by later use, the 2nd and 3rd return
# values are 2D per-trace x and y coordinate grids and the 4th is the time axis
# (NOTE(review): confirm against labeling.dataload).
d0,xd0,yd0,td0 = D0.getdata()
# the time axis of the 2001-processed baseline is discarded
d0n,xdn,ydn,_ = D0n.getdata()
dt,xd,yd,td = Dt.getdata()
dtn,xdt,ydt,tdt = Dtn.getdata()

## Preprocess d0n and dtn so that they have the same dimensions as d0 and dt

In [11]:
# Resample d0n onto the trace positions of dt (and d0): for every (xd, yd)
# position take the trace of d0n whose (xdn, ydn) position is nearest.
X = np.stack([xd.ravel(), yd.ravel()], axis=1)
Xn = np.stack([xdn.ravel(), ydn.ravel()], axis=1)
nbrs = NearestNeighbors(n_neighbors=1, algorithm='ball_tree')
nbrs.fit(Xn)
distances, ind = nbrs.kneighbors(X)
# presumably ind2sub turns the flat neighbour indices into (row, col) subscripts
# of the xdn grid -- see labeling.ind2sub
sub = ind2sub(xdn.shape, ind)
# gather the matching traces, then fold them back into the shape of d0
d0i = d0n[sub[0], sub[1], :].reshape(d0.shape)

In [12]:
# Bring dtn onto the same grid as dt (and d0).
# Crossline axis: keep every second trace of 1:-3, then repeat the first kept
# trace twice at the front ('edge' padding).
dtn1 = np.pad(dtn[:, 1:-3:2, :], ((0, 0), (2, 0), (0, 0)), mode='edge')
# Inline axis: find the two nearest dtn inlines for every dt inline, using the
# first column of the x grids (the second coordinate is set to zero).
xdh = xd[:, 0]
xdth = xdt[:, 0]
X = np.stack([xdh, np.zeros(Dt.nx)], axis=1)
Xt = np.stack([xdth, np.zeros(Dtn.nx)], axis=1)
nbrs = NearestNeighbors(n_neighbors=2, algorithm='ball_tree').fit(Xt)
distances, ind = nbrs.kneighbors(X)
# Linear-interpolation weights: each neighbour is weighted by the distance to
# the *other* neighbour, normalised by the sum of both distances.
coe = distances[:, ::-1] / distances.sum(axis=1, keepdims=True)
# Blend the two neighbouring inlines, one target inline at a time
dti = np.zeros(DD, dtype=np.float32)
for i in range(Dt.nx):
    (first, second), (w_first, w_second) = ind[i], coe[i]
    dti[i] = dtn1[first] * w_first + dtn1[second] * w_second

In [13]:
eps = 1e-5


def _standardize(volume, stabilizer):
    """Return `volume` shifted to zero mean and scaled by (std + stabilizer)."""
    return (volume - np.mean(volume)) / (np.std(volume) + stabilizer)


# Standardize each whole volume independently (d0, d0i, dt, dti are replaced)
d0 = _standardize(d0, eps)
d0i = _standardize(d0i, eps)
dt = _standardize(dt, eps)
dti = _standardize(dti, eps)

## Generate 2D patches

### load the reference CO2 masks

In [14]:
# Load the raw float32 CO2 mask file and fold it into the (nx, ny, nt) shape DD
masks = np.fromfile(mkfn, dtype=np.float32).reshape(DD)

### define sliced labels (same as the sparse 3D labels)

In [15]:
# 2D vs 3D test: inline and xline slice indices as interpreted in the 3D masks
inline_itp = np.array([60, 80, 100, 120, 130, 140, 150, 160, 180, 200])
xline_itp = np.array([100, 150, 180, 210, 240, 270, 300, 330, 360, 390, 420, 450, 500])
# number of interpreted inline / xline slices
Nitp, Nxtp = len(inline_itp), len(xline_itp)

In [16]:
# Patch extent in samples before decimation: Nh horizontally, Nt along time
Nh = 128
Nt = 256
# Number of sampled patch centres along inline (Nsi), xline (Nsx) and time (Nst)
Nsi = 4
Nsx = 8
Nst = 10
# Patch size recorded in pm_info as 'patch_nsize'
rs = (64, 64)
# Sampling strategy: 0 = random centres, 1 = regular grid of centres
stg = 0
# Stabilizer added to the standard deviation when normalizing a patch
eps = 1e-5

In [17]:
# diffbl: also pair patches with the second (resampled) baseline volume
# difftl: also pair patches with the second (resampled) time-lapse volume
diffbl, difftl = True, True

In [18]:
# Allowed range of patch centres on each axis, leaving half a patch of margin
# at both ends so a full patch always fits inside the volume.
hNh, hNt = Nh // 2, Nt // 2
Ir, Xr = ([hNh, extent - hNh] for extent in (D0.nx, D0.ny))
Tr = [hNt, D0.nt - hNt]

In [19]:
# Number of baseline (NRb) and time-lapse (NRa) variants paired per sampled centre
NRb = 2 if diffbl else 1
NRa = 2 if difftl else 1
print(f'Theoritical maximum total patch number: {NRb*NRa*Nst*(Nsi*Nxtp+Nsx*Nitp)}')

Theoritical maximum total patch number: 5280


In [20]:
# Budget for patches whose mask contains no CO2. The thinning step further
# down keeps at most 4*max_0co2 of them.
max_0co2 = 100

In [25]:
# Collect every setting used to build the training set and store it as JSON
pm_info = OrderedDict([
    # [ensemble number, trace number per ensemble, sample number per trace]
    ('data_dim', [D0.nx, D0.ny, D0.nt]),
    ('baseline_datapath', [blfn, blfn2] if diffbl else [blfn]),
    ('timelapse_datapath', [tlfn, tlfn2] if difftl else [tlfn]),
    ('patch_osize', [Nh, Nt]),
    ('patch_nsize', rs),
    ('patch_number', [Nsi, Nsx, Nst]),
    ('sample_strategy', stg),
    ('max_0co2', max_0co2),
    ('inline_itp', inline_itp.tolist()),
    ('xline_itp', xline_itp.tolist()),
])
with open(f'{outpath}/pm_info.json','w') as f:
    f.write(json.dumps(pm_info))
# Read the file back and echo it as a check of what was written
with open(f'{outpath}/pm_info.json','r') as read_file:
    loaded_pm = json.loads(read_file.read())
print(loaded_pm)

{'data_dim': [269, 561, 1001], 'baseline_datapath': ['/data/libei/co2_data/94p10/2010 processing/data/94p10nea.sgy', '/data/libei/co2_data/94p01/2001 processing/data/94p01nea.sgy'], 'timelapse_datapath': ['/data/libei/co2_data/10p10/2010 processing/data/10p10nea.sgy', '/data/libei/co2_data/10p11/2011 processing/data/10p11nea'], 'patch_osize': [128, 256], 'patch_nsize': [64, 64], 'patch_number': [4, 8, 10], 'sample_strategy': 0, 'max_0co2': 100, 'inline_itp': [60, 80, 100, 120, 130, 140, 150, 160, 180, 200], 'xline_itp': [100, 150, 180, 210, 240, 270, 300, 330, 360, 390, 420, 450, 500]}


In [26]:
# Regular grid of candidate patch centres along inline (Is), xline (Xs) and
# time (Ts), truncated to integer sample indices.
Is = np.array(np.linspace(Ir[0],Ir[1],Nsi),dtype=np.int16)
Xs = np.array(np.linspace(Xr[0],Xr[1],Nsx),dtype=np.int16)
Ts = np.array(np.linspace(Tr[0],Tr[1],Nst),dtype=np.int16)
# Build the centre coordinates (Isample, Xsample, Tsample). In both strategies
# the first Nitp*Nsx*Nst centres lie on the interpreted inlines and the
# remaining Nxtp*Nsi*Nst centres lie on the interpreted xlines.
if stg == 1:
    # regular sampling: every slice gets the full grid of centres
    Isample = np.zeros(0,dtype=np.int16)
    Xsample = np.zeros(0,dtype=np.int16)
    Tsample = np.zeros(0,dtype=np.int16)
    for i in range(Nitp):
        # fixed inline, (xline, time) centres on the regular grid
        Isi = np.zeros(Nsx*Nst,dtype=np.int16)+inline_itp[i]
        Isample = np.concatenate((Isample,Isi),axis=0)
        Xsi,Tsi = np.meshgrid(Xs,Ts,indexing='ij')
        Xsample = np.concatenate((Xsample,Xsi.flatten()),axis=0)
        Tsample = np.concatenate((Tsample,Tsi.flatten()),axis=0)
    for j in range(Nxtp):
        # fixed xline, (inline, time) centres on the regular grid
        Xsi = np.zeros(Nsi*Nst,dtype=np.int16)+xline_itp[j]
        # Append this xline's block to Xsample. The previous code concatenated
        # (Isample, Isi) here, which replaced Xsample with inline data and left
        # the three coordinate arrays with mismatched lengths.
        Xsample = np.concatenate((Xsample,Xsi),axis=0)
        Isi,Tsi = np.meshgrid(Is,Ts,indexing='ij')
        Isample = np.concatenate((Isample,Isi.flatten()),axis=0)
        Tsample = np.concatenate((Tsample,Tsi.flatten()),axis=0)
else:
    # random sampling: the slice index is fixed, the in-slice position and the
    # time are drawn uniformly from the allowed centre ranges
    Is0 = np.tile(inline_itp,(Nsx*Nst,1)).T.flatten()
    Is1 = np.random.randint(Ir[0],Ir[1],size=(Nxtp*Nsi*Nst))
    Isample = np.concatenate((Is0,Is1),axis=0)
    Xs0 = np.tile(xline_itp,(Nsi*Nst,1)).T.flatten()
    Xs1 = np.random.randint(Xr[0],Xr[1],size=(Nitp*Nsx*Nst))
    Xsample = np.concatenate((Xs1,Xs0),axis=0)
    Tsample = np.random.randint(Tr[0],Tr[1],size=(Nxtp*Nsi+Nitp*Nsx)*Nst)

In [27]:
%matplotlib
# show the sample positions in 3D view
fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')
ax.scatter(Isample, Xsample, Tsample, marker='o')

Using matplotlib backend: Qt5Agg


<mpl_toolkits.mplot3d.art3d.Path3DCollection at 0x7f8b4813c410>

In [28]:
# Sample the patches iteratively: for every sampled centre cut a 2D patch from
# each data volume and from the mask, write all baseline/time-lapse pairings
# and the mask to outpath, and record per-patch metadata in patch_info.
# NOTE: inside the loop Is, Xs and Ts are rebound to the scalar centre
# coordinates, replacing the arrays of grid centres with the same names.
hNh1 = Nh-hNh  # upper half-extent, so that hNh+hNh1 == Nh
hNt1 = Nt-hNt  # upper half-extent along time, so that hNt+hNt1 == Nt
ixswitch = Nsx*Nst*Nitp  # the first ixswitch centres lie on inline slices, the rest on xline slices
patch_info = []  # one metadata record per written patch
idx_0co2 = []  # ids of patches whose mask contains no CO2
c = 0  # running patch id
cm = 0  # running mask id (one mask per centre, shared by all its pairings)
for i in range(len(Isample)):
    Is = Isample[i]
    Xs = Xsample[i]
    Ts = Tsample[i]
    # slice the data and mask correspondingly; every 2nd horizontal sample and
    # every 4th time sample is kept
    if i<ixswitch:
        # inline slice: fixed inline Is, window along xline and time
        R0 = d0[Is,Xs-hNh:Xs+hNh1:2,Ts-hNt:Ts+hNt1:4]
        R0i = d0i[Is,Xs-hNh:Xs+hNh1:2,Ts-hNt:Ts+hNt1:4]
        Rt = dt[Is,Xs-hNh:Xs+hNh1:2,Ts-hNt:Ts+hNt1:4]
        Rti = dti[Is,Xs-hNh:Xs+hNh1:2,Ts-hNt:Ts+hNt1:4]
        M = masks[Is,Xs-hNh:Xs+hNh1:2,Ts-hNt:Ts+hNt1:4]     
    else:
        # xline slice: fixed xline Xs, window along inline and time
        R0 = d0[Is-hNh:Is+hNh1:2,Xs,Ts-hNt:Ts+hNt1:4]
        R0i = d0i[Is-hNh:Is+hNh1:2,Xs,Ts-hNt:Ts+hNt1:4]
        Rt = dt[Is-hNh:Is+hNh1:2,Xs,Ts-hNt:Ts+hNt1:4]
        Rti = dti[Is-hNh:Is+hNh1:2,Xs,Ts-hNt:Ts+hNt1:4]
        M = masks[Is-hNh:Is+hNh1:2,Xs,Ts-hNt:Ts+hNt1:4]
    
    # pairing R0 and Rt: the first NRb baseline variants are combined with the
    # first NRa time-lapse variants
    Rb = [R0,R0i]
    Ra = [Rt,Rti]
    for tyb,tya in product(range(NRb),range(NRa)):
        Rb0 = Rb[tyb]
        Ra0 = Ra[tya]
        # normalize R0 and Rt respectively (per-patch mean and std)
        Rm0,Rs0 = np.mean(Rb0),np.std(Rb0)
        Rmt,Rst = np.mean(Ra0),np.std(Ra0)
        Rb0 = (Rb0-Rm0)/(Rs0+eps)
        Ra0 = (Ra0-Rmt)/(Rst+eps)
        # stack R0 and Rt into a 2-channel patch (baseline first)
        R0t = np.stack((Rb0,Ra0))
        # save the patch to outpath as raw binary
        R0t.tofile(f'{outpath}/R0t_{c}.dat')
        # record the patch information in patch_info dict
        pf = OrderedDict()
        pf['Ptch_id'] = c
        pf['Mask_id'] = cm
        pf['ct'] = [Is,Xs,Ts]
        pf['mean'] = [Rm0,Rmt]
        pf['std'] = [Rs0,Rst]
        patch_info.append(pf)
        # remember patches whose mask is entirely zero
        if np.sum(M) == 0:
            idx_0co2.append(c)
        c += 1
    # one mask file per centre, referenced by the Mask_id of its patches
    M.tofile(f'{outpath}/Mask_{cm}.dat')
    cm += 1

In [29]:
# Number of patches written by the sampling loop, before zero-CO2 thinning
print(f'Total patch number: {len(patch_info)}')

Total patch number: 5280


In [30]:
# Thin out the patches without CO2: keep each with probability p0, and never
# more than max_0co2*4 in total.
# NOTE(review): the factor 4 presumably equals NRb*NRa (four pairings per
# sampled centre when both extra volumes are enabled) -- confirm before
# changing diffbl/difftl.
N_0co2 = len(idx_0co2)
# Guard against a run that produced no zero-CO2 patch at all, which used to
# raise ZeroDivisionError here; with nothing to thin, p0 is irrelevant.
p0 = max_0co2*4/N_0co2 if N_0co2 else 0.0
p = np.random.random(N_0co2)
idx_keep_0co2 = []
for i,idx in enumerate(idx_0co2):
    if (p[i]<=p0) and (len(idx_keep_0co2) < max_0co2*4):
        idx_keep_0co2.append(idx)

In [31]:
# Drop the zero-CO2 patches that were not selected for keeping. The ids stored
# in idx_0co2 / idx_keep_0co2 are the running patch counter, which equals the
# position of the record in patch_info.
# A set gives constant-time membership tests; the previous list lookups made
# this filter quadratic in the number of patches.
drop_ids = set(idx_0co2) - set(idx_keep_0co2)
patch_info_new = [info for pos, info in enumerate(patch_info) if pos not in drop_ids]
# save the patch info
pd.DataFrame.from_dict(patch_info_new, orient='columns').to_csv(f'{outpath}/patch_info.csv')

In [32]:
# Number of patches left after thinning; these are split into training and validation sets
N = len(patch_info_new)
print(f'Training and valid patch number: {N}')

Training and valid patch number: 2216


## Generate the validation dataset by randomly selecting a small portion of the training dataset

In [33]:
# Target number of training patches
Ntrain = 1500
# Upper bound on the number of validation patches (everything beyond Ntrain)
vmax = N-Ntrain
# Per-patch probability of being assigned to validation, set 10% above vmax/N;
# presumably so that the vmax cap is normally reached -- NOTE(review): confirm intent
pv = 1.1*vmax/N

In [34]:
# Walk through the kept patches once: a patch goes to validation when its
# random draw is at most pv and fewer than vmax validation patches have been
# taken so far; every other patch goes to training.
patch_info_train, patch_info_valid = [], []
x = np.random.random(N)
c = 0
for i in range(N):
    info = patch_info_new[i]
    if c >= vmax or x[i] > pv:
        patch_info_train.append(info)
        continue
    patch_info_valid.append(info)
    c += 1
# write both index tables next to the patches
for subset, csv_path in (
    (patch_info_train, f'{outpath}/patch_info_train.csv'),
    (patch_info_valid, f'{outpath}/patch_info_valid.csv'),
):
    pd.DataFrame.from_dict(subset, orient='columns').to_csv(csv_path)
# report the resulting split sizes
print(f'Final patch number for training: {len(patch_info_train)}')
print(f'Final patch number for validating: {len(patch_info_valid)}')

Final patch number for training: 1500
Final patch number for validating: 716


# Test dataset

In [35]:
# Read the root output directory of the test datasets from line index 15 of the
# path configuration file; the line has the form `<name>=<path>`.
fn = '../define_path.txt'
with open(fn) as f:
    lines = f.readlines()
for idx, line in enumerate(lines):
    if idx == 15:
        # Split on the first '=' only and strip surrounding whitespace rather
        # than dropping the last character, which truncated the path whenever
        # the line had no trailing newline (e.g. the last line of the file).
        outpath_test = line.split('=', 1)[1].strip()

## Basic information for test dataset generation

In [36]:
# Reference survey whose grid every test volume is resampled onto
xydfn = f'{dir_co2}/10p10/2010 processing/data/10p10nea.sgy'
Dr = dataload(fn=xydfn)
# only the coordinate grids and the time axis are needed, not the data itself
_, xd, yd, td = Dr.getdata()
# reference grid shape: the two dimensions of the coordinate grid plus the time samples
DD = (*xd.shape[:2], len(td))

Total trace number: 150909
Sample number along each trace: 1001
Sampling interval along each trace: 0.002 s
data arrangement: 269 (number of ensembles) x 561 (trace number per ensemble) x 1001 (sample number per trace)


In [37]:
# One row per test case:
#   (baseline processing year, time-lapse survey year, time-lapse processing year)
bpy, ty, tpy = map(list, zip(
    ('2001', '1999', '2001'),
    ('2001', '2001', '2001'),
    ('2001', '2004', '2007'),
    ('2001', '2006', '2007'),
    ('2001', '2008', '2008'),
    ('2001', '2010', '2010'),
    ('2001', '2010', '2011'),
    ('2010', '2010', '2010'),
    ('2010', '2010', '2011'),
))
# number of test cases
Nte = len(bpy)

In [38]:
# Patch extent in samples before decimation: Nh horizontally, Nt along time
Nh = 128
Nt = 256
# Number of sampled patch centres along inline (Nsi), xline (Nsx) and time (Nst)
Nsi = 3
Nsx = 5
Nst = 6
# Patch size recorded in pm_info as 'patch_nsize'
rs = (64, 64)
# Sampling strategy: 0 = random centres, 1 = regular grid of centres
stg = 1
# Stabilizer added to the standard deviation when normalizing
eps = 1e-5

In [39]:
# For the test sets every inline and every xline of the reference grid is sliced
inline_itp, xline_itp = (np.arange(count) for count in DD[:2])
# number of inline / xline slices
Nitp, Nxtp = len(inline_itp), len(xline_itp)
# patch No.
print(f'Total patch number: {Nst*(Nsi*Nxtp+Nsx*Nitp)}')

Total patch number: 18168


In [40]:
# Load the reference CO2 mask for 2010 (raw float32) and fold it into the grid shape DD
masks = np.fromfile(mkfn, dtype=np.float32).reshape(DD)

In [41]:
# Restrict the inline/crossline/time centre ranges so that a full patch always
# fits inside the volume (half a patch of margin at both ends).
hNh = Nh//2
hNt = Nt//2
# Take the extents from the reference grid DD, onto which every test volume is
# resampled. D0 must not be used here: the test loop rebinds D0 to baselines
# with other grid sizes, so re-running this cell afterwards would produce
# bounds for the wrong volume.
Ir = [hNh,DD[0]-hNh]
Xr = [hNh,DD[1]-hNh]
Tr = [hNt,DD[2]-hNt]

In [42]:
# Regular grid of candidate patch centres along inline, xline and time,
# truncated to integer sample indices.
Is = np.linspace(Ir[0], Ir[1], Nsi).astype(np.int16)
Xs = np.linspace(Xr[0], Xr[1], Nsx).astype(np.int16)
Ts = np.linspace(Tr[0], Tr[1], Nst).astype(np.int16)
# Build the centre coordinates. In both strategies the inline-slice centres
# come first, followed by the xline-slice centres.
if stg == 1:
    # Regular strategy: every slice receives the same grid of in-slice centres,
    # so the two flattened grids are computed once and reused.
    x_on_inline, t_on_inline = (g.flatten() for g in np.meshgrid(Xs, Ts, indexing='ij'))
    i_on_xline, t_on_xline = (g.flatten() for g in np.meshgrid(Is, Ts, indexing='ij'))
    start = np.zeros(0, dtype=np.int16)
    i_parts, x_parts, t_parts = [start], [start], [start]
    for inline in inline_itp:
        # fixed inline, grid over (xline, time)
        i_parts.append(np.zeros(Nsx*Nst, dtype=np.int16) + inline)
        x_parts.append(x_on_inline)
        t_parts.append(t_on_inline)
    for xline in xline_itp:
        # fixed xline, grid over (inline, time)
        x_parts.append(np.zeros(Nsi*Nst, dtype=np.int16) + xline)
        i_parts.append(i_on_xline)
        t_parts.append(t_on_xline)
    Isample = np.concatenate(i_parts, axis=0)
    Xsample = np.concatenate(x_parts, axis=0)
    Tsample = np.concatenate(t_parts, axis=0)
else:
    # Random strategy: the slice index is fixed and repeated, the in-slice
    # position and the time are drawn uniformly from the allowed ranges.
    Is0 = np.repeat(inline_itp, Nsx*Nst)
    Is1 = np.random.randint(Ir[0], Ir[1], size=Nxtp*Nsi*Nst)
    Isample = np.concatenate((Is0, Is1), axis=0)
    Xs0 = np.repeat(xline_itp, Nsi*Nst)
    Xs1 = np.random.randint(Xr[0], Xr[1], size=Nitp*Nsx*Nst)
    Xsample = np.concatenate((Xs1, Xs0), axis=0)
    Tsample = np.random.randint(Tr[0], Tr[1], size=(Nxtp*Nsi+Nitp*Nsx)*Nst)

In [44]:
# Build one test dataset per (baseline processing, time-lapse survey,
# time-lapse processing) combination: load both volumes, resample them onto the
# reference grid, save the normalized volumes, and cut all patches.

# Upper half-extents of a patch. They are derived here from this section's
# Nh/Nt/hNh/hNt so the slicing below no longer depends on values left over from
# the training section, which go stale if the test patch size is changed.
hNh1 = Nh-hNh
hNt1 = Nt-hNt
for I in range(Nte):
    # file paths follow the '<survey yy>p<processing yy>' naming scheme
    blfn = f'{dir_co2}/94p{bpy[I][-2:]}/{bpy[I]} processing/data/94p{bpy[I][-2:]}nea.sgy'
    tlfn = f'{dir_co2}/{ty[I][-2:]}p{tpy[I][-2:]}/{tpy[I]} processing/data/{ty[I][-2:]}p{tpy[I][-2:]}nea.sgy'
    year = f'{ty[I]}_b{bpy[I][-2:]}_t{tpy[I][-2:]}'
    # output dataset path (NOTE: this rebinds `outpath` used by the training section)
    outpath = f'{outpath_test}/{year}/test'
    if tpy[I] == '2011':
        # the 2011-processed file has no '.sgy' extension
        tlfn = tlfn[:-4]
    # load data
    D0 = dataload(fn=blfn)
    Dt = dataload(fn=tlfn)
    d0,xd0,yd0,_ = D0.getdata()
    dt,xdt,ydt,_ = Dt.getdata()

    ## preprocess the datasets
    # resample d0 onto the reference grid: nearest trace for every (xd, yd)
    X = np.stack((xd.flatten(),yd.flatten()),axis=1)
    X0 = np.stack((xd0.flatten(),yd0.flatten()),axis=1)
    nbrs = NearestNeighbors(n_neighbors=1, algorithm='ball_tree').fit(X0)
    distances, ind = nbrs.kneighbors(X)
    sub = ind2sub(xd0.shape,ind)
    d0i = d0[sub[0],sub[1],:]
    d0i = np.reshape(d0i,DD)
    d0 = d0i
    # resample dt onto the reference grid
    if tpy[I] == '2011':
        # the 2011-processed volume gets a dedicated treatment to conform to Dr
        # downsample along xline direction (every second trace, edge-padded)
        dt1 = np.pad(dt[:,1:-3:2,:],((0,0),(2,0),(0,0)),'edge')
        # calculate the coefficients for linear interpolation between the two
        # nearest inlines; each neighbour is weighted by the distance to the other
        xdh,xdth = xd[:,0],xdt[:,0]
        X = np.stack((xdh,np.zeros(Dr.nx)),axis=1)
        Xt = np.stack((xdth,np.zeros(Dt.nx)),axis=1)
        nbrs = NearestNeighbors(n_neighbors=2, algorithm='ball_tree').fit(Xt)
        distances, ind = nbrs.kneighbors(X)
        coe = np.flip(distances,axis=1)/np.expand_dims(np.sum(distances,axis=1),1)
        # linear interpolate along inline direction
        dti = np.zeros(DD,dtype=np.float32)
        for i in range(Dr.nx):
            dti[i,:,:] = dt1[ind[i,0],:,:]*coe[i,0]+dt1[ind[i,1],:,:]*coe[i,1]
        dt = dti
    else:
        # find the nearest trace of dt for every (xd, yd)
        X = np.stack((xd.flatten(),yd.flatten()),axis=1)
        Xt = np.stack((xdt.flatten(),ydt.flatten()),axis=1)
        nbrs = NearestNeighbors(n_neighbors=1, algorithm='ball_tree').fit(Xt)
        distances, ind = nbrs.kneighbors(X)
        sub = ind2sub(xdt.shape,ind)
        dti = dt[sub[0],sub[1],:]
        dti = np.reshape(dti,DD)
        dt = dti
    # a reference mask is only written for the 2010 time-lapse survey
    mask_ref = ty[I] == '2010'

    # store the generation settings as JSON and echo them back
    pm_info = OrderedDict()
    pm_info['data_dim'] = DD
    pm_info['baseline_datapath'] = [blfn]
    pm_info['timelapse_datapath'] = [tlfn]
    pm_info['patch_osize'] = [Nh,Nt]
    pm_info['patch_nsize'] = rs
    pm_info['patch_number'] = [Nsi,Nsx,Nst]
    pm_info['sample_strategy'] = stg
    pm_info['max_0co2'] = None
    pm_info['inline_itp'] = inline_itp.tolist()
    pm_info['xline_itp'] = xline_itp.tolist()
    with open(f'{outpath}/pm_info.json','w') as f:
        f.write(json.dumps(pm_info))
    with open(f'{outpath}/pm_info.json','r') as read_file:
        loaded_pm = json.loads(read_file.read())
        print(loaded_pm)
    # normalize d0 and dt over the whole volume
    d0N = (d0-np.mean(d0))/(np.std(d0)+eps)
    dtN = (dt-np.mean(dt))/(np.std(dt)+eps)
    # save d0 and dt to outpath as raw binary
    d0N.tofile(f'{outpath}/d0.dat')
    dtN.tofile(f'{outpath}/dt.dat')
    # generate patches; the first ixswitch centres lie on inline slices,
    # the remaining ones on xline slices
    ixswitch = Nsx*Nst*Nitp
    patch_info = []
    c = 0
    for i in range(len(Isample)):
        # NOTE: Is, Xs, Ts are rebound from the grid arrays to scalar centres
        Is = Isample[i]
        Xs = Xsample[i]
        Ts = Tsample[i]
        # slice the data and mask correspondingly; every 2nd horizontal sample
        # and every 4th time sample is kept
        if i<ixswitch:
            R0 = d0N[Is,Xs-hNh:Xs+hNh1:2,Ts-hNt:Ts+hNt1:4]
            Rt = dtN[Is,Xs-hNh:Xs+hNh1:2,Ts-hNt:Ts+hNt1:4]
            if mask_ref:
                M = masks[Is,Xs-hNh:Xs+hNh1:2,Ts-hNt:Ts+hNt1:4]
        else:
            R0 = d0N[Is-hNh:Is+hNh1:2,Xs,Ts-hNt:Ts+hNt1:4]
            Rt = dtN[Is-hNh:Is+hNh1:2,Xs,Ts-hNt:Ts+hNt1:4]
            if mask_ref:
                M = masks[Is-hNh:Is+hNh1:2,Xs,Ts-hNt:Ts+hNt1:4]
        # normalize R0 and Rt respectively (per-patch mean and std)
        Rm0,Rs0 = np.mean(R0),np.std(R0)
        Rmt,Rst = np.mean(Rt),np.std(Rt)
        R0 = (R0-Rm0)/(Rs0+eps)
        Rt = (Rt-Rmt)/(Rst+eps)
        # stack R0 and Rt into a 2-channel patch (baseline first)
        R0t = np.stack((R0,Rt))
        # save the patch to outpath as raw binary
        R0t.tofile(f'{outpath}/R0t_{c}.dat')
        # record the patch information in patch_info dict
        pf = OrderedDict()
        pf['Ptch_id'] = c
        pf['Mask_id'] = c
        pf['ct'] = [Is,Xs,Ts]
        pf['mean'] = [Rm0,Rmt]
        pf['std'] = [Rs0,Rst]
        patch_info.append(pf)
        # the mask shares the id of its patch
        if mask_ref:
            M.tofile(f'{outpath}/Mask_{c}.dat')
        c += 1
    # save the patch info
    pd.DataFrame.from_dict(patch_info, orient='columns').to_csv(f'{outpath}/patch_info.csv')
    print(f'///////////////////////////////////////////////////')
    print(f'Final patch number for {year}: {len(patch_info)}')

Total trace number: 166747
Sample number along each trace: 1001
Sampling interval along each trace: 0.002 s
data arrangement: 287 (number of ensembles) x 581 (trace number per ensemble) x 1001 (sample number per trace)
Total trace number: 166747
Sample number along each trace: 1001
Sampling interval along each trace: 0.002 s
data arrangement: 287 (number of ensembles) x 581 (trace number per ensemble) x 1001 (sample number per trace)
{'data_dim': [269, 561, 1001], 'baseline_datapath': ['/data/libei/co2_data/94p01/2001 processing/data/94p01nea.sgy'], 'timelapse_datapath': ['/data/libei/co2_data/99p01/2001 processing/data/99p01nea.sgy'], 'patch_osize': [128, 256], 'patch_nsize': [64, 64], 'patch_number': [3, 5, 6], 'sample_strategy': 1, 'max_0co2': None, 'inline_itp': [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 

///////////////////////////////////////////////////
Final patch number for 2001_b01_t01: 18168
Total trace number: 166747
Sample number along each trace: 1001
Sampling interval along each trace: 0.002 s
data arrangement: 287 (number of ensembles) x 581 (trace number per ensemble) x 1001 (sample number per trace)
Total trace number: 116532
Sample number along each trace: 1001
Sampling interval along each trace: 0.002 s
data arrangement: 249 (number of ensembles) x 468 (trace number per ensemble) x 1001 (sample number per trace)
{'data_dim': [269, 561, 1001], 'baseline_datapath': ['/data/libei/co2_data/94p01/2001 processing/data/94p01nea.sgy'], 'timelapse_datapath': ['/data/libei/co2_data/04p07/2007 processing/data/04p07nea.sgy'], 'patch_osize': [128, 256], 'patch_nsize': [64, 64], 'patch_number': [3, 5, 6], 'sample_strategy': 1, 'max_0co2': None, 'inline_itp': [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 3

///////////////////////////////////////////////////
Final patch number for 2006_b01_t07: 18168
Total trace number: 166747
Sample number along each trace: 1001
Sampling interval along each trace: 0.002 s
data arrangement: 287 (number of ensembles) x 581 (trace number per ensemble) x 1001 (sample number per trace)
Total trace number: 150909
Sample number along each trace: 1001
Sampling interval along each trace: 0.002 s
data arrangement: 269 (number of ensembles) x 561 (trace number per ensemble) x 1001 (sample number per trace)
{'data_dim': [269, 561, 1001], 'baseline_datapath': ['/data/libei/co2_data/94p01/2001 processing/data/94p01nea.sgy'], 'timelapse_datapath': ['/data/libei/co2_data/08p08/2008 processing/data/08p08nea.sgy'], 'patch_osize': [128, 256], 'patch_nsize': [64, 64], 'patch_number': [3, 5, 6], 'sample_strategy': 1, 'max_0co2': None, 'inline_itp': [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 3

///////////////////////////////////////////////////
Final patch number for 2010_b01_t10: 18168
Total trace number: 166747
Sample number along each trace: 1001
Sampling interval along each trace: 0.002 s
data arrangement: 287 (number of ensembles) x 581 (trace number per ensemble) x 1001 (sample number per trace)
Total trace number: 242136
Sample number along each trace: 1001
Sampling interval along each trace: 0.002 s
data arrangement: 216 (number of ensembles) x 1121 (trace number per ensemble) x 1001 (sample number per trace)
{'data_dim': [269, 561, 1001], 'baseline_datapath': ['/data/libei/co2_data/94p01/2001 processing/data/94p01nea.sgy'], 'timelapse_datapath': ['/data/libei/co2_data/10p11/2011 processing/data/10p11nea'], 'patch_osize': [128, 256], 'patch_nsize': [64, 64], 'patch_number': [3, 5, 6], 'sample_strategy': 1, 'max_0co2': None, 'inline_itp': [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 

///////////////////////////////////////////////////
Final patch number for 2010_b10_t10: 18168
Total trace number: 150909
Sample number along each trace: 1001
Sampling interval along each trace: 0.002 s
data arrangement: 269 (number of ensembles) x 561 (trace number per ensemble) x 1001 (sample number per trace)
Total trace number: 242136
Sample number along each trace: 1001
Sampling interval along each trace: 0.002 s
data arrangement: 216 (number of ensembles) x 1121 (trace number per ensemble) x 1001 (sample number per trace)
{'data_dim': [269, 561, 1001], 'baseline_datapath': ['/data/libei/co2_data/94p10/2010 processing/data/94p10nea.sgy'], 'timelapse_datapath': ['/data/libei/co2_data/10p11/2011 processing/data/10p11nea'], 'patch_osize': [128, 256], 'patch_nsize': [64, 64], 'patch_number': [3, 5, 6], 'sample_strategy': 1, 'max_0co2': None, 'inline_itp': [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 