In [None]:
# Notebook magic: an autosave interval of 0 turns periodic autosaving off
%autosave 0 

# Vertical Seismic Profiling inversion

We are going to focus our attention on a common approach to geophysical inverse problems: the use of regularization and prior information. In this example we will invert synthetic data from a vertical seismic profiling survey. To do so, we will discretize the following equation:
\begin{align}
t(z) =  \int_{0}^{z} \frac{1}{v(z')}dz',
\end{align}
where $t(z)$ is the traveltime from the surface to the depth $z$ and $v(z')$ represents the propagation speed of the medium. To make the problem linear we will parameterize it using slowness (the reciprocal of speed) instead of speed. For a regular sampling in $z$, the discrete version of this equation can be written as follows:
\begin{align}
t_i =  \sum_{j=0}^{N_i} s_j \Delta z,
\end{align}
where $\Delta z$ represents the sampling interval in the $z$ direction, $t_i$ is the traveltime at depth $z_i=N_i \Delta z$, and $s_j$ is the slowness at depth $z_j=j \Delta z$.
In this example we will assume that the true subsurface vertical speed is given by the following equation:
\begin{align}
v(z)=3000 + \sqrt{1000 z},
\end{align}
expressed in m/s, with the depth $z$ in meters.

In [None]:
#Adding library modules to PYTHONPATH
import sys
sys.path.append("../python")
import numpy as np
#Plotting library
from matplotlib import pyplot as plt
import matplotlib
from mpl_toolkits.axes_grid1 import make_axes_locatable
%matplotlib inline
# Matplotlib defaults applied to every figure produced in this notebook
params = dict(
    [
        ('image.cmap', 'gray'),
        ('axes.grid', False),
        ('savefig.dpi', 300),      # to adjust notebook inline plot size
        ('axes.labelsize', 14),    # fontsize for x and y labels (was 10)
        ('axes.titlesize', 14),
        ('font.size', 14),
        ('legend.fontsize', 12),
        ('xtick.labelsize', 14),
        ('ytick.labelsize', 14),
    ]
)
matplotlib.rcParams.update(params)
#Inversion library-related modules
import pyVector as Vec
import pyOperator as Op
from pyLinearSolver import LCGsolver as LCG
import pyProblem as Prblm
from pyStopper import BasicStopper as Stopper

In [None]:
#Definition of the modeling operator forward and its adjoint
class VSP_op(Op.Operator):
    """
       Vertical Seismic Profiling operator

       Maps a slowness profile, regularly sampled in depth, onto traveltimes:
         data[0]  = dz * model[0]
         data[i]  = dz * sum(model[0 : i*desampling + 1])   for 0 < i < N-1
         data[-1] = dz * sum(model)                          (whole profile)
       The adjoint spreads each traveltime sample back over the same model samples.
    """

    def __init__(self,model,data,dz,desampling):
        """
           Operator constructor
           model      = [no default] - vector class; slowness model vector (domain)
           data       = [no default] - vector class; traveltime data vector (range)
           dz         = [no default] - float; sampling interval in depth
           desampling = [no default] - int; number of model samples between two
                        consecutive data points (must be 1 or greater)
           Raises ValueError if desampling < 1 or if the data points do not fit
           within the model samples.
        """
        self.setDomainRange(model,data)
        self.dz = dz                           #Sampling in depth
        self.desampling = int(desampling)      #Desampling of the data points (should be 1 or greater)
        if self.desampling < 1:
            raise ValueError("ERROR! desampling must be 1 or greater")
        self.M = model.getNdArray().shape[0]   #Number of model points
        self.N = data.getNdArray().shape[0]    #Number of data points
        #The second-to-last data point must lie strictly above the last model sample
        if (self.M-2) < (self.N-2)*self.desampling:
            raise ValueError("ERROR! Too many data points! Change desampling or number of data points")

    def forward(self,add,model,data):
        """
           Modeling operator from slowness to traveltime
           add     = [no default] - boolean; Flag to add modeled data to input vector
           model   = [no default] - vector class; slowness model vector
           data    = [no default] - vector class; traveltime data vector
        """
        self.checkDomainRange(model,data)
        if not add:
            data.zero()              #data = 0
        modelNd = model.getNdArray() #Getting pointer to Numpy model array
        dataNd = data.getNdArray()   #Getting pointer to Numpy data array
        #Use the sampling stored at construction, not a notebook-level global
        dz = self.dz
        #First data point
        dataNd[0] += modelNd[0]*dz
        for idata in range(1,self.N-1):
            dataNd[idata] += np.sum(modelNd[:idata*self.desampling+1])*dz
        #Last data point integrates the whole slowness profile
        dataNd[-1] += np.sum(modelNd[:])*dz
        return

    def adjoint(self,add,model,data):
        """
           Adjoint operator from traveltime to slowness
           add     = [no default] - boolean; Flag to add the back-projected data to input model vector
           model   = [no default] - vector class; slowness model vector
           data    = [no default] - vector class; traveltime data vector
        """
        self.checkDomainRange(model,data)
        if not add:
            model.zero()             #model = 0
        modelNd = model.getNdArray() #Getting pointer to Numpy model array
        dataNd = data.getNdArray()   #Getting pointer to Numpy data array
        #Use the sampling stored at construction, not a notebook-level global
        dz = self.dz
        #First data point
        modelNd[0] += dataNd[0]*dz
        for idata in range(1,self.N-1):
            modelNd[:idata*self.desampling+1] += dataNd[idata]*dz
        #Last data point contributes to every model sample
        modelNd[:] += dataNd[-1]*dz
        return

In [None]:
# Depth axis: regular grid from the surface down to zmax
dz = 2.
zmax = 1000.0
z = np.linspace(0.0, zmax, num=int(zmax/dz)+1)
# True medium: velocity profile and the corresponding slowness model
vel = np.sqrt(1000.0*z) + 3000.0
slowness = 1.0/vel
model_true = Vec.vectorIC(slowness)
# Receivers: one data point every `desampling` model samples
desampling = 10 #20 m sampling
ndata = 1 + int(zmax/(dz*desampling))
data_true = Vec.vectorIC((ndata,))
VSP_10 = VSP_op(model_true, data_true, dz=dz, desampling=desampling)

In [None]:
# Model the noise-free traveltimes from the true slowness (data_true is overwritten)
VSP_10.forward(add=False, model=model_true, data=data_true)

In [None]:
# NOTE(review): dotTest comes from the Op.Operator base class; presumably a
# dot-product test of forward/adjoint consistency with verbose output — confirm
VSP_10.dotTest(True)

In [None]:
# Noise-free traveltimes at the receiver depths (every `desampling`-th grid point)
fig, ax = plt.subplots()
ax.plot(z[::desampling], data_true.getNdArray(), '*')
ax.set_xlabel("depth [m]")
ax.set_ylabel("time [s]")
_ = ax.set_title("Observed traveltime")

In [None]:
#Create stopping criteria and related object
# Iteration count handed to the stopper (presumably the maximum number of solver iterations — confirm against pyStopper)
niter = 2000
Stop  = Stopper(niter=niter)
#Create LCG solver
# The same solver instance is reused for every inversion run in this notebook
LCGsolver = LCG(Stop)
LCGsolver.setDefaults(save_obj=True) #Saving objective function within the solver

In [None]:
#Initial slowness model: same layout as the true model, filled with zeros
model = model_true.clone()
model.zero() #m = 0
# Unregularized least-squares problem fitting the noise-free traveltimes with the VSP operator
VSP_prob = Prblm.ProblemL2Linear(model,data_true,VSP_10)

In [None]:
# Solve the unregularized, noise-free problem; the result is read back from VSP_prob.model below
LCGsolver.run(VSP_prob,verbose=True)

In [None]:
# Velocity from the unregularized inversion against the true profile
# (slowness is converted back to velocity and scaled by 1e-3 to get km/s)
fig, ax = plt.subplots()
ax.plot(z, 1.e-3/VSP_prob.model.getNdArray(), label='inverted model', linewidth=3)
ax.plot(z, 1.e-3*vel, label='true model', linewidth=2, linestyle='-.')
ax.set_xlabel("depth [m]")
ax.set_ylabel("vel [km/s]")
ax.legend()
ax.set_ylim(3, 4)
ax.set_xlim(0, 1000)
_ = ax.set_title("$L_2$ inversion")

By looking at the objective function value, we clearly see that we can perfectly match the observed traveltime values. In fact, the problem is underdetermined (i.e., we have more model parameters than observations). Because of this fact, we can only retrieve the average velocity between observation depths. Moreover, we will encounter issues when random noise is present in the recorded data.

In [None]:
#Let's add some noise to the recorded data
# Work on a copy so the noise-free traveltimes in data_true stay untouched
noisy_data = data_true.clone()
noisy_data_arr = noisy_data.getNdArray()
# Dedicated, seeded generator: Restart & Run All reproduces the same noise
# realization without touching NumPy's global random state
noise_rng = np.random.RandomState(2020)
# Zero-mean Gaussian noise with standard deviation equal to 0.5% of the mean traveltime
noise_std = np.mean(noisy_data_arr)*0.005
noisy_data_arr += noise_rng.normal(scale=noise_std, size=noisy_data_arr.shape)

In [None]:
# Noisy traveltimes at the receiver depths
fig, ax = plt.subplots()
ax.plot(z[::desampling], noisy_data_arr, '*')
ax.set_xlabel("depth [m]")
ax.set_ylabel("time [s]")
_ = ax.set_title("Observed noisy traveltime")

In [None]:
# Same unregularized least-squares problem, now fitting the noisy traveltimes
# NOTE(review): `model` is the same vector object that was passed to VSP_prob;
# whether the problem copies it or the solver updates it in place is not visible
# here — confirm that this run really starts from a zero model
VSP_prob_noise = Prblm.ProblemL2Linear(model,noisy_data,VSP_10)
LCGsolver.run(VSP_prob_noise,verbose=True)

In [None]:
# Velocity from the unregularized inversion of noisy data against the true profile
fig, ax = plt.subplots()
ax.plot(z, 1.e-3*vel, label='true model')
ax.plot(z, 1.e-3/VSP_prob_noise.model.getNdArray(), label='inverted model')
ax.set_xlabel("depth [m]")
ax.set_ylabel("vel [km/s]")
ax.legend()
ax.set_ylim(3, 4)
ax.set_xlim(0, 1000)
_ = ax.set_title("$L_2$ inversion (noisy data)")

A simple method to avoid these issues is to add prior knowledge to the inversion. In fact, we can change the problem to be solved by adding a regularization term. We will test two kinds of regularization: one in which a smoothness constraint is added and another one where a prior model is assumed and employed in the regularization term.

In the first case, the objective function to be minimized is the following:
\begin{align}
\phi_{smooth}(\mathbf{m})=\frac{1}{2}\|G \mathbf{m} - \mathbf{d}_{obs} \|_2^2 + \frac{\epsilon^2}{2} \|D_1 \mathbf{m}\|_2^2,
\end{align}
where $\mathbf{m}$ represents the slowness model, $\mathbf{d}_{obs}$ is the vector of observed traveltimes, while $G$ and $D_1$ are the modeling and first-order derivative operators, respectively. The variable $\epsilon$ is the trade-off weight and will be set by trial and error.

In [None]:
#Defining first-order derivative operator
class FirstDerivative(Op.Operator):
    """First-derivative operator based on differences between neighbouring samples."""

    def __init__(self, model, sampling=1.):
        """
        Build the derivative operator; domain and range share the same vector layout
        :param model    : vector class; domain vector
        :param sampling : scalar; sampling step [1.]
        """
        self.sampling = sampling
        super().__init__(model, model)

    def forward(self, add, model, data):
        """Forward operator: data (+)= derivative of model"""
        self.checkDomainRange(model, data)
        if not add:
            data.zero()
        m_arr = model.getNdArray()
        d_arr = data.getNdArray()
        step = self.sampling
        # Difference between each sample and the next one
        d_arr[:-1] += (m_arr[1:] - m_arr[:-1]) / step
        # Boundary condition: the last sample repeats the last available difference
        d_arr[-1] += (m_arr[-1] - m_arr[-2]) / step

    def adjoint(self, add, model, data):
        """Adjoint operator: model (+)= transposed derivative applied to data"""
        self.checkDomainRange(model, data)
        if not add:
            model.zero()
        m_arr = model.getNdArray()
        d_arr = data.getNdArray()
        step = self.sampling
        # First sample only receives the (negated) first difference
        m_arr[0] -= d_arr[0] / step
        # Interior samples: transpose of the two-point difference stencil
        m_arr[1:-1] += (d_arr[:-2] - d_arr[1:-1]) / step
        # Transpose of the boundary condition used in the forward operator
        m_arr[-2] -= d_arr[-1] / step
        m_arr[-1] += (d_arr[-2] + d_arr[-1]) / step

In [None]:
#Instantiating regularization operator
# model_true only provides the vector layout; dz is the sampling step of the derivative
D1 = FirstDerivative(model_true,dz)
#Instantiating regularized problem with noise-free data
# epsilon=1.0 is only a placeholder: it is overwritten two lines below
VSP_prob_regD1 = Prblm.ProblemL2LinearReg(model,data_true,VSP_10,epsilon=1.0,reg_op=D1)
#Estimating a value for the regularization weight epsilon
epsilon_balance = VSP_prob_regD1.estimate_epsilon(True)
# Scale factor applied to the estimate, set by trial and error (see the text above)
VSP_prob_regD1.epsilon = epsilon_balance * 1e-4

In [None]:
# Solve the derivative-regularized problem on noise-free data
LCGsolver.run(VSP_prob_regD1,verbose=True)

In [None]:
# Velocity from the derivative-regularized inversion (noise-free data) against the true profile
fig, ax = plt.subplots()
ax.plot(z, 1.e-3*vel, label='true model')
ax.plot(z, 1.e-3/VSP_prob_regD1.model.getNdArray(), label='inverted model')
ax.set_xlabel("depth [m]")
ax.set_ylabel("vel [km/s]")
ax.legend()
ax.set_ylim(3, 4)
ax.set_xlim(0, 1000)
_ = ax.set_title("Regularized inversion (noise-free)")

In [None]:
#Instantiating regularized problem with noisy data
# Rebinds VSP_prob_regD1, replacing the noise-free regularized problem defined earlier
VSP_prob_regD1 = Prblm.ProblemL2LinearReg(model,noisy_data,VSP_10,epsilon=1.0,reg_op=D1)
#Estimating a value for the regularization weight epsilon
epsilon_balance = VSP_prob_regD1.estimate_epsilon(True)
# Larger scale factor than in the noise-free case (5e-2 instead of 1e-4)
VSP_prob_regD1.epsilon = epsilon_balance * 5e-2

In [None]:
# Solve the derivative-regularized problem on noisy data
LCGsolver.run(VSP_prob_regD1,verbose=True)

In [None]:
# Velocity from the derivative-regularized inversion (noisy data) against the true profile
fig, ax = plt.subplots()
ax.plot(z, 1.e-3*vel, label='true model')
ax.plot(z, 1.e-3/VSP_prob_regD1.model.getNdArray(), label='inverted model')
ax.set_xlabel("depth [m]")
ax.set_ylabel("vel [km/s]")
ax.legend()
ax.set_ylim(3, 4)
ax.set_xlim(0, 1000)
_ = ax.set_title("Regularized inversion (noisy data)")

In the second case (i.e., when a prior model is employed), the objective function to be minimized is written as follows:
\begin{align}
\phi_{prior}(\mathbf{m})=\frac{1}{2}\|G \mathbf{m} - \mathbf{d}_{obs} \|_2^2 + \frac{\epsilon^2}{2} \|\mathbf{m} - \mathbf{m}_{prior}\|_2^2,
\end{align}
where $\mathbf{m}_{prior}$ is the prior model vector expressed as the following velocity profile:
\begin{align}
v_{prior}(z)  = 3100.+ 1.0 z
\end{align}

In [None]:
# Prior velocity profile: 3100 at z = 0, increasing linearly with z (unit slope)
vel_prior = 3100.0 + z
# Prior model expressed as slowness (reciprocal of the prior velocity)
model_prior = Vec.vectorIC(1.0/vel_prior)

In [None]:
#Instantiating regularized problem with prior model and noisy data
# No reg_op is passed here, only prior_model; epsilon=1.0 is a placeholder overwritten below
VSP_prob_prior = Prblm.ProblemL2LinearReg(model,noisy_data,VSP_10,epsilon=1.0,prior_model=model_prior)
#Estimating a value for the regularization weight epsilon
epsilon_balance = VSP_prob_prior.estimate_epsilon(True)
# Scale factor applied to the estimate (2.5e-1 for the prior-model regularization)
VSP_prob_prior.epsilon = epsilon_balance * 2.5e-1

In [None]:
# Solve the prior-model-regularized problem on noisy data
LCGsolver.run(VSP_prob_prior,verbose=True)

In [None]:
# True, prior and inverted velocity profiles for the prior-model regularization
fig, ax = plt.subplots()
ax.plot(z, 1.e-3*vel, label='true model')
ax.plot(z, 1.e-3*vel_prior, label='prior model')
ax.plot(z, 1.e-3/VSP_prob_prior.model.getNdArray(), label='inverted model')
ax.set_xlabel("depth [m]")
ax.set_ylabel("vel [km/s]")
ax.legend()
ax.set_ylim(3, 4)
ax.set_xlim(0, 1000)
_ = ax.set_title("Regularized inversion with prior model (noisy data)")