In [1]:
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2' 

import numpy as np
import matplotlib.pyplot as plt
import torch
import torch.nn.functional as F
from torch.utils.data import DataLoader, Dataset
from torch import nn
import pytorch_lightning as pl
from pytorch_lightning.callbacks import ModelCheckpoint
from pytorch_lightning import seed_everything
from pathlib import Path
from sklearn.model_selection import train_test_split
torch.set_float32_matmul_precision('high') # alternative: 'medium'. This is to properly utilize Tensor Cores of my CUDA device ('NVIDIA RTX A6000')
# import multiprocessing
# To set start method safely (only once, at top of script/notebook)
# multiprocessing.set_start_method('spawn', force=True)

# Fix the global seed so runs are repeatable (workers=True is forwarded to Lightning's seeding helper).
seed_everything(42, workers=True)
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)

Seed set to 42


cuda


## 1. Configurations

In [2]:
# Sanity check: print every entry found in one of the training sample folders.
data_dir = Path("C:/Users/idris_oduola/Documents/Projects/FWI/dataset/train_samples/FlatFault_B")
for file in data_dir.iterdir():
    print(file)

C:\Users\idris_oduola\Documents\Projects\FWI\dataset\train_samples\FlatFault_B\seis6_1_0.npy
C:\Users\idris_oduola\Documents\Projects\FWI\dataset\train_samples\FlatFault_B\seis8_1_0.npy
C:\Users\idris_oduola\Documents\Projects\FWI\dataset\train_samples\FlatFault_B\vel6_1_0.npy
C:\Users\idris_oduola\Documents\Projects\FWI\dataset\train_samples\FlatFault_B\vel8_1_0.npy


In [3]:
# Count the regular files (sub-directories are skipped) in the test folder.
folder = Path("C:/Users/idris_oduola/Documents/Projects/FWI/dataset/test")
file_count = sum(1 for f in folder.iterdir() if f.is_file())
print(f"Total files: {file_count}")

Total files: 13


In [10]:
class config:
    """Paths and hyper-parameters for the FWI experiment.

    Parameters
    ----------
    base_dir : str or pathlib.Path, optional
        Project root that contains the ``dataset`` folder. Defaults to the
        original local checkout, so ``config()`` yields exactly the same
        path strings as before.

    Raises
    ------
    FileNotFoundError
        If ``<base_dir>/dataset/test`` does not exist (raised while counting
        the test files).
    """
    def __init__(self, base_dir='C:/Users/idris_oduola/Documents/Projects/FWI'):
        # as_posix() keeps forward slashes on every platform, so the derived
        # strings match the previously hard-coded ones for the default root.
        root = Path(base_dir).as_posix()

        #The File paths
        self.train_path = f'{root}/dataset/train_samples'
        self.test_path = f'{root}/dataset/test'
        # Count the files in self.test_path itself. Relying on a notebook-level
        # variable here would make the class fail (or silently miscount) whenever
        # that variable is missing or points at a different directory.
        self.test_num = sum(1 for f in Path(self.test_path).iterdir() if f.is_file())
        print(f'The number of files in test data is: {self.test_num}') 
        #self.submission_path = f'{root}/sample_submission.csv'
        self.model_path = f'{root}/dataset/fwi_model.pt'
        self.checkpoint_dir = f'{root}/dataset/checkpoint_fwi'
        #Dataset folders that keep velocity and seismic files in "model"/"data" sub-folders
        self.dset1 = ["FlatVel_A","FlatVel_B", "Style_A", "Style_B","CurveVel_A","CurveVel_B"]
        #Dataset folders that keep "vel*.npy" and "seis*.npy" side by side
        self.dset2 = ["FlatFault_A", "FlatFault_B", "CurveFault_A"]

        #Model Parameters
        self.init_channels = 5
        self.final_channel = 1
        self.depth = 4
        self.base_channel = 64

        #Optimizer
        self.lr = 0.0005
        self.weight_decay = 1e-4 #Regularization weight

        #The training parameters
        self.num_epoch = 30
        self.batch_size = 50

        #Learning rate scheduler
        self.step_size = 15  #Decay the learning rate every step_size epochs
        self.gamma = 0.1      #Multiply the learning rate by gamma at every decay step

        

# Build the configuration object that is passed to the data-loading code below.
cfg = config()

The number of files in test data is: 13


### 1.1 Preparing the Data

In [5]:
#Pytorch Dataset for DataLoader, we set velocity initially to None in the case of loading the test set
#Note: We can also use the TensorDataset from torch.utils.data
class SeismicDataset(Dataset):
    """Map-style dataset of seismic inputs with optional velocity targets.

    Both arrays are converted to ``float32`` tensors up front. When ``vel`` is
    omitted (the test-set case) items are bare input tensors; otherwise each
    item is an ``(input, target)`` pair.
    """

    def __init__(self, seismic, vel = None):
        self.seismic = torch.tensor(seismic, dtype=torch.float32)
        # ``label`` records whether targets exist; ``vel`` is only set when they do.
        has_targets = vel is not None
        self.label = has_targets
        if has_targets:
            self.vel = torch.tensor(vel, dtype=torch.float32)

    def __len__(self):
        # Number of samples along the leading axis of the inputs.
        return len(self.seismic)

    def __getitem__(self, idx):
        sample = self.seismic[idx]
        if not self.label:
            return sample
        return sample, self.vel[idx]

In [6]:

def _load_npy_files(directory, pattern):
    """Load every file in ``directory`` matching ``pattern``, in sorted filename order."""
    return [np.load(str(f)) for f in sorted(Path(directory).glob(pattern))]


def prepare_data(cfg):
    """Load the training and test arrays and wrap them in DataLoaders.

    Parameters
    ----------
    cfg : config
        Must provide ``train_path``, ``test_path``, ``test_num``, ``dset1``,
        ``dset2`` and ``batch_size``.

    Returns
    -------
    tuple of DataLoader
        ``(train_loader, val_loader, test_loader)``. Train and validation come
        from a 90/10 shuffled split (``random_state=42``); only the train
        loader shuffles.

    Raises
    ------
    ValueError
        If no training files or no test files are found.
    AssertionError
        If the number of seismic samples differs from the number of velocity samples.

    Side effects
    ------------
    Stores the statistics used to z-score the velocity maps on ``cfg`` as
    ``cfg.vel_mean`` and ``cfg.vel_std`` so that predictions can be mapped back
    to the original scale with ``pred * (cfg.vel_std + 1e-8) + cfg.vel_mean``.
    """
    #First we extract the velocity and seismic data for training and testing
    vel_data = []; seismic_data = []

    #Domains that keep their files in separate "model" and "data" sub-folders
    for domain in cfg.dset1:
        domain_dir = Path(cfg.train_path) / domain
        vel_data += _load_npy_files(domain_dir / "model", "*.npy")
        seismic_data += _load_npy_files(domain_dir / "data", "*.npy")

    #Domains that keep "vel*.npy" and "seis*.npy" side by side in one folder
    for domain in cfg.dset2:
        domain_dir = Path(cfg.train_path) / domain
        vel_data += _load_npy_files(domain_dir, "vel*.npy")
        seismic_data += _load_npy_files(domain_dir, "seis*.npy")

    # Fail with an actionable message instead of NumPy's generic
    # "need at least one array to concatenate".
    if not vel_data or not seismic_data:
        raise ValueError(
            f"No training .npy files found under {cfg.train_path} "
            f"for domains {list(cfg.dset1) + list(cfg.dset2)}"
        )

    # Concatenate all at once
    sample_points = sum(v.shape[0] for v in vel_data) #To ensure we have the required number of data
    vel_data = np.concatenate(vel_data, axis=0)
    vel_mean = vel_data.mean(); vel_std = vel_data.std()
    vel_data = (vel_data - vel_mean)/(vel_std + 1e-8)
    # Keep the statistics: without them the normalised predictions cannot be
    # converted back to the original velocity scale.
    cfg.vel_mean = float(vel_mean)
    cfg.vel_std = float(vel_std)
    seismic_data = np.concatenate(seismic_data, axis=0)
    assert ( vel_data.shape[0] == sample_points and seismic_data.shape[0] == sample_points
           ), f"Expected sample size {sample_points} but got {vel_data.shape[0]} and {seismic_data.shape[0]}"
    print(f"Training data --> Seismic: {seismic_data.shape}, Velocity: {vel_data.shape}")

    #We need to normalize (Z-score) our input before training
    #s_mean = seismic_data.mean(axis=(0, 2, 3), keepdims=True); s_std = seismic_data.std(axis=(0, 2, 3), keepdims=True)
    #seismic_data = (seismic_data - s_mean)/(s_std + 1e-6) #Epsilon is for stability

    #Extracting the Test data
    test_files = sorted(Path(cfg.test_path).glob("*.npy"))[0:cfg.test_num] #Only first few for illustration
    if not test_files:
        raise ValueError(f"No test .npy files found under {cfg.test_path}")
    test_data = [np.load(str(f)) for f in test_files]
    test_sample_points = sum(v.shape[0] for v in test_data)
    # NOTE(review): concatenating on axis 0 treats the leading axis of every test
    # file as independent samples, and each one is then replicated across the
    # channel axis below. If a test file is really a single multi-source record,
    # np.stack would be the right call here -- confirm against the data layout.
    test_data = np.concatenate(test_data,axis=0)
    # NOTE(review): the test inputs are z-scored with their own statistics while
    # the training seismic data is fed un-normalised (the z-score above is
    # commented out) -- confirm this mismatch is intended.
    test_data = (test_data - test_data.mean())/(test_data.std() + 1e-8)
    test_data = np.expand_dims(test_data, axis=1)
    # Match the channel count of the training inputs instead of hard-coding it.
    test_data = np.repeat(test_data, seismic_data.shape[1], axis=1)
    assert test_data.shape[0] == test_sample_points, f"Expected test size {test_sample_points} but got {test_data.shape[0]} "
    print(f"Testing data --> Seismic: {test_data.shape}")

    #Next, we take a portion of the train data for validation
    X_train, X_val, y_train,y_val = train_test_split(
        seismic_data, vel_data, test_size = 0.1, random_state = 42, shuffle = True
    )
    print(f"After split --> X_train: {X_train.shape}, y_train: {y_train.shape} -- X_val: {X_val.shape}, y_val: {y_val.shape}")

    #Loading the datasets into batches
    train_dataset = SeismicDataset(X_train, y_train)
    val_dataset = SeismicDataset(X_val, y_val)
    test_dataset = SeismicDataset(test_data)

    #DataLoader
    train_loader = DataLoader(train_dataset, batch_size = cfg.batch_size, shuffle = True)
    val_loader = DataLoader(val_dataset, batch_size = cfg.batch_size, shuffle = False)
    test_loader = DataLoader(test_dataset, batch_size = cfg.batch_size, shuffle = False)

    return train_loader, val_loader, test_loader 

In [7]:
class DataModule(pl.LightningDataModule):
    """LightningDataModule that serves the loaders built by ``prepare_data(cfg)``.

    The three loaders are created on the first ``setup()`` call and reused
    afterwards. The predict loader is the same object as the test loader.
    """
    def __init__(self,cfg):
        super().__init__()
        self.cfg = cfg
        self.train_loader = None
        self.val_loader = None
        self.test_loader = None

    def setup(self, stage = None):
        """Build the loaders once; later calls (any ``stage``) are no-ops."""
        # Lightning invokes setup() for each stage it runs (fit/validate/test/predict)
        # and the notebook also calls it by hand. Without this guard the full
        # dataset would be re-read from disk and re-split on every call.
        if self.train_loader is not None:
            return
        self.train_loader, self.val_loader, self.test_loader = prepare_data(self.cfg)
        print('DataLoaded Successfully!')

    def train_dataloader(self):
        return self.train_loader

    def val_dataloader(self):
        return self.val_loader

    def test_dataloader(self):
        return self.test_loader
    
    def predict_dataloader(self):
        # Prediction runs over the unlabeled test set.
        return self.test_loader

data_module = DataModule(cfg)

In [8]:
# Run setup by hand so the data is loaded and the shape summaries are printed here.
data_module.setup()

Training data --> Seismic: (9000, 5, 1000, 70), Velocity: (9000, 1, 70, 70)
Testing data --> Seismic: (65, 5, 1000, 70)
After split --> X_train: (8100, 5, 1000, 70), y_train: (8100, 1, 70, 70) -- X_val: (900, 5, 1000, 70), y_val: (900, 1, 70, 70)
DataLoaded Successfully!


In [9]:
# Pull the first batch from the predict loader to check its type and shape.
batch = next(iter(data_module.predict_dataloader()))
print("Test batch:", type(batch), batch.shape)

Test batch: <class 'torch.Tensor'> torch.Size([50, 5, 1000, 70])


## Inversion Net Model

In summary,
InversionNet is a deep learning model that learns a mapping from seismic reflection data to subsurface velocity models. It uses an encoder-decoder convolutional neural network (CNN) coupled with a locally connected Conditional Random Field (CRF) for structure refinement. Its structure resembles that of the UNet but without the skip connections, which allows the input and output feature spaces to differ (unlike the UNet, which requires that the input and output feature spaces are the same).

The Network has the following architecture:
1) The input is 2D seismic reflection data recorded from several sources (competition dataset: 5 sources, each of shape 1000 x 70). The output is a 2D subsurface velocity model (competition dataset: 70 x 70).
2) **Encoder**: Extract high-level features from the seismic data.
    * Structure: 
        * Multiple convolution blocks (Conv + BatchNorm + ReLU).
        * Early layers are 1D convolutions to capture temporal features.
        * Later layers compress to a single high-dimensional vector.
    *   No zero-padding in the last layer to reduce to a single feature vector.

3) **Decoder**: Maps the encoded features to a velocity model.
    * Structure: 
        * Mixed deconvolution and convolution blocks.
        * Each deconv block is structured as:
            * 4×4 transposed convolution with stride 2 for upsampling.
            * 3×3 convolution for refinement.
        * Final 1×1 convolution layer to map features to scalar velocity per pixel.
        * Output is cropped to match the ground truth dimensions.
    * The Loss function is the L1 Loss

4) **Conditional Random Field (CRF)**: It refines the CNN's output by enforcing spatial consistency and capturing local interactions.
    * CRF formulation:
        * Unary potential: penalizes deviations from CNN predictions.
        * Pairwise potential: enforces similarity between neighboring pixels.
        * Neighborhood: fixed d×d window (e.g., d=20).
        * Optimization via mean-field approximation.
        * Only active during post-processing, not during CNN training.

5) **The Training Setup**:
        * Optimizer: Adam
        * Batch size: 50
        * Learning rate: starts at 0.0005, decays ×0.1 every 15 epochs
        * Training: CNN for 30 epochs, then CRF refinement