# Configuration Generator

In [1]:
import json
import os
import numpy as np
import torch 

# Directory (relative to the current working directory) that receives the generated JSON files.
configs_dir = "configs"
# Create it up front; exist_ok=True makes re-running this cell harmless.
os.makedirs(configs_dir, exist_ok=True)

def save_config(config, filename: str) -> None:
    """Write ``config`` as indented JSON to ``<configs_dir>/<filename>``.

    The configuration is serialized to a string *before* the target file is
    opened. Opening with mode ``'w'`` truncates the file immediately, so
    streaming ``json.dump`` into it would destroy a previously saved config
    (and leave a partial JSON fragment behind) whenever ``config`` contains a
    value that is not JSON serializable.

    Args:
        config: JSON-serializable object (typically a dict) to save.
        filename: File name, joined onto the module-level ``configs_dir``.

    Raises:
        TypeError: If ``config`` contains a value that is not JSON
            serializable. The target file is left untouched in that case.
        OSError: If the target file cannot be opened or written.
    """
    # Serialize first: a failure here must not touch anything on disk.
    payload = json.dumps(config, indent=4)
    target_path = os.path.join(configs_dir, filename)
    with open(target_path, 'w', encoding='utf-8') as f:
        f.write(payload)
    print(f"Configuration saved to {filename}")

## Model Configuration

In [2]:
# Channel counts for the model: one input channel, six output channels.
model_config = dict(in_channels=1, out_channels=6)

model_config

{'in_channels': 1, 'out_channels': 6}

## Dataset Configuration

In [3]:
from dataset import NpyDataset, SegyDataset
from transformation import sliceT, traceT

In [4]:
""" NpyDataSet:
Args:
    paths (List[Dict[str, Any]]): A list of dictionaries containing the paths to the seismic data and labels.
        Each dictionary should have the following structure:
        {
        'data': str,  # path to data file
        'label': str,  # path to label file (optional) - if not provided, labels will be zeros (can be ignored/used in testing)
        'order': Tuple[str, str, str],  # e.g., ('x', 'y', 'z') 
        'range': [Dict[str, Tuple[float, float]]]  # e.g., {'x': (0, 1), 'y': (0, 1), 'z': (0, 1)} (optional: any missing dimension will default to full range)
        }
    dt_transformations (Optional[Union[List[Callable], Callable]]): Transformations to apply to the data.
    lb_transformations (Optional[Union[List[Callable], Callable]]): Transformations to apply to the labels.
    dtype (np.dtype): The datatype to use for the data.
    ltype (np.dtype): The datatype to use for the labels.
    norm (int): Whether to normalize the data when loading. (0 = no normalization, 1 = normalize before applying transformations, 2 = normalize after applying transformations)
    stride (int): The stride to use when creating slices or windows (default: 1). a.k.a. steps or jumps when indexing the data.
    mode (str): The mode to use for the dataset. Options: 'windowed', 'slice', 'traces'.
    line_mode (str): The line mode to use for slice and windowed modes. Options: 'both', 'iline', 'xline'.
    window_w (int): The width of the windowed slice (only used in 'windowed' mode).
    window_h (int): The height of the windowed slice (only used in 'windowed' mode).
    stride_w (int): The stride to use when creating windowed slices on width (only used in 'windowed' mode).
    stride_h (int): The stride to use when creating windowed slices on height (only used in 'windowed' mode).
"""
# Training source: one data volume plus its label volume, restricted per axis.
file1 = dict(
    data='/Users/alkhalmr/aramco/alphasei/src/dataset/data/SEAM/seismic_train.npy',
    label='/Users/alkhalmr/aramco/alphasei/src/dataset/data/SEAM/labels.npy',
    order=('z', 'x', 'y'),
    # NOTE(review): this line was previously labelled "70% of the data", which does
    # not match these bounds -- presumably they are fractions of each axis; confirm
    # the intended train/val split.
    range=dict(x=(0, 0.3), y=(0, 0.5), z=(0, 0.3)),
)
train_src = [file1]

# No transformations are applied to the data or to the labels.
dt, lb = [], []

# Data and labels are both handled as float32.
data_type = label_type = np.float32

norm = 0    # 0 = no normalization
stride = 5  # step used when indexing slices
mode = 'slice'
line_mode = 'iline'  # not needed in traces mode

# Window geometry -- only needed in windowed mode.
window_w = window_h = 128
stride_w = stride_h = 64

# Arguments are passed positionally, in the order listed in the Args description above.
train_dataset = NpyDataset.NpyDataset(
    train_src,
    dt,
    lb,
    data_type,
    label_type,
    norm,
    stride,
    mode,
    line_mode,
    window_w,
    window_h,
    stride_w,
    stride_h,
)

# Serializable description of the dataset, reused when assembling dataset_config.
train_dataconfig = train_dataset.get_config()



In [5]:
# Validation source: same files as the training source, with a different x-range.
file1 = dict(
    data='/Users/alkhalmr/aramco/alphasei/src/dataset/data/SEAM/seismic_train.npy',
    label='/Users/alkhalmr/aramco/alphasei/src/dataset/data/SEAM/labels.npy',
    order=('z', 'x', 'y'),
    # NOTE(review): this line was previously labelled "30% of the data", which does
    # not match these bounds -- presumably they are fractions of each axis; confirm
    # the intended train/val split.
    range=dict(x=(0.3, 0.5), y=(0, 0.5), z=(0, 0.3)),
)
val_src = [file1]

# Every other setting (transforms, dtypes, norm, stride, mode, window geometry)
# is reused from the training-dataset cell.
val_dataset = NpyDataset.NpyDataset(
    val_src,
    dt,
    lb,
    data_type,
    label_type,
    norm,
    stride,
    mode,
    line_mode,
    window_w,
    window_h,
    stride_w,
    stride_h,
)

# Serializable description of the dataset, reused when assembling dataset_config.
val_dataconfig = val_dataset.get_config()



In [6]:
# Combine both dataset descriptions with the loader settings.
dataset_config = dict(
    train=train_dataconfig,
    val=val_dataconfig,
    batch_size=16,
    num_workers=0,
    # Optional keys, currently disabled:
    #   'collate': ''        -- in NpyDataset: either 'window' or 'padded'
    #   'collate_args': {}
)

dataset_config

{'train': {'paths': [{'data': '/Users/alkhalmr/aramco/alphasei/src/dataset/data/SEAM/seismic_train.npy',
    'label': '/Users/alkhalmr/aramco/alphasei/src/dataset/data/SEAM/labels.npy',
    'order': ('z', 'x', 'y'),
    'range': {'x': (0, 0.3), 'y': (0, 0.3), 'z': (0, 0.3)}}],
  'dt_transformations': [],
  'lb_transformations': [],
  'dtype': 'float32',
  'ltype': 'float32',
  'norm': 0,
  'mode': 'slice',
  'line_mode': 'iline',
  'window_w': 128,
  'window_h': 128,
  'stride_w': 64,
  'stride_h': 64},
 'val': {'paths': [{'data': '/Users/alkhalmr/aramco/alphasei/src/dataset/data/SEAM/seismic_train.npy',
    'label': '/Users/alkhalmr/aramco/alphasei/src/dataset/data/SEAM/labels.npy',
    'order': ('z', 'x', 'y'),
    'range': {'x': (0.3, 0.5), 'y': (0.3, 0.5), 'z': (0, 0.3)}}],
  'dt_transformations': [],
  'lb_transformations': [],
  'dtype': 'float32',
  'ltype': 'float32',
  'norm': 0,
  'mode': 'slice',
  'line_mode': 'iline',
  'window_w': 128,
  'window_h': 128,
  'stride_w': 64,

## Training Configuration

In [7]:
# Training settings, grouped by concern.
train_config = dict(
    # Loss function type (e.g., l1, l2, crossentropy).
    loss=dict(type='crossentropy'),
    # Optimizer (Adam/AdamW/SGD/RMSprop/Adagrad/Adadelta) and its parameters
    # (e.g., weight_decay, momentum, etc.).
    optimizer='Adam',
    optimizer_params=dict(lr=0.001),
    # Learning rate scheduler (StepLR/MultiStepLR/ExponentialLR/CosineAnnealingLR/CyclicLR);
    # modify scheduler_params for other schedulers as needed.
    scheduler='StepLR',
    scheduler_params=dict(
        step_size=10,  # For StepLR
        gamma=0.1,     # For StepLR and ExponentialLR
    ),
    # Metrics type (regression/classification).
    metrics='classification',
    metrics_params=dict(
        data_range=1.0,  # Data range for regression metrics
        full=True,       # Use full/light regression metrics? (True/False)
        num_classes=6,   # Number of classes (for classification)
    ),
    # Data transform type (slice/trace) and the list of transformations to apply.
    # Example entries (currently disabled):
    #   ('normalize', {})            -- data normalization
    #   ('scale', {'factor': 5.0})   -- data scaling
    data_transforms=dict(type='slice', transformations=[]),
    # Label transform type (slice/trace); no label transformations are configured.
    label_transforms=dict(type='slice', transformations=[]),
    # Number of epochs.
    epochs=2,
)

train_config

{'loss': {'type': 'crossentropy'},
 'optimizer': 'Adam',
 'optimizer_params': {'lr': 0.001},
 'scheduler': 'StepLR',
 'scheduler_params': {'step_size': 10, 'gamma': 0.1},
 'metrics': 'classification',
 'metrics_params': {'data_range': 1.0, 'full': True, 'num_classes': 6},
 'data_transforms': {'type': 'slice', 'transformations': []},
 'label_transforms': {'type': 'slice', 'transformations': []},
 'epochs': 2}

## Generate Configuration Files

In [8]:
# Write each configuration to its own JSON file, in the same order as before.
for config_obj, config_filename in (
    (model_config, "model_config.json"),
    (dataset_config, "dataset_config.json"),
    (train_config, "train_config.json"),
):
    save_config(config_obj, config_filename)

print("All configurations have been generated and saved in the 'configs' directory.")

Configuration saved to model_config.json
Configuration saved to dataset_config.json
Configuration saved to train_config.json
All configurations have been generated and saved in the 'configs' directory.
