In [1]:
from pprint import pprint
import os
import json
import itertools
import numpy as np
from contextlib import nullcontext
import torch
from torch.cuda.amp import autocast
from momentfm.data.informer_dataset import InformerDataset
from torch.utils.data import Dataset, DataLoader
import momentfm
from momentfm import MOMENTPipeline
from momentfm.utils.masking import Masking
from tqdm.auto import tqdm
import torch.distributed as dist
from torch.utils.data.distributed import DistributedSampler
from momentfm.utils.utils import control_randomness
from momentfm.utils.forecasting_metrics import mse, mae
import matplotlib.pyplot as plt

# Load the pretrained MOMENT-1-large checkpoint. Every freeze flag is False,
# i.e. nothing is frozen by these kwargs.
model = MOMENTPipeline.from_pretrained(
    "AutonLab/MOMENT-1-large",
    model_kwargs={'task_name': 'reconstruction',  # For imputation, we will load MOMENT in `reconstruction` mode
                  'freeze_encoder': False,  # False = encoder is NOT frozen (presumably the transformer/T5 stack - confirm)
                  'freeze_embedder': False,  # False = embedder is NOT frozen (presumably the patch-embedding layer - confirm)
                  'freeze_head': False,  # False = head is NOT frozen, so it is trained as well
                  }
)


  torch.utils._pytree._register_pytree_node(


In [28]:
# Print a layer-by-layer summary of `model`, using the batch tensor `inp` as input data.
# NOTE(review): `inp` is only assigned further down in this notebook
# (`inp, labels, mask = next(i)`), so on a fresh kernel this cell raises NameError
# unless that cell has been executed first.
from torchsummary import summary
summary(model, input_data=inp)

Layer (type:depth-idx)                   Output Shape              Param #
├─RevIN: 1-1                             [-1, 5, 512]              --
├─Patching: 1-2                          [-1, 5, 64, 8]            --
├─PatchEmbedding: 1-3                    [-1, 5, 64, 1024]         --
|    └─Linear: 2-1                       [-1, 5, 64, 1024]         8,192
|    └─PositionalEmbedding: 2-2          [-1, 64, 1024]            --
|    └─Dropout: 2-3                      [-1, 5, 64, 1024]         --
├─T5Stack: 1-4                           [[-1, 64, 1024]]          --
|    └─Dropout: 2-4                      [-1, 64, 1024]            --
|    └─ModuleList: 2                     []                        --
|    |    └─T5Block: 3-1                 [-1, 64, 1024]            12,847,616
|    |    └─T5Block: 3-2                 [-1, 64, 1024]            12,847,104
|    |    └─T5Block: 3-3                 [-1, 64, 1024]            12,847,104
|    |    └─T5Block: 3-4                 [-1, 64, 1024]   

Layer (type:depth-idx)                   Output Shape              Param #
├─RevIN: 1-1                             [-1, 5, 512]              --
├─Patching: 1-2                          [-1, 5, 64, 8]            --
├─PatchEmbedding: 1-3                    [-1, 5, 64, 1024]         --
|    └─Linear: 2-1                       [-1, 5, 64, 1024]         8,192
|    └─PositionalEmbedding: 2-2          [-1, 64, 1024]            --
|    └─Dropout: 2-3                      [-1, 5, 64, 1024]         --
├─T5Stack: 1-4                           [[-1, 64, 1024]]          --
|    └─Dropout: 2-4                      [-1, 64, 1024]            --
|    └─ModuleList: 2                     []                        --
|    |    └─T5Block: 3-1                 [-1, 64, 1024]            12,847,616
|    |    └─T5Block: 3-2                 [-1, 64, 1024]            12,847,104
|    |    └─T5Block: 3-3                 [-1, 64, 1024]            12,847,104
|    |    └─T5Block: 3-4                 [-1, 64, 1024]   

In [27]:
# Show the documentation of `summary`. `help()` is used instead of the IPython-only
# `summary?` shorthand so the cell is valid Python when the notebook is exported
# or executed outside IPython.
help(summary)

Signature:
summary(
    model: torch.nn.modules.module.Module,
    input_data: Union[torch.Tensor, torch.Size, Sequence[torch.Tensor], Sequence[Union[int, Sequence[Any], torch.Size]], NoneType] = None,
    *args: Any,
    batch_dim: Optional[int] = ...  (output truncated)

In [3]:
# Make the notebook's parent directory importable. The membership check keeps
# re-running this cell from appending the same entry twice.
import os
import sys

module_path = os.path.abspath('..')
if sys.path.count(module_path) == 0:
    sys.path.append(module_path)

In [3]:
# Hyperparameters
seed = 13                    # random seed (not consumed in the cells shown here)
epochs = 3                   # number of training epochs
lr = 1e-4                    # learning rate handed to the optimizer / ds_zero_config
batch_size = 192             # DataLoader batch size; also "train_batch_size" in ds_zero_config
grad_accum_step = 1          # "gradient_accumulation_steps" in ds_zero_config
use_amp = True               # enables the fp16 / bf16 section of ds_zero_config
use_tensorcore = True        # when True, float32 matmul precision is set to 'high'
autotune = True              # NOTE(review): not referenced in the cells shown here
use_fused = True             # False for quick start/debug mode (True => torch.compile + FusedAdam)
zero_stage = 2               # "zero_optimization" stage in ds_zero_config
data_stride_len = 512        # NOTE(review): not referenced in the cells shown here
mask_ratio = 0.3             # ratio passed to the Masking generator
dtype = torch.float32        # dtype the model is moved to
# use float16 for V100 and bfloat16 for A100
# (on a V100, switch this to torch.float16)
amp_dtype = torch.bfloat16

In [4]:
# Training configuration dictionary built from the hyperparameters.
# NOTE(review): the key names follow the DeepSpeed config layout; the consumer
# of this dict is not in the cells shown here - confirm.
# At most one of fp16 / bf16 is enabled, and only when `use_amp` is set.
ds_zero_config = {
    "train_batch_size": batch_size,
    "gradient_accumulation_steps": grad_accum_step,
    "optimizer": {"type": "Adam", "params": {"lr": lr}},
    "fp16": {"enabled": amp_dtype == torch.float16 and use_amp},
    "bf16": {"enabled": amp_dtype == torch.bfloat16 and use_amp},
    "zero_optimization": {"stage": zero_stage},
}

In [11]:
# Load the pretrained MOMENT-1-large checkpoint (same arguments as the first cell
# of this notebook), build the patch-mask generator and move the model to its device.
model = MOMENTPipeline.from_pretrained(
    "AutonLab/MOMENT-1-large",
    model_kwargs={'task_name': 'reconstruction', # For imputation, we will load MOMENT in `reconstruction` mode
                   'freeze_encoder': False, # False = encoder is NOT frozen (presumably the transformer/T5 stack - confirm)
                   'freeze_embedder': False, # False = embedder is NOT frozen (presumably the patch-embedding layer - confirm)
                   'freeze_head': False, # False = head is NOT frozen, so it is trained as well
                 }
)

mask_generator = Masking(mask_ratio=mask_ratio) # Mask a `mask_ratio` fraction of patches randomly (0.3 in the hyperparameter cell)
num_params = sum(p.numel() for p in model.parameters())  # total number of parameter elements in the model
world_size = torch.cuda.device_count()  # number of CUDA devices visible to this process
# NOTE(review): `rank` is not defined in any cell shown here; it must come from
# another cell (presumably the device / local rank index) - confirm before Run All.
model = model.to(rank, dtype)

In [13]:
# Optimize Mean Squared Error using your favourite optimizer
criterion = torch.nn.MSELoss() 
# `use_fused` switches on BOTH torch.compile and the FusedAdam optimizer;
# otherwise the uncompiled model is trained with torch.optim.Adam.
if use_fused:
    print("Torch compile needs some times...")
    model = torch.compile(model)
    # NOTE(review): FusedAdam is not imported in the cells shown here
    # (presumably deepspeed.ops.adam.FusedAdam) - confirm the import exists.
    optimizer = FusedAdam(model.parameters(), lr=lr)
else:
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)

Torch compile needs some times...


Using /home/ecc_17/makamx0a/.cache/torch_extensions/py311_cu121 as PyTorch extensions root...
Detected CUDA files, patching ldflags
Emitting ninja build file /home/ecc_17/makamx0a/.cache/torch_extensions/py311_cu121/fused_adam/build.ninja...
If this is not desired, please set os.environ['TORCH_CUDA_ARCH_LIST'].
Building extension module fused_adam...
Allowing ninja to set a default number of workers... (overridable by setting the environment variable MAX_JOBS=N)


ninja: no work to do.
Time to load fused_adam op: 0.0867621898651123 seconds


Loading extension module fused_adam...


In [15]:
# use tensor core
# Process-wide setting: with 'high', PyTorch may run float32 matmuls on reduced-
# precision tensor-core paths (e.g. TF32) instead of full float32 arithmetic.
if use_tensorcore:
    torch.set_float32_matmul_precision('high')

In [6]:
# # Dataset
class WellLogDataset(Dataset):
    """Map-style dataset over pre-tokenized well-log windows stored one file per sample.

    ``root_dir`` must contain ``dict_tokens.json``, a JSON object whose values
    are the paths of the per-sample files. In file order, the first
    ``few_shot`` entries form the ``"train"`` split and the remaining entries
    form the ``"test"`` split.

    Each sample file is read with ``torch.load`` and is expected to be a
    mapping with an ``'input'`` entry (and a ``'label'`` entry for the
    forecast task). Both are returned transposed.
    NOTE(review): presumably stored as (seq_len, channels) so that the
    transposed sample is (channels, seq_len) - confirm against the files.

    Args:
        root_dir: Directory holding ``dict_tokens.json``; a trailing path
            separator is optional.
        task_name: ``"imputation"`` or ``"forecast"``.
        data_split: ``"train"`` or ``"test"``.
        few_shot: Number of leading mapping entries assigned to the train split.
        forecast_horizon: Number of leading label steps returned for the
            forecast task.

    Raises:
        ValueError: If ``task_name`` or ``data_split`` is not a supported value.
    """

    _VALID_TASKS = ("imputation", "forecast")
    _VALID_SPLITS = ("train", "test")

    def __init__(self,
                 root_dir,
                 task_name: str = "imputation",
                 data_split: str = "train",
                 few_shot: int = 5,
                 forecast_horizon: int = 192
                 ):
        # Fail fast on a bad task: previously __getitem__ silently returned
        # None, which only surfaced later inside the DataLoader collate step.
        if task_name not in self._VALID_TASKS:
            raise ValueError(
                f"Unsupported task_name {task_name!r}; expected one of {self._VALID_TASKS}"
            )
        self.seq_len = 512
        self.root_dir = root_dir
        self.task_name = task_name
        self.data_split = data_split
        self.few_shot = few_shot
        self.forecast_horizon = forecast_horizon
        # os.path.join works with or without a trailing separator on root_dir;
        # plain string concatenation required the trailing separator.
        with open(os.path.join(root_dir, 'dict_tokens.json'), 'r') as file:
            self.mapping = json.load(file)
        self._read_data()

    def __len__(self):
        """Return the number of sample files in the selected split."""
        return len(self.files)

    def _get_borders(self):
        """Split the mapping, in file order, into (train_mapping, test_mapping)."""
        items = self.mapping.items()
        train_mapping = dict(itertools.islice(items, self.few_shot))
        test_mapping = dict(itertools.islice(items, self.few_shot, None))
        return train_mapping, test_mapping

    def _read_data(self):
        """Populate ``self.files`` with the sample paths of ``self.data_split``."""
        train_mapping, test_mapping = self._get_borders()

        if self.data_split == "train":
            self.files = list(train_mapping.values())
        elif self.data_split == "test":
            self.files = list(test_mapping.values())
        else:
            # Previously this fell through and crashed on the next line with
            # an AttributeError about a missing `files` attribute.
            raise ValueError(
                f"Unsupported data_split {self.data_split!r}; expected one of {self._VALID_SPLITS}"
            )
        self.length_timeseries = len(self.files)

    def __getitem__(self, idx):
        """Load sample ``idx`` from disk.

        Returns:
            ``(input, input_mask)`` for the imputation task, or
            ``(input, label, input_mask)`` for the forecast task, where the
            label is cut to its first ``forecast_horizon`` steps and
            ``input_mask`` is an all-ones NumPy array of length ``seq_len``.

        Raises:
            ValueError: If ``task_name`` was changed to an unsupported value.
        """
        file_name = self.files[idx]
        input_mask = np.ones(self.seq_len)
        # SECURITY NOTE: torch.load unpickles the file; only point this
        # dataset at sample files from a trusted source.
        data_dict = torch.load(file_name)
        if self.task_name == 'imputation':
            return data_dict['input'].T, input_mask
        if self.task_name == 'forecast':
            return data_dict['input'].T, data_dict['label'].T[:, :self.forecast_horizon], input_mask
        raise ValueError(
            f"Unsupported task_name {self.task_name!r}; expected one of {self._VALID_TASKS}"
        )

In [7]:
# Resolve the dataset directory from the project settings file.
# NOTE(review): `config` is a project-local module (presumably made importable by
# the sys.path cell that appends the parent directory) - confirm.
from config import Config
global_configs = Config('../.config/settings.yaml')
# The trailing '/' is deliberate: it keeps paths that are built by plain string
# concatenation onto `root_dir` (e.g. root_dir + 'dict_tokens.json') valid.
ARAMCO_LOGS = os.path.join(global_configs.data, 'alphas', 'tokenized/logs_tokenized/data_processed_512_standard_Aramco/')
root_dir = ARAMCO_LOGS

In [21]:
# Build the training split and its loader.
world_size = torch.cuda.device_count()  # number of visible CUDA devices (only used by the disabled sampler below)
# few_shot=14400: the first 14400 entries of dict_tokens.json form the train split.
train_dataset = WellLogDataset(root_dir, task_name='forecast', data_split="train",  few_shot=14400)
# Distributed sampling is currently disabled; the loader below uses DataLoader defaults.
#train_sampler = DistributedSampler(train_dataset, num_replicas=world_size, shuffle=False)
train_loader = DataLoader(train_dataset, batch_size=batch_size)

In [23]:
# Pull a single batch from the loader for inspection. With task_name='forecast'
# each dataset item is (input, label, input_mask), hence the three-way unpack.
i = iter(train_loader)
inp, labels, mask = next(i)

In [26]:
# Sanity check: display the shapes of the input, label and mask tensors of the batch.
inp.shape, labels.shape, mask.shape

(torch.Size([192, 5, 512]), torch.Size([192, 5, 192]), torch.Size([192, 512]))