In [1]:
import json
import os
import ml_collections
import torch
import matplotlib.pyplot as plt
import tqdm

from lightning.pytorch import seed_everything
from seisLM.model.foundation.pretrained_models import LitMultiDimWav2Vec2
from seisLM.data_pipeline import collator
from seisLM.data_pipeline import seisbench_dataloaders
from seisLM.model.foundation import pretrained_models

from seisLM.utils.project_path import gitdir

DEFAULT_NUM_WORKERS = 4

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
pretrained_ckpt_path = gitdir() + '/results/models/pretrained_seisLM/42__2024-07-14-20h-19m-28s/checkpoints/epoch=37-step=998944.ckpt'
checkpoint = torch.load(pretrained_ckpt_path)

In [7]:
# checkpoint.keys()

dict_keys(['epoch', 'global_step', 'pytorch-lightning_version', 'state_dict', 'loops', 'callbacks', 'optimizer_states', 'lr_schedulers', 'MixedPrecision', 'hparams_name', 'hyper_parameters'])

In [2]:

# pretrained_ckpt_path = gitdir() + '/results/models/pretrained_seisLM/42__2024-08-01-18h-03m-01s/checkpoints/epoch=12-step=99515.ckpt'
# pretrained_ckpt_path = gitdir() + '/results/models/pretrained_seisLM/42__2024-08-01-18h-03m-01s/checkpoints/epoch=23-step=183720.ckpt'

pretrained_ckpt_path = gitdir() + '/results/models/pretrained_seisLM/42__2024-07-14-20h-19m-28s/checkpoints/epoch=37-step=998944.ckpt'
pretrained_model_lit = pretrained_models.LitMultiDimWav2Vec2.load_from_checkpoint(
    pretrained_ckpt_path
)


config = pretrained_model_lit.config
config.data_config.local_batch_size = 8
model = pretrained_model_lit


data_collator = \
  collator.DataCollatorForWav2Vec2PretrainingConcatChannelsNoPadding(
      config=config.model_config,
      mask_time_prob=config.training_config.mask_time_prob,
      mask_time_length=config.training_config.mask_time_length,
  )

TypeError: __init__() missing 1 required positional argument: 'config'

## Evaluate on the foreshock--aftershock dataset

In [10]:
from seisLM.data_pipeline.foreshock_aftershock_dataloaders import (
  prepare_foreshock_aftershock_dataloaders
)


shock_loaders = prepare_foreshock_aftershock_dataloaders(
  num_classes=4,
  batch_size=8,
  component_order='ZNE',
  event_split_method='temporal',
  demean=True,
  amp_norm=True,
  amp_norm_type='peak',
  collator=data_collator,
)


data_name = 'ETHZ'
_, pretrain_dev_loader = seisbench_dataloaders.prepare_seisbench_dataloaders(
  model=model,
  training_fraction=config.data_config.training_fraction,
  data_names=[data_name],
  batch_size=config.data_config.local_batch_size,
  num_workers=config.data_config.num_workers,
  prefetch_factor=config.data_config.prefetch_factor,
  collator=data_collator,
  cache=config.data_config.cache_dataset,
)



def evaluate_self_supervised_loss(model, loader):
  all_losses = []
  for batch in tqdm.tqdm(loader):
    for k, v in batch.items():
      batch[k] = v.to(model.device)

    mask_time_indices = batch["mask_time_indices"]
    num_losses = mask_time_indices.sum()

    with torch.no_grad():
      out = model.model(**batch)
    all_losses.append(out.loss / num_losses)

  return sum(all_losses) / len(all_losses)

Seed set to 42
Seed set to 42


In [12]:
evaluate_self_supervised_loss(model, shock_loaders['test'])

100%|██████████| 181/181 [00:08<00:00, 21.35it/s]


tensor(4.0320, device='cuda:0')

In [5]:
evaluate_self_supervised_loss(model, shock_loaders['test'])

100%|██████████| 181/181 [00:08<00:00, 21.72it/s]


tensor(4.0841, device='cuda:0')

In [13]:
evaluate_self_supervised_loss(model, pretrain_dev_loader)

100%|██████████| 454/454 [00:25<00:00, 17.49it/s]


tensor(3.8012, device='cuda:0')

In [5]:
evaluate_self_supervised_loss(model, pretrain_dev_loader)

100%|██████████| 454/454 [00:25<00:00, 17.72it/s]


tensor(3.8440, device='cuda:0')

# Evaluate on the pretrained dataset

In [6]:
data_name = 'ETHZ'

seed_everything(config.seed)




all_losses = []
for batch in tqdm.tqdm(train_loader):
  for k, v in batch.items():
    batch[k] = v.to(model.device)

  mask_time_indices = batch["mask_time_indices"]
  num_losses = mask_time_indices.sum()

  with torch.no_grad():
    out = model.model(**batch)
  all_losses.append(out.loss / num_losses)

Seed set to 42


NameError: name 'train_loader' is not defined

In [None]:
sum(all_losses) / len(all_losses)

tensor(3.7653, device='cuda:0')

tensor(4.1240, device='cuda:0')