In [1]:
import logging
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pytorch_lightning as pl
import seisbench
import seisbench.models as sbm
import torch
from sklearn.metrics import roc_curve, precision_recall_curve
from torch.utils.data import DataLoader

from seisLM.evaluation import pick_eval
from seisLM.utils import project_path
# from seisLM.utils.project_path import gitdir

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# PhaseNet checkpoints from runs tagged ETHZ, one per training-data fraction.
# Keys are the fraction as a string and mirror the `train_frac_*` component of
# each run directory; values are paths to the saved `.ckpt` files.
# NOTE(review): these are absolute paths on one specific machine — consider
# deriving them from a configurable results directory.
phasenet_ethz = {
  '0.05': "/home/liu0003/Desktop/projects/seisLM/results/models/phasepick_run/phasenet_ETHZ_train_frac_0.05_model_PhaseNet_seed_42_time_2024-08-11-16h-57m-27s/checkpoints/epoch=49-step=800.ckpt",
  '0.1': "/home/liu0003/Desktop/projects/seisLM/results/models/phasepick_run/phasenet_ETHZ_train_frac_0.1_model_PhaseNet_seed_42_time_2024-08-11-16h-59m-42s/checkpoints/epoch=49-step=2400.ckpt",
  '0.2': "/home/liu0003/Desktop/projects/seisLM/results/models/phasepick_run/phasenet_ETHZ_train_frac_0.2_model_PhaseNet_seed_42_time_2024-08-11-17h-01m-13s/checkpoints/epoch=48-step=3920.ckpt",
  '0.5': "/home/liu0003/Desktop/projects/seisLM/results/models/phasepick_run/phasenet_ETHZ_train_frac_0.5_model_PhaseNet_seed_42_time_2024-08-11-17h-03m-46s/checkpoints/epoch=37-step=6498.ckpt",
  '1.0': "/home/liu0003/Desktop/projects/seisLM/results/models/phasepick_run/phasenet_ETHZ_train_frac_1.0_model_PhaseNet_seed_42_time_2024-08-11-17h-04m-13s/checkpoints/epoch=37-step=13414.ckpt",
}


# PhaseNet checkpoints from runs tagged GEOFON, one per training-data fraction.
# Keys are the fraction as a string and mirror the `train_frac_*` component of
# each run directory; values are paths to the saved `.ckpt` files.
# NOTE(review): these are absolute paths on one specific machine — consider
# deriving them from a configurable results directory.
phasenet_geofon = {
  '0.05': "/home/liu0003/Desktop/projects/seisLM/results/models/phasepick_run/phasenet_GEOFON_train_frac_0.05_model_PhaseNet_seed_42_time_2024-08-11-16h-47m-30s/checkpoints/epoch=41-step=5208.ckpt",
  '0.1': "/home/liu0003/Desktop/projects/seisLM/results/models/phasepick_run/phasenet_GEOFON_train_frac_0.1_model_PhaseNet_seed_42_time_2024-08-11-16h-47m-30s/checkpoints/epoch=42-step=10965.ckpt",
  '0.2': "/home/liu0003/Desktop/projects/seisLM/results/models/phasepick_run/phasenet_GEOFON_train_frac_0.2_model_PhaseNet_seed_42_time_2024-08-11-16h-47m-30s/checkpoints/epoch=26-step=13392.ckpt",
  '0.5': "/home/liu0003/Desktop/projects/seisLM/results/models/phasepick_run/phasenet_GEOFON_train_frac_0.5_model_PhaseNet_seed_42_time_2024-08-11-16h-47m-31s/checkpoints/epoch=28-step=36801.ckpt",
  '1.0': "/home/liu0003/Desktop/projects/seisLM/results/models/phasepick_run/phasenet_GEOFON_train_frac_1.0_model_PhaseNet_seed_42_time_2024-08-11-16h-47m-31s/checkpoints/epoch=20-step=52920.ckpt",
}


# seisLM (`MultiDimWav2Vec2ForFrameClassification`, "convpos" runs) checkpoints
# from runs tagged ETHZ, one per training-data fraction.
# Keys are the fraction as a string and mirror the `train_frac_*` component of
# each run directory; values are paths to the saved `.ckpt` files.
# NOTE(review): these are absolute paths on one specific machine — consider
# deriving them from a configurable results directory.
seislm_ethz = {
  '0.05': "/home/liu0003/Desktop/projects/seisLM/results/models/phasepick_run/seisLM_convpos_ETHZ_train_frac_0.05_model_MultiDimWav2Vec2ForFrameClassification_seed_42_time_2024-08-12-20h-00m-41s/checkpoints/epoch=40-step=656.ckpt", 
  '0.1': "/home/liu0003/Desktop/projects/seisLM/results/models/phasepick_run/seisLM_convpos_ETHZ_train_frac_0.1_model_MultiDimWav2Vec2ForFrameClassification_seed_42_time_2024-08-12-20h-06m-11s/checkpoints/epoch=41-step=2016.ckpt",
  '0.2': "/home/liu0003/Desktop/projects/seisLM/results/models/phasepick_run/seisLM_convpos_ETHZ_train_frac_0.2_model_MultiDimWav2Vec2ForFrameClassification_seed_42_time_2024-08-12-20h-14m-40s/checkpoints/epoch=44-step=3600.ckpt",
  '0.5': "/home/liu0003/Desktop/projects/seisLM/results/models/phasepick_run/seisLM_convpos_ETHZ_train_frac_0.5_model_MultiDimWav2Vec2ForFrameClassification_seed_42_time_2024-08-12-20h-26m-11s/checkpoints/epoch=36-step=6327.ckpt",
  '1.0': "/home/liu0003/Desktop/projects/seisLM/results/models/phasepick_run/seisLM_convpos_ETHZ_train_frac_1.0_model_MultiDimWav2Vec2ForFrameClassification_seed_42_time_2024-08-12-20h-46m-05s/checkpoints/epoch=46-step=16591.ckpt"
}

# seisLM (`MultiDimWav2Vec2ForFrameClassification`, "convpos" runs) checkpoints
# from runs tagged GEOFON, one per training-data fraction.
# Keys are the fraction as a string and mirror the `train_frac_*` component of
# each run directory; values are paths to the saved `.ckpt` files.
# NOTE(review): these are absolute paths on one specific machine — consider
# deriving them from a configurable results directory.
seislm_geofon = {
  '0.05': "/home/liu0003/Desktop/projects/seisLM/results/models/phasepick_run/seisLM_convpos_GEOFON_train_frac_0.05_model_MultiDimWav2Vec2ForFrameClassification_seed_42_time_2024-08-12-18h-36m-00s/checkpoints/epoch=38-step=4836.ckpt",
  '0.1': "/home/liu0003/Desktop/projects/seisLM/results/models/phasepick_run/seisLM_convpos_GEOFON_train_frac_0.1_model_MultiDimWav2Vec2ForFrameClassification_seed_42_time_2024-08-12-18h-36m-01s/checkpoints/epoch=46-step=11985.ckpt",
  '0.2': "/home/liu0003/Desktop/projects/seisLM/results/models/phasepick_run/seisLM_convpos_GEOFON_train_frac_0.2_model_MultiDimWav2Vec2ForFrameClassification_seed_42_time_2024-08-12-19h-01m-20s/checkpoints/epoch=49-step=24800.ckpt",
  '0.5': "/home/liu0003/Desktop/projects/seisLM/results/models/phasepick_run/seisLM_convpos_GEOFON_train_frac_0.5_model_MultiDimWav2Vec2ForFrameClassification_seed_42_time_2024-08-12-19h-13m-26s/checkpoints/epoch=48-step=62181.ckpt",
  '1.0': "/home/liu0003/Desktop/projects/seisLM/results/models/phasepick_run/seisLM_convpos_GEOFON_train_frac_1.0_model_MultiDimWav2Vec2ForFrameClassification_seed_42_time_2024-08-12-19h-25m-23s/checkpoints/epoch=36-step=93240.ckpt",
}

# Checkpoint groups selected for evaluation, keyed by
# (dataset name, model class name). Insertion order is the processing order.
all_ckpt_dicts = {}
# The seisLM groups are currently switched off:
# all_ckpt_dicts[('ethz', 'MultiDimWav2Vec2ForFrameClassification')] = seislm_ethz
# all_ckpt_dicts[('geofon', 'MultiDimWav2Vec2ForFrameClassification')] = seislm_geofon
all_ckpt_dicts[('ethz', 'PhaseNet')] = phasenet_ethz
all_ckpt_dicts[('geofon', 'PhaseNet')] = phasenet_geofon


: 

In [3]:
# Run inference with every selected checkpoint and save its pick predictions
# through `pick_eval.save_pick_predictions`.

# Evaluation split(s) handed to `save_pick_predictions`.
# Alternative used previously: sets = 'dev,test'
sets = 'test'

for (dataset, model_name), ckpt_dict in all_ckpt_dicts.items():
  # The targets directory depends only on the dataset, so build it once per group.
  targets_dir = project_path.gitdir() + f'/data/targets/{dataset}/'

  for frac, ckpt in ckpt_dict.items():
    # The run directory sits two levels above the checkpoint file
    # (<run_dir>/checkpoints/<file>.ckpt); its name is used as the save tag.
    # Computed once so the printed tag and the tag passed below cannot diverge.
    save_tag = Path(ckpt).parents[1].name

    print(f"dataset: {dataset}, model: {model_name}, frac: {frac}")
    print(f"ckpt: {ckpt}")
    print(f'model tag, {save_tag}')

    pick_eval.save_pick_predictions(
        checkpoint_path_or_data_name=ckpt,
        save_tag=save_tag,
        model_name=model_name,
        targets=targets_dir,
        sets=sets,
        batchsize=64
    )

dataset: ethz, model: PhaseNet, frac: 0.05
ckpt: /home/liu0003/Desktop/projects/seisLM/results/models/phasepick_run/phasenet_ETHZ_train_frac_0.05_model_PhaseNet_seed_42_time_2024-08-11-16h-57m-27s/checkpoints/epoch=49-step=800.ckpt
model tag, phasenet_ETHZ_train_frac_0.05_model_PhaseNet_seed_42_time_2024-08-11-16h-57m-27s


Preloading waveforms: 100%|██████████| 10485/10485 [00:12<00:00, 807.22it/s]
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]


Predicting DataLoader 0: 100%|██████████| 328/328 [00:24<00:00, 13.46it/s]


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]


Predicting DataLoader 0: 100%|██████████| 252/252 [00:18<00:00, 13.30it/s]
dataset: ethz, model: PhaseNet, frac: 0.1
ckpt: /home/liu0003/Desktop/projects/seisLM/results/models/phasepick_run/phasenet_ETHZ_train_frac_0.1_model_PhaseNet_seed_42_time_2024-08-11-16h-59m-42s/checkpoints/epoch=49-step=2400.ckpt
model tag, phasenet_ETHZ_train_frac_0.1_model_PhaseNet_seed_42_time_2024-08-11-16h-59m-42s


Preloading waveforms: 100%|██████████| 10485/10485 [00:02<00:00, 3563.24it/s]
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]


Predicting DataLoader 0: 100%|██████████| 328/328 [00:24<00:00, 13.65it/s]


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]


Predicting DataLoader 0: 100%|██████████| 252/252 [00:18<00:00, 13.33it/s]
dataset: ethz, model: PhaseNet, frac: 0.2
ckpt: /home/liu0003/Desktop/projects/seisLM/results/models/phasepick_run/phasenet_ETHZ_train_frac_0.2_model_PhaseNet_seed_42_time_2024-08-11-17h-01m-13s/checkpoints/epoch=48-step=3920.ckpt
model tag, phasenet_ETHZ_train_frac_0.2_model_PhaseNet_seed_42_time_2024-08-11-17h-01m-13s


Preloading waveforms: 100%|██████████| 10485/10485 [00:02<00:00, 3647.24it/s]
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]


Predicting DataLoader 0: 100%|██████████| 328/328 [00:24<00:00, 13.66it/s]


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]


Predicting DataLoader 0: 100%|██████████| 252/252 [00:18<00:00, 13.37it/s]
dataset: ethz, model: PhaseNet, frac: 0.5
ckpt: /home/liu0003/Desktop/projects/seisLM/results/models/phasepick_run/phasenet_ETHZ_train_frac_0.5_model_PhaseNet_seed_42_time_2024-08-11-17h-03m-46s/checkpoints/epoch=37-step=6498.ckpt
model tag, phasenet_ETHZ_train_frac_0.5_model_PhaseNet_seed_42_time_2024-08-11-17h-03m-46s


Preloading waveforms: 100%|██████████| 10485/10485 [00:02<00:00, 3637.54it/s]
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]


Predicting DataLoader 0: 100%|██████████| 328/328 [00:24<00:00, 13.47it/s]


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]


Predicting DataLoader 0: 100%|██████████| 252/252 [00:18<00:00, 13.28it/s]
dataset: ethz, model: PhaseNet, frac: 1.0
ckpt: /home/liu0003/Desktop/projects/seisLM/results/models/phasepick_run/phasenet_ETHZ_train_frac_1.0_model_PhaseNet_seed_42_time_2024-08-11-17h-04m-13s/checkpoints/epoch=37-step=13414.ckpt
model tag, phasenet_ETHZ_train_frac_1.0_model_PhaseNet_seed_42_time_2024-08-11-17h-04m-13s


Preloading waveforms: 100%|██████████| 10485/10485 [00:02<00:00, 3685.30it/s]
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]


Predicting DataLoader 0: 100%|██████████| 328/328 [00:24<00:00, 13.60it/s]


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]


Predicting DataLoader 0: 100%|██████████| 252/252 [00:18<00:00, 13.35it/s]
dataset: geofon, model: PhaseNet, frac: 0.05
ckpt: /home/liu0003/Desktop/projects/seisLM/results/models/phasepick_run/phasenet_GEOFON_train_frac_0.05_model_PhaseNet_seed_42_time_2024-08-11-16h-47m-30s/checkpoints/epoch=41-step=5208.ckpt
model tag, phasenet_GEOFON_train_frac_0.05_model_PhaseNet_seed_42_time_2024-08-11-16h-47m-30s


Preloading waveforms: 100%|██████████| 86261/86261 [00:16<00:00, 5109.23it/s]
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]


Predicting DataLoader 0: 100%|██████████| 2696/2696 [01:16<00:00, 35.11it/s]


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]


Predicting DataLoader 0: 100%|██████████| 1416/1416 [00:40<00:00, 35.14it/s]
dataset: geofon, model: PhaseNet, frac: 0.1
ckpt: /home/liu0003/Desktop/projects/seisLM/results/models/phasepick_run/phasenet_GEOFON_train_frac_0.1_model_PhaseNet_seed_42_time_2024-08-11-16h-47m-30s/checkpoints/epoch=42-step=10965.ckpt
model tag, phasenet_GEOFON_train_frac_0.1_model_PhaseNet_seed_42_time_2024-08-11-16h-47m-30s


Preloading waveforms: 100%|██████████| 86261/86261 [00:19<00:00, 4447.05it/s]
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]


Predicting DataLoader 0: 100%|██████████| 2696/2696 [01:15<00:00, 35.67it/s]


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]


Predicting DataLoader 0: 100%|██████████| 1416/1416 [00:40<00:00, 35.20it/s]
dataset: geofon, model: PhaseNet, frac: 0.2
ckpt: /home/liu0003/Desktop/projects/seisLM/results/models/phasepick_run/phasenet_GEOFON_train_frac_0.2_model_PhaseNet_seed_42_time_2024-08-11-16h-47m-30s/checkpoints/epoch=26-step=13392.ckpt
model tag, phasenet_GEOFON_train_frac_0.2_model_PhaseNet_seed_42_time_2024-08-11-16h-47m-30s


Preloading waveforms:  51%|█████▏    | 44361/86261 [00:09<00:08, 4730.40it/s]

# Task 1 - Event detection

In [None]:
# Task 1 (event detection): ROC curve for one saved prediction run.
task = 1 # '23'
eval_set = 'test'
data_name = 'ethz'
save_tag = 'phasenet_ETHZ_train_frac_0.05_model_PhaseNet_seed_42_time_2024-08-11-16h-57m-27s'
# Curve label. Set explicitly so this cell does not depend on a loop variable
# leaked from the inference cell (it matches the model named in `save_tag`).
model_name = 'PhaseNet'

# Predictions are read from <EVAL_SAVE_DIR>/<save_tag>_<data_name>/<eval_set>_task<task>.csv
pred_path = (
  Path(project_path.EVAL_SAVE_DIR)
  / f"{save_tag}_{data_name}"
  / f"{eval_set}_task{task}.csv"
)
pred = pd.read_csv(pred_path)

# Binary ground truth: True where the trace is labelled as an earthquake.
pred["trace_type_bin"] = pred["trace_type"] == "earthquake"

fpr, tpr, _ = roc_curve(pred["trace_type_bin"], pred["score_detection"])

fig, ax = plt.subplots(figsize=(6, 6))
ax.plot(fpr, tpr, label=model_name)
ax.set_aspect("equal")
ax.set_xlabel("false positive rate")
ax.set_ylabel("true positive rate")
# ax.set_title(f"{model_name}; train: {data_name}; eval: {eval_set}-{task}")
ax.legend()

In [None]:
# Task 1 (event detection): precision/recall curves and the F1-optimal
# detection threshold for each model.
task = 1 # '23'
eval_set = 'test'
data_name = 'ethz'

fig, ax = plt.subplots(figsize=(10, 10))

for model_name in ['PhaseNet', 'MultiDimWav2Vec2ForFrameClassification']:

  # NOTE(review): this reads from "<model_name>_<data_name>", whereas the
  # single-run ROC cell reads from "<save_tag>_<data_name>" — confirm these
  # directories exist and hold the intended runs.
  pred_path = (
    Path(project_path.EVAL_SAVE_DIR)
    / f"{model_name}_{data_name}"
    / f"{eval_set}_task{task}.csv"
  )

  pred = pd.read_csv(pred_path)
  # Binary ground truth: True where the trace is labelled as an earthquake.
  pred["trace_type_bin"] = pred["trace_type"] == "earthquake"

  prec, recall, thr = precision_recall_curve(pred["trace_type_bin"], pred["score_detection"])

  # F1 is 0/0 (NaN) wherever precision + recall == 0; silence the warning and
  # use NaN-aware selection below.
  with np.errstate(divide="ignore", invalid="ignore"):
    f1 = 2 * prec * recall / (prec + recall)

  # `prec`/`recall` have one more entry than `thr`: the final point has no
  # threshold, so it is excluded from the search. The best F1 is read at the
  # same index as the threshold so the two reported numbers always agree
  # (np.max would return NaN as soon as any entry is NaN).
  best_idx = np.nanargmax(f1[:-1])
  f1_threshold = thr[best_idx]
  best_f1 = f1[best_idx]

  ax.plot(prec, recall, label=model_name)

  print(f"{model_name} | Optimal F1:", best_f1)
  print(f"{model_name} | Optimal F1 threshold:", f1_threshold)

# Axis setup does not depend on the model, so it is done once after plotting.
ax.set_aspect("equal")
ax.set_xlabel("Precision")
ax.set_ylabel("Recall")
ax.set_xlim(ax.get_ylim())
ax.legend()

# Task 2 - Phase identification

In [None]:
# ROC curves for phase identification (P vs. S), one curve per model.
data_name = 'ethz'
eval_set = 'test'
task = '23'

fig, ax = plt.subplots(figsize=(6, 6))

for model_name in ('PhaseNet', 'MultiDimWav2Vec2ForFrameClassification'):
  # Predictions live in <EVAL_SAVE_DIR>/<model>_<data>/<set>_task<task>.csv
  run_dir = Path(project_path.EVAL_SAVE_DIR) / f"{model_name}_{data_name}"
  pred_path = run_dir / f"{eval_set}_task{task}.csv"
  pred = pd.read_csv(pred_path)

  # Positive class is the P phase.
  pred["phase_label_bin"] = pred["phase_label"] == "P"

  fpr, tpr, _ = roc_curve(pred["phase_label_bin"], pred["score_p_or_s"])
  ax.plot(fpr, tpr, label=model_name)

# Shared axis formatting, applied once after all curves are drawn.
ax.set_aspect("equal")
ax.set_xlabel("false positive rate")
ax.set_ylabel("true positive rate")
# ax.set_yscale('log')
# ax.set_xscale('log')
plt.legend()


In [None]:
# Phase identification (P vs. S): precision/recall curves and the F1-optimal
# threshold for each model.
task = '23'
eval_set = 'test'
data_name = 'ethz'

fig, ax = plt.subplots(figsize=(6, 6))

for model_name in ['PhaseNet', 'MultiDimWav2Vec2ForFrameClassification']:

  pred_path = (
    Path(project_path.EVAL_SAVE_DIR)
    / f"{model_name}_{data_name}"
    / f"{eval_set}_task{task}.csv"
  )
  pred = pd.read_csv(pred_path)

  # Positive class is the P phase.
  pred["phase_label_bin"] = pred["phase_label"] == "P"

  prec, recall, thr = precision_recall_curve(pred["phase_label_bin"], pred["score_p_or_s"])

  # F1 is 0/0 (NaN) wherever precision + recall == 0; silence the warning and
  # use NaN-aware selection below.
  with np.errstate(divide="ignore", invalid="ignore"):
    f1 = 2 * prec * recall / (prec + recall)

  # `prec`/`recall` have one more entry than `thr`: the final point has no
  # threshold, so it is excluded from the search. The best F1 is read at the
  # same index as the threshold so the two reported numbers always agree
  # (np.max would return NaN as soon as any entry is NaN).
  best_idx = np.nanargmax(f1[:-1])
  f1_threshold = thr[best_idx]
  best_f1 = f1[best_idx]

  ax.plot(prec, recall, label=model_name)

  print(f"{model_name} | Optimal F1:", best_f1)
  print(f"{model_name} | Optimal F1 threshold:", f1_threshold)

# Axis setup does not depend on the model, so it is done once after plotting.
ax.set_aspect("equal")
ax.set_xlabel("Precision")
ax.set_ylabel("Recall")
ax.set_xlim(ax.get_ylim())
# The curves carry labels, so show them (the other plotting cells do as well).
ax.legend()


In [None]:
# (Stray `model` inspection removed: no cell in this notebook defines `model`,
# so the bare expression raised NameError on Restart & Run All.)

In [None]:
# Onset-time residuals (predicted minus labelled onset) per phase, with both
# models overlaid in each panel.
task = 23
eval_set = 'test'
# Set explicitly so the cell does not rely on a value left over from earlier cells.
data_name = 'ethz'

model_names = ['PhaseNet', 'MultiDimWav2Vec2ForFrameClassification']

# Common histogram bins for every panel and model.
bins = np.linspace(-2.5, 2.5, 50)

# Read each model's prediction table once instead of once per phase.
preds_by_model = {
  model_name: pd.read_csv(
    Path(project_path.EVAL_SAVE_DIR)
    / f"{model_name}_{data_name}"
    / f"{eval_set}_task{task}.csv"
  )
  for model_name in model_names
}

fig, axs = plt.subplots(1, 2, figsize=(8, 4))

for ax, phase in zip(axs, ["P", "S"]):
  pred_col = f"{phase.lower()}_sample_pred"

  for model_name, pred in preds_by_model.items():
    pred_phase = pred[pred["phase_label"] == phase]

    # Sample offset divided by the sampling rate
    # (presumably yielding seconds — TODO confirm the units of `sampling_rate`).
    diff = (pred_phase[pred_col] - pred_phase["phase_onset"]) / pred_phase["sampling_rate"]

    print(f'dataset: {data_name} | model: {model_name} | phase {phase} | MAE {diff.abs().mean()}')
    ax.hist(diff, bins=bins, label=model_name, alpha=0.5)

  ax.set_title(f"{phase} arrivals")
  ax.set_xlabel("$t_{pred} - t_{true}$")
  # Per-axes legend: plt.legend() would only annotate the last panel.
  ax.legend()

# The figure overlays every model, so the title names only the data and split.
fig.suptitle(f"train: {data_name}; eval: {eval_set}-{task}")