In [1]:
import pytorch_lightning as pl
import pytorch_lightning.callbacks as pl_callbacks
import torch
import numpy as np
import matplotlib.pyplot as plt
from tqdm import tqdm
import eq


from eq.data import Catalog, InMemoryDataset, Sequence, default_catalogs_dir

In [2]:
catalog1 = eq.catalogs.SCEDC(include_loc=False)
catalog2 = eq.catalogs.ANSS_MultiCatalog(    
    num_sequences=5000,
    t_end_days=1*365,
    mag_completeness=4.5,
    minimum_mainshock_mag=6.0,
    include_loc=False
)
# catalog3=eq.catalogs.White() 

Loading existing catalog from /home/gcl/RA/jonahk/recast/data/SCEDC.
Loading existing catalog from /home/gcl/RA/jonahk/recast/data/ANSS_MultiCatalog.




In [3]:
def combine_catalogs_sequences(seqlist):
    sequences = []
    for seq in seqlist:
        sequences.append(seq)
    return InMemoryDataset(sequences=sequences)

def build_seqlist(catalogs):
    train_sequences = []
    for catalog in catalogs:
        for seq in range(len(catalog)):
            train_sequences.append(catalog[seq])
    return train_sequences

def subtract_magnitudes(sequences, mag_completeness):
    for seq in sequences:
        seq.mag -= mag_completeness

def full_sequence(catalog):
    final = []
    for seq in catalog.train:
        final.append(seq)
    for seq in catalog.val:
        final.append(seq)
    for seq in catalog.test:
        final.append(seq)
    return final

def split_manual(seqs):
    data_len = len(seqs)
    train = seqs[0:int(data_len*.8)]
    val = seqs[int(data_len*.8):]
    return train, val


In [4]:
catalog1.test[0].mag -= 2.0
test = catalog1.test.get_dataloader( batch_size=5, shuffle=True)

In [5]:
#Compare results to single catalog training
catalog1.train[0].mag -= 2.0
catalog1.val[0].mag -= 2.0
single_train = catalog1.train.get_dataloader(batch_size=200, shuffle=True)
single_val = catalog1.val.get_dataloader(batch_size=200, shuffle=True)

T = catalog1.train[0].t_end

N = np.mean([len(seq) for seq in catalog1.train])
mag_mean = np.mean([catalog1.train[0].mag.mean().item() for seq in catalog1.train])
tau_mean = T/N

single_model = eq.models.RecurrentTPP(
    mag_mean = mag_mean,
    tau_mean = tau_mean,
    mag_completeness=catalog1.metadata['mag_completeness'],
    learning_rate=1e-3,
)

    # ModelCheckpoints saves the model with the best validation loss
checkpoint = pl_callbacks.ModelCheckpoint(monitor="total_val_loss")

    # EarlyStopping stops training if the validation loss doesn't improve by more than 1e-3 for 20 epochs
early_stopping = pl_callbacks.EarlyStopping(monitor="total_val_loss", patience=10, min_delta=1e-5)

    # RichProgressBar adds a nice and more functional progress bar
progress_bar = pl_callbacks.RichProgressBar()

    # Trainer set up training and validation loops with previous specs
trainer = pl.Trainer(devices=1,max_epochs=1000, callbacks=[checkpoint, early_stopping, progress_bar],log_every_n_steps=1)

trainer.fit(single_model, single_train, single_val)
checkpoint.best_model_path
single_model.load_from_checkpoint(checkpoint.best_model_path)
trainer.test(single_model, test)

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2]


Output()

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2]


Output()

  rank_zero_warn(
  rank_zero_warn(


[{'test_loss': -14.238285064697266, 'total_test_loss': -14.238285064697266}]

In [7]:
trainval_data = full_sequence(catalog2)

combined_cat_losses = []

for simulation in np.arange(2,10):
    #pre processing
    train, val = split_manual(trainval_data[simulation*100: simulation*100 + 100])
    subtract_magnitudes(train, 4.0)
    subtract_magnitudes(val, 4.0)
    train.append(catalog1.train[0])
    val.append(catalog1.val[0])
    final_train = combine_catalogs_sequences(train)
    final_val = combine_catalogs_sequences(val)

    #training
    dl_final_train = final_train.get_dataloader( batch_size=5, shuffle=True)
    dl_final_val = final_val.get_dataloader( batch_size=5, shuffle=True)

    T = final_train.sequences[0].t_end
    N = np.mean([len(seq) for seq in final_train])
    mag_mean = np.mean([final_train.sequences[0].mag.mean().item() for seq in final_train])
    tau_mean = T/N
    mag_completness = 0.6

    anss_double_model = eq.models.RecurrentTPP(
        mag_mean = mag_mean,
        tau_mean = tau_mean,
        mag_completeness = mag_completness,
        learning_rate=1e-3,
    )
        # ModelCheckpoints saves the model with the best validation loss
    checkpoint = pl_callbacks.ModelCheckpoint(monitor="total_val_loss")

        # EarlyStopping stops training if the validation loss doesn't improve by more than 1e-3 for 20 epochs
    early_stopping = pl_callbacks.EarlyStopping(monitor="total_val_loss", patience=10, min_delta=1e-5)

        # RichProgressBar adds a nice and more functional progress bar
    progress_bar = pl_callbacks.RichProgressBar()

        # Trainer set up training and validation loops with previous specs
    trainer = pl.Trainer(devices=1,max_epochs=1000, callbacks=[checkpoint, early_stopping, progress_bar],log_every_n_steps=1)

    trainer.fit(anss_double_model, dl_final_train, dl_final_val)
    checkpoint.best_model_path
    anss_double_model.load_from_checkpoint(checkpoint.best_model_path)
    nll_loss = trainer.test(anss_double_model, test)[0]['test_loss']
    combined_cat_losses.append(nll_loss)
    print(nll_loss)    

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2]


Output()

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2]


Output()

  rank_zero_warn(
  rank_zero_warn(


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2]


-14.133837699890137


Output()

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2]


Output()

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2]


-14.204086303710938


Output()

In [None]:
combined_cat_losses

NameError: name 'combined_cat_losses' is not defined

In [None]:
plt.hist(combined_cat_losses)