In [None]:
import os
import h5py
import numpy as np
import pandas as pd
import matplotlib as mpl
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn import preprocessing
import torch
import mlflow
import itertools

from thisquakedoesnotexist.thisquakedoesnotexist import get_waves_real_bin, get_cond_var_bins
from thisquakedoesnotexist.models import gan
from thisquakedoesnotexist.utils.random_fields import rand_noise
from thisquakedoesnotexist.utils.data_utils import SeisData

In [None]:
# mpl.use('module://matplotlib_inline.backend_inline')

In [None]:
# Restrict the process to the first GPU; this is set before the first torch.cuda
# call made below.
os.environ["CUDA_VISIBLE_DEVICES"] = "0"
use_cuda = torch.cuda.is_available()
device = torch.device("cuda" if use_cuda else "cpu")
print(f"Running on device: {device}")

# MLflow artifact path of the generator checkpoint evaluated in this notebook.
# Alternative checkpoint (final model of run f5d336cb), kept for reference:
# logged_model = '../../mlruns/893763428974995232/f5d336cb5bca47249b5eed04a424e7d8/artifacts/thisquakedoesnotexist/data/output_f5d336cb/model_final'
logged_model = '../../mlruns/893763428974995232/650af9f98f75467fb751d633bed32361/artifacts/thisquakedoesnotexist/data/output_650af9f9/model_epoch_00280'

# Extra keyword arguments are forwarded by MLflow to torch.load; map_location puts
# the weights on the device selected above, so a checkpoint written on a GPU can
# also be opened on a CPU-only machine.
loaded_model = mlflow.pytorch.load_model(logged_model, map_location=device)

In [None]:
# Notebook-wide seaborn theme.
sns.set_theme(font_scale=1.0)

# Re-order seaborn's 'dark' palette: entries 3 and 7 are moved to the front, the
# remaining entries keep their relative order. The result is installed as the
# default colour cycle and is also indexed directly (colors[1]) by later cells.
color_palette = sns.color_palette('dark')
palette_order = (3, 7, 0, 1, 2, 4, 5, 6, 8, 9)
colors = [color_palette[k] for k in palette_order]
sns.set_palette(sns.color_palette(colors))

In [None]:
# Read the per-record attribute table and the waveform array of the Japanese
# data set, then echo both for a quick visual check.
attributes = pd.read_csv('../data/japan/attributes.csv')
wfs = np.load('../data/japan/waveforms.npy')

for loaded in (attributes, wfs):
    print(loaded)

In [None]:
# Wrap the raw waveform array loaded above in a DataFrame.
wfs_df = pd.DataFrame(wfs)

In [None]:
# Pull the three attribute columns used by the overview plots below and print
# their summary statistics (distance, magnitude, then PGA).
dists, mags, pga_vs = (attributes[column] for column in ('dist', 'mag', 'pga_v'))

for series in (dists, mags, pga_vs):
    print(series.describe())

In [None]:
# Histogram of the record magnitudes; log_scale=(False, True) keeps the magnitude
# axis linear and makes the count axis logarithmic.
# Note: sns.histplot returns a matplotlib Axes, so `fig` holds an Axes here.
fig = sns.histplot(mags, bins='auto', color=colors[1], log_scale=(False, True))
plt.xlabel(xlabel='Magnitude $M_w$')
plt.title('Japanese Dataset Magnitude Distribution')
plt.show()

In [None]:
# Hex-bin joint distribution of magnitude (x) and distance (y) with marginal
# histograms, saved to ../plots/mag_dist_data_distr.pdf.
g = sns.jointplot(x=mags, y=dists, kind="hex", color="#4CB391", marginal_ticks=True, joint_kws=dict(gridsize=20))
g.fig.set_size_inches((9, 6))
# plt.xlabel / plt.ylabel act on the current axes.
# NOTE(review): presumably that is the joint axes right after jointplot — confirm.
plt.xlabel('Magnitude $M_w$')
plt.ylabel('Distance [km]')

g.ax_marg_x.set_xlabel('Number of Records')
plt.subplots_adjust(left=0.2, right=0.8) #, top=0.75, bottom=0.25)  # shrink fig so cbar is visible
cbar_ax = g.fig.add_axes([0.85, 0.115, 0.02, 0.70])  # x, y, width, height
# No mappable is passed, so the colorbar is built from pyplot's current image.
cb = plt.colorbar(cax=cbar_ax)
cb.set_label(r'Number of Records', fontsize=14)

#plt.show()
plt.savefig('../plots/mag_dist_data_distr.pdf')

In [None]:
# Hex-bin joint distribution of distance (x) and the natural log of pga_v (y),
# saved to ../plots/dist_pga_data_distr.pdf.
g = sns.jointplot(x=dists, y=np.log(pga_vs), kind="hex", color="#4CB391", marginal_ticks=True, joint_kws=dict(gridsize=20)) #, joint_kws=dict(gridsize=10))
g.fig.set_size_inches((9, 6))
# plt.xlabel / plt.ylabel act on the current axes.
# NOTE(review): presumably that is the joint axes right after jointplot — confirm.
plt.xlabel('Distance [km]')
plt.ylabel('Log PGA $\\left[\\frac{m}{s^2}\\right]$')

plt.subplots_adjust(left=0.2, right=0.8) #, top=0.75, bottom=0.25)  # shrink fig so cbar is visible
cbar_ax = g.fig.add_axes([0.85, 0.115, 0.02, 0.70])  # x, y, width, height
# No mappable is passed, so the colorbar is built from pyplot's current image.
cb = plt.colorbar(cax=cbar_ax)
cb.set_label(r'Number of Records', fontsize=14)

# plt.title('Distance and Magnitude Distributions of Japanese Dataset')
plt.savefig('../plots/dist_pga_data_distr.pdf')

In [None]:
# Inputs and settings handed to SeisData.
data_file = '../data/japan/waveforms.npy'
attr_file = '../data/japan/attributes.csv'
batch_size = 32
sample_rate = 20

# Names of the conditioning variables passed to SeisData as v_names.
condv_names = ['dist', 'mag', 'vs30']

# Only the number of records is needed here, so the file is memory-mapped instead
# of being read into RAM in full and discarded right away.
num_samples = len(np.load(data_file, mmap_mode='r'))

# Select every record of the file.
ix_all = np.arange(num_samples)

dataset = SeisData(
        data_file=data_file,
        attr_file=attr_file,
        batch_size=batch_size,
        sample_rate=sample_rate,
        v_names=condv_names,
        isel=ix_all,
    )

In [None]:
# Panel labels '(a)' ... '(l)' used to tag sub-plots in multi-panel figures.
letters = [f'({ch})' for ch in 'abcdefghijkl']

In [None]:
def _rolling_window(x, window_len, step_length):
    pos = 0
    while pos <= len(x) - window_len:
        yield x[pos : pos+window_len]
        pos += step_length
        
def plot_env(samples, dist, mag, dt=0.05):
    """Plot one waveform together with several envelope estimates and its spectrum.

    A five-panel figure is drawn, saved as PDF under ``../plots/`` and shown:
    (a) the raw signal, (b) log|signal| with block-wise means, (c) log|signal|
    with block-wise maxima, (d) log|signal| with a trailing rolling maximum and
    (e) the power spectrum with block-wise means. All windows are about one
    second long.

    Relies on the notebook globals ``colors``, ``letters`` and
    ``_rolling_window``.

    Args:
        samples: 1-D sequence of waveform samples.
        dist: Distance of the record; used in the title and the file name.
        mag: Magnitude of the record; used in the title and the file name.
        dt: Sampling interval in seconds. The default of 0.05 s is the value
            that was previously hard-coded.

    Notes:
        Exact zeros in ``samples`` map to ``-inf`` in the log panels, because no
        offset is added before taking the logarithm.
        NOTE(review): the title calls the record "Real" while the output file
        name starts with ``synthetic_data_plot`` — confirm which is intended.
    """
    signal = np.array(samples)
    log_signal = np.log(np.abs(signal))

    # The time axis follows the actual record length instead of assuming 1000 samples.
    tt = dt * np.arange(len(signal))

    # One-second windows.
    window_length = int(round(1 / dt))
    half_wl = window_length // 2

    accel_label = 'Ground Acceleration $\\left[\\frac{m}{s^2}\\right]$'
    log_accel_label = 'Log Ground Acceleration $\\left[\\frac{m}{s^2}\\right]$'

    def _finish_panel(axis, letter, xlabel, ylabel):
        # Panel letter, axis labels and legend shared by all five panels.
        axis.text(0.05, 1.1, letter, transform=axis.transAxes, fontsize=12, va='top', ha='right')
        axis.set_xlabel(xlabel)
        axis.set_ylabel(ylabel)
        axis.legend(loc=4)

    fig, ax = plt.subplots(5, 1, figsize=(15, 15))

    # (a) raw signal with a y-range symmetric around zero
    ax[0].plot(tt, signal, lw=0.5, label='Signal', color=colors[1])
    low, high = ax[0].get_ylim()
    bound = max(abs(low), abs(high))
    ax[0].set_ylim(-bound, bound)
    _finish_panel(ax[0], letters[0], 'Time [s]', accel_label)

    # (b) mean of log|signal| over non-overlapping windows, drawn at the window centres
    block_mean = [w.mean() for w in _rolling_window(log_signal, window_length, window_length)]
    locs = dt * np.arange(half_wl, len(block_mean) * window_length, window_length)
    ax[1].plot(tt, log_signal, lw=0.5, label='Log Transformed Signal', color=colors[1])
    ax[1].plot(locs, block_mean, label='Centered Mean')
    _finish_panel(ax[1], letters[1], 'Time [s]', log_accel_label)

    # (c) maximum of log|signal| over the same non-overlapping windows
    block_max = [w.max() for w in _rolling_window(log_signal, window_length, window_length)]
    ax[2].plot(tt, log_signal, lw=0.5, label='Log Transformed Signal', color=colors[1])
    ax[2].plot(locs, block_max, label='Centered Max')
    _finish_panel(ax[2], letters[2], 'Time [s]', log_accel_label)

    # (d) trailing maximum with step 1; the first window_length - 1 samples have
    # no complete window and are padded with NaN so the curve lines up with tt.
    causal_max = [np.nan] * (window_length - 1)
    causal_max.extend(w.max() for w in _rolling_window(log_signal, window_length, 1))
    ax[3].plot(tt, log_signal, lw=0.5, label='Log Transformed Signal', color=colors[1])
    ax[3].plot(tt, causal_max, label='Causal Rolling Max')
    _finish_panel(ax[3], letters[3], 'Time [s]', log_accel_label)

    # (e) power spectrum and its block-wise mean
    s_fft = np.fft.rfft(signal)
    s_ps = np.real(s_fft * np.conj(s_fft))
    freq = np.fft.rfftfreq(signal.size, d=dt)
    spec_mean = [w.mean() for w in _rolling_window(s_ps, window_length, window_length)]
    # One abscissa per averaged block (25 for a 1000-sample record).
    locs_mean = np.linspace(freq.min(), freq.max(), len(spec_mean))
    ax[4].loglog(freq, s_ps, lw=0.5, label='Fourier Transformed Signal', color=colors[1])
    ax[4].plot(locs_mean, spec_mean, label='Centered Mean')
    ax[4].set_xlim((0.8, 15))
    _finish_panel(ax[4], letters[4], 'Frequency [Hz]', 'Fourier Amplitude')

    fig.suptitle(f'Randomly Selected Real Waveform. Dist: {dist:.1f}, Mag: {mag:.1f}')
    plt.tight_layout()

    fig_file = f'../plots/synthetic_data_plot_{dist:.1f}_km_mag_{mag:.1f}.pdf'
    plt.savefig(fig_file, format='pdf')
    plt.show()

In [None]:
# Bin edges of the conditioning variables as returned by get_cond_var_bins.
cond_var_bins = get_cond_var_bins(dataset, 10, True)

dist_bins, mag_bins = (cond_var_bins[key] for key in ('dist_bins', 'mag_bins'))

# N edges delimit N - 1 bins.
n_dist_bins, n_mag_bins = len(dist_bins) - 1, len(mag_bins) - 1

In [None]:


# Plot the first `num_plots` records (ordered by distance) of one
# distance/magnitude bin, one figure per record. For the third record the figure
# is also saved and the envelope diagnostics of plot_env are drawn.
dist_border = [dist_bins[4], dist_bins[5]]
mag_border = [5.4, 5.6]

wfs, c_norms, df_s, means, n_obs = get_waves_real_bin(dataset, dist_border, mag_border)
# NOTE(review): real_data is not used in this cell; the waveforms plotted below
# are read from `dataset` directly.
real_data = wfs * c_norms.reshape(-1, 1)
df_s = df_s.sort_values('dist')

tt = 0.05 * np.arange(1000)

num_plots = 5
for i in range(num_plots):
    fig, ax = plt.subplots(figsize=(8,4))
    # Index label of the i-th row of the sorted table; used to look the record up in `dataset`.
    location = df_s.iloc[i].name

    dist = dataset.df_meta['dist'][location]
    mag = dataset.df_meta['mag'][location]
    # Multiply the stored waveform by its cnorms entry.
    # NOTE(review): presumably this undoes a per-record normalisation — confirm against SeisData.
    waveform = dataset.wfs[location, :] * dataset.cnorms[location].reshape(-1)

    plt.plot(tt, waveform, lw=0.5, color=colors[1], label=f'Distance: {dist:.1f} km, $M_w$: {mag:.1f}')
    # Make the y-range symmetric around zero.
    low, high = ax.get_ylim()
    bound = max(abs(low), abs(high))
    ax.set_ylim(-bound, bound)
    # NOTE(review): this title is replaced by the plt.title call a few lines below.
    ax.set_title(f'Distance: {dist:.1f} km')


    ax.set_xlabel('Time [s]')
    ax.set_ylabel('Ground Acceleration $\\left[\\frac{m}{s^2}\\right]$')
    plt.title('Up-Down Component of Randomly Selected Waveform Record')
    plt.legend()
    plt.tight_layout()
    if i == 2:
        # NOTE(review): the file name is spelled 'wafeform'; left as is because other material may reference it.
        plt.savefig(f'../plots/single_wafeform.pdf')

        plot_env(waveform, dist, mag)

In [None]:
# Select all records whose magnitude is (numerically) 5.961192, order them by
# distance and drop the vs30 column; the bare expression displays the table.
# NOTE(review): presumably these are the records of one earthquake — confirm.
single_quake = dataset.df_meta[np.isclose(dataset.df_meta['mag'], 5.961192)].sort_values('dist').drop(columns='vs30')
single_quake

In [None]:
# Plot the sorted distances against their rank (reset_index gives a 0..n-1 index).
sorted_quake = single_quake['dist'].reset_index()

sorted_quake['dist'].plot()
plt.show()

# Release all open figures.
plt.close('all')


In [None]:
# Histogram of the distances of the records selected in `single_quake`,
# saved to ../plots/single_quake_dist_distr.pdf.
fig, ax = plt.subplots()

single_quake_d = single_quake['dist']
_, bins, _ = plt.hist(single_quake_d, bins=10, label='Count', color=colors[1])

# ax.tick_params(which="both", bottom=True)

# Put the x ticks exactly on the histogram bin edges.
plt.xticks(bins)

plt.title('$M_w$: 5.96 Earthquake Distance Distribution')
plt.legend()
plt.ylabel('Number of Records')
plt.xlabel('Station to Hypocenter Distance [km]')
plt.tight_layout()
plt.savefig('../plots/single_quake_dist_distr.pdf')
# The figure is closed right after saving.
plt.close('all')


In [None]:
# Stack `num_plots` real records that share one magnitude value, spread evenly
# over the distance-sorted table, and save the figure.
num_plots = 7
tt = 0.05 * np.arange(1000)

# A single plt.subplots call creates the figure; an additional bare plt.figure()
# would only leave an empty figure behind.
fig, ax = plt.subplots(num_plots, 1, figsize=(10, 12))

# NOTE(review): dist_bins[9] is the second-to-last edge if there are 10 bins, so
# the last distance bin would be excluded here — confirm this is intended.
dist_border = [dist_bins[0], dist_bins[9]]
# Narrow magnitude window (+/- 1e-5) around a single magnitude value.
mag_border = [5.045218 - 10e-6, 5.045218 + 10e-6]

wfs, c_norms, df_s, means, n_obs = get_waves_real_bin(dataset, dist_border, mag_border)
# Evenly spaced row positions across the selection.
idx = np.linspace(0, df_s.index.size-1, num_plots, dtype=int)
real_data = wfs * c_norms.reshape(-1, 1)
df_s = df_s.sort_values('dist')

for i in range(num_plots):
    location = df_s.iloc[idx[i]].name

    dist = dataset.df_meta['dist'][location]
    mag = dataset.df_meta['mag'][location]
    waveform = dataset.wfs[location, :] * dataset.cnorms[location].reshape(-1)

    ax[i].plot(tt, waveform, color=colors[1], lw=0.5, label=f'Signal Number {location}')
    # Symmetric y-range around zero.
    low, high = ax[i].get_ylim()
    bound = max(abs(low), abs(high))
    ax[i].set_ylim(-bound, bound)
    ax[i].set_title(f'Distance: {dist:.1f} km')

    # Only the bottom panel keeps its x tick labels.
    if i < num_plots - 1:
        ax[i].set_xticklabels([])

# Loop-invariant, so applied once after all panels are drawn.
plt.subplots_adjust(hspace=0.5)

plt.suptitle(f'Waveform Records From Single Earthquake, Magnitude $M_w$: {mag:.2f}\nTotal Number of Observations : {n_obs}')

# Label the bottom and the middle panel, whatever num_plots is.
ax[-1].set_xlabel('Time [s]')
ax[num_plots // 2].set_ylabel('Ground Acceleration $\\left[\\frac{m}{s^2}\\right]$', labelpad=1)

plt.tight_layout()

plt.savefig('../plots/real_data_samples_dist.pdf')

In [None]:
# Stack `num_plots` real records drawn from one distance bin across the whole
# magnitude range and save the figure.
num_plots = 7
tt = 0.05 * np.arange(1000)

# A single plt.subplots call creates the figure; an additional bare plt.figure()
# would only leave an empty figure behind.
fig, ax = plt.subplots(num_plots, 1, figsize=(10, 12))

dist_border = [dist_bins[5], dist_bins[6]]
mag_border = [mag_bins[0], mag_bins[10]]

wfs, c_norms, df_s, means, n_obs = get_waves_real_bin(dataset, dist_border, mag_border)
# Evenly spaced row positions across the selection (the table is not re-sorted here).
idx = np.linspace(0, df_s.index.size-1, num_plots, dtype=int)
real_data = wfs * c_norms.reshape(-1, 1)

for i in range(num_plots):
    location = df_s.iloc[idx[i]].name

    dist = dataset.df_meta['dist'][location]
    mag = dataset.df_meta['mag'][location]
    waveform = dataset.wfs[location, :] * dataset.cnorms[location].reshape(-1)

    ax[i].plot(tt, waveform, color=colors[1], lw=0.5, label=f'Signal Number {location}')
    ax[i].set_title(f"Distance: {dist:.1f} km, $M_w$: {mag:.1f}")

    # Symmetric y-range around zero.
    low, high = ax[i].get_ylim()
    bound = max(abs(low), abs(high))
    ax[i].set_ylim(-bound, bound)

    # Only the bottom panel keeps its x tick labels.
    if i < num_plots - 1:
        ax[i].set_xticklabels([])

# Loop-invariant, so applied once after all panels are drawn.
plt.subplots_adjust(hspace=0.5)

plt.suptitle(f'Randomly Drawn Waveform Records From Bin\nDistance: [{dist_border[0]:.1f}, {dist_border[1]:.1f}], Magnitude: [{mag_border[0]:.2f}, {mag_border[1]:.2f}].\nTotal Number of Observations : {n_obs}')

# Label the bottom and the middle panel, whatever num_plots is.
ax[-1].set_xlabel('Time [s]')
ax[num_plots // 2].set_ylabel('Ground Acceleration $\\left[\\frac{m}{s^2}\\right]$', labelpad=1)

plt.tight_layout()

plt.savefig('../plots/real_data_samples_mag.pdf')


In [None]:
# Largest distance and magnitude in the data set; later cells divide the
# conditioning values by these before passing them to the generator.
dist_max = dataset.df_meta['dist'].max()    
mag_max = dataset.df_meta['mag'].max()

In [None]:
# Shared settings for the synthetic-data cells below.
dt = 0.05  # used as the sample spacing of the time axes (seconds)
noise_dim = 100  # passed to rand_noise
cp = sns.color_palette('Reds', 10)
# Noise source; its .sample(n) output is fed to the generator in later cells.
grf = rand_noise(1, noise_dim, device=device)


In [None]:
# Compare the time-domain envelope (median and inter-quartile band of |a(t)|) of
# the real records in one distance/magnitude bin with that of synthetic records
# generated for exactly the same distance/magnitude pairs.
dt = 0.05

dist_border = [cond_var_bins['dist_bins'][4], cond_var_bins['dist_bins'][5]]
mag_border = [cond_var_bins['mag_bins'][7], cond_var_bins['mag_bins'][8]]

wfs, c_norms, df_s, means, n_obs = get_waves_real_bin(dataset, dist_border, mag_border)

c_norms = c_norms.reshape(-1, 1)

real_data = np.abs(wfs * c_norms)
# Tiny offset so that exact zeros do not produce log(0).
real_data_log = np.log(real_data + 1e-20)

# Quartiles are taken in the log domain and mapped back.
rd_25 = np.exp(np.percentile(real_data_log, 25, axis=0))
rd_75 = np.exp(np.percentile(real_data_log, 75, axis=0))

real_data_median = np.median(real_data, axis=0) + 1e-20

samples = n_obs

# One conditioning row per real record, divided by the data-set maxima. The
# tensors are created on `device` (no hard-coded .cuda()) and filled in one step.
# NOTE(review): assumes df_s has exactly n_obs rows — confirm against get_waves_real_bin.
vc_list = [
    torch.as_tensor(np.asarray(df_s['dist'], dtype=np.float32), device=device).reshape(-1, 1) / float(dist_max),
    torch.as_tensor(np.asarray(df_s['mag'], dtype=np.float32), device=device).reshape(-1, 1) / float(mag_max),
]

grf = rand_noise(1, noise_dim, device=device)
random_data = grf.sample(samples)
syn_data, syn_scaler = loaded_model(random_data, *vc_list)
syn_data = syn_data.squeeze().detach().cpu().numpy()
# Multiply the generated waveforms by the scale factors the generator returns.
syn_data = syn_data * syn_scaler.detach().cpu().numpy()

synthetic_data = np.median(np.abs(syn_data), axis=0) + 1e-20

sd_25 = np.percentile(np.abs(syn_data), 25, axis=0)
sd_75 = np.percentile(np.abs(syn_data), 75, axis=0)

nt = synthetic_data.shape[0]
tt = dt * np.arange(0, nt)

fig = plt.figure(figsize=(8, 4))
plt.semilogy(tt, synthetic_data, '-' , label='Synthetic Data', alpha=0.8, lw=0.5)
plt.fill_between(tt, sd_75, sd_25, alpha=0.1)
plt.semilogy(tt, real_data_median, '-' , label='Real Data', alpha=0.8, lw=0.5)
plt.fill_between(tt, rd_75, rd_25, alpha=0.1)

plt.ylabel('Log Ground Acceleration $\\left[\\frac{m}{s^2}\\right]$')
plt.xlabel('Time [s]')
plt.legend(loc=4)
plt.title(f"Time Domain Envelopes for one Distance and Magnitude Bin.\nObservations in Bin: {n_obs}, Distance: [{dist_border[0]:.1f},{dist_border[1]:.1f}] km, Magnitude: [{mag_border[0]:.1f},{mag_border[1]:.1f}]")
fig_file = f'../plots/synthetic_real_comp_dist_{dist_border[0]:03.0f}-{dist_border[1]:03.0f}_mag_{mag_border[0]:02.1f}-{mag_border[1]:02.1f}.pdf'
plt.savefig(fig_file, format='pdf')

In [None]:
# 3 x 2 grid of median |a(t)| envelopes: one row per distance bin (1, 4, 7), the
# left column synthetic and the right column real, one curve for every second
# magnitude bin.
fig, axs = plt.subplots(3, 2, sharex='col', sharey='all',
                        gridspec_kw={'hspace': 0.2, 'wspace': 0.05},
                        figsize=(20,12),
                        )
# Invisible full-figure axes; it carries the shared x label set at the end.
fig.add_subplot(111, frameon=False)
plt.tick_params(labelcolor='none', top=False, bottom=False, left=False, right=False)
plt.grid(False)

dt = 0.05

# ---- left column: synthetic data ----
p_y = 0
cp = sns.color_palette('Reds', 10)

for p_x, distbucket in enumerate([1, 4, 7]):
    for j in range(0, n_mag_bins, 2):
        dist_border = [cond_var_bins['dist_bins'][distbucket], cond_var_bins['dist_bins'][distbucket+1]]
        mag_border = [cond_var_bins['mag_bins'][j], cond_var_bins['mag_bins'][j+1]]
        wfs, c_norms, df_s, means, n_obs = get_waves_real_bin(dataset, dist_border, mag_border)

        samples = n_obs

        # One conditioning row per real record of the bin, divided by the data-set
        # maxima and created on `device` (no hard-coded .cuda()).
        # NOTE(review): assumes df_s has exactly n_obs rows — confirm.
        vc_list = [
            torch.as_tensor(np.asarray(df_s['dist'], dtype=np.float32), device=device).reshape(-1, 1) / float(dist_max),
            torch.as_tensor(np.asarray(df_s['mag'], dtype=np.float32), device=device).reshape(-1, 1) / float(mag_max),
        ]

        grf = rand_noise(1, noise_dim, device=device)
        random_data = grf.sample(n_obs)
        syn_data, syn_scaler = loaded_model(random_data, *vc_list)
        syn_data = syn_data.squeeze().detach().cpu().numpy()

        syn_scaler = syn_scaler.detach().cpu().numpy()
        syn_data = syn_data * syn_scaler

        synthetic_data = np.median(np.abs(np.array(syn_data)), axis=0) + 1e-20

        nt = synthetic_data.shape[0]
        tt = dt * np.arange(0, nt)
        axs[p_x, p_y].semilogy(tt, synthetic_data, '-' , color=cp[j // 2 + 3], label=f'Dist: [{dist_border[0]:.1f}, {dist_border[1]:.1f}] km, Mag: [{mag_border[0]:.1f}, {mag_border[1]:.1f}]', alpha=0.8, lw=0.5)

    # Panel letter and legend are set once per panel, after all its curves exist.
    axs[p_x, p_y].text(0.05, 1.1, letters[p_y * 3 + p_x], transform=axs[p_x, p_y].transAxes, fontsize=12, va='top', ha='right')
    axs[p_x, p_y].legend(loc=4)

# ---- right column: real data ----
p_y = 1
cp = sns.color_palette('Greys', 10)

for p_x, distbucket in enumerate([1, 4, 7]):
    for j in range(0, n_mag_bins, 2):
        dist_border = [cond_var_bins['dist_bins'][distbucket], cond_var_bins['dist_bins'][distbucket+1]]
        mag_border = [cond_var_bins['mag_bins'][j], cond_var_bins['mag_bins'][j+1]]

        wfs, c_norms, df_s, means, n_obs = get_waves_real_bin(dataset, dist_border, mag_border)
        dist = means['dist']
        mag = means['mag']
        real_data = wfs * c_norms.reshape(-1, 1)
        real_data = np.median(np.abs(np.array(real_data)), axis=0) + 1e-20

        nt = real_data.shape[0]
        tt = dt * np.arange(0, nt)
        axs[p_x, p_y].semilogy(tt, real_data, '-' , color=cp[j // 2 + 3], label=f'Dist: [{dist_border[0]:.1f}, {dist_border[1]:.1f}] km, Mag: [{mag_border[0]:.1f}, {mag_border[1]:.1f}]', alpha=0.8, lw=0.5)

    # Column-major lettering: (a)-(c) in the left column, (d)-(f) in the right
    # one, so that no letter is used twice.
    axs[p_x, p_y].text(0.05, 1.1, letters[p_y * 3 + p_x], transform=axs[p_x, p_y].transAxes, fontsize=12, va='top', ha='right')
    axs[p_x, p_y].legend(loc=4)


axs[0,0].set_title('Synthetic Data')
axs[0,1].set_title('Real Data')
axs[0,0].set_ylim(bottom=10e-7)
axs[1, 0].set_ylabel('Log Ground Acceleration $\\left[\\frac{m}{s^2}\\right]$')
plt.xlabel('Time [s]')
plt.suptitle('Synthetic and Real Data Bin Time Domain Envelopes')

plt.savefig('../plots/syn_real_data_fix_dist.pdf')


In [None]:
# 3 x 2 grid of median |a(t)| envelopes: one row per magnitude bin (1, 5, 9), the
# left column synthetic and the right column real, one curve for every second
# distance bin (odd bin indices).
fig, axs = plt.subplots(3, 2, sharex='col', sharey='all',
                        gridspec_kw={'hspace': 0.2, 'wspace': 0.05},
                        figsize=(20,12),
                        )
# Invisible full-figure axes; it carries the shared x label set at the end.
fig.add_subplot(111, frameon=False)
plt.tick_params(labelcolor='none', top=False, bottom=False, left=False, right=False)
plt.grid(False)

dt = 0.05
noise_dim = 100
grf = rand_noise(1, noise_dim, device=device)

# Divisors for the conditioning values; they do not depend on the bin, so they
# are computed once.
dist_max = dataset.df_meta['dist'].max()
mag_max = dataset.df_meta['mag'].max()

# ---- left column: synthetic data ----
p_y = 0
cp = sns.color_palette('Reds', 10)

for p_x, magbucket in enumerate([1, 5, 9]):
    for j in range(1, n_dist_bins, 2):
        dist_border = [cond_var_bins['dist_bins'][j], cond_var_bins['dist_bins'][j+1]]
        mag_border = [cond_var_bins['mag_bins'][magbucket], cond_var_bins['mag_bins'][magbucket+1]]
        wfs, c_norms, df_s, means, n_obs = get_waves_real_bin(dataset, dist_border, mag_border)

        samples = n_obs

        # Every synthetic record of the bin is conditioned on the values in `means`.
        dist = means['dist']
        mag = means['mag']

        # Created on `device` instead of a hard-coded .cuda().
        vc_list = [
            dist / dist_max * torch.ones(samples, 1, device=device),
            mag / mag_max * torch.ones(samples, 1, device=device),
        ]

        grf = rand_noise(1, noise_dim, device=device)
        random_data = grf.sample(n_obs)
        syn_data, syn_scaler = loaded_model(random_data, *vc_list)
        syn_data = syn_data.squeeze().detach().cpu().numpy()
        syn_data = syn_data * syn_scaler.detach().cpu().numpy()

        synthetic_data = np.median(np.abs(np.array(syn_data)), axis=0) + 1e-20

        nt = synthetic_data.shape[0]
        tt = dt * np.arange(0, nt)
        axs[p_x, p_y].semilogy(tt, synthetic_data, '-' , color=cp[j // 2 + 3], label=f'Dist: [{dist_border[0]:.1f}, {dist_border[1]:.1f}] km, Mag: [{mag_border[0]:.1f}, {mag_border[1]:.1f}]', alpha=0.8, lw=0.5)

    # Panel letter and legend are set once per panel, after all its curves exist.
    axs[p_x, p_y].text(0.05, 1.1, letters[p_y * 3 + p_x], transform=axs[p_x, p_y].transAxes, fontsize=12, va='top', ha='right')
    axs[p_x, p_y].legend(loc=4)

# ---- right column: real data ----
p_y = 1
cp = sns.color_palette('Greys', 10)

for p_x, magbucket in enumerate([1, 5, 9]):
    for j in range(1, n_dist_bins, 2):
        dist_border = [cond_var_bins['dist_bins'][j], cond_var_bins['dist_bins'][j+1]]
        mag_border = [cond_var_bins['mag_bins'][magbucket], cond_var_bins['mag_bins'][magbucket+1]]
        wfs, c_norms, df_s, means, n_obs = get_waves_real_bin(dataset, dist_border, mag_border)
        dist = means['dist']
        mag = means['mag']
        real_data = wfs * c_norms.reshape(-1, 1)
        real_data = np.median(np.abs(np.array(real_data)), axis=0) + 1e-20

        nt = real_data.shape[0]
        tt = dt * np.arange(0, nt)
        axs[p_x, p_y].semilogy(tt, real_data, '-' , color=cp[j // 2 + 3], label=f'Dist: [{dist_border[0]:.1f}, {dist_border[1]:.1f}] km, Mag: [{mag_border[0]:.1f}, {mag_border[1]:.1f}]', alpha=0.8, lw=0.5)

    # Column-major lettering: (a)-(c) left column, (d)-(f) right column.
    axs[p_x, p_y].text(0.05, 1.1, letters[p_y * 3 + p_x], transform=axs[p_x, p_y].transAxes, fontsize=12, va='top', ha='right')
    axs[p_x, p_y].legend(loc=4)


axs[0,0].set_title('Synthetic Data')
axs[0,1].set_title('Real Data')
axs[0,0].set_ylim(bottom=10e-8)
axs[1, 0].set_ylabel('Log Ground Acceleration $\\left[\\frac{m}{s^2}\\right]$')
plt.xlabel('Time [s]')
plt.suptitle('Synthetic and Real Data Bin Time Domain Envelopes')

plt.savefig('../plots/syn_real_data_fix_mag.pdf')


In [None]:
# Envelopes (exp of the mean log-amplitude) of synthetic records:
#   1. for a fixed distance while the magnitude sweeps over every second bin edge,
#   2. for a fixed magnitude while the distance sweeps over every second bin edge.
# Relies on `dt` set by an earlier cell.
n_waveforms = 72 * 5

dist_min = dataset.df_meta['dist'].min()
dist_max = dataset.df_meta['dist'].max()
dist_mean = dataset.df_meta['dist'].mean()

mag_min = dataset.df_meta['mag'].min()
mag_max = dataset.df_meta['mag'].max()
mag_mean = dataset.df_meta['mag'].mean()

# Only the mean values are swept at the moment; use [dist_min, dist_mean, dist_max]
# and [mag_min, mag_mean, mag_max] to cover the extremes as well. The output file
# names do not include the fixed value, so several entries overwrite one another.
dists = [dist_mean]
mags = [mag_mean]


for dist in dists:
    for mag in cond_var_bins['mag_bins'][::2]:
        # Conditioning values divided by the data-set maxima, created on `device`.
        vc_list = [
            dist / dist_max * torch.ones(n_waveforms, 1, device=device),
            mag / mag_max * torch.ones(n_waveforms, 1, device=device),
        ]

        grf = rand_noise(1, 100, device=device)
        random_data = grf.sample(n_waveforms)
        # Use the model loaded at the top of the notebook; the alias `G` is only
        # defined in a later cell and is not available here on a fresh kernel.
        syn_data, syn_scaler = loaded_model(random_data, *vc_list)
        syn_data = syn_data.squeeze().detach().cpu().numpy()
        syn_data = syn_data * syn_scaler.detach().cpu().numpy()

        # The offset is added after abs() so the log argument is always positive.
        synthetic_data_log = np.log(np.abs(syn_data) + 1e-10)
        sd_mean = np.mean(synthetic_data_log, axis=0)

        y = np.exp(sd_mean)

        nt = synthetic_data_log.shape[1]
        tt = dt * np.arange(0, nt)
        plt.semilogy(tt, y, '-' , label=f'Dist: {dist:.2f}km, Mag: {mag:.2f}', alpha=0.8, lw=0.5)

    plt.legend()
    plt.xlabel('Time [s]')
    plt.ylabel('Log Ground Acceleration $\\left[\\frac{m}{s^2}\\right]$')
    plt.title('Time Domain Envelopes of Synthetic Waveform Records')
    plt.savefig("../plots/syn_data_mag_dependence_dist_final.pdf")
    plt.show()

for mag in mags:
    for dist in cond_var_bins['dist_bins'][::2]:
        vc_list = [
            dist / dist_max * torch.ones(n_waveforms, 1, device=device),
            mag / mag_max * torch.ones(n_waveforms, 1, device=device),
        ]

        grf = rand_noise(1, 100, device=device)
        random_data = grf.sample(n_waveforms)
        syn_data, syn_scaler = loaded_model(random_data, *vc_list)
        syn_data = syn_data.squeeze().detach().cpu().numpy()
        syn_data = syn_data * syn_scaler.detach().cpu().numpy()

        # The offset is added after abs() so the log argument is always positive.
        synthetic_data_log = np.log(np.abs(syn_data) + 1e-10)
        sd_mean = np.mean(synthetic_data_log, axis=0)

        y = np.exp(sd_mean)

        nt = synthetic_data_log.shape[1]
        tt = dt * np.arange(0, nt)
        plt.semilogy(tt, y, '-' , label=f'Dist: {dist:.2f}km, Mag: {mag:.2f}', alpha=0.8, lw=0.5)

    plt.legend()
    plt.xlabel('Time [s]')
    plt.ylabel('Log Ground Acceleration $\\left[\\frac{m}{s^2}\\right]$')
    plt.title('Time Domain Envelopes of Synthetic Waveform Records')
    plt.savefig("../plots/syn_data_dist_dependence_mag_final.pdf")
    plt.show()

In [None]:
# Count the real records in every (distance bin, magnitude bin) cell.
dist_bins = cond_var_bins['dist_bins']
mag_bins = cond_var_bins['mag_bins']

total_obs = 0
# Sized from the bin counts instead of a fixed 10 x 10, so it always matches the
# loops below. Rows are distance bins, columns are magnitude bins.
obs_mat = np.zeros((n_dist_bins, n_mag_bins), dtype=int)

for i in range(n_dist_bins):
    for j in range(n_mag_bins):

        dist_border = [dist_bins[i], dist_bins[i+1]]
        mag_border = [mag_bins[j], mag_bins[j+1]]

        # Only the last return value (the number of records) is needed here.
        *_, n_obs = get_waves_real_bin(dataset, dist_border, mag_border)

        total_obs += n_obs

        obs_mat[i, j] = n_obs

print(obs_mat)

In [None]:
# Display the magnitude bin edges.
mag_bins

In [None]:
# Heat map of the record counts in `obs_mat` (rows: distance bins, columns:
# magnitude bins), saved to ../plots/obs_distr.pdf.
fig, ax = plt.subplots(figsize=(8,6))

# Tick labels of the form '[lower, upper]' for every bin.
yticks = [f'[{dist_bins[i]:.1f}, {dist_bins[i+1]:.1f}]' for i in range(len(dist_bins)-1)]
xticks = [f'[{mag_bins[i]:.1f}, {mag_bins[i+1]:.1f}]' for i in range(len(mag_bins)-1)]

cmap = sns.color_palette("mako_r", as_cmap=True)

ax = sns.heatmap(obs_mat, cmap=cmap, annot=True, fmt='4d', yticklabels=yticks, xticklabels=xticks, cbar_kws={'label':'Number of Records'})
plt.xlabel('Magnitude Bin')
plt.ylabel('Distance Bin [km]')
plt.title('Data Set Records Distribution')
# One tick per label, so the count follows the number of magnitude bins.
# NOTE(review): the ticks sit at integer positions, i.e. on the left cell edges
# rather than the cell centres — presumably chosen to suit the rotated labels; confirm.
xtick_loc = np.arange(len(xticks))
plt.xticks(xtick_loc, xticks, rotation=30)
plt.tight_layout()

plt.savefig('../plots/obs_distr.pdf')

In [None]:
# Side-by-side comparison for records sharing one magnitude value: the left
# column shows a generated waveform, the right column the real record whose
# distance and magnitude were used as conditioning. Rows are ordered by distance.
n_cols = 2
n_rows = 9
n_tot = n_rows * n_cols
noise_dim = 100
G = loaded_model

# One selected record per row.
num_plots = n_rows
tt = 0.05 * np.arange(1000)

# A single plt.subplots call creates the figure; an additional bare plt.figure()
# would only leave an empty figure behind.
fig, ax = plt.subplots(n_rows, n_cols, sharex='col',
                        gridspec_kw={'hspace': 0.35, 'wspace': 0.15},
                        figsize=(16,12),
                        )

# Invisible full-figure axes carrying the shared axis labels.
fig.add_subplot(111, frameon=False)
plt.tick_params(labelcolor='none', top=False, bottom=False, left=False, right=False)
plt.grid(False)
plt.xlabel("Time [s]")
plt.ylabel('Ground Acceleration $\\left[\\frac{m}{s^2}\\right]$', labelpad=20.0)

# NOTE(review): dist_bins[9] is the second-to-last edge if there are 10 bins, so
# the last distance bin would be excluded here — confirm this is intended.
dist_border = [dist_bins[0], dist_bins[9]]
# Narrow magnitude window (+/- 1e-5) around a single magnitude value.
mag_border = [5.045218 - 10e-6, 5.045218 + 10e-6]

wfs, c_norms, df_s, means, n_obs = get_waves_real_bin(dataset, dist_border, mag_border)
idx = np.linspace(0, df_s.index.size-1, num_plots, dtype=int)
real_data = wfs * c_norms.reshape(-1, 1)
df_s = df_s.sort_values('dist')

for i in range(n_rows):
    location = df_s.iloc[idx[i]].name

    dist = dataset.df_meta['dist'][location]
    mag = dataset.df_meta['mag'][location]
    waveform = dataset.wfs[location, :] * dataset.cnorms[location].reshape(-1)

    # Conditioning values divided by the data-set maxima, created on `device`
    # instead of a hard-coded .cuda().
    vc_list = [
            dist / dist_max * torch.ones(n_tot, 1, device=device),
            mag / mag_max * torch.ones(n_tot, 1, device=device),
    ]
    grf = rand_noise(1, noise_dim, device=device)
    random_data = grf.sample(n_tot)
    syn_data, syn_scaler = G(random_data, *vc_list)
    syn_data = syn_data.squeeze()
    syn_data = np.array(syn_data.detach().cpu())
    syn_scaler = syn_scaler.detach().cpu().numpy()

    syn_data = syn_data * syn_scaler

    # Plot synthetic (n_tot records are generated; row i of the batch is shown)
    ax[i, 0].plot(tt, syn_data[i, :], linewidth=0.5, label='Synthetic Data')
    low, high = ax[i, 0].get_ylim()
    bound = max(abs(low), abs(high))
    ax[i, 0].set_ylim(-bound, bound)
    ax[i, 0].set_title(f'Distance: {dist:.1f} km')

    # Plot real
    ax[i, 1].plot(tt, waveform, color=colors[1], lw=0.5, label='Real Data')
    low, high = ax[i, 1].get_ylim()
    bound = max(abs(low), abs(high))
    ax[i, 1].set_ylim(-bound, bound)
    ax[i, 1].set_title(f'Distance: {dist:.1f} km')

ax[0, 0].legend(loc=1)
ax[0, 1].legend(loc=1)

fig.suptitle(f'Generated Waveform Records From Generator for Corresponding Real Records.\n$M_w$: {mag:.1f}')

fig_file = f"../plots/syn_and_generated_data_mag_{mag:.1f}.pdf"

plt.tight_layout()

plt.savefig(fig_file, format='pdf')


In [None]:
# Side-by-side comparison for records of one distance bin across all magnitudes:
# the left column shows a generated waveform, the right column the real record
# whose distance and magnitude were used as conditioning.
n_cols = 2
n_rows = 9
n_tot = n_rows * n_cols
noise_dim = 100
G = loaded_model

# One selected record per row.
num_plots = n_rows
tt = 0.05 * np.arange(1000)

# A single plt.subplots call creates the figure; an additional bare plt.figure()
# would only leave an empty figure behind.
fig, ax = plt.subplots(n_rows, n_cols, sharex='col',
                        gridspec_kw={'hspace': 0.35, 'wspace': 0.15},
                        figsize=(16,12),
                        )

# Invisible full-figure axes carrying the shared axis labels.
fig.add_subplot(111, frameon=False)
plt.tick_params(labelcolor='none', top=False, bottom=False, left=False, right=False)
plt.grid(False)
plt.xlabel("Time [s]")
plt.ylabel('Ground Acceleration $\\left[\\frac{m}{s^2}\\right]$', labelpad=20.0)

dist_border = [dist_bins[5], dist_bins[6]]
mag_border = [mag_bins[0], mag_bins[10]]

wfs, c_norms, df_s, means, n_obs = get_waves_real_bin(dataset, dist_border, mag_border)
# Evenly spaced row positions across the selection (the table is not re-sorted here).
idx = np.linspace(0, df_s.index.size-1, num_plots, dtype=int)
real_data = wfs * c_norms.reshape(-1, 1)

for i in range(n_rows):
    location = df_s.iloc[idx[i]].name

    dist = dataset.df_meta['dist'][location]
    mag = dataset.df_meta['mag'][location]
    waveform = dataset.wfs[location, :] * dataset.cnorms[location].reshape(-1)

    # Conditioning values divided by the data-set maxima, created on `device`
    # instead of a hard-coded .cuda().
    vc_list = [
            dist / dist_max * torch.ones(n_tot, 1, device=device),
            mag / mag_max * torch.ones(n_tot, 1, device=device),
    ]
    grf = rand_noise(1, noise_dim, device=device)
    random_data = grf.sample(n_tot)
    syn_data, syn_scaler = G(random_data, *vc_list)
    syn_data = syn_data.squeeze()
    syn_data = np.array(syn_data.detach().cpu())
    syn_scaler = syn_scaler.detach().cpu().numpy()

    syn_data = syn_data * syn_scaler

    # Plot synthetic (n_tot records are generated; row i of the batch is shown)
    ax[i, 0].plot(tt, syn_data[i, :], linewidth=0.5, label='Synthetic Data')
    low, high = ax[i, 0].get_ylim()
    bound = max(abs(low), abs(high))
    ax[i, 0].set_ylim(-bound, bound)
    ax[i, 0].set_title(f'$M_w$: {mag:.1f}, Distance: {dist:.1f} km')

    # Plot real
    ax[i, 1].plot(tt, waveform, color=colors[1], lw=0.5, label='Real Data')
    low, high = ax[i, 1].get_ylim()
    bound = max(abs(low), abs(high))
    ax[i, 1].set_ylim(-bound, bound)
    ax[i, 1].set_title(f'$M_w$: {mag:.1f}, Distance: {dist:.1f} km')

ax[0, 0].legend(loc=1)
ax[0, 1].legend(loc=1)

fig.suptitle(f'Generated Waveform Records From Generator for Corresponding Real Records.\nDistance Bin: [{dist_border[0]:.1f}, {dist_border[1]:.1f}]')

# NOTE(review): the file name uses the distance of the last plotted record, not
# the bin borders shown in the title — confirm this is intended.
fig_file = f"../plots/syn_and_generated_data_dist_{dist:.1f}.pdf"
plt.savefig(fig_file, format='pdf')


In [None]:
# For every (distance, magnitude) combination, generate n_tot waveforms and save
# them as an n_rows x n_cols grid. Relies on n_rows, n_cols, n_tot, noise_dim and
# G set by an earlier cell.
dists = [30, 90, 150]
mags = [4.8, 5.8, 6.8]

for dist, mag in itertools.product(dists, mags):

    # Conditioning values divided by the data-set maxima, created on `device`
    # instead of a hard-coded .cuda().
    vc_list = [
            dist / dist_max * torch.ones(n_tot, 1, device=device),
            mag / mag_max * torch.ones(n_tot, 1, device=device),
    ]
    grf = rand_noise(1, noise_dim, device=device)
    random_data = grf.sample(n_tot)
    syn_data, syn_scaler = G(random_data, *vc_list)
    syn_data = syn_data.squeeze()
    syn_data = np.array(syn_data.detach().cpu())
    syn_scaler = syn_scaler.detach().cpu().numpy()

    syn_data = syn_data * syn_scaler

    n_t = syn_data.shape[1]
    tt = 0.05 * np.arange(0, n_t)

    # A single plt.subplots call per combination; an additional bare plt.figure()
    # would add one empty figure per loop iteration.
    fig, axs = plt.subplots(n_rows, n_cols, sharex='col',
                            gridspec_kw={'hspace': 0.2, 'wspace': 0.15},
                            figsize=(15,15),
                            )

    # Invisible full-figure axes carrying the shared axis labels.
    fig.add_subplot(111, frameon=False)
    plt.tick_params(labelcolor='none', top=False, bottom=False, left=False, right=False)
    plt.grid(False)
    plt.xlabel("Time [s]")
    plt.ylabel('Ground Acceleration $\\left[\\frac{m}{s^2}\\right]$', labelpad=10.0)

    for i, ax in enumerate(axs.flat):
        ax.plot(tt, syn_data[i, :], linewidth=0.5)
        # Symmetric y-range around zero.
        low, high = ax.get_ylim()
        bound = max(abs(low), abs(high))
        ax.set_ylim(-bound, bound)

    fig.suptitle(f'Randomly Selected Waveform Records From Generator.\nDist: {dist:.1f} km, Mag: {mag:.1f}')

    fig_file = f"../plots/generated_data_{dist:.1f}_km_mag_{mag:.1f}.pdf"
    plt.savefig(fig_file, format='pdf')