In [None]:
import os
import h5py
import numpy as np
import pandas as pd
import matplotlib as mpl
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn import preprocessing
import torch
import mlflow

from thisquakedoesnotexist.amplitudes.amplitudes import get_waves_real_bin, get_cond_var_bins
from thisquakedoesnotexist.models import gan
from thisquakedoesnotexist.utils.random_fields import rand_noise
from thisquakedoesnotexist.utils.data_utils import SeisData

In [None]:
# Expose only GPU 0 to this process, then pick CUDA when available and fall
# back to the CPU otherwise.
os.environ["CUDA_VISIBLE_DEVICES"] = "0"
use_cuda = torch.cuda.is_available()
device = torch.device("cuda" if use_cuda else "cpu")
print(f"Running on device: {device}")

# MLflow artifact directory of the logged generator (epoch 30 per its file name).
logged_model = '../../mlruns/893763428974995232/8a26d9ee666e4739bae9cd68dfa2e466/artifacts/thisquakedoesnotexist/data/output_8a26d9ee/model_epoch_00030'
# Extra keyword arguments are forwarded to torch.load; map_location lets a
# checkpoint that was saved from a GPU be restored on the CPU fallback too.
loaded_model = mlflow.pytorch.load_model(logged_model, map_location=device)
# Keep the model on the same device as the inputs built later in the notebook.
# NOTE(review): the model is left in whatever train/eval mode it was saved in.
loaded_model = loaded_model.to(device)

In [None]:
# Global seaborn styling plus a re-ordered version of the 'dark' palette that
# is used as the default colour cycle (and via `colors[...]`) in later cells.
sns.set()
color_palette = sns.color_palette('dark')
palette_order = (3, 7, 0, 1, 2, 4, 5, 6, 8, 9)
colors = [color_palette[position] for position in palette_order]
sns.set_palette(sns.color_palette(colors))


In [None]:
# Read the attribute table and the waveform array of the Japanese dataset and
# echo both so their contents can be inspected.
attributes_csv = '../data/japan/attributes.csv'
waveforms_npy = '../data/japan/waveforms.npy'

attributes = pd.read_csv(attributes_csv)
print(attributes)

wfs = np.load(waveforms_npy)
print(wfs)

In [None]:
# Tabular view of the waveform array loaded above.
wfs_df = pd.DataFrame(data=wfs)

In [None]:
# Pull out the columns used by the plots below and print summary statistics
# for each of them, in the order distance, magnitude, pga_v.
dists = attributes['dist']
mags = attributes['mag']
pga_vs = attributes['pga_v']

for column_values in (dists, mags, pga_vs):
    print(column_values.describe())

In [None]:
# Histogram of event magnitudes. Note that `fig` holds the object returned by
# sns.histplot, which is the Axes the histogram was drawn on.
fig = sns.histplot(data=mags, bins='auto', color=colors[1])
fig.set_xlabel('Magnitude $M_w$')
fig.set_title('Japanese Dataset Magnitude Distribution')
plt.show()

In [None]:
# Hexbin joint distribution of magnitude against distance, saved as a PDF.
g = sns.jointplot(
    x=mags,
    y=dists,
    kind="hex",
    color="#4CB391",
    joint_kws={"gridsize": 20},
)
g.fig.set_size_inches((12, 8))
g.ax_joint.set_xlabel('Magnitude $M_w$')
g.ax_joint.set_ylabel('Distance [km]')

# Narrow the plot area so a colorbar fits to the right of it.
g.fig.subplots_adjust(left=0.2, right=0.8)
cbar_ax = g.fig.add_axes([0.85, 0.12, 0.02, 0.705])  # [x, y, width, height]
# No mappable is passed: pyplot picks up the figure's current image itself.
cb = plt.colorbar(cax=cbar_ax)
cb.set_label(r'Number of Points', fontsize=13)

plt.savefig('../plots/mag_dist_data_distr.pdf')

In [None]:
# Hexbin joint distribution of distance against the natural log of the
# `pga_v` column, saved as a PDF.
log_pga = np.log(pga_vs)
g = sns.jointplot(
    x=dists,
    y=log_pga,
    kind="hex",
    color="#4CB391",
    joint_kws={"gridsize": 20},
)
g.fig.set_size_inches((12, 8))
g.ax_joint.set_xlabel('Distance [km]')
g.ax_joint.set_ylabel('Log(PGA)')

# Narrow the plot area so a colorbar fits to the right of it.
g.fig.subplots_adjust(left=0.2, right=0.8)
cbar_ax = g.fig.add_axes([0.85, 0.12, 0.02, 0.705])  # [x, y, width, height]
# No mappable is passed: pyplot picks up the figure's current image itself.
cb = plt.colorbar(cax=cbar_ax)
cb.set_label(r'Number of Points', fontsize=13)

# plt.title('Distance and Magnitude Distributions of Japanese Dataset')
plt.savefig('../plots/dist_pga_data_distr.pdf')

In [None]:
# Build the SeisData wrapper over the complete Japanese dataset.
data_file = '../data/japan/waveforms.npy'
attr_file = '../data/japan/attributes.csv'
batch_size = 32
sample_rate = 20

# Names of the conditioning variables handed to SeisData.
condv_names = ['dist', 'mag', 'vs30']

# Only the number of records is needed here, so memory-map the file instead of
# reading the whole waveform array into RAM just to take its length.
f = np.load(data_file, mmap_mode='r')
num_samples = len(f)
del f

# Select every record.
ix_all = np.arange(num_samples)

dataset = SeisData(
        data_file=data_file,
        attr_file=attr_file,
        batch_size=batch_size,
        sample_rate=sample_rate,
        v_names=condv_names,
        isel=ix_all,
    )

In [None]:
# Bin edges for the conditioning variables; with N+1 edges there are N bins.
cond_var_bins = get_cond_var_bins(dataset, 10, True)

dist_bins, mag_bins = cond_var_bins['dist_bins'], cond_var_bins['mag_bins']
n_dist_bins, n_mag_bins = len(dist_bins) - 1, len(mag_bins) - 1

In [None]:
# Quick look at one stored waveform (record 25926).
plt.plot(dataset.wfs[25926, :], linewidth=0.5)

In [None]:
# Plot `num_plots` real waveforms from one magnitude bin, picked at evenly
# spaced positions after ordering the selected events by distance.
num_plots = 7

# plt.subplots creates the figure itself; a preceding plt.figure() would only
# leave an extra, empty figure behind.
fig, ax = plt.subplots(num_plots, 1, sharex='col', figsize=(10, 15))
# Frameless full-size axes layered over the panels, with ticks and grid hidden.
fig.add_subplot(111, frameon=False)
plt.tick_params(labelcolor='none', top=False, bottom=False, left=False, right=False)
plt.grid(False)
# Figure-level spacing: set once instead of on every loop iteration.
plt.subplots_adjust(hspace=0.5)

# Time axis: 1000 samples at a 0.05 s step.
tt = 0.05 * np.arange(1000)

dist_border = [dist_bins[0], dist_bins[9]]
mag_border = [mag_bins[1], mag_bins[2]]

wfs, c_norms, df_s, means, n_obs = get_waves_real_bin(dataset, dist_border, mag_border)
if df_s.index.size == 0:
    raise ValueError(f'No observations for Dist {dist_border} / Mag {mag_border}')

# Order by distance first so the evenly spaced picks span the distance range.
df_s = df_s.sort_values('dist')
idx = np.linspace(0, df_s.index.size - 1, num_plots, dtype=int)

for i in range(num_plots):
    # Index label of the selected row; also used to address dataset.wfs/cnorms.
    location = df_s.iloc[idx[i]].name

    dist = dataset.df_meta.loc[location, 'dist']
    mag = dataset.df_meta.loc[location, 'mag']
    # Scale the stored waveform by its normalisation constant.
    waveform = dataset.wfs[location, :] * dataset.cnorms[location].reshape(-1)

    ax[i].plot(tt, waveform, color=colors[1], lw=0.5, label=f'Signal Number {location}')

    # Make the y-range symmetric around zero.
    low, high = ax[i].get_ylim()
    bound = max(abs(low), abs(high))
    ax[i].set_ylim(-bound, bound)
    ax[i].set_title(f'Dist: {dist:.1f}, Mag: {mag:.1f}')

# Label the bottom and the middle panel, whatever num_plots is.
ax[-1].set_xlabel('Time [s]')
ax[num_plots // 2].set_ylabel('Amplitude')
plt.suptitle(f'Obs: {n_obs}, Dist: [{dist_border[0]:.1f}, {dist_border[1]:.1f}], Mag: [{mag_border[0]:.2f}, {mag_border[1]:.2f}]')
plt.savefig('../plots/real_data_samples_dist.pdf')

In [None]:
# NOTE(review): a lone `;` is not a valid Python statement; this looks like a
# leftover scratch cell — confirm it can be deleted.
;

In [None]:
# Plot `num_plots` real waveforms from one distance bin, picked at evenly
# spaced positions in the order returned by get_waves_real_bin.
num_plots = 7

# plt.subplots creates the figure itself; a preceding plt.figure() would only
# leave an extra, empty figure behind.
fig, ax = plt.subplots(num_plots, 1, figsize=(10, 15))
# Figure-level spacing: set once instead of on every loop iteration.
plt.subplots_adjust(hspace=1.0)

# Time axis: 1000 samples at a 0.05 s step.
tt = 0.05 * np.arange(1000)

dist_border = [dist_bins[5], dist_bins[6]]
mag_border = [mag_bins[0], mag_bins[9]]

wfs, c_norms, df_s, means, n_obs = get_waves_real_bin(dataset, dist_border, mag_border)
if df_s.index.size == 0:
    raise ValueError(f'No observations for Dist {dist_border} / Mag {mag_border}')

# NOTE(review): the selection is not sorted by magnitude here, whereas the
# distance figure sorts by distance — confirm this is intended.
idx = np.linspace(0, df_s.index.size - 1, num_plots, dtype=int)

for i in range(num_plots):
    # Index label of the selected row; also used to address dataset.wfs/cnorms.
    location = df_s.iloc[idx[i]].name

    dist = dataset.df_meta.loc[location, 'dist']
    mag = dataset.df_meta.loc[location, 'mag']
    # Scale the stored waveform by its normalisation constant.
    waveform = dataset.wfs[location, :] * dataset.cnorms[location].reshape(-1)

    ax[i].plot(tt, waveform, color=colors[1], lw=0.5, label=f'Signal Number {location}')

    # Make the y-range symmetric around zero.
    low, high = ax[i].get_ylim()
    bound = max(abs(low), abs(high))
    ax[i].set_ylim(-bound, bound)
    ax[i].set_title(f'Dist: {dist:.1f}, Mag: {mag:.1f}')

plt.suptitle(f'Obs: {n_obs}, Dist: [{dist_border[0]:.1f}, {dist_border[1]:.1f}], Mag: [{mag_border[0]:.2f}, {mag_border[1]:.2f}]')
# Label the bottom and the middle panel, whatever num_plots is.
ax[-1].set_xlabel('Time [s]')
ax[num_plots // 2].set_ylabel('Amplitude')
plt.savefig('../plots/real_data_samples_mag.pdf')

In [None]:
# NOTE(review): a lone `;` is not a valid Python statement; this looks like a
# leftover scratch cell — confirm it can be deleted.
;

In [None]:
# Compare synthetic (left column) and real (right column) log-mean amplitude
# envelopes. Each row fixes one distance bin; each curve is one magnitude bin.

# plt.subplots creates the figure itself; a preceding plt.figure() would only
# leave an extra, empty figure behind.
fig, axs = plt.subplots(3, 2, sharex='col', sharey='all',
                        gridspec_kw={'hspace': 0.2, 'wspace': 0.05},
                        figsize=(20, 12),
                        )
# Frameless full-size axes layered over the grid; it carries the x label below.
fig.add_subplot(111, frameon=False)
plt.tick_params(labelcolor='none', top=False, bottom=False, left=False, right=False)
plt.grid(False)

dt = 0.05          # time step used for the x axis
noise_dim = 100    # second argument passed to rand_noise
eps = 1e-10        # keeps log() finite where the amplitude is exactly zero
syn_cp = sns.color_palette('Reds', 10)
real_cp = sns.color_palette('Greys', 10)

# Loop-invariant maxima used to scale the conditioning variables.
dist_max = dataset.df_meta['dist'].max()
mag_max = dataset.df_meta['mag'].max()

for p_x, distbucket in enumerate([0, 4, 8]):
    dist_border = [cond_var_bins['dist_bins'][distbucket], cond_var_bins['dist_bins'][distbucket+1]]

    # Every second magnitude bin.
    for j in range(0, n_mag_bins, 2):
        mag_border = [cond_var_bins['mag_bins'][j], cond_var_bins['mag_bins'][j+1]]
        # One lookup per bin serves both the synthetic and the real curve.
        wfs, c_norms, df_s, means, n_obs = get_waves_real_bin(dataset, dist_border, mag_border)
        if n_obs == 0:
            # Nothing to average or to condition on in an empty bin.
            continue

        dist = means['dist']
        mag = means['mag']
        curve_label = f'Dist: [{dist_border[0]:.1f}, {dist_border[1]:.1f}] km, Mag: [{mag_border[0]:.1f}, {mag_border[1]:.1f}]'
        shade = j // 2 + 3

        # --- synthetic: one generated waveform per real observation in the bin,
        # conditioned on the bin means. Tensors are created on `device` rather
        # than via .cuda() so the cell also runs on a CPU-only machine.
        vc_list = [
            dist / dist_max * torch.ones(n_obs, 1, device=device),
            mag / mag_max * torch.ones(n_obs, 1, device=device),
        ]
        grf = rand_noise(1, noise_dim, device=device)
        random_data = grf.sample(n_obs)
        with torch.no_grad():  # inference only, no autograd graph needed
            syn_data, syn_scaler = loaded_model(random_data, *vc_list)
        syn_data = syn_data.squeeze().detach().cpu().numpy()
        syn_data = syn_data * syn_scaler.detach().cpu().numpy()

        # exp(mean(log|x|)) over observations; eps is added after abs() so a
        # value of -eps can no longer produce log(0).
        synthetic_data_log = np.log(np.abs(np.asarray(syn_data)) + eps)
        syn_env = np.exp(np.mean(synthetic_data_log, axis=0))
        axs[p_x, 0].semilogy(dt * np.arange(0, syn_env.shape[0]), syn_env, '-', color=syn_cp[shade], label=curve_label, alpha=0.8, lw=0.5)

        # --- real: stored waveforms scaled by their normalisation constants.
        real_data = wfs * c_norms.reshape(-1, 1)
        real_data_log = np.log(np.abs(np.asarray(real_data)) + eps)
        real_env = np.exp(np.mean(real_data_log, axis=0))
        axs[p_x, 1].semilogy(dt * np.arange(0, real_data.shape[1]), real_env, '-', color=real_cp[shade], label=curve_label, alpha=0.8, lw=0.5)

    # One legend per panel once all of its curves are drawn.
    axs[p_x, 0].legend(loc=4)
    axs[p_x, 1].legend(loc=4)

axs[0, 0].set_title('Synthetic Data')
axs[0, 1].set_title('Real Data')
axs[0, 0].set_ylim(bottom=1e-6)  # same value as the former `10e-7` literal
axs[1, 0].set_ylabel('Log-Amplitude')
plt.xlabel('Time [s]')

plt.savefig('../plots/syn_real_data_fix_dist.pdf')

In [None]:
# Compare synthetic (left column) and real (right column) log-mean amplitude
# envelopes. Each row fixes one magnitude bin; each curve is one distance bin.

# plt.subplots creates the figure itself; a preceding plt.figure() would only
# leave an extra, empty figure behind.
fig, axs = plt.subplots(3, 2, sharex='col', sharey='all',
                        gridspec_kw={'hspace': 0.2, 'wspace': 0.05},
                        figsize=(20, 12),
                        )
# Frameless full-size axes layered over the grid; it carries the x label below.
fig.add_subplot(111, frameon=False)
plt.tick_params(labelcolor='none', top=False, bottom=False, left=False, right=False)
plt.grid(False)

dt = 0.05          # time step used for the x axis
noise_dim = 100    # second argument passed to rand_noise
eps = 1e-10        # keeps log() finite where the amplitude is exactly zero
syn_cp = sns.color_palette('Reds', 10)
real_cp = sns.color_palette('Greys', 10)

# Loop-invariant maxima used to scale the conditioning variables.
dist_max = dataset.df_meta['dist'].max()
mag_max = dataset.df_meta['mag'].max()

for p_x, magbucket in enumerate([1, 5, 9]):
    mag_border = [cond_var_bins['mag_bins'][magbucket], cond_var_bins['mag_bins'][magbucket+1]]

    # Every second distance bin.
    for j in range(0, n_dist_bins, 2):
        dist_border = [cond_var_bins['dist_bins'][j], cond_var_bins['dist_bins'][j+1]]
        # get_waves_real_bin returns five values; unpacking only four (as the
        # real-data loop used to) raises ValueError. One lookup per bin now
        # serves both the synthetic and the real curve.
        wfs, c_norms, df_s, means, n_obs = get_waves_real_bin(dataset, dist_border, mag_border)
        if n_obs == 0:
            # Nothing to average or to condition on in an empty bin.
            continue

        dist = means['dist']
        mag = means['mag']
        curve_label = f'Dist: [{dist_border[0]:.1f}, {dist_border[1]:.1f}] km, Mag: [{mag_border[0]:.1f}, {mag_border[1]:.1f}]'
        shade = j // 2 + 3

        # --- synthetic: one generated waveform per real observation in the bin,
        # conditioned on the bin means. Tensors are created on `device` rather
        # than via .cuda() so the cell also runs on a CPU-only machine.
        vc_list = [
            dist / dist_max * torch.ones(n_obs, 1, device=device),
            mag / mag_max * torch.ones(n_obs, 1, device=device),
        ]
        grf = rand_noise(1, noise_dim, device=device)
        random_data = grf.sample(n_obs)
        with torch.no_grad():  # inference only, no autograd graph needed
            syn_data, syn_scaler = loaded_model(random_data, *vc_list)
        syn_data = syn_data.squeeze().detach().cpu().numpy()
        syn_data = syn_data * syn_scaler.detach().cpu().numpy()

        # exp(mean(log|x|)) over observations; eps is added after abs() so a
        # value of -eps can no longer produce log(0).
        synthetic_data_log = np.log(np.abs(np.asarray(syn_data)) + eps)
        syn_env = np.exp(np.mean(synthetic_data_log, axis=0))
        axs[p_x, 0].semilogy(dt * np.arange(0, syn_env.shape[0]), syn_env, '-', color=syn_cp[shade], label=curve_label, alpha=0.8, lw=0.5)

        # --- real: stored waveforms scaled by their normalisation constants.
        real_data = wfs * c_norms.reshape(-1, 1)
        real_data_log = np.log(np.abs(np.asarray(real_data)) + eps)
        real_env = np.exp(np.mean(real_data_log, axis=0))
        axs[p_x, 1].semilogy(dt * np.arange(0, real_data.shape[1]), real_env, '-', color=real_cp[shade], label=curve_label, alpha=0.8, lw=0.5)

    # One legend per panel once all of its curves are drawn.
    axs[p_x, 0].legend(loc=4)
    axs[p_x, 1].legend(loc=4)

axs[0, 0].set_title('Synthetic Data')
axs[0, 1].set_title('Real Data')
axs[0, 0].set_ylim(bottom=1e-6)  # same value as the former `10e-7` literal
axs[1, 0].set_ylabel('Log-Amplitude')
plt.xlabel('Time [s]')

plt.savefig('../plots/syn_real_data_fix_mag.pdf')