# Sharma et al. (2009)

In which the GMPE of Sharma et al. (2009) is implemented. First, the coefficient tables are reprocessed for cut-and-paste into the .py source code. Then, key figures in the original paper are reproduced for validation. Finally, test vectors are produced for automatic code verification using unittest.

In [None]:
%matplotlib inline
%load_ext autoreload

In [None]:
import os
import warnings
import importlib
import pandas as pd
import numpy as np

import matplotlib.pyplot as plt
from matplotlib.offsetbox import AnchoredText
from scipy.constants import g

import toolbox as tb
%autoreload 2
import gmpe_tools as gt

from openquake.hazardlib import gsim, imt, const

from openquake.hazardlib.gsim.sharma_2009 import SharmaEtAl2009

In [None]:
# Load the coefficient table transcribed from the paper (Table 2).
# pd.read_csv replaces DataFrame.from_csv, which pandas deprecated and later
# removed; index_col=None keeps every CSV column as data, as before.
df_coeffs = pd.read_csv('Tables/Table2.csv', index_col=None)
df_coeffs

In [None]:
# Load the coefficient table kept under correspondence/ for comparison.
# pd.read_csv replaces DataFrame.from_csv, which pandas deprecated and later
# removed; index_col=None keeps every CSV column as data, as before.
sharma_file = 'correspondence/sharma coefficients.csv'
df_coeffs2 = pd.read_csv(sharma_file, index_col=None)
df_coeffs2

In [None]:
# Curious: exactly one row repeats its predecessor (apart from sigma), yet the
# paper never mentions the coefficients being reused for 0.1 and 0.2 s.
# True marks a value identical to the one in the row above.
df_coeffs2.diff().eq(0)

In [None]:
# Coefficient names b1..b6; the index must be interpolated into the format
# string, otherwise every entry is the literal 'b%d'.
coefficients = ['b%d' % i for i in range(1, 7)]
# Plot the tabulated coefficients against period T and save the figure.
plotted_columns = ['b1', 'b2', 'b3', 'b5', 'b6']
df_coeffs.plot(x='T', y=plotted_columns, grid=True, figsize=(6, 8))
# Grey band spanning -0.1 to +0.1 on the vertical axis.
plt.axhspan(-0.1, 0.1, alpha=0.5, color='0.5')
plt.savefig('Sharma_coefficients.pdf', bbox_inches='tight', dpi=300)

In [None]:
# Dump the coefficient columns as plain text (without the row index) so they
# can be pasted into the GMPE source file.
table_columns = ['T', 'b1', 'b2', 'b3', 'b5', 'b6', 'sigma']
with open('Table2.txt', 'w') as f:
    table_text = df_coeffs[table_columns].to_string(index=False)
    f.write(table_text)

In [None]:
%autoreload 2
# Instantiate the GMPE and print the metadata it exposes.
gmpe = SharmaEtAl2009()
print(type(gmpe).__name__)

# (label, value) pairs, printed in this order as "label: value".
summary = [
    ('Supported tectonic region',
     gmpe.DEFINED_FOR_TECTONIC_REGION_TYPE),
    ('Supported intensity measure types',
     ', '.join(item.__name__
               for item in gmpe.DEFINED_FOR_INTENSITY_MEASURE_TYPES)),
    ('Supported component',
     gmpe.DEFINED_FOR_INTENSITY_MEASURE_COMPONENT),
    ('Supported standard deviations',
     ', '.join(gmpe.DEFINED_FOR_STANDARD_DEVIATION_TYPES)),
    ('Required site parameters',
     ', '.join(gmpe.REQUIRES_SITES_PARAMETERS)),
    ('Required rupture parameters',
     ', '.join(gmpe.REQUIRES_RUPTURE_PARAMETERS)),
    ('Required distance parameters',
     ', '.join(gmpe.REQUIRES_DISTANCES)),
]
for label, value in summary:
    print('%s: %s' % (label, value))

In [None]:
def add_dummy_variables(gmpe, df_means):
    """Add the GMPE's site and fault dummy variables to ``df_means`` in place.

    Column 'S' receives ``gmpe.get_site_type_dummy_variables`` evaluated on
    the 'site_vs30' column; column 'H' receives
    ``gmpe.get_fault_type_dummy_variables`` evaluated once per entry of the
    'rup_rake' column. Both are stored as floats. Nothing is returned.
    """
    # Site term: one call on a sites context carrying all vs30 values.
    sites = gsim.base.SitesContext()
    sites.vs30 = df_means['site_vs30'].values
    df_means['S'] = gmpe.get_site_type_dummy_variables(sites).astype('float')

    # Fault term: evaluated rake by rake through a minimal rupture stand-in.
    fault_dummies = []
    for rake in df_means['rup_rake'].astype('float'):
        rupture = tb.Structure(rake=rake)
        fault_dummies.append(gmpe.get_fault_type_dummy_variables(rupture))
    df_means['H'] = np.array(fault_dummies).astype('float')

In [None]:
# Inputs for Figure 6: a single spectral period over a range of distances.
mags = np.array([5.0, 6.0, 7.0])
rakes = np.array([0.0, 90.0])  # degrees
distances = tb.logspace(1, 200, 6)  # km
vs30s = np.array([500.0, 2000.0])  # m/s
im_types = [imt.SA(0.04, 5)]

# Evaluate the GMPE for the inputs above, then tag rows with S and H.
df_means, df_stddevs = gt.compute_gmpe(
    gmpe, mags, rakes, distances, vs30s, im_types)
add_dummy_variables(gmpe, df_means)

# Preview the first and last rows of the means table.
pd.concat([df_means.head(), df_means.tail()])

In [None]:
# Inputs for Figures 7-9: every tabulated period at three fixed distances.
mags = np.array([5.0, 6.0, 7.0])
rakes = np.array([0.0, 90.0])  # degrees
distances = np.array([10.0, 50.0, 100.0])  # km
vs30s = np.array([500.0, 2000.0])  # m/s
im_types = [imt.SA(T, 5) for T in df_coeffs['T']]

# Evaluate the GMPE for the inputs above, then tag rows with S and H.
df_means2, df_stddevs2 = gt.compute_gmpe(
    gmpe, mags, rakes, distances, vs30s, im_types)
add_dummy_variables(gmpe, df_means2)

# Preview the first and last rows of the means table.
pd.concat([df_means2.head(), df_means2.tail()])

In [None]:
# Scratch check of the fault-type dummy logic on a one-element example:
# 1.0 where the rupture is not reverse, NaN where it is flagged as normal.
is_normal = np.zeros(1, dtype=bool)
is_reverse = np.zeros(1, dtype=bool)
is_strike_slip = np.where(is_normal, np.nan,
                          np.logical_not(is_reverse).astype(float))

In [None]:
# Show the value, type and size of the is_strike_slip array.
# NOTE(review): the bare print statements elsewhere in this notebook suggest
# Python 2, where this parenthesised form prints one tuple — confirm intent.
print(is_strike_slip, type(is_strike_slip), is_strike_slip.size)

In [None]:
# Preview the first and last rows of the standard-deviation table.
pd.concat((df_stddevs2.head(), df_stddevs2.tail()))

In [None]:
# Normal faulting (rake = -90) should make the GMPE warn and return NaN.
# Build a single-scenario rupture / distance / site context for the call.
rctx = gsim.base.RuptureContext()
rctx.mag = np.array([6.0])
rctx.rake = np.array([-90.0])

dctx = gsim.base.DistancesContext()
dctx.rjb = np.array([100.0])

sctx = gsim.base.SitesContext()
sctx.vs30 = np.array([1000.0])

with warnings.catch_warnings(record=True) as w:
    # Record every warning, including ones that would normally be
    # suppressed as repeats.
    warnings.simplefilter('always')
    # This call is expected to emit the warning.
    mean, [stddev] = gmpe.get_mean_and_stddevs(
        sctx, rctx, dctx, im_types[0], [const.StdDev.TOTAL])
    # Exactly one UserWarning mentioning "not supported", and a NaN mean.
    assert len(w) == 1
    last_warning = w[-1]
    assert issubclass(last_warning.category, UserWarning)
    assert 'not supported' in str(last_warning.message).lower()
    assert np.all(np.isnan(mean))


In [None]:
# produce Figure 6: SA versus Joyner-Boore distance, one panel per (S, H)
# pair, with digitized points from the paper overlaid where a file exists.
# NOTE(review): im_types is reassigned by the Figures 7-9 data cell; if the
# notebook is run top to bottom this relies on im_types[0] still being the
# Figure 6 period — confirm.
digitized_template = 'digitized/M%d_S%d_H%d_%gs.csv'

Ss = sorted(set(df_means['S']))
Hs = sorted(set(df_means['H']))
Rjbs = sorted(set(df_means['dist_rjb']))
fig, axes = plt.subplots(len(Ss), len(Hs), figsize=(8, 6),
                         sharex=True, sharey=True)
plt.subplots_adjust(wspace=0.075, hspace=0.1)
for i, S in enumerate(Ss):
    for j, H in enumerate(Hs):
        ax = axes[i, j]
        df_axes = df_means[
            (df_means['S'] == S) &
            (df_means['H'] == H)]
        axis_label = 'S=%g\nH=%g' % (S, H)
        ax.add_artist(AnchoredText(axis_label, loc=1, frameon=False))

        # sorted() keeps trace, colour and legend order deterministic.
        for mag in sorted(set(df_axes['rup_mag'])):
            df_trace = df_axes[df_axes['rup_mag'] == mag]

            trace_label = 'M=%g' % mag
            # Computed values are scaled by g for the m/s^2 axis.
            h = ax.loglog(df_trace['dist_rjb'],
                          df_trace[im_types[0].period]*g,
                          label=trace_label, alpha=0.5)

            digitized_file = digitized_template % (
                mag, S, H, im_types[0].period)
            if os.path.exists(digitized_file):
                data = np.genfromtxt(digitized_file, delimiter=',')
                # Crosses in the same colour as the computed curve.
                ax.plot(data[:, 0], data[:, 1],
                        color=h[0].get_color(), marker='x',
                        linestyle='none', alpha=0.5)
            else:
                # Single-argument print() behaves the same on Python 2 and 3.
                print('%s not available' % digitized_file)

# Axes are shared, so setting limits on the first column is sufficient.
for ax in axes:
    ax[0].set_xlim((min(Rjbs), max(Rjbs)))
    ax[0].set_ylim((0.1, 5))
for ax in axes[1, :]:
    ax.set_xlabel('Joyner-Boore Distance [km]')
for ax in axes[:, 0]:
    ax.set_ylabel('SA(T=%g s, $\\xi$=%g%%) [m/s$^2$]'
                  % (im_types[0].period, im_types[0].damping))
axes[1, 0].legend(loc='lower left', labelspacing=0, fontsize=10,
                  frameon=False)
plt.savefig('Figure_6_computed.pdf', dpi=300, bbox_inches='tight')

In [None]:
# produce Figures 7-9: response spectra (SA versus period), one panel per
# distance, with digitized points from the paper overlaid where available.
digitized_template = 'digitized/M%d_S%d_H%d_%gkm.csv'

Ss = sorted(set(df_means2['S']))
Hs = sorted(set(df_means2['H']))
Rjbs = sorted(set(df_means2['dist_rjb']))

# Columns accepted by tb.is_numeric are treated as the spectral periods.
# A list comprehension is used because np.array(map(...)) does not yield a
# boolean mask on Python 3, where map() returns an iterator. The mask and
# the period list do not depend on the trace, so they are built once.
period_mask = np.array([tb.is_numeric(key) for key in df_means2.keys()])
T = [float(key) for key, keep in zip(df_means2.keys(), period_mask) if keep]

fig, axes = plt.subplots(len(Rjbs), 1, figsize=(6, 10), sharex=True)
plt.subplots_adjust(hspace=0.1)
for ax, dist in zip(axes, Rjbs):

    df_axes = df_means2[df_means2['dist_rjb'] == dist]
    axis_label = '%g km' % dist
    ax.add_artist(AnchoredText(axis_label, loc=2, frameon=False))

    S = 0  # only traces with S == 0 are drawn
    for H in Hs:
        # sorted() keeps trace, colour and legend order deterministic.
        for mag in sorted(set(df_axes['rup_mag'])):
            df_trace = df_axes[
                (df_axes['rup_mag'] == mag) &
                (df_axes['S'] == S) &
                (df_axes['H'] == H)]

            trace_label = 'M=%g, S=%d, H=%d' % (mag, S, H)
            if df_trace.size == 0:
                # Nothing to draw; skipping also avoids indexing the empty
                # result of ax.plot() below.
                print('No data found for %s' % trace_label)
                continue

            data_trace = df_trace.loc[:, period_mask].T
            SA = data_trace.values*g  # scaled by g for the m/s^2 axis
            h = ax.plot(T, SA, label=trace_label, alpha=0.5)

            digitized_file = digitized_template % (mag, S, H, dist)
            if os.path.exists(digitized_file):
                data = np.genfromtxt(digitized_file, delimiter=',')
                # Crosses in the same colour as the computed curve.
                ax.plot(data[:, 0], data[:, 1],
                        color=h[0].get_color(), marker='x',
                        linestyle='none', alpha=0.5)
            else:
                print('%s not available' % digitized_file)

for ax in axes:
    ax.set_xlim((0, max(T)))
    ax.set_ylim((0, ax.get_ylim()[1]))
axes[-1].set_xlabel('Period [s]')
for ax in axes:
    ax.set_ylabel('SA($\\xi$=%g%%) [m/s$^2$]' % (im_types[0].damping))
axes[0].legend(loc='upper right', labelspacing=0, fontsize=10, frameon=False)
plt.savefig('Figures_7-9_computed.pdf', dpi=300, bbox_inches='tight')

In [None]:
# produce plot of the total standard deviation versus period for a single
# scenario: the smallest distance, vs30, rake and magnitude in df_stddevs2.
vs30s = sorted(set(df_stddevs2['site_vs30']))
rakes = sorted(set(df_stddevs2['rup_rake']))
Rjbs = sorted(set(df_stddevs2['dist_rjb']))
fig, ax = plt.subplots(1, 1)

dist = Rjbs[0]
df_axes = df_stddevs2[df_stddevs2['dist_rjb'] == dist]

vs30 = vs30s[0]
rake = rakes[0]
mag = sorted(set(df_axes['rup_mag']))[0]
df_trace = df_axes[
    (df_axes['rup_mag'] == mag) &
    (df_axes['site_vs30'] == vs30) &
    (df_axes['rup_rake'] == rake)]

trace_label = 'M=%g, vs30=%d, rake=%d' % (mag, vs30, rake)
if df_trace.size == 0:
    # Single-argument print() behaves the same on Python 2 and 3.
    print('No data found for %s' % trace_label)

# Columns accepted by tb.is_numeric are treated as the spectral periods.
# A list comprehension is used because np.array(map(...)) does not yield a
# boolean mask on Python 3, where map() returns an iterator.
period_mask = np.array([tb.is_numeric(key) for key in df_trace.keys()])
data_trace = df_trace.loc[:, period_mask].T
T = [float(item) for item in data_trace.index]
sigma = data_trace.values
ax.plot(T, sigma, label=trace_label, alpha=0.5)

ax.set_xlim((0, max(T)))
ax.set_ylim((0, ax.get_ylim()[1]))
ax.set_xlabel('Period [s]')
# '\\sigma' spells the backslash explicitly; '\s' is an invalid escape.
ax.set_ylabel('Total standard deviation $\\sigma$')
plt.savefig('Sigma_computed.pdf', dpi=300, bbox_inches='tight')

In [None]:
# Locations of the unittest data files inside the hazardlib checkout.
test_path = '/home/nick/src/python/GEM/oq-hazardlib/openquake/hazardlib/tests/gsim/data/SDBK09/'
stddev_file = os.path.join(test_path, 'SDBK09_STD_TOTAL.csv')
means_file = os.path.join(test_path, 'SDBK09_MEAN.csv')

# The standard-deviation reference file is generated here from our own
# results: column labels become strings, values are written with 7
# significant digits and the row index is omitted.
df_stddevs2.columns = list(map(str, df_stddevs2.columns))
df_stddevs2.to_csv(stddev_file, float_format='%.7g', index=False)

In [None]:
# load dataset provided by authors and recover the scenario grid it covers
means_file = os.path.join(test_path, 'SDBK09_MEAN.csv')
df_ref = pd.read_csv(means_file)
df_ref = gt.df_massage(df_ref)

# Sorted unique values of each scenario parameter in the reference file.
mags = np.sort(np.array(list(set(df_ref['rup_mag']))))
rakes = np.sort(np.array(list(set(df_ref['rup_rake']))))
#rakes = rakes[rakes >= 0]
Rjbs = np.sort(np.array(list(set(df_ref['dist_rjb']))))
vs30s = np.sort(np.array(list(set(df_ref['site_vs30']))))
# Columns accepted by tb.is_numeric are treated as the spectral periods.
# A comprehension is used because np.array(map(...)) does not yield a
# boolean mask on Python 3, where map() returns an iterator.
Ts = np.sort(np.array([float(key) for key in df_ref.keys()
                       if tb.is_numeric(key)]))
im_types = [imt.SA(T) for T in Ts]

# Single-argument print() gives the same output on Python 2 and 3.
print('%s %s %s %s' % (mags, rakes, Rjbs, vs30s))
print(Ts)

In [None]:
# Recompute the means on the reference grid (standard deviations discarded),
# then massage the table and order its columns like the reference table.
df_new, _ = gt.compute_gmpe(
    gmpe, mags, rakes, Rjbs, vs30s, im_types)
df_new = gt.df_massage(df_new)[df_ref.columns]

In [None]:
# Preview the first rows of the reference table.
df_ref.head()

In [None]:
# Preview the first rows of the recomputed table.
df_new.head()

In [None]:
# Compare reference spectra (lines) with recomputed spectra (crosses), one
# panel per distance and one colour per (rake, vs30, magnitude) scenario.
n_traces = len(rakes)*len(vs30s)*len(mags)
color_cycle = plt.cm.jet(np.linspace(0, 1, n_traces))

# Columns accepted by tb.is_numeric are treated as the spectral periods.
# A list comprehension is used because np.array(map(...)) does not yield a
# boolean mask on Python 3, where map() returns an iterator. The masks do
# not depend on the trace, so they are built once.
ref_period_mask = np.array([tb.is_numeric(key) for key in df_ref.keys()])
new_period_mask = np.array([tb.is_numeric(key) for key in df_new.keys()])

fig, axes = plt.subplots(len(Rjbs), 1, figsize=(6, 15), sharex=True)
plt.subplots_adjust(hspace=0.1)
for i, dist in enumerate(Rjbs):

    ax = axes[i]
    fig.sca(ax)
    axis_label = 'rjb=%g' % dist
    ax.add_artist(AnchoredText(axis_label, loc=1, frameon=False))
    # set_color_cycle was deprecated and later removed from matplotlib;
    # use set_prop_cycle when present, else fall back to the old call.
    if hasattr(ax, 'set_prop_cycle'):
        ax.set_prop_cycle(color=color_cycle)
    else:
        ax.set_color_cycle(color_cycle)

    for rake in rakes:
        for vs30 in vs30s:
            for mag in mags:
                trace_label = 'mag=%g, rake=%g, vs30=%g' % (mag, rake, vs30)

                df_trace = df_ref[
                    (df_ref['dist_rjb'] == dist) &
                    (df_ref['rup_mag'] == mag) &
                    (df_ref['rup_rake'] == rake) &
                    (df_ref['site_vs30'] == vs30)]

                if df_trace.size == 0:
                    # An empty selection cannot be plotted against Ts.
                    print('No reference found for %s' % trace_label)
                    continue

                data_trace = df_trace.loc[:, ref_period_mask].T
                SA = np.reshape(data_trace.values.T, (-1,))*g
                h = ax.plot(Ts, SA, label=trace_label, alpha=0.5)

                df_trace2 = df_new[
                    (df_new['dist_rjb'] == dist) &
                    (df_new['rup_mag'] == mag) &
                    (df_new['rup_rake'] == rake) &
                    (df_new['site_vs30'] == vs30)]

                if df_trace2.size == 0:
                    print('No new value found for %s' % trace_label)
                    continue

                data_trace2 = df_trace2.loc[:, new_period_mask].T
                SA2 = np.reshape(data_trace2.values.T, (-1,))*g
                # Crosses in the same colour as the reference curve.
                ax.plot(Ts, SA2, linestyle='none', marker='x',
                        color=h[0].get_color(), alpha=0.5)

for ax in axes:
    ax.set_xlim((0, max(Ts)))
    ax.set_ylim((0, ax.get_ylim()[1]))
axes[-1].set_xlabel('Period [s]')
for ax in axes:
    ax.set_ylabel('SA($\\xi$=%g%%) [m/s$^2$]' % (im_types[0].damping))
# Legend is anchored outside the top-right corner of the first panel.
axes[0].legend(loc='upper left', labelspacing=0, fontsize=10, bbox_to_anchor=(1, 1))
plt.savefig('Reference_result.png', dpi=300, bbox_inches='tight')

In [None]:
# summarize the largest discrepancies between recomputed and reference values
threshhold = 1e-8

# Evaluate the comparison once and reuse it for both the index of changed
# cells and their positions (it was previously computed twice).
# NOTE(review): assumes tb.df_compare has no side effects — confirm.
exceeds = tb.df_compare(df_new, df_ref) > threshhold

# Keep only the (row, column) entries whose difference exceeds the threshold.
ne_stacked = exceeds.stack()
changed = ne_stacked[ne_stacked]
changed.index.names = ['index', 'column']

difference_locations = np.where(exceeds)
changed_from = df_new.values[difference_locations]
changed_to = df_ref.values[difference_locations]
# 'from' is the recomputed value; 'percent' is the relative change needed to
# reach the reference value.
pd.DataFrame({'from': changed_from, 'percent': 100*(changed_to/changed_from - 1)}, index=changed.index)