# Source model uncertainty for Nath & Thingbaijam (2012)

OpenQuake cannot enumerate all $3^{222} \approx 8.3 \times 10^{105}$ (about 0.83 million googols) logic tree branches, so we collapse all $m_{max}$ and $b$ variation for each areal zone into a single discrete distribution.

In [None]:
%matplotlib inline
%load_ext autoreload

In [None]:
import os

import numpy as np
import pandas as pd
import toolbox as tb
import lxml.etree as et
import matplotlib.pyplot as plt
import matplotlib as mpl
from matplotlib.colors import LogNorm
from StringIO import StringIO
from IPython.display import display

%autoreload 2
import source_model_tools as smt

%autoreload 2
import logic_tree_tools as ltt

import hmtk.sources as src
from openquake.hazardlib import tom, geo

from hmtk.plotting.mapping import HMTKBaseMap
from hmtk.parsers.source_model.nrml04_parser import nrmlSourceModelParser
from openquake.hazardlib.mfd.truncated_gr import TruncatedGRMFD

In [None]:
# sanity check of the figure quoted in the introduction: 3**222 expressed
# in units of 10**106, i.e. in millions of googols
float(3**222)/10**106

In [None]:
# inputs used throughout this notebook
# minimum magnitudes; there is one smoothed-model TSV file per value
min_mags = [4.5, 5.5]

# areal zone table and the logic tree that describes its uncertainty
areal_source_model_tsv = 'areal_source_model_no_twin.tsv'
source_tree_tsv = '../Logic Trees/areal_model_logic_tree.tsv'

# smoothed-seismicity tables, in the same order as min_mags
smoothed_source_models_tsv = [
    'nath2012probabilistic_smoothed_source_model_mmin_%g.tsv' % min_mag
    for min_mag in min_mags
]

In [None]:
# read areal model data from TSV file
source_zones_df = pd.read_csv(areal_source_model_tsv, sep='\t')
# distinct layer ids in ascending order (sorted() accepts the set directly)
layer_ids = sorted(set(source_zones_df['layerid']))

# columns left out of the tables displayed and saved in this notebook
drop_columns = [
    'tectonic zone', 'concerns', 'zmax', 'zmin',
    'polygon coordinates', 'zoneid', 'mechanism', 'name', 'aspect ratio',
    'smoothed N 4.5', 'smoothed N 5.5', 'smoothed a 4.5', 'smoothed a 5.5',
    'smoothed lambda 4.5', 'smoothed lambda 5.5', 'areal lambda 4.5', 'areal lambda 5.5',
    'equiv a 4.5', 'equiv a 5.5',
]

# preview the first and last five zones; drop the columns once and reuse
source_zones_preview_df = source_zones_df.drop(drop_columns, axis=1)
display(pd.concat([source_zones_preview_df.head(),
                   source_zones_preview_df.tail()]))

In [None]:
# load the areal-model logic tree from its TSV file with ltt.read_tree_tsv
# and show the resulting table
source_tree_symbolic_df = ltt.read_tree_tsv(source_tree_tsv)
display(source_tree_symbolic_df)

In [None]:
# Apply the logic tree to the areal zones.  Of the five results, the cells
# below use collapsed_df (one row per zone), all_rates (zipped against the
# rows of collapsed_df) and labels (legend entries for the per-branch curves).
(collapsed_df, reduced_df, all_rates, all_weights, labels) = ltt.collapse_sources(
    source_zones_df, source_tree_symbolic_df)

In [None]:
# One panel per layer showing the collapsed occurrence rates of every zone in
# that layer as a step function of magnitude.
# squeeze=False keeps `axes` an array even when there is a single layer, so
# the zip() and axes[-1] below work for any number of layers >= 1.
fig, axes = plt.subplots(len(layer_ids), 1, figsize=(8, 4*len(layer_ids)),
                         sharex=True, squeeze=False)
axes = axes.ravel()
fig.subplots_adjust(hspace=0.05)
for layer_id, ax in zip(layer_ids, axes):
    fig.sca(ax)
    tb.annotate('layer %d' % layer_id)
    layer_df = collapsed_df[collapsed_df['layerid'] == layer_id]
    for _, zone in layer_df.iterrows():
        num_bins = zone['occurRates'].size
        # magnitude bin edges: one more edge than there are bins
        mags = zone['mmin'] + zone['magBin']*np.arange(num_bins + 1)
        # repeat the first rate so that there is one value per bin edge;
        # list(range(...)) keeps the concatenation valid on Python 2 and 3
        rates = zone['occurRates'][[0] + list(range(num_bins))]
        ax.step(mags, rates, label=zone['id'])
    ax.set_yscale('log')
    ax.set_ylabel('Annual Occurrence Rate')
    ax.legend(bbox_to_anchor=(1, 0.5), loc='center left',
              frameon=False, labelspacing=0, ncol=3)

axes[-1].set_xlabel('Moment Magnitude, $M_w$')
fig.savefig('MeanOccurrenceRatesAllZones.pdf', transparent=True, bbox_inches='tight', pad_inches=0.1)

In [None]:
# show the second table returned by ltt.collapse_sources
reduced_df

In [None]:
# Zones singled out for a closer look: per-branch occurrence rates together
# with the collapsed mean, one panel per zone.
zoneids = ['z1', 'z924', 'z93', 'z137', 'z913', 'z915', 'z132']
# boolean mask over the rows of collapsed_df; all_rates is zipped against
# the same rows, so the mask selects matching entries from both
is_selected = collapsed_df['id'].isin(zoneids)
zones_rates = [rates for rates, keep in zip(all_rates, is_selected) if keep]
zones_df = collapsed_df[is_selected]

# squeeze=False keeps `axes` an array even for a single selected zone
fig, axes = plt.subplots(len(zoneids), 1, figsize=(8, 4*len(zoneids)),
                         sharex=True, squeeze=False)
axes = axes.ravel()
fig.subplots_adjust(hspace=0.05)
for zone_rates, (_, zone), ax \
        in zip(zones_rates, zones_df.iterrows(), axes):
    fig.sca(ax)
    tb.annotate(r'%s: $b = %g \pm %g$, $m_{max} = %g \pm %g$' %
                (zone['id'], zone['b'], zone['stdb'], zone['mmax'], zone['stdmmax']))
    num_bins = zone_rates.shape[0]
    # magnitude bin edges: one more edge than there are bins
    mags = zone['mmin'] + zone['magBin']*np.arange(num_bins + 1)
    # row 0 is repeated so that there is one value per bin edge;
    # list(range(...)) keeps the concatenation valid on Python 2 and 3
    edge_rows = [0] + list(range(num_bins))
    rates = zone_rates[edge_rows, :]
    # one curve per column of zone_rates, labelled from `labels`
    for rate, label in zip(rates.T, labels):
        ax.step(mags, rate.tolist(), label=label)
    ax.step(mags, zone['occurRates'][edge_rows],
            linewidth=2, color='black', label='mean')
    ax.set_yscale('log')
    ax.set_ylabel('Annual Occurrence Rate')

axes[0].legend(bbox_to_anchor=(1, 0.5), loc='center left',
          frameon=False, labelspacing=0)
axes[-1].set_xlabel('Moment Magnitude, $M_w$')
fig.savefig('MeanOccurrenceRatesSelectedZones.pdf', transparent=True, bbox_inches='tight', pad_inches=0.1)

In [None]:
def display_and_save(name, df):
    """Print a title, display a table and save the table as CSV.

    Parameters
    ----------
    name : str
        Title printed above the table.  It also determines the output file
        name: spaces are replaced by underscores and '.csv' is appended.
    df : object
        Table to show; it is passed to ``display`` and must provide a
        ``to_csv`` method (e.g. a pandas DataFrame).

    The CSV path is relative, so the file lands in the current working
    directory unless ``name`` itself contains a directory part.
    """
    # print() with one parenthesised argument behaves the same on Python 2
    # and 3, unlike the bare print statement
    print(name + ':')
    display(df)
    df.to_csv(name.replace(' ', '_') + '.csv')

In [None]:
# table of the zones selected for plotting above, without the columns in
# drop_columns and without the per-bin occurrence rates
display_and_save('Selected zones of interest', 
                 zones_df.drop(drop_columns + ['occurRates'], axis=1))

In [None]:
# five zones with the largest 'stdb'
# (DataFrame.sort was removed from pandas; sort_values is its replacement)
display_and_save(
    'Top 5 b-value uncertainty',
    collapsed_df.sort_values(by='stdb', ascending=False)
    .drop(drop_columns + ['occurRates'], axis=1)
    .head(5))

In [None]:
# three zones with the largest 'stdmmax'
# (DataFrame.sort was removed from pandas; sort_values is its replacement)
display_and_save(
    'Top 3 mmax uncertainty',
    collapsed_df.sort_values(by='stdmmax', ascending=False)
    .drop(drop_columns + ['occurRates'], axis=1)
    .head(3))

In [None]:
# three zones with the largest 'mmax'
# (DataFrame.sort was removed from pandas; sort_values is its replacement)
display_and_save(
    'Top 3 mmax',
    collapsed_df.sort_values(by='mmax', ascending=False)
    .drop(drop_columns + ['occurRates'], axis=1)
    .head(3))

In [None]:
# three zones with the smallest 'mmax'
# (DataFrame.sort was removed from pandas; sort_values is its replacement)
display_and_save(
    'Bottom 3 mmax',
    collapsed_df.sort_values(by='mmax', ascending=True)
    .drop(drop_columns + ['occurRates'], axis=1)
    .head(3))

In [None]:
# five zones with the smallest 'a'
# (DataFrame.sort was removed from pandas; sort_values is its replacement)
display_and_save(
    'Bottom 5 a-value',
    collapsed_df.sort_values(by='a', ascending=True)
    .drop(drop_columns + ['occurRates'], axis=1)
    .head(5))

In [None]:
# three zones with the largest 'a'
# (DataFrame.sort was removed from pandas; sort_values is its replacement)
display_and_save(
    'Top 3 a-value',
    collapsed_df.sort_values(by='a', ascending=False)
    .drop(drop_columns + ['occurRates'], axis=1)
    .head(3))

In [None]:
# three zones with the largest 'b'
# (DataFrame.sort was removed from pandas; sort_values is its replacement)
display_and_save(
    'Top 3 b-value',
    collapsed_df.sort_values(by='b', ascending=False)
    .drop(drop_columns + ['occurRates'], axis=1)
    .head(3))

In [None]:
# three zones with the smallest 'b'
# (DataFrame.sort was removed from pandas; sort_values is its replacement)
display_and_save(
    'Bottom 3 b-value',
    collapsed_df.sort_values(by='b', ascending=True)
    .drop(drop_columns + ['occurRates'], axis=1)
    .head(3))

In [None]:
# build the "twinned" source table from the collapsed one
# NOTE(review): presumably each zone is split into two sources by magnitude
# range (later cells look up zone ids '1' and '1m') - confirm in
# source_model_tools.twin_source_by_magnitude
twinned_df = smt.twin_source_by_magnitude(collapsed_df)

In [None]:
# zone 1 before twinning: its table row(s), then the raw occurrence rates
display(
    collapsed_df.loc[collapsed_df['zoneid'] == 1]
    .drop(drop_columns + ['region'], axis=1))
collapsed_df.loc[collapsed_df['zoneid'] == 1, 'occurRates'].values

In [None]:
# zone 1 after twinning: rows with zone id '1' or '1m', then the occurrence
# rates of each of the two
display(
    twinned_df[twinned_df['zoneid'].isin(['1', '1m'])]
    .drop(drop_columns + ['region'], axis=1))
display(twinned_df.loc[twinned_df['zoneid'] == '1', 'occurRates'].values)
display(twinned_df.loc[twinned_df['zoneid'] == '1m', 'occurRates'].values)

In [None]:
# write result to NRML
# the twinned table is passed through smt.add_name_id and
# smt.sort_and_reindex, then converted to a list of source objects
areal_source_list = smt.source_df_to_list(smt.sort_and_reindex(smt.add_name_id(twinned_df)))
# wrap the sources in an hmtk source model and serialise it to
# 'areal_collapsed.xml'
areal_source_model = src.source_model.mtkSourceModel(
    identifier='areal', 
    name='areal collapsed', 
    sources=areal_source_list)
areal_source_model.serialise_to_nrml('areal_collapsed.xml')

In [None]:
# Repeat the collapse / twin / export steps for each smoothed-seismicity
# table, writing one NRML file per minimum magnitude.
for smoothed_source_model_tsv, min_mag in zip(smoothed_source_models_tsv, min_mags):
    smoothed_source_df = pd.read_csv(smoothed_source_model_tsv, sep='\t')
    # only the first result of collapse_sources (the collapsed table) is used
    smoothed_collapsed_df = ltt.collapse_sources(smoothed_source_df, source_tree_symbolic_df)[0]
    # unlike the areal export, smt.sort_and_reindex is not applied here
    smoothed_source_list = smt.source_df_to_list(smt.add_name_id(smt.twin_source_by_magnitude(smoothed_collapsed_df)))
    
    # NOTE(review): this rebinds `areal_source_model`, replacing the areal
    # model built in the previous cell with a smoothed one - consider a
    # separate name if the areal model is needed afterwards
    areal_source_model = src.source_model.mtkSourceModel(
        identifier=str(min_mag), 
        name='nath2012probabilistic smoothed collapsed', 
        sources=smoothed_source_list)
    areal_source_model.serialise_to_nrml('nath2012probabilistic_smoothed_collapsed_mmin%g.xml' % min_mag)