# Source models for Nath & Thingbaijam (2012)

Read the source description input files from the online supplementary material and write them to XML.

In [None]:
%matplotlib inline
%load_ext autoreload

In [None]:
import os

%autoreload 2
import source_model_tools as smt

import numpy as np
import pandas as pd
import toolbox as tb
import lxml.etree as et
import matplotlib.pyplot as plt
from StringIO import StringIO

import hmtk.sources as src
from openquake.hazardlib import tom

from hmtk.plotting.mapping import HMTKBaseMap
from hmtk.parsers.source_model.nrml04_parser import nrmlSourceModelParser

In [None]:
# Depth layers of the model. The loop that reads the layer files pairs
# layer k with the depth interval layer_depths_km[k - 1] .. layer_depths_km[k].
layer_ids = [1, 2, 3, 4]
layer_depths_km = [0, 25, 70, 180, 300]
# minimum magnitudes used to fill in the smoothed-seismicity file names
min_mags = [4.5, 5.5]

# plain file names
model_path = 'nath2012probabilistic'
aux_file = 'auxiliary data.csv'
areal_source_model = 'areal_source_model.xml'
smoothed_model_template = 'smoothed_source_mmin%.1f_model.xml'
# same base name as the areal model, but with a .tsv extension
all_source_data_file = os.path.splitext(areal_source_model)[0] + '.tsv'

# templates for the input files stored below model_path
polygon_file_template = os.path.join(model_path, 'polygonlay%d.txt')
seismicity_file_template = os.path.join(model_path, 'seismicitylay%d.txt')
smoothed_data_template = os.path.join(model_path, 'lay%dsmooth%.1f.txt')

# one polygon file and one seismicity file per layer
polygon_files = [polygon_file_template % layer_id for layer_id in layer_ids]
seismicity_files = [seismicity_file_template % layer_id
                    for layer_id in layer_ids]

# smoothed inputs: outer list over minimum magnitudes, inner list over layers
smoothed_files = []
for min_mag in min_mags:
    smoothed_files.append([smoothed_data_template % (layer_id, min_mag)
                           for layer_id in layer_ids])
smoothed_model_files = [smoothed_model_template % min_mag
                        for min_mag in min_mags]

In [None]:
def read_polygons(file_name):
    """
    Read polygon descriptions from text file into pandas.DataFrame.

    File format as per Nath & Thingbaijam (2012): the first line lists the
    comma-separated column names, each following line holds one record.
    Square brackets around names and values are stripped. Within a value,
    ';' separates rows and ',' separates the numbers of a row.

    Blank lines (e.g. a trailing empty line) are skipped.

    :param file_name: path of the polygon text file
    :returns: DataFrame with one row per record; 'zoneid' is converted to
        int, every other column holds the numpy array parsed from its value
    """
    records = []
    with open(file_name) as f:
        # header: column names, optionally wrapped in square brackets
        keys = [key.strip('[]') for key in f.readline().strip().split(',')]
        for line in f:
            line = line.strip()
            if not line:
                # an empty line would otherwise fail on int('')
                continue
            # split off the leading fields only: the last field contains
            # commas of its own and has to stay in one piece
            values = line.split(',', len(keys) - 1)
            entry = {}
            for key, value in zip(keys, values):
                # one array row per ';'-separated group
                value = value.strip('[]').replace(';', '\n')
                if key == 'zoneid':
                    entry[key] = int(value)
                else:
                    entry[key] = np.genfromtxt(StringIO(value), delimiter=',')
            records.append(entry)
    return pd.DataFrame(records)

In [None]:
# Build one table of source zones over all depth layers:
# per layer, read the seismicity parameters and the zone polygons, add the
# layer depths and mechanisms, and join both on 'zoneid'.
all_source_data = pd.DataFrame()
all_polygons = pd.DataFrame()
# pair every layer with its two input files and its upper/lower depth bound
loop_data = zip(layer_ids, seismicity_files, polygon_files, 
                layer_depths_km[:-1], layer_depths_km[1:])
for i, seismicity_file, polygon_file, z_min, z_max in loop_data:
        
    # read seismicity and polygons
    layer_seismicity = pd.read_csv(seismicity_file)
    layer_polygons = read_polygons(polygon_file)
    
    # fill in depths, specify source mechanisms, clean up dip & rake
    n_zones = len(layer_seismicity)
    idx = layer_seismicity.index
    # constant columns: every zone of the layer gets the layer's depth
    # bounds and the layer id
    layer_seismicity['zmin'] = pd.Series(np.full(n_zones, z_min), index=idx)
    layer_seismicity['zmax'] = pd.Series(np.full(n_zones, z_max), index=idx)
    layer_seismicity['layerid'] = pd.Series(np.full(n_zones, i), index=idx)
    # NOTE(review): tb.wrap presumably maps the rake angles into a fixed
    # interval - confirm against the toolbox module
    layer_seismicity['rake'] = tb.wrap(layer_seismicity['rake'])
    # the mechanism is derived from dip and rake as read from file, i.e.
    # before the substitutions in the next three statements
    layer_seismicity['mechanism'] = pd.Series(
        smt.focal_mechanisms(layer_seismicity['dip'], layer_seismicity['rake']), index=idx)
    # NOTE(review): -1 looks like a "not specified" marker for dip and
    # strike in the input files - confirm; such entries get fixed values
    layer_seismicity.loc[layer_seismicity['dip'] == -1, 'dip'] = 45
    layer_seismicity.loc[layer_seismicity['mechanism'] == 'undefined', 'rake'] = 90
    layer_seismicity.loc[layer_seismicity['strike'] == -1, 'strike'] = 0
    # all zones start from the smallest configured minimum magnitude
    layer_seismicity['mmin'] = pd.Series(np.full(n_zones, min_mags[0]), index=idx) 
    
    # put it all together
    layer_source_data = pd.merge(layer_seismicity, layer_polygons, on='zoneid')
    all_source_data = pd.concat([all_source_data, layer_source_data], ignore_index=True)
    all_polygons = pd.concat([all_polygons, layer_polygons], ignore_index=True)

# join the auxiliary table on 'zoneid'; its zmin/zmax/dip/rake/mechanism
# columns are dropped first, so the values computed above are the ones kept
aux_data = pd.read_csv(aux_file)
aux_data = aux_data.drop(['zmin','zmax','dip','rake','mechanism'], axis=1)
all_source_data = pd.merge(all_source_data, aux_data, on='zoneid')

# index the table by zone id ('zoneid' also remains available as a column)
all_source_data = all_source_data.set_index(all_source_data['zoneid'])

# write everything to kml
# (one smt.df_to_kml call per layer id, labelled 'layer <id>')
for i in set(all_source_data['layerid']):
    indices = all_source_data['layerid'] == i
    smt.df_to_kml(all_source_data[indices], 'layer %d' % i)

In [None]:
# twin certain zones to have megathrust
# Subduction-interface zones with mmax > 7.5 are split in two: the original
# zone is capped at mmax = 7.5, and a copy (the "twin") starts at mmin = 7.5
# with tectonic subregion 'subduction interface megathrust' and a zone id of
# (zoneid % 1000) + 1000. The guard skips the step once megathrust zones are
# present, so running the cell again does not twin the zones a second time.
if not (all_source_data['tectonic subregion']
        == 'subduction interface megathrust').any():
    indices = ((all_source_data['tectonic subregion'] == 'subduction interface')
               & (all_source_data['mmax'] > 7.5))
    sub_inter_zones = all_source_data[indices].copy()

    all_source_data.loc[indices, 'mmax'] = 7.5
    # every row of the copy is a twin, so set the whole column; `indices`
    # is aligned with all_source_data, not with the copied subset
    sub_inter_zones['mmin'] = 7.5
    sub_inter_zones['tectonic subregion'] = 'subduction interface megathrust'
    sub_inter_zones['zoneid'] = (sub_inter_zones['zoneid'] % 1000) + 1000
    all_source_data = pd.concat([all_source_data, sub_inter_zones])
    # re-index so that the twins are addressed by their new zone ids
    all_source_data = all_source_data.set_index(all_source_data['zoneid'])

In [None]:
# add some binwise seismicity rates for comparison
# For each unit-magnitude bin starting at 5, 6 and 7, the bin is clipped to
# the zone's [mmin, mmax]. The column 'logN_<lo>-<hi>' then holds
# log10(10**(a - b*lo) - 10**(a - b*hi)), rounded to two decimals.
a_values = all_source_data['avalue']
b_values = all_source_data['bvalue']
for bin_start in range(5, 8):
    bin_end = bin_start + 1
    clipped_lo = np.maximum(all_source_data['mmin'], bin_start)
    clipped_hi = np.minimum(all_source_data['mmax'], bin_end)
    rate_from_lo = 10**(a_values - b_values*clipped_lo)
    rate_from_hi = 10**(a_values - b_values*clipped_hi)
    bin_log_rate = np.log10(rate_from_lo - rate_from_hi).round(2)
    column_name = 'logN_%.1f-%.1f' % (bin_start, bin_end)
    all_source_data[column_name] = pd.Series(bin_log_rate,
                                             index=all_source_data.index)

In [None]:
# show a summary
# first and last rows of the table, with the columns listed below left out
drop_columns = [
    'polygon coordinates', 'tectonic zone', 'region',
    'concerns', 'layerid', 'zmin',
    'aspect ratio', 'dip', 'rake', 'strike',
    'logN_5.0-6.0', 'logN_6.0-7.0',
]
summary = all_source_data.drop(drop_columns, axis=1)
pd.concat([summary.head(), summary.tail()])

In [None]:
# write a template to a file, to fill in missing info
#keep = ['zoneid','zmin','zmax','strike','dip','rake']
#for i, aux_file in zip(layer_ids, aux_files):
#    layer_data = sources_data[sources_data['layerid'] == i]
#    layer_data[keep].to_csv(aux_file)

In [None]:
# write NRML source file
# convert the table to a source list, wrap it in an hmtk source model and
# serialise that model to the areal source model file
sources = smt.df_to_source_list(all_source_data)
model_name = model_path + ' areal'
source_model = src.source_model.mtkSourceModel(identifier='1',
                                               name=model_name,
                                               sources=sources)
source_model.serialise_to_nrml(areal_source_model)

In [None]:
# write source data to TSV file
# (tab-separated dump of the complete source table, index included)
all_source_data.to_csv(all_source_data_file, sep='\t')

In [None]:
class PseudoCatalogue(object):
    """
    Ugly hack: construct a pseudo-catalogue from average mechanisms.

    Every source of the model contributes one row to ``self.data`` (a
    pandas.DataFrame), placed at the mean of the source polygon's
    longitudes and latitudes. The row carries the strike, dip and rake of
    the source's first nodal plane entry and its first hypocentre depth
    entry.

    :param source_model: object offering ``convert_to_oqhazardlib(tom)``
    :param select_depth: 'all' (default) keeps every source; any other
        value keeps only the sources whose depth equals it
    """
    def __init__(self, source_model, select_depth='all'):
        data = []
        oq_sources = source_model.convert_to_oqhazardlib(tom.PoissonTOM(1.0))
        for source in oq_sources:
            # [0][1]: second element of the first distribution entry
            # NOTE(review): presumably entries are (weight, value) pairs -
            # confirm against openquake.hazardlib
            depth = source.hypocenter_distribution.data[0][1]
            if select_depth != 'all' and depth != select_depth:
                continue

            nodal_plane = source.nodal_plane_distribution.data[0][1]
            data.append({
                'longitude': np.mean(source.polygon.lons),
                'latitude': np.mean(source.polygon.lats),
                'strike1': nodal_plane.strike,
                'dip1': nodal_plane.dip,
                'rake1': nodal_plane.rake,
                # pseudo-magnitude: 2.5 times the source's maximum magnitude
                # NOTE(review): looks like a symbol-size scaling - confirm
                'magnitude': source.get_min_max_mag()[1]*2.5,
                'depth': depth,
                'id': source.id,
            })
        self.data = pd.DataFrame(data)

    def get_number_tensors(self):
        """
        Return the number of rows (pseudo-events) in the catalogue.
        """
        # len() of the frame also works when no source was selected and
        # the frame therefore has no 'magnitude' column
        return len(self.data)

# pseudo-catalogue over the whole areal model (select_depth defaults to 'all')
catalogue = PseudoCatalogue(source_model)

In [None]:
# Plot one map per distinct depth found in the pseudo-catalogue: the sources
# at that depth, their focal mechanisms and their ids as text labels. Each
# map is saved as "ArealModel<depth>kmDepth.png".
map_config = {"min_lon": 60, "max_lon": 105, 
              "min_lat": 0,  "max_lat": 40, "resolution": "l",
              "parallel_meridian_spacing": 5}
parser = nrmlSourceModelParser(areal_source_model)

for depth in sorted(set(catalogue.data['depth'])):
    basemap = HMTKBaseMap(map_config, '')

    # read the model anew for every depth, because its source list is
    # replaced by the depth selection right below
    source_model_read = parser.read_file('Areal Source Model')
    selected_sources = [source for source in source_model_read.sources 
                        if source.hypo_depth_dist.data[0][1] == depth]
    source_model_read.sources = selected_sources
    selected_catalogue = PseudoCatalogue(source_model_read)

    basemap.add_source_model(source_model_read, overlay=True) 
    basemap.add_focal_mechanism(selected_catalogue, magnitude=False)
    for _, item in selected_catalogue.data.iterrows():
        # pass the label positionally: the keyword was renamed from `s`
        # to `text` in Matplotlib 3.3
        plt.annotate(item['id'], xy=(item['longitude'], item['latitude']))
    plt.savefig("ArealModel%gkmDepth.png" % depth, dpi=300,
                transparent=True, bbox_inches='tight', pad_inches=0.1)