# Source models for Nath & Thingbaijam (2012)

Read the source description input files from the paper's online supplementary material and write the resulting source models to NRML (XML), TSV and KML files.

In [None]:
# Render matplotlib figures inline in the notebook output.
%matplotlib inline
# Load the IPython autoreload extension (switched on by "%autoreload 2" in the import cell).
%load_ext autoreload

In [None]:
import os

%autoreload 2
import source_model_tools as smt

import numpy as np
import pandas as pd
import toolbox as tb
import lxml.etree as et
import matplotlib.pyplot as plt
from StringIO import StringIO
from IPython.display import display

import hmtk.sources as src
from openquake.hazardlib import tom, geo

from hmtk.plotting.mapping import HMTKBaseMap
from hmtk.parsers.source_model.nrml04_parser import nrmlSourceModelParser

# Widen pandas' console display to 120 characters.
pd.options.display.width = 120

In [None]:
# Depth layering of the source model: one id per layer, plus the layer
# boundary depths in km (one more boundary than there are layers).
layer_ids = list(range(1, 5))
layer_depths_km = [0, 25, 70, 180, 300]
# Lower magnitude thresholds used by the model.
min_mags = [4.5, 5.5]

# Input files live in a sub-directory named after the model.
model_path = 'nath2012probabilistic'
polygon_file_template = os.path.join(model_path, 'polygonlay%d.txt')
seismicity_file_template = os.path.join(model_path, 'seismicitylay%d.txt')
polygon_files = []
seismicity_files = []
for layer_id in layer_ids:
    polygon_files.append(polygon_file_template % layer_id)
    seismicity_files.append(seismicity_file_template % layer_id)

aux_file = 'auxiliary data.csv'

# One list of per-layer smoothed-seismicity files per minimum magnitude.
smoothed_data_template = os.path.join(model_path, 'lay%dsmooth%.1f.txt')
smoothed_data_files = []
for min_mag in min_mags:
    smoothed_data_files.append(
        [smoothed_data_template % (layer_id, min_mag)
         for layer_id in layer_ids])

In [None]:
def read_polygons(file_name):
    """
    Read polygon descriptions from text file into pandas.DataFrame.

    File format as per Nath & Thingbaijam (2012): the first line holds the
    comma-separated column names and every following line holds one
    record. Square brackets around names and values are stripped. The
    'zoneid' field is parsed as an integer; every other field is parsed
    as numeric data, with ';' separating rows and ',' separating columns.

    Blank lines (for instance a trailing newline at the end of the file)
    are skipped instead of raising ValueError.

    :param file_name: path of the polygon text file
    :returns: pandas.DataFrame with one row per record and one column
        per header field
    """
    with open(file_name) as f:
        keys = [key.strip('[]') for key in f.readline().strip().split(',')]
        result = []
        # iterate lazily over the file instead of loading every line
        for line in f:
            line = line.strip()
            if not line:
                continue
            # split at most len(keys) - 1 times so that the last field
            # keeps its own embedded commas
            values = line.split(',', len(keys) - 1)
            entry = {}
            for key, value in zip(keys, values):
                value = value.strip('[]')
                if key == 'zoneid':
                    entry[key] = int(value)
                else:
                    entry[key] = np.genfromtxt(
                        StringIO(value.replace(';', '\n')), delimiter=',')
            result.append(entry)
    return pd.DataFrame.from_dict(result)

In [None]:
# read areal polygons and seismicity statistics for each layer
# Builds two module-level tables:
#   areal_source_df   - seismicity statistics merged with polygons and auxiliary data
#   areal_polygons_df - the polygons of all layers, concatenated
areal_source_df = pd.DataFrame()
areal_polygons_df = pd.DataFrame()
# consecutive entries of layer_depths_km give each layer's upper and lower depth
loop_data = zip(layer_ids, seismicity_files, polygon_files, 
                layer_depths_km[:-1], layer_depths_km[1:])
for layer_id, seismicity_file, polygon_file, z_min, z_max in loop_data:
        
    # read seismicity and polygons
    layer_seis_df = pd.read_csv(seismicity_file)
    layer_poly_df = read_polygons(polygon_file)
    
    # fill in depths, specify source mechanisms, clean up dip & rake
    n_zones = len(layer_seis_df)
    idx = layer_seis_df.index
    layer_seis_df['zmin'] = pd.Series(np.full(n_zones, z_min), index=idx)
    layer_seis_df['zmax'] = pd.Series(np.full(n_zones, z_max), index=idx)
    layer_seis_df['layerid'] = pd.Series(np.full(n_zones, layer_id), index=idx)
    # NOTE(review): tb.wrap presumably wraps rake into a standard angular range - confirm in toolbox
    layer_seis_df['rake'] = tb.wrap(layer_seis_df['rake'])
    # the mechanism is classified from the dip as read from file, i.e. before
    # the -1 values are replaced below
    layer_seis_df['mechanism'] = pd.Series(
        smt.focal_mechanisms(layer_seis_df['dip'], layer_seis_df['rake']), index=idx)
    # NOTE(review): -1 looks like a "not specified" marker in the input files - confirm;
    # such dips and strikes are replaced by 45 and 0, and zones whose mechanism
    # is 'undefined' get a rake of 90
    layer_seis_df.loc[layer_seis_df['dip'] == -1, 'dip'] = 45
    layer_seis_df.loc[layer_seis_df['mechanism'] == 'undefined', 'rake'] = 90
    layer_seis_df.loc[layer_seis_df['strike'] == -1, 'strike'] = 0
    # every areal zone starts out with the first (lowest) minimum magnitude
    layer_seis_df['mmin'] = pd.Series(np.full(n_zones, min_mags[0]), index=idx) 
    
    # put it all together
    # (merge uses pandas' default join type, so only zone ids present in both tables are kept)
    layer_source_df = pd.merge(layer_seis_df, layer_poly_df, on='zoneid')
    areal_source_df = pd.concat([areal_source_df, layer_source_df], ignore_index=True)
    areal_polygons_df = pd.concat([areal_polygons_df, layer_poly_df], ignore_index=True)

# merge auxiliary data (crucially, tectonic zones)
aux_df = pd.read_csv(aux_file)
# these columns already exist in areal_source_df; drop the auxiliary copies
# so that the merge does not produce duplicated (suffixed) columns
aux_df = aux_df.drop(['zmin','zmax','dip','rake','mechanism'], axis=1)
areal_source_df = pd.merge(areal_source_df, aux_df, on='zoneid')

areal_source_df = smt.sort_and_reindex(areal_source_df)

In [None]:
# Base name (without extension) used for the areal source model outputs.
areal_source_model_file = 'areal_source_model'

# Twin the sources by magnitude, add bin-wise rates and dump the table as TSV.
areal_twinned_df = smt.twin_source_by_magnitude(areal_source_df)
areal_output_df = smt.add_binwise_rates(areal_twinned_df)
areal_output_df.to_csv(areal_source_model_file + '.tsv', sep='\t')

In [None]:
# Serialise the magnitude-twinned areal sources to an NRML source model.
# (Per the original note the twinning is what adds the megathrust zones;
# confirm in source_model_tools.)
areal_twinned_sources_df = smt.twin_source_by_magnitude(areal_source_df)
areal_source_list = smt.source_df_to_list(areal_twinned_sources_df)
areal_source_model = src.source_model.mtkSourceModel(
    identifier='1', name=model_path + ' areal', sources=areal_source_list)
areal_nrml_file = areal_source_model_file + '.xml'
areal_source_model.serialise_to_nrml(areal_nrml_file)

In [None]:
# write each layer to KML with added binwise rates
areal_kml_df = smt.add_binwise_rates(areal_source_df)
# the layer id is only needed to select rows, not in the KML output itself;
# drop it once up front instead of once per layer
temp_df = areal_kml_df.drop(['layerid'], axis=1)
for layer_id in layer_ids:
    indices = areal_kml_df['layerid'] == layer_id
    smt.source_df_to_kml(temp_df.loc[indices, :],
        '%s layer %d' % (areal_source_model_file, layer_id))

In [None]:
# Preview the first and last rows of the areal output table, with the
# bulkier columns hidden so that the remaining ones fit on screen.
drop_columns = ['polygon coordinates', 'tectonic zone', 'region',
                'concerns', 'zmax', 'zmin',
                'aspect ratio', 'dip', 'rake', 'strike',
                'logN_5.0-6.0', 'logN_6.0-7.0']
areal_summary_df = areal_output_df.drop(drop_columns, axis=1)
display(pd.concat([areal_summary_df.head(), areal_summary_df.tail()]))

In [None]:
# List the zones flagged as having no seismicity (polygon column hidden).
no_seismicity = areal_source_df['tectonic subregion'] == 'no seismicity'
areal_source_df[no_seismicity].drop(['polygon coordinates'], axis=1)

In [None]:
def _read_smoothed_layer(file_name, layer_id, min_mag):
    """
    Read one smoothed-seismicity file and tag its rows.

    The 'lat'/'lon' columns are renamed to 'latitude'/'longitude' and the
    given layer id and minimum magnitude are stored in the 'layerid' and
    'mmin' columns.
    """
    layer_df = pd.read_csv(file_name)
    layer_df = layer_df.rename(
        columns={'lat': 'latitude', 'lon': 'longitude'})
    layer_df['layerid'] = layer_id
    layer_df['mmin'] = min_mag
    return layer_df


# read smoothed seismicity data: one table per minimum magnitude, each
# made of the tables of all layers, then everything stacked together
smoothed_df_list = []
for mag, file_names in zip(min_mags, smoothed_data_files):
    layer_smoothed_df_list = [
        _read_smoothed_layer(smoothed_file, layer_id, mag)
        for layer_id, smoothed_file in zip(layer_ids, file_names)]
    smoothed_df_list.append(pd.concat(layer_smoothed_df_list))

smoothed_df = smt.sort_and_reindex(pd.concat(smoothed_df_list))

display(pd.concat((smoothed_df.head(), smoothed_df.tail())))

In [None]:
# add information from smoothed seismicity from areal sources
# i.e. copy zone attributes onto every smoothed-seismicity point, taking them
# from the areal zone (of the same layer) whose polygon is closest to the point
columns_to_copy = ['zoneid', 'zmax', 'zmin', 'tectonic subregion',
                   'avalue', 'bvalue', 'stdbvalue', 'mmax', 'stdmmax',
                   'rake', 'dip', 'strike', 'aspect ratio', 'msr']

# distance table: one row per smoothed point, one column per areal zone;
# entries stay at infinity for zones that are in a different layer
distances = np.full((len(smoothed_df),len(areal_source_df)), np.inf)
# NOTE(review): the iterrows() label i is used as a positional column index,
# which assumes areal_source_df is indexed 0..n-1 (presumably ensured by
# smt.sort_and_reindex - confirm)
for i, area_series in areal_source_df.iterrows():

    at_depth = (smoothed_df['layerid'] == area_series['layerid']).values
    lons = smoothed_df.loc[at_depth, 'longitude'].values
    lats = smoothed_df.loc[at_depth, 'latitude'].values
    mesh = geo.mesh.Mesh(lons, lats)
    poly_coords = area_series['polygon coordinates']
    # NOTE(review): each coordinate pair is unpacked as (lat, lon) and passed to
    # Point in that order, while OpenQuake's Point takes longitude first -
    # confirm the column order of the polygon files
    points = [geo.point.Point(lat, lon) for lat, lon in poly_coords]
    polygon = geo.polygon.Polygon(points)
    distances[at_depth, i] = polygon.distances(mesh)

# for every smoothed point, the column (zone) with the smallest distance
index_min = distances.argmin(axis=1)
for i, area_series in areal_source_df.iterrows():
    picked = i == index_min
    for column in columns_to_copy:
        smoothed_df.loc[picked, column] = area_series[column]
    # mmin is only ever raised: points keep their own mmin unless the zone's is larger
    above_min = area_series['mmin'] > smoothed_df['mmin']
    smoothed_df.loc[picked & above_min, 'mmin'] = area_series['mmin']

In [None]:
# Preview the first and last smoothed points that have a zone id.
assigned_df = smoothed_df[smoothed_df['zoneid'].notnull()]
pd.concat((assigned_df.head(), assigned_df.tail()))

In [None]:
# Spot check: show the smoothed point(s) at longitude 98, latitude 3.7.
at_longitude = smoothed_df['longitude'] == 98
at_latitude = smoothed_df['latitude'] == 3.7
display(smoothed_df[at_longitude & at_latitude])

In [None]:
# Base name (without extension) used for the smoothed source model outputs.
smoothed_source_data_file = 'smoothed_source_model'

# Twin the sources by magnitude, add bin-wise rates and dump the table as TSV.
smoothed_twinned_df = smt.twin_source_by_magnitude(smoothed_df)
smoothed_output_df = smt.add_binwise_rates(smoothed_twinned_df)
smoothed_output_df.to_csv(smoothed_source_data_file + '.tsv', sep='\t')

In [None]:
# Write one NRML source model per minimum magnitude, built from the
# magnitude-twinned smoothed points that have a tectonic subregion.
has_subregion = smoothed_df['tectonic subregion'].notnull()
for min_mag in min_mags:
    indices = (smoothed_df['mmin'] == min_mag) & has_subregion
    selected_twinned_df = smt.twin_source_by_magnitude(
        smoothed_df.loc[indices, :])
    smoothed_source_list = smt.source_df_to_list(selected_twinned_df)
    model_name = '%s smoothed m_min=%g' % (model_path, min_mag)
    smoothed_source_model = src.source_model.mtkSourceModel(
        identifier='1', name=model_name, sources=smoothed_source_list)
    file_name = smoothed_source_data_file + '_mmin%g.xml' % min_mag
    smoothed_source_model.serialise_to_nrml(file_name)

In [None]:
# write each layer to KML with added binwise rates
smoothed_kml_df = smt.add_binwise_rates(smoothed_df)
smoothed_kml_df.drop(['zmax','zmin','aspect ratio'], axis=1, inplace=True)
# 'mmin' and 'layerid' are only needed to select rows (taken from smoothed_df
# below), not in the KML output; drop them once instead of on every iteration
temp_df = smoothed_kml_df.drop(['mmin','layerid'], axis=1)
for min_mag in reversed(min_mags):
    for layer_id in reversed(layer_ids):
        indices = ((smoothed_df['mmin'] == min_mag) &
                   (smoothed_df['layerid'] == layer_id))
        smt.source_df_to_kml(temp_df.loc[indices, :],
            '%s layer %d mmin %g' % (smoothed_source_data_file, layer_id, min_mag))

In [None]:
class PseudoCatalogue(object):
    """
    Ugly hack: construct a pseudo-catalogue from average mechanisms.

    One record is built per source of a source model, so that the sources
    can be handed to code that expects a catalogue-like object exposing
    ``data`` and ``get_number_tensors()``.

    ``data`` is a pandas.DataFrame with the columns 'longitude',
    'latitude', 'strike1', 'dip1', 'rake1', 'magnitude', 'depth' and 'id'
    (it has no columns at all when no source was selected).
    """
    def __init__(self, source_model, select_depth='all'):
        """
        :param source_model: object providing ``convert_to_oqhazardlib``
            (e.g. an mtkSourceModel)
        :param select_depth: 'all' to keep every source, or a depth value;
            then only sources whose depth equals it exactly are kept
        """
        data = []
        oq_sources = source_model.convert_to_oqhazardlib(tom.PoissonTOM(1.0))
        for source in oq_sources:
            # only the first entry of each distribution is considered
            depth = source.hypocenter_distribution.data[0][1]
            if select_depth != 'all' and depth != select_depth:
                continue

            nodal_plane = source.nodal_plane_distribution.data[0][1]
            data.append({
                # the mean of the polygon vertices stands in for a location
                'longitude': np.mean(source.polygon.lons),
                'latitude': np.mean(source.polygon.lats),
                'strike1': nodal_plane.strike,
                'dip1': nodal_plane.dip,
                'rake1': nodal_plane.rake,
                # NOTE(review): maximum magnitude scaled by 2.5, presumably
                # to control the plotted symbol size - confirm
                'magnitude': source.get_min_max_mag()[1]*2.5,
                'depth': depth,
                'id': source.id,
            })
        self.data = pd.DataFrame(data)

    def get_number_tensors(self):
        """
        Return the number of records in the pseudo-catalogue.

        Counts rows rather than the 'magnitude' column, so that an empty
        selection yields 0 instead of raising AttributeError.
        """
        return len(self.data)

# pseudo-catalogue of all areal sources (default select_depth='all')
catalogue = PseudoCatalogue(areal_source_model)

In [None]:
# Plot one map per source depth, showing the areal sources at that depth
# with their focal mechanisms and ids, and save each map as a PNG.
map_config = {"min_lon": 60, "max_lon": 105,
              "min_lat": 0,  "max_lat": 40, "resolution": "l",
              "parallel_meridian_spacing": 5}
parser = nrmlSourceModelParser(areal_source_model_file + '.xml')

for depth in sorted(set(catalogue.data['depth'])):
    basemap = HMTKBaseMap(map_config, '')

    # the model is read afresh on every pass because its source list is
    # overwritten with the current depth's selection just below
    source_model_read = parser.read_file('Areal Source Model')
    selected_sources = [source for source in source_model_read.sources
                        if source.hypo_depth_dist.data[0][1] == depth]
    source_model_read.sources = selected_sources
    selected_catalogue = PseudoCatalogue(source_model_read)

    basemap.add_source_model(source_model_read, overlay=True)
    basemap.add_focal_mechanism(selected_catalogue, magnitude=False)
    for _, item in selected_catalogue.data.iterrows():
        # the label is passed positionally: its keyword was renamed from
        # ``s`` to ``text`` in matplotlib 3.3, positional works with both
        plt.annotate(item['id'], xy=(item.longitude, item.latitude))
    plt.savefig("ArealModel%gkmDepth.png" % depth, dpi=300,
                transparent=True, bbox_inches='tight', pad_inches=0.1)

In [None]:
# NOTE(review): scratch calculation; its purpose is not evident from this notebook
(210 - 30)/2