In [1]:
# use the environment of py-gplate
import sys
import gplately
import numpy as np
import gplately.pygplates as pygplates
from gplately import ptt
import glob, os
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.colors as mcolors
import matplotlib.patches as mpatches
from matplotlib import gridspec
import cartopy.crs as ccrs
from plate_model_manager import PlateModelManager
import json

# directory to the aspect Lab
ASPECT_LAB_DIR = os.environ['ASPECT_LAB_DIR']
sys.path.append(ASPECT_LAB_DIR)
RESULT_DIR = os.path.join(ASPECT_LAB_DIR, 'results')
import shilofue.GPlateLib as GPlateLib
# import utilities from the subdirectory
sys.path.append(os.path.join(ASPECT_LAB_DIR, 'utilities', "python_scripts"))
import Utilities


### Workflow for Subduction Dataset from Plate Reconstruction

#### General workflow

This workflow defines the steps to query a subduction plate dataset based on plate reconstruction data.

##### Step 1: Loading Plate Reconstruction Data
To extract additional properties such as **plate name**, **start time**, and **end time**, it is necessary to load the plate reconstruction files.

##### Step 2: Fixing Subducting Plate Age
To accurately determine the age of the subducting plate, we must ensure that the points are correctly pinned to the subducting plate. This involves two workflows:

1. **Automatic Workflow**: The process attempts to automatically pin the points to the subducting plate.
2. **Manual Workflow**: If the automatic workflow produces invalid values, the manual workflow is used to correct these errors.

The results of both workflows are recorded in a CSV file, which can be loaded for further analysis.

##### Step 3: Extracting Key Information
The ultimate goal of the workflow is to pin sample points to the global subduction zones and extract the following information:

- **Location of points**: Geographic coordinates of the sample points.
- **Length of the arc**: The arc length of the subduction zone.
- **Plate age**: The age of the subducting plate at the pinned location.
- **Convergence rate**: The rate of convergence between the plates.
- **Trench mobility**: The movement of the trench over time.
- **Trench PID**: The plate ID (PID) of the trench, as used in the reconstruction.
- **Subduction PID**: The plate ID (PID) of the subducting plate, as used in the reconstruction.

> **Note**: The **trench PID** and **subduction PID** correspond to the IDs used in the plate reconstruction process.


#### To get a good dataset for a time step

1. **Select the Automatic Workflow and get a global dataset**:
   - Use the global dataset with the automatic workflow to initiate the data processing. This is a good reference dataset to refine points of respective trenches


2. **Extract a Local Dataset**:
    - Look up the id of a trench and put it into the workflow to extract that single trench.
    - When we are satisfied, dump the data into a finalized dataset.

3. **Analyze the Finalized Dataset**


### Initialization

First assign a reconstruction time

Here I need a file to lookup ids and names of the subductions for each reconstruction time.    

In [None]:
# Reconstruction time. It is passed to GClass.SetReconstructionTime and also
# formatted with "%.2d" below to build the exported file name, so it must be an integer.
reconstruction_time=0 # time of reconstruction, must be an integer

# Directory holding the exported plate reconstruction files; stop early if it is missing.
dir_re = os.path.join(ASPECT_LAB_DIR, "dtemp/gplate_export_test0/Muller_etal_2019_PlateBoundaries_no_topologies")
assert(os.path.isdir(dir_re))

# Instantiate the project helper classes from shilofue.GPlateLib.
GClass = GPlateLib.GPLATE_CLASS()
GParseReconstruction = GPlateLib.PARSERECONSTRUCTION()


# Set the time first, then run the reconstruction for that time.
GClass.SetReconstructionTime(reconstruction_time)
GClass.Reconstruct()

# Read the exported file for this time step ("reconstructed_<tt>Ma.xy").
# Later cells read trench_names / trench_pids from this object and call LookupNameByPid on it.
infile = os.path.join(dir_re, "reconstructed_%.2dMa.xy" % reconstruction_time)
GParseReconstruction.ReadFile(infile)

### Search a Single Subduction Zone


#### Search with a key word

In [None]:
# List every trench name known to the parsed reconstruction file.
print(GParseReconstruction.trench_names)

keyword = "ryu"

# Case-insensitive substring search: remember the position of every trench
# whose name contains the keyword.
needle = keyword.lower()
matching_indices = []
for position, trench_name in enumerate(GParseReconstruction.trench_names):
    if needle in trench_name.lower():
        matching_indices.append(position)

# Report position, name and id of each match, separated by an empty line.
for index in matching_indices:
    print(index)
    print("name: ", GParseReconstruction.trench_names[index])
    print("id: ", GParseReconstruction.trench_pids[index])
    print("")

#### Search with a trench id

In order to do this, first lookup in a global dataset. Then use the following block to query for the data

##### t = 0
12001, ,CAS
686, Indonesian bndy w AUS-mg, ANDA-SUM
651, Flores Banda SZ, JAVA
669, North Sulawesi Subduction, SULA
612, Luzon subduction, LUZ
678, Philippine trench, PHIL
648, Okinawa Trough (Ryuku) from EarthByte cob MG 4-20-07

In [None]:
# Trench to inspect; the id comes from the lookup list in the preceding markdown cell.
trench_pid = 648
_name = GParseReconstruction.LookupNameByPid(trench_pid)
print(_name)

# get the data of this subduction zone
one_subduction_data = GClass.GetOneSubductionByTrenchId(trench_pid)

# basic plots
# plot the reconstructed zone on a Mollweide map centred on 180 degrees longitude
fig = plt.figure(figsize=(10,6), dpi=100)
ax = fig.add_subplot(111, projection=ccrs.Mollweide(central_longitude = 180))
# dashed grid lines every 15 degrees
gl=ax.gridlines(color='0.7',linestyle='--', xlocs=np.arange(-180,180,15), ylocs=np.arange(-90,90,15))
gl.left_labels = True
plt.title(f'{reconstruction_time} Ma')
# you may change the extent to global to see the sample points in a world map.
ax.set_global()
# ax.set_extent([-80,0,-70,0])
# plot the coastline
GClass.PlotCoastlines(ax)
#GClass.gplot.plot_trenches(ax, color='k')
#GClass.gplot.plot_subduction_teeth(ax, color='k')
# plot the seafloor age; the returned handle feeds the age colorbar below
im_age = GClass.PlotSeaFloorAges(ax)

# plot the subduction zone as small red dots (lon/lat given in PlateCarree coordinates)
im_sub = ax.scatter(one_subduction_data.lon, one_subduction_data.lat, marker=".", s=3, c='r', transform=ccrs.PlateCarree())

# set plot options
cbar_age = plt.colorbar(im_age) # colorbar for ages
cbar_age.ax.get_yaxis().labelpad = 15
cbar_age.ax.set_ylabel("Age (Ma)", rotation=90)
# NOTE(review): im_sub is drawn with a single constant colour (c='r'), so this
# colorbar does not encode any data value even though it is labelled as a
# velocity. Confirm whether a velocity-coloured scatter was intended here.
cbar = plt.colorbar(im_sub) # colorbar for trenches
cbar.ax.get_yaxis().labelpad = 15
# cbar.ax.set_ylabel('Trench Velocity Magnitude (in cm/yr)', rotation=90) # choose between these two labels to use for trenches
cbar.ax.set_ylabel('Convergence Velocity Magnitude (in cm/yr)', rotation=90)

### Workflow to extract a dataset

#### Defining the Methodology

In this section, we define the methodology for working with the subduction dataset.

1. **Loading Data**: 
   - You can choose to either load an existing CSV file containing previously saved data or start the process from scratch.

2. **Sampling Trenches**:
   - You can choose between two sampling methods:
     - **Sample all trenches at once**: This option allows you to sample all trenches in a single operation.
     - **Sample a specific trench**: Alternatively, you can focus on sampling a single trench for more granular analysis.

In [3]:
# Resampling parameters (in degrees of arc, per the original note): the edge length
# and the resample-section length handed to the GClass resampling routines.
arc_length_edge = 0.0; arc_length_resample_section = 2.0  # by degree
# arc_length_edge = 2.0; arc_length_resample_section = 2.0  # by degree

# Workflow switches (keep exactly one of the three lines active):
#   use_recorded_file - load the dataset from `recorded_file` instead of resampling
#   resample_all      - resample every subduction zone; otherwise only `trench_pid`
#   trench_pid        - trench id used when resample_all is False
use_recorded_file = True; resample_all = True; trench_pid = None # use this option to start from a recorded file
# use_recorded_file = False; resample_all = True; trench_pid = None
# use_recorded_file = False; resample_all = False; trench_pid = 648

# Path of the recorded CSV file for this reconstruction time
# (read only when use_recorded_file is True).
recorded_file = os.path.join(ASPECT_LAB_DIR, "files", "ThDSubduction", "gplate_json_files", "subduction_resampled_t_%.2e.csv" \
                         % (reconstruction_time))
# recorded_file = os.path.join(ASPECT_LAB_DIR, "dtemp", "gplate_export_test0", "subduction_resampled_t_%.2e_edge_%.2f_section_%.2f.csv" \
#                            % (reconstruction_time, arc_length_edge, arc_length_resample_section))

#### Extract from the dataset

In [None]:
# Build `subduction_data_resampled`, either from a recorded CSV file or by
# resampling the reconstruction from scratch.
if use_recorded_file:
    # Start from a previously exported (and possibly hand-edited) CSV file.
    print("use recorded file: ", recorded_file)
    # Fail with a message that names the missing file instead of a bare AssertionError.
    assert os.path.isfile(recorded_file), "recorded file not found: %s" % recorded_file
    subduction_data_resampled = pd.read_csv(recorded_file)
else:
    if resample_all:
        # Resample every subduction zone at once.
        subduction_data_resampled = GClass.ResampleAllSubduction(arc_length_edge, arc_length_resample_section)
    else:
        # Resample only the trench selected by `trench_pid`; the second return value is not used.
        subduction_data_resampled, _ = GClass.ResampleSubductionById(trench_pid, arc_length_edge, arc_length_resample_section)

    # Add the bookkeeping columns. A scalar is broadcast to every row, which
    # replaces the original per-row list comprehensions; column order is unchanged.
    for column in ('age', 'lon_fix', 'lat_fix', 'fix_age_polarity', 'marker'):
        subduction_data_resampled[column] = np.nan
    subduction_data_resampled['marker_fill'] = 'none'
    subduction_data_resampled['color'] = np.nan

    # fix the ages (automatic workflow; operates on the data frame passed in)
    GClass.FixTrenchAge(subduction_data_resampled)

#### Fix the invalid ages and plot the dataset in a global overview

In [None]:
# Collect the rows whose age is still NaN after the automatic workflow.
# NOTE: the entries are row positions and are used below with both .iloc (position)
# and .loc (label). Like the original code, this assumes the default 0..n-1 index.
invalid_indexes = [i for i, age_is_missing in enumerate(subduction_data_resampled['age'].isna()) if age_is_missing]

print("len(ages): ")
print(len(subduction_data_resampled['age']))
print("ages: ")
print(subduction_data_resampled['age'])
print("invalid_indexes: ")
print(invalid_indexes)

# Per-invalid-point correction: distance d and direction theta used to move the
# point before the age grid is queried again. -1 is the placeholder value; edit
# individual entries by hand, as in the commented example below.
fix_invalid_ds = [-1] * len(invalid_indexes)
fix_invalid_thetas = [-1] * len(invalid_indexes)
# fix_invalid_ds[0] = 200e3; fix_invalid_thetas[0] = 180.0


# One-row working copy of the point currently being fixed (stays None if nothing is invalid).
subduction_data_resampled_local = None
for i, d, theta in zip(invalid_indexes, fix_invalid_ds, fix_invalid_thetas):
    # Build the working copy from the row being fixed. The original built it once
    # from whatever value `i` held after the detection loop (i.e. the last row).
    subduction_data_resampled_local = pd.DataFrame([subduction_data_resampled.iloc[i]])

    # Move the point by distance d along direction theta and query the age there.
    lon_moved, lat_moved = \
        Utilities.map_point_by_distance(subduction_data_resampled.iloc[i].lon, subduction_data_resampled.iloc[i].lat, theta, d)
    subduction_data_resampled_local['lon'] = lon_moved
    subduction_data_resampled_local['lat'] = lat_moved
    new_age = GClass.InterpolateAgeGrid(subduction_data_resampled_local)
    print(i, ": new age - ", new_age)
    subduction_data_resampled.loc[i, 'age'] = new_age

    # Record the moved location only when a valid age was found there.
    # `new_age is not np.nan` was an identity test, which is True for practically
    # every computed NaN; test the value instead.
    if not np.all(pd.isna(new_age)):
        # Single .loc assignment of a scalar replaces the chained
        # `df['lon_fix'][i] = <one-element Series>` assignment.
        subduction_data_resampled.loc[i, 'lon_fix'] = subduction_data_resampled_local['lon'].iloc[0]
        subduction_data_resampled.loc[i, 'lat_fix'] = subduction_data_resampled_local['lat'].iloc[0]


# export the file to a temp file; the name encodes the time, the optional trench id
# and the resampling parameters
if resample_all:
    temp_file = os.path.join(ASPECT_LAB_DIR, "dtemp", "gplate_export_test0", "subduction_resampled_t_%.2e_edge_%.2f_section_%.2f.csv" \
                            % (reconstruction_time, arc_length_edge, arc_length_resample_section))
else:
    temp_file = os.path.join(ASPECT_LAB_DIR, "dtemp", "gplate_export_test0", "subduction_resampled_t_%.2e_pid_%d_edge_%.2f_section_%.2f.csv" \
                         % (reconstruction_time, int(trench_pid), arc_length_edge, arc_length_resample_section))

# don't mess up the existing files: only write when the data was freshly resampled
if not use_recorded_file:
    subduction_data_resampled.to_csv(temp_file)
    print("Data saved to %s" % temp_file)

In [None]:
# basic plots
# plot the reconstructed zone on a Mollweide map centred on 180 degrees longitude
fig = plt.figure(figsize=(10,6), dpi=100)
ax = fig.add_subplot(111, projection=ccrs.Mollweide(central_longitude = 180))
# dashed grid lines every 15 degrees
gl=ax.gridlines(color='0.7',linestyle='--', xlocs=np.arange(-180,180,15), ylocs=np.arange(-90,90,15))
gl.left_labels = True
plt.title(f'{reconstruction_time} Ma')
# you may change the extent to global to see the sample points in a world map.
ax.set_global()
# ax.set_extent([-80,0,-70,0])
# plot the coastline
GClass.PlotCoastlines(ax)
# plot the seafloor age; the returned handle feeds the age colorbar below
im_age = GClass.PlotSeaFloorAges(ax)
# plot all subduction zones and subduction teeth
# im_sub = GPlateLib.plot_one_subduction_data(ax, GClass.GetSubductionData())
GClass.gplot.plot_trenches(ax, color='k')
GClass.gplot.plot_subduction_teeth(ax, color='k')

# Overlay the sample points, split by whether they carry an age:
#   red    - points with a valid age (original location)
#   yellow - points whose age is still NaN
#   cyan   - (lon_fix, lat_fix) of the points with a valid age; rows where these
#            columns are NaN produce no visible marker
mask = (~subduction_data_resampled['age'].isna())
ax.scatter(subduction_data_resampled[mask].lon, subduction_data_resampled[mask].lat, marker=".", s=60, c='r', transform=ccrs.PlateCarree())
ax.scatter(subduction_data_resampled[~mask].lon, subduction_data_resampled[~mask].lat, marker=".", s=60, c='y', transform=ccrs.PlateCarree())
ax.scatter(subduction_data_resampled[mask].lon_fix, subduction_data_resampled[mask].lat_fix, marker=".", s=30, c='c', transform=ccrs.PlateCarree())

# set plot options
cbar_age = plt.colorbar(im_age) # colorbar for ages
cbar_age.ax.get_yaxis().labelpad = 15
cbar_age.ax.set_ylabel("Age (Ma)", rotation=90)

# write outputs: the same figure is saved as PDF and PNG; the file names encode
# the reconstruction time and the resampling parameters
fileout = os.path.join(RESULT_DIR, "gplate_subduction_zones", "subduction_resampled_t_%.2e_edge_%.2f_section_%.2f.pdf" \
                         % (reconstruction_time, arc_length_edge, arc_length_resample_section))
fileout1 = os.path.join(RESULT_DIR, "gplate_subduction_zones", "subduction_resampled_t_%.2e_edge_%.2f_section_%.2f.png" \
                         % (reconstruction_time, arc_length_edge, arc_length_resample_section))
# Create the output directory including missing parents. os.mkdir raised
# FileNotFoundError when RESULT_DIR itself did not exist yet.
os.makedirs(os.path.dirname(fileout), exist_ok=True)
fig.savefig(fileout)
fig.savefig(fileout1)
print("Save figure: %s" % fileout)
print("Save figure: %s" % fileout1)

#### Fix the invalid values

First, open the recorded file under

    files/ThDSubduction/gplate_json_files/

If no file is recorded yet, copy the file under this folder first

    dtemp/gplate_export_test0/

Then map out the invalid ages from the outputs from the previous section.

Use the polarity entry to tweak the resulting ages. There are two things to do:

    1. switch between 0.0 and 1.0 to query the point to different side of the trench
    2. use a 2.0 value and manually assign the point to use (lon_fix, lat_fix)

The next section is for interacting with existing results and improving on them.

In [None]:
# Manually fix the age of one sample point: move the point by distance d along
# direction theta and query the age grid at the new location.
i = 8  # row of the point to fix (take it from the printed invalid_indexes)
theta = 240.0  # 525.9224 - 360.0 + 70.0 # None: use the azimuth angle instead
d = 1700e3 # m

# One-row working copy of the original point.
subduction_data_resampled_local = pd.DataFrame([subduction_data_resampled.iloc[i]])
# Trench id of this point.
trench_id = subduction_data_resampled.iloc[i].trench_pid
if theta is None:
    # Fall back to the trench azimuth angle plus 180 degrees.
    theta = subduction_data_resampled.iloc[i].trench_azimuth_angle + 180.0

# migrate the point and interpolate age
lon_moved, lat_moved = \
    Utilities.map_point_by_distance(subduction_data_resampled.iloc[i].lon, subduction_data_resampled.iloc[i].lat, theta, d)
subduction_data_resampled_local['lon'] = lon_moved
subduction_data_resampled_local['lat'] = lat_moved
new_age = GClass.InterpolateAgeGrid(subduction_data_resampled_local)
print("theta: %.4f, d: %.4e" % (theta, d))
# Format the scalar values. Passing the one-element Series to "%.4f" relied on the
# implicit float(Series) conversion, which pandas has deprecated.
print("lon: %.4f, lat: %.4f" % (subduction_data_resampled_local['lon'].iloc[0], subduction_data_resampled_local['lat'].iloc[0]))
print("new_age: ", new_age)

# get the data of the subduction zone that the selected point belongs to
one_subduction_data = GClass.GetOneSubductionByTrenchId(trench_id)

# basic plots
# plot the reconstructed zone on a Mollweide map centred on 180 degrees longitude
fig = plt.figure(figsize=(10,6), dpi=100)
ax = fig.add_subplot(111, projection=ccrs.Mollweide(central_longitude = 180))
# dashed grid lines every 15 degrees
gl=ax.gridlines(color='0.7',linestyle='--', xlocs=np.arange(-180,180,15), ylocs=np.arange(-90,90,15))
gl.left_labels = True
plt.title(f'{reconstruction_time} Ma')
# you may change the extent to global to see the sample points in a world map.
ax.set_global()
# ax.set_extent([-80,0,-70,0])
# plot the coastline
GClass.PlotCoastlines(ax)
# plot the seafloor age; the returned handle feeds the age colorbar below
im_age = GClass.PlotSeaFloorAges(ax)

# plot the subduction zone; the returned handle feeds the second colorbar below
im_sub = GPlateLib.plot_one_subduction_data(ax, one_subduction_data)
# plot the original point (large red dot)
im_sub_point =ax.scatter(subduction_data_resampled.iloc[i].lon, subduction_data_resampled.iloc[i].lat, marker=".", s=60, c='r', transform=ccrs.PlateCarree())
# plot the migrated point (smaller cyan dot); note that this rebinds im_sub_point
im_sub_point =ax.scatter(subduction_data_resampled_local.lon, subduction_data_resampled_local.lat, marker=".", s=30, c='c', transform=ccrs.PlateCarree())

# set plot options
cbar_age = plt.colorbar(im_age) # colorbar for ages
cbar_age.ax.get_yaxis().labelpad = 15
cbar_age.ax.set_ylabel("Age (Ma)", rotation=90)
cbar = plt.colorbar(im_sub) # colorbar for trenches
cbar.ax.get_yaxis().labelpad = 15
# cbar.ax.set_ylabel('Trench Velocity Magnitude (in cm/yr)', rotation=90) # choose between these two labels to use for trenches
cbar.ax.set_ylabel('Convergence Velocity Magnitude (in cm/yr)', rotation=90)

### Analyze extracted data set

We plot the convergence / trench retreat rate of trenches below.
The plot is also combined with a sea floor age

Issue: We get a lot of nan value in ages
1. check the position of the sample points
2. plot the raster of oceanic plate age

#### Some notes

* ANDA-SUM: The change from trench retreat to advance (as in L05) is missing
* JAVA: essentially, I am fixing the ages to one point (on the corner, above Australia); the trench velocity shows retreat instead of advance (L05)
* LUZ: gplately data suggest an advance of 2.5, while L05 shows fast retreat motion
* Ryukyu: data is missing in the current dataset; fixed using values from Table 1 in Lallemand et al., 2005

Here we generate a variation of the previous plot, by differentiating valid age values and invalid age values.

In [None]:
from matplotlib.path import Path

# Custom "snowflake" marker: nine strokes, each running from the centre (0, 0)
# out to one arm tip. The vertices alternate centre / tip.
verts = [
    (0., 0.),   # Center
    (0.2, 0.6), # Upper arm
    (0., 0.),   # Center
    (0.4, 0.4), # Right diagonal
    (0., 0.),   # Center
    (0.6, 0.2), # Right arm
    (0., 0.),   # Center
    (0.4, -0.4),# Right down diagonal
    (0., 0.),   # Center
    (0.2, -0.6),# Bottom arm
    (0., 0.),   # Center
    (-0.4, -0.4),# Left down diagonal
    (0., 0.),   # Center
    (-0.6, -0.2),# Left arm
    (0., 0.),   # Center
    (-0.4, 0.4),# Left diagonal
    (0., 0.),   # Center
    (-0.2, 0.6),# Upper left arm
]

# One (MOVETO centre, LINETO tip) pair per stroke. The previous code list ended
# with MOVETO for the last tip, so the final (upper left) arm was never drawn.
codes = [Path.MOVETO, Path.LINETO] * (len(verts) // 2)

snowflake = Path(verts, codes)

# Plot options by ID: each value gives the marker style, the marker face colour
# and the short name used as legend label. The plotting code looks entries up
# with the integer subducting plate ID.
plot_options = \
{
    903: {"marker": 'o',  "markerfacecolor": "yellow", "name": "CAS"},
    511: {"marker": 's',  "markerfacecolor": "yellow", "name": "ANDA-SUM"},
    801: {"marker": 'd',  "markerfacecolor": "yellow", "name": "JAVA"},
    645: {"marker": snowflake,  "markerfacecolor": "black", "name": "SULA"},
    602: {"marker": 'x',  "markerfacecolor": "blue", "name": "LUZ"},
    608: {"marker": 's',  "markerfacecolor": 'c', "name": "PHIL"}
}

# Create a figure with two stacked panels (2x1 grid): ax1 shows trench velocity
# against plate age, ax2 only hosts the legend.
fig = plt.figure(figsize=(10, 10))
gs = gridspec.GridSpec(2, 1)
ax1 = fig.add_subplot(gs[0, 0])
ax2 = fig.add_subplot(gs[1, 0])
total_points_plotted = 0  # Variable to record the total number of plotted points.

# Keep only the rows that carry a valid (non-NaN) age.
mask = (~subduction_data_resampled["age"].isna())
total_points_plotted += len(subduction_data_resampled[mask])  # Count valid points.
subduction_data_resampled_valid = subduction_data_resampled[mask]  # Store valid data.

# Obtain a sorted list of unique subducting plate IDs from the valid data.
unique_subducting_ids = subduction_data_resampled_valid.subducting_pid.unique()
labels = []
patches = []
unique_subducting_ids.sort()  # Sort the unique subducting plate IDs.
print(unique_subducting_ids)

# Look up a name for every ID.
# NOTE(review): LookupNameByPid is called with trench ids elsewhere in this
# notebook; confirm that it also resolves subducting plate ids.
unique_subducting_names = [GParseReconstruction.LookupNameByPid(int(subducting_id))
                           for subducting_id in unique_subducting_ids]

# Fallback style for IDs without an entry in plot_options.
default_plot_option = {"marker": 'o',  "markerfacecolor": None, "name": "TBD"}

# Plot one marker series per subducting plate ID.
for subducting_id in unique_subducting_ids:
    plot_option = plot_options.get(int(subducting_id), default_plot_option)

    # Select ages and velocities from the SAME filtered frame. The original took
    # the ages from the unfiltered frame (NaN rows included) and the velocities
    # from the filtered one, so the two could differ in length and the boolean
    # mask did not match the index of the filtered frame.
    mask = (subduction_data_resampled_valid.subducting_pid == subducting_id)
    ages = subduction_data_resampled_valid.age[mask]
    trench_velocities = subduction_data_resampled_valid.trench_velocity[mask]

    # linestyle='None' (the string) draws markers only; linestyle=None would
    # select the default solid line and connect unrelated sample points.
    _patch = ax1.plot(ages, trench_velocities,
                      marker=plot_option["marker"], markerfacecolor=plot_option["markerfacecolor"],
                      markeredgecolor='black', markersize=10, linestyle='None', label=plot_option["name"])[0]
    patches.append(_patch)

# fix Ryuku: hard-coded (age, trench velocity) values added as an extra series
plot_option = {"marker": 'o',  "markerfacecolor": None, "name": "RYU"}
ages = [35.0, 38.0, 48.0, 50.0, 50.0]
trench_velocities = [3.0, 0.9, 1.2, 0.7, 0.9]
_patch = ax1.plot(ages, trench_velocities,
                  marker=plot_option["marker"], markerfacecolor=plot_option["markerfacecolor"],
                  markeredgecolor='black', markersize=10, linestyle='None', label=plot_option["name"])[0]
patches.append(_patch)

# Grid on the data panel; the legend is placed in the centre of the second panel.
ax1.grid()
ax2.legend(handles=patches, bbox_to_anchor=(0.5, 0.5), loc='center', ncol=2, numpoints=1, frameon=False)

# Output the number of valid-age points (the hard-coded Ryuku values are not counted).
print("Total plotted points: %d" % total_points_plotted)

# Set axis limits and labels for the first plot (trench velocity vs age).
ax1.set_xlim([0, 160.0])
ax1.set_ylim([-10.0, 10.0])
ax1.set_xlabel("Age (Ma)")
ax1.set_ylabel("Trench Velocity Magnitude (cm/yr)")

# Save the figure to a PDF file with a name derived from the reconstruction parameters.
fileout = os.path.join(RESULT_DIR, "gplate_subduction_zones", "subduction_distribution_t_%.2e_edge_%.2f_section_%.2f.pdf"\
     % (reconstruction_time, arc_length_edge, arc_length_resample_section))
# Make sure the output directory exists even if no earlier cell created it.
os.makedirs(os.path.dirname(fileout), exist_ok=True)
fig.savefig(fileout)
print("figure saved: %s" % fileout)

# Save the subducting plate ID and names to a CSV file for future reference.
csv_out = os.path.join(RESULT_DIR, "gplate_subduction_zones", "subduction_distribution_t_%.2e_edge_%.2f_section_%.2f.csv"\
     % (reconstruction_time, arc_length_edge, arc_length_resample_section))
unique_data = {
    "pid": unique_subducting_ids,
    'name': unique_subducting_names
}
df_unique_data = pd.DataFrame(unique_data)
df_unique_data.to_csv(csv_out)
print("csv file saved: %s" % csv_out)