In [None]:
# use the environment of py-gplate
import sys
import gplately
import numpy as np
import gplately.pygplates as pygplates
from gplately import ptt
import glob, os
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.colors as mcolors
import matplotlib.patches as mpatches
from matplotlib import gridspec
import cartopy.crs as ccrs
from plate_model_manager import PlateModelManager
import json
import math

# Root directory of the aspect Lab, read from the ASPECT_LAB_DIR environment
# variable (a KeyError is raised here when the variable is not set).
ASPECT_LAB_DIR = os.environ['ASPECT_LAB_DIR']
# Make the root importable; presumably this is what lets `shilofue` resolve below.
sys.path.append(ASPECT_LAB_DIR)
# Directory under which the figures and csv summaries of this notebook are written.
RESULT_DIR = os.path.join(ASPECT_LAB_DIR, 'results')
import shilofue.GPlateLib as GPlateLib
# Import the Utilities module from the utilities/python_scripts subdirectory.
sys.path.append(os.path.join(ASPECT_LAB_DIR, 'utilities', "python_scripts"))
import Utilities


### Workflow for Subduction Dataset from Plate Reconstruction

#### General workflow

This workflow defines the steps to query a subduction plate dataset based on plate reconstruction data.

##### Step 1: Loading Plate Reconstruction Data
To extract additional properties such as **plate name**, **start time**, and **end time**, it is necessary to load the plate reconstruction files.

##### Step 2: Fixing Subducting Plate Age
To accurately determine the age of the subducting plate, we must ensure that the points are correctly pinned to the subducting plate. This involves two workflows:

1. **Automatic Workflow**: The process attempts to automatically pin the points to the subducting plate.
2. **Manual Workflow**: If the automatic workflow produces invalid values, the manual workflow is used to correct these errors.

The results of both workflows are recorded in a CSV file, which can be loaded for further analysis.

##### Step 3: Extracting Key Information
The ultimate goal of the workflow is to pin sample points to the global subduction zones and extract the following information:

- **Location of points**: Geographic coordinates of the sample points.
- **Length of the arc**: The arc length of the subduction zone.
- **Plate age**: The age of the subducting plate at the pinned location.
- **Convergence rate**: The rate of convergence between the plates.
- **Trench mobility**: The movement of the trench over time.
- **Trench PID**: The plate ID (PID) of the trench, as used in the reconstruction.
- **Subduction PID**: The plate ID (PID) of the subducting plate, as used in the reconstruction.

> **Note**: The **trench PID** and **subduction PID** correspond to the IDs used in the plate reconstruction process.


#### To get a good dataset for a time step

1. **Select the Automatic Workflow and get a global dataset**:
   - Use the global dataset with the automatic workflow to initiate the data processing. This is a good reference dataset to refine points of respective trenches


2. **Look in the global dataset for a local subset or extract a Local dataset directly**:
    - To directly extract a local dataset, look up the ID of a trench and put it in the workflow to extract that single trench.
    - When we are satisfied, dump the data into a finalized dataset.

3. **Analyze the Finalized Dataset**


### Initialization

First assign a reconstruction time

Here I need a file to lookup ids and names of the subductions for each reconstruction time.    

In [None]:
# assign a reconstruction time
reconstruction_time=0 # time of reconstruction, must be an integer (it is formatted with %d in the file name below)

# enter the directory of the plate reconstruction files and check
dir_re = os.path.join(ASPECT_LAB_DIR, "dtemp/gplate_export_test0/Muller_etal_2019_PlateBoundaries_no_topologies")
assert(os.path.isdir(dir_re))

# initiate the class
# GClass is used for the reconstruction, the resampling and the plotting below;
# GParseReconstruction exposes the trench names and trench ids read from the exported file.
GClass = GPlateLib.GPLATE_CLASS()
GParseReconstruction = GPlateLib.PARSERECONSTRUCTION()


# The reconstruction time is set before Reconstruct() is called.
GClass.SetReconstructionTime(reconstruction_time)
GClass.Reconstruct()

# Exported plate boundaries for this time step, e.g. "reconstructed_00Ma.xy" for 0 Ma.
infile = os.path.join(dir_re, "reconstructed_%.2dMa.xy" % reconstruction_time)
GParseReconstruction.ReadFile(infile)

### Search a Single Subduction Zone


#### Search with a key word

In [None]:
print(GParseReconstruction.trench_names)

keyword = "ryu"

# Collect the position of every trench whose name contains the keyword,
# ignoring upper / lower case.
lowered_keyword = keyword.lower()
matching_indices = []
for position, trench_name in enumerate(GParseReconstruction.trench_names):
    if lowered_keyword in trench_name.lower():
        matching_indices.append(position)

# Report position, name and id of every hit, followed by an empty line.
for index in matching_indices:
    hit_name = GParseReconstruction.trench_names[index]
    hit_pid = GParseReconstruction.trench_pids[index]
    print(index)
    print("name: ", hit_name)
    print("id: ", hit_pid)
    print("")

#### Search with a trench id

In order to do this, first lookup in a global dataset. Then use the following block to query for the data

##### trench id, t = 0
* 12001, ,CAS
* 686, Indonesian bndy w AUS-mg, ANDA-SUM
* 736, Java SZ
* 651, Flores Banda SZ, JAVA
* 669, North Sulawesi Subduction, SULA
* 612, Luzon subduction, LUZ
* 678, Philippine trench, PHIL
* 648, Okinawa Trough (Ryuku) from EarthByte cob MG 4-20-07
* 659, Izu Bonin Trench
* 699, Marianas Trench-NUVEL
* 111, Aleutian and Bering Sea Masking Polygon
* 406, Kamchatka SZ from EarthByte COB at 0Ma -- MG 4/20/07
* 413, Shirshov Ridge Subduction (not included), part of 901 (subduction id)
* 2000, Central American subduction from 135 Ma
* 201, South America trench taken from a combination of RUM and COB file
* 2031, Caribbean/farallon subduction iniating at 85 Ma
* 2011, Caribbean subduction
* 815, Sandwich Trench
* 821, Tonga-Kermadec-MG

In [None]:
# Pick one trench by its id (see the list of ids above) and print its name.
trench_pid = 111
_name = GParseReconstruction.LookupNameByPid(trench_pid)
print(_name)

# get the data of this subduction zone
one_subduction_data = GClass.GetOneSubductionByTrenchId(trench_pid)

# basic plots
# plot the reconstructed zone
fig = plt.figure(figsize=(10,6), dpi=100)
# Mollweide projection centered on 180 degrees longitude, with a 15-degree grid.
ax = fig.add_subplot(111, projection=ccrs.Mollweide(central_longitude = 180))
gl=ax.gridlines(color='0.7',linestyle='--', xlocs=np.arange(-180,180,15), ylocs=np.arange(-90,90,15))
gl.left_labels = True
plt.title(f'{reconstruction_time} Ma')
# you may change the extent to global to see the sample points in a world map.
ax.set_global()
# ax.set_extent([-80,0,-70,0])
# plot the coastline
GClass.PlotCoastlines(ax)
#GClass.gplot.plot_trenches(ax, color='k')
#GClass.gplot.plot_subduction_teeth(ax, color='k')
# plot the seafloor age
im_age = GClass.PlotSeaFloorAges(ax)

# plot the subduction zone
# The points are given in lon / lat, hence the PlateCarree transform.
im_sub = ax.scatter(one_subduction_data.lon, one_subduction_data.lat, marker=".", s=3, c='r', transform=ccrs.PlateCarree())

# set plot options
cbar_age = plt.colorbar(im_age) # colorbar for ages
cbar_age.ax.get_yaxis().labelpad = 15
cbar_age.ax.set_ylabel("Age (Ma)", rotation=90)
# NOTE(review): im_sub is drawn with the constant color 'r', so this second
# colorbar does not appear to encode any velocity value - confirm whether the
# scatter was meant to be colored by a velocity column.
cbar = plt.colorbar(im_sub) # colorbar for trenches
cbar.ax.get_yaxis().labelpad = 15
# cbar.ax.set_ylabel('Trench Velocity Magnitude (in cm/yr)', rotation=90) # choose between these two labels to use for trenches
cbar.ax.set_ylabel('Convergence Velocity Magnitude (in cm/yr)', rotation=90)

### Workflow to extract a dataset

#### Defining the Methodology

In this section, we define the methodology for working with the subduction dataset.

1. **Loading Data**: 
   - You can choose to either load an existing CSV file containing previously saved data or start the process from scratch.

2. **Sampling Trenches**:
   - You can choose between two sampling methods:
     - **Sample all trenches at once**: This option allows you to sample all trenches in a single operation.
     - **Sample a specific trench**: Alternatively, you can focus on sampling a single trench for more granular analysis.

In [None]:
# resample by a given arc length edge and resample section
# Both values are passed to the GClass resampling calls further down.
# arc_length_edge = 0.0; arc_length_resample_section = 2.0  # by degree
arc_length_edge = 2.0; arc_length_resample_section = 2.0  # by degree

# Pick exactly one of the three configurations below:
#   1. load a previously recorded csv file,
#   2. resample all the trenches from scratch,
#   3. resample a single trench selected by trench_pid.
use_recorded_file = True; resample_all = True; trench_pid = None # use this option to start from a recorded file
# use_recorded_file = False; resample_all = True; trench_pid = None
# use_recorded_file = False; resample_all = False; trench_pid = 2000

# resample the subduction zones
# Path of the recorded csv file; it is only read when use_recorded_file is True.
recorded_file = os.path.join(ASPECT_LAB_DIR, "files", "ThDSubduction", "gplate_json_files", "subduction_resampled_t_%.2e.csv" \
                         % (reconstruction_time))
# recorded_file = os.path.join(ASPECT_LAB_DIR, "dtemp", "gplate_export_test0", "subduction_resampled_t_%.2e_edge_%.2f_section_%.2f.csv" \
#                            % (reconstruction_time, arc_length_edge, arc_length_resample_section))

#### Extract from the dataset

In [None]:
# Obtain the resampled subduction data from one of three sources.
if use_recorded_file:
    # Source 1: a csv file recorded by an earlier run.
    print("use recorded file: ", recorded_file)
    assert os.path.isfile(recorded_file)
    subduction_data_resampled = pd.read_csv(recorded_file)
elif resample_all:
    # Source 2: resample every trench.
    subduction_data_resampled = GClass.ResampleAllSubduction(arc_length_edge, arc_length_resample_section)
else:
    # Source 3: resample the single trench selected by trench_pid.
    subduction_data_resampled, _ = GClass.ResampleSubductionById(trench_pid, arc_length_edge, arc_length_resample_section)

if not use_recorded_file:
    # Freshly resampled data has no derived columns yet: add them as
    # placeholders (NaN, or 'none' for the marker fill) in a fixed order.
    for _placeholder in ('age', 'lon_fix', 'lat_fix', 'fix_age_polarity'):
        subduction_data_resampled[_placeholder] = np.nan
    # todo_ages
    subduction_data_resampled['marker'] = np.nan
    subduction_data_resampled['marker_fill'] = 'none'
    subduction_data_resampled['color'] = np.nan

    # fix the ages
    GClass.FixTrenchAge(subduction_data_resampled)

#### Query for invalid values

In [None]:
# Collect the row labels 0 .. len-1 whose age is NaN.
age_column = subduction_data_resampled['age']
invalid_indexes = [row for row in range(len(age_column)) if np.isnan(age_column[row])]

# Report the number of ages, the ages themselves and the invalid rows.
print("len(ages): ")
print(len(age_column))
print("ages: ")
print(age_column)
print("invalid_indexes: ")
print(invalid_indexes)

#### Extra steps

##### Fix the invalid values

First, map out the invalid ages from the outputs from the previous section.

Use the index, theta and distance to create a new sampling point that is then used to query the raster data:
* i_fs: the index
* theta_fs: theta of the direction. 0 is north and 180 is south.
* d_fs: distance along the direction

The next section is for plotting the current dataset

In [None]:
# i_fs = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
# theta_fs = [180.0, 180.0, 180.0, 180.0, 180.0, 180.0, 180.0, 210.0, 210.0, 150.0, 180.0, 180.0]
# d_fs = [100e3, 100e3, 100e3, 100e3, 100e3, 200e3, 400e3, 300e3, 200e3, 300e3, 200e3, 1000e3]
# for ii in range(len(i_fs)):
#     i_f = i_fs[ii]
#     theta_f = theta_fs[ii]
#     d_f = d_fs[ii]
#     subduction_data_resampled_local = None
#     subduction_data_resampled_local = pd.DataFrame([subduction_data_resampled.iloc[i_f]])
#     subduction_data_resampled_local.lon, subduction_data_resampled_local.lat = \
#         Utilities.map_point_by_distance(subduction_data_resampled.iloc[i_f].lon, subduction_data_resampled.iloc[i_f].lat, theta_f, d_f)
#     new_age = GClass.InterpolateAgeGrid(subduction_data_resampled_local)
#     print(i_f, ": new age - ", new_age)
#     subduction_data_resampled.loc[i_f, 'age'] = new_age
#     if new_age is not np.nan:
#         new_lon = subduction_data_resampled_local.lon.values[0]
#         new_lat = subduction_data_resampled_local.lat.values[0]
#         subduction_data_resampled.loc[i_f, 'lon_fix'] = new_lon
#         subduction_data_resampled.loc[i_f, 'lat_fix'] = new_lat


##### Fix the dataset of Ryuku

In [None]:
# ryuku_dataset = pd.DataFrame(np.nan, index=range(5), columns=subduction_data_resampled.columns)

# ryuku_dataset['lat'] = [23.4, 24.2, 25.7, 27.5, 29.8]
# ryuku_dataset['lon'] = [124.0, 127.0, 129.0, 130.5, 132.0]
# ryuku_dataset['age'] = [35.0, 38.0, 48.0, 50.0, 50.0]
# ryuku_dataset['trench_velocity']= [3.0, 0.9, 1.2, 0.7, 0.9]

# subduction_data_resampled = pd.concat([subduction_data_resampled, ryuku_dataset], ignore_index=True)

##### Add a column that determines the direction of the convergence vector

In [None]:
# subduction_data_resampled['conv_angle_polarity'] = [0.0 for i in range(len(subduction_data_resampled))]

#### Plot the dataset

In [None]:
# basic plots
# plot the reconstructed zone
# i_p selects what is drawn: None plots every point, an integer plots only the
# row with that index (which must exist in subduction_data_resampled).
# i_p = None # plot all
i_p = 76 # plot one point
fig = plt.figure(figsize=(10,6), dpi=100)
# Mollweide projection centered on 180 degrees longitude, with a 15-degree grid.
ax = fig.add_subplot(111, projection=ccrs.Mollweide(central_longitude = 180))
gl=ax.gridlines(color='0.7',linestyle='--', xlocs=np.arange(-180,180,15), ylocs=np.arange(-90,90,15))
gl.left_labels = True
plt.title(f'{reconstruction_time} Ma')
# you may change the extent to global to see the sample points in a world map.
ax.set_global()
# ax.set_extent([-80,0,-70,0])
# plot the coastline
GClass.PlotCoastlines(ax)
# plot the seafloor age
im_age = GClass.PlotSeaFloorAges(ax)
# plot all subduction zones and subduction teeth
# im_sub = GPlateLib.plot_one_subduction_data(ax, GClass.GetSubductionData())
GClass.gplot.plot_trenches(ax, color='k')
GClass.gplot.plot_subduction_teeth(ax, color='k')

# plot all the fixed ages
# mask is True for the rows that have a valid (non-NaN) age.
mask = (~subduction_data_resampled['age'].isna())
if i_p is None:
    # plot all points
    # red: sample points with a valid age; yellow: sample points without one;
    # cyan: the (lon_fix, lat_fix) positions of the rows with a valid age.
    ax.scatter(subduction_data_resampled[mask].lon, subduction_data_resampled[mask].lat, marker=".", s=60, c='r', transform=ccrs.PlateCarree())
    ax.scatter(subduction_data_resampled[~mask].lon, subduction_data_resampled[~mask].lat, marker=".", s=60, c='y', transform=ccrs.PlateCarree())
    ax.scatter(subduction_data_resampled[mask].lon_fix, subduction_data_resampled[mask].lat_fix, marker=".", s=30, c='c', transform=ccrs.PlateCarree())
else:
    # plot one point, don't apply mask
    ax.scatter(subduction_data_resampled.lon[i_p], subduction_data_resampled.lat[i_p], marker=".", s=60, c='r', transform=ccrs.PlateCarree())
    ax.scatter(subduction_data_resampled.lon_fix[i_p], subduction_data_resampled.lat_fix[i_p], marker=".", s=30, c='c', transform=ccrs.PlateCarree())

# set plot options
cbar_age = plt.colorbar(im_age) # colorbar for ages
cbar_age.ax.get_yaxis().labelpad = 15
cbar_age.ax.set_ylabel("Age (Ma)", rotation=90)

# write outputs
# The figure is saved twice (pdf and png) under the same base name, which
# records the reconstruction time and both resampling parameters.
fileout = os.path.join(RESULT_DIR, "gplate_subduction_zones", "subduction_resampled_t_%.2e_edge_%.2f_section_%.2f.pdf" \
                         % (reconstruction_time, arc_length_edge, arc_length_resample_section))
fileout1 = os.path.join(RESULT_DIR, "gplate_subduction_zones", "subduction_resampled_t_%.2e_edge_%.2f_section_%.2f.png" \
                         % (reconstruction_time, arc_length_edge, arc_length_resample_section))
# makedirs also creates RESULT_DIR itself when it is missing (a plain mkdir
# fails in that case), and exist_ok makes the call safe to repeat.
os.makedirs(os.path.dirname(fileout), exist_ok=True)
fig.savefig(fileout)
fig.savefig(fileout1)
print("Save figure: %s" % fileout)
print("Save figure: %s" % fileout1)

In [None]:
# debug: print the current state of the resampled dataset for inspection
print(subduction_data_resampled)

#### Export to a csv file

In [None]:
# Switch for the export: set to False to leave the files on disk untouched.
record_file = True

# The export goes to a temp directory; the file name records the
# reconstruction time and the resampling parameters, plus the trench id when a
# single trench was resampled.
export_dir = os.path.join(ASPECT_LAB_DIR, "dtemp", "gplate_export_test0")
if resample_all:
    export_name = "subduction_resampled_t_%.2e_edge_%.2f_section_%.2f.csv" \
        % (reconstruction_time, arc_length_edge, arc_length_resample_section)
else:
    export_name = "subduction_resampled_t_%.2e_pid_%d_edge_%.2f_section_%.2f.csv" \
        % (reconstruction_time, int(trench_pid), arc_length_edge, arc_length_resample_section)
temp_file = os.path.join(export_dir, export_name)

# don't mess up the existing files
if record_file:
    subduction_data_resampled.to_csv(temp_file)
    print("Data saved to %s" % temp_file)

### Analyze extracted data set

We plot the convergence / trench retreat rate of trenches below.
The plot is also combined with a sea floor age

Issue: We get a lot of nan value in ages
1. check the position of the sample points
2. plot the raster of oceanic plate age

#### Some notes

* ANDA-SUM: The change from trench retreat to advance (as in L05) is missing
* JAVA: essentially, I am fixing the ages to one point (on the corner, above Australia); the trench velocity shows retreat, instead of advance (L05)
* LUZ: gplately data suggest advance of 2.5, while L05 shows fast retreat motion
* Ryuku: data is missing in the current dataset; it is fixed using values from Table 1 in Lallemand et al., 2005
* KuKam: data shows retreat motion, while L05 shows advance motion
* PER-NCHI-JUAN-SCHI (subduction id 911): These seem to be all in the subduction id 901. Beyond a trench id of 201, the trench id varies from 201010 to 20101?. Up north, the component is marked with trench id 2031 (Caribbean/farallon subduction iniating at 85 Ma).
* South to SCHI: This is represented by the subduction id 801, but the trench id 201 continues from the previous 901 subduction.
* ANT: retreat motion in this dataset, rather than the advance motion in L05

##### Calculate a distance to an adjacent subduction zone

In [None]:
from pyproj import Geod

# Radius used for the spherical distance computations below; presumably the
# mean radius of the Earth in meters - TODO confirm.
Ro = 6371e3
    
# Initialize the Geod object with the WGS84 ellipsoid model, 
# which provides accurate geodesic calculations on Earth's surface
geod = Geod(ellps="WGS84")

def haversine(lat1, lon1, lat2, lon2, radius=Ro):
    """Return the great-circle distance between two points on a sphere.

    Args:
        lat1, lon1: latitude and longitude of the first point, in degrees.
        lat2, lon2: latitude and longitude of the second point, in degrees.
        radius: radius of the sphere. The result is expressed in the same
            unit as this value (the default is the module-level Ro).

    Returns:
        The distance along the sphere between the two points.
    """
    # Convert latitude and longitude from degrees to radians
    phi1, lam1, phi2, lam2 = map(math.radians, [lat1, lon1, lat2, lon2])

    # Haversine formula
    dphi = phi2 - phi1
    dlam = lam2 - lam1
    a = math.sin(dphi / 2)**2 + math.cos(phi1) * math.cos(phi2) * math.sin(dlam / 2)**2
    # Rounding errors can push `a` marginally outside [0, 1] for (nearly)
    # antipodal points, which would make sqrt(1 - a) raise a domain error.
    a = min(1.0, max(0.0, a))
    c = 2 * math.atan2(math.sqrt(a), math.sqrt(1 - a))

    # Central angle times the radius gives the distance in the unit of `radius`
    return radius * c

# Record both the minimum distance and the indexing in the dataset
# subduction_distances            - distance to the matched point of every row
# subduction_indexes              - the row indexes themselves
# subduction_min_distance_indexes - index of the matched row, -1 when no match
# subduction_maker_lons / lats    - marker path of every row (for plotting)
subduction_distances = np.zeros(len(subduction_data_resampled))
subduction_indexes = np.array(range(len(subduction_data_resampled)))
subduction_min_distance_indexes = np.full(len(subduction_data_resampled), -1, dtype=int)
subduction_maker_lons = []
subduction_maker_lats = []

# By default, we assign the length of the equator and compare the computed
# distance to the previous value
# i_p = None; subducting_pid = None
# i_p = 142; subducting_pid = None # plot one point
# i_p = None; subducting_pid = 201; trench_pid = None # plot one point
i_p = None; subducting_pid = 901; trench_pid = 699 # plot one point

# Uncomment this part to modify the values of conv_angle_polarity
# if i_p is not None:
#   subduction_data_resampled.loc[i_p, "conv_angle_polarity"] = 0.0
# elif subducting_pid is not None:
#   # Figure out the indexes of the selected points and their matching
#   # points in the dataset
#   mask1 = subduction_data_resampled.subducting_pid == subducting_pid
#   if trench_pid is not None:
#     mask2 = subduction_data_resampled.trench_pid == trench_pid
#   else:
#     mask2 = np.ones(len(subduction_data_resampled), dtype=bool)
#   mask = (mask1 & mask2)
#   subduction_data_resampled.loc[mask, "conv_angle_polarity"] = 0.0

d_m = 1000e3 # distance of marker point
theta_diff = 10.0 # degree
for i_1 in range(len(subduction_data_resampled)):
  subducting_pid_1 = int(subduction_data_resampled.subducting_pid[i_1])
  lat1, lon1 = subduction_data_resampled.lat[i_1], subduction_data_resampled.lon[i_1]
  # Pin a marker point with a distance d_m and an angle of convergence (theta, from east)
  # Then, get a shortest path connecting the query point and the marker point.
  # The direction of convergence varies by assigning a polarity variable
  polarity = subduction_data_resampled.conv_angle_polarity[i_1]
  if polarity == 0.0:
    theta = subduction_data_resampled.conv_angle[i_1] + 90.0
  elif polarity == 1.0:
    theta = subduction_data_resampled.conv_angle[i_1] - 90.0
  else:
    # Any other value (e.g. NaN) would otherwise leave theta undefined, or
    # silently reuse the direction of the previous row.
    raise ValueError("Row %d: conv_angle_polarity must be 0.0 or 1.0, got %r" % (i_1, polarity))
  theta = (theta + 360) % 360 # normalize theta
  lon_m, lat_m = Utilities.map_point_by_distance(lon1, lat1, theta, d_m)
  path_lon_m , path_lat_m = Utilities.shortest_path_between_two_points([lon1, lat1], [lon_m, lat_m], geod.npts, 100)
  subduction_maker_lons.append(path_lon_m); subduction_maker_lats.append(path_lat_m) 
  # Start from the length of the equator; this value is kept when no point matches.
  distance = 2 * np.pi * Ro
  for j_2 in range(len(subduction_data_resampled)):
    # Skip the query point itself and the points on the same subducting plate
    # before doing any geometry.
    if j_2 == i_1:
      continue
    subducting_pid_2 = int(subduction_data_resampled.subducting_pid[j_2])
    if subducting_pid_1 == subducting_pid_2:
      continue
    lat2, lon2 = subduction_data_resampled.lat[j_2], subduction_data_resampled.lon[j_2]
    theta_1 = Utilities.calculate_bearing(lon1, lat1, lon2, lat2) # theta_1 is normalized
    # Smallest angle between the two directions. The wrap at 0 / 360 degrees
    # has to be accounted for: 359 and 1 are 2 degrees apart, not 358.
    angle_gap = abs(theta_1 - theta) % 360.0
    angle_gap = min(angle_gap, 360.0 - angle_gap)
    distance_between_points = haversine(lat1, lon1, lat2, lon2)
    if distance_between_points < distance and angle_gap < theta_diff:
      distance = distance_between_points
      subduction_min_distance_indexes[i_1] = j_2
  subduction_distances[i_1] = distance

subduction_data_resampled["near_distance"] = subduction_distances


##### Visualize the matching points of minimum distance

In [None]:
# i_p = 43; subducting_pid = None # plot one point
# i_p = None; subducting_pid = 801 # plot one subduction zone

# First, the global coastline is plotted
fig = plt.figure(figsize=(10,6), dpi=100)
# Mollweide projection centered on 180 degrees longitude, with a 15-degree grid.
ax = fig.add_subplot(111, projection=ccrs.Mollweide(central_longitude = 180))
gl=ax.gridlines(color='0.7',linestyle='--', xlocs=np.arange(-180,180,15), ylocs=np.arange(-90,90,15))
gl.left_labels = True
plt.title(f'{reconstruction_time} Ma')
ax.set_global()
# plot the coastline
GClass.PlotCoastlines(ax)
im_age = GClass.PlotSeaFloorAges(ax)
GClass.gplot.plot_trenches(ax, color='k')
GClass.gplot.plot_subduction_teeth(ax, color='k')

# Then, a mask is generated based on the availability of the dataset
# mask1 - user selection (rows of the selected subducting plate)
# mask2 - matched points (rows for which a matching point was found)
# mask3 - rows of the selected trench, or all rows when trench_pid is None
# mask - mask1 & mask2 & mask3 (selected rows with a match)
# mask_in - mask1 & (~mask2) & mask3 (selected rows without a match)
# The outputs used by the plotting code are min_indexes_valid (indexes of the
# matching points) and indexes_valid (indexes of the query points).
mask1 = np.ones(len(subduction_data_resampled), dtype=bool)
indexes_valid = None
if i_p is not None:
  # A single query point: min_indexes_valid is one index here, not an array.
  min_indexes_valid = subduction_min_distance_indexes[i_p]
elif subducting_pid is not None:
  # Figure out the indexes of the selected points and their matching
  # points in the dataset
  mask1 = subduction_data_resampled.subducting_pid == subducting_pid

  # An index of -1 marks a point without a match.
  mask2 = (subduction_min_distance_indexes >= 0)
  if trench_pid is not None:
    mask3 = subduction_data_resampled.trench_pid == trench_pid
  else:
    mask3 = np.ones(len(subduction_data_resampled), dtype=bool)

  mask = mask1 & mask2 & mask3
  mask_in = mask1 & (~mask2) & mask3

  min_indexes_valid = subduction_min_distance_indexes[mask]
  indexes_valid = subduction_indexes[mask]

  min_indexes_invalid = subduction_min_distance_indexes[mask_in]
else:
  # No selection: take every row, including the rows without a match (-1).
  mask = np.ones(len(subduction_data_resampled), dtype=bool)
  min_indexes_valid = subduction_min_distance_indexes[mask]
  indexes_valid = subduction_indexes[mask]


# Here we plot the pairs of matching points in the subduction zones
# We also plot the marker path along the direction of local convergence
# and a length of d_m
# Colors: red - query points, orange - marker paths, blue - matching points,
# cyan - lines connecting a query point to its matching point.
# A matching index of -1 means that no matching point was found; such a point
# is drawn with its marker path only.
if i_p is not None:
  # plot the query point
  ax.scatter(subduction_data_resampled.lon[i_p], subduction_data_resampled.lat[i_p], marker=".", s=60, c='r', transform=ccrs.PlateCarree())
  # plot the marker path
  ax.scatter(subduction_maker_lons[i_p], subduction_maker_lats[i_p], marker='.', s=5, c='orange', transform=ccrs.PlateCarree())
  if min_indexes_valid >= 0:
    # plot the matching points
    ax.scatter(subduction_data_resampled.lon[min_indexes_valid], subduction_data_resampled.lat[min_indexes_valid], marker=".", s=60, c='b', transform=ccrs.PlateCarree())
    ax.plot([subduction_data_resampled.lon[i_p], subduction_data_resampled.lon[min_indexes_valid]],\
             [subduction_data_resampled.lat[i_p], subduction_data_resampled.lat[min_indexes_valid]],\
              c='c', transform=ccrs.PlateCarree())
else:
  # plot the query points
  ax.scatter(subduction_data_resampled.lon[mask], subduction_data_resampled.lat[mask], marker=".", s=60, c='r', transform=ccrs.PlateCarree())
  for query_index, match_index in zip(indexes_valid, min_indexes_valid):
    # plot the marker path
    ax.scatter(subduction_maker_lons[query_index], subduction_maker_lats[query_index], marker='.', s=5, c='orange', transform=ccrs.PlateCarree())
    if match_index < 0:
      # No matching point for this query point: skip it explicitly instead of
      # relying on a KeyError from the lookup of index -1.
      continue
    # plot the matching points
    ax.scatter(subduction_data_resampled.lon[match_index], subduction_data_resampled.lat[match_index], marker=".", s=60, c='b', transform=ccrs.PlateCarree())
    ax.plot([subduction_data_resampled.lon[query_index], subduction_data_resampled.lon[match_index]],\
           [subduction_data_resampled.lat[query_index], subduction_data_resampled.lat[match_index]],\
            c='c', transform=ccrs.PlateCarree())

In [None]:
print(subduction_distances)

##### Plot results of analysis
Here we generate a variation of the previous plot, by differentiating valid age values and invalid age values.

In [None]:
from matplotlib.path import Path

# Custom marker paths referenced by the plot options dictionary, whose keys
# represent subduction zone IDs and whose values specify the marker style,
# face color, and name associated with that ID.

# Snowflake marker: nine arms, each one a separate segment from the center to
# a tip, so the vertices alternate between the center and a tip.
verts = [
    (0., 0.),   # Center
    (0.2, 0.6), # Upper arm
    (0., 0.),   # Center
    (0.4, 0.4), # Right diagonal
    (0., 0.),   # Center
    (0.6, 0.2), # Right arm
    (0., 0.),   # Center
    (0.4, -0.4),# Right down diagonal
    (0., 0.),   # Center
    (0.2, -0.6),# Bottom arm
    (0., 0.),   # Center
    (-0.4, -0.4),# Left down diagonal
    (0., 0.),   # Center
    (-0.6, -0.2),# Left arm
    (0., 0.),   # Center
    (-0.4, 0.4),# Left diagonal
    (0., 0.),   # Center
    (-0.2, 0.6),# Upper left arm
]
# Every (center, tip) pair needs a MOVETO to the center followed by a LINETO
# to the tip. The code list previously ended with a MOVETO, so the last arm
# (upper left) was never drawn.
codes = [Path.MOVETO, Path.LINETO] * (len(verts) // 2)
snowflake = Path(verts, codes)

# Define vertices for two equilateral triangles
vertices = [
    [0, 1], [-np.sqrt(3)/2, -0.5], [np.sqrt(3)/2, -0.5], [0, 1],  # First triangle
    [0, -1], [-np.sqrt(3)/2, 0.5], [np.sqrt(3)/2, 0.5], [0, -1]   # Second triangle
]
# Convert the list of vertices to an array for creating the Path
vertices = np.array(vertices)
# Define path codes (all 'LINETO' except the start 'MOVETO')
# NOTE(review): with a single MOVETO the outline also contains the segment
# joining the end of the first triangle to the start of the second one -
# confirm that this is intended.
codes = [Path.MOVETO] + [Path.LINETO] * (len(vertices) - 1)

star_path = Path(vertices, codes)

# Plot options of the subduction zones, keyed by the subducting plate id.
# A value takes one of two forms:
#   - an option dictionary with the keys "marker", "markerfacecolor" and
#     "name", used for all the points of that subducting plate;
#   - a nested dictionary keyed by trench pid, holding one option dictionary
#     per trench. A nested key is either a single trench pid or a tuple
#     (first, last), which the plotting loop treats as an inclusive range.
# NOTE(review): the key 1 (RYU) looks like a placeholder id rather than an id
# from the reconstruction - confirm.
plot_options = \
{
    903: {"marker": 'o',  "markerfacecolor": "yellow", "name": "CAS"},
    511: {"marker": 's',  "markerfacecolor": "yellow", "name": "ANDA-SUM"},
    801: {"marker": 'd',  "markerfacecolor": "yellow", "name": "JAVA"},
    645: {"marker": snowflake,  "markerfacecolor": "black", "name": "SULA"},
    602: {"marker": 'x',  "markerfacecolor": "blue", "name": "LUZ"},
    608: {"marker": 's',  "markerfacecolor": 'c', "name": "PHIL"},
    901: {
        699: {"marker": '>',  "markerfacecolor": 'red', "name": "MAR"},
        659: {"marker": 's',  "markerfacecolor": 'red', "name": "IZU"},
        (601112.0, 601118.0): {"marker": '^',  "markerfacecolor": 'green', "name": "JAP"},
        406:{"marker": 'v',  "markerfacecolor": 'green', "name": "KUKAM"},
        111: {"marker": 'o',  "markerfacecolor": 'pink', "name": "ALE-ALA"},
        (806, 821): {"marker": 'd',  "markerfacecolor": 'blue', "name": "TON-KERM"}
        },
    909: {"marker": star_path,  "markerfacecolor": 'c', "name": "MEX"},
    911: {"marker": 'o',  "markerfacecolor": 'k', "name": "PER-NCHI-JUAN-SCHI"},
    802: {"marker": 'd',  "markerfacecolor": 'k', "name": "SSCHI-TBD"},
    201: {
        2011:{"marker": '+',  "markerfacecolor": 'pink', "name": "ANT"},
        815:{"marker": '*',  "markerfacecolor": 'r', "name": "SAND"}
        },
    1: {"marker": 'd',  "markerfacecolor": "r", "name": "RYU"}
}

# Create a figure with three stacked panels on a 3x1 grid: ax1 and ax3 receive
# the trench velocity data, ax2 holds the legend.
fig = plt.figure(figsize=(10, 15))
gs = gridspec.GridSpec(3, 1)
ax1, ax2, ax3 = (fig.add_subplot(gs[row, 0]) for row in range(3))

# Keep only the rows with a valid (non-NaN) age; their number is reported as
# the total number of plotted points.
mask_age = subduction_data_resampled["age"].notna()
subduction_data_resampled_valid = subduction_data_resampled[mask_age]
total_points_plotted = len(subduction_data_resampled_valid)

# Containers filled while plotting (legend labels and handles).
labels = []
patches = []

# Sorted unique subducting plate IDs found in the valid data.
unique_subducting_ids = subduction_data_resampled_valid.subducting_pid.unique()
unique_subducting_ids.sort()
print(unique_subducting_ids)

# Name of every subducting plate ID, in the same order as the IDs.
unique_subducting_names = [
    GParseReconstruction.LookupNameByPid(int(plate_id))
    for plate_id in unique_subducting_ids
]

def _trench_pid_mask(trench_pids, trench_pid):
    """Return a boolean mask of the entries of trench_pids selected by trench_pid.

    trench_pid is flexible and may be
      - None: every entry is selected;
      - a number: entries within 0.1 of that value are selected;
      - a list: entries within 0.1 of any of the listed values are selected;
      - a tuple (first, last): entries inside the inclusive range are selected.

    Raises:
        ValueError: if trench_pid has another type, or is a tuple whose length
            is not 2.
    """
    if trench_pid is None:
        return pd.Series(True, index=trench_pids.index)
    # numpy scalars are accepted as well as the builtin numbers
    if isinstance(trench_pid, (int, float, np.integer, np.floating)):
        return abs(trench_pids - trench_pid) < 0.1
    if isinstance(trench_pid, list):
        selected = pd.Series(False, index=trench_pids.index)
        for trench_sub_pid in trench_pid:
            selected = selected | (abs(trench_pids - trench_sub_pid) < 0.1)
        return selected
    if isinstance(trench_pid, tuple):
        if len(trench_pid) != 2:
            raise ValueError("A range of trench pids needs exactly two entries, got %s" % (trench_pid,))
        return (trench_pids >= trench_pid[0]) & (trench_pids <= trench_pid[1])
    raise ValueError("Type of trench pid is wrong. Possible types are [None, float, int, list, tuple]")


# Loop through each subducting plate ID and plot the corresponding trench velocity.
for subducting_id in unique_subducting_ids:
    try:
        plot_option_sub_dict = plot_options[int(subducting_id)]  # Get plot options for the ID.
    except KeyError:
        # If no specific plot option is found, use default settings.
        print("Id %s not found, marked as TBD" % int(subducting_id))
        plot_option_sub_dict = {"marker": 'o',  "markerfacecolor": None, "name": "TBD"}

    # Make a list of (trench pid selection, plot option) pairs to loop over.
    if 'name' in plot_option_sub_dict:
        # A subduction contains a single trench: no selection by trench pid.
        trench_entries = [(None, plot_option_sub_dict)]
    else:
        # A subduction contains multiple trenches: one entry per trench key.
        trench_entries = list(plot_option_sub_dict.items())

    # Loop over the trench ids and plot the markers. The loop variable is
    # deliberately not called trench_pid, so that the trench selected earlier
    # in the notebook is not overwritten by this cell.
    for option_trench_pid, plot_option in trench_entries:
        # Both conditions are evaluated on the rows with a valid age:
        # same_plate    - match the subducting id, allowing a variation of 0.1
        #                 from the integer value
        # trench_choice - match the trench pid condition
        same_plate = abs(subduction_data_resampled_valid.subducting_pid - subducting_id) < 0.1
        trench_choice = _trench_pid_mask(subduction_data_resampled_valid.trench_pid, option_trench_pid)
        selected_rows = subduction_data_resampled_valid[same_plate & trench_choice]

        ages = selected_rows.age
        trench_velocities = selected_rows.trench_velocity
        near_distances = selected_rows.near_distance
        # ax1: trench velocity against plate age; the handle feeds the legend.
        _patch = ax1.plot(ages, trench_velocities,\
                marker=plot_option["marker"], markerfacecolor=plot_option["markerfacecolor"],\
                markeredgecolor='black', markersize=10, linestyle='', label=plot_option["name"])[0]
        # ax3: trench velocity against the distance to the adjacent subduction zone.
        ax3.plot(near_distances, trench_velocities,\
                marker=plot_option["marker"], markerfacecolor=plot_option["markerfacecolor"],\
                markeredgecolor='black', markersize=10, linestyle='', label=plot_option["name"])
        patches.append(_patch)
    
# fix Ryuku
# plot_option = {"marker": 'd',  "markerfacecolor": "blue", "name": "RYU"}
# ages = [35.0, 38.0, 48.0, 50.0, 50.0]
# trench_velocities = [3.0, 0.9, 1.2, 0.7, 0.9]
# _patch = ax1.plot(ages, trench_velocities,\
#                 marker=plot_option["marker"], markerfacecolor=plot_option["markerfacecolor"],\
#                 markeredgecolor='red', markersize=10, linestyle=None, label=plot_option["name"])[0]
# patches.append(_patch)

# Configure grid and legend for the second subplot.
# ax2 shows no data: it only holds the legend built from the collected handles.
ax1.grid()
ax3.grid()
ax2.legend(handles=patches, bbox_to_anchor=(0.5, 0.5), loc='center', ncol=2, numpoints=1, frameon=False)

# Output the total number of plotted points.
print("Total plotted points: %d" % total_points_plotted)

# Set axis limits and labels for the first plot (trench velocity vs age).
# The third plot shares the same velocity range and label.
# NOTE(review): ax3 gets no x label; its x values come from the near_distance
# column - confirm the unit before labelling it.
ax1.set_xlim([0, 160.0])
ax1.set_ylim([-10.0, 10.0])
ax3.set_ylim([-10.0, 10.0])
ax1.set_xlabel("Age (Ma)")
ax1.set_ylabel("Trench Velocity Magnitude (cm/yr)")
ax3.set_ylabel("Trench Velocity Magnitude (cm/yr)")

# Save the figure to a PDF file with a name derived from the reconstruction parameters.
fileout = os.path.join(RESULT_DIR, "gplate_subduction_zones", "subduction_distribution_t_%.2e_edge_%.2f_section_%.2f.pdf"\
     % (reconstruction_time, arc_length_edge, arc_length_resample_section))
# Create the output directory when it does not exist yet, so that this cell
# does not depend on another cell having created it.
os.makedirs(os.path.dirname(fileout), exist_ok=True)
fig.savefig(fileout)
print("figure saved: %s" % fileout)

# Save the subducting plate ID and names to a CSV file for future reference.
csv_out = os.path.join(RESULT_DIR, "gplate_subduction_zones", "subduction_distribution_t_%.2e_edge_%.2f_section_%.2f.csv"\
     % (reconstruction_time, arc_length_edge, arc_length_resample_section))
# One row per subducting plate: its id and the name looked up for it.
unique_data = {
    "pid": unique_subducting_ids,
    'name': unique_subducting_names
}
df_unique_data = pd.DataFrame(unique_data)
df_unique_data.to_csv(csv_out)
print("csv file saved: %s" % csv_out)