In [1]:
import os
import subprocess
from pathlib import Path

import numpy as np
import pandas as pd
import simplekml
import xarray as xr
from sklearn.model_selection import GridSearchCV
from sklearn.pipeline import Pipeline
from sklearn.svm import SVC

from smount_predictors import SeamountTransformer, SeamountHelp, SeamountCVSplitter

In [2]:
# Data file generation: cut one sub-grid per zone out of data/vgg_swot.grd.
# Convert this cell to markdown if the zone files are not needed.

# Each zone is described by four hand-picked [lat, lon] corner points.
point_zones = np.array([
    [[-49.759482,130.811756], [-50.078953,145.181173], [-62.484863,129.672869], [-61.904762,150.058463]],
    [[-35.906902, 90.063967], [-37.659520, 96.793828], [-41.388205, 86.617601], [-43.792914, 95.062691]],
    [[-19.25,-111], [-16.85,-111], [-18,-112.3], [-18,-110.5]]
    ])

# Maps zone number (1-based) -> [min_lat, max_lat, min_lon, max_lon].
name_coords = {}
for name, zone in enumerate(point_zones, start=1):
    # Axis-aligned bounding box of the corner points (column 0 = lat, column 1 = lon).
    min_lat, min_lon = zone.min(axis=0)
    max_lat, max_lon = zone.max(axis=0)
    # check=True raises subprocess.CalledProcessError when gmt fails, instead of
    # silently continuing with a missing or stale zone file.
    subprocess.run(
        [
            'gmt', 'grdcut', 'data/vgg_swot.grd',
            f'-Gdata/unlabled_zone_{name}.nc',
            # GMT regions are given as west/east/south/north.
            f'-R{min_lon}/{max_lon}/{min_lat}/{max_lat}',
        ],
        check=True,
    )
    name_coords[name] = [min_lat, max_lat, min_lon, max_lon]
name_coords



{1: [np.float64(-62.484863),
  np.float64(-49.759482),
  np.float64(129.672869),
  np.float64(150.058463)],
 2: [np.float64(-43.792914),
  np.float64(-35.906902),
  np.float64(86.617601),
  np.float64(96.793828)],
 3: [np.float64(-19.25),
  np.float64(-16.85),
  np.float64(-112.3),
  np.float64(-110.5)]}

In [3]:
# Export the hand-picked training coordinates as KML placemarks.
mounts = pd.read_csv('out/handpick_train_coords.csv')
lls = mounts.to_numpy()

kml = simplekml.Kml()
for row in lls:
    # Columns are read by position: column 0 is the placemark name and the
    # coordinate pair is (column 2, column 1).
    # NOTE(review): presumably column 1 is lat and column 2 is lon, since
    # simplekml expects (lon, lat) -- confirm against the CSV header.
    label, second_col, third_col = row[0], row[1], row[2]
    kml.newpoint(name=label, coords=[(third_col, second_col)])
kml.save('out/handpick_train_coords.kml')

In [4]:
def _inside_bounds(frame, bounds):
    """Return the rows of ``frame`` that lie strictly inside ``bounds``.

    Parameters
    ----------
    frame : pandas.DataFrame
        Must contain ``lat`` and ``lon`` columns.
    bounds : sequence of four numbers
        ``(min_lat, max_lat, min_lon, max_lon)``, the order stored in
        ``name_coords``.
    """
    min_lat, max_lat, min_lon, max_lon = bounds
    inside = (
        (frame['lat'] > min_lat) & (frame['lat'] < max_lat)
        & (frame['lon'] > min_lon) & (frame['lon'] < max_lon)
    )
    return frame[inside]


# The catalogue does not depend on the zone, so it is read once.
catalog_coords = pd.read_csv(
    'data/all_training.xyhrdnc',
    sep=' ',
    names=['lon', 'lat', 'height', 'radius', 'dept', 'name', 'cat'],
)[['lat', 'lon', 'radius', 'name']]

# Zone number -> result of SeamountHelp.seamount_radial_match for that zone.
# NOTE(review): nothing later in this notebook consumes these frames yet;
# confirm how they are meant to be combined or saved.
labeled_by_zone = {}
for name, coords in name_coords.items():
    # coords is already (min_lat, max_lat, min_lon, max_lon); lat must be
    # compared with the lat bounds and lon with the lon bounds.
    train_coords = pd.concat(
        [_inside_bounds(catalog_coords, coords), _inside_bounds(mounts, coords)],
        ignore_index=True,
    )
    # Load the zone grid fully into a DataFrame, then release the file handle.
    with xr.open_dataset(f'data/unlabled_zone_{name}.nc') as zone_ds:
        zone_grid = zone_ds.to_dataframe().reset_index()
    labeled_by_zone[name] = SeamountHelp.seamount_radial_match(zone_grid, train_coords)

In [5]:
# Model template: seamount feature transform followed by an RBF-kernel SVM
# with balanced class weights.
steps = [
    ('trans', SeamountTransformer()),
    ('predictor', SVC(kernel='rbf', class_weight='balanced')),
]
pipe = Pipeline(steps)

# np.linspace samples 50 values by default, so the grid holds
# 50 x 50 = 2500 candidate settings.
GRID_POINTS = 50
param_grid = dict(
    predictor__C=np.linspace(1, 10, num=GRID_POINTS),
    trans__sigma=np.linspace(0.1, 2, num=GRID_POINTS),
)

# Exhaustive search scored on recall, using the project's 5-way CV splitter
# and all available cores.
search = GridSearchCV(
    estimator=pipe,
    param_grid=param_grid,
    scoring='recall',
    cv=SeamountCVSplitter(5),
    n_jobs=-1,
    verbose=3,
)

In [6]:
# Read the training-zone bounds and the filtered training grid, then build the
# feature table with one row per grid node.
points = SeamountHelp.readKMLbounds(Path('data/seamount_training_zone.kml'))
data = SeamountHelp.readAndFilterGRD(Path('data') / 'training_data_new.nc')
feature_columns = ['lat', 'lon', 'z']
X = data.to_dataframe().reset_index()[feature_columns]

In [None]:
# Targets come from the 'Labels' column of the same flattened training grid.
training_table = data.to_dataframe().reset_index()
y = training_table['Labels']

# Run the grid search, then report its score on the data it was fitted on
# (the search was configured with scoring='recall').
search.fit(X, y)
print(f'train score: {search.score(X, y)}')

Fitting 5 folds for each of 2500 candidates, totalling 12500 fits
[CV 4/5] END predictor__C=1.0, trans__sigma=0.13877551020408163;, score=0.976 total time= 7.6min
[CV 4/5] END predictor__C=1.0, trans__sigma=0.1;, score=0.976 total time= 7.7min
[CV 1/5] END predictor__C=1.0, trans__sigma=0.13877551020408163;, score=0.954 total time= 7.9min
[CV 5/5] END predictor__C=1.0, trans__sigma=0.13877551020408163;, score=0.978 total time= 7.9min
[CV 5/5] END predictor__C=1.0, trans__sigma=0.1;, score=0.978 total time= 7.9min
[CV 2/5] END predictor__C=1.0, trans__sigma=0.17755102040816328;, score=0.963 total time= 9.1min
[CV 2/5] END predictor__C=1.0, trans__sigma=0.1;, score=0.963 total time= 9.3min
[CV 2/5] END predictor__C=1.0, trans__sigma=0.13877551020408163;, score=0.963 total time= 9.3min
[CV 3/5] END predictor__C=1.0, trans__sigma=0.1;, score=0.977 total time= 9.5min
[CV 3/5] END predictor__C=1.0, trans__sigma=0.13877551020408163;, score=0.977 total time= 9.7min
[CV 1/5] END predictor__C=1.



[CV 2/5] END predictor__C=1.0, trans__sigma=0.6428571428571428;, score=0.964 total time= 5.7min
[CV 3/5] END predictor__C=1.0, trans__sigma=0.6040816326530613;, score=0.977 total time= 7.2min
[CV 4/5] END predictor__C=1.0, trans__sigma=0.6040816326530613;, score=0.977 total time= 7.2min
[CV 5/5] END predictor__C=1.0, trans__sigma=0.6040816326530613;, score=0.980 total time= 7.4min
[CV 2/5] END predictor__C=1.0, trans__sigma=0.6040816326530613;, score=0.964 total time= 9.2min
[CV 1/5] END predictor__C=1.0, trans__sigma=0.6428571428571428;, score=0.955 total time= 8.4min
[CV 1/5] END predictor__C=1.0, trans__sigma=0.6816326530612244;, score=0.954 total time= 5.8min
[CV 4/5] END predictor__C=1.0, trans__sigma=0.6428571428571428;, score=0.977 total time= 7.3min
[CV 5/5] END predictor__C=1.0, trans__sigma=0.6428571428571428;, score=0.979 total time= 7.6min
[CV 2/5] END predictor__C=1.0, trans__sigma=0.6816326530612244;, score=0.965 total time= 7.2min
[CV 3/5] END predictor__C=1.0, trans__si

In [4]:
import pickle
from sklearn.cluster import DBSCAN
from smount_predictors.src.SeamountHelp import PipelinePredictor

# DBSCAN epsilon: the grid spacing of a 1-arcminute lat/lon grid, expressed in
# radians because the haversine metric measures distances in radians.
ONE_ARCMIN_RAD = 0.00029088820866630336

# NOTE(review): `pipe` is the pipeline template handed to GridSearchCV, which
# fits clones of it; if the tuned, fitted model is meant to be saved, this
# should probably be `search.best_estimator_` -- confirm what
# PipelinePredictor expects.
full_pipeline = PipelinePredictor(pipe, DBSCAN(eps=ONE_ARCMIN_RAD, min_samples=4, metric='haversine'))

# The context manager guarantees the file is flushed and closed after the dump.
with open('out/3d_model.pkl', 'wb') as model_file:
    pickle.dump(full_pipeline, model_file)

In [None]:
import xarray as xr
import pygmt

# GridSearchCV fits clones of `pipe`, so `pipe` itself is never fitted; the
# trained model is the refit best estimator held by the search object.
best_model = search.best_estimator_

# Predict on the feature columns only and attach the result to a new frame, so
# that X is not mutated and re-running this cell (or the fit) stays idempotent.
X_labeled = X.assign(Labels=best_model.predict(X[['lat', 'lon', 'z']]))

# Reshape the flat table back onto the lat/lon grid for plotting.
predictions = xr.Dataset.from_dataframe(X_labeled.set_index(['lat', 'lon'])).set_coords(['lon', 'lat'])

In [None]:
# Map bounds, stored as (min_lat, max_lat, min_lon, max_lon).
points = (-19.20600998877477, -15.16349705205003, -117.7208544442338, -110.2604021311965)
south, north, west, east = points

# Bathymetry grid used as the base image.
srtm = xr.open_dataset('data/SRTM15_V2.5.nc')
bathymetry = xr.DataArray(srtm.z, coords=(srtm.lat, srtm.lon))

fig = pygmt.Figure()
pygmt.config(FORMAT_GEO_MAP="ddd.x", MAP_FRAME_TYPE="plain", FONT_LABEL="15p,Helvetica,black", FONT_ANNOT="15p")
pygmt.config(FONT_ANNOT_PRIMARY="15p,Helvetica,black")

# ---- Base layer (this section was labelled "SWIR" in the notebook) ----
# Colour table covering -4000 to -1500 in steps of 1.
pygmt.makecpt(cmap="haxby", series=[-4000, -1500, 1], background='o')
fig.grdimage(
    grid=bathymetry,
    region=[west, east, south, north],
    projection="M90c",
    frame=["WSrt", "xa0.5", "ya0.5"],
    cmap=True,
)

# ---- Overlay: contours of the predicted label grid ----
contour_style = dict(
    annotation="200+f12p",
    interval=1,
    pen="1p, black",
    limit=[-3400, 2800],
)
fig.grdcontour(grid=predictions['Labels'], **contour_style)
fig.show()