In [1]:
from pathlib import Path
import pandas as pd
import numpy as np
from smount_predictors import SeamountScorer, SeamountTransformer, SeamountHelp, SeamountCVSplitter
from sklearn.pipeline import Pipeline
from sklearn.svm import LinearSVC, SVC
import plotly.express as px
from sklearn.model_selection import GridSearchCV

In [2]:
# Known seamount centres from the training-zone KML, reduced to a two-column
# (lat, lon) NumPy array.
training_zone_kml = Path('data/seamount_training_zone.kml')
seamount_centers = (
    SeamountHelp.read_seamount_centers(training_zone_kml)[['lat', 'lon']].to_numpy()
)

In [3]:
# Model: a SeamountTransformer step feeding a linear-kernel SVM.
# Class 1 is weighted 10x relative to class 0.
pipe = Pipeline(steps=[
    ('trans', SeamountTransformer()),
    ('predictor', SVC(kernel='linear', class_weight={0: 1, 1: 10})),
])

# 10 x 10 search grid: SVM regularisation C log-spaced over [1e1, 1e5] and the
# transformer's `sigma` linearly spaced over [0.1, 1].
param_grid = dict(
    predictor__C=np.logspace(1, 5, 10),
    trans__sigma=np.linspace(0.1, 1, 10),
)

# NOTE(review): `scorer` is constructed here, but the search below is scored
# with the built-in 'recall' metric and never receives it - confirm whether
# `scoring=scorer` was intended.
scorer = SeamountScorer(seamount_centers)

# Exhaustive search with a 5-split SeamountCVSplitter on all available cores.
# error_score='raise' makes any failing fit abort the search instead of being
# recorded as a bad score.
grid = GridSearchCV(
    estimator=pipe,
    param_grid=param_grid,
    scoring='recall',
    cv=SeamountCVSplitter(5),
    n_jobs=-1,
    verbose=3,
    error_score='raise',
)

In [4]:
# Bounds of the training zone as read from the KML file.
# NOTE(review): `points` is not used by any cell visible here - confirm it is
# still needed.
points = SeamountHelp.readKMLbounds(Path('data/seamount_training_zone.kml'))

# Labelled grid, loaded and filtered by the project helper.
labelled_grid_path = Path('data') / 'swot_seamounts_labled.nc'
data = SeamountHelp.readAndFilterGRD(labelled_grid_path)

# Tabular view of the grid with the index levels moved into ordinary columns
# (later cells select 'lat', 'lon', 'z' and 'Labels' from it).
grid_frame = data.to_dataframe()
X = grid_frame.reset_index()

In [5]:
# Hold-out split: take the first (train, test) index pair produced by the
# project's CV splitter and materialise both partitions as NumPy arrays.
splitter = SeamountCVSplitter(5)
feature_cols = ['lat', 'lon', 'z']
train_idx, test_idx = next(splitter.split(X[feature_cols], X['Labels']))

# Positional row selection, so the splitter's indices are treated as row numbers.
train_rows = X.iloc[train_idx]
test_rows = X.iloc[test_idx]

X_train = train_rows[feature_cols].to_numpy()
y_train = train_rows['Labels'].to_numpy()
X_test = test_rows[feature_cols].to_numpy()
y_test = test_rows['Labels'].to_numpy()

In [6]:
# Run the grid search on the training partition only. With the 10 x 10
# parameter grid and 5 CV splits this performs 500 fits (see the log below),
# so this is the expensive cell of the notebook.
grid.fit(X_train, y_train)

Fitting 5 folds for each of 100 candidates, totalling 500 fits
[CV 1/5] END predictor__C=10.0, trans__sigma=0.30000000000000004;, score=0.410 total time=   5.1s
[CV 5/5] END predictor__C=10.0, trans__sigma=0.1;, score=0.325 total time=   5.3s
[CV 1/5] END predictor__C=10.0, trans__sigma=0.2;, score=0.404 total time=   5.4s
[CV 5/5] END predictor__C=10.0, trans__sigma=0.2;, score=0.325 total time=   5.5s
[CV 1/5] END predictor__C=10.0, trans__sigma=0.1;, score=0.404 total time=   5.8s
[CV 4/5] END predictor__C=10.0, trans__sigma=0.2;, score=0.291 total time=   6.0s
[CV 4/5] END predictor__C=10.0, trans__sigma=0.1;, score=0.291 total time=   6.1s
[CV 3/5] END predictor__C=10.0, trans__sigma=0.1;, score=0.330 total time=   6.6s
[CV 2/5] END predictor__C=10.0, trans__sigma=0.2;, score=0.426 total time=   6.6s
[CV 3/5] END predictor__C=10.0, trans__sigma=0.2;, score=0.330 total time=   6.7s
[CV 2/5] END predictor__C=10.0, trans__sigma=0.1;, score=0.426 total time=   6.8s
[CV 2/5] END predic

In [None]:
# Best mean cross-validated score found by the search and the parameter
# combination that achieved it.
grid.best_score_, grid.best_params_

In [None]:
# Evaluate the refit best estimator on the held-out partition. The value is
# reported with the metric configured on the search (scoring='recall').
grid.score(X_test, y_test)

In [None]:
# Inspect the held-out feature array (columns selected as lat, lon, z).
X_test

In [None]:
# Wrap the held-out features in a DataFrame so the plotting cells can refer to
# columns by name. This rebinds `X_test` from an ndarray to a DataFrame, so
# cells run after this one see the DataFrame.
X_test = pd.DataFrame(data=X_test, columns=['lat', 'lon', 'z'])

In [None]:
# Map of the held-out points coloured by the predicted class.
# The predictions are cast to strings so plotly treats them as categories and
# draws one discrete colour per class rather than a continuous colour bar.
predicted_labels = grid.predict(X_test).astype(str)

fig = px.scatter(
    X_test,
    x='lon',
    y='lat',
    color=predicted_labels,
    title='Predicted labels (held-out split)',
)
fig.update_layout(
    width=300,
    height=800,
    yaxis=dict(type='linear', autorange=True),  # linear, auto-ranged latitude axis
    legend_title_text='Predicted label',
)
fig.show()

In [None]:
# Map of the held-out points coloured by the `z` column.
# NOTE(review): despite the name `y_true`, this figure is coloured by `z`, not
# by the labels in `y_test` - confirm which was intended.
y_true = px.scatter(X_test, x='lon', y='lat', color='z')

# Both axes use the same linear, auto-ranged configuration.
axis_settings = {'type': 'linear', 'autorange': True}
y_true.update_layout(
    width=320,
    height=800,
    xaxis=dict(axis_settings),
    yaxis=dict(axis_settings),
)