In [16]:
from pathlib import Path
import re
import pandas as pd
import numpy as np
from smount_predictors import SeamountScorer, SeamountTransformer, SeamountHelp, SeamountCVSplitter
from sklearn.pipeline import Pipeline
from sklearn.svm import LinearSVC, SVC
import plotly.express as px
from sklearn.model_selection import GridSearchCV

In [17]:
# Load the seamount centres from the training-zone KML and keep only the
# 'lat' and 'lon' columns as a NumPy array.
centers_kml = Path('data/seamount_training_zone.kml')
centers_table = SeamountHelp.read_seamount_centers(centers_kml)
seamount_centers = centers_table[['lat', 'lon']].to_numpy()

In [18]:
# Parse the cross-validation log into a DataFrame and rank the
# hyper-parameter settings by their mean score.
results = Path('out') / 'crossval.txt'
text = results.read_text()

# Capture every number that directly follows an '=' sign, in file order.
# The log is assumed to list them as repeating (C, sigma, score) triples
# -- TODO confirm against the code that writes crossval.txt.
floats = re.findall(r'=([\d\.]+)', text)

# Strided slices + zip keep only complete triples. The previous index loop
# (`range(0, len(floats) - 1, 3)`) read past the end of the list and raised
# IndexError whenever two values were left over after the last full triple.
records = list(zip(floats[0::3], floats[1::3], floats[2::3]))
df = pd.DataFrame(records, columns=['C', 'sigma', 'score']).astype(float)

# Mean score per (C, sigma) setting, best first.
df.groupby(['C', 'sigma']).mean().sort_values('score', ascending=False)

score    0.44375
Name: (100000.0, 0.6), dtype: float64

In [19]:
# Final model: a SeamountTransformer step feeding a linear-kernel SVC.
# NOTE(review): the cross-validation output shown earlier in this notebook
# reports (C=100000.0, sigma=0.6), yet C=1000000 is used here -- confirm the
# extra zero is intentional.
transform_step = ('trans', SeamountTransformer(sigma=0.6))
# class_weight gives class 1 ten times the weight of class 0.
predictor_step = ('predictor', SVC(C=1000000, kernel='linear', class_weight={0: 1, 1: 10}))
pipe = Pipeline(steps=[transform_step, predictor_step])

In [20]:
# Read the training-zone bounds and the labelled grid, then flatten the grid
# into a DataFrame whose former index levels become ordinary columns.
zone_kml = Path('data/seamount_training_zone.kml')
grid_file = Path('data') / 'swot_seamounts_labled.nc'

points = SeamountHelp.readKMLbounds(zone_kml)
data = SeamountHelp.readAndFilterGRD(grid_file)

grid_frame = data.to_dataframe()
X = grid_frame.reset_index()

In [21]:
# Take the first (train, test) pair yielded by the splitter and materialise
# it as NumPy feature matrices and label vectors.
# NOTE(review): the argument 5 is presumably the number of folds -- confirm
# against SeamountCVSplitter.
feature_cols = ['lat', 'lon', 'z']

splitter = SeamountCVSplitter(5)
train_idx, test_idx = next(splitter.split(X[feature_cols], X['Labels']))

# Positional row selection; distinct names keep the index arrays, the row
# subsets and the final arrays apart.
train_rows = X.iloc[train_idx]
test_rows = X.iloc[test_idx]

X_train = train_rows[feature_cols].to_numpy()
y_train = train_rows['Labels'].to_numpy()
X_test = test_rows[feature_cols].to_numpy()
y_test = test_rows['Labels'].to_numpy()

In [22]:
# Fit the pipeline on the training split; the fitted pipeline is the cell's
# displayed result.
pipe.fit(X=X_train, y=y_train)