In [None]:
from pathlib import Path
import pandas as pd
import numpy as np
from bs4 import BeautifulSoup
from smount_predictors import SeamountScorer, SeamountTransformer, SeamountHelp
from sklearn.pipeline import Pipeline
from sklearn.svm import LinearSVC
from sklearn.model_selection import GridSearchCV

In [None]:
# Two-step model: the project's SeamountTransformer feeds a linear SVM classifier.
pipe = Pipeline([
    ('trans', SeamountTransformer()),
    # Fixed seed: LinearSVC's dual solver shuffles data pseudo-randomly, so
    # without it repeated runs of the search are not guaranteed to agree.
    ('predictor', LinearSVC(random_state=0))
])

# Candidate regularisation strengths: 9 log-spaced values from 1e-4 to 1e4.
param_grid = {
    'predictor__C': np.logspace(-4, 4, 9)
}

# 5-fold cross-validated search over C, scored with the project's
# SeamountScorer; n_jobs=-1 asks scikit-learn to use every available core.
grid = GridSearchCV(pipe, param_grid, cv=5, n_jobs=-1, scoring=SeamountScorer())

In [None]:
# Read the training-zone KML and reduce its placemarks to a lat/lon bounding box.
coords_file = Path('data') / 'Seamount_training_zone.kml'
# Binary mode lets the XML parser take the encoding from the document itself
# instead of decoding with the platform's default text encoding.
with open(coords_file, 'rb') as f:
    soup = BeautifulSoup(f, 'xml')
marks = soup.find_all('Placemark')
if not marks:
    # Fail with a clear message instead of an IndexError on the slicing below.
    raise ValueError(f'no Placemark elements found in {coords_file}')

# One [latitude, longitude] row per placemark, taken from the first
# <latitude> / <longitude> element found inside each one.
zone_points = np.array([
    [float(mark.find('latitude').text), float(mark.find('longitude').text)]
    for mark in marks
])
zone_lats, zone_lons = zone_points[:, 0], zone_points[:, 1]

# Bounding box as (lat_min, lat_max, lon_min, lon_max).
coords = (np.min(zone_lats), np.max(zone_lats), np.min(zone_lons), np.max(zone_lons))

# NOTE(review): readCroppedxyz presumably keeps only the rows of the .xyz file
# that fall inside `coords`; the helper is not visible here, so confirm.
with open(Path('data') / 'vgg_swot.xyz', 'r') as f:
    zone = SeamountHelp.readCroppedxyz(f, coords)

In [None]:
# Load the labelled points from the KML as an (n, 2) float array.
y_file = Path('out') / 'new_seamounts.kml'
# Binary mode lets the XML parser take the encoding from the document itself
# instead of decoding with the platform's default text encoding.
with open(y_file, 'rb') as f:
    soup = BeautifulSoup(f, 'xml')
marks = soup.find_all('Placemark')

# Keep the first two comma-separated fields of each <coordinates> element
# (KML orders them longitude, latitude[, altitude]).
# reshape(-1, 2) keeps the array two-dimensional even when there are no
# placemarks, so the column swap below yields an empty (0, 2) array instead
# of raising IndexError.
lon_lat = (
    np.array([mark.find('coordinates').text.split(',')[:2] for mark in marks])
    .astype(float)
    .reshape(-1, 2)
)

# Swap the two columns so each row of `y` reads [lat, lon].
y = lon_lat[:, [1, 0]]

In [None]:
# Training frame: one row per point in `zone`, indexed by (lat, lon), with the
# remaining column 'z' as the feature.
train_data = pd.DataFrame(zone, columns=['lat', 'lon', 'z']).set_index(['lat', 'lon'])

# Every point starts as a negative example.
train_data['y'] = 0

# Flag the rows whose (lat, lon) index matches a labelled point.
# The original `train_data[(y[:, 1], y[:, 0])] = 1` used column-style
# assignment with a tuple of arrays as the key, which does not select rows;
# the row mask + `.loc[..., 'y']` below is the row-wise equivalent.
# NOTE(review): `y` rows are [lat, lon] after the column swap in the loading
# cell, so they are paired here in the same order as the index levels. The
# original indexed them in the opposite order (y[:, 1], y[:, 0]) - confirm
# which order `zone` really uses.
# NOTE(review): matching is exact float equality; labelled points that do not
# sit exactly on a grid point stay 0.
seamount_keys = list(zip(y[:, 0], y[:, 1]))
is_seamount = train_data.index.isin(seamount_keys)
if not is_seamount.any():
    # With zero positives the classifier cannot be trained; surface it here.
    raise ValueError('no labelled seamount matched a (lat, lon) row of train_data')
train_data.loc[is_seamount, 'y'] = 1