In [1]:
from pathlib import Path
from bs4 import BeautifulSoup
import pandas as pd
import numpy as np
import plotly.express as px
from sklearn.cluster import DBSCAN
import simplekml

In [2]:
# Seamount catalogue; the Longitude, Latitude and Name columns are used further down.
seamounts = pd.read_excel(Path('data') / 'sample_mask.txt.xlsx')

# KML file whose placemarks delimit the zone of interest.
coords_file = Path('data') / 'Seamount_training_zone.kml'
# Open in binary mode: the XML parser then decodes the bytes using the encoding
# declared in the document instead of the platform's default text encoding.
with open(coords_file, 'rb') as f:
    soup = BeautifulSoup(f, 'xml')
marks = soup.find_all('Placemark')
# One [longitude, latitude] row per placemark, read from the first <longitude> and
# <latitude> tags found inside each placemark.
coords = np.array([
    [float(mark.find('longitude').text), float(mark.find('latitude').text)]
    for mark in marks
])


In [3]:
# Sanity check on the zone placemarks: np.unique flattens the (n, 2) array, so the
# distinct longitudes and latitudes are listed together in one sorted array.
np.unique(coords)

array([-112.7645837 , -111.92594347, -111.21112942,  -18.27991839,
        -17.62160512,  -17.22596008])

In [4]:
# Keep only the seamounts lying strictly inside the bounding box of the zone placemarks.
zone_lon, zone_lat = coords[:, 0], coords[:, 1]
lon_inside = (seamounts['Longitude'] > zone_lon.min()) & (seamounts['Longitude'] < zone_lon.max())
lat_inside = (seamounts['Latitude'] > zone_lat.min()) & (seamounts['Latitude'] < zone_lat.max())
seamounts = seamounts[lon_inside & lat_inside]

In [5]:
# Tab-separated, header-less file of additional points; the two columns are
# labelled Latitude then Longitude, in file order.
new_coords_path = Path('data') / 'new_coords.txt'
new_coords = pd.read_csv(new_coords_path, header=None, sep='\t')
new_coords.columns = ['Latitude', 'Longitude']

In [6]:
# Stack the additional points under the catalogue rows and renumber the index 0..n-1.
# new_coords carries no Name column, so those rows get NaN names here.
frames_to_stack = [seamounts, new_coords]
seamounts = pd.concat(frames_to_stack, ignore_index=True)

In [7]:
# Placeholder names 'mh01' .. 'mh41' for seamounts that have no catalogue name.
nan_names = [f'mh{number:02d}' for number in range(1, 42)]

In [8]:
# Keep only the columns that are exported, in output order.
export_columns = ['Longitude', 'Latitude', 'Name']
seamounts = seamounts[export_columns]

In [9]:
# Compare how many seamounts lack a name with how many placeholder names are available.
missing_name_count = seamounts['Name'].isna().sum()
(missing_name_count, len(nan_names))

(29, 41)

In [10]:
# Give every unnamed seamount the next unused placeholder name.
# `Series.fillna(other_series)` aligns on index labels, so passing
# `pd.Series(nan_names)` directly would give row label i the i-th name (skipping the
# names that line up with already-named rows) and leave any row whose label is
# >= len(nan_names) unfilled. Building the filler on the index of the unnamed rows
# hands the names out positionally instead.
unnamed_idx = seamounts.index[seamounts['Name'].isna()]
# If there are fewer placeholder names than unnamed rows, the surplus rows stay NaN.
n_assign = min(len(unnamed_idx), len(nan_names))
placeholder_names = pd.Series(nan_names[:n_assign], index=unnamed_idx[:n_assign], dtype=object)
seamounts['Name'] = seamounts['Name'].fillna(placeholder_names)

In [11]:
# Fallback label for any seamount that still has no name. Only the Name column is
# filled: a frame-wide fillna would also write the string 'mh42' into any missing
# Longitude/Latitude cell and corrupt the coordinates that are exported below.
# NOTE(review): 'mh42' is also the name of the first manually appended point later
# in the notebook - confirm that a duplicate name is acceptable.
seamounts['Name'] = seamounts['Name'].fillna('mh42')

In [12]:
# Two extra points entered by hand, appended at the end of the table.
new_entry = {
    'Latitude': [-18.267421, -17.640363],
    'Longitude': [-111.157616, -112.091609],
    'Name': ['mh42', 'mh43'],
}
manual_rows = pd.DataFrame(new_entry)
seamounts = pd.concat([seamounts, manual_rows], ignore_index=True)

In [13]:
# Display the assembled table (Longitude, Latitude, Name) before exporting it.
seamounts

Unnamed: 0,Longitude,Latitude,Name
0,-111.891667,-18.191667,SIO-08041
1,-111.758333,-17.491667,KW-13668
2,-112.641667,-17.458333,SIO-08045
3,-112.608333,-17.641667,SIO-08047
4,-112.025,-17.408333,SIO-08050
5,-112.491667,-17.341667,SIO2-04545
6,-111.425,-18.175,SIO-08370
7,-111.658333,-18.008333,SIO-08048
8,-112.491667,-17.875,SIO2-04546
9,-112.341667,-18.058333,SIO2-04555


In [14]:
# Build a KML document with one named point per seamount; each point's coordinates
# are passed as a (longitude, latitude) tuple.
kml = simplekml.Kml()
point_columns = zip(seamounts['Name'], seamounts['Longitude'], seamounts['Latitude'])
for point_name, lon, lat in point_columns:
    kml.newpoint(name=point_name, coords=[(lon, lat)])

In [15]:
# Write the KML document to out/new_seamounts.kml.
kml_path = Path('out') / 'new_seamounts.kml'
# Create the output folder when it is missing so the notebook also runs end to end
# on a fresh checkout instead of failing on a non-existent directory.
kml_path.parent.mkdir(parents=True, exist_ok=True)
kml.save(kml_path)

In [16]:
# Write the final table to out/new_seamounts.csv without the index column.
csv_path = Path('out') / 'new_seamounts.csv'
# Create the output folder when it is missing so the export does not fail on a
# fresh checkout.
csv_path.parent.mkdir(parents=True, exist_ok=True)
seamounts.to_csv(csv_path, index=False)