In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.cluster import DBSCAN
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import silhouette_score
import plotly.express as px
from DBSCANSupport import *
from LocalPath import LOCALPATH

Load gravity data

In [2]:
# Read the gravity CSV and discard its "old_ind" column.
gravity_csv = LOCALPATH + 'data/test_grav.csv'
grav = pd.read_csv(gravity_csv).drop(columns=["old_ind"])


In [3]:
# Pull the three columns of interest out of the frame as plain Python lists.
lat, longg, inten = (
    grav[column].tolist()
    for column in ('Latitude', 'Longitude', 'Intensity')
)

In [4]:
# Build the array handed to the grid search: one row per grid point,
# columns in the order Latitude, Longitude, Intensity.
feature_columns = ['Latitude', 'Longitude', 'Intensity']
X = grav.loc[:, feature_columns].to_numpy()
X

array([[ -1.50833333, -97.99166667,  -5.5733223 ],
       [ -1.50833333, -97.975     ,  -5.16694736],
       [ -1.50833333, -97.95833333,  -4.97332668],
       ...,
       [ -5.99166667, -90.04166667,  -2.35047531],
       [ -5.99166667, -90.025     ,  -0.47824201],
       [ -5.99166667, -90.00833333,  -0.67400765]])

In [5]:
# Candidate values swept by the grid search in the following cell.
test_eps = np.linspace(0.1, 1, num=20)  # 20 evenly spaced values from 0.1 to 1 inclusive
test_samp = np.arange(1, 50)            # the integers 1 through 49

# Helper object built from the seamount mask spreadsheet.
# NOTE(review): test_zone looks like (lat_min, lat_max, lon_min, lon_max) -- confirm in DBSCANSupport.
mask_path = LOCALPATH+"data/sample_mask.txt.xlsx"
DBModel_test = DBSCANSupport(mask_path, test_zone=(-6, -1.5, -98, -90))

In [6]:
# Run the grid search over test_eps x test_samp on X. With verbose=True the
# call prints one line per combination it tries.
# NOTE(review): outlierDeviation is presumably the scoring callback -- confirm in DBSCANSupport.
search_result = DBModel_test.gridSearch(
    test_eps,
    test_samp,
    X,
    DBModel_test.outlierDeviation,
    verbose=True,
)
score, params, data_out = search_result

Score for 0.1 and 1 is 0.0
Score for 0.1 and 2 is -45.16556291390729
Score for 0.1 and 3 is -91.8543046357616
Score for 0.1 and 4 is -165.33112582781456
Score for 0.1 and 5 is -255.90066225165563
Score for 0.1 and 6 is -347.5364238410596
Score for 0.1 and 7 is -437.8675496688742
Score for 0.1 and 8 is -514.1456953642385
Score for 0.1 and 9 is -577.1390728476821
Score for 0.1 and 10 is -626.5894039735099
Score for 0.1 and 11 is -665.2516556291391
Score for 0.1 and 12 is -692.3708609271523
Score for 0.1 and 13 is -716.3576158940398
Score for 0.1 and 14 is -731.3708609271523
Score for 0.1 and 15 is -743.1721854304636
Score for 0.1 and 16 is -750.9271523178808
Score for 0.1 and 17 is -753.6622516556291
Score for 0.1 and 18 is -757.5827814569536
Score for 0.1 and 19 is -759.933774834437
Score for 0.1 and 20 is -761.1324503311258
Score for 0.1 and 21 is -762.0728476821192
Score for 0.1 and 22 is -762.5165562913908
Score for 0.1 and 23 is -762.8079470198676
0.1 and 24 produced 1 (too few) clu

In [7]:
# Inspect the array returned by the grid search. The next cell names its four
# columns Latitude, Longitude, Label, Intensity.
data_out

array([[ -1.50833333, -97.99166667,  -1.        ,  -5.5733223 ],
       [ -1.50833333, -97.975     ,   0.        ,  -5.16694736],
       [ -1.50833333, -97.95833333,   0.        ,  -4.97332668],
       ...,
       [ -5.99166667, -90.04166667,  -1.        ,  -2.35047531],
       [ -5.99166667, -90.025     ,  -1.        ,  -0.47824201],
       [ -5.99166667, -90.00833333,  -1.        ,  -0.67400765]])

In [8]:
# Wrap the grid-search output in a frame with named columns.
output_columns = ["Latitude", "Longitude", "Label", "Intensity"]
dfout = pd.DataFrame(data=data_out, columns=output_columns)

# NOTE(review): a later cell colours points by a "True_Seamount" column, so
# matchPoints presumably adds that column to dfout in place -- confirm.
DBModel_test.matchPoints(dfout)

# Keep only the rows whose Label is -1.
# NOTE(review): presumably these are the points DBSCAN left unclustered -- confirm.
has_minus_one_label = dfout['Label'].eq(-1)
df_labeled = dfout.loc[has_minus_one_label]

In [9]:

# Scatter the rows kept in df_labeled with longitude on x and latitude on y.
fig = px.scatter(data_frame=df_labeled, x="Longitude", y="Latitude")
fig.show()

In [10]:
# Load the reference seamount mask, keep only the entries inside the test
# zone, and plot them.
# test_zone is (lat_min, lat_max, lon_min, lon_max): the first pair bounds the
# Latitude column and the second pair bounds the Longitude column below.
test_zone = (-6, -1.5, -98, -90)
lat_min, lat_max, lon_min, lon_max = test_zone

seamounts_df = pd.read_excel(LOCALPATH+"data/sample_mask.txt.xlsx",
                             sheet_name="new mask")
seamounts_df = seamounts_df.drop(columns=["VGG Height", "Radius", "base_depth", "-",
                                          "Name", "Charted", "surface_depth"])

# Series.between is inclusive on both ends by default, matching the original
# >= / <= comparisons.
inside_zone = (seamounts_df["Latitude"].between(lat_min, lat_max)
               & seamounts_df["Longitude"].between(lon_min, lon_max))
seamounts_df = seamounts_df[inside_zone]

# Plot by column name rather than by position so the figure does not depend on
# the spreadsheet's column order and uses the same Longitude-x / Latitude-y
# orientation as the other figures in this notebook.
fig3 = px.scatter(seamounts_df, x="Longitude", y="Latitude")

# Later cells index `seamounts` positionally, so it stays a NumPy array.
seamounts = seamounts_df.to_numpy()
fig3.show()

In [11]:
# Base layer: the rows of df_labeled, coloured by their True_Seamount value.
fig = px.scatter(data_frame=df_labeled, x="Longitude", y="Latitude", color="True_Seamount")

# Overlay: the first two columns of the seamount mask array as a second trace.
seamount_trace = px.scatter(x=seamounts[:, 0], y=seamounts[:, 1]).data[0]
fig.add_trace(seamount_trace)

# Anchor the x-axis scale to the y axis at a 1:1 ratio.
fig.update_xaxes(scaleanchor="y", scaleratio=1)
fig.show()


In [12]:
# Score returned by the grid search.
# NOTE(review): presumably the best score found across the sweep -- confirm in DBSCANSupport.gridSearch.
score

4.7218543046357615

In [13]:
# Parameter pair returned by the grid search.
# NOTE(review): presumably the (test_eps value, test_samp value) behind `score` -- confirm.
params

(0.24210526315789474, 5)

In [14]:
# Save the labelled frame to CSV, leaving out the index column.
output_csv = LOCALPATH+"data/DBSCAN_test.csv"
dfout.to_csv(output_csv, index=False)

In [15]:
# Re-display of the parameter pair returned by the grid search (same value as shown earlier in the notebook).
params

(0.24210526315789474, 5)