In [None]:
from pathlib import Path

import numpy as np  # noqa: F401
import pandas as pd  # noqa: F401

from blooms_ml.utils_ferrybox import (
    add_previous,
    get_dataframe_ferrybox2002to2018,
    get_ferrytracks,
    plot_temp_salt_flu,
    to_differences,
)

Extract ferrybox track snippets

In [None]:
# Raw ferrybox files are expected in "data_ferrybox" under the user's home
# directory; the path is handed to the loader as a plain string.
datadir = "/".join([str(Path.home()), "data_ferrybox"])
# `dfs` is indexed by position further down (dfs[100]).
dfs = get_ferrytracks(datadir)

In [None]:
# Work on an independent copy of one track (position 100), indexed by its
# "Time" column, so the entry stored in `dfs` is left untouched.
snippet = dfs[100].copy().set_index("Time")

In [None]:
# Report the time span and size of the selected track, then plot it.
print(
    f"Start date: {snippet.index[0]}",
    f"End date: {snippet.index[-1]}",
    f"Length: {len(snippet)}",
    sep="\n",
)
plot_temp_salt_flu(snippet)

Prepare data

In [None]:
# Build the working table from all loaded ferry tracks.
# NOTE(review): `normalize=True` presumably rescales the feature columns inside
# the helper -- confirm in blooms_ml.utils_ferrybox.
df = get_dataframe_ferrybox2002to2018(dfs, normalize=True)
# Bare last expression: display the result in the notebook.
df

In [None]:
# NOTE(review): presumably augments each row with values from preceding
# observations -- confirm against add_previous in blooms_ml.utils_ferrybox.
# `df_stacked` is only displayed here; the cells visible below continue from
# `df` / `df_diff` instead.
df_stacked = add_previous(df)
df_stacked

In [None]:
# NOTE(review): presumably converts the features of `df` into differences
# between observations -- confirm against to_differences in
# blooms_ml.utils_ferrybox. The result must carry a "labels" column, which the
# PCA / classification cells below rely on.
df_diff = to_differences(df)
df_diff

PCA

In [None]:
# Balance the two classes by down-sampling the "no bloom" rows (labels == 0)
# to the number of "bloom" rows (labels == 1).
# Both sampling steps are seeded so that the balanced set -- and everything
# derived from it (PCA, train/test split, scores) -- is reproducible on
# Restart & Run All.
# Note: sampling without replacement requires at least as many "no bloom" rows
# as "bloom" rows; otherwise pandas raises ValueError.
df_blooms = df_diff[df_diff["labels"] == 1]
df_no_blooms = df_diff[df_diff["labels"] == 0].sample(len(df_blooms), random_state=42)
df_balanced = pd.concat([df_blooms, df_no_blooms], axis=0)
# Shuffle so the two classes are interleaved, then renumber rows 0..n-1.
df_balanced = df_balanced.sample(frac=1, random_state=42).reset_index(drop=True)

In [None]:
import time

from sklearn.decomposition import PCA

In [None]:
# Split the balanced table into the target column and the feature matrix.
y = df_balanced["labels"]
X = df_balanced.drop(columns="labels")

In [None]:
# PCA: project the features onto the first 10 principal components and time it.
# perf_counter() is the monotonic, high-resolution clock meant for measuring
# elapsed intervals.
t0 = time.perf_counter()
pca = PCA(n_components=10, random_state=42)
X_reduced_pca = pca.fit_transform(X.values)
t1 = time.perf_counter()
# ".2f" = fixed-point with two decimals; a bare ".2" means two significant
# digits in general format and prints e.g. 12.3 as "1.2e+01".
print(f"PCA took {t1 - t0:.2f} s")

In [None]:
# Fraction of the total variance captured by each fitted principal component.
print(pca.explained_variance_ratio_)

In [None]:
# Keep every third sample and take the first three principal components;
# transposing lets each component unpack into its own 1-D coordinate array.
x1, x2, x3 = X_reduced_pca[::3, :3].T

In [None]:
import matplotlib.patches as mpatches
import matplotlib.pyplot as plt

In [None]:
# Create a 3D scatter plot of the first three principal components,
# one scatter call per class so each label/colour really matches its points.
fig = plt.figure(figsize=(8, 8))
ax = fig.add_subplot(111, projection="3d")

no_bloom_color, bloom_color = "#0A0AFF", "#AF0000"
blue_patch = mpatches.Patch(color=no_bloom_color, label="No bloom")
red_patch = mpatches.Patch(color=bloom_color, label="bloom")

# Class masks use the same every-third-row subsampling as x1, x2, x3, so they
# line up element-for-element with the plotted coordinates.
y_plot = y.values[::3]
no_bloom_mask = y_plot == 0
bloom_mask = y_plot == 1

# Scatter plot: previously every point was drawn twice with a boolean colour
# array, so the labels did not select any class and the first layer had the
# colours inverted underneath the second.
ax.scatter(
    x1[no_bloom_mask], x2[no_bloom_mask], x3[no_bloom_mask],
    color=no_bloom_color, label="No bloom", linewidths=2,
)
ax.scatter(
    x1[bloom_mask], x2[bloom_mask], x3[bloom_mask],
    color=bloom_color, label="bloom", linewidths=2,
)

ax.set_xlabel("PC 1")
ax.set_ylabel("PC 2")
ax.set_zlabel("PC 3")
ax.grid(True)
ax.legend(handles=[blue_patch, red_patch])
plt.show()

Classification

In [None]:
from sklearn.model_selection import cross_val_score, train_test_split

In [None]:
# Hold out 20% of the balanced samples for testing; the fixed random_state
# makes the split itself repeatable for a given X, y.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [None]:
# Convert the pandas splits to plain NumPy arrays.
# np.asarray is used instead of `.values` so the cell is idempotent: re-running
# it on the already-converted arrays is a no-op, whereas `.values` would raise
# AttributeError on an ndarray.
X_train = np.asarray(X_train)
X_test = np.asarray(X_test)
y_train = np.asarray(y_train)
y_test = np.asarray(y_test)

In [None]:
from sklearn.tree import DecisionTreeClassifier

In [None]:
# Decision tree with a fixed seed: scikit-learn permutes the candidate features
# at every split, so without random_state the fitted tree (and its score) can
# differ between runs. 42 matches the seed used for PCA and the train/test split.
classifier = DecisionTreeClassifier(random_state=42)

In [None]:
# Fit the tree on the training split. As the last expression of the cell, the
# value returned by fit() is also displayed by the notebook.
classifier.fit(X_train, y_train)

In [None]:
# 5-fold cross-validated accuracy on the TRAINING split.
# cross_val_score clones the estimator and refits it on each fold, so passing
# the test split here would train on the held-out data and never score the
# training set; X_test / y_test stay reserved for a final check such as
# classifier.score(X_test, y_test).
training_score = cross_val_score(classifier, X_train, y_train, cv=5)

In [None]:
# Report the mean cross-validated accuracy as a percentage.
# Scale to percent first and round afterwards: rounding the fraction and then
# multiplying by 100 reintroduces binary floating-point noise
# (e.g. 0.57 * 100 -> 56.99999999999999).
print(
    "Classifier: ",
    classifier.__class__.__name__,
    "Has a training score of",
    round(training_score.mean() * 100, 2),
    "% accuracy score",
)