In [7]:
import numpy as np
import csv
from tqdm import tqdm
import pandas as pd
from pathlib import Path
from random import sample
import pandas as pd
import geopandas as gpd
from shapely import wkt
from copy import deepcopy

# Base directory for the .npy files.
data_path = Path('./npy/')

# CSV of observations; its 'taxon' column is read below.
file = Path('./plankton_data/planktons_med.csv')

# Load the table, then keep each distinct taxon once, in order of first appearance.
df = pd.read_csv(file)
species = pd.unique(df['taxon']).tolist()

In [6]:
# One name per channel; the plotting cell uses labels[i] as the title of channel i.
labels = [
    'sst', 'nh4_med', 'no3_med', 'po4_med',
    'o2_med', 'chl_med', 'thetao_med', 'so_med',
]

## Calculate weights for training

Count the number of occurrences of each taxon. These counts are used to adjust the per-species weights during training (copy the result into the config file).

In [8]:
# Reload the observations table from the CSV referenced by `file`.
df = pd.read_csv(file)

# Observation count per taxon as a plain {taxon: count} dict.
# `.to_dict()` yields built-in ints, whereas dict(series) keeps numpy int64 values
# (printed as np.int64(...) on recent NumPy and rejected by json.dumps), so the
# result can be pasted or serialized into a config file as-is.
vcounts = df['taxon'].value_counts().to_dict()


In [12]:
# Display the number of rows per taxon, most frequent first (the defaults, spelled out).
df['taxon'].value_counts(sort=True, ascending=False)

taxon
Pseudo-nitzschia        14292
Dinophysis              13276
Alexandrium minutum     11013
Karenia mikimotoi        9314
Dinophysis acuminata     8187
Chaetoceros              6193
Name: count, dtype: int64

## Calculate normalization values

In [14]:
# Raw tiles: ./npy/plankton_med-npy/<name>.npy
path = Path('./npy/plankton_med-npy/')

# Load every .npy file in the directory into a list of arrays.
# Globbing on the suffix skips stray entries (checkpoint folders, OS metadata files)
# that np.load cannot read, and sorting makes the load order reproducible.
l = [np.load(npy_file) for npy_file in sorted(path.glob('*.npy'))]

# Fail here with a clear message rather than later inside the statistics code.
if not l:
    raise FileNotFoundError(f"no .npy files found in {path}")

In [16]:
# Stack the loaded tiles into one array; the reductions below expect four axes,
# with the statistics kept along the last one.
bigdata = np.array(l)
if bigdata.ndim != 4:
    raise ValueError(
        f"expected a 4-D stack of tiles, got an array of shape {bigdata.shape}"
    )

# Statistics along the last axis, computed over the first three axes and ignoring NaNs.
# Both percentiles are requested in one call so the data is only processed once.
meds = np.nanmedian(bigdata, axis=(0, 1, 2))
perc1, perc99 = np.nanpercentile(bigdata, [1, 99], axis=(0, 1, 2))

# Row 0: median, row 1: 1st percentile, row 2: 99th percentile.
to_save = np.stack([meds, perc1, perc99])

# np.save does not create missing directories, so make sure stats/ exists first.
Path('stats').mkdir(parents=True, exist_ok=True)
np.save('stats/stats.npy', to_save)

In [3]:
# Reload the saved statistics and print them row by row.
stats_npy = np.load('stats/stats.npy')
print(stats_npy.shape)

# Row order is the order used when the file was written: median, 1st, 99th percentile.
for row_index, row_label in enumerate(("med:", "perc1:", "perc99:")):
    print(row_label, stats_npy[row_index])

(3, 8)
med: [2.90460938e+02 2.41126478e-01 1.28848404e+00 2.89569497e-02
 2.38811340e+02 1.34011731e-01 1.70971699e+01 3.80487022e+01]
perc1: [2.84582031e+02 8.12963646e-02 2.78342098e-01 6.37406856e-03
 2.12764661e+02 4.39251997e-02 1.14192577e+01 3.68528938e+01]
perc99: [2.99338257e+02 9.40352598e-01 6.87029123e+00 2.64072537e-01
 2.68525482e+02 8.04472029e-01 2.61159210e+01 3.84697037e+01]


## Normalize

In [17]:
# Unpack the three rows of stats/stats.npy: median, 1st percentile, 99th percentile.
(meds, perc1, perc99) = np.load("stats/stats.npy")

#### Training data

In [None]:
# NOTE: this cell is disabled (every line is commented out). When enabled, it
# processes each file in `path` and writes the result, under the same file name,
# to npy/plankton_med-npy-norm/ -- the directory the "Display data" cells read from:
#   1. NaNs are filled per channel: with the dataset median (`meds`) when the whole
#      channel of the tile is NaN, otherwise with the tile's own median for that channel.
#   2. Values are rescaled with (x - perc1) / (perc99 - perc1).
#   3. The rescaled values are clipped to [0, 1].
# It needs `path`, `meds`, `perc1` and `perc99` from the cells above.
# for (i, f) in enumerate(path.iterdir()):
#         item = np.load(f)

#         # Fill NaNs with median values (tile median or, if empty, dataset median)
#         all_nans = np.isnan(item).all(axis=(0,1))
#         some_nans = np.logical_and(~all_nans, np.isnan(item).any(axis=(0,1)))
#         fill_values = all_nans * meds + some_nans * np.nan_to_num(np.nanmedian(item, axis = [0,1]), nan=0)
#         filled = np.nan_to_num(item, nan=fill_values)

#         # Normalize
#         normed = (filled - perc1) / (perc99 - perc1)

#         # Remove extreme values
#         final = np.clip(normed, 0,1)


#         # Save
#         np.save('npy/plankton_med-npy-norm/' + f.name, final)


## Display data

In [21]:
# Variable names; same entries, in the same order, as `labels`.
variables = [
    'sst', 'nh4_med', 'no3_med', 'po4_med',
    'o2_med', 'chl_med', 'thetao_med', 'so_med',
]

In [None]:
# NOTE: disabled matplotlib version of the per-channel display (every line is
# commented out). It draws one imshow panel per channel of `real_values`, titled
# with `labels[i]`, marks the tile centre with a white cross and attaches a
# colorbar below each panel. `real_values` is only assigned in a later cell, so
# that cell has to run first if this one is re-enabled.
# from matplotlib import pyplot as plt
# from mpl_toolkits.axes_grid1 import make_axes_locatable
# %matplotlib widget

# plt.figure(figsize=(18,20))
# w, h, _ = real_values.shape

# for i in range(real_values.shape[2]):
#     ax = plt.subplot(5, 5, i+1)
#     plt.imshow(real_values[:,:,i], cmap = 'viridis')
#     plt.title(labels[i])
#     plt.axis('off')
#     plt.plot((w-1)/2, (h-1)/2, marker="x", markersize=20, markeredgecolor="white")
#     divider = make_axes_locatable(ax)
#     cax = divider.append_axes("bottom", size="5%", pad=0.05)
#     plt.colorbar(cax=cax, location='bottom')

# #plt.tight_layout()

    

In [9]:
# Tile to inspect: the file stem inside npy/plankton_med-npy-norm/.
# The plotting cell reads `id` for the figure title.
id = '935'

# NOTE(review): `file` held the CSV path set in the first cell; it is rebound here,
# so re-run that first cell before reading the CSV again.
file = ''.join(['npy/plankton_med-npy-norm/', id, '.npy'])
item = np.load(file)

# Invert the (x - perc1) / (perc99 - perc1) scaling to map the normalized tile
# back onto the original value range.
(meds, perc1, perc99) = np.load("stats/stats.npy")
real_values = perc1 + item * (perc99 - perc1)

real_values.shape

(32, 32, 8)

In [10]:
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import numpy as np

# Inputs from earlier cells:
#   real_values - de-normalized tile, indexed [row, column, channel]
#   labels      - one name per channel, used as subplot titles
#   id          - identifier of the displayed tile, shown in the figure title
n_channels = real_values.shape[2]

# Size the grid from the channel count (near-square) instead of a fixed 5x5 layout,
# which left most panels empty and the used ones very small.
cols = int(np.ceil(np.sqrt(n_channels)))
rows = int(np.ceil(n_channels / cols))

fig = make_subplots(
    rows=rows,
    cols=cols,
    subplot_titles=labels[:n_channels],
    # Leave room between columns for the per-panel colorbars; the cap keeps the
    # value within what make_subplots accepts for wider grids.
    horizontal_spacing=min(0.12, 0.5 / cols),
)

for i in range(n_channels):
    row = i // cols + 1
    col = i % cols + 1

    # Paper-coordinate extent of this panel, used to dock its colorbar next to it.
    panel = fig.get_subplot(row, col)
    x_start, x_end = panel.xaxis.domain
    y_start, y_end = panel.yaxis.domain

    # Every heatmap is colour-scaled on its own data range, so each one needs its
    # own colorbar; a single bar would only describe the last channel.
    # NOTE(review): go.Heatmap draws row 0 of `z` at the bottom (imshow puts it at
    # the top) -- reverse the y-axes if the tiles should be shown row 0 first.
    fig.add_trace(
        go.Heatmap(
            z=real_values[:, :, i],
            colorscale='Viridis',
            colorbar=dict(
                x=x_end + 0.005,
                y=(y_start + y_end) / 2,
                len=y_end - y_start,
                thickness=10,
            ),
        ),
        row=row, col=col
    )

# Update layout
fig.update_layout(height=1000, width=1000, title_text=f"Plankton Data Visualization for ID {id}")
fig.show()
