Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

added a simple MLP neural network for wet-dry classification #146

Merged
merged 7 commits into from
Jan 3, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
382 changes: 382 additions & 0 deletions notebooks/Rain event detection methods.ipynb

Large diffs are not rendered by default.

97 changes: 97 additions & 0 deletions pycomlink/processing/wet_dry/mlp.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,97 @@
import numpy as np
from numpy.lib.stride_tricks import sliding_window_view
import tensorflow as tf
import pkg_resources
import pandas as pd

def get_model_file_path():
return pkg_resources.resource_filename(
"pycomlink", "/processing/wet_dry/mlp_model_files"
)

model = tf.keras.models.load_model(str(get_model_file_path() + "/model_mlp.keras"))

def mlp_wet_dry(
trsl_channel_1,
trsl_channel_2,
threshold=None, # 0.5 is often good, or argmax
):
"""
Wet dry classification using a simple neural network:

This MLP calculates wet and dry periods using a 40 minutes rolling window
for the CML signal loss from two sublinks (trsl_channel_1 and
trsl_channel_2) with temporal resolution equal to 1 minute. It consists of
one fully connected hidden layers with 20 neurons using the relu
activation function. The MLP was trained to predict rainfall recorded
at narby disdrometers at 10 minute resolution for one month of data with 14
pairs of CMLs and disdrometers from different locations in Norway. The MLP
was trained using MLPClassifier from sklearn and then transformed
to tensorflow to be compatible with the pycomlink environment.

If only one channel is available from the CML, use that channel for both
trsl_channel_1 and trsl_channel_2.

The error "WARNING:absl:Skipping variable loading for optimizer 'Adam',
because it has 13 variables whereas the saved optimizer has 1 variables."
can safely be ignored.

Parameters
----------
trsl_channel_1 : iterable of float
Time series of received signal level of channel 1
trsl_channel_2 : iterable of float
Time series of received signal level of channel 2
threshold : float
Threshold (0 - 1) for setting event as wet or dry. If set to None
(default), returns the continuous probability of wet [0, 1] from the
logistic activation function.

Returns
-------
iterable of float
Time series of wet/dry probability or (if threshold is provided)
wet dry classification

References
----------


"""
# Normalization
trsl_channel_1_norm = (trsl_channel_1 - np.nanmean(trsl_channel_1)) / np.nanstd(trsl_channel_1)
trsl_channel_2_norm = (trsl_channel_2 - np.nanmean(trsl_channel_2)) / np.nanstd(trsl_channel_2)

# add nan to start and end
windowsize = 40 # use two channels
x_start = np.ones([int(windowsize/2), windowsize*2])*np.nan
x_end = np.ones([int(windowsize/2)- 1, windowsize*2])*np.nan

# sliding window
sliding_window_ch1 = sliding_window_view(
trsl_channel_1_norm,
window_shape = windowsize
)

sliding_window_ch2 = sliding_window_view(
trsl_channel_2_norm,
window_shape = windowsize
)

x_fts = np.vstack(
[x_start, np.hstack([sliding_window_ch1, sliding_window_ch2]), x_end]
)

mlp_pred = np.zeros([x_fts.shape[0], 2])*np.nan
indices = np.argwhere(~np.isnan(x_fts).any(axis = 1)).ravel()

if indices.size > 0: # else: predictions are kept as nan
mlp_pred_ = model.predict(x_fts[indices], verbose=0)
mlp_pred[indices] = mlp_pred_

if threshold == None:
return mlp_pred
else:
mlp_pred = mlp_pred[:, 1]
mlp_pred[indices] = mlp_pred[indices] > threshold
return mlp_pred
Binary file not shown.
69 changes: 69 additions & 0 deletions pycomlink/tests/test_wet_dry_mlp.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
import unittest
import numpy as np
from pycomlink.processing.wet_dry.mlp import mlp_wet_dry

class Testmlppred(unittest.TestCase):
"""
This runs the same tests as test_wet_dry_cnn.py but with different
content in truth_raw.
"""

def test_mlppred(self):
trsl_channel_1 = np.arange(0, 60 * 8).astype(float)
trsl_channel_2 = np.arange(0, 60 * 8).astype(float)

trsl_channel_1[310] = np.nan # shorter window than in cnn

pred_raw = mlp_wet_dry(
trsl_channel_1,
trsl_channel_2,
threshold=None,
)[:, 1]

pred = mlp_wet_dry(
trsl_channel_1,
trsl_channel_2,
threshold=0.197, # low threshold for testing
)

# check if length of array is the same
assert len(pred_raw) == 60 * 8
assert len(pred) == 60 * 8

# check if array is as expected
truth_raw = np.array(
[
0.19271295,
0.19395444,
0.19520202,
0.19645563,
0.19771534,
0.19898114,
0.20025298,
0.20153098,
0.20281503,
0.20410511,
0.20540135,
np.nan,
np.nan,
]
)
truth = np.array(
[
0,
0,
0,
0,
1,
1,
1,
1,
1,
1,
1,
np.nan,
np.nan,
]
)
np.testing.assert_almost_equal(pred[280:293], truth)
np.testing.assert_almost_equal(pred_raw[280:293], truth_raw, decimal=7)
Loading