# Edge-ml example: Train a classifier from data stored on edge-ml

In [14]:
import os
from collections import Counter

import numpy as np
import pandas as pd
import tensorflow as tf
from edgeml import edgeml
from keras.layers import Dense, Flatten
from keras.models import Sequential
from keras.utils import to_categorical
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import Normalizer
from sklearn.tree import DecisionTreeClassifier

#### Set some global variables

In [15]:
BACKEND_URL = "https://beta.edge-ml.org"  # You don't need to change this

# Read key of the edge-ml project. Set the EDGEML_READ_KEY environment variable
# to use your own key without writing it into the notebook; the literal below
# is only the fallback used when that variable is not set.
READ_KEY = os.environ.get("EDGEML_READ_KEY", "86a0e3e2fdbe2835b49463fe99f62af8")

# Set parameters as you wish
LABELING_NAME = "ShakeRest"  # name of the labeling whose labels are used as targets

# Windowing parameters read by window_data; WINDOW_LEN is a number of rows
WINDOW_LEN = 20
WINDOW_OVERLAP = 10

#### Load the data from edge-ml

In [16]:
# Download the project's datasets from the backend using the read key.
datasetRetriever = edgeml.DatasetReceiver(BACKEND_URL, READ_KEY)
datasetRetriever.loadData()
datasets = datasetRetriever.datasets

# NOTE: "labeligns" (sic) is the attribute name this notebook reads from the
# retriever object; do not "fix" the spelling here.
labelings = datasetRetriever.labeligns
print("Labelings in project: ", ",".join([x["name"] for x in labelings]))

# Map label id -> label definition. The map covers the labels of *every*
# labeling in the project (not only the first one), so the lookup keeps working
# when LABELING_NAME is not the first labeling, and an empty project yields an
# empty map instead of an IndexError.
labelingMap = {
    label["_id"]: label
    for labeling in labelings
    for label in labeling["labels"]
}

Labelings in project:  ShakeRest


#### Convert datasets to NumPy arrays

In [17]:
def datasets_to_numpy(datasets, labeling_name=None, labeling_map=None):
    """Convert labeled datasets into per-dataset NumPy arrays.

    For every dataset that carries the requested labeling, all of its time
    series are outer-joined on the "time" column, sorted by time, gap-filled
    (linear interpolation, then forward/backward fill) and annotated with a
    "label" column. The "time" column is dropped from the result.

    Args:
        datasets: Iterable of dataset objects exposing ``timeSeries`` (objects
            with a ``data`` DataFrame containing a "time" column) and
            ``labelings`` (objects with ``name`` and ``labels``; each label
            has ``start``, ``end`` and ``type``).
        labeling_name: Name of the labeling to use. Defaults to the notebook
            global ``LABELING_NAME``.
        labeling_map: Mapping from label type id to a dict with a "name" key.
            Defaults to the notebook global ``labelingMap``.

    Returns:
        A list with one 2-D array per usable dataset. The last column holds
        the label name of each row ("" where no label covers the row); the
        other columns hold the merged sensor values.

    Raises:
        KeyError: If a label's type is missing from ``labeling_map``.
    """
    # Resolve the notebook-level defaults lazily so the function can also be
    # called with explicit arguments.
    if labeling_name is None:
        labeling_name = LABELING_NAME
    if labeling_map is None:
        labeling_map = labelingMap

    res_datasets = []
    for dataset in datasets:
        labels = next(
            (labeling.labels for labeling in dataset.labelings if labeling.name == labeling_name),
            None,
        )

        # Skip unusable datasets *before* doing the merge/interpolation work.
        if labels is None or len(dataset.timeSeries) == 0:
            continue

        merged_df = dataset.timeSeries[0].data
        for time_series in dataset.timeSeries[1:]:
            merged_df = pd.merge(merged_df, time_series.data, on="time", how="outer")

        # Sort into a new frame: with a single time series `merged_df` is still
        # the dataset's own DataFrame and must not be reordered in place.
        merged_df = merged_df.sort_values(by="time").reset_index(drop=True)

        interpol_df = merged_df.interpolate(method="linear").ffill().bfill()
        # NOTE(review): assumes "time" is datetime64[ns], so that the integer
        # view divided by 10**6 is milliseconds (the unit label.start/label.end
        # are compared against) - confirm against the edgeml client.
        interpol_df["time"] = interpol_df["time"].values.astype(np.int64) // 10**6

        interpol_df["label"] = ""
        for label in labels:
            in_label_span = (interpol_df["time"] >= label.start) & (interpol_df["time"] <= label.end)
            interpol_df.loc[in_label_span, "label"] = labeling_map[label.type]["name"]

        res_datasets.append(interpol_df.drop(columns=["time"]).to_numpy())
    return res_datasets

#### Window the data

In [18]:
def window_data(datasets, window_len=None, window_overlap=None):
    """Cut datasets into fixed-length windows and derive one label per window.

    Args:
        datasets: Iterable of 2-D arrays whose last column holds the per-row
            label name ("" meaning unlabeled) and whose other columns hold the
            feature values.
        window_len: Number of rows per window. Defaults to the notebook global
            ``WINDOW_LEN``.
        window_overlap: Number of rows shared by two consecutive windows.
            Defaults to the notebook global ``WINDOW_OVERLAP``.

    Returns:
        ``(windows, window_labels)`` where ``windows`` has shape
        ``(n_windows, window_len, n_features)`` and ``window_labels`` holds an
        integer class index per window. Indices follow the sorted order of the
        label names that occur. Windows whose majority label is "" are removed.
        Both arrays are empty when no dataset is long enough for one window.

    Raises:
        ValueError: If ``window_len`` is not positive or ``window_overlap`` is
            not in ``[0, window_len)``.
    """
    # Resolve the notebook-level defaults lazily so the function can also be
    # called with explicit arguments.
    if window_len is None:
        window_len = WINDOW_LEN
    if window_overlap is None:
        window_overlap = WINDOW_OVERLAP

    if window_len <= 0:
        raise ValueError("window_len must be a positive number of rows")
    if not 0 <= window_overlap < window_len:
        raise ValueError("window_overlap must satisfy 0 <= window_overlap < window_len")

    # Consecutive windows share `window_overlap` rows, so each window starts
    # this many rows after the previous one.
    step = window_len - window_overlap

    windows = []
    for data in datasets:
        # "+ 1" keeps the last full window (the one ending on the final row).
        for start in range(0, len(data) - window_len + 1, step):
            windows.append(data[start:start + window_len])

    if not windows:
        return np.empty((0, window_len, 0)), np.empty((0,), dtype=int)

    stacked = np.array(windows)

    # Reduce to most likely label. Counter.most_common breaks ties in favour of
    # the label encountered first, which keeps the result deterministic.
    window_labels = np.array(
        [Counter(row_labels.tolist()).most_common(1)[0][0] for row_labels in stacked[:, :, -1]]
    )
    windows = stacked[:, :, :-1]

    # Filter out windows with no label
    is_labeled = window_labels != ""
    windows = windows[is_labeled]
    window_labels = window_labels[is_labeled]

    # window_labels are strings, convert to integers starting from 0
    unique_labels = np.unique(window_labels)
    label_map = {label: i for i, label in enumerate(unique_labels)}
    window_labels = np.array([label_map[label] for label in window_labels])

    return windows, window_labels

#### Normalize the data

In [19]:
def normalize(X_train, X_test, Y_train, Y_test):
    """Apply scikit-learn's ``Normalizer`` to the train and test features.

    Per the scikit-learn documentation, ``Normalizer`` rescales each sample
    (row) independently to unit norm (L2 by default).

    Args:
        X_train: 2-D training feature matrix; the normalizer is fitted on it.
        X_test: 2-D test feature matrix, transformed with the fitted normalizer.
        Y_train: Training targets, returned unchanged.
        Y_test: Test targets, returned unchanged.

    Returns:
        ``(X_train, X_test, Y_train, Y_test)`` with both feature matrices
        normalized.
    """
    scaler = Normalizer()
    train_scaled = scaler.fit_transform(X_train)
    test_scaled = scaler.transform(X_test)
    return train_scaled, test_scaled, Y_train, Y_test

#### Classifier

In [20]:
class BaseModel:
    """Common interface for the classifiers compared in this notebook.

    Subclasses provide ``train`` and ``predict``; scoring and reporting are
    implemented here on top of ``predict``.
    """

    def train(self, X_data, Y_data):
        """Fit the model on features ``X_data`` and targets ``Y_data``."""
        raise NotImplementedError("The train method must be implemented by the subclass.")

    def predict(self, X_data):
        """Return the predicted class label for every sample in ``X_data``."""
        raise NotImplementedError("The predict method must be implemented by the subclass.")

    def evaluate(self, X_data, Y_data):
        """Score the model's predictions for ``X_data`` against ``Y_data``.

        Returns:
            Tuple ``(accuracy, f1, precision, recall)``; the last three use
            weighted averaging over the classes.
        """
        predictions = self.predict(X_data)
        return (
            accuracy_score(Y_data, predictions),
            f1_score(Y_data, predictions, average='weighted'),
            precision_score(Y_data, predictions, average='weighted'),
            recall_score(Y_data, predictions, average='weighted'),
        )

    def print_metrics(self, X_data, Y_data):
        """Print the four ``evaluate`` metrics, labeled with the class name."""
        scores = self.evaluate(X_data, Y_data)
        accuracy, f1, precision, recall = scores
        report = (
            f"Metrics - {self.__class__.__name__}:\n"
            f"  Accuracy: {accuracy:.4f}\n"
            f"  F1 Score: {f1:.4f}\n"
            f"  Precision: {precision:.4f}\n"
            f"  Recall: {recall:.4f}"
        )
        print(report)


class DecisionTreeModel(BaseModel):
    """Decision-tree classifier wrapped in the ``BaseModel`` interface."""

    def __init__(self, random_state=42):
        """Create the underlying scikit-learn tree.

        Args:
            random_state: Seed forwarded to ``DecisionTreeClassifier`` so that
                repeated runs of the notebook build the same tree. The default
                matches the seed used for ``train_test_split``; pass ``None``
                for an unseeded tree.
        """
        self.model = DecisionTreeClassifier(random_state=random_state)

    def train(self, X_data, Y_data):
        """Fit the tree on features ``X_data`` and targets ``Y_data``."""
        self.model.fit(X_data, Y_data)

    def predict(self, X_data):
        """Return the tree's predicted class label for each sample."""
        return self.model.predict(X_data)


class DenseNNModel(BaseModel):
    """Small fully-connected Keras network wrapped in the ``BaseModel`` interface."""

    def __init__(self, epochs=10, batch_size=32):
        """Store the training hyper-parameters; the network is built in ``train``.

        Args:
            epochs: Number of training epochs passed to ``fit``.
            batch_size: Mini-batch size passed to ``fit``.
        """
        self.model = None
        self.epochs = epochs
        self.batch_size = batch_size

    def train(self, X_data, Y_data):
        """Build a fresh network and fit it on ``X_data`` / ``Y_data``.

        ``Y_data`` must contain integer class indices; it is one-hot encoded
        here and the width of that encoding sets the size of the output layer.
        """
        one_hot = to_categorical(Y_data)
        self.model = Sequential([
            Flatten(),
            Dense(256, activation='relu'),
            Dense(256, activation='relu'),
            # NOTE(review): 2-unit ReLU bottleneck right before the output
            # layer - confirm this is intentional and not a leftover.
            Dense(2, activation='relu'),
            Dense(one_hot.shape[-1], activation='softmax')
        ])
        self.model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
        self.model.fit(X_data, one_hot, epochs=self.epochs, batch_size=self.batch_size, verbose=0)

    def predict(self, X_data):
        """Return the index of the most probable class for each sample.

        Raises:
            RuntimeError: If called before ``train``.
        """
        if self.model is None:
            raise RuntimeError("DenseNNModel.predict() was called before train().")
        y_pred = self.model.predict(X_data, verbose=0)
        return np.argmax(y_pred, axis=1)

### Put it all together

In [21]:
# Bind the converted arrays to a new name: overwriting `datasets` would make
# this cell fail when it is re-run, because datasets_to_numpy would then
# receive NumPy arrays instead of the retrieved dataset objects.
datasets_np = datasets_to_numpy(datasets)
X_data, Y_data = window_data(datasets_np)
X_train, X_test, Y_train, Y_test = train_test_split(X_data, Y_data, test_size=0.2, random_state=42)

# Flatten the arrays for sklearn
X_train = X_train.reshape(X_train.shape[0], -1)
X_test = X_test.reshape(X_test.shape[0], -1)

X_train, X_test, Y_train, Y_test = normalize(X_train, X_test, Y_train, Y_test)

# Decision Tree
dt_model = DecisionTreeModel()
dt_model.train(X_train, Y_train)
dt_model.print_metrics(X_test, Y_test)

# Dense Neural Network
nn_model = DenseNNModel()
nn_model.train(X_train, Y_train)
nn_model.print_metrics(X_test, Y_test)

Metrics - DecisionTreeModel:
  Accuracy: 0.9333
  F1 Score: 0.9327
  Precision: 0.9407
  Recall: 0.9333
Metrics - DenseNNModel:
  Accuracy: 1.0000
  F1 Score: 1.0000
  Precision: 1.0000
  Recall: 1.0000


#### Quantize NN

To save space, speed up computation, and reduce RAM usage, post-training quantization can be applied to the trained network.
See https://www.tensorflow.org/model_optimization/guide/quantization/post_training for details.

In [22]:
# The trained Keras network from the previous cell
nn = nn_model.model

# Convert to tflite
converter = tf.lite.TFLiteConverter.from_keras_model(nn)
tflite_model = converter.convert()
print("Non-quantized model len: ", len(tflite_model))

# Convert with quantization (a fresh converter, with default optimizations enabled)
converter = tf.lite.TFLiteConverter.from_keras_model(nn)
converter.optimizations = [tf.lite.Optimize.DEFAULT]
tflite_quant_model = converter.convert()
print("Quantized model len: ", len(tflite_quant_model))

# How many times smaller the quantized model is than the plain conversion
print("Compression ratio: ", len(tflite_model) / len(tflite_quant_model))


INFO:tensorflow:Assets written to: /var/folders/c7/01xzhz690mj0sss9z1vdfwqc0000gn/T/tmptcip8y5a/assets


INFO:tensorflow:Assets written to: /var/folders/c7/01xzhz690mj0sss9z1vdfwqc0000gn/T/tmptcip8y5a/assets


Saved artifact at '/var/folders/c7/01xzhz690mj0sss9z1vdfwqc0000gn/T/tmptcip8y5a'. The following endpoints are available:

* Endpoint 'serve'
  args_0 (POSITIONAL_ONLY): TensorSpec(shape=(None, 180), dtype=tf.float32, name='keras_tensor_6')
Output Type:
  TensorSpec(shape=(None, 2), dtype=tf.float32, name=None)
Captures:
  13011985296: TensorSpec(shape=(), dtype=tf.resource, name=None)
  13011987600: TensorSpec(shape=(), dtype=tf.resource, name=None)
  13011987984: TensorSpec(shape=(), dtype=tf.resource, name=None)
  13011988944: TensorSpec(shape=(), dtype=tf.resource, name=None)
  13011989328: TensorSpec(shape=(), dtype=tf.resource, name=None)
  13011990288: TensorSpec(shape=(), dtype=tf.resource, name=None)
  13011989712: TensorSpec(shape=(), dtype=tf.resource, name=None)
  13011991056: TensorSpec(shape=(), dtype=tf.resource, name=None)
Non-quanized model len:  453312
INFO:tensorflow:Assets written to: /var/folders/c7/01xzhz690mj0sss9z1vdfwqc0000gn/T/tmpf5tz0f7o/assets


W0000 00:00:1733840821.704671 4200472 tf_tfl_flatbuffer_helpers.cc:365] Ignored output_format.
W0000 00:00:1733840821.704687 4200472 tf_tfl_flatbuffer_helpers.cc:368] Ignored drop_control_dependency.
2024-12-10 15:27:01.704823: I tensorflow/cc/saved_model/reader.cc:83] Reading SavedModel from: /var/folders/c7/01xzhz690mj0sss9z1vdfwqc0000gn/T/tmptcip8y5a
2024-12-10 15:27:01.705229: I tensorflow/cc/saved_model/reader.cc:52] Reading meta graph with tags { serve }
2024-12-10 15:27:01.705235: I tensorflow/cc/saved_model/reader.cc:147] Reading SavedModel debug info (if present) from: /var/folders/c7/01xzhz690mj0sss9z1vdfwqc0000gn/T/tmptcip8y5a
2024-12-10 15:27:01.708578: I tensorflow/cc/saved_model/loader.cc:236] Restoring SavedModel bundle.
2024-12-10 15:27:01.729956: I tensorflow/cc/saved_model/loader.cc:220] Running initialization op on SavedModel bundle at path: /var/folders/c7/01xzhz690mj0sss9z1vdfwqc0000gn/T/tmptcip8y5a
2024-12-10 15:27:01.736743: I tensorflow/cc/saved_model/loader.cc:

Saved artifact at '/var/folders/c7/01xzhz690mj0sss9z1vdfwqc0000gn/T/tmpf5tz0f7o'. The following endpoints are available:

* Endpoint 'serve'
  args_0 (POSITIONAL_ONLY): TensorSpec(shape=(None, 180), dtype=tf.float32, name='keras_tensor_6')
Output Type:
  TensorSpec(shape=(None, 2), dtype=tf.float32, name=None)
Captures:
  13011985296: TensorSpec(shape=(), dtype=tf.resource, name=None)
  13011987600: TensorSpec(shape=(), dtype=tf.resource, name=None)
  13011987984: TensorSpec(shape=(), dtype=tf.resource, name=None)
  13011988944: TensorSpec(shape=(), dtype=tf.resource, name=None)
  13011989328: TensorSpec(shape=(), dtype=tf.resource, name=None)
  13011990288: TensorSpec(shape=(), dtype=tf.resource, name=None)
  13011989712: TensorSpec(shape=(), dtype=tf.resource, name=None)
  13011991056: TensorSpec(shape=(), dtype=tf.resource, name=None)


W0000 00:00:1733840821.897145 4200472 tf_tfl_flatbuffer_helpers.cc:365] Ignored output_format.
W0000 00:00:1733840821.897165 4200472 tf_tfl_flatbuffer_helpers.cc:368] Ignored drop_control_dependency.
2024-12-10 15:27:01.897278: I tensorflow/cc/saved_model/reader.cc:83] Reading SavedModel from: /var/folders/c7/01xzhz690mj0sss9z1vdfwqc0000gn/T/tmpf5tz0f7o
2024-12-10 15:27:01.897661: I tensorflow/cc/saved_model/reader.cc:52] Reading meta graph with tags { serve }
2024-12-10 15:27:01.897666: I tensorflow/cc/saved_model/reader.cc:147] Reading SavedModel debug info (if present) from: /var/folders/c7/01xzhz690mj0sss9z1vdfwqc0000gn/T/tmpf5tz0f7o
2024-12-10 15:27:01.900971: I tensorflow/cc/saved_model/loader.cc:236] Restoring SavedModel bundle.
2024-12-10 15:27:01.922081: I tensorflow/cc/saved_model/loader.cc:220] Running initialization op on SavedModel bundle at path: /var/folders/c7/01xzhz690mj0sss9z1vdfwqc0000gn/T/tmpf5tz0f7o
2024-12-10 15:27:01.928804: I tensorflow/cc/saved_model/loader.cc:

Quanized model len:  124696
Compression ratio:  3.635337139924296
