In [19]:
import os
import shutil
import pandas as pd
from pydantic import BaseModel, Field
import numpy as np
import tensorflow as tf
from tensorflow.keras import Sequential, Model
from tensorflow.keras.layers import LSTM, Dense, Dropout, Input
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler

In [27]:
class LSTMConfig(BaseModel):
    feature_columns: list[str]
    label_column: str
    scaled_labels: bool
    n_steps: int = Field(default=50, ge=1)
    batch_size: int = Field(default=32, ge=1)
    epochs: int = Field(default=20, ge=1)
    test_size: float = Field(default=0.2, ge=0, le=1)
    checkpoint_path: str = Field(default="")


class TimeSeriesLSTM:

    def __init__(self, config: LSTMConfig):
        self.config = config
        self.model: Model = None
        self.build_model()

    def preprocess_data(self, dataframe: pd.DataFrame, train_data=True):
        scaler = MinMaxScaler()
        scaled_data = scaler.fit_transform(dataframe[self.config.feature_columns])
        if train_data:
            if self.config.scaled_labels:
                labels = dataframe[self.config.label_column] / dataframe[self.config.label_column].max()
            else:
                labels = dataframe[self.config.label_column].values

        X, y = [], []
        for i in range(len(scaled_data) - self.config.n_steps):
            X.append(scaled_data[i:i + self.config.n_steps])
            if train_data:
                y.append(labels[i + self.config.n_steps])

        if train_data:
            return np.array(X), np.array(y)
        return np.array(X)

    def load_model(self, path):
        self.model = tf.keras.models.load_model(path)
        print("Model loaded!")
        
    def save_model(self, path):
        self.model.save(path)
        print("Model saved!")

    def build_model(self):
        self.model = Sequential()
        self.model.add(Input((self.config.n_steps, len(self.config.feature_columns)), dtype=tf.float32, name='input_1'))
        self.model.add(Dense(32, activation="relu", dtype=tf.float32))
        self.model.add(Dropout(0.5))
        self.model.add(LSTM(64, return_sequences=False, dtype=tf.float32))
        self.model.add(Dense(1, activation="linear", dtype=tf.float32))
        self.model.compile(optimizer='adam', loss='mean_squared_error')

    def train(self, dataframe: pd.DataFrame):
        X, y = self.preprocess_data(dataframe)
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=self.config.test_size, random_state=69)

        self.model.fit(X_train, y_train, epochs=self.config.epochs, batch_size=self.config.batch_size)

        loss = self.model.evaluate(X_test, y_test)
        print(f"Loss: {loss}")

    def predict(self, input_data: pd.DataFrame):
        X = self.preprocess_data(input_data, train_data=False)
        #X = X.reshape((1, self.config.n_steps, len(self.config.feature_columns)))
        return self.model.predict(X)


def generate_representative_data(data: pd.DataFrame, config: LSTMConfig):
        X = []
        
        for i in range(len(data) - config.n_steps + 1):
            X.append(data.iloc[i:i + config.n_steps].values)  # Hol dir die Zeitfenster
        
        X = np.array(X)

        for sample in X:
            sample = sample.reshape(1, config.n_steps, len(config.feature_columns))  # Form anpassen
            yield {'input_1': sample}

def convert_model_to_tflite(keras_path, tflite_path, representative_dataset, config: LSTMConfig): 
    print("Creating temp SavedModel!")
    keras_model = tf.keras.models.load_model(keras_path)
    keras_model.export("tmp")
    
    print("Converting SavedModel to tflite!")
    converter = tf.lite.TFLiteConverter.from_saved_model(saved_model_dir="tmp")
    converter.optimizations = {tf.lite.Optimize.DEFAULT}
    converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS , tf.lite.OpsSet.SELECT_TF_OPS]
    converter.inference_input_type = tf.float32
    converter.inference_output_type = tf.float32

    # Wird benötigt für den Quantizierungs-Prozess
    converter.representative_dataset = lambda: generate_representative_data(representative_dataset, config)
    
    tflite_model = converter.convert()
    with open(tflite_path, 'wb') as f:
        f.write(tflite_model)
        
    print("Removing temp SavedModel!")
    shutil.rmtree("tmp")
    print("Finished!")

# Datensatz

### Es wird ein zufälliger Datensatz erzeugt

In [7]:
# Zufällige Datensätze erzeugen
train_dates = pd.date_range(start='2020-01-01', periods=10000)
pred_dates = pd.date_range(start='2021-01-01', periods=100)

train_data = {
    'feature1': np.random.rand(10000).astype(np.float32),
    'feature2': np.random.rand(10000).astype(np.float32),
    'feature3': np.random.rand(10000).astype(np.float32),
    'label': np.random.rand(10000).astype(np.float32)
}

pred_data = {
    'feature1': np.random.rand(100).astype(np.float32),
    'feature2': np.random.rand(100).astype(np.float32),
    'feature3': np.random.rand(100).astype(np.float32),
}

df_train = pd.DataFrame(train_data, index=train_dates)
df_pred = pd.DataFrame(pred_data, index=pred_dates)

# Model Initialisierung

### Das Model wird mit gegebener Konfiguration initialisiert

In [4]:
# Konfiguration erstellen und Modell instanziieren
config = LSTMConfig(
    feature_columns=['feature1', 'feature2', 'feature3'],
    label_column='label',
    scaled_labels=False,
    n_steps=25,
    batch_size=64,
    epochs=10,
)
ts_lstm = TimeSeriesLSTM(config=config)

I0000 00:00:1728993761.772353   59162 cuda_executor.cc:1015] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
2024-10-15 14:02:41.772674: W tensorflow/core/common_runtime/gpu/gpu_device.cc:2343] Cannot dlopen some GPU libraries. Please make sure the missing libraries mentioned above are installed properly if you would like to use GPU. Follow the guide at https://www.tensorflow.org/install/gpu for how to download and setup the required libraries for your platform.
Skipping registering GPU devices...


### Trainings-Step

In [33]:
# Start des Trainings
ts_lstm.train(dataframe=df_train)

Epoch 1/10
[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 6ms/step - loss: 0.0825
Epoch 2/10
[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 6ms/step - loss: 0.0832
Epoch 3/10
[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 6ms/step - loss: 0.0826
Epoch 4/10
[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 6ms/step - loss: 0.0828
Epoch 5/10
[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 6ms/step - loss: 0.0823
Epoch 6/10
[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 6ms/step - loss: 0.0832
Epoch 7/10
[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 6ms/step - loss: 0.0820
Epoch 8/10
[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 6ms/step - loss: 0.0817
Epoch 9/10
[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 6ms/step - loss: 0.0842
Epoch 10/10
[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 6ms/step - lo

### Prediction Step

In [22]:
# Modell Predictions erzeugen

pred_results = ts_lstm.predict(df_pred)

df_pred_no_time = pd.DataFrame(pred_data)
for index, row in df_pred_no_time.iterrows():
    if index < config.n_steps:
        print("F:", row['feature1'], row['feature2'], row['feature3'], " P:", "Not enough data")
    else:
        print("F:", row['feature1'], row['feature2'], row['feature3'], " P:", pred_results[index-config.n_steps])

[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 39ms/step
F: 0.51403713 0.19882914 0.5911012  P: Not enough data
F: 0.11934123 0.40530437 0.89096767  P: Not enough data
F: 0.38564697 0.29791084 0.19773565  P: Not enough data
F: 0.6703891 0.71410924 0.5589309  P: Not enough data
F: 0.6856324 0.26695275 0.604667  P: Not enough data
F: 0.37407333 0.5748829 0.76460654  P: Not enough data
F: 0.045838956 0.85503596 0.37089372  P: Not enough data
F: 0.11443184 0.062243324 0.710078  P: Not enough data
F: 0.35948795 0.42584494 0.7620492  P: Not enough data
F: 0.5214783 0.36130518 0.88608074  P: Not enough data
F: 0.9309409 0.4325166 0.7721853  P: Not enough data
F: 0.5279233 0.8800477 0.5392751  P: Not enough data
F: 0.33663088 0.6341414 0.3659117  P: Not enough data
F: 0.8563349 0.4537363 0.38099846  P: Not enough data
F: 0.7088956 0.10979994 0.4552031  P: Not enough data
F: 0.42051426 0.9532006 0.94734293  P: Not enough data
F: 0.62419176 0.5616601 0.22580338  P: Not enough data

# Modell Speichern, Laden und Konvertieren

In [34]:
ts_lstm.save_model("TestModel.keras")

Model saved!


In [29]:
# Lade das Modell aus der TF-Lite Datei
ts_lstm.load_model("TestModel.keras")

Model loaded!


In [35]:
# Konvertiere das Keras-Modell in ein tflite-Modell
convert_model_to_tflite("TestModel.keras", "TestModel.tflite", df_pred, config)

Creating temp SavedModel!
INFO:tensorflow:Assets written to: tmp/assets


INFO:tensorflow:Assets written to: tmp/assets


Saved artifact at 'tmp'. The following endpoints are available:

* Endpoint 'serve'
  args_0 (POSITIONAL_ONLY): TensorSpec(shape=(None, 25, 3), dtype=tf.float32, name='input_1')
Output Type:
  TensorSpec(shape=(None, 1), dtype=tf.float32, name=None)
Captures:
  133969986470656: TensorSpec(shape=(), dtype=tf.resource, name=None)
  133969986481920: TensorSpec(shape=(), dtype=tf.resource, name=None)
  133969987150848: TensorSpec(shape=(), dtype=tf.resource, name=None)
  133969987152256: TensorSpec(shape=(), dtype=tf.resource, name=None)
  133969987154016: TensorSpec(shape=(), dtype=tf.resource, name=None)
  133969986472064: TensorSpec(shape=(), dtype=tf.resource, name=None)
  133970011606528: TensorSpec(shape=(), dtype=tf.resource, name=None)
Converting SavedModel to tflite!


W0000 00:00:1728994635.508054   59162 tf_tfl_flatbuffer_helpers.cc:392] Ignored output_format.
W0000 00:00:1728994635.508067   59162 tf_tfl_flatbuffer_helpers.cc:395] Ignored drop_control_dependency.
2024-10-15 14:17:15.508205: I tensorflow/cc/saved_model/reader.cc:83] Reading SavedModel from: tmp
2024-10-15 14:17:15.508665: I tensorflow/cc/saved_model/reader.cc:52] Reading meta graph with tags { serve }
2024-10-15 14:17:15.508674: I tensorflow/cc/saved_model/reader.cc:147] Reading SavedModel debug info (if present) from: tmp
2024-10-15 14:17:15.513278: I tensorflow/cc/saved_model/loader.cc:236] Restoring SavedModel bundle.
2024-10-15 14:17:15.537195: I tensorflow/cc/saved_model/loader.cc:220] Running initialization op on SavedModel bundle at path: tmp
2024-10-15 14:17:15.545987: I tensorflow/cc/saved_model/loader.cc:462] SavedModel load for tags { serve }; Status: success: OK. Took 37784 microseconds.
2024-10-15 14:17:15.589835: W tensorflow/compiler/mlir/lite/flatbuffer_export.cc:346

Removing temp SavedModel!
Finished!


fully_quantize: 0, inference_type: 6, input_inference_type: FLOAT32, output_inference_type: FLOAT32


In [51]:
df_train = pd.read_json("data/4_squashed_format/2024-08-21.json", dtype=np.float32)
df_train = df_train.dropna()

feature_columns = [
    "movementData",
    #"sleepLevels",
    #"restlessMoments",
    "heartrate",
    #"stress",
    #"bodyBattery",
    #"hrv"
]

label_column = 'sleepLevels'

config = LSTMConfig(
    feature_columns=feature_columns,
    label_column=label_column,
    n_steps=50,
    batch_size=32,
    epochs=1,
    scaled_labels=True,
    checkpoint_path="models/chkp.keras")
ts_lstm = TimeSeriesLSTM(config=config)
ts_lstm.train(dataframe=df_train[feature_columns + [label_column]])

inputs = np.ones((1,50,2)) * 0.1

print("Inputs:", inputs, "|", "Output:", ts_lstm.predict(inputs))

ts_lstm.save_tflite_model("models/tfLiteModel.tflite", dataset=df_train[feature_columns])

  df_train = pd.read_json("data/4_squashed_format/2024-08-21.json", dtype=np.float32)


No checkpoint found!


  y.append(labels[i + self.config.n_steps])


[1m743/744[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 7ms/step - loss: 0.0316
Epoch 1: saving model to models/chkp.keras
[1m744/744[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 7ms/step - loss: 0.0316
[1m186/186[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - loss: 0.0262
Loss: 0.026939673349261284
INFO:tensorflow:Assets written to: testModel/assets


INFO:tensorflow:Assets written to: testModel/assets


Saved artifact at 'testModel/'. The following endpoints are available:

* Endpoint 'serve'
  args_0 (POSITIONAL_ONLY): TensorSpec(shape=(None, 50, 2), dtype=tf.float32, name='input_1')
Output Type:
  TensorSpec(shape=(None, 50, 1), dtype=tf.float32, name=None)
Captures:
  130926510587072: TensorSpec(shape=(), dtype=tf.resource, name=None)
  130926510587248: TensorSpec(shape=(), dtype=tf.resource, name=None)
  130926510582496: TensorSpec(shape=(), dtype=tf.resource, name=None)
  130926511002848: TensorSpec(shape=(), dtype=tf.resource, name=None)
  130926511003728: TensorSpec(shape=(), dtype=tf.resource, name=None)


W0000 00:00:1728568957.471736   21587 tf_tfl_flatbuffer_helpers.cc:392] Ignored output_format.
W0000 00:00:1728568957.471755   21587 tf_tfl_flatbuffer_helpers.cc:395] Ignored drop_control_dependency.
2024-10-10 16:02:37.471911: I tensorflow/cc/saved_model/reader.cc:83] Reading SavedModel from: testModel/
2024-10-10 16:02:37.472380: I tensorflow/cc/saved_model/reader.cc:52] Reading meta graph with tags { serve }
2024-10-10 16:02:37.472390: I tensorflow/cc/saved_model/reader.cc:147] Reading SavedModel debug info (if present) from: testModel/
2024-10-10 16:02:37.476396: I tensorflow/cc/saved_model/loader.cc:236] Restoring SavedModel bundle.
2024-10-10 16:02:37.495943: I tensorflow/cc/saved_model/loader.cc:220] Running initialization op on SavedModel bundle at path: testModel/
2024-10-10 16:02:37.503940: I tensorflow/cc/saved_model/loader.cc:462] SavedModel load for tags { serve }; Status: success: OK. Took 32032 microseconds.
2024-10-10 16:02:37.542021: W tensorflow/compiler/mlir/lite/fla

# Utils

In [None]:
def list_all_files(path):
    try:
        files = []

        for f in os.listdir(path):
            filepath = os.path.join(path, f)
            if os.path.isfile(filepath):  # Prüft, ob es sich um eine Datei handelt
                files.append(f)

        return files
    except FileNotFoundError:
        print(f"Das Verzeichnis '{path}' wurde nicht gefunden.")
        return []