In [9]:
import os
import shutil
import flatbuffers
import pandas as pd
from pydantic import BaseModel, Field
import numpy as np
import tensorflow as tf
from tensorflow.keras import Sequential, Model
from tensorflow.keras.layers import LSTM, Dense, Dropout, Input, Concatenate, Normalization
from keras.optimizers import Adam
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler

from tflite_support import metadata_schema_py_generated as _metadata_fb
from tflite_support import metadata as _metadata

In [64]:
class LSTMConfig(BaseModel):
    feature_columns: list[str]
    label_names: list[str]
    scaled_labels: bool
    n_steps: int = Field(default=50, ge=1)
    batch_size: int = Field(default=32, ge=1)
    epochs: int = Field(default=20, ge=1)
    test_size: float = Field(default=0.2, ge=0, le=1)
    checkpoint_path: str = Field(default="")


class TimeSeriesLSTM:

    def __init__(self, config: LSTMConfig):
        self.config = config
        self.model: Model = None

    def _create_segments(self, data: pd.Series, n_steps):
        num_samples = len(data) - n_steps + 1
        return np.array([data[i:i + n_steps] for i in range(num_samples)]).astype(np.float32)
    
    
    def dataframe_to_pred_data(self, df_prediction: pd.DataFrame):
        features = self.config.feature_columns
        n_steps = self.config.n_steps
        
        X = []
        # Create Time Series in Timestamp
        segmented_data = self._create_segments(df_prediction.index.asi8, n_steps)
        X.append(segmented_data[:, :, np.newaxis])
        
        for feature in features:
            segmented_data = self._create_segments(df_prediction[feature], n_steps)
            X.append(segmented_data[:, :, np.newaxis])
        
        return np.concatenate(X, axis=2)
    

    def dataframe_to_train_data(self, df_training: pd.DataFrame):
        features = self.config.feature_columns
        labels = self.config.label_names
        n_steps = self.config.n_steps
        
        X = self.dataframe_to_pred_data(df_training[features])
        
        label_arrays = [self._create_segments(df_training[label], n_steps)[:, -1] for label in labels]
        y = np.stack(label_arrays, axis=1)
        
        return X, y

    def load_model(self, path):
        self.model = tf.keras.models.load_model(path)
        print("Model loaded!")
        
    def save_model(self, path):
        self.model.save(path)
        print("Model saved!")

    def build_model(self, adaptation_data):
        inputs = []
        lstm_outputs = []
        
        for feature_idx in range(len(self.config.feature_columns)+1):
            if feature_idx > 0:
                feature_name = self.config.feature_columns[feature_idx-1]
            else:
                feature_name = "Time"
            
            input_tensor = Input(shape=(self.config.n_steps, 1), dtype=tf.float32, name=f'input_{feature_name}')
            inputs.append(input_tensor)
            normalization = Normalization()
            normalization.adapt(adaptation_data)
            lstm = LSTM(64, return_sequences=False, dtype=tf.float32)
            lstm_outputs.append(lstm(normalization(input_tensor)))

        merged = Concatenate(axis=1)(lstm_outputs)
        
        x = Dense(64, activation='relu')(merged)
        output = Dense(len(self.config.label_names))(x)
        
        self.model = Model(inputs=inputs, outputs=output)
        self.model.compile(optimizer=Adam(), loss='mean_squared_error', metrics=['accuracy'])
        print(self.model.summary())

    def train(self, dataframe: pd.DataFrame):
        X, y = self.dataframe_to_train_data(dataframe)
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=self.config.test_size, random_state=69)
        
        X_train_split = np.split(X_train, indices_or_sections=3, axis=2)
        X_test_split = np.split(X_test, indices_or_sections=3, axis=2)

        X_train_squeeze = [arr.squeeze(axis=2) for arr in X_train_split]
        X_test_squeeze = [arr.squeeze(axis=2) for arr in X_test_split]

        self.model.fit(X_train_squeeze, y_train, epochs=self.config.epochs, batch_size=self.config.batch_size)
        loss = self.model.evaluate(X_test_squeeze, y_test)
        
        print(f"Loss: {loss}")

    def predict(self, input_data: pd.DataFrame):
        X = self.dataframe_to_pred_data(input_data)       
        X_split = np.split(X, indices_or_sections=3, axis=2)
        X_squeeze = [arr.squeeze(axis=2) for arr in X_split]
        return self.model.predict(X_squeeze)


def generate_representative_data(data: pd.DataFrame, config: LSTMConfig):
    X = []
    
    for i in range(len(data) - config.n_steps + 1):
        X.append(data.iloc[i:i + config.n_steps].values)  # Hol dir die Zeitfenster
    
    X = np.array(X)       

    for i, sample in enumerate(X):
        sample = sample.reshape(1, config.n_steps, 1)  # Form anpassen
        yield {f'input_{config.feature_columns[i]}': sample}

def convert_model_to_tflite(keras_path, tflite_path, representative_dataset, config: LSTMConfig): 
    print("Creating temp SavedModel!")
    keras_model = tf.keras.models.load_model(keras_path)
    keras_model.export("tmp")
    
    print("Converting SavedModel to tflite!")
    converter = tf.lite.TFLiteConverter.from_saved_model(saved_model_dir="tmp")
    converter.optimizations = {tf.lite.Optimize.DEFAULT}
    converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS , tf.lite.OpsSet.SELECT_TF_OPS]
    converter.inference_input_type = tf.float32
    converter.inference_output_type = tf.float32

    # Wird benötigt für den Quantizierungs-Prozess
    converter.representative_dataset = lambda: generate_representative_data(representative_dataset, config)
    
    tflite_model = converter.convert()
    with open(tflite_path, 'wb') as f:
        f.write(tflite_model)
        
    print("Removing temp SavedModel!")
    shutil.rmtree("tmp")
    print("Finished!")

# Datensatz

### Es wird ein zufälliger Datensatz erzeugt

In [16]:
# Zufällige Datensätze erzeugen
train_dates = pd.date_range(start='2020-01-01', periods=10000)
pred_dates = pd.date_range(start='2021-01-01', periods=100)

train_labels = np.random.randint(low=0, high=3, size=10000)

train_data = {
    'feature1': np.random.rand(10000).astype(np.float32),
    'feature2': np.random.rand(10000).astype(np.float32),
    'feature3': np.random.rand(10000).astype(np.float32),
    'sleep': np.array([1 if lb == 0 else 0 for lb in train_labels]).astype(np.float32),
    'awake': np.array([1 if lb == 1 else 0 for lb in train_labels]).astype(np.float32),
    'rem': np.array([1 if lb == 2 else 0 for lb in train_labels]).astype(np.float32),
    'light': np.array([1 if lb == 3 else 0 for lb in train_labels]).astype(np.float32)
}

pred_data = {
    'feature1': np.random.rand(100).astype(np.float32),
    'feature2': np.random.rand(100).astype(np.float32),
    'feature3': np.random.rand(100).astype(np.float32),
}

df_train = pd.DataFrame(train_data, index=train_dates)
df_pred = pd.DataFrame(pred_data, index=pred_dates)

# Model Initialisierung

### Das Model wird mit gegebener Konfiguration initialisiert

In [3]:
# Konfiguration erstellen und Modell instanziieren
config = LSTMConfig(
    feature_columns=['time', 'movement', 'heartrate'],
    label_names=['sleeplabel'],
    scaled_labels=False,
    n_steps=25,
    batch_size=64,
    epochs=10,
)
ts_lstm = TimeSeriesLSTM(config=config)

2025-01-10 14:25:45.335999: E tensorflow/compiler/xla/stream_executor/cuda/cuda_driver.cc:268] failed call to cuInit: CUDA_ERROR_NO_DEVICE: no CUDA-capable device is detected


Model: "model"
__________________________________________________________________________________________________
 Layer (type)                Output Shape                 Param #   Connected to                  
 input_time (InputLayer)     [(None, 25, 1)]              0         []                            
                                                                                                  
 input_movement (InputLayer  [(None, 25, 1)]              0         []                            
 )                                                                                                
                                                                                                  
 input_heartrate (InputLaye  [(None, 25, 1)]              0         []                            
 r)                                                                                               
                                                                                              

### Trainings-Step

In [26]:
# Start des Trainings
ts_lstm.train(dataframe=df_train)

AttributeError: 'LSTMConfig' object has no attribute 'label_column'

### Prediction Step

In [22]:
# Modell Predictions erzeugen

pred_results = ts_lstm.predict(df_pred)

df_pred_no_time = pd.DataFrame(pred_data)
for index, row in df_pred_no_time.iterrows():
    if index < config.n_steps:
        print("F:", row['feature1'], row['feature2'], row['feature3'], " P:", "Not enough data")
    else:
        print("F:", row['feature1'], row['feature2'], row['feature3'], " P:", pred_results[index-config.n_steps])

[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 39ms/step
F: 0.51403713 0.19882914 0.5911012  P: Not enough data
F: 0.11934123 0.40530437 0.89096767  P: Not enough data
F: 0.38564697 0.29791084 0.19773565  P: Not enough data
F: 0.6703891 0.71410924 0.5589309  P: Not enough data
F: 0.6856324 0.26695275 0.604667  P: Not enough data
F: 0.37407333 0.5748829 0.76460654  P: Not enough data
F: 0.045838956 0.85503596 0.37089372  P: Not enough data
F: 0.11443184 0.062243324 0.710078  P: Not enough data
F: 0.35948795 0.42584494 0.7620492  P: Not enough data
F: 0.5214783 0.36130518 0.88608074  P: Not enough data
F: 0.9309409 0.4325166 0.7721853  P: Not enough data
F: 0.5279233 0.8800477 0.5392751  P: Not enough data
F: 0.33663088 0.6341414 0.3659117  P: Not enough data
F: 0.8563349 0.4537363 0.38099846  P: Not enough data
F: 0.7088956 0.10979994 0.4552031  P: Not enough data
F: 0.42051426 0.9532006 0.94734293  P: Not enough data
F: 0.62419176 0.5616601 0.22580338  P: Not enough data

# Modell Speichern, Laden und Konvertieren

In [7]:
ts_lstm.save_model("TestModel.keras")

Model saved!


In [16]:
# Lade das Modell aus der TF-Lite Datei
ts_lstm.load_model("TestModel.keras")

Model loaded!


In [26]:

class ModelSpecificInfo(BaseModel):
    name: str
    version: float
    num_inputs: int
    num_outputs: int        


_MODEL_INFO = ModelSpecificInfo(
        name="Testmodell",
        version=1.0,
        num_inputs=3,
        num_outputs=2
)


def add_metadata_to_model(model_path: str, metadata: ModelSpecificInfo):
    
    model_meta = _metadata_fb.ModelMetadataT()
    model_meta.name = metadata.name
    model_meta.description = "Ein zufälliges Modell"
    
    input_meta_0 = _metadata_fb.TensorMetadataT()
    input_meta_0.name = "Time"
    input_meta_0.dimensionNames = ['sin', 'cos']
    input_meta_0.description = "Die Tageszeit an dem der Wert aufgenommen wurde"
    input_meta_0.content = _metadata_fb.ContentT()
    input_meta_0.content.contentProperties = _metadata_fb.ValueRangeT()
    input_meta_0.content.contentProperties.min = 0
    input_meta_0.content.contentProperties.max = 10
    
    input_meta_1 = _metadata_fb.TensorMetadataT()
    input_meta_1.name = "Movement"
    input_meta_1.description = "Die Bewegungsintensität gemittelt aus den Beschleunigungssensoren"
    input_meta_1.content = _metadata_fb.ContentT()
    input_meta_1.content.contentProperties = _metadata_fb.ValueRangeT()
    input_meta_1.content.contentProperties.min = 0
    input_meta_1.content.contentProperties.max = 1
    
    input_meta_2 = _metadata_fb.TensorMetadataT()
    input_meta_2.name = "Heartrate"
    input_meta_2.description = "Die Anzahl an Herzschlägen pro Minute"
    input_meta_2.content = _metadata_fb.ContentT()
    input_meta_2.content.contentProperties = _metadata_fb.ValueRangeT()
    input_meta_2.content.contentProperties.min = 0
    input_meta_2.content.contentProperties.max = 1
    
    output_meta_1 = _metadata_fb.TensorMetadataT()
    output_meta_1.name = "Sleeplabel"
    output_meta_1.description = "Die aktuelle Schlafphase"
    

    # Creates subgraph info.
    subgraph = _metadata_fb.SubGraphMetadataT()
    subgraph.inputTensorMetadata = [input_meta_0, input_meta_1,input_meta_2]
    subgraph.outputTensorMetadata = [output_meta_1]
    model_meta.subgraphMetadata = [subgraph]
    
    b = flatbuffers.Builder(0)
    
    packed = model_meta.Pack(b)
    
    b.Finish(
        packed,
        _metadata.MetadataPopulator.METADATA_FILE_IDENTIFIER
    )
    metadata_buffer = b.Output()
    populator = _metadata.MetadataPopulator.with_model_file(model_path)
    populator.load_metadata_buffer(metadata_buffer)
    populator.populate()
    
add_metadata_to_model("TestModel.tflite", _MODEL_INFO)

ValueError: The number of input tensors (1) should match the number of input tensor metadata (3)

In [18]:
# Konvertiere das Keras-Modell in ein tflite-Modell
convert_model_to_tflite("TestModel.keras", "TestModel.tflite", df_pred, config)

Creating temp SavedModel!
INFO:tensorflow:Assets written to: tmp/assets


INFO:tensorflow:Assets written to: tmp/assets


Saved artifact at 'tmp'. The following endpoints are available:

* Endpoint 'serve'
  Args:
    args_0: float32 Tensor, shape=(None, 25, 3)
  Returns:
    float32 Tensor, shape=(None, 1)
Converting SavedModel to tflite!


2024-12-17 18:33:55.480951: W tensorflow/compiler/mlir/lite/python/tf_tfl_flatbuffer_helpers.cc:364] Ignored output_format.


Removing temp SavedModel!
Finished!


2024-12-17 18:33:55.480970: W tensorflow/compiler/mlir/lite/python/tf_tfl_flatbuffer_helpers.cc:367] Ignored drop_control_dependency.
2024-12-17 18:33:55.481197: I tensorflow/cc/saved_model/reader.cc:45] Reading SavedModel from: tmp
2024-12-17 18:33:55.481761: I tensorflow/cc/saved_model/reader.cc:91] Reading meta graph with tags { serve }
2024-12-17 18:33:55.481770: I tensorflow/cc/saved_model/reader.cc:132] Reading SavedModel debug info (if present) from: tmp
2024-12-17 18:33:55.483050: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:375] MLIR V1 optimization pass is not enabled
2024-12-17 18:33:55.483493: I tensorflow/cc/saved_model/loader.cc:231] Restoring SavedModel bundle.
2024-12-17 18:33:55.508332: I tensorflow/cc/saved_model/loader.cc:215] Running initialization op on SavedModel bundle at path: tmp
2024-12-17 18:33:55.523818: I tensorflow/cc/saved_model/loader.cc:314] SavedModel load for tags { serve }; Status: success: OK. Took 42619 microseconds.
2024-12-17 18:33:

In [67]:
df_train = pd.read_json("data/4_squashed_format/2024-08-21.json", dtype=np.float32)
df_train = df_train.dropna()

feature_columns = [
    "movementData",
    #"sleepLevels",
    #"restlessMoments",
    "heartrate",
    #"stress",
    #"bodyBattery",
    #"hrv"
]

label_names = ['sleepLevels']

config = LSTMConfig(
    feature_columns=feature_columns,
    label_names=label_names,
    n_steps=120,
    batch_size=32,
    epochs=10,
    scaled_labels=True,
    checkpoint_path="models/chkp.keras")
ts_lstm = TimeSeriesLSTM(config=config)
ts_lstm.build_model(df_train[feature_columns])


ts_lstm.train(dataframe=df_train[feature_columns + label_names])

print("Training Completed!")

ts_lstm.save_model("models/sleepPhaseAnalyserV1.keras")

  df_train = pd.read_json("data/4_squashed_format/2024-08-21.json", dtype=np.float32)


Model: "model_30"
__________________________________________________________________________________________________
 Layer (type)                Output Shape                 Param #   Connected to                  
 input_Time (InputLayer)     [(None, 120, 1)]             0         []                            
                                                                                                  
 input_movementData (InputL  [(None, 120, 1)]             0         []                            
 ayer)                                                                                            
                                                                                                  
 input_heartrate (InputLaye  [(None, 120, 1)]             0         []                            
 r)                                                                                               
                                                                                           

In [None]:
df_train = pd.read_json("data/4_squashed_format/2024-08-21.json", dtype=np.float32)
df_train = df_train.dropna()

ts_lstm.train(dataframe=df_train[feature_columns + label_names])

  df_train = pd.read_json("data/4_squashed_format/2024-08-21.json", dtype=np.float32)


Epoch 1/10

# Utils

In [None]:
def list_all_files(path):
    try:
        files = []

        for f in os.listdir(path):
            filepath = os.path.join(path, f)
            if os.path.isfile(filepath):  # Prüft, ob es sich um eine Datei handelt
                files.append(f)

        return files
    except FileNotFoundError:
        print(f"Das Verzeichnis '{path}' wurde nicht gefunden.")
        return []