<a href="https://colab.research.google.com/github/prinzessinmarlenifee/SenseCap/blob/main/SenseCap_v2_2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

#Versuch 2: Fused Excel sheets ohne sync



In [1]:
# mount drive
from google.colab import drive
drive.mount('/content/drive')


Mounted at /content/drive


In [2]:
#import packages
import os
import json
import numpy as np
import pandas as pd
from collections import Counter
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import classification_report, confusion_matrix, ConfusionMatrixDisplay
import matplotlib.pyplot as plt
import tensorflow as tf

layers = tf.keras.layers
models = tf.keras.models


In [13]:
# --- PARAMETER ---
sampling_rate = 60       # 60 Hz nach SDI-Algorithmus
window_size = 60         # 1 Sekunde = 60 Frames
step_size = 30           # 50% Überlappung

# Basisverzeichnis (sollte die 18 Session-Ordner enthalten)
base_dir = '/content/drive/MyDrive/ML-MTB-Modell/IMU-Sessions/'


#Sessions einlesen

In [12]:
session_dirs = sorted([
    d for d in os.listdir(base_dir)
    if os.path.isdir(os.path.join(base_dir, d))
])
print(f"Gefundene Sessions: {len(session_dirs)} -> {session_dirs}")


Gefundene Sessions: 14 -> ['Session_01', 'Session_02', 'Session_03', 'Session_04', 'Session_05', 'Session_06', 'Session_07', 'Session_08', 'Session_09', 'Session_10', 'Session_11', 'Session_12', 'Session_13', 'Session_14']


Fuktionen zum LAbel-Parsing und Datei finden

In [14]:
# 5.1 parse_hot_labels: Liest die _hot.json-Datei ein, erstellt für jeden Frame ein Label
def parse_hot_labels(json_path, total_frames):
    with open(json_path, 'r') as f:
        data = json.load(f)
    entries = data['button_presses'].strip().split(';')

    label_changes = []
    for entry in entries:
        if ':' in entry:
            label, frame = entry.strip().split(':')
            label = label.strip()
            # Korrigiere evtl. "Peadling" → "Pedaling"
            if label.lower() == 'peadling':
                label = 'Pedaling'
            label_changes.append((int(frame.strip()), label))

    frame_labels = ['Unknown'] * total_frames
    for i, (start_frame, label) in enumerate(label_changes):
        end_frame = label_changes[i + 1][0] if i + 1 < len(label_changes) else total_frames
        for f in range(start_frame, min(end_frame, total_frames)):
            frame_labels[f] = label
    return frame_labels

# 5.2 find_sensor_file: Findet CSV-Datei, deren Name mit dem Prefix beginnt (Head_, Wrist_, Seat_)
def find_sensor_file(folder, prefix):
    for f in os.listdir(folder):
        if f.lower().startswith(prefix.lower()):
            return os.path.join(folder, f)
    raise FileNotFoundError(f"❌ Keine Datei mit Prefix '{prefix}' in {folder} gefunden.")

# 5.3 find_hot_file: Findet JSON-Datei, deren Name auf "_hot.json" endet
def find_hot_file(folder):
    for f in os.listdir(folder):
        if f.lower().endswith('_hot.json'):
            return os.path.join(folder, f)
    raise FileNotFoundError(f"❌ Keine Datei mit Suffix '_hot.json' in {folder} gefunden.")


6. Fensterung für drei Sensoren kombinieren

In [6]:
def window_data_multiple_sensors(head_data, wrist_data, seat_data, frame_labels):
    X_windows, y_windows = [], []
    total_frames = len(frame_labels)

    # Annahme: head_data, wrist_data, seat_data haben alle dieselbe Anzahl Zeilen = total_frames
    for start in range(0, total_frames - window_size + 1, step_size):
        end = start + window_size

        win_h = head_data[start:end]    # (window_size, 6)
        win_w = wrist_data[start:end]
        win_s = seat_data[start:end]
        window = np.concatenate([win_h, win_w, win_s], axis=1)  # → (window_size, 18)

        label_window = frame_labels[start:end]
        dominant_label = Counter(label_window).most_common(1)[0][0]
        if dominant_label == 'Unknown':
            continue

        X_windows.append(window)
        y_windows.append(dominant_label)

    return np.array(X_windows), np.array(y_windows)


In [None]:
#debug
print(f"🔍 Session {sess_dir} → Fenster: {len(X_win)}, Shape: {X_win.shape if len(X_win) > 0 else 'n/a'}")


NameError: name 'sess_dir' is not defined

#7. Daten einlesen und Fenster / Labels erzeugen
→ Nach Ausführung siehst du für jede Session etwa: “→ 153 Fenster, 3 Klassen” etc.

In [10]:
sessions_X = []
sessions_y = []
valid_sessions = []  # <- neue Liste! mit nur valid sessions
skipped_sessions = []

for sess_dir in session_dirs:
    print(f"\nLade Session: {sess_dir}")
    session_path = os.path.join(base_dir, sess_dir)

    # 7.1 Sensor-Dateien finden
    head_path  = find_sensor_file(session_path, 'Head_')
    wrist_path = find_sensor_file(session_path, 'Wrist_')
    seat_path  = find_sensor_file(session_path, 'Seat_')

    # 7.2 Hot-JSON-Datei finden
    hot_path = find_hot_file(session_path)

    # 7.3 IMU-Daten laden
    head_data  = pd.read_csv(head_path).values   # (n_frames, 6)
    wrist_data = pd.read_csv(wrist_path).values  # (n_frames, 6)
    seat_data  = pd.read_csv(seat_path).values   # (n_frames, 6)

    print(f"📊 Sensorlängen: Head={head_data.shape}, Wrist={wrist_data.shape}, Seat={seat_data.shape}")


    #7.4 Labels laden
    total_frames = head_data.shape[0]
    frame_labels = parse_hot_labels(hot_path, total_frames)

    # 7.5 ALLE gleich lang schneiden #sonst-Fehler, erstmal Sicherehitskürzung, später mit sync-datei arbeiten
    min_len = min(len(head_data), len(wrist_data), len(seat_data))
    head_data  = head_data[:min_len]
    wrist_data = wrist_data[:min_len]
    seat_data  = seat_data[:min_len]
    frame_labels = frame_labels[:min_len]



    # 7.6 Fensterung & Label-Zuweisung
    X_win, y_win = window_data_multiple_sensors(
        head_data, wrist_data, seat_data, frame_labels
    )

    # Nur Sessions mit gültigen Fenstern verwenden
    if len(X_win) == 0:
        print(f"⚠️  Session {sess_dir} übersprungen – keine gültigen Fenster.")
        continue


    print(f"\n⛔️ Übersprungene Sessions: {skipped_sessions}")

     # Nach erfolgreicher Prüfung:
    if len(X_win) == 0 or X_win.shape[1:] != (window_size, 18):
        print(f"⚠️  Session {sess_dir} übersprungen.")
        continue








Lade Session: Session_01


KeyboardInterrupt: 

In [9]:
#Debug:shape der daten anzeigen

print("\n✅ Preprocessing abgeschlossen. Shape jeder Session:")
for idx, sess in enumerate(session_dirs):
    print(f"{sess}: {sessions_X[idx].shape}")


✅ Preprocessing abgeschlossen. Shape jeder Session:


IndexError: list index out of range

#8. Leave-one-session-out: Training and Evaluation

In [None]:
all_accuracies = []
accuracy_summary = {'Session': [], 'Accuracy': []}

for test_idx in range(len(sessions_X)):
    sess_name = session_dirs[test_idx]
    print(f"\n📌 Teste auf Session (unbekannt): {sess_name} ({test_idx+1}/{len(sessions_X)})")

    # 8.1 Test-Daten definieren
    X_test = sessions_X[test_idx]
    y_test = sessions_y[test_idx]



    # 8.2 Train-Daten: alle anderen Sessions zusammenschneiden
    X_train = np.concatenate([x for i, x in enumerate(sessions_X) if i != test_idx])
    y_train = np.concatenate([y for i, y in enumerate(sessions_y) if i != test_idx])

    # 8.3 Label-Encoding (fit auf Trainingsdaten, transform auf beides)
    le = LabelEncoder()
    y_train_enc = le.fit_transform(y_train)
    y_test_enc = le.transform(y_test)

    # 8.4 Modell-Definition: CNN + LSTM
    model = models.Sequential([
        layers.Conv1D(64, 3, activation='relu', input_shape=X_train.shape[1:]),
        layers.Conv1D(64, 3, activation='relu'),
        layers.MaxPooling1D(pool_size=2),
        layers.Dropout(0.3),
        layers.LSTM(64),
        layers.Dropout(0.3),
        layers.Dense(100, activation='relu'),
        layers.Dense(len(le.classes_), activation='softmax')
    ])

    model.compile(
        optimizer='adam',
        loss='sparse_categorical_crossentropy',
        metrics=['accuracy']
    )

    # 8.5 Training (mit 10 % Validierungssplit aus Trainingsdaten)
    history = model.fit(
        X_train, y_train_enc,
        validation_split=0.1,
        epochs=20,
        batch_size=32,
        verbose=0
    )

    # 8.6 Evaluation auf Test-Session
    test_loss, test_acc = model.evaluate(X_test, y_test_enc, verbose=0)
    print(f"✅ Test-Accuracy für {sess_name}: {test_acc:.2f}")
    all_accuracies.append(test_acc)
    accuracy_summary['Session'].append(sess_name)
    accuracy_summary['Accuracy'].append(test_acc)

    # 8.7 Klassifikationsbericht & Confusion Matrix
    y_pred_probs = model.predict(X_test, verbose=0)
    y_pred_classes = np.argmax(y_pred_probs, axis=1)

    print("\nKlassifikationsbericht:")
    print(classification_report(y_test_enc, y_pred_classes, target_names=le.classes_))

    cm = confusion_matrix(y_test_enc, y_pred_classes)
    disp = ConfusionMatrixDisplay(cm, display_labels=le.classes_)
    disp.plot(xticks_rotation=45)
    plt.title(f"Confusion Matrix – {sess_name}")
    plt.show()



📌 Teste auf Session (unbekannt): Session_01 (1/13)


ValueError: all the input array dimensions except for the concatenation axis must match exactly, but along dimension 2, the array at index 0 has size 6 and the array at index 9 has size 35