In [1]:
import pandas as pd

# Variant 1 - only 'O' markers: every row containing 'O' starts a new measurement.
def load_and_process_measurements(csv_path: str) -> pd.DataFrame:
    """Load a sensor CSV and number the measurements delimited by 'O' rows.

    A row that contains the string 'O' in any column starts a new measurement;
    every following row belongs to that measurement until the next 'O' row.
    Rows before the first 'O' are discarded, and marker rows themselves (rows
    containing 'O' or 'X') are removed from the result.

    Args:
        csv_path: Path of the CSV file. It must contain a 'Timestamp' column.

    Returns:
        A float-typed DataFrame without the 'Timestamp' column and with three
        added columns: 'label' (0-based measurement number), 'time'
        (Timestamp minus the first kept Timestamp) and 'relative_time'
        (Timestamp minus the first Timestamp of the same measurement).
        The frame has zero rows when the file holds no labelled data rows.
    """
    df = pd.read_csv(csv_path)

    # Compare on the string form so numeric and text columns are treated alike.
    as_text = df.astype(str)
    starts_measurement = as_text.eq('O').any(axis=1)
    is_marker = starts_measurement | as_text.eq('X').any(axis=1)

    # Running count of 'O' rows seen so far; rows before the first 'O' get -1.
    df['label'] = starts_measurement.cumsum() - 1

    keep = ~is_marker & (df['label'] != -1)
    df = df[keep].reset_index(drop=True).astype(float)

    if len(df) == 0:
        # No rows to offset against: skip the .iloc[0] lookups, which would
        # raise IndexError on an empty frame, and emit empty time columns.
        df['time'] = df['Timestamp']
        df['relative_time'] = df['Timestamp']
    else:
        df['time'] = df['Timestamp'] - df['Timestamp'].iloc[0]
        df['relative_time'] = df.groupby('label')['Timestamp'].transform(
            lambda ts: ts - ts.iloc[0])

    return df.drop(columns=['Timestamp'])


In [2]:
import pandas as pd

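# Variant 2 - 'X' and 'O' markers: a row containing 'X' starts a measurement, a row containing 'O' ends it.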
def load_and_process_measurements2(csv_path: str) -> pd.DataFrame:
    """Load a sensor CSV and number the measurements bracketed by 'X' ... 'O'.

    A row containing the string 'X' in any column starts a new measurement and
    a row containing 'O' ends the current one. Data rows outside an open
    measurement are discarded, and marker rows themselves are removed from the
    result. A row holding both markers is treated as a start.

    Args:
        csv_path: Path of the CSV file. It must contain a 'Timestamp' column.

    Returns:
        A float-typed DataFrame without the 'Timestamp' column and with three
        added columns: 'label' (0-based measurement number), 'time'
        (Timestamp minus the first kept Timestamp) and 'relative_time'
        (Timestamp minus the first Timestamp of the same measurement).
        The frame has zero rows when the file holds no labelled data rows.
    """
    df = pd.read_csv(csv_path)

    # Compare on the string form so numeric and text columns are treated alike.
    as_text = df.astype(str)
    has_x = as_text.eq('X').any(axis=1)
    has_o = as_text.eq('O').any(axis=1)
    is_marker = has_x | has_o

    # 1.0 on 'X' rows, 0.0 on rows with only 'O', NaN on data rows. Forward
    # filling then carries the most recent marker's state onto the data rows
    # that follow it; rows before any marker count as "not recording".
    recording = (
        has_x.astype(float)
        .where(is_marker)
        .ffill()
        .fillna(0.0)
        .astype(bool)
    )

    # Running count of 'X' rows gives the measurement number; rows that are
    # not inside an open measurement get the sentinel -1.
    df['label'] = (has_x.cumsum() - 1).where(recording, -1)

    keep = ~is_marker & (df['label'] != -1)
    df = df[keep].reset_index(drop=True).astype(float)

    if len(df) == 0:
        # No rows to offset against: skip the .iloc[0] lookups, which would
        # raise IndexError on an empty frame, and emit empty time columns.
        df['time'] = df['Timestamp']
        df['relative_time'] = df['Timestamp']
    else:
        df['time'] = df['Timestamp'] - df['Timestamp'].iloc[0]
        df['relative_time'] = df.groupby('label')['Timestamp'].transform(
            lambda ts: ts - ts.iloc[0])

    return df.drop(columns=['Timestamp'])


In [3]:
correct = load_and_process_measurements(r'correct_evezesek3.csv')

# Summary line 1: total rows, number of measurements, mean rows per measurement.
print(
    'Correct circles length:', len(correct),
    'Number of measurements:', correct['label'].nunique(),
    'Avg measurement length:', len(correct) / correct['label'].nunique(),
)
# Summary line 2: row counts of the largest and the smallest measurement.
print(
    'Longest measuremet:', correct['label'].value_counts().max(),
    'Shortest:', correct['label'].value_counts().min(),
)
correct.head()

Correct circles length: 4699 Number of measurements: 139 Avg measurement length: 33.805755395683455
Longest measuremet: 71 Shortest: 16


Unnamed: 0,linear_accelerationX,linear_accelerationY,linear_accelerationZ,magX,magY,magZ,gyroX,gyroY,gyroZ,accX,accY,accZ,label,time,relative_time
0,-0.49534,-0.240406,-0.489004,40.148926,7.897949,-52.490234,55.969238,43.945312,2.441406,0.376709,-0.005981,-0.542114,0.0,0.0,0.0
1,-0.468664,-0.20834,-0.756682,41.85791,7.775879,-48.010254,36.376953,69.335938,20.629883,0.395508,0.022583,-0.82251,0.0,0.045399,0.045399
2,-0.379162,0.045053,-1.139748,41.943359,5.725098,-43.017578,-8.728027,109.558105,38.757324,0.478638,0.276733,-1.224731,0.0,0.090187,0.090187
3,0.146213,0.182609,-0.921982,42.163086,1.721191,-36.535645,29.846191,152.46582,70.3125,1.00647,0.417358,-1.022461,0.0,0.135009,0.135009
4,1.387541,0.082828,-1.032558,40.551758,-2.331543,-28.564453,-67.687988,174.499512,91.003418,2.271118,0.31897,-1.150391,0.0,0.180028,0.180028


In [4]:
incorrect = load_and_process_measurements(r'david_szar_evezesei2.csv')

# Summary line 1: total rows, number of measurements, mean rows per measurement.
print(
    'Incorrect circles length:', len(incorrect),
    'Number of measurements:', incorrect['label'].nunique(),
    'Avg measurement length:', len(incorrect) / incorrect['label'].nunique(),
)
# Summary line 2: row counts of the largest and the smallest measurement.
print(
    'Longest measuremet:', incorrect['label'].value_counts().max(),
    'Shortest:', incorrect['label'].value_counts().min(),
)
incorrect.head()

Incorrect circles length: 3547 Number of measurements: 63 Avg measurement length: 56.301587301587304
Longest measuremet: 692 Shortest: 23


Unnamed: 0,linear_accelerationX,linear_accelerationY,linear_accelerationZ,magX,magY,magZ,gyroX,gyroY,gyroZ,accX,accY,accZ,label,time,relative_time
0,-0.06246,0.227142,0.127306,41.784668,-2.13623,-16.19873,55.908203,22.338867,7.385254,0.865845,0.054443,0.112183,0.0,0.0,0.0
1,0.039296,0.002371,0.163859,41.503906,-2.954102,-16.149902,38.818359,18.676758,11.108398,0.968262,-0.170288,0.151489,0.0,0.044976,0.044976
2,0.009234,0.103056,0.064508,41.784668,-3.527832,-16.784668,27.832031,13.85498,12.573242,0.938354,-0.067871,0.053223,0.0,0.090005,0.090005
3,-0.117454,0.101833,-0.042925,40.856934,-3.967285,-18.212891,26.733398,11.108398,13.61084,0.809692,-0.067383,-0.054932,0.0,0.135043,0.135043
4,0.044397,0.163297,-0.043656,40.820312,-4.223633,-17.858887,58.59375,9.887695,13.793945,0.97229,-0.003174,-0.056396,0.0,0.179982,0.179982


In [5]:
import numpy as np
from tensorflow.keras.preprocessing.sequence import pad_sequences

def prepare_sequence_data(df: pd.DataFrame, label_value: int, max_len: int = None):
    """Turn a labelled measurement frame into a padded 3-D array plus targets.

    Rows are grouped by the 'label' column; each group becomes one sequence
    whose features are all columns except 'label' and 'time' (in the sorted
    order produced by ``Index.difference``).

    Args:
        df: Frame containing a 'label' column that identifies measurements.
        label_value: Target value assigned to every sequence of this frame.
        max_len: Length passed to ``pad_sequences`` as ``maxlen``. When None,
            the length of the longest sequence in ``df`` is used.

    Returns:
        Tuple ``(sequences_padded, labels)``: the float32 array returned by
        ``pad_sequences`` (called with ``padding='post'``) and a NumPy array
        holding ``label_value`` once per sequence.
    """
    feature_cols = df.columns.difference(['label', 'time'])

    # One 2-D (rows x features) array per measurement, in groupby order.
    sequences = [
        measurement[feature_cols].values
        for _, measurement in df.groupby('label')
    ]
    labels = [label_value] * len(sequences)

    target_len = max_len if max_len is not None else max(map(len, sequences))

    sequences_padded = pad_sequences(
        sequences, maxlen=target_len, padding='post', dtype='float32')

    return sequences_padded, np.array(labels)





In [7]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, GlobalMaxPooling1D, Dense, Dropout, Masking

import numpy as np

# Build fixed-length tensors: "correct" measurements are class 1, "incorrect" class 0.
X_correct, y_correct = prepare_sequence_data(correct, label_value=1)
# Reuse the padded length of the correct set so both arrays can be concatenated.
# NOTE(review): any incorrect measurement longer than this length is truncated
# by pad_sequences (its default truncating mode) - confirm that is intended.
X_incorrect, y_incorrect = prepare_sequence_data(incorrect, label_value=0, max_len=X_correct.shape[1])

X = np.concatenate([X_correct, X_incorrect], axis=0)
y = np.concatenate([y_correct, y_incorrect], axis=0)

test_size = 0.2
split_seed = 42

# A seeded local generator yields the same train/test split on every run and
# leaves NumPy's global random state untouched.
indices = np.random.default_rng(split_seed).permutation(len(X))

split_index = int(len(X) * (1 - test_size))

X_train, X_test = X[indices[:split_index]], X[indices[split_index:]]
y_train, y_test = y[indices[:split_index]], y[indices[split_index:]]


# 1-D CNN binary classifier over (timesteps, features) sequences.
# NOTE(review): Masking is followed directly by Conv1D; verify against the
# Keras masking guide that the mask is actually consumed, otherwise the
# zero padding is seen by the convolution as ordinary input.
model = Sequential([
    Masking(mask_value=0., input_shape=(X.shape[1], X.shape[2])),
    Conv1D(64, kernel_size=5, activation='relu'),
    GlobalMaxPooling1D(),
    Dropout(0.3),
    Dense(32, activation='relu'),
    Dense(1, activation='sigmoid')
])

model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
model.summary()




Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 masking (Masking)           (None, 71, 13)            0         
                                                                 
 conv1d (Conv1D)             (None, 67, 64)            4224      
                                                                 
 global_max_pooling1d (Glob  (None, 64)                0         
 alMaxPooling1D)                                                 
                                                                 
 dropout (Dropout)           (None, 64)                0         
                                                                 
 dense (Dense)               (None, 32)                2080      
                                                                 
 dense_1 (Dense)             (None, 1)                 33        
                                                      

In [8]:
from tensorflow.keras.callbacks import ModelCheckpoint

# Checkpoint callback: writes the model to disk only when the monitored
# validation accuracy improves (save_best_only=True).
ch = ModelCheckpoint(
    filepath='stroke_modelp3_9_21.h5',
    monitor='val_accuracy',
    mode='auto',
    save_best_only=True,
    verbose=0,
)

In [9]:
# Train with 20% of the training data set aside for validation; the checkpoint
# callback `ch` runs during training.
model.fit(
    x=X_train,
    y=y_train,
    validation_split=0.2,
    epochs=20,
    batch_size=8,
    callbacks=[ch],
)

# Score the model as it stands after the final epoch on the held-out test split.
loss, acc = model.evaluate(x=X_test, y=y_test)
print(f"Test Accuracy: {acc:.2f}")

Epoch 1/20


Epoch 2/20
Epoch 3/20
 1/16 [>.............................] - ETA: 0s - loss: 6.9077 - accuracy: 0.7500

  saving_api.save_model(


Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Test Accuracy: 0.98


In [11]:
from tensorflow.keras.models import load_model

# Reload the file that the ModelCheckpoint callback was configured to write
# and score it on the same held-out test split.
best_model = load_model('stroke_modelp3_9_21.h5')

loss, acc = best_model.evaluate(x=X_test, y=y_test)
print(f"Test Accuracy: {acc:.2f}")

Test Accuracy: 0.93
