In [5]:
import pandas as pd

# Only 'O' markers: every 'O' row starts a new measurement.
def load_and_process_measurements(csv_path: str) -> pd.DataFrame:
    """Load a sensor CSV and split it into measurements delimited by 'O' rows.

    A row counts as a marker row when any of its cells is the string 'O' or
    'X'. Every 'O' row opens a new measurement (labelled 0, 1, 2, ...) and all
    following rows belong to it until the next 'O' row. Rows before the first
    'O' row and the marker rows themselves are discarded; every remaining
    cell is cast to float.

    Parameters
    ----------
    csv_path : str
        Path of the CSV file to read. It must contain a 'Timestamp' column.

    Returns
    -------
    pd.DataFrame
        All-float frame with the input columns except 'Timestamp', plus:
        'label' (measurement number), 'time' (timestamp minus the first kept
        timestamp) and 'relative_time' (timestamp minus the first timestamp
        of the row's own measurement). If no row follows an 'O' marker the
        frame is empty but still has these columns.

    Raises
    ------
    KeyError
        If the file has no 'Timestamp' column.
    ValueError
        If a kept cell cannot be converted to float.
    """
    df = pd.read_csv(csv_path)

    # Detect marker rows for the whole frame at once instead of row by row.
    as_text = df.astype(str)
    is_o = as_text.eq('O').any(axis=1)
    is_marker = is_o | as_text.eq('X').any(axis=1)

    # Running count of 'O' rows seen so far; rows before the first 'O' get -1.
    df['label'] = is_o.cumsum() - 1

    keep = ~is_marker & (df['label'] != -1)
    df = df[keep].reset_index(drop=True)
    df = df.astype(float)

    if df.empty:
        # Nothing was labelled: there is no first timestamp to subtract, so
        # just create the (empty) output columns instead of failing on iloc[0].
        df['time'] = df['Timestamp']
        df['relative_time'] = df['Timestamp']
    else:
        df['time'] = df['Timestamp'] - df['Timestamp'].iloc[0]
        df['relative_time'] = df.groupby('label')['Timestamp'].transform(
            lambda x: x - x.iloc[0])

    return df.drop(columns=['Timestamp'])


In [1]:
import pandas as pd

# 'X' and 'O' markers: an 'X' row starts a measurement, an 'O' row ends it.
def load_and_process_measurements2(csv_path: str) -> pd.DataFrame:
    """Load a sensor CSV and split it into measurements bracketed by 'X'...'O'.

    A row counts as a marker row when any of its cells is the string 'X' or
    'O'. An 'X' row opens a new measurement (labelled 0, 1, 2, ...) and an 'O'
    row closes the current one. Rows outside an open measurement (before the
    first 'X', or between an 'O' and the next 'X') and the marker rows
    themselves are discarded; every remaining cell is cast to float.

    Parameters
    ----------
    csv_path : str
        Path of the CSV file to read. It must contain a 'Timestamp' column.

    Returns
    -------
    pd.DataFrame
        All-float frame with the input columns except 'Timestamp', plus:
        'label' (measurement number), 'time' (timestamp minus the first kept
        timestamp) and 'relative_time' (timestamp minus the first timestamp
        of the row's own measurement). If no row falls inside a measurement
        the frame is empty but still has these columns.

    Raises
    ------
    KeyError
        If the file has no 'Timestamp' column.
    ValueError
        If a kept cell cannot be converted to float.
    """
    df = pd.read_csv(csv_path)

    # Detect marker rows for the whole frame at once instead of row by row.
    as_text = df.astype(str)
    is_x = as_text.eq('X').any(axis=1)
    is_o = as_text.eq('O').any(axis=1)
    is_marker = is_x | is_o

    # 1.0 on 'X' rows, 0.0 on rows that only hold 'O' ('X' wins when a row has
    # both), NaN elsewhere. Forward-filling carries the most recent marker's
    # state onto the data rows; rows before any marker are "not measuring".
    marker_state = is_x.astype(float).where(is_marker)
    in_measurement = marker_state.ffill().fillna(0.0).astype(bool)

    # Running count of 'X' rows gives the measurement number; rows outside an
    # open measurement get -1 and are filtered out below.
    df['label'] = (is_x.cumsum() - 1).where(in_measurement, -1)

    keep = ~is_marker & (df['label'] != -1)
    df = df[keep].reset_index(drop=True)
    df = df.astype(float)

    if df.empty:
        # Nothing was labelled: there is no first timestamp to subtract, so
        # just create the (empty) output columns instead of failing on iloc[0].
        df['time'] = df['Timestamp']
        df['relative_time'] = df['Timestamp']
    else:
        df['time'] = df['Timestamp'] - df['Timestamp'].iloc[0]
        df['relative_time'] = df.groupby('label')['Timestamp'].transform(
            lambda x: x - x.iloc[0])

    return df.drop(columns=['Timestamp'])


In [8]:
# Load the 'correct' recordings and summarise how they split into measurements.
correct = load_and_process_measurements(r'jo_evezesek_final.csv')
correct_sizes = correct['label'].value_counts()  # number of rows per measurement
print(
    'Correct circles length:', len(correct),
    'Number of measurements:', len(correct_sizes),
    'Avg measurement length:', len(correct) / len(correct_sizes),
)
print('Longest measuremet:', correct_sizes.max(), 'Shortest:', correct_sizes.min())
correct.head()

Correct circles length: 4144 Number of measurements: 120 Avg measurement length: 34.53333333333333
Longest measuremet: 98 Shortest: 16


Unnamed: 0,linear_accelerationX,linear_accelerationY,linear_accelerationZ,magX,magY,magZ,gyroX,gyroY,gyroZ,accX,accY,accZ,label,time,relative_time
0,-0.52288,-0.001667,-0.306069,17.895508,25.500488,-103.112793,16.601562,36.92627,4.394531,0.376099,0.109009,-0.39978,0.0,0.0,0.0
1,-0.369454,0.063189,-0.504979,19.702148,25.305176,-101.916504,38.208008,63.903809,13.671875,0.523315,0.174927,-0.607178,0.0,0.045751,0.045751
2,-0.007031,0.070908,-0.46902,21.191406,26.367188,-100.915527,23.254395,95.397949,32.897949,0.88562,0.183838,-0.579102,0.0,0.089975,0.089975
3,0.270887,0.164697,-0.427053,23.88916,24.963379,-98.266602,-3.356934,119.873047,51.452637,1.168091,0.280396,-0.544312,0.0,0.134983,0.134983
4,0.545052,0.10435,-0.377135,26.855469,22.937012,-95.166016,-28.686523,132.446289,64.025879,1.451416,0.221802,-0.500732,0.0,0.18038,0.18038


In [4]:
# Load the 'incorrect' recordings and summarise how they split into measurements.
incorrect = load_and_process_measurements(r'rossz_evezesek_final.csv')
incorrect_sizes = incorrect['label'].value_counts()  # number of rows per measurement
print(
    'Incorrect circles length:', len(incorrect),
    'Number of measurements:', len(incorrect_sizes),
    'Avg measurement length:', len(incorrect) / len(incorrect_sizes),
)
print('Longest measuremet:', incorrect_sizes.max(), 'Shortest:', incorrect_sizes.min())
incorrect.head()

Incorrect circles length: 5882 Number of measurements: 222 Avg measurement length: 26.495495495495497
Longest measuremet: 196 Shortest: 4


Unnamed: 0,linear_accelerationX,linear_accelerationY,linear_accelerationZ,magX,magY,magZ,gyroX,gyroY,gyroZ,accX,accY,accZ,label,time,relative_time
0,0.680171,-0.002148,-0.267128,33.215332,24.121094,-89.294434,-57.006836,27.160645,30.151367,1.353516,-0.222534,-0.182861,0.0,0.0,0.0
1,0.561841,0.064877,0.0876,33.349609,20.568848,-86.730957,-61.706543,50.354004,38.391113,1.244629,-0.154419,0.17334,0.0,0.090254,0.090254
2,0.512697,-0.047244,0.173071,33.947754,17.1875,-85.36377,-43.395996,69.519043,40.95459,1.204102,-0.267334,0.261719,0.0,0.091939,0.091939
3,0.63328,-0.077197,0.345727,33.642578,15.600586,-82.763672,4.516602,74.645996,38.330078,1.335327,-0.298584,0.440186,0.0,0.223984,0.223984
4,0.497117,-0.020577,0.339652,33.508301,13.647461,-80.480957,30.456543,78.125,38.330078,1.20752,-0.24231,0.439819,0.0,0.226223,0.226223


In [9]:
import numpy as np
from tensorflow.keras.preprocessing.sequence import pad_sequences

def prepare_sequence_data(df: pd.DataFrame, label_value: int, max_len: int = None,
                          truncating: str = 'pre'):
    """Turn a labelled measurement frame into a padded 3-D array plus targets.

    The frame is grouped by its 'label' column; each group becomes one
    sequence whose features are all columns except 'label' and 'time'
    (in the sorted order returned by ``Index.difference``). Sequences are
    zero-padded at the end up to ``max_len``.

    Parameters
    ----------
    df : pd.DataFrame
        Frame with a 'label' column identifying the measurement of each row.
    label_value : int
        Target value assigned to every sequence built from ``df``.
    max_len : int, optional
        Length every sequence is padded/truncated to. Defaults to the length
        of the longest sequence in ``df``.
    truncating : str, optional
        Passed to ``pad_sequences``: 'pre' (default, the previous behaviour)
        drops the beginning of over-long sequences, 'post' drops their end.

    Returns
    -------
    (sequences_padded, labels)
        ``sequences_padded`` is the float32 array returned by
        ``pad_sequences``; ``labels`` holds ``label_value`` once per sequence.

    Raises
    ------
    ValueError
        If ``max_len`` is None and ``df`` contains no groups to infer it from.

    Warns
    -----
    UserWarning
        If any sequence is longer than ``max_len`` and therefore loses samples.
    """
    import warnings  # local import keeps the cell self-contained

    feature_cols = df.columns.difference(['label', 'time'])

    sequences = [group[feature_cols].values for _, group in df.groupby('label')]
    labels = np.array([label_value] * len(sequences))

    if max_len is None:
        if not sequences:
            raise ValueError("cannot infer max_len: df contains no 'label' groups")
        max_len = max(len(seq) for seq in sequences)

    # pad_sequences cuts over-long sequences without any notice; make the data
    # loss visible so the caller can choose a larger max_len if needed.
    n_truncated = sum(len(seq) > max_len for seq in sequences)
    if n_truncated:
        warnings.warn(
            f"{n_truncated} of {len(sequences)} sequences are longer than "
            f"max_len={max_len} and will be truncated (truncating='{truncating}')",
            stacklevel=2,
        )

    sequences_padded = pad_sequences(
        sequences,
        maxlen=max_len,
        padding='post',
        truncating=truncating,
        dtype='float32',
    )

    return sequences_padded, labels


In [10]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, GlobalMaxPooling1D, Dense, Dropout, Masking

import numpy as np

# Build padded sequence tensors: target 1 for the 'correct' set, 0 for the
# 'incorrect' set.
X_correct, y_correct = prepare_sequence_data(correct, label_value=1)
# NOTE(review): max_len comes from the 'correct' set only. Any 'incorrect'
# sequence longer than that is truncated by pad_sequences - confirm this is
# intended, or derive max_len from both sets.
X_incorrect, y_incorrect = prepare_sequence_data(incorrect, label_value=0, max_len=X_correct.shape[1])

X = np.concatenate([X_correct, X_incorrect], axis=0)
y = np.concatenate([y_correct, y_incorrect], axis=0)

# Seeded shuffle so the train/test split (and therefore the reported test
# accuracy) is reproducible across kernel restarts.
SEED = 42
test_size = 0.2

rng = np.random.default_rng(SEED)
indices = rng.permutation(len(X))

split_index = int(len(X) * (1 - test_size))

X_train, X_test = X[indices[:split_index]], X[indices[split_index:]]
y_train, y_test = y[indices[:split_index]], y[indices[split_index:]]


# Small 1-D CNN: one convolution over time, global max pooling, then a dense
# head with a sigmoid output for the binary correct/incorrect decision.
# NOTE(review): Conv1D / GlobalMaxPooling1D may not consume the mask emitted by
# Masking - confirm that zero-padded time steps are really ignored.
model = Sequential([
    Masking(mask_value=0., input_shape=(X.shape[1], X.shape[2])),
    Conv1D(64, kernel_size=5, activation='relu'),
    GlobalMaxPooling1D(),
    Dropout(0.3),
    Dense(32, activation='relu'),
    Dense(1, activation='sigmoid')
])

model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
model.summary()


Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 masking_1 (Masking)         (None, 98, 13)            0         
                                                                 
 conv1d_1 (Conv1D)           (None, 94, 64)            4224      
                                                                 
 global_max_pooling1d_1 (Gl  (None, 64)                0         
 obalMaxPooling1D)                                               
                                                                 
 dropout_1 (Dropout)         (None, 64)                0         
                                                                 
 dense_2 (Dense)             (None, 32)                2080      
                                                                 
 dense_3 (Dense)             (None, 1)                 33        
                                                      

In [11]:
from tensorflow.keras.callbacks import ModelCheckpoint

# Checkpoint callback: writes the model to disk only when the monitored
# validation accuracy improves, so the file ends up holding the best epoch.
ch = ModelCheckpoint(
    filepath='stroke_modelp3_9_21.h5',
    monitor='val_accuracy', mode='auto',
    save_best_only=True,
    verbose=0,
)

In [12]:
# Train with a 20 % validation hold-out taken from the training data; the
# checkpoint callback saves the best epoch to disk along the way.
model.fit(
    x=X_train,
    y=y_train,
    batch_size=8,
    epochs=20,
    validation_split=0.2,
    callbacks=[ch],
)

# Score the model as it stands after the final epoch on the held-out test set.
loss, acc = model.evaluate(x=X_test, y=y_test)
print(f"Test Accuracy: {acc:.2f}")

Epoch 1/20


Epoch 2/20


  saving_api.save_model(


Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Test Accuracy: 0.96


In [13]:
from tensorflow.keras.models import load_model

# Reload the checkpoint written during training and score it on the same
# held-out test set for comparison with the final-epoch model.
best_model = load_model(filepath='stroke_modelp3_9_21.h5')

loss, acc = best_model.evaluate(x=X_test, y=y_test)
print(f"Test Accuracy: {acc:.2f}")

Test Accuracy: 0.93
