In [8]:
import pandas as pd

def load_and_process_measurements(csv_path: str) -> pd.DataFrame:
    """Load a recording and keep only the rows that lie inside marked measurements.

    A row containing the value ``'X'`` in any column opens a measurement and a
    row containing ``'O'`` closes it. Rows between an ``'X'`` row and the next
    ``'O'`` row belong to that measurement. Marker rows themselves, and rows
    outside any measurement, are discarded.

    Args:
        csv_path: Path of the CSV file. It must contain a ``Timestamp`` column.

    Returns:
        A DataFrame cast to float, with a fresh ``0..n-1`` index, holding the
        CSV columns except ``Timestamp`` plus:

        * ``label`` - zero-based measurement number (float, because the whole
          frame is cast to float),
        * ``time`` - ``Timestamp`` minus the ``Timestamp`` of the first kept row,
        * ``relative_time`` - ``Timestamp`` minus the ``Timestamp`` of the first
          row of the same measurement.

        When no row lies inside a measurement, an empty frame with these
        columns is returned instead of raising ``IndexError``.
    """
    raw = pd.read_csv(csv_path)

    # Locate marker rows for the whole frame at once instead of row by row.
    as_text = raw.astype(str)
    is_start = as_text.eq('X').any(axis=1)
    is_stop = as_text.eq('O').any(axis=1)
    is_marker = is_start | is_stop

    # 1.0 on 'X' rows, 0.0 on rows that only hold 'O', NaN everywhere else.
    # Forward-filling gives every row the state set by the most recent marker;
    # rows before the first marker are outside any measurement. A row holding
    # both markers counts as an 'X', matching the if/elif order used before.
    state = is_start.astype(float).where(is_marker)
    in_measurement = state.ffill().fillna(0.0).astype(bool)

    # The n-th 'X' (counting from zero) labels every row that follows it.
    labels = is_start.cumsum() - 1
    keep = in_measurement & ~is_marker

    df = (
        raw.assign(label=labels)
        .loc[keep]
        .reset_index(drop=True)
        .astype(float)
    )

    timestamps = df['Timestamp']
    if df.empty:
        # Nothing to measure against; keep the output schema stable.
        df['time'] = timestamps
        df['relative_time'] = timestamps
    else:
        df['time'] = timestamps - timestamps.iloc[0]
        df['relative_time'] = df.groupby('label')['Timestamp'].transform(
            lambda ts: ts - ts.iloc[0]
        )

    return df.drop(columns=['Timestamp'])


In [9]:
# Load the correctly performed circles and summarise how long the measurements are.
correct = load_and_process_measurements(r'correct_circles.csv')

# Rows per measurement, computed once and reused for the max/min summary.
correct_rows_per_measurement = correct['label'].value_counts()

print(
    'Correct circles length:', len(correct),
    'Number of measurements:', correct['label'].nunique(),
    'Avg measurement length:', len(correct) / correct['label'].nunique(),
)
print(
    'Longest measuremet:', correct_rows_per_measurement.max(),
    'Shortest:', correct_rows_per_measurement.min(),
)
correct.head()

Correct circles length: 7108 Number of measurements: 183 Avg measurement length: 38.84153005464481
Longest measuremet: 65 Shortest: 23


Unnamed: 0,linear_accelerationX,linear_accelerationY,linear_accelerationZ,magX,magY,magZ,gyroX,gyroY,gyroZ,accX,accY,accZ,label,time,relative_time
0,-0.045276,-0.086649,0.04508,3.186035,32.385254,-98.010254,-2.563477,4.272461,-0.183105,-0.014771,-0.000977,0.972534,0.0,0.0,0.0
1,-0.067938,-0.105986,0.042895,3.540039,32.666016,-95.935059,-1.525879,5.065918,-3.112793,-0.038574,-0.022095,0.971069,0.0,0.001999,0.001999
2,-0.044365,-0.079023,0.043506,3.540039,32.702637,-96.582031,-0.488281,4.882812,-1.525879,-0.015747,0.00354,0.972412,0.0,0.059856,0.059856
3,-0.045673,-0.076636,0.04867,3.308105,32.617188,-96.887207,0.854492,5.065918,-0.244141,-0.017822,0.004639,0.978394,0.0,0.120137,0.120137
4,-0.039875,-0.07669,0.041502,3.430176,31.872559,-96.606445,1.464844,5.737305,-3.967285,-0.012695,0.003296,0.971924,0.0,0.239192,0.239192


In [10]:
# Load the incorrectly performed circles and summarise how long the measurements are.
incorrect = load_and_process_measurements(r'incorrect_circles.csv')

# Rows per measurement, computed once and reused for the max/min summary.
incorrect_rows_per_measurement = incorrect['label'].value_counts()

print(
    'Incorrect circles length:', len(incorrect),
    'Number of measurements:', incorrect['label'].nunique(),
    'Avg measurement length:', len(incorrect) / incorrect['label'].nunique(),
)
print(
    'Longest measuremet:', incorrect_rows_per_measurement.max(),
    'Shortest:', incorrect_rows_per_measurement.min(),
)
incorrect.head()

Incorrect circles length: 1036 Number of measurements: 27 Avg measurement length: 38.370370370370374
Longest measuremet: 49 Shortest: 28


Unnamed: 0,linear_accelerationX,linear_accelerationY,linear_accelerationZ,magX,magY,magZ,gyroX,gyroY,gyroZ,accX,accY,accZ,label,time,relative_time
0,0.10721,0.308428,0.074314,22.900391,-21.643066,-77.124023,-1.77002,7.385254,-3.051758,-0.069824,0.828369,0.593018,0.0,0.0,0.0
1,0.127767,0.283521,0.009217,23.742676,-21.862793,-75.769043,3.601074,1.708984,-3.173828,-0.047119,0.808228,0.528076,0.0,0.002003,0.002003
2,0.049542,0.193477,0.018429,23.059082,-20.874023,-76.477051,2.01416,-6.469727,2.990723,-0.124512,0.721436,0.537598,0.0,0.003003,0.003003
3,0.136362,0.284401,0.03145,23.144531,-21.557617,-75.585938,8.850098,-1.098633,1.525879,-0.0354,0.817139,0.551147,0.0,0.06088,0.06088
4,0.120422,0.244764,0.148342,22.680664,-21.618652,-75.476074,7.507324,0.488281,-2.807617,-0.049316,0.781616,0.670532,0.0,0.120578,0.120578


In [11]:
import numpy as np
from tensorflow.keras.preprocessing.sequence import pad_sequences

def prepare_sequence_data(df: pd.DataFrame, label_value: int, max_len: int = None):
    """Turn a labelled measurement table into a padded 3-D array plus class labels.

    Each distinct value of ``df['label']`` is treated as one sequence. Every
    column except ``label`` and ``time`` is used as a feature, in the sorted
    order returned by ``Index.difference``.

    Args:
        df: Table with a ``label`` column identifying the measurement each row
            belongs to.
        label_value: Class label attached to every sequence taken from ``df``.
        max_len: Number of time steps in the output. ``None`` (default) uses the
            longest sequence found in ``df``.
            NOTE(review): sequences longer than ``max_len`` are cut by
            ``pad_sequences`` using its default ``truncating`` mode - confirm
            that this is acceptable for callers passing a smaller ``max_len``.

    Returns:
        ``(sequences, labels)``: ``sequences`` is a float32 array of shape
        ``(n_sequences, max_len, n_features)`` zero-padded at the end of each
        sequence; ``labels`` is a 1-D array holding ``label_value`` once per
        sequence. If ``df`` contains no sequences, both arrays are empty but
        keep that rank so they can still be concatenated with other batches.
    """
    feature_cols = df.columns.difference(['label', 'time'])
    sequences = [group[feature_cols].values for _, group in df.groupby('label')]

    if max_len is None:
        # default=0 avoids "max() arg is an empty sequence" on an empty table.
        max_len = max((len(seq) for seq in sequences), default=0)

    if not sequences:
        # Padding an empty list would lose the feature axis; build the empty
        # batch explicitly so its rank matches a non-empty result.
        empty_batch = np.zeros((0, max_len, len(feature_cols)), dtype='float32')
        return empty_batch, np.full(0, label_value)

    sequences_padded = pad_sequences(sequences, maxlen=max_len, padding='post', dtype='float32')
    labels = np.full(len(sequences), label_value)

    return sequences_padded, labels


In [12]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, GlobalMaxPooling1D, Dense, Dropout, Masking

from sklearn.model_selection import train_test_split

# Pad both classes to the longest measurement found in EITHER set, so that an
# incorrect-circle recording longer than every correct one is not cut short.
max_len = int(max(correct['label'].value_counts().max(),
                  incorrect['label'].value_counts().max()))

X_correct, y_correct = prepare_sequence_data(correct, label_value=1, max_len=max_len)
X_incorrect, y_incorrect = prepare_sequence_data(incorrect, label_value=0, max_len=max_len)

X = np.concatenate([X_correct, X_incorrect], axis=0)
y = np.concatenate([y_correct, y_incorrect], axis=0)

# The classes are heavily imbalanced (far more correct than incorrect
# measurements), so stratify to keep the same class ratio in train and test.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# Small 1-D CNN: one convolution over time, max-pooled to a fixed-size vector,
# followed by a dense head with a sigmoid output for the binary decision.
# NOTE(review): Masking is meant to hide the zero padding, but Conv1D and
# GlobalMaxPooling1D may not consume the mask - confirm it has any effect here.
model = Sequential([
    Masking(mask_value=0., input_shape=(X.shape[1], X.shape[2])),
    Conv1D(64, kernel_size=5, activation='relu'),
    GlobalMaxPooling1D(),
    Dropout(0.3),
    Dense(32, activation='relu'),
    Dense(1, activation='sigmoid')
])

model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
model.summary()


Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 masking_1 (Masking)         (None, 65, 13)            0         
                                                                 
 conv1d_1 (Conv1D)           (None, 61, 64)            4224      
                                                                 
 global_max_pooling1d_1 (Gl  (None, 64)                0         
 obalMaxPooling1D)                                               
                                                                 
 dropout_1 (Dropout)         (None, 64)                0         
                                                                 
 dense_2 (Dense)             (None, 32)                2080      
                                                                 
 dense_3 (Dense)             (None, 1)                 33        
                                                      

In [13]:
# Train for a fixed number of epochs, holding out 20% of the training data
# for validation, then score the model on the untouched test split.
model.fit(
    x=X_train,
    y=y_train,
    batch_size=8,
    epochs=20,
    validation_split=0.2,
)

loss, acc = model.evaluate(x=X_test, y=y_test)
print(f"Test Accuracy: {acc:.2f}")

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Test Accuracy: 0.93


In [14]:
# Re-score the trained model on the test split (no further training happens here).
loss, acc = model.evaluate(x=X_test, y=y_test)
print(f"Test Accuracy: {acc:.2f}")

Test Accuracy: 0.93
