# Contains magnetometer data and combined data

In [3]:
import pandas as pd
from sklearn.preprocessing import StandardScaler

In [None]:
data_frames= []

# USING Magnetometer data (mx,my,mz)

In [None]:
from sklearn.utils import shuffle

csv_files = ['FlyMX.csv', 'BackMX.csv', 'BreastMX.csv', 'FreeMX.csv']


# Load each file
for file in csv_files:
    df = pd.read_csv(file)
    data_frames.append(df)

# Concatenate all the data frames
combined_df = pd.concat(data_frames, ignore_index=True)

# applying scaler to normalize
scaler = StandardScaler()
features = ['x', 'y', 'z']

combined_df[features] = scaler.fit_transform(combined_df[features])


combined_df = shuffle(combined_df, random_state=42)



#double check
print(combined_df.head(-1))

      Time (seconds)         x         y         z  stroke
6179          301.34 -0.952398 -1.436431  0.668102       4
1832          162.16 -1.006306  0.500644 -1.520171       2
6005          349.77 -0.467226 -0.437389  0.517492       3
4048          265.07 -1.114122 -0.078953  0.477625       3
3127          218.24  1.078137  1.461555 -0.713965       2
...              ...       ...       ...       ...     ...
5191          314.46  0.754689 -0.254358  0.451046       3
5226          316.06  0.889459 -0.338247  0.451046       3
5390          323.07  1.096106 -0.261984  0.176405       3
860           192.80 -0.835597  0.432007  0.676961       1
7603          362.99  1.221891 -2.557494 -0.098237       4

[7697 rows x 5 columns]


**Preprocessing**

In [None]:
# Encoding labels

from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.utils import to_categorical

# Encoding labels
encoder = LabelEncoder()
combined_df['stroke'] = encoder.fit_transform(combined_df['stroke'])
labels = to_categorical(combined_df['stroke'].values)



In [None]:
import numpy as np

sequence_length = 50  # assuming a cycle of a stroke is 2 seconds

def create_sequences(data, labels, sequence_length):
    X, y = [], []
    for i in range(len(data) - sequence_length + 1):
        X.append(data[i:(i + sequence_length)])
        y.append(labels[i + sequence_length - 1])
    return np.array(X), np.array(y)

data = combined_df[features].values
X, y = create_sequences(data, labels, sequence_length)


In [None]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


**Running LSTM**

In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense

model_lstm = Sequential()
model_lstm.add(LSTM(units=50, return_sequences=True, input_shape=(X_train.shape[1], X_train.shape[2])))
model_lstm.add(LSTM(units=50))
model_lstm.add(Dense(units=y_train.shape[1], activation='softmax'))

model_lstm.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
model_lstm.fit(X_train, y_train, epochs=10, batch_size=32, validation_split=0.1)

test_loss, test_accuracy = model_lstm.evaluate(X_test, y_test)

print(f"Test Loss: {test_loss}")
print(f"Test Accuracy: {test_accuracy}")



Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Test Loss: 0.46743929386138916
Test Accuracy: 0.801307201385498


In [None]:
from sklearn.metrics import classification_report
import numpy as np

# Generate predictions for the test set
y_pred = model_lstm.predict(X_test)
y_pred_classes = np.argmax(y_pred, axis=1)
y_test_classes = np.argmax(y_test, axis=1)

# Calculate precision, recall, and F1-score
report = classification_report(y_test_classes, y_pred_classes)
print(report)


              precision    recall  f1-score   support

           0       0.68      0.51      0.58       340
           1       0.96      0.88      0.92       430
           2       0.68      0.84      0.75       466
           3       0.95      0.95      0.95       294

    accuracy                           0.80      1530
   macro avg       0.81      0.80      0.80      1530
weighted avg       0.81      0.80      0.80      1530



Increasing the number of units and more layers adding in dropout


In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout, Bidirectional

model_lstm = Sequential()

# Input layer with Bidirectional LSTM
model_lstm.add(Bidirectional(LSTM(units=100, return_sequences=True), input_shape=(X_train.shape[1], X_train.shape[2])))

# Additional Bidirectional LSTM layer with return sequences set to True for stacking
model_lstm.add(Bidirectional(LSTM(units=100, return_sequences=True)))
model_lstm.add(Dropout(0.2))  # Dropout for regularization

# Another Bidirectional LSTM Layer, still returning sequences to allow stacking
model_lstm.add(Bidirectional(LSTM(units=100, return_sequences=True)))
model_lstm.add(Dropout(0.2))  # Additional Dropout layer

# Final Bidirectional LSTM Layer, no return_sequences as we're moving to the Dense layer
model_lstm.add(Bidirectional(LSTM(units=100)))
model_lstm.add(Dropout(0.2))  # Final Dropout layer before the Dense layer

# Output layer
model_lstm.add(Dense(units=y_train.shape[1], activation='softmax'))

model_lstm.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

# Train the model
model_lstm.fit(X_train, y_train, epochs=10, batch_size=32, validation_split=0.1)

# Evaluate the model on the test set
test_loss, test_accuracy = model_lstm.evaluate(X_test, y_test)

print(f"Test Loss: {test_loss}")
print(f"Test Accuracy: {test_accuracy}")


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Test Loss: 0.4366164207458496
Test Accuracy: 0.8333333134651184


In [None]:
from sklearn.metrics import classification_report
import numpy as np

# Generate predictions for the test set
y_pred = model_lstm.predict(X_test)
y_pred_classes = np.argmax(y_pred, axis=1)
y_test_classes = np.argmax(y_test, axis=1)

# Calculate precision, recall, and F1-score
report = classification_report(y_test_classes, y_pred_classes)
print(report)


              precision    recall  f1-score   support

           0       0.76      0.61      0.68       340
           1       0.95      0.89      0.92       430
           2       0.73      0.87      0.79       466
           3       0.95      0.95      0.95       294

    accuracy                           0.83      1530
   macro avg       0.85      0.83      0.83      1530
weighted avg       0.84      0.83      0.83      1530



DTW- 3 features (mx,my,mz)

In [7]:
!pip3 install tslearn

Collecting tslearn
  Downloading tslearn-0.6.3-py3-none-any.whl (374 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/374.4 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [91m━━━━━━━━━━━━[0m[90m╺[0m[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m112.6/374.4 kB[0m [31m3.1 MB/s[0m eta [36m0:00:01[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m374.4/374.4 kB[0m [31m5.8 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: tslearn
Successfully installed tslearn-0.6.3


In [8]:
import pandas as pd
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.model_selection import train_test_split
from tslearn.utils import to_time_series_dataset
from tslearn.neighbors import KNeighborsTimeSeriesClassifier
from sklearn.metrics import accuracy_score

In [9]:
import pandas as pd
csv_files = ['FlyMX.csv', 'BackMX.csv', 'BreastMX.csv', 'FreeMX.csv']

data_frames= []
# Load each file
for file in csv_files:
    df = pd.read_csv(file)
    data_frames.append(df)

# Concatenate all the data frames
combined_df = pd.concat(data_frames, ignore_index=True)

# applying scaler to normalize
scaler = StandardScaler()
features = ['x', 'y', 'z']

combined_df[features] = scaler.fit_transform(combined_df[features])

In [10]:
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.utils import to_categorical

# Encoding labels
encoder = LabelEncoder()
combined_df['stroke'] = encoder.fit_transform(combined_df['stroke'])

# Formatting for DTW
time_series = to_time_series_dataset(combined_df[['x', 'y', 'z']].values.reshape(-1, 1, 3))
labels = combined_df['stroke'].values

In [11]:
X_train, X_test, y_train, y_test = train_test_split(time_series, labels, test_size=0.2, random_state=42)


In [12]:
# Train a k-NN classifier with DTW
model_dtw = KNeighborsTimeSeriesClassifier(n_neighbors=1, metric="dtw")
model_dtw.fit(X_train, y_train)

# Predict and evaluate accuracy
y_pred = model_dtw.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f"DTW Classification Accuracy: {accuracy}")


DTW Classification Accuracy: 0.9266233766233766


# Using both accelerometer data and Magnetometer data

In [None]:
import pandas as pd

# Load your dataset
df = pd.read_csv('combined.csv')

df.head

Unnamed: 0,Time (seconds),x,y,z,mx,my,mz,stroke
0,155.7,-1264,-16,-200,55.05,-69.0,-95.7,1
1,155.74,-1244,4,-172,55.8,-67.95,-94.8,1
2,155.78,-1240,-76,-200,54.0,-67.5,-93.15,1
3,155.81,-1304,-36,-160,54.6,-67.5,-91.65,1
4,155.88,-1176,148,-292,51.75,-67.95,-89.1,1


In [None]:
from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()
features = ['x', 'y', 'z', 'mx', 'my', 'mz']  # Features to scale
df[features] = scaler.fit_transform(df[features])


In [None]:
from sklearn.preprocessing import LabelEncoder

encoder = LabelEncoder()
df['stroke'] = encoder.fit_transform(df['stroke'])


In [None]:
import numpy as np
from tensorflow.keras.utils import to_categorical


#trying new sequence length because stroke cycle is 2 strokes for long axis strokes and 1 for short axis strokes
sequence_length = 2
def create_sequences(data, labels, sequence_length):
    X, y = [], []
    for i in range(len(data) - sequence_length + 1):
        X.append(data[i:(i + sequence_length)])
        y.append(labels[i + sequence_length - 1])
    return np.array(X), np.array(y)

# Features and labels for sequence creation
features = df[['x', 'y', 'z', 'mx', 'my', 'mz']].values



In [None]:
from sklearn.model_selection import train_test_split

from tensorflow.keras.utils import to_categorical

# One-hot encode labels
labels = to_categorical(labels)

# Recreate sequences with one-hot encoded labels
X, y = create_sequences(features, labels, sequence_length)

# Split the data again
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


LSTM- 6 features

In [None]:
model_lstm = Sequential()
model_lstm.add(LSTM(units=50, return_sequences=True, input_shape=(X_train.shape[1], X_train.shape[2])))
model_lstm.add(LSTM(units=50))
model_lstm.add(Dense(units=y_train.shape[1], activation='softmax'))  # This line should now work correctly

model_lstm.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
model_lstm.fit(X_train, y_train, epochs=10, batch_size=32, validation_split=0.1)

# Evaluate the model
test_loss, test_accuracy = model_lstm.evaluate(X_test, y_test)
print(f"Test Loss: {test_loss}")
print(f"Test Accuracy: {test_accuracy}")


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Test Loss: 0.17325520515441895
Test Accuracy: 0.9441558718681335


**DTW- 6 featuers**


In [None]:
import pandas as pd

# Load your dataset
df = pd.read_csv('combined.csv')

df.head()

Unnamed: 0,Time (seconds),x,y,z,mx,my,mz,stroke
0,155.7,-1264,-16,-200,55.05,-69.0,-95.7,1
1,155.74,-1244,4,-172,55.8,-67.95,-94.8,1
2,155.78,-1240,-76,-200,54.0,-67.5,-93.15,1
3,155.81,-1304,-36,-160,54.6,-67.5,-91.65,1
4,155.88,-1176,148,-292,51.75,-67.95,-89.1,1


In [None]:
from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()
features = ['x', 'y', 'z', 'mx', 'my', 'mz']  # Features to scale
df[features] = scaler.fit_transform(df[features])


In [None]:
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.utils import to_categorical

# Encoding labels
encoder = LabelEncoder()
df['stroke'] = encoder.fit_transform(df['stroke'])

# Formatting for DTW
time_series = to_time_series_dataset(df[['x', 'y', 'z', 'mx', 'my', 'mz']].values.reshape(-1, 1, 6))
labels = df['stroke'].values

In [None]:
X_train, X_test, y_train, y_test = train_test_split(time_series, labels, test_size=0.2, random_state=42)


In [None]:
!pip3 install tslearn

Collecting tslearn
  Downloading tslearn-0.6.3-py3-none-any.whl (374 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m374.4/374.4 kB[0m [31m2.5 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: tslearn
Successfully installed tslearn-0.6.3


In [None]:
from tslearn.neighbors import KNeighborsTimeSeriesClassifier
from sklearn.metrics import accuracy_score

# Convert X_train and X_test into the format for tslearn
from tslearn.utils import to_time_series_dataset

X_train_ts = to_time_series_dataset(X_train)
X_test_ts = to_time_series_dataset(X_test)

# Initialize the DTW k-nearest neighbors classifier
model_dtw = KNeighborsTimeSeriesClassifier(n_neighbors=1, metric="dtw")

# Fit the model
model_dtw.fit(X_train_ts, y_train)

# Predict on the test set
y_pred = model_dtw.predict(X_test_ts)

# Calculate accuracy
accuracy = accuracy_score(y_test, y_pred)
print(f"DTW Classification Accuracy: {accuracy}")


DTW Classification Accuracy: 0.9545454545454546
