In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split, KFold
from sklearn.metrics import accuracy_score
from scipy.signal import butter, filtfilt, find_peaks
from sklearn.tree import DecisionTreeClassifier
from sklearn.preprocessing import StandardScaler
# from sklearn.metrics import mean_absolute_error, mean_squared_error
from sklearn.metrics import accuracy_score, precision_score, mean_absolute_error, mean_squared_error
from tensorflow.keras.preprocessing.sequence import pad_sequences


# Define butter_filter function
def butter_filter(data, cutoff, fs, order=4):
    """Low-pass filter a signal with a Butterworth design applied forward and backward.

    Parameters
    ----------
    data : array-like
        Samples to filter.
    cutoff : float
        Cutoff frequency, expressed in the same units as ``fs``.
    fs : float
        Sampling frequency of ``data``.
    order : int, optional
        Order of the Butterworth filter (default 4).

    Returns
    -------
    numpy.ndarray
        The filtered samples, same shape as ``data``.
    """
    # butter() takes the cutoff as a fraction of the Nyquist frequency (fs / 2).
    nyquist = fs / 2.0
    numerator, denominator = butter(order, cutoff / nyquist, btype='low', analog=False)
    # filtfilt applies the filter in both directions, so the output has no phase lag.
    return filtfilt(numerator, denominator, data)


# Load CSV files into DataFrames
def _load_recording(csv_path):
    """Read one subject's recording into a DataFrame with 'Time' and 'Current' columns.

    The file's own header row is replaced by the names 'Time' and 'Current'.
    Rows containing missing values are dropped, and the index is then rebuilt
    as 0..n-1. The rebuild matters: peak detection later returns *positional*
    indices, and indexing a Series with an integer array is label-based, so a
    gap left in the index by ``dropna`` would make those lookups fail or hit
    the wrong rows.
    """
    recording = pd.read_csv(csv_path, header=0, names=['Time', 'Current'], na_values=[])
    return recording.dropna().reset_index(drop=True)


data1 = _load_recording('Subject 1 (Left right up down).csv')
data2 = _load_recording('Subject 2 (Left right up down).csv')
data3 = _load_recording('Subject 3 (Left right up down).csv')

# Set cutoff frequency and sampling frequency
cutoff_freq = 1 # Hz
# Sampling frequency estimated from the mean spacing of subject 1's timestamps
# (in Hz only if 'Time' is in seconds -- TODO confirm the units of the CSV).
fs = 1 / np.mean(np.diff(data1['Time']))

# Candidate peak-detection settings tried for every subject.
window_sizes = [0.001, 0.005, 0.01, 0.05]   # minimum peak spacing, multiplied by fs to get samples
threshold_levels = [1, 2, 3, 4, 5]          # prominence, in multiples of the signal's std


def _detect_best_peaks(signal, sampling_rate, windows, thresholds):
    """Grid-search ``find_peaks`` settings and keep the combination yielding the most peaks.

    Parameters
    ----------
    signal : array-like
        Signal to search (here: the second derivative of the filtered current).
    sampling_rate : float
        Samples per unit of time; converts each window into a sample distance.
    windows : iterable of float
        Candidate minimum spacings between peaks, in time units.
    thresholds : iterable of float
        Candidate prominence levels, as multiples of ``np.std(signal)``.

    Returns
    -------
    (best_window, best_threshold, best_peaks, best_score)
        ``best_score`` is the number of peaks found. When no combination finds
        any peak, window and threshold are ``None``, ``best_peaks`` is an empty
        integer array and the score is 0, so downstream slicing still works.
    """
    # Prominence is scaled by this signal's own spread, not by another subject's.
    noise_level = np.std(signal)
    best_window, best_threshold, best_score = None, None, 0
    best_peaks = np.empty(0, dtype=np.intp)
    for window in windows:
        # find_peaks rejects distance < 1; short windows at low sampling rates
        # would otherwise truncate to 0 and raise ValueError.
        min_dist = max(1, int(window * sampling_rate))
        for threshold in thresholds:
            peaks, _ = find_peaks(signal, prominence=threshold * noise_level, distance=min_dist)
            # Strict '>' keeps the first combination that reaches the maximum count.
            if len(peaks) > best_score:
                best_window, best_threshold = window, threshold
                best_peaks, best_score = peaks, len(peaks)
    return best_window, best_threshold, best_peaks, best_score


def _split_into_movements(peaks):
    """Cut the peak list into four equal consecutive groups: left, right, up, down.

    Returns ``(peaks_per_movement, left, right, up, down)``. Every group holds
    exactly ``len(peaks) // 4`` peaks; up to three trailing peaks are left out so
    the groups stay the same size, matching the per-movement count used for labels.
    """
    per_movement = len(peaks) // 4
    groups = [peaks[i * per_movement:(i + 1) * per_movement] for i in range(4)]
    return (per_movement, *groups)


# NOTE(review): `fs` is estimated from subject 1 only; this assumes all three
# recordings share the same sampling rate -- confirm against the acquisition setup.

# Filter signal, take second derivative, and detect peaks for data1
filtered_data1 = butter_filter(data1['Current'], cutoff_freq, fs, order=4)
second_derivative1 = np.gradient(np.gradient(filtered_data1))
baseline = np.std(second_derivative1)  # kept for reference: spread of subject 1's second derivative
best_window1, best_threshold1, best_peaks1, best_score1 = _detect_best_peaks(
    second_derivative1, fs, window_sizes, threshold_levels)

# Separate peaks into left, right, up, and down for data1
(num_peaks_per_movement,
 left_peaks1, right_peaks1, up_peaks1, down_peaks1) = _split_into_movements(best_peaks1)

# Filter signal, take second derivative, and detect peaks for data2
filtered_data2 = butter_filter(data2['Current'], cutoff_freq, fs, order=4)
second_derivative2 = np.gradient(np.gradient(filtered_data2))
best_window2, best_threshold2, best_peaks2, best_score2 = _detect_best_peaks(
    second_derivative2, fs, window_sizes, threshold_levels)

# Separate peaks into left, right, up, and down for data2
(num_peaks_per_movement2,
 left_peaks2, right_peaks2, up_peaks2, down_peaks2) = _split_into_movements(best_peaks2)

# Filter signal, take second derivative, and detect peaks for data3
filtered_data3 = butter_filter(data3['Current'], cutoff_freq, fs, order=4)
second_derivative3 = np.gradient(np.gradient(filtered_data3))
best_window3, best_threshold3, best_peaks3, best_score3 = _detect_best_peaks(
    second_derivative3, fs, window_sizes, threshold_levels)

# Separate peaks into left, right, up, and down for data3
(num_peaks_per_movement3,
 left_peaks3, right_peaks3, up_peaks3, down_peaks3) = _split_into_movements(best_peaks3)




In [1]:

# NOTE: this cell relies on names created by the previous cell (np, data1..3,
# the *_peaks* arrays, StandardScaler, ...). Run that cell first; on a fresh
# kernel this cell alone fails with a NameError.

MOVEMENT_NAMES = ('left', 'right', 'up', 'down')


def _peak_intervals(time_column, peak_groups):
    """Return, for each group of peak positions, the time gaps between consecutive peaks.

    Peak positions are positional indices, so the time column is converted to a
    plain array first; indexing the pandas Series directly would look the
    positions up as index *labels*.
    """
    times = np.asarray(time_column)
    return tuple(np.diff(times[np.asarray(group, dtype=np.intp)]) for group in peak_groups)


def _stack_samples(interval_groups):
    """Build an aligned (features, labels) pair from per-movement interval arrays.

    Each interval becomes one sample (one row, one column) and is labelled with
    the movement it came from, so ``features`` and ``labels`` always have the
    same length. The groups are taken in the order of ``MOVEMENT_NAMES``.
    """
    columns = [np.asarray(gaps, dtype=float).reshape(-1, 1) for gaps in interval_groups]
    names = [np.full(len(gaps), name) for name, gaps in zip(MOVEMENT_NAMES, interval_groups)]
    return np.concatenate(columns), np.concatenate(names)


# Extract features for data1
left_time_diffs1, right_time_diffs1, up_time_diffs1, down_time_diffs1 = _peak_intervals(
    data1['Time'], (left_peaks1, right_peaks1, up_peaks1, down_peaks1))
data1_features, data1_labels = _stack_samples(
    (left_time_diffs1, right_time_diffs1, up_time_diffs1, down_time_diffs1))

# Extract features for data2
left_time_diffs2, right_time_diffs2, up_time_diffs2, down_time_diffs2 = _peak_intervals(
    data2['Time'], (left_peaks2, right_peaks2, up_peaks2, down_peaks2))
data2_features, data2_labels = _stack_samples(
    (left_time_diffs2, right_time_diffs2, up_time_diffs2, down_time_diffs2))

# Extract features for data3
left_time_diffs3, right_time_diffs3, up_time_diffs3, down_time_diffs3 = _peak_intervals(
    data3['Time'], (left_peaks3, right_peaks3, up_peaks3, down_peaks3))
data3_features, data3_labels = _stack_samples(
    (left_time_diffs3, right_time_diffs3, up_time_diffs3, down_time_diffs3))

# Concatenate features and labels from all data sets
features = np.concatenate((data1_features, data2_features, data3_features))
labels = np.concatenate((data1_labels, data2_labels, data3_labels))
assert len(features) == len(labels), "every feature row needs exactly one label"

# No padding step: every row already has the same width, and pad_sequences
# defaults to dtype='int32', which would truncate the float intervals.

# Define scaler and decision tree classifier
scaler = StandardScaler()
clf = DecisionTreeClassifier(random_state=0)

# Perform (up to) 10-fold cross-validation
n_samples = len(features)
if n_samples < 2:
    raise ValueError(
        'Need at least 2 inter-peak intervals to cross-validate, got {}'.format(n_samples))
# KFold cannot make more folds than there are samples.
kfold = KFold(n_splits=min(10, n_samples), shuffle=True, random_state=0)
accuracy_scores = []
for train_idx, test_idx in kfold.split(features):
    # Fit the scaler on the training fold only so test-fold statistics do not
    # leak into the model; the test fold is transformed with the same parameters.
    train_features = scaler.fit_transform(features[train_idx])
    test_features = scaler.transform(features[test_idx])
    train_labels, test_labels = labels[train_idx], labels[test_idx]
    clf.fit(train_features, train_labels)
    predicted_labels = clf.predict(test_features)
    accuracy_scores.append(accuracy_score(test_labels, predicted_labels))

# Print mean and standard deviation of accuracy scores
print('Accuracy: {:.2f} ± {:.2f}'.format(np.mean(accuracy_scores), np.std(accuracy_scores)))


NameError: name 'np' is not defined