In [1]:
import io
import os

import numpy as np
import requests
from scipy import signal

try:
    # `simps` was removed in SciPy 1.14; `simpson` is the supported name.
    from scipy.integrate import simpson
    simps = simpson  # keep the legacy alias for code that still calls simps()
except ImportError:
    # Very old SciPy releases only ship the legacy name.
    from scipy.integrate import simps
    simpson = simps

from sklearn import svm, metrics
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import GradientBoostingClassifier


def load_data_from_url(url, timeout=60):
    """Download a NumPy array file from ``url`` and return its contents.

    Parameters
    ----------
    url : str
        Address of a file that ``np.load`` can read.
    timeout : float or tuple, optional
        Seconds to wait for the server, forwarded to ``requests.get``.
        Without it a stalled connection would block the notebook forever.

    Returns
    -------
    The object produced by ``np.load`` for the downloaded bytes.

    Raises
    ------
    requests.HTTPError
        If the server answers with an error status code.
    requests.Timeout
        If the server does not respond within ``timeout`` seconds.
    """
    response = requests.get(url, timeout=timeout)
    response.raise_for_status()
    # np.load expects a file-like object, so wrap the downloaded bytes in memory.
    return np.load(io.BytesIO(response.content))


def get_freqs_and_psd(matrix, sf=200, win_len=4):
    """Estimate the power spectral density (PSD) of a set of signals with Welch's method.

    Parameters
    ----------
    matrix : array_like
        Input signals indexed as ``matrix[sample_index, channel_index, time_index]``;
        the PSD is computed along the last (time) axis.
    sf : float, optional
        Sampling frequency of the signals in Hz (default 200).
    win_len : float, optional
        Length of each Welch window in seconds (default 4).

    Returns
    -------
    freqs : ndarray
        Frequencies at which the PSD is evaluated.
    psd : ndarray
        PSD values; same leading axes as ``matrix`` with the time axis replaced
        by the frequency axis.
    """
    # Number of samples per Welch segment; welch() requires an integer.
    n_per_seg = int(round(win_len * sf))
    freqs, psd = signal.welch(matrix, fs=sf, nperseg=n_per_seg, axis=-1)
    return freqs, psd

def get_band_power(freqs, psd, low, high):
    """Integrate a PSD over the frequency band ``[low, high]`` (both ends inclusive).

    Parameters
    ----------
    freqs : array_like
        Evenly spaced frequencies, as returned by ``get_freqs_and_psd``.
    psd : array_like
        PSD values whose last axis matches ``freqs``. A 1-D array yields a
        scalar; any leading axes are preserved in the result.
    low, high : float
        Lower and upper edge of the band.

    Returns
    -------
    Band power obtained with Simpson's rule; zero when no frequency bin falls
    inside the band.
    """
    freqs = np.asarray(freqs)
    psd = np.asarray(psd)

    # Spacing of the frequency grid (assumed uniform).
    freq_res = freqs[1] - freqs[0]

    # Boolean mask of the frequency bins that belong to the band.
    idx_band = np.logical_and(freqs >= low, freqs <= high)

    # Nothing to integrate: report zero power rather than failing in the integrator.
    if not idx_band.any():
        return np.zeros(psd.shape[:-1])

    # `simpson` replaces `simps`, which was removed in SciPy 1.14.
    return simpson(psd[..., idx_band], dx=freq_res, axis=-1)
    
  
# Mendeley-hosted locations of the training and test arrays (inputs x, targets y):

url_x_train = 'https://data.mendeley.com/public-files/datasets/5pc2j46cbc/files/169dca1c-4992-43d3-9c94-030de59c2524/file_downloaded'
url_y_train = 'https://data.mendeley.com/public-files/datasets/5pc2j46cbc/files/62accb90-a1b2-4b50-bde5-fbe6096f165f/file_downloaded'

url_x_test = 'https://data.mendeley.com/public-files/datasets/5pc2j46cbc/files/93b81166-0e48-4dc0-ac20-b7167f7606c5/file_downloaded'
url_y_test = 'https://data.mendeley.com/public-files/datasets/5pc2j46cbc/files/adf1c2fd-81ef-4f87-86cc-56d75bba8c31/file_downloaded'

# Download each split in turn: inputs first, then the matching targets.
x_train, y_train = [load_data_from_url(link) for link in (url_x_train, url_y_train)]
x_test, y_test = [load_data_from_url(link) for link in (url_x_test, url_y_test)]

https://data.mendeley.com/public-files/datasets/5pc2j46cbc/files/169dca1c-4992-43d3-9c94-030de59c2524/file_downloaded


NameError: name 'x_train' is not defined

In [None]:
# Welch PSD of every training signal, together with the frequency grid it is sampled on:

freqs, psd_train = get_freqs_and_psd(x_train)

# Split the span covered by freqs into n_bands equal-width bands;
# band_seq holds the n_bands + 1 band edges:

n_bands = 11
band_seq = list(np.linspace(freqs.min(), freqs.max(), n_bands + 1))
low, high = band_seq[:2]

n_samps, n_channs, f = psd_train.shape
band_power_train = np.zeros((n_samps, n_channs, n_bands))

# One band-power value per (sample, channel, band) triple:
for i, j, k in np.ndindex(n_samps, n_channs, n_bands):
    band_power_train[i, j, k] = get_band_power(
        freqs, psd_train[i, j, :], band_seq[k], band_seq[k + 1]
    )
            

In [None]:
# Repeat the PSD / band-power feature extraction for the test data,
# following the same steps used for the training data:

freqs_test, psd_test = get_freqs_and_psd(x_test)
print(freqs.shape, psd_test.shape, freqs_test.shape)

# Band edges derived from the test frequency grid:
band_seq = list(np.linspace(freqs_test.min(), freqs_test.max(), n_bands + 1))
low, high = band_seq[:2]

n_samps, n_channs, f = psd_test.shape
band_power_test = np.zeros((n_samps, n_channs, n_bands))

# One band-power value per (sample, channel, band) triple:
for i, j, k in np.ndindex(n_samps, n_channs, n_bands):
    band_power_test[i, j, k] = get_band_power(
        freqs_test, psd_test[i, j, :], band_seq[k], band_seq[k + 1]
    )

print('n_bands: ', n_bands)

In [None]:
# Train scikit-learn's random forest classifier on the training-set band power
# features computed above, then predict on both the training and the test
# features and report classification metrics for each.

# Fixed seed so the forest (and therefore the reported metrics) is reproducible
# across kernel restarts:
RANDOM_SEED = 42

# Flatten the (channel, band) axes into one feature axis, giving the
# (n_samples, n_features) layout expected by scikit-learn:
n_samps_train, n_channs, n_bands = band_power_train.shape
feats_train = band_power_train.reshape(n_samps_train, n_channs*n_bands)

# Same flattening for the test-set band power features:
n_samps_test, n_channs, n_bands = band_power_test.shape
feats_test = band_power_test.reshape(n_samps_test, n_channs*n_bands)


# Initialize RandomForestClassifier with a fixed random_state:
clf = RandomForestClassifier(random_state=RANDOM_SEED)

# Train the classifier using the training features and associated targets:
clf.fit(feats_train, y_train)

# Predictions on the data the model was fitted on (optimistic by construction):
predictions_train = clf.predict(feats_train)

# Predictions on the held-out test data:
predictions_test = clf.predict(feats_test)

# Compute and output classification metrics for the training data:

classif_report_train = metrics.classification_report(y_train, predictions_train)
conf_matrix_train = metrics.confusion_matrix(y_train, predictions_train)
print('classif_report_train: ', classif_report_train)
print('conf_matrix_train: ', conf_matrix_train)

# Compute and output classification metrics for the test data:
classif_report_test = metrics.classification_report(y_test, predictions_test)
conf_matrix_test = metrics.confusion_matrix(y_test, predictions_test)
print('classif_report_test: ', classif_report_test)
print('conf_matrix_test: ', conf_matrix_test)
            