# Hidden Markov Model

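This notebook runs the Hidden Markov Model (HMM) word classifier on the 30-word dataset. Note: the data-loading cell below currently reads its recordings from `datasets/electrode-brace/50x3`.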

In [6]:
# Data loading
import scipy.signal
import numpy as np
import pandas as pd

import feature_extractors

# Define configuration
INITIALIZATION_WINDOW = [0.5, 4.5] # seconds
EMG_SAMPLE_RATE = 250 # samples per second


def ms_to_samples(duration_ms, sample_rate=EMG_SAMPLE_RATE):
    """Convert a duration in milliseconds to a whole number of samples.

    The product is formed before the division so that integer inputs are
    handled with exact integer arithmetic. Dividing first in floating point
    and truncating afterwards can lose a sample, e.g.
    ``int(100 * (290 / 1000))`` evaluates to 28 rather than 29.

    Parameters:
        duration_ms: Duration in milliseconds.
        sample_rate: Sampling rate in samples per second.

    Returns:
        The number of samples as an ``int``, rounded down when the duration
        does not cover a whole number of samples.
    """
    return int(sample_rate * duration_ms // 1000)


# Window Configuration
WINDOW_SIZE = 400 # ms
WINDOW_STRIDE = 100 # ms
WINDOW_SAMPLE_SIZE = ms_to_samples(WINDOW_SIZE) # should be an integer result
WINDOW_SAMPLE_STRIDE = ms_to_samples(WINDOW_STRIDE) # should be an integer result

print(f"SAMPLES PER WINDOW: {WINDOW_SAMPLE_SIZE}")
print(f"SAMPLES PER STRIDE: {WINDOW_SAMPLE_STRIDE}")

# Define filters (second-order sections, applied per recording in the loading loop)
# 4th-order Butterworth high-pass with a 0.5 Hz cutoff
sos_highpass = scipy.signal.butter(4, 0.5, 'highpass', fs=EMG_SAMPLE_RATE, output='sos')
# 4th-order Butterworth band-stop between 48 Hz and 52 Hz
sos_notch_50hz = scipy.signal.butter(4, [48,52], 'bandstop', fs=EMG_SAMPLE_RATE, output='sos')

base_path = "../../datasets/electrode-brace/50x3"
df = pd.read_csv(f"{base_path}/metadata.csv")

# Sample indices bounding the part of each recording that is kept.
# They do not depend on the recording, so compute them once.
keep_start = int(INITIALIZATION_WINDOW[0]*EMG_SAMPLE_RATE)
keep_stop = int(INITIALIZATION_WINDOW[1]*EMG_SAMPLE_RATE)

# Collect per-recording results in lists and build the arrays once at the end;
# growing an ndarray with concatenate/append inside the loop re-copies
# everything accumulated so far on every iteration.
feature_list = []
label_list = []

for index, row in df.iterrows():

    # Load in npy from dataset (one file per metadata row, named by its 'id')
    emg = np.load(f"{base_path}/{row['id']}.npy")

    # Do basic analog filtering (all steps operate along axis 0)
    emg = emg - np.mean(emg, axis=0) # Remove DC
    emg = scipy.signal.sosfiltfilt(sos_highpass, emg, axis=0, padtype='even')
    emg = scipy.signal.sosfiltfilt(sos_notch_50hz, emg, axis=0, padtype='even')

    # Remove initialization to avoid initialization noise
    emg = emg[keep_start:keep_stop,:]
    # NOTE(review): F2 is defined in the project-local feature_extractors module;
    # it is assumed to return one fixed-shape feature array per recording - confirm.
    features = feature_extractors.F2(emg, window_size=WINDOW_SAMPLE_SIZE, window_stride=WINDOW_SAMPLE_STRIDE, sr=EMG_SAMPLE_RATE)

    feature_list.append(features)
    label_list.append(row['cls'])

if not feature_list:
    raise ValueError(f"No recordings listed in {base_path}/metadata.csv")

# X gains a leading axis with one entry per recording; y holds the matching 'cls' labels
X = np.stack(feature_list, axis=0)
y = np.array(label_list)

print("Generated Feature Matrix:")
print(X.shape)

print("Generated Label Matrix:")
print(y.shape)

SAMPLES PER WINDOW: 100
SAMPLES PER STRIDE: 25


Generated Feature Matrix:
(150, 37, 64)
Generated Label Matrix:
(150,)


In [7]:
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

# Seed for reproducible pseudo-randomness
# NOTE(review): nothing in this cell consumes `random_state`; the stratified
# split below is seeded with the literal 42 instead.
random_state = np.random.RandomState(1)

# Split strategy:
#   0 - stratified 80/20 train/test split
#   1 - train and test on the full dataset (no hold-out)
#   2 - positional split at HOLDOUT_START
split = 0

# First index of the test portion for split mode 2.
# NOTE(review): this value must be smaller than the number of recordings,
# otherwise the test set would be empty - the check below enforces that.
HOLDOUT_START = 443

if split == 0:
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)
elif split == 1:
    X_train = X
    X_test = X
    y_train = y
    y_test = y
elif split == 2:
    if HOLDOUT_START >= len(X):
        raise ValueError(
            f"HOLDOUT_START={HOLDOUT_START} leaves no test samples for a dataset of {len(X)} recordings"
        )
    X_train = X[:HOLDOUT_START]
    X_test = X[HOLDOUT_START:]
    y_train = y[:HOLDOUT_START]
    y_test = y[HOLDOUT_START:]
else:
    # Without this branch an unknown mode would only surface later as a NameError
    raise ValueError(f"Unknown split mode: {split!r}")

print(X_train.shape)
print(X_test.shape)
print(y_train.shape)
print(y_test.shape)

# Fit the encoder on all labels so train and test share one class-to-integer mapping
le = LabelEncoder()
le.fit(y)

y_labels = le.transform(y)
y_train = le.transform(y_train)
y_test = le.transform(y_test)

print(le.classes_)

(120, 37, 64)
(30, 37, 64)
(120,)
(30,)
['air' 'bat' 'cap']


In [8]:
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from joblib import dump, load

N_COMPONENTS = 2

# NOTE: this cell overwrites X_train / X_test with their LDA projections, so it
# is not idempotent. Re-run the split cell before running this cell again.

# Train an LDA
# Every frame (axis 1) of every recording becomes one LDA sample.
X_train_LDA = np.reshape(X_train, (-1, X_train.shape[2])) # Flatten frames
X_test_LDA = np.reshape(X_test, (-1, X_test.shape[2]))

print(X_train_LDA.shape)

# Give each frame the label of the recording it came from. The reshape above
# keeps a recording's frames contiguous, so repeating each label once per frame
# lines up with the flattened rows. The frame count is taken from X_train itself
# rather than from the unsplit X, so the cell only depends on the arrays it transforms.
y_train_LDA = np.repeat(y_train, X_train.shape[1])

# Fit on the training frames only; the test frames are projected with the fitted model
lda = LinearDiscriminantAnalysis(n_components=N_COMPONENTS)
X_train_LDA = lda.fit(X_train_LDA, y_train_LDA).transform(X_train_LDA)

X_test_LDA = lda.transform(X_test_LDA)

dump(lda, '../demo/lda.joblib')
# dump(lda, 'lda.joblib')

#scaler = preprocessing.StandardScaler()
#X_norm = scaler.fit(X_LDA).transform(X_LDA)

X_train = np.reshape(X_train_LDA, (X_train.shape[0], X_train.shape[1], -1)) # Unflatten frames
X_test = np.reshape(X_test_LDA, (X_test.shape[0], X_test.shape[1], -1)) # Unflatten frames

print(X_train.shape)
print(X_test.shape)



(4440, 64)
(120, 37, 2)
(30, 37, 2)


In [9]:
from hmmlearn import hmm
import numpy as np

def make_ltr_transition(n):
    """Build an (n, n) left-to-right transition matrix.

    Every state except the last either stays where it is or advances to the
    next state, each with probability 0.5. The last state only transitions to
    itself (probability 1.0).
    """
    # 0.5 on the main diagonal and on the first super-diagonal
    transmat = 0.5 * (np.eye(n) + np.eye(n, k=1))

    # The final state has no successor, so it keeps all of its probability mass
    if n > 0:
        transmat[-1, -1] = 1.0
    return transmat

def get_model(X, lengths, states=3, n_iter=5):
    """Train several left-to-right Gaussian HMMs and keep the best-scoring one.

    Each attempt builds a fresh ``hmm.GaussianHMM`` with diagonal covariances
    whose start distribution is fixed to the first state and whose transition
    matrix starts from ``make_ltr_transition(states)``. The model is fitted on
    ``X`` and scored on the same data; the attempt with the highest score wins.

    Parameters:
        X: Concatenated observation sequences, as accepted by ``GaussianHMM.fit``.
        lengths: Length of each individual sequence inside ``X``.
        states: Number of hidden states per model.
        n_iter: Number of independent training attempts (restarts). This is not
            the EM iteration limit, which is fixed at 100 per attempt.

    Returns:
        The fitted model with the highest log-likelihood on ``X``, or ``None``
        if every attempt failed.
    """
    import warnings

    best_model = None
    best_ll = None

    for attempt in range(n_iter):
        # init_params='c': hmmlearn initialises only the covariances itself.
        # params='cmt': training updates covariances, means and transitions,
        # while the start probabilities stay as set below.
        model = hmm.GaussianHMM(n_components=states, covariance_type="diag", n_iter=100, init_params='c', params='cmt')

        # Every sequence is forced to begin in the first state
        startprob = np.zeros(states)
        startprob[0] = 1.0
        model.startprob_ = startprob

        model.transmat_ = make_ltr_transition(states)
        try:
            model.fit(X, lengths)
            score = model.score(X, lengths)
        except Exception as exc:
            # Skip this restart but say why; a bare `except:` would also
            # swallow KeyboardInterrupt and hide the cause of the failure.
            warnings.warn(f"HMM training attempt {attempt + 1}/{n_iter} failed: {exc!r}")
            continue
        print(score)
        # Compare against None explicitly: a log-likelihood of exactly 0.0 is a
        # valid score and must not be treated as "no score yet". A NaN best
        # score is replaced as soon as another attempt succeeds, because every
        # ordering comparison against NaN is False.
        if best_ll is None or np.isnan(best_ll) or best_ll < score:
            best_ll = score
            best_model = model

    return best_model

# One HMM per class; the list index matches the LabelEncoder's integer label
models = []

for i in range(len(le.classes_)):
    # Select the training sequences that belong to class i
    mask = (y_train == i)
    X_mask = X_train[mask]

    # hmmlearn takes all sequences concatenated along axis 0 plus the length of each
    lengths = [len(x) for x in X_mask]
    X_mask_flat = np.reshape(X_mask, (-1, X_mask.shape[2]))

    print(i)
    print(X_mask.shape)
    model = get_model(X_mask_flat, lengths)
    if model is None:
        # get_model returns None when every training attempt failed. Stop here
        # instead of writing a None entry into the model file saved below.
        raise RuntimeError(f"No HMM could be trained for class {i} ({le.classes_[i]!r})")
    models.append(model)

# dump(models, 'hmm_models.joblib')
dump(models, '../demo/hmm_models.joblib')

0
(40, 37, 2)
-3661.837089962355
-3659.5766433424624
-3800.1192251029224
-3811.6694301297243
-3659.5774812346563
1
(40, 37, 2)
-3928.0664515429567
-3928.065415297674
-3688.9335129353053
-3957.8534681331344
-3957.8536662114907
2
(40, 37, 2)
-3278.581542864605
-3419.853954164098
-3295.140916077182
-3419.8539129009337
-3295.141045167137


['../demo/hmm_models.joblib']

In [10]:
# Classify every test sequence with the per-class HMMs and report the accuracy
correct = 0

X_acc = X_test
y_acc = y_test

for index, x in enumerate(X_acc):
    best_ll = None
    best_class = None

    # The predicted class is the one whose model gives the sequence the highest score
    for i, model in enumerate(models):
        score = model.score(x)
        # Compare against None explicitly: a score of exactly 0.0 is valid and
        # must not be mistaken for "no score yet". A NaN best score is replaced
        # by any later score, because comparisons against NaN are always False.
        if best_ll is None or np.isnan(best_ll) or best_ll < score:
            best_ll = score
            best_class = i

    # Printed as "<predicted> == <actual>"
    print(f"{best_class} == {y_acc[index]}")
    if y_acc[index] == best_class:
        correct += 1

print(correct/len(X_acc))

2 == 0
2 == 2
2 == 1
2 == 1
0 == 0
1 == 1
2 == 0
0 == 0
2 == 2
1 == 1
1 == 2
2 == 2
2 == 2
0 == 1
0 == 0
2 == 0
0 == 0
1 == 1
1 == 1
2 == 2
0 == 0
2 == 2
1 == 1
0 == 2
0 == 2
1 == 1
1 == 1
0 == 0
1 == 2
1 == 0
0.6333333333333333
