In [1]:
import os
import pandas as pd
import numpy as np
import Data_Savior_J
from Data_Savior_J import load_file
import scipy as sp
from scipy import signal
from ml_time_series import generate_envelope

In [2]:
files = ['data_154743','data_155334','data_155743','data_161034', 'data_161420', 'rob_data_170159']

DATAFILE = np.load('DB_HAND/'+files[5]+'.npy')
df = pd.DataFrame(DATAFILE, columns=['Sample','Label'])

df.head()

Unnamed: 0,Sample,Label
0,323.0,0.0
1,576.0,0.0
2,775.0,0.0
3,630.0,0.0
4,270.0,0.0


In [3]:
DATAFILE.shape

(30000, 2)

In [4]:
%matplotlib notebook
import matplotlib.pyplot as plt

fig = plt.figure()

df['Sample'].plot(linewidth=0.8)
plt.title('Normal')

plt.axvspan(.0, 9999, facecolor='#F08080', alpha=0.5)
plt.axvspan(10000, 19999, facecolor='#70659e', alpha=0.5)
plt.axvspan(20000, 29999, facecolor='#F08080', alpha=0.5)
plt.annotate('Open hand', xy=(3500, df['Sample'].max()*1.01))  
plt.annotate('Closed hand', xy=(13500, df['Sample'].max()*1.01))
plt.annotate('Hand tightly closed', xy=(23500, df['Sample'].max()*1.01))

#fig_name = 'fig_' + files[0] + '.png'
fig.set_size_inches(w=11,h=7)
#fig.savefig(fig_name)

<IPython.core.display.Javascript object>

---
# Filtering
---

### RMS Function

In [5]:
import numpy as np
def window_rms(a, window_size):
    a2 = np.power(a,2)
    window = np.ones(window_size)/float(window_size)
    return np.sqrt(np.convolve(a2, window, 'valid'))

In [6]:
rms = window_rms(df['Sample'],400)

df_rms = pd.DataFrame(rms)

df_rms.plot(linewidth=0.8)
plt.title('RMS')

<IPython.core.display.Javascript object>

<matplotlib.text.Text at 0x7f99db05dc90>

### Filter Function

In [7]:
def filteremg(time, emg, low_pass=10., sfreq=2000., high_band=20., low_band=450.):
    """
    time: Time data
    emg: EMG data
    high: high-pass cut off frequency
    low: low-pass cut off frequency
    sfreq: sampling frequency
    """
    
    # normalise cut-off frequencies to sampling frequency
    high_band = high_band/(sfreq/2)
    low_band = low_band/(sfreq/2)
    
    # create bandpass filter for EMG
    b1, a1 = sp.signal.butter(4, [high_band,low_band], btype='bandpass')
    
    # process EMG signal: filter EMG
    emg_filtered = sp.signal.filtfilt(b1, a1, emg)    
    
    # process EMG signal: rectify
    emg_rectified = abs(emg_filtered)
    
    # create lowpass filter and apply to rectified signal to get EMG envelope
    low_pass = low_pass/sfreq
    b2, a2 = sp.signal.butter(4, low_pass, btype='lowpass')
    emg_envelope = sp.signal.filtfilt(b2, a2, emg_rectified)
    
    return emg_envelope

def plot_fremg(df):   
    # plot graphs
    fig = plt.figure()
    

    #plt.subplot(4, 1, 1).xaxis.set_visible(False)
    #plt.plot.set_title('Filtered, rectified ' + '\n' + 'EMG envelope: ' + str(int(40)) + ' Hz' + '\n VL-d')
    plt.plot(fr_emg['Time'], fr_emg['Filtered'])
    plt.locator_params(axis='x', nbins=4)
    plt.locator_params(axis='y', nbins=4)
    plt.axvspan(.0, 9999, facecolor='#F08080', alpha=0.5)
    plt.axvspan(10000, 19999, facecolor='#70659e', alpha=0.5)
    plt.axvspan(20000, 29999, facecolor='#F08080', alpha=0.5)
    plt.annotate('Open hand', xy=(3500, fr_emg['Filtered'].max()))  
    plt.annotate('Closed hand', xy=(13500, fr_emg['Filtered'].max()))
    plt.annotate('Hand tightly closed', xy=(23500, fr_emg['Filtered'].max()))
        

    fig_name = 'fig_' + files[0] + '_filtered.png'
    fig.set_size_inches(w=11,h=7)
    fig.savefig(fig_name)



In [8]:
df['Time'] = (range(0,len(df)))
# show what different low pass filter cut-offs do
fr_emg = pd.DataFrame(df['Time'])

emg_correctmean = np.array(df['Sample'])
fr_emg['Filtered'] = pd.DataFrame(filteremg(df['Time'], emg_correctmean, low_pass=30))


plot_fremg(fr_emg)

<IPython.core.display.Javascript object>

### Process automation

In [9]:
def filter_signal(emg, low_pass=10., sfreq=2000., high_band=20., low_band=450.):
    """
    emg: EMG data
    high: high-pass cut off frequency
    low: low-pass cut off frequency
    sfreq: sampling frequency
    """
    
    # normalise cut-off frequencies to sampling frequency
    high_band = high_band/(sfreq/2)
    low_band = low_band/(sfreq/2)
    
    # create bandpass filter for EMG
    b1, a1 = sp.signal.butter(4, [high_band,low_band], btype='bandpass')
    
    # process EMG signal: filter EMG
    emg_filtered = sp.signal.filtfilt(b1, a1, emg)    
    
    # process EMG signal: rectify
    emg_rectified = abs(emg_filtered)
    
    # create lowpass filter and apply to rectified signal to get EMG envelope
    low_pass = low_pass/sfreq
    b2, a2 = sp.signal.butter(4, low_pass, btype='lowpass')
    emg_envelope = sp.signal.filtfilt(b2, a2, emg_rectified)
    
    return emg_envelope

In [10]:
def plot_fremg(data):   
    # plot graphs
    fig = plt.figure()
    

    #plt.subplot(4, 1, 1).xaxis.set_visible(False)
    #plt.plot.set_title('Filtered, rectified ' + '\n' + 'EMG envelope: ' + str(int(40)) + ' Hz' + '\n VL-d')
    plt.plot(data)
    plt.locator_params(axis='x', nbins=4)
    plt.locator_params(axis='y', nbins=4)
    plt.axvspan(.0, 9999, facecolor='#F08080', alpha=0.5)
    plt.axvspan(10000, 19999, facecolor='#70659e', alpha=0.5)
    plt.axvspan(20000, 29999, facecolor='#F08080', alpha=0.5)
    plt.annotate('Open hand', xy=(3500, data.max()))  
    plt.annotate('Closed hand', xy=(13500, data.max()))
    plt.annotate('Hand tightly closed', xy=(23500, data.max()))
        

    fig_name = 'fig_' + str(int(40)) + '.png'
    fig.set_size_inches(w=11,h=7)
    fig.savefig(fig_name)

In [197]:
#plt.figure()
#plt.plot(filter_signal(df['Sample'], low_pass=3)[50:])

In [13]:
data = np.load('DB_HAND/'+'data_154743'+'.npy')

In [14]:
data = filter_signal(data[:,0], low_pass=3)

In [22]:
#plt.plot(data)

---
# Processing
---

In [84]:
def proccess_data(file_,DATA):
    
    #datafile = os.path.join(DATADIR, DATAFILE)
    #datafile = DATAFILE

    # Remove header, Nan and trash
    data = np.load('DB_HAND/'+file_+'.npy')
    data = filter_signal(data[:,0], low_pass=30)
    #plot_fremg(data)
    Xc_1 = data[:10000]
    Xc_2 = data[10000:20000]
    Xc_3 = data[20000:]
    
    print '\nXc_1 cleaned shape ', Xc_1.shape
    print 'Xc_2 cleaned shape ', Xc_2.shape
    print 'Xc_3 cleaned shape ', Xc_3.shape
    
    #Create temporal serie
    NUM_SAMPLES = 500
    
    Xc_1 = generate_envelope(Xc_1, NUM_SAMPLES)
    Xc_2 = generate_envelope(Xc_2, NUM_SAMPLES)
    Xc_3 = generate_envelope(Xc_3, NUM_SAMPLES)
    
    print 'Xc_1 temporal-serie shape ', Xc_1.shape
    print 'Xc_2 temporal-serie shape ', Xc_2.shape
    print 'Xc_3 temporal-serie shape ', Xc_3.shape
    
    #Labeling the type of movement
    C = (np.ones(len(Xc_1))*0).reshape((len(Xc_1),1))
    Xc_1 = np.hstack((Xc_1.reshape(Xc_1.shape),C.reshape((len(Xc_1),1))))


    C = (np.ones(len(Xc_2))*1).reshape((len(Xc_2),1))
    Xc_2 = np.hstack((Xc_2.reshape(Xc_2.shape),C.reshape((len(Xc_2),1))))
    
    C = (np.ones(len(Xc_3))*2).reshape((len(Xc_3),1))
    Xc_3 = np.hstack((Xc_3.reshape(Xc_3.shape),C.reshape((len(Xc_3),1))))
    
    print 'Xc_1 labeled shape ', Xc_1.shape
    print 'Xc_2 labeled shape ', Xc_2.shape
    print 'Xc_3 labeled shape ', Xc_3.shape
    
    # Salving in file on the folder <classifier_data>
    from Data_Savior_J import save_it_now
    save_it_now(Xc_1, './preproc_filtered_data/'+file_+'_f_honp.'+'data')
    print file_+'_o.'+'data'
    
    save_it_now(Xc_2, './preproc_filtered_data/'+file_+'_f_hclp.'+'data')
    print file_+'_cmf.'+'data'
    
    save_it_now(Xc_3, './preproc_filtered_data/'+file_+'_f_hchp.'+'data')
    print file_+'_ctf.'+'data'
    
    DATA.append(file_+'_f_honp.'+'data')
    DATA.append(file_+'_f_hclp.'+'data')
    DATA.append(file_+'_f_hchp.'+'data')

In [85]:
def proccess_data_validation(file_,DATA):
    
    #datafile = os.path.join(DATADIR, DATAFILE)
    #datafile = DATAFILE

    # Remove header, Nan and trash
    data = np.load('DB_HAND/'+file_+'.npy')
    data = filter_signal(data[:,0], low_pass=30)
    Xc_t = data
    print '\nXc_t cleaned shape ', Xc_t.shape
    
    #Create temporal serie
    NUM_SAMPLES = 500
    
    Xc_t = generate_envelope(Xc_t, NUM_SAMPLES)
    print 'Xc_t temporal-serie shape ', Xc_t.shape
    from Data_Savior_J import save_it_now
    save_it_now(Xc_t, './preproc_filtered_data/'+file_+'validation.'+'data')
    print file_+'validation.'+'data'
    
    DATA.append(file_+'validation.'+'data')

In [86]:
files = ['data_154743','data_155334','data_155743','data_161034', 'data_161420']
DATA = []
for i in files:
    proccess_data(i,DATA)


Xc_1 cleaned shape  (10000,)
Xc_2 cleaned shape  (10000,)
Xc_3 cleaned shape  (10000,)
Xc_1 temporal-serie shape  (9501, 500)
Xc_2 temporal-serie shape  (9501, 500)
Xc_3 temporal-serie shape  (9501, 500)
Xc_1 labeled shape  (9501, 501)
Xc_2 labeled shape  (9501, 501)
Xc_3 labeled shape  (9501, 501)
Saved to file
data_154743_o.data
Saved to file
data_154743_cmf.data
Saved to file
data_154743_ctf.data

Xc_1 cleaned shape  (10000,)
Xc_2 cleaned shape  (10000,)
Xc_3 cleaned shape  (10000,)
Xc_1 temporal-serie shape  (9501, 500)
Xc_2 temporal-serie shape  (9501, 500)
Xc_3 temporal-serie shape  (9501, 500)
Xc_1 labeled shape  (9501, 501)
Xc_2 labeled shape  (9501, 501)
Xc_3 labeled shape  (9501, 501)
Saved to file
data_155334_o.data
Saved to file
data_155334_cmf.data
Saved to file
data_155334_ctf.data

Xc_1 cleaned shape  (10000,)
Xc_2 cleaned shape  (10000,)
Xc_3 cleaned shape  (10000,)
Xc_1 temporal-serie shape  (9501, 500)
Xc_2 temporal-serie shape  (9501, 500)
Xc_3 temporal-serie shape 

In [101]:
dt = []
#proccess_data_validation('rob_data_170159',dt)
proccess_data_validation('data_154743',dt)


Xc_t cleaned shape  (30000,)
Xc_t temporal-serie shape  (29501, 500)
Saved to file
data_154743validation.data


In [2]:
#Validation set
Xvalid = load_file("preproc_filtered_data/"+dt[0])
X_valid = Xvalid[:,:499]
X_valid.shape

NameError: name 'dt' is not defined

#TESTE DA PORRA
Xc = np.load("DB_HAND/"+files[0]+'.npy')
for i in files[1:]:
    Xc = np.vstack((Xc,np.load("DB_HAND/"+i+'.npy'))) 
    
Xvalid = np.load("DB_HAND/rob_data_170159.npy")
X_valid = Xvalid[:,0]
X=Xc[:,0]
y=Xc[:,1]

In [89]:
Xc = load_file("preproc_filtered_data/"+DATA[0])
for i in DATA[1:]:
    Xc = np.vstack((Xc,load_file("preproc_filtered_data/"+i))) 

Xc = load_file("preproc_filtered_data/Xc_set.data")
Xvalid = load_file("preproc_filtered_data/rob_data_170159validation.data")
X_valid = Xvalid[:,:499]
X_valid.shape

In [4]:
Xc.shape

(142515, 501)

In [5]:
X = Xc[:,0:(Xc.shape[1]-2)]

yz = Xc[:,[(Xc.shape[1]-1)]]
y = np.array([])
for i in range(len(yz)):
    y = np.hstack((y,yz[i]))

X.shape, y.shape

((142515, 499), (142515,))

In [6]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=10)

In [7]:
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
sc.fit(X_train)
X_train_std = sc.transform(X_train)
X_test_std = sc.transform(X_test)
X_valid_std = sc.transform(X_valid)

In [8]:
print X_train.shape, y_train.shape, X_test.shape

(106886, 499) (106886,) (35629, 499)


In [9]:
print X_train_std.shape, X_test_std.shape, X_valid_std.shape, y_test.shape

(106886, 499) (35629, 499) (29501, 499) (35629,)


# Random Forest Classifier

In [96]:
#Training

from sklearn.ensemble import RandomForestClassifier
rfc = RandomForestClassifier(n_estimators=100)

print 'Training time'
%time rfc = rfc.fit(X_train_std, y_train)

Training time
CPU times: user 4min 59s, sys: 404 ms, total: 4min 59s
Wall time: 5min


In [112]:
#Test

print 'Predict time'
%time y_pred = rfc.predict(X_test_std)


print''

from sklearn.metrics import accuracy_score
print ('ClassifyRF accuracy:---------->%.2f %%' % (accuracy_score(y_test, y_pred)*100))

Predict time
CPU times: user 980 ms, sys: 104 ms, total: 1.08 s
Wall time: 1.09 s

ClassifyRF accuracy:---------->99.99 %


In [113]:
print("Accuracy: %.3f%% (%.3f%%)") % (y_pred.mean()*100.0, y_pred.std()*100.0)

Accuracy: 100.067% (81.734%)


In [114]:
#Validation

print 'Predict time'
%time y_pred = rfc.predict(X_valid_std)


print''

from sklearn.metrics import accuracy_score
print ('ClassifyRF accuracy:---------->%.2f %%' % (accuracy_score(y_pred, y_test[:len(X_valid_std)])*100))

Predict time
CPU times: user 524 ms, sys: 12 ms, total: 536 ms
Wall time: 538 ms

ClassifyRF accuracy:---------->33.18 %


In [115]:
print("Accuracy: %.3f%% (%.3f%%)") % (y_pred.mean()*100.0, y_pred.std()*100.0)

Accuracy: 99.712% (81.457%)


## Confusion Matrix

In [149]:
from sklearn.metrics import confusion_matrix
import itertools
import matplotlib.pyplot as plt

%matplotlib notebook
class_names = np.array(['HONP', 'HCLP', 'HCHP'])

def plot_confusion_matrix(cm, classes,
                          normalize=False,
                          title='Confusion matrix',
                          cmap=plt.cm.Blues):
    """
    This function prints and plots the confusion matrix.
    Normalization can be applied by setting `normalize=True`.
    """
    plt.imshow(cm, interpolation='nearest', cmap=cmap)
    plt.title(title)
    plt.colorbar()
    tick_marks = np.arange(len(classes))
    plt.xticks(tick_marks, classes, rotation=45)
    plt.yticks(tick_marks, classes)

    
    if normalize:
        cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]
        print("Normalized confusion matrix")
    else:
        print('Confusion matrix, without normalization')

    print(cm)

    thresh = cm.max() / 2.
    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        plt.text(j, i, cm[i, j],
                 horizontalalignment="center",
                 color="white" if cm[i, j] > thresh else "black")

    plt.tight_layout()
    plt.ylabel('True label')
    plt.xlabel('Predicted label')

# Compute confusion matrix
cnf_matrix = confusion_matrix(y_test, y_pred)
np.set_printoptions(precision=2)

# Plot non-normalized confusion matrix
plt.figure()
plot_confusion_matrix(cnf_matrix, classes=class_names,
                      title='Confusion matrix, without normalization')

# Plot normalized confusion matrix
plt.figure()
plot_confusion_matrix(cnf_matrix, classes=class_names, normalize=True,
                      title='Normalized confusion matrix')

plt.show()

<IPython.core.display.Javascript object>

Confusion matrix, without normalization
[[11171  1082   209]
 [ 1738  9879   851]
 [  318  1125 10944]]


<IPython.core.display.Javascript object>

Normalized confusion matrix
[[ 0.9   0.09  0.02]
 [ 0.14  0.79  0.07]
 [ 0.03  0.09  0.88]]



# Support Vector Machine

In [15]:
#Training

from sklearn import svm, datasets

print 'Processing time SVM'
%time svm = svm.SVC(kernel='rbf', gamma=0.2, C=6, decision_function_shape='ovr').fit(X_train_std, y_train)

print ''

from sklearn.metrics import accuracy_score

Processing time SVM
CPU times: user 2h 32min 31s, sys: 4.66 s, total: 2h 32min 36s
Wall time: 2h 32min 49s



In [17]:
#Test

print 'Predict time'  
%time y_pred = svm.predict(X_test_std)
from sklearn.metrics import accuracy_score
print ('SVM-RBF accuracy:---------->%.2f %%' % (accuracy_score(y_test, y_pred)*100))

Predict time
CPU times: user 25min 9s, sys: 1.12 s, total: 25min 10s
Wall time: 25min 12s
SVM-RBF accuracy:---------->83.53 %


In [19]:
#Validation

print 'Predict time'  
%time y_pred = svm.predict(X_valid_std)
from sklearn.metrics import accuracy_score
print ('SVM-RBF accuracy:---------->%.2f %%' % (accuracy_score(y_test[:len(X_valid_std)], y_pred)*100))

Predict time
CPU times: user 20min 14s, sys: 56 ms, total: 20min 14s
Wall time: 20min 15s
SVM-RBF accuracy:---------->33.21 %


# GridSearch

In [None]:
from sklearn.grid_search import GridSearchCV
param_grid = [
    {
        'C'     : [0.001, 0.01, 0.1, 1, 10, 50, 100, 1000], 
        'gamma' : [1000, 100, 80, 50, 35, 10, 1, 0.1, 0.01, 0.001, 0.0001], 
        'kernel': ['rbf'],
        'random_state' : [1,5,10,100,500,1000,10000]
    },
]    
clf = GridSearchCV(svm.SVC(decision_function_shape='ovr'), param_grid, cv=15)
%time clf.fit(X_train_std, y_train)

print("Best parameters set found on development set:")
print()
print(clf.best_params_)
print()
print("Grid scores on development set:")
print()
for params, mean_score, scores in clf.grid_scores_:
    print("%0.3f (+/-%0.03f) for %r"
          % (mean_score, scores.std() * 2, params))
print()

# Gaussian Naive Bayes Classifier

In [10]:
from sklearn.naive_bayes import GaussianNB

gnbc = GaussianNB()
print 'Training time'
%time gnbc.fit(X_train_std, y_train)

pred = gnbc.predict(X_test)

from sklearn.metrics import accuracy_score
print ('ClassifyNB accuracy:---------->%.2f %%' % (accuracy_score(y_test, pred)*100))

ClassifyNB accuracy:---------->33.45 %


# Ada Boost Classifier

In [11]:
from sklearn.ensemble import AdaBoostClassifier
abc = AdaBoostClassifier(n_estimators=100, learning_rate = 2.5395)
print 'Training time'
%time abc = abc.fit(X_train_std, y_train)
print''

print 'Predict time'
%time pred = abc.predict(X_test_std)

print''

from sklearn.metrics import accuracy_score


print ('ClassifyAB accuracy:---------->%.2f %%' % (accuracy_score(y_test, pred)*100))


Predict time
CPU times: user 1.39 s, sys: 8 ms, total: 1.4 s
Wall time: 1.4 s

ClassifyAB accuracy:---------->46.83 %


# KNeighbors Classifier

In [12]:
from sklearn.neighbors import KNeighborsClassifier
knc = KNeighborsClassifier(n_neighbors=8, weights = 'distance', algorithm = 'auto', p = 10)
print 'Training time'
%time knc = knc.fit(X_train_std, y_train)
print''

print 'Predict time'
%time pred = knc.predict(X_test_std)

print''

from sklearn.metrics import accuracy_score



print ('ClassifyKN accuracy:---------->%.2f %%' % (accuracy_score(y_test, pred)*100))


Predict time
CPU times: user 4min 27s, sys: 56 ms, total: 4min 27s
Wall time: 4min 28s

ClassifyKN accuracy:---------->100.00 %


In [21]:
print 'Predict time'
%time pred = knc.predict(X_valid_std)

print''

from sklearn.metrics import accuracy_score



print ('ClassifyKN accuracy:---------->%.2f %%' % (accuracy_score(y_test[:len(X_valid_std)], pred)*100))

Predict time
CPU times: user 31min 14s, sys: 472 ms, total: 31min 15s
Wall time: 31min 17s

ClassifyKN accuracy:---------->33.14 %


In [22]:
%%bash
paplay /usr/share/sounds/freedesktop/stereo/complete.oga

In [4]:
import smtplib

def send_email(msg, sendto='jorgeluizjk@gmail.com'):
    fromaddr = 'ohperaml@gmail.com'
    toaddrs  = sendto
    msg = msg


    # Credentials (if needed)
    username = 'ohperaml@gmail.com'
    password = 'ohperaml11'

    # The actual mail send
    server = smtplib.SMTP('smtp.gmail.com:587')
    server.starttls()
    server.login(username,password)
    server.sendmail(fromaddr, toaddrs, msg)
    server.quit()