# Objectives:

1. Implement data preprocessing to summarize the features in the time series. The features hypothesized to be useful are as follows:
    * Band Spectrum Energy (eeg.get_power) (for EEG)
    * Fractal Dimension of Time Series (pyEEG)
    * Skewness and Kurtosis of the distributions (scipy.stats)
    * Regular stats (mean, median, max, min of time series)
    
2. Models to train:
    * Linear Chain Conditional Random Field with structured SVM solver with a 1-slack QP with L1 slack penalty (pystruct)
    * A simple SVM like Andreas (sklearn SVC)
    * A Hidden Markov model?
    
3. Cross Validate:
    * Build a proper function/system for leave-one-subject-out CV
    * Test models and ensembles on it


In [1]:
import importlib
import utils
importlib.reload(utils)
import utils
from utils import *

  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])
  from ._conv import register_converters as _register_converters


In [2]:
# Load the raw per-channel signals (names suggest two EEG channels and one
# EMG channel) for the train and test splits, plus the training labels.
(
    xtrain_eeg1,
    xtrain_eeg2,
    xtrain_emg,
    ytrain,
    xtest_eeg1,
    xtest_eeg2,
    xtest_emg,
) = load_data()

Loading xtrain...
Shapes: (64800, 512) (64800, 512) (64800, 512)
Loading ytrain...
Shape: (64800, 1)
Loading xtest...
Shapes: (43200, 512) (43200, 512) (43200, 512)


# Model Implementation: Conditional Random Field (CRF)

In [None]:
# Hold-out split for the CRF experiment: rows [0, idx) of the training data
# are used for fitting, the remaining rows [idx, end) for evaluation.
# NOTE(review): 21600 is presumably the number of epochs per subject
# (64800 training rows / 3 subjects) -- confirm against the data description.
idx = 21600 * 2

# --- Training portion -------------------------------------------------------
xtrain_eeg1_ = xtrain_eeg1[:idx]
xtrain_eeg2_ = xtrain_eeg2[:idx]
xtrain_emg_ = xtrain_emg[:idx]
eeg1_ = process_EEG(xtrain_eeg1_)
eeg2_ = process_EEG(xtrain_eeg2_)
emg_ = process_EMG(xtrain_emg_)
# Stack the per-channel feature matrices column-wise.
xtrain_ = np.concatenate((eeg1_, eeg2_, emg_), axis=1)
ytrain_classes = ytrain.values[:idx]
ytrain_ = ytrain.values[:idx]

# --- Held-out portion -------------------------------------------------------
# Slice with [idx:] rather than [idx:-1]: the latter silently discards the
# last row of the held-out data (and its label).
xtest_eeg1_ = xtrain_eeg1[idx:]
xtest_eeg2_ = xtrain_eeg2[idx:]
xtest_emg_ = xtrain_emg[idx:]
eeg1_ = process_EEG(xtest_eeg1_)
eeg2_ = process_EEG(xtest_eeg2_)
# The EMG channel must go through the same extractor as in the training
# portion (process_EMG); using process_EEG here would give the model
# different features at test time than it was fitted on.
emg_ = process_EMG(xtest_emg_)
xtest_ = np.concatenate((eeg1_, eeg2_, emg_), axis=1)
ytest_ = ytrain.values[idx:]

# xtest_eeg1_ = xtrain_eeg1[idx:-1]
# xtest_eeg2_ = xtrain_eeg2[idx:-1]
# xtest_final = 

In [None]:
# CRF: fit a chain CRF (via a one-slack structured SVM) on the training
# portion and report the balanced accuracy on the held-out portion.

# Reshape so that it works with the CRF: the flat (n_samples, n_features)
# matrix is split into 2 equal-length sequences.
# NOTE(review): presumably one sequence per training subject -- confirm.
n_features = xtrain_.shape[1]
xtrain_crf = np.reshape(xtrain_, (2, -1, n_features))
# Labels are shifted down by one so that classes 1..3 become 0..2.
ytrain_crf = np.reshape(ytrain_, (2, -1)) - 1
print(xtrain_crf.shape, ytrain_crf.shape)

print("Starting CRF...")
classes = np.array([1, 2, 3])
train_labels = np.reshape(ytrain_classes, (-1,))
# `classes` and `y` are passed by keyword and as arrays: recent scikit-learn
# releases make them keyword-only, and keywords also work on older releases.
weights_crf = compute_class_weight("balanced", classes=classes, y=train_labels)
# Hand-tuned additive boosts on top of the balanced weights for the first two
# classes (values kept from the original experiment).
CLASS_1_WEIGHT_BOOST = 2.5
CLASS_2_WEIGHT_BOOST = 1.5
weights_crf[0] += CLASS_1_WEIGHT_BOOST
weights_crf[1] += CLASS_2_WEIGHT_BOOST

model = ChainCRF(class_weight=weights_crf)
ssvm = OneSlackSSVM(model=model, C=0.5, max_iter=2000)

ssvm.fit(xtrain_crf, ytrain_crf)


# Evaluate on the held-out portion, treated as a single sequence.
xtest_crf = np.reshape(xtest_, (1, -1, n_features))
ytest_crf = np.reshape(ytest_, (1, -1)) - 1
print(xtest_crf.shape, ytest_crf.shape)
y_pred_crf = ssvm.predict(xtest_crf)
# Flatten the predicted sequence and undo the label shift (0..2 -> 1..3).
y_pred_crf = np.asarray(y_pred_crf).reshape(-1) + 1

print("BMAC:", sklearn.metrics.balanced_accuracy_score(ytest_, y_pred_crf))


# y_pred_filtered_crf = medfilt(y_pred_crf, FILTER_WINDOW)


# print(f"CRF unique predictions {np.unique(y_pred_filtered_crf)}")


# CRF: Run the Model Under Cross-Validation

In [None]:
# Cross-validate the CRF once with a fixed set of hyper-parameters and show
# the scores returned by losocv_CRF.
res = losocv_CRF(
    xtrain_eeg1,
    xtrain_eeg2,
    xtrain_emg,
    ytrain,
    C=0.1,
    weight_shift=1,
    fs=128,
)
print(res)

# Dec 18: C=0.1, w=0, simple EMG features: 0.94 average. With weight_shift=1 the solver actually converges; otherwise it does not.

# CRF Submission

In [8]:
# Fit the CRF on the full training set, predict the test set and write the
# submission file.
# The hyper-parameters are named once so that the output file name always
# reflects the values actually used: the previous hard-coded name said
# "c0.6" although the model was trained with C=0.1.
# NOTE(review): the grid-search log in this notebook reports the best mean
# BMAC at C=0.6 -- confirm whether C=0.1 or C=0.6 was intended here.
submit_C = 0.1
submit_weight_shift = 1
pred = CRF_submit(
    xtrain_eeg1,
    xtrain_eeg2,
    xtrain_emg,
    ytrain,
    xtest_eeg1,
    xtest_eeg2,
    xtest_emg,
    C=submit_C,
    weight_shift=submit_weight_shift,
    fs=128,
)
make_submission(f'CRFc{submit_C}w{submit_weight_shift}.csv', pred)


  (p, _, _, _) = numpy.linalg.lstsq(x, L)
  np.sum((np.power(np.abs(signal),(-0.3)) > 20), axis=1),


# CRF Grid Search

In [5]:
# Grid search over the SSVM regularisation constant C and the class-weight
# shift; every combination is scored with losocv_CRF and the per-run scores
# are collected in total_res (in iteration order).
Cs = [0.5, 0.55, 0.58, 0.6, 0.62, 0.64, 0.68, 0.7, 0.75]
weight_shifts = [1]
total_res = []
for c in Cs:
    for w in weight_shifts:
        res = losocv_CRF(
            xtrain_eeg1,
            xtrain_eeg2,
            xtrain_emg,
            ytrain,
            C=c,
            weight_shift=w,
            fs=128,
        )
        total_res.append(res)
        # Mean, standard deviation and the raw scores for this setting.
        summary = (np.mean(res), np.std(res), res)
        print("C:", c, ", w:", w, ", BMAC (Mean, Std, full):", summary)
        print("********************************")

# Results, December 14th: Cs and weight shift, EEG and EMG running process_EMG.
# C: 0.1 , w: 0 , BMAC (Mean, Std): (0.915638922375751, 0.05039793710589233)

# Dec 18
# C: 0.1 , w: 0 , BMAC (Mean, Std, full): (0.8105643639593416, 0.1823532025093181, [0.9057309406411344, 0.9705540211342516, 0.5554081301026389])
# C: 0.2 , w: 0 , BMAC (Mean, Std, full): (0.8320971763898081, 0.15734031410977364, [0.9146093715452001, 0.9698041054523155, 0.6118780521719084])

# Dec 18 grid search with EMG running process_EEG
# Iteration, result: 0 0.9079745357325523
# Iteration, result: 1 0.9466763536193374
# Iteration, result: 2 0.948981772088117
# C: 0.05 , w: 1 , BMAC (Mean, Std, full): (0.9345442204800022, 0.018811164225236095, [0.9079745357325523, 0.9466763536193374, 0.948981772088117])
# ********************************
# Iteration, result: 0 0.9173226358874063
# Iteration, result: 1 0.9547583809426654
# Iteration, result: 2 0.9575831838587567
# C: 0.1 , w: 1 , BMAC (Mean, Std, full): (0.9432214002296094, 0.018349466363147493, [0.9173226358874063, 0.9547583809426654, 0.9575831838587567])
# ********************************
# Iteration, result: 0 0.9218756442880339
# Iteration, result: 1 0.9563778895201546
# Iteration, result: 2 0.9601677342906215
# C: 0.15 , w: 1 , BMAC (Mean, Std, full): (0.9461404226996034, 0.01722740712484969, [0.9218756442880339, 0.9563778895201546, 0.9601677342906215])
# ********************************
# Iteration, result: 0 0.9239779019222256
# Iteration, result: 1 0.9629387386786116
# Iteration, result: 2 0.9592762662759097
# C: 0.2 , w: 1 , BMAC (Mean, Std, full): (0.9487309689589156, 0.017566809078814704, [0.9239779019222256, 0.9629387386786116, 0.9592762662759097])
# ********************************
# Iteration, result: 0 0.9251862473404887
# Iteration, result: 1 0.9639795723967083
# Iteration, result: 2 0.9607644254552966
# C: 0.25 , w: 1 , BMAC (Mean, Std, full): (0.9499767483974978, 0.017578604409089597, [0.9251862473404887, 0.9639795723967083, 0.9607644254552966])
# ********************************
# Iteration, result: 0 0.9265861484008573
# Iteration, result: 1 0.9637316211236263
# Iteration, result: 2 0.960730526001078
# C: 0.3 , w: 1 , BMAC (Mean, Std, full): (0.9503494318418539, 0.016847786657926366, [0.9265861484008573, 0.9637316211236263, 0.960730526001078])
# ********************************
# Iteration, result: 0 0.926784099672131
# Iteration, result: 1 0.963657873189657
# Iteration, result: 2 0.9605292556024607
# C: 0.4 , w: 1 , BMAC (Mean, Std, full): (0.9503237428214163, 0.016693974196268068, [0.926784099672131, 0.963657873189657, 0.9605292556024607])
# ********************************
# Iteration, result: 0 0.92784839763847
# Iteration, result: 1 0.9655970286576371
# Iteration, result: 2 0.9612160364123156
# C: 0.6 , w: 1 , BMAC (Mean, Std, full): (0.9515538209028076, 0.016857413648390978, [0.92784839763847, 0.9655970286576371, 0.9612160364123156])
# ********************************
# Iteration, result: 0 0.9265366010353305
# Iteration, result: 1 0.9656693419824388
# Iteration, result: 2 0.9605270519131293
# C: 0.8 , w: 1 , BMAC (Mean, Std, full): (0.9509109983102996, 0.017362684482873845, [0.9265366010353305, 0.9656693419824388, 0.9605270519131293])
# ********************************
# Iteration, result: 0 0.9241461733266849
# Iteration, result: 1 0.9666668316689847
# Iteration, result: 2 0.9609692320544577
# C: 1.0 , w: 1 , BMAC (Mean, Std, full): (0.9505940790167092, 0.018845590941175522, [0.9241461733266849, 0.9666668316689847, 0.9609692320544577])
# ********************************
# Iteration, result: 0 0.9224415449966106
# Iteration, result: 1 0.9668854781229045
# Iteration, result: 2 0.9561918352434254
# C: 2.0 , w: 1 , BMAC (Mean, Std, full): (0.9485062861209802, 0.0189405481602964, [0.9224415449966106, 0.9668854781229045, 0.9561918352434254])
# ********************************
# Iteration, result: 0 0.9180049502109369
# Iteration, result: 1 0.9661411949814905
# Iteration, result: 2 0.9473559863169196
# C: 5.0 , w: 1 , BMAC (Mean, Std, full): (0.943834043836449, 0.019808711455307933, [0.9180049502109369, 0.9661411949814905, 0.9473559863169196])
# ********************************
# Iteration, result: 0 0.9163655769033707
# Iteration, result: 1 0.9684931245342258
# Iteration, result: 2 0.940308265070561
# C: 8.0 , w: 1 , BMAC (Mean, Std, full): (0.9417223221693858, 0.021304459214146036, [0.9163655769033707, 0.9684931245342258, 0.940308265070561])
# ********************************

  (p, _, _, _) = numpy.linalg.lstsq(x, L)
  np.sum((np.power(np.abs(signal),(-0.3)) > 20), axis=1),


Iteration, result: 0 0.9272650591985349


KeyboardInterrupt: 