In [1]:
from ml.logutils import *
from ml.processing import *
from deep_dict_utils import *
import numpy as np
from ezprettyjson import prettyjson
import gzip, cPickle
import keras.layers.containers as containers
from keras.layers.core import Dense, Dropout
from keras.layers.noise import GaussianNoise
from keras.models import Sequential
from keras.regularizers import WeightRegularizer
from keras.utils import np_utils
import pandas as pd
import matplotlib.pyplot as mpp
import seaborn as sns

Using Theano backend.


Using gpu device 0: GeForce GTX 970M


In [2]:
# Print NumPy arrays with three decimals and without scientific notation,
# which keeps the evaluation/count dumps further down readable.
np.set_printoptions(precision=3, suppress=True)

In [3]:
# Load the recorded session logs from the relative directory 'logs/joren'.
# `loadall` is a project helper pulled in by one of the star imports; the
# preprocessing code iterates the result with `.iteritems()` as
# (file name, file content) pairs, so it is expected to be a dict.
# NOTE(review): the path is relative to the notebook's working directory.
joren_logs = loadall('logs/joren')

In [4]:
# Preprocessing pipeline: every stage maps file name -> list of entries.
# Python 2 only: relies on dict.iteritems(), the builtin reduce() and on
# map()/filter() returning lists. All helpers used here (handle_data_entry,
# change_drop, smart_shift, fix_length, merge, wavelet_trans, fourier_trans,
# extremes) and the constants phase_names, drop_seconds and shift come from
# the star imports at the top of the notebook.

# Stage 1: run handle_data_entry over every raw log entry and discard the
# falsy results (filter(None, ...)).
handled = {fname: filter(None, map(handle_data_entry, fc))
           for (fname, fc) in joren_logs.iteritems()}
# Stage 2: fold change_drop over the entries, starting from the state
# (phase_names[0], drop_seconds, []), and keep the third element of the final
# state.
# NOTE(review): presumably this drops the first `drop_seconds` of data after
# each phase change -- confirm against change_drop in ml.processing.
dropped = {fname: reduce(change_drop, fc, (phase_names[0], drop_seconds, []))[2]
           for (fname, fc) in handled.iteritems()}
# Stage 3: call smart_shift on every pair of consecutive entries
# (entries[k], entries[k+1]) with the `shift` setting and flatten the returned
# sequences into one list per file.
shifted = {fname: [e
                   for l in map(lambda c,n: smart_shift(c, n, shift),
                               entries[0:-1],
                               entries[1:])
                   for e in l]
           for fname, entries in dropped.iteritems()}
# Stage 4: copy each record and replace its 'raw' field by the merge() of
# three transforms (wavelet_trans, fourier_trans, extremes), each applied to
# fix_length(r['raw'], 512). fix_length is re-evaluated once per transform.
transformed = {fname: map(lambda r: dict(r, **{ 'raw': merge(*map(lambda f: f(fix_length(r['raw'], 512)),
                                                                  [wavelet_trans, fourier_trans, extremes])) }),
                          fc)
               for (fname, fc) in shifted.iteritems() }

In [5]:
# Group the transformed entries per player, attach statistics, and normalise
# every file with statistics loaded from disk. Python 2 only (iteritems,
# iterkeys, cPickle).

# (field name, function) pairs handed to getstats().
# NOTE(review): presumably the function turns a field into a flat list of
# values -- confirm against getstats/detvalues.
nfs = [('raw', list), ('bands', detvalues), ('eSense', detvalues)]
# Distinct player names derived from the file names.
pnames = {pname_for(fname) for fname in transformed.iterkeys()}
# First binding of `perplayer`: player name -> one flat list concatenating the
# contents of all files that map to that player (non-list contents are wrapped
# in a single-element list first).
perplayer = {pname: sum(((fc if isinstance(fc,list) else [fc])
                         for (fname, fc) in transformed.iteritems()
                         if pname_for(fname) == pname), 
                        [])
             for pname in pnames }
# Second binding of `perplayer` (same name, different structure): player name
# -> dict holding 'stats' over the player's combined data plus one entry per
# file of the form {'data': ..., 'stats': ...}.
# NOTE(review): a file whose name is literally 'stats' would overwrite the
# player-level 'stats' entry.
perplayer = {pname: dict({'stats': getstats({pname: pc}, nfs)},
                         **{fname: {'data': fc
                                   ,'stats': getstats({fname: fc}, nfs)}
                            for (fname, fc) in transformed.iteritems()
                            if pname_for(fname) == pname})
             for (pname, pc) in perplayer.iteritems()}
# The statistics used for normalisation are read from a pickle on disk rather
# than computed from the logs loaded above.
# NOTE(security): unpickling executes arbitrary code if the file is untrusted.
with gzip.open('ml/unscaled.pkl.gz', 'r') as f:
    allstats = cPickle.load(f)['stats']
result = {'stats': allstats, 'players': perplayer}
# Normalise each file's data with the loaded statistics.
# NOTE(review): the result is keyed by player name although the inner loop
# runs per file, so if a player has several files only the last one iterated
# survives -- confirm that this is intended.
scaled = {pname: normalized_by(fe['data'], result['stats'])
          for (pname, pe) in result['players'].iteritems()
               for (fname, fe) in pe.iteritems()
               if fname != 'stats'}

In [24]:
# Rebuild the feed-forward classifier and restore its trained weights.
# Layer widths: the first value is the input dimension, the rest are the
# sigmoid hidden layers; a softmax layer with one unit per phase goes on top.
layer_sizes = [100, 64, 32, 16]
# Noise schedule: the noise in front of layer k has sigma
# sigma_base * sigma_fact ** -k. With sigma_base = 0.0 every sigma is zero.
sigma_base = 0.0
sigma_fact = 1.0

model = Sequential()
# Weight regularisation and dropout are switched off in this configuration
# (W_regularizer is None everywhere and no Dropout layers are added).
for depth in range(len(layer_sizes) - 1):
    width_in = layer_sizes[depth]
    width_out = layer_sizes[depth + 1]
    noise_sigma = sigma_base * (sigma_fact ** -depth)
    model.add(GaussianNoise(noise_sigma, input_shape=[width_in]))
    model.add(Dense(input_dim=width_in,
                    output_dim=width_out,
                    activation='sigmoid',
                    W_regularizer=None))

# Output layer: one softmax unit per entry of phase_names.
model.add(Dense(input_dim=layer_sizes[-1],
                output_dim=len(phase_names),
                activation='softmax',
                W_regularizer=None))

# Restore the snapshot first, then compile so the model can be evaluated.
model.load_weights('ml/model-snapshots/cp-160819-001547-340-0.5504.hdf5')
model.compile(loss='categorical_crossentropy', optimizer='adadelta')

In [7]:
def _labels_and_features(entries):
    """Split scaled entries into a (phases, raw feature vectors) pair of tuples."""
    pairs = [(entry['phase'], entry['raw']) for entry in entries]
    return zip(*pairs)

# Protocol (training) session and free-play session of the same subject.
train_labels, train_data = _labels_and_features(scaled['joren-train'])
play_labels, play_data = _labels_and_features(scaled['joren-justplay'])

In [19]:
# Combine the 'bands' field of every training entry with smart_combine
# (the frame below is built from its result, one column per key).
band_series = reduce(smart_combine,
                     map(get('bands'), scaled['joren-train']))
# Thin each series out: keep the maximum of every 33rd element (positions
# 0, 33, 66, ...) so the line plot stays readable.
# NOTE(review): the period is 33 here while mean_per() defaults to windows of
# 32 -- confirm whether 32 was intended.
df = pd.DataFrame.from_dict(
    dmap(lambda seq: [max(list(item))
                      for (pos, item) in enumerate(seq)
                      if pos % 33 == 0],
         band_series))
df.plot(kind='line')
mpp.show()

In [28]:
# Evaluate the restored model separately on the training samples of each phase.
# For every phase two things are printed: the [loss, accuracy] pair returned
# by model.evaluate, and how many samples were predicted as each class.

# Group the feature vectors by their ground-truth phase label.
data_by_label = {}
for (l, d) in zip(train_labels, train_data):
    data_by_label.setdefault(l, []).append(d)

# Number of classes. Taken from phase_names because that is also what sizes
# the model's softmax layer, so the one-hot targets always match its output.
phl = len(phase_names)
eye = np.identity(phl)
tc  = np_utils.to_categorical
for key in ['DISTRACT', 'RELAXOPEN', 'RELAXCLOSED', 'CASUAL', 'INTENSE']:
    if key not in data_by_label:
        # No sample carries this label; report it instead of raising KeyError.
        print('%s: no samples' % key)
        continue
    arr = np.array(data_by_label[key])
    # Every sample in this group has the same target class, so look the index
    # up once instead of once per sample.
    targets = tc([phase_names.index(key)] * len(arr), phl)
    eva = model.evaluate(arr
                        ,targets
                        ,batch_size=100
                        ,show_accuracy=True)
    # One-hot encode the arg-max of every prediction row and sum per class.
    winners = np.argmax(model.predict(arr, batch_size=100), axis=1)
    counts = np.sum(eye[winners], axis=0)
    print(eva)
    print(counts)

[1.5350387275480388, 0.36842105263157893]
[ 672.  250.   52.  349.  501.]
[1.1795333568696622, 0.69270833333333337]
[   55.  1197.   354.   112.    10.]
[1.0092426070736513, 0.62442129629629628]
[   21.   598.  1079.    26.     4.]
[1.7436791323574192, 0.22853915662650603]
[ 363.  866.  700.  607.  120.]
[1.8501888427085109, 0.086216517857142863]
[  258.  1216.   890.   911.   309.]


In [67]:
def exp_smooth(l, alpha = 0.2):
    """Exponentially smooth a sequence of numbers.

    The running value is seeded with the mean of `l`; each element `cv` then
    updates it to `(1 - alpha) * previous + alpha * cv`.

    Parameters:
        l:     sequence of numbers (list or 1-D array).
        alpha: weight of the newest element; the default 0.2 reproduces the
               fixed 0.8 / 0.2 weighting this function always used.

    Returns:
        A list of len(l) + 1 values: the seed (the mean of `l`) followed by
        one smoothed value per input element. An empty input yields an empty
        list instead of [nan] plus a NumPy warning.
    """
    if len(l) == 0:
        return []
    keep = 1.0 - alpha
    smoothed = [np.mean(l)]
    # Append in place: building a new list per step (ov + [x]) is quadratic.
    for cv in l:
        smoothed.append(keep * smoothed[-1] + alpha * cv)
    return smoothed
def mean_per(l, n = 32):
    """Average a sequence in consecutive windows of `n` elements.

    Parameters:
        l: sliceable sequence of numbers.
        n: window length (default 32). The last window is shorter when
           len(l) is not a multiple of n.

    Returns:
        A list with one mean per window; empty for an empty input.

    Raises:
        ValueError: if n is 0 (a zero step is rejected by range).
    """
    # Slicing already stops at the end of the sequence, so the upper bound
    # needs no explicit clamp.
    return [np.mean(l[i:i + n]) for i in range(0, len(l), n)]
# Class probabilities of the restored model for every training sample.
predictions = model.predict(np.array(train_data))
# Attach the phase name to each of the five output columns.
labelled_p = [{'DISTRACT': row[0]
              ,'RELAXOPEN': row[1]
              ,'RELAXCLOSED': row[2]
              ,'CASUAL': row[3]
              ,'INTENSE': row[4]}
              for row in predictions]
# Merge the per-sample dicts with smart_combine, then post-process every
# value with the composition of mean_per and exp_smooth.
# NOTE(review): `chain` is a project helper -- confirm the order in which it
# applies its arguments.
per_phase = reduce(smart_combine, labelled_p)
lp = dmap(chain(mean_per, exp_smooth), per_phase)
frame = pd.DataFrame.from_dict(lp)
frame.plot(figsize=(11,7))
# Title (Dutch): "Predictions for protocol session with new test subject
# (with exponential smoothing)"; x axis: "Time (in seconds)"; y axis:
# "Certainty".
mpp.title('Voorspellingen voor protocolsessie met nieuwe proefpersoon (met exponential smoothing)')
mpp.xlabel('Tijd (in seconden)')
mpp.ylabel('Zekerheid')
mpp.tight_layout()
mpp.show()

In [37]:
# Class probabilities for the free-play session.
predictions = model.predict(np.array(play_data))

def _winner_flags(p):
    """Map each phase name to 1 if its probability equals the row maximum, else 0."""
    best = max(p)
    return {'DISTRACT': 1 if p[0] == best else 0
           ,'RELAXOPEN': 1 if p[1] == best else 0
           ,'RELAXCLOSED': 1 if p[2] == best else 0
           ,'CASUAL': 1 if p[3] == best else 0
           ,'INTENSE': 1 if p[4] == best else 0}

# Hard decision per sample; ties mark every tied phase with 1.
labelled_p = map(_winner_flags, predictions)
winner_frame = pd.DataFrame.from_dict(reduce(smart_combine, labelled_p))
winner_frame.plot()
mpp.title('')
mpp.show()

In [49]:
# Render the free-play predictions as a grey-scale image.
predictions = model.predict(np.array(play_data))
# Each probability becomes an (r, g, b) triple with three equal channels.
labelled_p = [[(prob, prob, prob) for prob in row]
              for row in predictions]
# Side length of the smallest square grid that is strictly larger than
# sqrt(sample count).
lsqrt = int(math.sqrt(len(labelled_p))) + 1
# Pad with all-black rows (five pixels each) up to lsqrt ** 2 rows.
fill = [[(0,0,0)]*5]*(lsqrt**2-len(labelled_p))
padded = labelled_p + fill
# Cut the padded list into lsqrt chunks of lsqrt rows each ...
split = [padded[start:start + lsqrt]
         for start in xrange(0, lsqrt**2, lsqrt)]
# ... and transpose every chunk so that each phase forms one image row.
flipped = [zip(*chunk) for chunk in split]
mpp.imshow(np.array(reduce(smart_combine, flipped)))
mpp.show()