In [1]:
from keras.models import Sequential

Using TensorFlow backend.


In [3]:
import numpy as np

In [None]:
class Game():
    """Frame-by-frame environment that replays one recorded motion example.

    The environment steps through the frames of an example and hands out a
    single non-zero reward on the final frame: +1 when the example is
    labelled as clapping, -1 otherwise.
    """

    def __init__(self, shape=(60,2,13,3)):
        """Remember the expected example shape.

        Args:
            shape: expected shape of one example; only ``shape[0]`` (the
                number of frames) is used by this class.
        """
        self.shape = shape

    def reset(self, new_example, true_acc, is_clapping):
        """Load a new example and rewind to its first frame.

        Args:
            new_example: per-frame tensor holding positions at index 0 and
                velocities at index 1 of each frame. Presumably shaped like
                ``self.shape`` = (frames, 2, 13 joints, 3 axes) -- TODO confirm
                against the data loader.
            true_acc: per-frame ground-truth accelerations, indexable by frame.
            is_clapping: truthy when the example shows clapping; selects the
                sign of the terminal reward.
        """
        self.info = new_example
        self.pos = new_example[0][0]  # positions in the first frame
        self.vel = new_example[0][1]  # velocities in the first frame
        self.frame = 0
        self.is_clapping = is_clapping
        self.true_acc = true_acc
        # Index of the last frame, derived from the configured shape.
        self.max_frame = self.shape[0]-1

    def state(self):
        """Return flattened copies of the current frame and its true acceleration.

        Returns:
            A ``(state, true_acc)`` tuple, each reshaped to a single row
            (``(1, 78)`` and ``(1, 39)`` for the default 2*13*3 / 13*3 layout).
            Copies are returned so callers cannot mutate the stored example.
        """
        frame_state = self.info[self.frame].reshape((1,-1)).copy()
        frame_acc = self.true_acc[self.frame].reshape((1,-1)).copy()
        return frame_state, frame_acc

    def update(self):
        """Advance one frame and return the reward for that step.

        Returns:
            0 while the example is still running; on reaching the last frame
            1 if the example is clapping, otherwise -1.
        """
        self.frame += 1 # move to next frame
        # ``>=`` (not ``==``) so a single-frame example still terminates.
        if self.frame >= self.max_frame:
            return 1 if self.is_clapping else -1
        return 0

In [9]:
# Scratch check: reshape((1, -1)) flattens a 2-D array into one row.
a = np.zeros(shape=(10, 10))
a.reshape(1, -1)

array([[0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0.]])

In [10]:
import os
#if using Theano with GPU
#os.environ["THEANO_FLAGS"] = "mode=FAST_RUN,device=gpu,floatX=float32"

import random
from keras.models import Sequential
from keras.layers.core import Dense
from collections import deque

In [None]:
class Agent():
    """Neural-network agent that predicts per-joint accelerations from a state.

    The network maps a flattened state (2*13*3 values) to a flattened
    acceleration vector (13*3 values) and is trained from a replay memory
    of past ``(state, acc, true_acc, reward)`` tuples.
    """

    # Sizes of the flattened network input and output.
    STATE_SIZE = 2 * 13 * 3
    ACC_SIZE = 13 * 3

    def __init__(self, env, explore=0.1, discount=0.95, hidden_size=10, memory_limit=5000):
        """Build the network and the replay memory.

        Args:
            env: environment exposing ``state()``, ``frame`` and ``max_frame``.
            explore: probability of returning a random acceleration.
            discount: base of the per-frame weight applied to training loss.
            hidden_size: width of both hidden layers.
            memory_limit: maximum number of remembered experiences.
        """
        self.env = env
        model = Sequential()
        model.add(Dense(hidden_size, input_shape=(self.STATE_SIZE,), activation='relu'))
        model.add(Dense(hidden_size, activation='relu'))
        model.add(Dense(self.ACC_SIZE))
        model.compile(loss='mse', optimizer='sgd')
        self.Q = model

        # experience replay:
        # remember states to "reflect" on later
        self.memory = deque([], maxlen=memory_limit)

        self.explore = explore # give some chance to randomly assign acc to explore new possiblities
        self.discount = discount # gamma of weight

    def predict_acc(self):
        """Return an acceleration vector of length ``ACC_SIZE`` for the current state.

        With probability ``explore`` the vector is random; otherwise it is the
        network's prediction for the environment's current state.
        """
        if np.random.rand() <= self.explore:
            # Same shape as the network output so downstream code sees one layout.
            return np.random.rand(self.ACC_SIZE)
        # env.state() returns (state, true_acc); only the state feeds the network.
        state, _ = self.env.state()
        q = self.Q.predict(state)

        return q[0]

    def remember(self, state, acc, true_acc, reward):
        """Append one experience; the deque drops the oldest entry when full."""
        self.memory.append((state, acc, true_acc, reward))

    # between every action, it will pick batch_size previous states/actions that the agent "remembers" to train
    def _prep_batch(self, batch_size):
        """Sample up to ``batch_size`` experiences and stack them into matrices.

        Returns:
            ``(inputs, targets)`` where row ``i`` of ``inputs`` is a remembered
            state and row ``i`` of ``targets`` its true acceleration. Both are
            empty (zero rows) when nothing can be sampled.
        """
        import warnings

        max_size = self.memory.maxlen
        if max_size is not None and batch_size > max_size:
            warnings.warn('batch size should not be larger than max memory size. Setting batch size to memory size')
            batch_size = max_size

        batch_size = min(batch_size, len(self.memory))
        if batch_size <= 0:
            # Nothing to sample: np.vstack would raise on an empty list.
            return np.empty((0, self.STATE_SIZE)), np.empty((0, self.ACC_SIZE))

        # random.sample already returns the picks in random order.
        batch = random.sample(list(self.memory), batch_size)

        # Supervised target: the recorded true acceleration for each state.
        # (A Q-learning target of reward + discount * max_a' Q(s', a') was
        # sketched here but is disabled; it would need next_state in memory.)
        inputs = [state for state, _acc, _true_acc, _reward in batch]
        targets = [true_acc for _state, _acc, true_acc, _reward in batch]

        # to numpy matrices
        return np.vstack(inputs), np.vstack(targets)

    def replay(self, batch_size):
        """Train on a sampled batch, one experience at a time.

        Returns:
            Sum of the per-sample losses, each scaled by
            ``discount ** (env.max_frame - env.frame)``.
        """
        inputs, targets = self._prep_batch(batch_size)
        # The weight depends only on the environment's current frame.
        weight = pow(self.discount, self.env.max_frame - self.env.frame)
        loss = 0
        for i in range(len(inputs)):
            # Slice (not index) to keep the leading batch axis Keras expects.
            loss += weight * self.Q.train_on_batch(inputs[i:i+1], targets[i:i+1])
        return loss

    def save(self, fname):
        """Write the network weights to ``fname``."""
        self.Q.save_weights(fname)

    def load(self, fname):
        """Load network weights from ``fname`` and print them."""
        self.Q.load_weights(fname)
        print(self.Q.get_weights())

In [None]:
import os
import sys
from time import sleep
def load_example(index):
    """Return ``(new_example, true_acc, is_clapping)`` for training video ``index``.

    Placeholder: the notebook does not contain the data-loading code yet, so
    this raises instead of leaving the cell syntactically invalid. Replace the
    body with the real loader before running the training loop.
    """
    raise NotImplementedError('load_example: plug in the training-video loader here')


game = Game()
agent = Agent(game)

print('training...')
epochs = 100 # number of training videos
batch_size = 60 # number of "experiences" we want to train from the previous decisions
fname = 'clapping_weights.h5'
log_every = 100 # print progress every this many epochs (only epoch 0 when epochs == 100)

# keep track of past record_len results
record_len = 100
record = deque([], record_len)

for i in range(epochs):
    new_example, true_acc, is_clapping = load_example(i)
    # Game.reset takes the ground-truth accelerations as its second argument.
    game.reset(new_example, true_acc, is_clapping)
    reward = 0
    loss = 0

    # rewards only given at end of game
    while reward == 0:
        # state is a method: it returns (flattened state, flattened true acceleration)
        prev_state, frame_acc = game.state()
        acc = agent.predict_acc()
        reward = game.update()

        agent.remember(prev_state, acc, frame_acc, reward)
        loss += agent.replay(batch_size)

    if i % log_every == 0:
        print('epoch: {:04d}/{} | loss: {:.3f} | win rate: {:.3f}\r'.format(i+1, epochs, loss, sum(record)/len(record) if record else 0))

    record.append(reward if reward == 1 else 0)

agent.save(fname)

In [11]:
# Scratch check: draw one random integer from the half-open range [0, 5).
np.random.randint(0,5)

4

In [16]:
# Scratch check: a zero-filled 3-D array, displayed to see how NumPy nests the axes.
a = np.zeros(shape=(2, 4, 3))
a

array([[[0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.]],

       [[0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.]]])

In [65]:
# Scratch check: a (2, 4, 3) array holding 1..24 in row-major order,
# then pick block 1, row 0 to see how chained indexing walks the axes.
b = np.arange(1, 25).reshape(2, 4, 3)
b[1, 0]

array([13, 14, 15])

In [21]:
# Flatten b into a single row; -1 lets NumPy infer the column count (24 here).
b.reshape((1,-1))

array([[ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
        17, 18, 19, 20, 21, 22, 23, 24]])

In [22]:
# Scratch list of (value, 0) tuples used to try out random.sample below.
c = [(value, 0) for value in range(1, 6)]
c


[(1, 0), (2, 0), (3, 0), (4, 0), (5, 0)]

In [23]:
# Draw 3 distinct entries from c without replacement (same call Agent._prep_batch uses on its memory).
random.sample(c, 3)

[(1, 0), (3, 0), (2, 0)]

In [25]:
        # Scratch network: 78 inputs -> 100 -> 10 -> 5 outputs, built from a layer list.
        model = Sequential([
            Dense(100, input_shape=(13*2*3,), activation='relu'),
            Dense(10, activation='relu'),
            Dense(5),
        ])
        model.compile(loss='mse', optimizer='sgd')

In [26]:
# Display the model object's repr to confirm a Sequential instance was built.
model

<keras.engine.sequential.Sequential at 0x123f88278>

In [86]:
# Scratch check: a (13, 2, 3) block flattens to one row of 78 values.
rs = np.zeros((13, 2, 3)).reshape(1, -1).copy()
rs.shape

(1, 78)

In [89]:
# Display the flattened (1, 78) array.
rs

array([[0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]])

In [90]:
# Three references to the same flattened row, mimicking a batch of remembered states.
inputs = [rs, rs, rs]

In [58]:
# Scratch check: a non-zero integer is truthy.
p = 5
print("yes" if p else 'no')

yes


In [60]:
# Scratch check: np.random.rand(1, 3) yields a (1, 3) array of uniform samples in [0, 1).
np.random.rand(1,3)

array([[0.30745461, 0.66801875, 0.43939106]])

In [66]:
# Scratch check: element 0 of the shape tuple is the frame count Game uses for max_frame.
shape = (60, 2, 13, 3)
shape[0]

60

In [91]:
# Stack the three (1, 78) rows in `inputs` into a single (3, 78) matrix.
np.vstack(inputs)

array([[0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
      

In [92]:
# Scratch check: a (5, 1) column flattens to one row of 5 values.
out = np.zeros((5, 1)).reshape(1, -1).copy()
out.shape

(1, 5)

In [93]:
# Three references to the same flattened row, mimicking a batch of targets.
outputs = [out, out, out]

In [94]:
# Stack the three (1, 5) rows in `outputs` into a single (3, 5) matrix.
np.vstack(outputs)

array([[0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0.]])

In [95]:
# Each stored input keeps its leading batch axis: shape (1, 78).
inputs[0].shape

(1, 78)

In [102]:
# Scratch check: iterate over the positions of `inputs`, as Agent.replay does.
for i, _ in enumerate(inputs):
    print(i)

0
1
2
