In [1]:
##### Change only these two lines as needed #####
GAME_NAME = 'deresute'
t_frame = [25,125] # which frames to use as training (teacher) data

In [2]:
import cv2
import numpy as np
import tensorflow as tf
from sklearn.utils import shuffle

In [3]:
### Save a movie
def save(movie, name):
    """Write every frame of `movie` to the video file `name` (e.g. 'hoge.mp4').

    The frame size is taken from movie.shape[1] (height) and movie.shape[2]
    (width); the file is encoded with the 'mp4v' fourcc at 30 fps.
    """
    frame_h = movie.shape[1]
    frame_w = movie.shape[2]
    codec = cv2.VideoWriter_fourcc(*'mp4v')
    writer = cv2.VideoWriter(name, int(codec), 30.0, (int(frame_w), int(frame_h)))
    idx = 0
    while idx < movie.shape[0]:
        writer.write(movie[idx])
        idx += 1
    writer.release()

In [4]:
### Read a movie
def read(name, resize=False, max_frames=1000):
    """Load up to `max_frames` frames from the video file `name` (e.g. 'hoge.mp4').

    Parameters
    ----------
    name : path of the video file handed to cv2.VideoCapture.
    resize : when true, every frame is resized with cv2.resize(frame, (160, 120)).
    max_frames : upper bound on the number of frames read (default 1000).

    Returns
    -------
    np.ndarray stacking the frames that were actually decoded; it is shorter
    than `max_frames` when the video ends earlier.

    Raises
    ------
    IOError if the video cannot be opened.
    """
    cap = cv2.VideoCapture(name)
    if not cap.isOpened():
        cap.release()
        raise IOError('could not open video: %s' % name)
    movie = []
    try:
        for _ in range(max_frames):
            ret, frame = cap.read()
            # cap.read() reports ret == False (and frame None) once the stream
            # is exhausted; stop instead of appending an invalid frame.
            if not ret:
                break
            if resize:
                frame = cv2.resize(frame, (160, 120))
            movie.append(frame)
            # ESC aborts the read early (kept from the original implementation).
            if cv2.waitKey(1) == 27:
                break
    finally:
        # Release the capture even if decoding or resizing raised.
        cap.release()
    return np.array(movie)

In [5]:
### View a movie
# Playback advances one frame per key event (hold a key to keep playing); ESC stops
def watch(movie):
    """Show the frames of `movie` one by one in a window named 'frame'.

    After each frame the function blocks in cv2.waitKey(0) until a key is
    pressed; key code 27 (ESC) ends playback. All windows are closed on exit.
    """
    idx, total = 0, movie.shape[0]
    while idx < total:
        cv2.imshow('frame', movie[idx])
        if cv2.waitKey(0) == 27:
            break
        idx += 1
    cv2.destroyAllWindows()

In [6]:
### Visualize the ground-truth labels
def overlay_(x, y, points):
    """Return a copy of frame `x` with a circle drawn for each active label in `y`.

    Label index k (for k >= 1) is drawn at points[k-1], which is read as
    (row, col) and passed to cv2.circle as (col, row). Index 0 of `y` is
    never drawn. The input frame itself is not modified.
    """
    canvas = x.copy()
    active = [k for k in range(1, y.shape[0]) if y[k] == 1]
    for k in active:
        center = (points[k-1,1], points[k-1,0])
        cv2.circle(canvas, center, 20, (255,0,0), 3)
    return canvas

def overlay(X, Y, points):
    """Apply overlay_ to every frame X[t] with its label vector Y[t].

    Returns the annotated frames stacked into a single np.ndarray.
    """
    annotated = [overlay_(X[t], Y[t], points) for t in range(X.shape[0])]
    return np.array(annotated)

def _lane_points(count, row, first, limit, step):
    """Build a (count, 2) uint16 array of (row, col) marker positions.

    Every marker shares the same `row`; the columns are
    np.arange(first, limit, step), truncated when stored as uint16.
    """
    coords = np.zeros((count, 2)).astype(np.uint16)
    coords[:,0] = row
    coords[:,1] = np.arange(first, limit, step)
    return coords

# Marker positions per game, looked up by GAME_NAME when drawing overlays.
points = {
    'deresute': _lane_points(9, 180, 41, 279, 236/8),
    'mirisita': _lane_points(11, 189, 53, 267, 211/10),
    'bandori': _lane_points(13, 210, 22, 300, 320/14),
}

In [7]:
### Random seed settings
rng = np.random.RandomState(2525)  # source of the initial weights drawn in Conv and Dense
random_state = 39  # passed to shuffle() in the training loop

In [8]:
# Load the input frames and their labels for the selected game.
# NOTE(review): presumably data_X holds 120x160x3 frames (see the placeholder shape used later) — confirm.
data_X = np.load(GAME_NAME + '/data_X.npy')
data_T = np.load(GAME_NAME + '/data_T.npy')

In [9]:
# Work on a copy so that viewing never touches the loaded data.
movie = data_X.copy()

In [10]:
# Step through the raw input frames (press a key to advance, ESC to stop).
watch(movie)

In [11]:
# Inputs are taken 3 frames before the window given by t_frame and are
# standardized with the mean/std of the whole data_X array.
train_X = ((data_X[t_frame[0]-3:t_frame[1]-3] - data_X.mean())/data_X.std())
# All of data_T is used as the target; the model predicts labels 3 frames into the future.
# NOTE(review): the original note here read "<- 120:220", which does not match the current
# t_frame — presumably data_T already holds exactly the t_frame window; confirm.
train_T = data_T

In [12]:
# Draw the training labels on the (un-resized) movie frames for a visual check.
# NOTE(review): frames are sliced at t_frame shifted by +3 while train_X uses -3 —
# confirm the intended alignment between movie.mp4 and data_X.
movie = overlay(read(GAME_NAME + '/movie.mp4')[t_frame[0]+3:t_frame[1]+3], train_T, points[GAME_NAME])

In [13]:
# Step through the frames annotated with the training labels.
watch(movie)

In [14]:
class Conv:
    """2-D convolution layer computing function(conv2d(x, W) + b)."""

    def __init__(self, filter_shape, function=lambda x: x, strides=[1,1,1,1], padding='VALID'):
        # Xavier initialization: uniform in [-limit, limit] with
        # limit = sqrt(6 / (fan_in + fan_out)).
        fan_in = np.prod(filter_shape[:3])
        fan_out = np.prod(filter_shape[:2]) * filter_shape[3]
        limit = np.sqrt(6/(fan_in + fan_out))
        initial_W = rng.uniform(low=-limit, high=limit, size=filter_shape)
        self.W = tf.Variable(initial_W.astype('float32'), name='W')
        # One bias per output filter, so its length is filter_shape[3].
        initial_b = np.zeros((filter_shape[3]), dtype='float32')
        self.b = tf.Variable(initial_b, name='b')
        self.function, self.strides, self.padding = function, strides, padding

    def f_prop(self, x):
        """Convolve `x` with W, add the bias and apply the activation."""
        conv_out = tf.nn.conv2d(x, self.W, strides=self.strides, padding=self.padding)
        return self.function(conv_out + self.b)

In [15]:
class Pool:
    """Max-pooling layer; stores its window settings and applies tf.nn.max_pool."""

    def __init__(self, ksize=[1,2,2,1], strides=[1,2,2,1], padding='VALID'):
        self.ksize, self.strides, self.padding = ksize, strides, padding

    def f_prop(self, x):
        """Max-pool `x` using the stored window size, strides and padding."""
        pooled = tf.nn.max_pool(x, ksize=self.ksize, strides=self.strides, padding=self.padding)
        return pooled

In [16]:
class Flatten:
    """Layer that collapses every dimension after the first into one."""

    def f_prop(self, x):
        """Reshape `x` to (-1, product of its static dimensions after the first)."""
        trailing_dims = x.get_shape().as_list()[1:]
        flat_size = np.prod(trailing_dims)
        return tf.reshape(x, (-1, flat_size))

In [17]:
class Dense:
    """Fully connected layer computing function(x W + b)."""

    def __init__(self, in_dim, out_dim, function=lambda x: x):
        # Xavier initialization: uniform in [-limit, limit] with
        # limit = sqrt(6 / (in_dim + out_dim)).
        limit = np.sqrt(6/(in_dim + out_dim))
        initial_W = rng.uniform(low=-limit, high=limit, size=(in_dim, out_dim))
        self.W = tf.Variable(initial_W.astype('float32'), name='W')
        initial_b = np.zeros([out_dim]).astype('float32')
        self.b = tf.Variable(initial_b)
        self.function = function

    def f_prop(self, x):
        """Apply the affine map to `x`, then the activation."""
        affine = tf.matmul(x, self.W) + self.b
        return self.function(affine)

In [18]:
# Network: one strided convolution, one max-pool, then a linear read-out with
# one logit per label column of data_T. Shapes below are height x width x channels.
layers = [
    Conv(filter_shape=[28,28,3,5], function=tf.nn.relu, strides=[1,4,4,1]), # 120x160x3 -> 24x34x5 (VALID: (120-28)/4+1 = 24, (160-28)/4+1 = 34)
    Pool(ksize=[1,2,2,1], strides=[1,2,2,1]), # 24x34x5 -> 12x17x5
    Flatten(),
    Dense(17*12*5, data_T.shape[1])
]

# x: batch of input frames; t: matching batch of target label vectors.
x = tf.placeholder(tf.float32, [None, 120, 160, 3])
t = tf.placeholder(tf.float32, [None, data_T.shape[1]])

def f_props(layers, x):
    """Feed `x` through each layer's f_prop in order and return the final output."""
    out = x
    for stage in layers:
        out = stage.f_prop(out)
    return out

# Logits of the network for the input placeholder.
y = f_props(layers, x)

# Mean sigmoid cross-entropy over all labels, minimized with plain gradient
# descent (learning rate 0.02).
cost = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=y, labels=t))
train = tf.train.GradientDescentOptimizer(0.02).minimize(cost)

# Hard 0/1 prediction: sigmoid probability rounded to the nearest integer.
pred = tf.cast(tf.rint(tf.sigmoid(y)),dtype=tf.int32)

In [19]:
# Train the network with mini-batch gradient descent and report progress
# every 10th epoch.
n_epochs = 50
batch_size = 10
n_batches = train_X.shape[0]//batch_size  # floor division: a trailing partial batch is dropped

init = tf.global_variables_initializer()
sess = tf.Session()
sess.run(init)
for epoch in range(n_epochs):
    # The shuffled arrays replace train_X / train_T, so each epoch permutes
    # the order left by the previous one.
    train_X, train_T = shuffle(train_X, train_T, random_state=random_state)
    for i in range(n_batches):
        start = i * batch_size
        end = start + batch_size
        sess.run(train, feed_dict={x: train_X[start:end], t: train_T[start:end]})
    if epoch%10 == 0:
        # Evaluate only when the result is reported; running pred/cost does not
        # update any variable, so skipping it on other epochs leaves training unchanged.
        # NOTE: the evaluation uses the training data itself, so the value
        # printed as "Validation cost" is really the training cost.
        pred_Y, valid_cost = sess.run([pred, cost], feed_dict={x: train_X, t: train_T})
        # Number of label entries where the prediction differs from the target.
        score = np.sum(np.abs(pred_Y - train_T.astype(np.int32)))
        print(score, end=' ')
        print('EPOCH:: %i, Validation cost: %.3f' % (epoch + 1, valid_cost))

102 EPOCH:: 1, Validation cost: 0.250
4 EPOCH:: 11, Validation cost: 0.043
1 EPOCH:: 21, Validation cost: 0.018
0 EPOCH:: 31, Validation cost: 0.011
1 EPOCH:: 41, Validation cost: 0.008


In [20]:
# Predict over data_X in 10 chunks of 100 frames and save each chunk to disk.
# The normalization statistics do not change between chunks, so they are
# computed once instead of on every iteration.
data_mean = data_X.mean()
data_std = data_X.std()
for i in range(10):
    test_X = ((data_X[100*i:100*(i+1)] - data_mean)/data_std)
    pred_Y = sess.run(pred, feed_dict={x: test_X})
    np.save(GAME_NAME + '/pred' + str(i) + '.npy', pred_Y)

In [21]:
# Reload the 10 saved prediction chunks and join them along the frame axis.
# The chunks are collected first and concatenated once, instead of growing the
# array with np.append on every iteration (which copies it each time).
# The empty int32 seed keeps the label-width check against data_T.shape[1].
pred_chunks = [np.zeros([0,data_T.shape[1]], dtype=np.int32)]
pred_chunks += [np.load(GAME_NAME +  '/pred' + str(i) + '.npy') for i in range(10)]
pred_Y = np.concatenate(pred_chunks, axis=0)

In [22]:
# Draw the predictions on the movie: prediction i is overlaid on frame i+3,
# matching the 3-frame look-ahead used for training.
movie = overlay(read(GAME_NAME + '/movie.mp4')[3:1000], pred_Y[0:997], points[GAME_NAME])

In [23]:
# Step through the frames annotated with the predicted labels.
watch(movie)

In [24]:
# Write the annotated prediction movie next to the game's data files.
save(movie, GAME_NAME + '/pred_movie.mp4')