In [1]:
import keras
from keras.layers import Input ,Dense, Dropout, Activation, LSTM
from keras.layers import Convolution2D, MaxPooling2D, Flatten, Reshape
from keras.models import Sequential
from keras.layers.wrappers import TimeDistributed
from keras.layers.pooling import GlobalAveragePooling1D
from keras.optimizers import SGD
from keras.utils import np_utils
from keras.models import Model
from keras.optimizers import Adam
from keras.engine.network import Network

from keras.initializers import glorot_normal,orthogonal

from keras.callbacks import EarlyStopping
import csv

from PIL import Image

import numpy as np
import time

Using TensorFlow backend.


In [2]:
# Settings that control how much data is used for training
num_sample = 20   # total number of image frames
timesteps = 5     # number of values fed to the LSTM at one time
camera = 5        # number of cameras

# Image settings
channels = 3
img_width = img_height = 256

# Values used by the LSTM and the training loop
n_hidden = 256    # output dimensionality
epochs = 100      # number of epochs
batch_size = 5    # mini-batch size

In [3]:
class Prediction:
    """CNN + LSTM classifier over several image sequences that share one feature extractor.

    Each input is a sequence of `timesteps` images. A single shared sub-network
    (per-frame convolutions followed by an LSTM) encodes every input sequence
    into an `n_hidden`-dimensional vector; the vectors are concatenated and
    mapped to `n_out` class probabilities.

    Parameters
    ----------
    timesteps : int
        Number of frames in each input sequence.
    n_hidden : int
        Number of LSTM units (size of the per-sequence feature vector).
    n_out : int
        Number of output classes.
    width, height : int
        Frame size in pixels.
    channels : int, optional
        Number of colour channels per frame (default 3, as before).
    """

    def __init__(self, timesteps, n_hidden, n_out, width, height, channels=3):
        self.maxlen = timesteps
        self.n_hidden = n_hidden
        self.n_out = n_out
        self.width = width
        self.height = height
        self.channels = channels
        # Built once so that every call in create_model() reuses the same weights.
        self.sharedLayer = self.create_sharedmodel()

    def _sequence_input(self):
        """Return a fresh Input for one image sequence: (timesteps, height, width, channels)."""
        return Input(shape=(self.maxlen, self.height, self.width, self.channels))

    def create_sharedmodel(self):
        """Build the shared per-sequence encoder.

        Two Conv(32, 3x3, relu) + MaxPool(2x2) stages are applied to every frame,
        followed by dropout, flattening and a 512-unit Dense layer; the resulting
        per-frame features are summarised by an LSTM with `n_hidden` units.

        Returns
        -------
        Network
            A `Network` named "shared_layers" mapping one image sequence to the
            LSTM output.
        """
        inputs = self._sequence_input()

        x = TimeDistributed(Convolution2D(32, (3, 3), activation="relu", padding="same"))(inputs)
        x = TimeDistributed(MaxPooling2D(pool_size=(2, 2)))(x)
        x = TimeDistributed(Convolution2D(32, (3, 3), activation="relu", padding="same"))(x)
        x = TimeDistributed(MaxPooling2D(pool_size=(2, 2)))(x)
        x = TimeDistributed(Dropout(0.25))(x)
        x = TimeDistributed(Flatten())(x)
        x = TimeDistributed(Dense(512))(x)
        # No batch_input_shape here: the layer is called on a tensor, so its input
        # shape is inferred from `x` (512 features per step). The previous
        # hard-coded (None, maxlen, 35) did not match that and had no effect.
        predictions = LSTM(self.n_hidden,
                           kernel_initializer=glorot_normal(seed=20181020),
                           recurrent_initializer=orthogonal(gain=1.0, seed=20181020),
                           dropout=0.01,
                           recurrent_dropout=0.01)(x)

        return Network(inputs, predictions, name="shared_layers")

    def create_model(self, n_inputs=5):
        """Build and compile the full multi-input classifier.

        Parameters
        ----------
        n_inputs : int, optional
            Number of image-sequence inputs (default 5, matching `train`).

        Returns
        -------
        Model
            Compiled model taking `n_inputs` sequences and returning `n_out`
            class probabilities.
        """
        model_inputs = [self._sequence_input() for _ in range(n_inputs)]
        # Calling the same Network on every input shares its weights across inputs.
        mid_features = [self.sharedLayer(model_input) for model_input in model_inputs]

        # concatenate() needs at least two tensors; a single input is used as-is.
        if len(mid_features) == 1:
            merged_vector = mid_features[0]
        else:
            merged_vector = keras.layers.concatenate(mid_features, axis=-1)

        # The targets in this notebook are one-hot vectors and predictions are
        # compared via argmax, i.e. a single-label multi-class problem. softmax +
        # categorical_crossentropy is the matching head; with sigmoid +
        # binary_crossentropy the 'accuracy' metric is computed per element and
        # reports misleadingly high values.
        predictions = Dense(self.n_out, activation='softmax')(merged_vector)

        model = Model(inputs=model_inputs, outputs=predictions)
        model.compile(optimizer='rmsprop', loss='categorical_crossentropy', metrics=['accuracy'])

        return model

    def train(self, x_train1, x_train2, x_train3, x_train4, x_train5, t_train, batch_size, epochs):
        """Create a five-input model and fit it.

        Parameters
        ----------
        x_train1 .. x_train5 : array-like
            One array of image sequences per input stream.
        t_train : array-like
            One-hot targets, one row per sequence.
        batch_size : int
        epochs : int
            Upper bound on epochs; early stopping (patience 5) may end training sooner.

        Returns
        -------
        (Model, History)
            The fitted model and the object returned by `fit`.
        """
        early_stopping = EarlyStopping(patience=5, verbose=1)
        model = self.create_model()
        hist = model.fit([x_train1, x_train2, x_train3, x_train4, x_train5], t_train,
                         batch_size=batch_size, epochs=epochs, verbose=1,
                         shuffle=True, callbacks=[early_stopping], validation_split=0.3)
        return model, hist

In [4]:
class Data:
    """Loads per-camera image frames and per-frame labels, and slices them into training windows.

    On construction the label CSV and all images are read eagerly.

    Parameters
    ----------
    camera_num : int
        Number of cameras; also the length of each one-hot label.
    file_name : str
        Prefix prepended to "video/mintime_optimize.csv" to locate the label file.
    samples : int
        Number of frames loaded per camera.
    timesteps : int
        Number of consecutive frames in one window.
    width, height : int
        Size every image is resized to.
    """

    def __init__(self, camera_num, file_name, samples, timesteps, width, height):
        self.camera_num = camera_num
        self.file_name = file_name
        self.samples = samples
        self.timesteps = timesteps
        self.width = width
        self.height = height
        self.learnData = []   # learnData[camera][frame] -> float32 image array scaled to [0, 1]
        self.labelData = []   # one one-hot vector of length camera_num per CSV row

        self.make_selection()
        self.make_image()

    def make_selection(self):
        """Read the label CSV and append one one-hot vector per row to `labelData`.

        The class index is taken from the second column of each row. Blank rows
        are skipped.
        """
        one_hot = np.eye(self.camera_num)
        # `with` guarantees the file is closed; newline='' is what the csv module expects.
        with open(self.file_name + "video/mintime_optimize.csv", 'r', newline='') as csv_file:
            for row in csv.reader(csv_file):
                if not row:
                    continue
                self.labelData.append(one_hot[int(row[1])])

    def make_image(self):
        """Load `samples` frames for every camera from image/<camera>/<frame>.jpg.

        Each image is converted to RGB, resized to (width, height) and scaled to
        [0, 1] as float32. A one-line progress bar is printed while loading.
        """
        pro_size = 20
        for i in range(0, self.camera_num):
            img_list = []
            for j in range(0, self.samples):
                bar = int(j * pro_size / self.samples)
                pro_bar = ('=' * bar) + (' ' * (pro_size - bar))
                percent = '{:03f}'.format(j / self.samples * 100.)
                print('\r{0}/{1} [{2}] {3}%'.format((i + 1), self.camera_num, pro_bar, percent), end='')

                file_path = "image/" + str(i) + "/" + str(j) + ".jpg"
                # Context manager releases the file handle as soon as the pixels are copied.
                with Image.open(file_path) as raw:
                    img = raw.convert('RGB').resize((self.width, self.height))  # Gray -> 'L', colour -> 'RGB'
                x = np.array(img, dtype=np.float32)
                x = x / 255.

                img_list.append(x)
            self.learnData.append(img_list)

    def get_learndata(self, num):
        """Return sliding windows of frames for camera `num`.

        Window i holds frames i .. i + timesteps - 1, for i in
        0 .. samples - timesteps - 1.

        Returns
        -------
        numpy.ndarray
            float64 array whose first two axes are (samples - timesteps, timesteps).
        """
        frames = self.learnData[num]
        windows = [
            [frames[start + offset] for offset in range(self.timesteps)]
            for start in range(self.samples - self.timesteps)
        ]
        # np.float was removed in NumPy 1.24; it was an alias of the builtin float (float64).
        return np.array(windows).astype(np.float64)

    def get_labeldata(self):
        """Return the labels for frames timesteps .. samples - 1.

        Row i is the label of the frame immediately following window i of
        `get_learndata`.

        Returns
        -------
        numpy.ndarray
            float64 array with samples - timesteps rows.
        """
        labels = [self.labelData[i] for i in range(self.timesteps, self.samples)]
        return np.array(labels).astype(np.float64)

    def show(self):
        """Print the shapes of the camera-0 windows and of the label array."""
        print()
        print(self.get_learndata(0).shape)
        print(self.get_labeldata().shape)

In [5]:
# Load every camera's frames and the label CSV, then print the resulting array shapes.
data = Data(
    camera_num=camera,
    file_name="./",
    samples=num_sample,
    timesteps=timesteps,
    width=img_width,
    height=img_height,
)
data.show()

(15, 5, 256, 256, 3)
(15, 5)


In [None]:
# Hyper-parameters for this run (these override the values from the config cell)
n_hidden = 32     # output dimensionality
epochs = 100      # number of epochs
batch_size = 5    # mini-batch size

# One array of frame windows per camera, plus the matching labels.
# Prediction.train takes exactly five input streams, so `camera` must be 5 here.
x_train = [data.get_learndata(cam) for cam in range(camera)]
t_train = data.get_labeldata()

# Model definition
pred = Prediction(timesteps, n_hidden, camera, img_width, img_height)
# Training
model, hist = pred.train(*x_train, t_train, batch_size, epochs)

# Evaluation
# NOTE: this notebook defines no separate hold-out set, so the scores below are
# computed on the same data that was passed to fit(). Replace x_test / t_test
# with held-out data when it is available.
x_test, t_test = x_train, t_train
score = model.evaluate(x_train, t_train, batch_size=batch_size, verbose=1)
print("score:", score)

# Confusion counts indexed as pre[predicted][target]; one row/column per camera class.
pre = [[0 for _ in range(camera)] for _ in range(camera)]
# Accuracy tally
preds = model.predict(x_test)
correct = 0
for i in range(len(preds)):
    predicted = np.argmax(preds[i, :])
    tar = np.argmax(t_test[i, :])
    pre[predicted][tar] += 1
    if predicted == tar:
        correct += 1

print("正答率:", 1.0 * correct / len(preds))

In [None]:
# Build a fresh (untrained) model just to inspect its architecture.
# Note: this rebinds `pred` and `model`.
pred = Prediction(timesteps, n_hidden, camera, img_width, img_height)
model = pred.create_model()
# summary() prints the table itself and returns None, so it must not be wrapped in print().
model.summary()