In [2]:
# Relative prefix that climbs three directory levels from the working directory.
BASE_DIR = '../../../'
# Path of the notebook that is executed with %run in the following cell.
DATA_DIR = '{}code/util/dataset/dataset.ipynb'.format(BASE_DIR)

In [3]:
# Execute the notebook located at DATA_DIR inside this kernel's namespace.
# NOTE(review): names used below but never imported in this notebook (np, os,
# cv2, torch, Dataset_torch, Dataset, DataLoader, sigmoid, draw_images_horz)
# presumably come from that notebook -- confirm.
%run {DATA_DIR}

In [None]:
class VideoShotDataset(Dataset_torch):
    """Shot-boundary dataset backed by cached movie thumbnails.

    The constructor makes sure a thumbnail cache exists for every movie and
    loads all caches into ``self.frames`` / ``self.marks``.

    NOTE(review): ``input_shape`` is declared channel-first (3, 90, 120) while
    ``video_create_seq`` produces channel-last data (90, 120, 3) -- confirm
    which layout the model expects.
    """

    def __init__(self, filenames, timesteps=5, count=50, batch=10):
        """Prepare caches for ``filenames`` and load them.

        ``count`` and ``batch`` are accepted but not used by this constructor.
        """
        super().__init__('videoshot', 'binary')

        # Build any missing cache file first, then read every cache back.
        video_create_cache(filenames)
        frames, marks = video_load_cache(filenames)
        self.frames = frames
        self.marks = marks

        self.set_timesteps(timesteps)

    def set_timesteps(self, timesteps):
        """Store the sequence length and derive the input/output shapes."""
        # One extra leading step: video_create_seq writes the sequence length
        # into slot 0 of every sample.
        steps_with_header = timesteps + 1
        self.timesteps = timesteps
        self.input_shape = [steps_with_header, 3, 90, 120]
        self.output_shape = [steps_with_header, 1]

    @property
    def train_count(self):
        """Fixed nominal number of training samples."""
        return 2000

    def __str__(self):
        """One-line summary; ``name``/``mode`` are presumably set by the base class."""
        template = '{}({}, {} frames, {} shots, {} train_data)'
        frame_total = len(self.frames)
        shot_total = np.sum(self.marks)
        return template.format(self.name, self.mode, frame_total,
                               shot_total, self.train_count)

In [None]:
def video_create_cache(filenames):
    """Create a thumbnail cache file for every movie that does not have one.

    For each name in ``filenames`` the movie ``<movie dir>/<name>`` is scanned
    and 100 randomly placed runs of 4 consecutive frames are stored, resized to
    120x90, as a ``[100, 4, 90, 120, 3]`` array in ``<cache dir>/<name>.npy``.
    Movies whose cache file already exists are skipped.

    Args:
        filenames: iterable of movie file names (relative to the movie dir).

    Raises:
        AssertionError: a movie file does not exist.
        ValueError: a movie is too short to draw 100 distinct start positions.
        RuntimeError: a frame could not be decoded.
    """
    movie_path = BASE_DIR + '../big_data/movie/movie/'
    cache_path = BASE_DIR+'../big_data/movie/cache/'

    shot_count = 100          # sampled runs per movie
    shot_frames = 4           # consecutive frames stored per run
    thumb_w, thumb_h = 120, 90

    # makedirs also creates missing parent directories and does not fail when
    # the directory already exists.
    os.makedirs(cache_path, exist_ok=True)

    for filename in filenames:
        movie_fname = movie_path + filename
        cache_fname = cache_path + filename + '.npy'

        if os.path.exists(cache_fname):
            print('{}: cache file is found => use cache'.format(filename))
            continue

        if not os.path.exists(movie_fname):
            print('{}: file is not found => ERROR'.format(filename))
            # Explicit raise keeps the exception type of the former `assert 0`
            # but is not stripped when Python runs with -O.
            raise AssertionError('{}: movie file is not found'.format(movie_fname))

        print('{}: creating cache file...'.format(filename))

        cap = cv2.VideoCapture(movie_fname)
        try:
            frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT)) - 1

            # Every sampled run consumes `shot_frames` extra frames from the
            # stream, so that many frames per run are held back at the end.
            start_limit = frame_count - shot_count * shot_frames
            if start_limit < shot_count:
                raise ValueError(
                    '{}: too few frames ({}) to sample {} shots'.format(
                        filename, frame_count, shot_count))

            # Sample WITHOUT replacement: a repeated start index can never be
            # matched by the increasing scan counter below, which would leave
            # every later thumbnail zero-filled.
            shot_idxs = np.sort(
                np.random.choice(start_limit, shot_count, replace=False))
            thumbs = np.zeros([shot_count, shot_frames, thumb_h, thumb_w, 3])
            sn = 0

            for fn in range(start_limit):
                cap.grab()
                if fn == shot_idxs[sn]:
                    for k in range(shot_frames):
                        ok, frame = cap.retrieve(0)
                        if not ok:
                            raise RuntimeError(
                                '{}: failed to decode a frame'.format(filename))
                        cap.grab()
                        thumbs[sn, k] = cv2.resize(frame, (thumb_w, thumb_h))
                    sn += 1
                    if sn >= shot_count: break
        finally:
            # Release the capture even when sampling or decoding fails.
            cap.release()

        np.save(cache_fname, thumbs)

    print('Creating thumbnail cache is done')

In [None]:
def video_load_cache(filenames):
    """Load the thumbnail caches of ``filenames`` as flat frame/mark arrays.

    Returns:
        (frames, shots): ``frames`` has shape ``[N, 90, 120, 3]`` and ``shots``
        has shape ``[N]`` where ``N = len(filenames) * 100 * 4``. ``shots`` is
        1.0 for the first frame of every run of 4 frames and 0.0 otherwise.
    """
    cache_dir = BASE_DIR+'../big_data/movie/cache/'
    movie_total = len(filenames)
    cache_files = [cache_dir + name + '.npy' for name in filenames]

    # One [100, 4, 90, 120, 3] slot per movie, filled from its cache file.
    thumbs = np.zeros([movie_total, 100, 4, 90, 120, 3])
    for slot, cache_file in zip(thumbs, cache_files):
        slot[...] = np.load(cache_file)

    # Only the first frame of each 4-frame run starts a new shot.
    first_frame = np.zeros([movie_total, 100, 4])
    first_frame[..., 0] = 1.0

    return thumbs.reshape([-1, 90, 120, 3]), first_frame.ravel()

In [None]:
#     tmp_x = []
#     tmp_y = []
#     for i in range(train_count):
#         x,y=self.create_seq(batch_size)
#         tmp_x.append(x)
#         tmp_y.append(y)
#     tmp_x=np.array(tmp_x).transpose(0,1,2,5,3,4)
#     tmp_y=np.array(tmp_y)
#     return tmp_x, tmp_y


In [None]:
def video_shot_get_train_data(self, batch_size):
    """Return ``batch_size`` freshly sampled sequences for training."""
    batch = self.create_seq(batch_size)
    return batch

def video_shot_get_test_data(self,count):
    """Return a test set of 128 sampled sequences; ``count`` is ignored."""
    test_size = 128
    samples = self.create_seq(test_size)
    return samples

def video_shot_get_validate_data(self, count):
    """Return ``count`` freshly sampled sequences for validation."""
    samples = self.create_seq(count)
    return samples

# Attach the data-access functions defined above to VideoShotDataset as methods.
# The visualization hook reuses the validation sampler.
VideoShotDataset.get_train_data = video_shot_get_train_data
VideoShotDataset.get_test_data = video_shot_get_test_data
VideoShotDataset.get_validate_data = video_shot_get_validate_data
VideoShotDataset.get_visualize_data = video_shot_get_validate_data

In [None]:
def video_create_seq(self, count):
    """Sample ``count`` random frame sequences with shot-start labels.

    Returns:
        (xs, ys): ``xs`` has shape ``[count, timesteps+1, 90, 120, 3]`` and
        ``ys`` has shape ``[count, timesteps+1, 1]``. Slot 0 of every sample
        is a header whose first element holds the sequence length; slots
        1..timesteps hold a frame and its label (1.0 when the frame starts a
        new shot or follows a random jump, otherwise the stored mark).
    """
    steps = self.timesteps
    total_frames = len(self.frames)
    last_index = total_frames - 1

    xs = np.zeros([count, steps + 1, 90, 120, 3])
    ys = np.zeros([count, steps + 1, 1])

    # Header slot: the sequence length goes into the first element.
    xs[:, 0, 0, 0, 0] = steps
    ys[:, 0, 0] = steps

    for seq in range(count):
        # Starting past the end forces a random jump on the first step.
        cursor = total_frames
        for step in range(1, steps + 1):
            # Jump when the cursor cannot advance, otherwise on a coin flip.
            jump = cursor >= last_index or np.random.randint(2) == 0
            if jump:
                cursor = np.random.randint(total_frames)
                label = 1.0
            else:
                cursor += 1
                label = self.marks[cursor]
            xs[seq, step, :, :, :] = self.frames[cursor, :, :, :]
            ys[seq, step, 0] = label

    return xs, ys

# Bind the sequence sampler to VideoShotDataset as its create_seq method.
VideoShotDataset.create_seq = video_create_seq

In [None]:
def video_visualize(self, xs, est, ans):
    """Draw every sample's frames, then print estimates next to answers.

    Slot 0 of each sample (the header) is not drawn, and the printed values
    start at slot 2.
    """
    # First pass: one horizontal strip of frames per sample.
    for sample in xs:
        draw_images_horz(sample[1:], [90, 120, 3])

    # Second pass: estimated vs. expected values per sample.
    for idx in range(len(xs)):
        est_text = ','.join('%4.2f' % value for value in est[idx, 2:, 0])
        ans_text = ','.join('%4.2f' % value for value in ans[idx, 2:, 0])
        print('Est: ' + est_text)
        print('Ans: ' + ans_text)

# Bind the visualization routine to VideoShotDataset as its visualize method.
VideoShotDataset.visualize = video_visualize

In [None]:
def video_shot_forward_postproc(self, loss_func, output, y_train, mode=None):
    """Compute the loss over steps 2.. of every sequence.

    The first two slots along dim 1 are dropped from both ``output`` and
    ``y_train`` before ``loss_func`` is applied.

    Args:
        loss_func: callable taking ``(prediction, target)``.
        output: model output, indexed as ``[:, step, :]``.
        y_train: target, indexed as ``[:, step, :]``.
        mode: 'regression', 'binary' or 'select'; defaults to ``self.mode``.

    Returns:
        Whatever ``loss_func`` returns.

    Raises:
        ValueError: if ``mode`` is not one of the supported values (previously
            this fell through and failed with an unbound local variable).
    """
    if mode is None:
        mode = self.mode

    output, y_train = output[:, 2:, :], y_train[:, 2:, :]

    if mode in ('regression', 'binary'):
        return loss_func(output, y_train)

    if mode == 'select':
        # NOTE(review): argmax runs over dim=1 of the sliced target (the step
        # axis) -- confirm this is the intended class axis.
        return loss_func(output, torch.argmax(y_train, dim=1))

    raise ValueError('unsupported mode: {!r}'.format(mode))


# Bind the loss post-processing routine to VideoShotDataset as forward_postproc.
VideoShotDataset.forward_postproc = video_shot_forward_postproc

In [None]:
def video_shot_eval_accuracy(self, x, y, output, mode=None):
    """Fraction of elements from step 2 onward whose prediction matches.

    A target counts as positive when it equals 1.0; an output counts as
    positive when it is greater than 0. ``x`` and ``mode`` are not used.
    """
    target = y[:, 2:, :]
    logits = output[:, 2:, :]

    is_shot = target == 1.0
    predicted_shot = logits > 0
    hits = predicted_shot == is_shot

    # Integer tensor divided by an element count yields a floating tensor.
    return hits.sum() / hits.numel()

def video_shot_get_estimate(self, output, mode=None):
    """Turn raw outputs into estimates.

    Slot 0 along dim 1 is copied unchanged, slot 1 stays zero, and slots 2..
    are passed through ``sigmoid``. ``mode`` is not used.
    """
    header = output[:, 0, :]
    body = output[:, 2:, :]

    result = torch.zeros(output.shape)
    result[:, 0, :] = header
    result[:, 2:, :] = sigmoid(body)
    return result

# Bind the accuracy and estimate routines to VideoShotDataset as methods.
VideoShotDataset.eval_accuracy = video_shot_eval_accuracy
VideoShotDataset.get_estimate = video_shot_get_estimate

In [None]:
#     for epoch in range(epoch_count):
#         costs = []
#         accs = []
#         self.dataset.shuffle_train_data(batch_size*batch_count)
#         for n in range(batch_count):
#             trX, trY = self.dataset.get_train_data(batch_size, n)
#             cost, acc = self.train_step(trX, trY)
#             costs.append(cost)
#             accs.append(acc)

In [None]:
class Dataset_2_torch(Dataset):
    """Index-aligned pair of numpy arrays exposed as float tensors."""

    def __init__(self,x,y):
        """Convert ``x`` and ``y`` (numpy arrays) to float tensors."""
        self.X_data, self.y_data = (
            torch.from_numpy(array).float() for array in (x, y)
        )

    def __len__(self):
        """Number of samples, i.e. the size of the first axis of the inputs."""
        return self.X_data.size(0)

    def __getitem__(self,idx):
        """Return the ``(input, target)`` pair stored at ``idx``."""
        return self.X_data[idx], self.y_data[idx]

In [None]:
def dataloader_torch(self, batch, num):
    """Sample training data and wrap it in a shuffling DataLoader.

    ``batch`` is used both as the number of sequences requested from
    ``get_train_data`` and as the loader's batch size; ``num`` is the number
    of worker processes. The sampled arrays are kept on ``self.trX`` /
    ``self.trY``.
    """
    features, targets = self.get_train_data(batch)
    self.trX, self.trY = features, targets

    return DataLoader(
        Dataset_2_torch(features, targets),
        batch_size=batch,
        shuffle=True,
        num_workers=num,
    )

# Bind the loader factory to VideoShotDataset as its dataloader method.
VideoShotDataset.dataloader = dataloader_torch


In [None]:
def get_train_loader(self,batch,num):
    """Return the training loader built by ``self.dataloader(batch, num)``."""
    return self.dataloader(batch, num)

# Bind the convenience wrapper to VideoShotDataset as get_train_loader.
VideoShotDataset.get_train_loader = get_train_loader