In [2]:
## nets.py
import tensorflow as tf 
import keras 
from keras.layers import Conv3D,BatchNormalization
from keras.layers import ReLU,Add,MaxPool3D,GlobalAveragePooling3D
from keras.layers import Concatenate,Dropout,Dense,Lambda,Input,MaxPooling3D
from keras.models import Model
from keras import Sequential

def datalayer(x,stride):
    """Subsample a 5-axis tensor along axis 1, keeping every `stride`-th entry.

    Axes 0, 2, 3 and 4 are passed through unchanged. In this file the function
    is used inside a Lambda layer to pick frames out of a video clip.
    """
    keep_all = slice(None)
    every_nth = slice(None,None,stride)
    return x[(keep_all,every_nth,keep_all,keep_all,keep_all)]

def Conv_BN_ReLU(planes,kernel_size,strides=(1,1,1),padding='same',use_bias=False):
    """Return a Sequential model chaining Conv3D -> BatchNormalization -> ReLU.

    planes      -- number of Conv3D filters
    kernel_size -- Conv3D kernel size
    strides     -- Conv3D strides
    padding     -- Conv3D padding mode
    use_bias    -- whether the Conv3D layer has a bias term
    """
    conv = Conv3D(planes,kernel_size,strides=strides,padding=padding,use_bias=use_bias)
    norm = BatchNormalization()
    activation = ReLU()
    return Sequential([conv,norm,activation])

def bottleneck(x,channels,stride=1,downsample=None,head_conv=1,use_bias=False):
    """Apply one residual bottleneck block to `x` and return the result.

    The main path is: head conv (1x1x1 when head_conv == 1, 3x1x1 when
    head_conv == 3) -> 1x3x3 conv with spatial stride `stride` -> 1x1x1 conv
    producing channels*4 filters -> batch norm. The shortcut is `x` itself, or
    `downsample(x)` when a downsample callable is given. Both are summed and
    passed through ReLU.

    Raises ValueError for any head_conv other than 1 or 3.
    """
    if head_conv == 1:
        head_kernel = 1
    elif head_conv == 3:
        head_kernel = (3,1,1)
    else:
        raise ValueError('Unsupported head_conv!!!')
    shortcut = x
    out = Conv_BN_ReLU(channels,kernel_size=head_kernel,use_bias=use_bias)(x)
    out = Conv_BN_ReLU(channels,kernel_size=(1,3,3),strides=(1,stride,stride),use_bias=use_bias)(out)
    out = Conv3D(channels*4,kernel_size=1,use_bias=use_bias)(out)
    out = BatchNormalization()(out)
    if downsample is not None:
        # Project the shortcut so it can be added to the main path.
        shortcut = downsample(shortcut)
    out = Add()([out,shortcut])
    return ReLU()(out)

def fast_residual_block(x,block,channels,blocks,stride=1,head_conv=1,fast_channels=8,channel_expansion=4):
    """Stack `blocks` residual blocks and return (output, output channel count).

    The first block receives `stride` and, when the stride is not 1 or the
    incoming `fast_channels` differs from channels*channel_expansion, a
    1x1x1 Conv3D + BatchNormalization projection for its shortcut. The
    remaining blocks are called with the block's default stride and no
    projection. The returned channel count is channels*channel_expansion.
    """
    out_channels = channels * channel_expansion
    shape_preserved = stride == 1 and fast_channels == out_channels
    downsample = None
    if not shape_preserved:
        projection = Conv3D(out_channels,kernel_size=1,strides=(1,stride,stride),use_bias=False)
        downsample = Sequential([projection,BatchNormalization()])
    x = block(x,channels,stride,downsample=downsample,head_conv=head_conv)
    for _ in range(blocks - 1):
        x = block(x,channels,head_conv=head_conv)
    return x,out_channels

def slow_residual_block(x,block,channels,blocks,stride=1,head_conv=1,slow_channels=80,channel_expansion=4):
    """Stack `blocks` residual blocks and return (output, next-stage channel count).

    The first block receives `stride` and, when the stride is not 1 or the
    incoming `slow_channels` differs from channels*channel_expansion, a
    1x1x1 Conv3D + BatchNormalization projection for its shortcut. The
    returned channel count is the block output width plus width//8*2; the
    caller in this file concatenates a lateral feature map after each stage.
    """
    out_channels = channels * channel_expansion
    shape_preserved = stride == 1 and slow_channels == out_channels
    downsample = None
    if not shape_preserved:
        projection = Conv3D(out_channels,kernel_size=1,strides=(1,stride,stride),use_bias=False)
        downsample = Sequential([projection,BatchNormalization()])
    x = block(x,channels,stride,downsample,head_conv=head_conv)
    for _ in range(blocks - 1):
        x = block(x,channels,head_conv=head_conv)
    next_channels = out_channels + out_channels // 8 * 2
    return x,next_channels
def Fast_body(x,layers,block):
    """Build the fast pathway; return (globally pooled features, lateral tensors).

    After the stem (5x7x7 conv + spatial max-pool) each of the four stages
    first taps the current feature map with a time-strided convolution (the
    lateral connection) and then runs a stack of residual blocks. `layers[i]`
    is the number of blocks in stage i and `block` is the residual block
    builder.
    """
    ## Time-strided convolution: We perform a 3D convolution of a 5×1×1 kernel with 2βC output channels and stride = α.
    fast_channels = 8
    x = Conv_BN_ReLU(8,kernel_size=(5,7,7),strides=(1,2,2))(x)
    x = MaxPooling3D(pool_size=(1,3,3),strides=(1,2,2),padding='same')(x)
    # One entry per stage: (lateral conv filters, residual width, spatial stride).
    stages = [(8*2,8,1),(32*2,16,2),(64*2,32,2),(128*2,64,2)]
    lateral = []
    for stage_idx,(lateral_filters,width,stride) in enumerate(stages):
        lateral.append(Conv3D(lateral_filters,kernel_size=(5,1,1),strides=(8,1,1))(x))
        x,fast_channels = fast_residual_block(x,block,width,layers[stage_idx],stride=stride,head_conv=3,fast_channels=fast_channels)
    x = GlobalAveragePooling3D()(x)
    return x, lateral

def Slow_body(x,lateral,layers,block):
    """Build the slow pathway and return its globally pooled features.

    After the stem (1x7x7 conv + spatial max-pool) each of the four stages
    concatenates the matching entry of `lateral` onto the feature map and then
    runs a stack of residual blocks. `layers[i]` is the number of blocks in
    stage i and `block` is the residual block builder.
    """
    slow_channels = 64 + 64//8*2
    x = Conv_BN_ReLU(64,kernel_size=(1,7,7),strides=(1,2,2))(x)
    x = MaxPooling3D(pool_size=(1,3,3),strides=(1,2,2),padding='same')(x)
    # One entry per stage: (residual width, spatial stride, head_conv).
    stages = [(64,1,1),(128,2,1),(256,2,3),(512,2,3)]
    for stage_idx,(width,stride,head_conv) in enumerate(stages):
        x = Concatenate()([x,lateral[stage_idx]])
        x,slow_channels = slow_residual_block(x,block,width,layers[stage_idx],stride=stride,head_conv=head_conv,slow_channels=slow_channels)
    x = GlobalAveragePooling3D()(x)
    return x
def SlowFast_body(inputs,layers,block,num_classes,dropout=0.5):
    """Assemble the two-pathway classifier and return it as a keras Model.

    The input is subsampled along axis 1 with stride 2 for the fast pathway
    and stride 16 for the slow pathway. The pooled outputs of both pathways
    are concatenated, passed through dropout and a softmax Dense layer with
    `num_classes` units.
    """
    fast_sampler = Lambda(datalayer,name='data_fast',arguments={'stride':2})
    inputs_fast = fast_sampler(inputs)
    slow_sampler = Lambda(datalayer,name='data_slow',arguments={'stride':16})
    inputs_slow = slow_sampler(inputs)
    print('*'*30)
    print(f'inputs_fast: {inputs_fast}')
    print(f'inputs_slow: {inputs_slow}')
    fast_features,lateral = Fast_body(inputs_fast,layers,block)
    slow_features = Slow_body(inputs_slow,lateral,layers,block)
    merged = Concatenate()([slow_features,fast_features])
    merged = Dropout(dropout)(merged)
    predictions = Dense(num_classes,activation='softmax')(merged)
    return Model(inputs,predictions)

def resnet50(inputs,**kwargs):
    """Build the model with bottleneck blocks and stage depths [3,4,6,3]."""
    stage_depths = [3,4,6,3]
    return SlowFast_body(inputs,stage_depths,bottleneck,**kwargs)

def resnet101(inputs,**kwargs):
    """Build the model with bottleneck blocks and stage depths [3,4,23,3]."""
    stage_depths = [3,4,23,3]
    return SlowFast_body(inputs,stage_depths,bottleneck,**kwargs)

def resnet152(inputs,**kwargs):
    """Build the model with bottleneck blocks and stage depths [3,8,36,3]."""
    stage_depths = [3,8,36,3]
    return SlowFast_body(inputs,stage_depths,bottleneck,**kwargs)

def resnet200(inputs,**kwargs):
    """Build the model with bottleneck blocks and stage depths [3,24,36,3]."""
    stage_depths = [3,24,36,3]
    return SlowFast_body(inputs,stage_depths,bottleneck,**kwargs)

if __name__ == '__main__':
    # Smoke test: build the resnet50 variant for a 64-frame 224x224 RGB clip
    # and 15 classes, then print its layer summary.
    x = Input(shape=(64,224,224,3))
    print(f'origin inputs x: {x}')
    resnet50(x,num_classes=15).summary()

origin inputs x: Tensor("input_2:0", shape=(?, 64, 224, 224, 3), dtype=float32)
******************************
inputs_fast: Tensor("data_fast_1/strided_slice:0", shape=(?, 32, 224, 224, 3), dtype=float32)
inputs_slow: Tensor("data_slow_1/strided_slice:0", shape=(?, 4, 224, 224, 3), dtype=float32)
Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.
Model: "model_1"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_2 (InputLayer)            (None, 64, 224, 224, 0                                            
__________________________________________________________________________________________________
data_fast (Lambda)              (None, 32, 224, 224, 0           input_2[0][0]                    
___________________________________________________________________________

In [0]:
## video2jpg.py
import cv2
import glob
import os
import numpy as np
import multiprocessing
## 多进程内存会爆炸。。。。。。。。。
## 文件名和帧数存放到json文件中去。
def video2jpg(video_name):
    """Decode `video_name` and write every frame as a JPEG under 'ucf_jpg/'.

    Frames are written to 'ucf_jpg/<stem>/<name>_<index>.jpg', where <stem> is
    the video path without its extension, <name> is the last component of
    <stem> and <index> counts from 0. Each frame is written as soon as it is
    decoded, so memory use no longer grows with the length of the video.

    Returns the number of frames written (0 when the video cannot be opened).
    """
    out_root = 'ucf_jpg'
    os.makedirs(out_root,exist_ok=True)
    # splitext handles extensions of any length (the old [:-4] slice did not).
    stem = os.path.splitext(video_name)[0]
    # Keep the output inside out_root even when an absolute path is given.
    relative_stem = os.path.splitdrive(stem)[1].lstrip('/\\')
    frame_dir = os.path.join(out_root,relative_stem)
    frame_prefix = os.path.basename(relative_stem)

    written = 0
    cap = cv2.VideoCapture(video_name)
    try:
        fps = cap.get(cv2.CAP_PROP_FPS)
        print(f'video {video_name} fps is {np.around(fps)}')
        if not cap.isOpened():
            print('Error opening video stream of file')
            return written
        while cap.isOpened():
            ret,frame = cap.read()
            if not ret:
                break
            if written == 0:
                # Create the per-video directory lazily, only once a frame exists.
                os.makedirs(frame_dir,exist_ok=True)
            img_name = os.path.join(frame_dir,frame_prefix + '_' + str(written) + '.jpg')
            cv2.imwrite(img_name,frame)
            written += 1
    finally:
        # Always free the capture handle, even if decoding or writing fails.
        cap.release()
    return written

if __name__ == '__main__':
    # Convert a single sample video. The abandoned multiprocessing draft that
    # used to sit here as a string literal was removed: it referenced
    # undefined names and was never executed.
    video_name = 'test.mp4'
    video2jpg(video_name)

video test.mp4 fps is 60.0


In [0]:
!nvidia-smi

Fri Jan 10 08:42:43 2020       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 440.44       Driver Version: 418.67       CUDA Version: 10.1     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|   0  Tesla P100-PCIE...  Off  | 00000000:00:04.0 Off |                    0 |
| N/A   43C    P0    28W / 250W |      0MiB / 16280MiB |      0%      Default |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Processes:                                                       GPU Memory |
|  GPU       PID   Type   Process name                             Usage      |
|  No ru

In [0]:
## spatial_transforms.py
## 图像数据，在空间域的预处理方法:归一化、随机裁剪、中心裁剪等。
## 有PILLOW对象存在，要转换成numpy array。
import os 
import random
import cv2 
import numpy as np 
import collections
import numbers
from PIL import Image

class Compose(object):
    """Chain several transforms and apply them in order.

    transforms -- iterable of callables; each receives the previous result.
                  Every entry must also provide randomize_parameters().
    """
    def __init__(self,transforms=None):
        # A fresh list per instance: a mutable default would be shared by
        # every Compose() created without arguments.
        self.transforms = [] if transforms is None else transforms
    def __call__(self,img):
        for t in self.transforms:
            img = t(img)
        return img
    def randomize_parameters(self):
        """Ask every transform to redraw its random parameters."""
        for t in self.transforms:
            t.randomize_parameters()
class Normalize(object):
    """Convert the input to a numpy array and compute (img - mean) / std.

    mean and std are wrapped in two leading singleton axes before the
    arithmetic, so per-channel lists broadcast over the last axis of the image.
    """
    def __init__(self,mean=[128,128,128],std=[128,128,128]):
        self.mean = mean
        self.std = std
    def __call__(self,img):
        pixels = np.array(img)
        offset = np.array(self.mean)[np.newaxis,np.newaxis]
        scale = np.array(self.std)[np.newaxis,np.newaxis]
        return (pixels - offset) / scale
    def randomize_parameters(self):
        # Deterministic transform: nothing to redraw.
        return None
class Scale(object):
    """Resize an image (an object exposing .size and .resize, e.g. a PIL image).

    size          -- int: the shorter side is resized to `size` and the other
                     side is scaled to keep the aspect ratio.
                     2-element iterable: passed to img.resize() as the exact
                     target size.
    interpolation -- resampling filter forwarded to img.resize().
    """
    def __init__(self,size,interpolation=Image.BILINEAR):
        # collections.Iterable was removed in Python 3.10; the ABC lives in
        # collections.abc.
        from collections.abc import Iterable
        assert isinstance(size,int) or (isinstance(size,Iterable) and len(size)==2)
        self.size = size
        self.interpolation = interpolation
    def __call__(self,img):
        if not isinstance(self.size,int):
            # Previously ((w,h),(w,h)) was passed here, which resize() rejects.
            return img.resize(tuple(self.size),self.interpolation)
        w,h = img.size
        if (w <= h and w == self.size) or (h <= w and h == self.size):
            return img
        if w < h:
            ow = self.size
            oh = int(self.size*h/w)
        else:
            oh = self.size
            ow = int(self.size*w/h)
        return img.resize((ow,oh),self.interpolation)
    def randomize_parameters(self):
        # Deterministic transform: nothing to redraw.
        pass
class RandomScale(object):
    """Resize an image so its shorter side equals a randomly chosen length.

    short_sides   -- candidate lengths (a single number is also accepted).
    interpolation -- resampling filter forwarded to img.resize().

    randomize_parameters() picks one candidate and stores it in
    `self.short_side`. `self.short_sides` keeps the full candidate list, so the
    method can be called repeatedly (it used to overwrite the list with the
    chosen int and crash on the second call).
    """
    def __init__(self,short_sides,interpolation=Image.BILINEAR):
        if isinstance(short_sides,numbers.Number):
            short_sides = [short_sides]
        self.short_sides = list(short_sides)
        if not self.short_sides:
            raise ValueError('short_sides must contain at least one value')
        # Usable before the first randomize_parameters() call.
        self.short_side = self.short_sides[0]
        self.interpolation = interpolation
    def __call__(self,img):
        w,h = img.size
        target = self.short_side
        if (w <= h and w == target) or (h <= w and h == target):
            return img
        if w < h:
            ow = target
            oh = int(target*h/w)
        else:
            oh = target
            ow = int(target*w/h)
        return img.resize((ow,oh),self.interpolation)
    def randomize_parameters(self):
        """Draw a new target length from the candidate list."""
        self.short_side = random.choice(self.short_sides)
class CenterCrop(object):
    """Crop the central region of an image.

    size -- a number (square crop) or a (height, width) pair.
    The image must expose .size as (width, height) and .crop(box).
    """
    def __init__(self,size):
        if isinstance(size,numbers.Number):
            self.size = (int(size),int(size))
        else:
            self.size = size

    def __call__(self,img):
        w,h = img.size
        th,tw = self.size
        y1 = int(round((h-th)/2))
        x1 = int(round((w-tw)/2))
        return img.crop((x1,y1,x1+tw,y1+th))

    def randomize_parameters(self):
        # Deterministic transform. The method is required because
        # Compose.randomize_parameters() calls it on every transform.
        pass

class CornerCrop(object):
    """Crop a fixed-size window from the centre or one of the four corners.

    size          -- a number (square crop) or a (height, width) pair.
    crop_position -- one of 'c', 'tl', 'tr', 'bl', 'br'. When None, the
                     position is drawn at random by randomize_parameters().

    Raises ValueError if crop_position is not None and not a known position.
    """
    def __init__(self,size,crop_position=None):
        if isinstance(size,numbers.Number):
            self.size = (int(size),int(size))
        else:
            self.size = size
        self.crop_positions = ['c','tl','tr','bl','br']
        if crop_position is not None and crop_position not in self.crop_positions:
            raise ValueError('Unknown crop_position: {}'.format(crop_position))
        self.randomize = crop_position is None
        self.crop_position = crop_position
    def __call__(self,img):
        if self.crop_position is None:
            # Called before randomize_parameters(): draw a position now
            # instead of failing on unbound coordinates.
            self.randomize_parameters()
        w,h = img.size
        # Unpacked for every position; the corner branches used to reference
        # th/tw without ever defining them.
        th,tw = self.size
        if self.crop_position == 'c':
            x1 = int(round((w-tw)/2))
            y1 = int(round((h-th)/2))
        elif self.crop_position == 'tl':
            x1,y1 = 0,0
        elif self.crop_position == 'tr':
            x1,y1 = w-tw,0
        elif self.crop_position == 'bl':
            x1,y1 = 0,h-th
        elif self.crop_position == 'br':
            x1,y1 = w-tw,h-th
        else:
            raise ValueError('Unknown crop_position: {}'.format(self.crop_position))
        return img.crop((x1,y1,x1+tw,y1+th))
    def randomize_parameters(self):
        """Draw a new crop position when none was fixed at construction."""
        if self.randomize:
            self.crop_position = random.choice(self.crop_positions)
class RandomHorizontalFlip(object):
    """Mirror an image left-to-right when the stored draw `p` is below 0.5.

    `p` is redrawn by randomize_parameters(). It is also drawn once at
    construction so the transform can be called before the first explicit
    randomize_parameters() (previously that raised AttributeError).
    """
    def __init__(self):
        self.randomize_parameters()
    def __call__(self,img):
        if self.p < 0.5:
            return img.transpose(Image.FLIP_LEFT_RIGHT)
        return img
    def randomize_parameters(self):
        """Redraw the flip decision."""
        self.p = random.random()
class RandomCrop(object):
    """Crop a fixed-size window at a random offset.

    size -- a number (square crop) or a (height, width) pair.

    The offset is stored as fractions `x` and `y` in [0, 1) of the free
    horizontal/vertical space and is redrawn by randomize_parameters().
    """
    def __init__(self,size):
        # Was spelled `__init_`, so the constructor never ran and
        # RandomCrop(size) raised TypeError.
        if isinstance(size,numbers.Number):
            self.size = (int(size),int(size))
        else:
            self.size = size
        # Draw an initial offset so the transform is usable immediately.
        self.randomize_parameters()
    def __call__(self,img):
        w,h = img.size
        th,tw = self.size
        x1 = int(self.x*(w-tw))
        y1 = int(self.y*(h-th))
        x2 = x1 + tw
        y2 = y1 + th
        return img.crop((x1,y1,x2,y2))
    def randomize_parameters(self):
        """Redraw the crop offset."""
        self.x = random.random()
        self.y = random.random()

In [0]:
## temporal_transforms.py
## 视频图像数据在时间域的预处理流程
import cv2 
import random 
import numpy as np 
import collections
import numbers
from PIL import Image

class TemporalBeginCrop(object):
    """Keep the first `size` frame indices.

    When fewer than `size` indices are available, the kept indices are
    repeated from the start until the result has `size` entries.
    """
    def __init__(self,size):
        self.size = size
    def __call__(self,frame_indices):
        clip = frame_indices[:self.size]
        cursor = 0
        # Pad by re-reading the clip from its beginning (loop playback).
        while cursor < len(clip) and len(clip) < self.size:
            clip.append(clip[cursor])
            cursor += 1
        return clip
class TemporalCenterCrop(object):
    """Keep `size` frame indices taken around the middle of the sequence.

    When fewer than `size` indices are available, the kept indices are
    repeated from the start until the result has `size` entries.
    """
    def __init__(self,size):
        self.size = size
    def __call__(self,frame_indices):
        total = len(frame_indices)
        start = max(0,total // 2 - (self.size // 2))
        stop = min(start + self.size,total)
        clip = frame_indices[start:stop]
        cursor = 0
        # Pad by re-reading the clip from its beginning (loop playback).
        while cursor < len(clip) and len(clip) < self.size:
            clip.append(clip[cursor])
            cursor += 1
        return clip
class TemporalRandomCrop(object):
    """Keep `size` consecutive frame indices starting at a random position.

    The start is drawn uniformly from every position that leaves a full
    window, i.e. 0 .. len(frame_indices) - size inclusive. When fewer than
    `size` indices are available, the kept indices are repeated from the
    start until the result has `size` entries.
    """
    def __init__(self,size):
        self.size = size
    def __call__(self,frame_indices):
        # randint is inclusive on both ends, so the upper bound is len - size.
        # The previous bound (len - size - 1) could never select the last
        # valid window, so the final frame was never sampled.
        last_start = max(0,len(frame_indices) - self.size)
        begin_index = random.randint(0,last_start)
        end_index = min(begin_index + self.size,len(frame_indices))
        out = frame_indices[begin_index:end_index]
        cursor = 0
        # Pad by re-reading the clip from its beginning (loop playback).
        while cursor < len(out) and len(out) < self.size:
            out.append(out[cursor])
            cursor += 1
        return out


In [0]:
## dataset.py 
## data generator
import os 
import random
import math
import numpy as np 
from keras.utils import Sequence
from spatial_transforms import RandomCrop,Scale,RandomScale,RandomHorizontalFlip,CenterCrop,Compose,Normalize
from temporal_transforms import TemporalRandomCrop,TemporalCenterCrop
from PIL import Image

def load_image(image_path):
    """Open the file at `image_path` with PIL and return it converted to RGB."""
    with open(image_path,'rb') as handle, Image.open(handle) as picture:
        return picture.convert('RGB')
def load_clip_video(video_path,frame_indices):
    """Load frames 'image_<index:05d>.jpg' from `video_path` in the given order.

    Loading stops at the first index whose file does not exist; the frames
    loaded so far are returned.
    """
    frames = []
    for frame_no in frame_indices:
        frame_file = os.path.join(video_path,'image_{:05d}.jpg'.format(frame_no))
        if not os.path.exists(frame_file):
            break
        frames.append(load_image(frame_file))
    return frames

def get_ucf101(video_path,file_path,name_path,mode,num_classes):
    """Read a UCF-101 style split list and return (video directories, labels).

    video_path  -- root directory holding the per-video frame folders
                   (e.g. UCF-101_jpg/)
    file_path   -- split list (train.txt or val.txt), one video per line
    name_path   -- class index file (classInd.txt); the second whitespace
                   separated field of each line is the class name
    mode        -- 'train' (lines are '<path> <label>') or 'val'
    num_classes -- expected number of classes in name_path

    Labels are the 0-based line positions of the class names in name_path.
    The class of a video is the first '/'-separated component of its path.
    Blank lines in either file are ignored.

    Raises ValueError for an unknown mode, AssertionError when num_classes
    does not match name_path, and KeyError for an unknown class name.
    """
    if mode not in ('train','val'):
        raise ValueError('mode must be train or val')

    name2index = {}
    # `with` closes the handles; they were previously left open.
    with open(name_path,'r') as name_file:
        class_lines = [line for line in name_file if line.strip()]
    for i,line in enumerate(class_lines):
        class_name = line.split()[1]
        name2index[str(class_name)] = i
    assert num_classes == len(name2index)

    video_files = []
    label_files = []
    with open(file_path,'r') as list_file:
        for path_label in list_file:
            fields = path_label.split()
            if not fields:
                continue
            if mode == 'train':
                path,_ = fields
            else:
                path = fields[0]
            pathname,_ = os.path.splitext(path)
            video_files.append(os.path.join(video_path,pathname))
            label = pathname.split('/')[0]
            label_files.append(name2index[label])
    return video_files,label_files

class DataGenerator(Sequence):
    """keras Sequence yielding (clips, one-hot labels) batches of video frames.

    data_name   -- dataset identifier; only 'ucf101' is supported
    video_path, file_path, name_path, mode, num_classes -- forwarded to
                   get_ucf101(); mode is 'train' or 'val' and also selects the
                   spatial/temporal transform pipelines
    batch_size  -- number of samples per batch
    shuffle     -- shuffle the sample list at construction and after each epoch
    short_side  -- candidate short-side lengths for RandomScale (train only)
    crop_size   -- spatial crop size
    clip_len    -- number of frames per clip
    n_samples_for_each_video -- 1 gives one sample covering the whole video;
                   other values split each video into several windows

    Raises ValueError for an unsupported data_name or mode.
    """
    def __init__(self,data_name,video_path,file_path,name_path,mode,batch_size,num_classes,shuffle,short_side=[256,320],crop_size=224,clip_len=64,n_samples_for_each_video=1):
        self.batch_size = batch_size
        self.num_classes = num_classes
        self.shuffle = shuffle
        if data_name != 'ucf101':
            # Fail early instead of leaving video_files/label_files undefined.
            raise ValueError('unsupported data_name: {}'.format(data_name))
        self.video_files,self.label_files = get_ucf101(video_path, file_path, name_path, mode, num_classes)
        if mode == 'train':
            self.spatial_transforms = Compose([RandomScale(short_side),RandomCrop(crop_size),RandomHorizontalFlip(),Normalize()])
            # Attribute was misspelled `temporal_transorms`, so data_generator()
            # raised AttributeError in train mode.
            self.temporal_transforms = TemporalRandomCrop(clip_len)
        elif mode == 'val':
            self.spatial_transforms = Compose([Scale(crop_size),CenterCrop(crop_size),Normalize()])
            self.temporal_transforms = TemporalCenterCrop(clip_len)
        else:
            raise ValueError('mode must be train or val')
        self.dataset = self.makedataset(n_samples_for_each_video, clip_len)
        if self.shuffle:
            random.shuffle(self.dataset)

    def __len__(self):
        # Count batches over the samples actually built: missing videos are
        # skipped and one video may contribute several samples.
        return math.ceil(len(self.dataset)/self.batch_size)

    def __getitem__(self,index):
        batch_dataset = self.dataset[index*self.batch_size:(index+1)*self.batch_size]
        video_data,label_data = self.data_generator(batch_dataset)
        return video_data,label_data

    def on_epoch_end(self):
        if self.shuffle:
            random.shuffle(self.dataset)

    @staticmethod
    def _count_frames(video_dir):
        """Count the 'image_*.jpg' files in `video_dir`."""
        # assumes frames are numbered contiguously from 1, which is what
        # load_clip_video() needs — TODO confirm against the extraction tool
        return sum(1 for name in os.listdir(video_dir) if name.startswith('image_') and name.endswith('.jpg'))

    def makedataset(self,n_samples_for_each_video,clip_len):
        """Build the list of sample dicts (video_path, label, frame_indices)."""
        dataset = []
        for i,video_file in enumerate(self.video_files):
            if i % 1000 == 0:
                print('dataset loading [{}/{}]'.format(i, len(self.video_files)))
            if not os.path.isdir(video_file):
                print('{} is not exist'.format(video_file))
                continue
            # n_frames: number of images exported for this video. This used to
            # be a hard-coded 100000, which produced indices of frames that do
            # not exist and therefore empty clips.
            n_frames = self._count_frames(video_file)
            if n_frames <= 0:
                continue
            sample = {'video_path':video_file,'label':int(self.label_files[i])}
            if n_samples_for_each_video == 1:
                sample['frame_indices'] = list(range(1,n_frames+1))
                dataset.append(sample)
                continue
            if n_samples_for_each_video > 1:
                step = max(1,math.ceil((n_frames-1-clip_len)/(n_samples_for_each_video-1)))
            else:
                step = clip_len
            for j in range(1,n_frames,step):
                # A shallow copy is enough (values are str/int) and avoids the
                # `copy` module, which this file never imported.
                sample_j = dict(sample)
                sample_j['frame_indices'] = list(range(j,min(n_frames+1,j+clip_len)))
                dataset.append(sample_j)
        return dataset

    def data_generator(self,batch_dataset):
        """Load, transform and stack the clips of one batch."""
        video_data = []
        label_data = []
        for data in batch_dataset:
            path = data['video_path']
            frame_indices = data['frame_indices']
            if self.temporal_transforms is not None:
                frame_indices = self.temporal_transforms(frame_indices)
            clip = load_clip_video(path,frame_indices)
            if not clip:
                raise ValueError('no frames could be loaded from {}'.format(path))
            if self.spatial_transforms is not None:
                # One draw per clip so every frame gets the same augmentation.
                self.spatial_transforms.randomize_parameters()
                clip = [self.spatial_transforms(img) for img in clip]
            clip = np.stack(clip,0)
            video_data.append(clip)
            label_data.append(data['label'])
        video_data = np.array(video_data)
        label_data = np.eye(self.num_classes)[label_data]
        return video_data,label_data

In [0]:
## opt.py
import argparse
def parse_opts(argv=None):
    """Parse the training options and return them as an argparse Namespace.

    argv -- optional list of argument strings. When None (the default) the
            process command line is parsed, exactly as before.

    Changes from the previous definition:
      * --gpus accepts one or more ids and is always a list (default [0]),
        matching the len(opt.gpus) / ','.join(...) usage in train.py
      * --epochs is parsed as int instead of str
      * --result_path was added; train.py reads opt.result_path
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--root_path',default=None,type=str,help='Root directory path of data')
    parser.add_argument('--video_path',default='UCF-101_jpg',type=str,help='Directory of videos')
    parser.add_argument('--name_path',default=None,type=str,help='Directory of classes name')
    parser.add_argument('--train_list',default=None,type=str,help='Path to training list')
    parser.add_argument('--val_list',default=None,type=str,help='Path to validation list')
    parser.add_argument('--data_name',default='ucf101',type=str,help='Name of dataset')
    parser.add_argument('--gpus',default=[0],type=int,nargs='+',help='GPUs for running')
    parser.add_argument('--log_dir',default='log',type=str,help='Path to save log')
    parser.add_argument('--result_path',default='results',type=str,help='Path to save checkpoints')
    parser.add_argument('--num_classes',default=101,type=int,help='Number of classes')
    parser.add_argument('--crop_size',default=224,type=int,help='Size of crop image input')
    parser.add_argument('--clip_len',default=64,type=int,help='Length of videos')
    parser.add_argument('--short_side',default=[256, 320],type=int,nargs='+',help='Short side of the image')
    parser.add_argument('--n_samples_for_each_video',default=1,type=int,help='Number of samples of each video')
    parser.add_argument('--lr',default=1.6,type=float,help='Learning rate')
    parser.add_argument('--momentum',default=0.9,type=float,help='Momentum')
    parser.add_argument('--weight_decay',default=1e-4,type=float,help='Weight decay')
    parser.add_argument('--lr_decay',default=0.8,type=float,help='Decay rate of learning rate')
    parser.add_argument('--cycle_length',default=10,type=int,help='Epochs to restart cycle when using SGDR')
    parser.add_argument('--multi_factor',default=1.5,type=float,help='Increasing rate of cycle length')
    parser.add_argument('--warm_up_epoch',default=5,type=int,help='Using warmup at first several epochs')
    parser.add_argument('--optimizer',default='SGD',type=str,help='Optimizer')
    parser.add_argument('--batch_size',default=32,type=int,help='Batch size for training')
    parser.add_argument('--epochs',default=100,type=int,help='Epochs for training')
    parser.add_argument('--workers',default=4,type=int,help='Worker for loading data')
    parser.add_argument('--network',default='resnet50',type=str,help='Network')
    parser.add_argument('--pretrained_weights',default=None,type=str,help='Path to pre-trained model')

    # parse_args(None) falls back to sys.argv[1:].
    args = parser.parse_args(argv)

    return args

In [0]:
## train.py
import os

import numpy as np 
import tensorflow as tf 
from keras import backend as K
from keras.callbacks import Callback,ModelCheckpoint,EarlyStopping,TensorBoard
from keras.layers import Input
from keras.losses import categorical_crossentropy
from keras.optimizers import SGD,Adam
from keras.utils import multi_gpu_model

from dataset import DataGenerator
from nets import resnet50
from opts import parse_opts
def get_optimizer(opt):
    """Build the optimizer selected by opt.optimizer ('SGD' or 'Adam').

    Reads opt.lr, opt.weight_decay and, for SGD, opt.momentum.
    Raises ValueError for any other optimizer name (previously this surfaced
    as an UnboundLocalError).
    """
    # NOTE(review): opt.weight_decay is passed as the optimizer's `decay`
    # argument, which in Keras 2.x is a learning-rate decay, not an L2 weight
    # penalty — confirm this is intended.
    if opt.optimizer == 'SGD':
        return SGD(lr=opt.lr,momentum=opt.momentum,decay=opt.weight_decay)
    if opt.optimizer == 'Adam':
        return Adam(lr=opt.lr,decay=opt.weight_decay)
    raise ValueError('Unsupported optimizer: {}'.format(opt.optimizer))
class ParallelModelCheckpoint(ModelCheckpoint):
    """ModelCheckpoint that always saves a fixed model.

    Whatever model the training loop attaches through set_model() is ignored
    and `model` (given at construction) is attached instead. train() in this
    file uses it so that the single-GPU template model is the one saved when
    training runs on a multi_gpu_model wrapper.
    """
    def __init__(self,model,filepath,monitor='val_loss',verbose=0,save_best_only=False,save_weights_only=False,mode='auto', period=1):
        self.single_model = model
        # The parameter is `filepath`; the old call passed the undefined name
        # `file_path`.
        super(ParallelModelCheckpoint,self).__init__(filepath,monitor=monitor,verbose=verbose,save_best_only=save_best_only,save_weights_only=save_weights_only,mode=mode,period=period)
    def set_model(self,model):
        super(ParallelModelCheckpoint,self).set_model(self.single_model)
class SGDRScheduler_with_WarmUp(Callback):
    """Per-batch learning-rate schedule: quadratic warm-up, then cosine restarts.

    min_lr, max_lr  -- bounds of the cosine schedule
    steps_per_epoch -- number of batches per epoch
    lr_decay        -- factor applied to max_lr at every restart
    cycle_length    -- length of the first cosine cycle, in epochs
    multi_factor    -- factor applied to cycle_length at every restart
    warm_up_epoch   -- number of warm-up epochs before the cosine schedule

    During warm-up the rate is max_lr * (batch / warm_up_batches)**2. The
    weights present at the most recent restart are stored and restored when
    training ends; if no restart happened the weights are left as they are.
    """
    def __init__(self,min_lr,max_lr,steps_per_epoch,lr_decay=1,cycle_length=10,multi_factor=2,warm_up_epoch=5):
        super(SGDRScheduler_with_WarmUp,self).__init__()
        self.min_lr = min_lr
        self.max_lr = max_lr
        self.steps_per_epoch = steps_per_epoch
        self.lr_decay = lr_decay
        self.cycle_length = cycle_length
        self.multi_factor = multi_factor
        self.warm_up_epoch = warm_up_epoch
        self.is_warming = True
        self.warm_up_batch = 1
        self.batch_since_restart = 0
        self.next_restart = None
        # Stays None until the first restart; on_train_end() checks for it.
        self.best_weights = None
        self.history = {}
    def sgdr_lr(self):
        """Cosine-annealed rate for the current position inside the cycle."""
        # Was self.per_epoch, an attribute that is never defined.
        fraction_to_restart = self.batch_since_restart / (self.steps_per_epoch * self.cycle_length)
        lr = self.min_lr + 0.5 * (self.max_lr - self.min_lr) * (1 + np.cos(fraction_to_restart * np.pi))
        return lr
    def warm_lr(self):
        """Quadratic warm-up rate for the current warm-up batch."""
        warm_up_batches = self.steps_per_epoch * self.warm_up_epoch
        if warm_up_batches <= 0:
            # No warm-up configured: avoid dividing by zero.
            return self.max_lr
        progress = self.warm_up_batch / warm_up_batches
        return self.max_lr * progress * progress
    def on_train_begin(self,logs={}):
        logs = logs or {}
        self.warm_up_batch = 1
        K.set_value(self.model.optimizer.lr,self.warm_lr())
    def on_batch_end(self,batch,logs={}):
        logs = logs or {}
        self.history.setdefault('lr',[]).append(K.get_value(self.model.optimizer.lr))
        for k,v in logs.items():
            self.history.setdefault(k,[]).append(v)

        if self.is_warming:
            self.warm_up_batch += 1
            K.set_value(self.model.optimizer.lr,self.warm_lr())
        else:
            self.batch_since_restart += 1
            K.set_value(self.model.optimizer.lr,self.sgdr_lr())
    def on_epoch_begin(self,epoch,logs={}):
        if epoch == self.warm_up_epoch:
            # Warm-up finished: start the first cosine cycle.
            self.is_warming = False
            self.batch_since_restart = 0
            self.next_restart = self.cycle_length + epoch
    def on_epoch_end(self,epoch,logs={}):
        if epoch >= self.warm_up_epoch:
            # `==` comparison; the original used `=`, which is a SyntaxError.
            if epoch + 1 == self.next_restart:
                self.batch_since_restart = 0
                self.cycle_length = np.ceil(self.cycle_length * self.multi_factor)
                self.next_restart += self.cycle_length
                self.max_lr *= self.lr_decay
                self.best_weights = self.model.get_weights()
    def on_train_end(self,logs={}):
        if self.best_weights is not None:
            self.model.set_weights(self.best_weights)
class PrintLearningRate(Callback):
    """Print the optimizer's current learning rate after every batch but the first."""
    def on_batch_end(self,batch,logs={}):
        logs = logs or {}
        if batch > 0:
            print('- lr: %.6f'%K.get_value(self.model.optimizer.lr))

# The class was originally declared with a misspelled name while
# create_callbacks() instantiates `PrintLearningRate`. Keep the old spelling
# as an alias so existing references continue to work.
PrintLeraningRate = PrintLearningRate
class TrainPrint(Callback):
    """Print one progress line per batch and the validation metrics per epoch.

    steps_per_epoch -- number of batches per epoch (shown in the batch line)
    max_epoch       -- total number of epochs (shown in the batch line)
    """
    def __init__(self,steps_per_epoch,max_epoch):
        super(TrainPrint,self).__init__()
        self.steps_per_epoch = steps_per_epoch
        self.max_epoch = max_epoch
        self.epoch = 0
        self.log = 'epoch [%.3d]/[%.3d] batch [%d/%d] loss %.4f lr %.6f acc %.2f'
    def on_epoch_begin(self,epoch,logs={}):
        self.epoch = int(epoch)
    def on_batch_end(self,batch,logs={}):
        logs = logs or {}
        missing = float('nan')
        # The accuracy metric is logged as 'acc' or 'accuracy' depending on
        # the Keras version; accept either instead of raising KeyError.
        acc = logs.get('acc',logs.get('accuracy',missing))
        loss = logs.get('loss',missing)
        print(self.log%(self.epoch,self.max_epoch,batch,self.steps_per_epoch,loss,K.get_value(self.model.optimizer.lr),acc))
    def on_epoch_end(self,epoch,logs={}):
        logs = logs or {}
        val_acc = logs.get('val_acc',logs.get('val_accuracy'))
        # The old test `'val_loss' and 'val_acc' in logs.keys()` only checked
        # 'val_acc', because the non-empty string 'val_loss' is always truthy.
        if 'val_loss' in logs and val_acc is not None:
            print('Validate on epoch {}: loss {} acc {}'.format(epoch,logs['val_loss'],val_acc))

def create_callbacks(opt,steps_per_epoch,model=None):
    """Create the list of callbacks used for training.

    opt             -- options namespace; reads root_path, log_dir,
                       result_path (falls back to 'results' when absent), lr,
                       lr_decay, cycle_length, multi_factor and warm_up_epoch
    steps_per_epoch -- batches per epoch, needed by the LR scheduler
    model           -- when given, checkpoints save this model through
                       ParallelModelCheckpoint; otherwise a plain
                       ModelCheckpoint is used

    Returns [tensorboard, lr scheduler, lr printer, checkpoint, early stopping].
    The log and result directories are created if they do not exist.
    """
    # root_path defaults to None on the command line; treat that as the
    # current directory instead of crashing inside os.path.join.
    root_path = opt.root_path or ''
    log_dir = os.path.join(root_path,opt.log_dir)
    os.makedirs(log_dir,exist_ok=True)
    tensorboard = TensorBoard(log_dir=log_dir,write_graph=True)
    result_path = os.path.join(root_path,getattr(opt,'result_path','results'))
    os.makedirs(result_path,exist_ok=True)
    # NOTE(review): 'val_acc' is the metric name in older Keras 2.x; newer
    # releases log 'val_accuracy' — confirm against the installed version.
    weights_pattern = os.path.join(result_path,'ep{epoch:03d}-val_acc{val_acc:.2f}.h5')
    checkpoint_options = dict(monitor='val_acc',save_weights_only=True,save_best_only=False,period=1)
    if model is not None:
        checkpoint = ParallelModelCheckpoint(model,weights_pattern,**checkpoint_options)
    else:
        checkpoint = ModelCheckpoint(weights_pattern,**checkpoint_options)
    early_stopping = EarlyStopping(monitor='val_acc',min_delta=0,patience=10)
    learning_rate_scheduler = SGDRScheduler_with_WarmUp(0,opt.lr,steps_per_epoch,lr_decay=opt.lr_decay,cycle_length=opt.cycle_length,multi_factor=opt.multi_factor,warm_up_epoch=opt.warm_up_epoch)
    print_lr = PrintLearningRate()
    return [tensorboard,learning_rate_scheduler,print_lr,checkpoint,early_stopping]

def train(opt):
    """Build the resnet50 model, train it with the generators and save it.

    opt -- options namespace as produced by parse_opts(). opt.gpus may be a
           single id or a list of ids; more than one id enables
           multi_gpu_model. opt.epochs is converted to int.

    The trained single-device model is written to 'slowFast.h5'.
    """
    K.clear_session()
    video_input = Input(shape=(None,None,None,3))
    # NOTE(review): opt.network is only printed; resnet50 is always built.
    model = resnet50(video_input,num_classes=opt.num_classes)
    print('Create {} model with {} classes'.format(opt.network,opt.num_classes))
    if opt.pretrained_weights is not None:
        model.load_weights(opt.pretrained_weights)
        print('Loading weights from {}'.format(opt.pretrained_weights))
    optimizer = get_optimizer(opt)
    train_data_generator = DataGenerator(opt.data_name,opt.video_path,opt.train_list,opt.name_path,'train',opt.batch_size,opt.num_classes,True,opt.short_side,opt.crop_size,opt.clip_len,opt.n_samples_for_each_video)
    val_data_generator = DataGenerator(opt.data_name,opt.video_path,opt.val_list,opt.name_path,'val',opt.batch_size,opt.num_classes,False,opt.short_side,opt.crop_size,opt.clip_len,opt.n_samples_for_each_video)
    train_steps = max(1,len(train_data_generator))
    val_steps = max(1,len(val_data_generator))
    callbacks = create_callbacks(opt,train_steps,model)
    # Accept both a single GPU id and a list of ids; len() on an int raised
    # TypeError before.
    gpus = opt.gpus if isinstance(opt.gpus,(list,tuple)) else [opt.gpus]
    if len(gpus) > 1:
        print('Using multi gpus')
        training_model = multi_gpu_model(model,gpus=len(gpus))
    else:
        training_model = model
    training_model.compile(optimizer=optimizer,loss=categorical_crossentropy,metrics=['accuracy'])
    # int(): the option may arrive as a string from the command line.
    training_model.fit_generator(train_data_generator,steps_per_epoch=train_steps,epochs=int(opt.epochs),validation_data=val_data_generator,validation_steps=val_steps,callbacks=callbacks)
    model.save('slowFast.h5')
if __name__ == '__main__':
    opt = parse_opts()
    print(opt)
    # opt.gpus may be a single id or a list of ids; normalise before len().
    gpus = opt.gpus if isinstance(opt.gpus,(list,tuple)) else [opt.gpus]
    if len(gpus) > 1:
        # Restrict the process to the requested devices.
        os.environ['CUDA_VISIBLE_DEVICES'] = ','.join(map(str,gpus))
    train(opt)
    

In [0]:
## pre and visualize 
## pre_visualize.py
from keras.models import load_model
from nets import resnet50
from keras.preprocessing