In [1]:
import torch
print(torch.cuda.device_count())

1


In [1]:
from keras.layers import Dense,Dropout,Conv3D,Input,MaxPool3D,Flatten,Activation, ZeroPadding3D
from keras.regularizers import l2
from keras.models import Model, Sequential

In [3]:
# 输入数据为 112×112 的图片，16帧， 3通道
input_shape = (112,112,16,3)
# 权重衰减率
weight_decay = 0.005
# 类型数量，我们使用UCF-101 为数据集，所以为101
nb_classes = 21

# 构建模型结构
inputs = Input(input_shape)

x = Conv3D(64,(3,3,3),strides=(1,1,1),padding='same',
           activation='relu',kernel_regularizer=l2(weight_decay))(inputs)
x = MaxPool3D((2,2,1),strides=(2,2,1),padding='same')(x)

x = Conv3D(128,(3,3,3),strides=(1,1,1),padding='same',
           activation='relu',kernel_regularizer=l2(weight_decay))(x)
x = MaxPool3D((2,2,2),strides=(2,2,2),padding='same')(x)

x = Conv3D(128,(3,3,3),strides=(1,1,1),padding='same',
           activation='relu',kernel_regularizer=l2(weight_decay))(x)
x = MaxPool3D((2,2,2),strides=(2,2,2),padding='same')(x)

x = Conv3D(256,(3,3,3),strides=(1,1,1),padding='same',
           activation='relu',kernel_regularizer=l2(weight_decay))(x)
x = MaxPool3D((2,2,2),strides=(2,2,2),padding='same')(x)

x = Conv3D(256, (3, 3, 3), strides=(1, 1, 1), padding='same',
           activation='relu',kernel_regularizer=l2(weight_decay))(x)
x = MaxPool3D((2, 2, 2), strides=(2, 2, 2), padding='same')(x)

x = Flatten()(x)
x = Dense(2048,activation='relu',kernel_regularizer=l2(weight_decay))(x)
x = Dropout(0.5)(x)
x = Dense(2048,activation='relu',kernel_regularizer=l2(weight_decay))(x)
x = Dropout(0.5)(x)
x = Dense(nb_classes,kernel_regularizer=l2(weight_decay))(x)
x = Activation('softmax')(x)

model = Model(inputs, x)

In [4]:
model.summary()

Model: "model_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_2 (InputLayer)        [(None, 112, 112, 16, 3)  0         
                             ]                                   
                                                                 
 conv3d_5 (Conv3D)           (None, 112, 112, 16, 64)  5248      
                                                                 
 max_pooling3d_5 (MaxPooling  (None, 56, 56, 16, 64)   0         
 3D)                                                             
                                                                 
 conv3d_6 (Conv3D)           (None, 56, 56, 16, 128)   221312    
                                                                 
 max_pooling3d_6 (MaxPooling  (None, 28, 28, 8, 128)   0         
 3D)                                                             
                                                           

In [5]:
model.input

<KerasTensor: shape=(None, 112, 112, 16, 3) dtype=float32 (created by layer 'input_2')>

shape = (batch size, width, height, frames, channels)

In [6]:
# 引用必要的库
from keras.optimizers import SGD,Adam
from keras.utils import np_utils

import numpy as np
import random
import cv2

import matplotlib.pyplot as plt
import os

# # 自定义callbacks
# from schedules import onetenth_4_8_12

In [7]:
img_path = 'E:\\train_data\\jpgs\\'  # 图片文件存储位置
results_path = 'E:\\train_data\\results\\'  # 训练结果保存位置
if not os.path.exists(results_path):
    os.mkdir(results_path)

In [8]:
cates = os.listdir(img_path)
train_list = []
test_list = []
# 遍历所有的动作类型
for cate in cates:
    videos = os.listdir(os.path.join(img_path, cate))
    length = len(videos)//5
    # 训练集大小，随机取视频文件加入训练集
    train= random.sample(videos, length*4)
    train_list.extend(train)
    # 将余下的视频加入测试集
    for video in videos:
        if video not in train:
            test_list.append(video)
print("训练集为：")    
print( train_list)
print("共%d 个视频\n"%(len(train_list)))
print("验证集为：")            
print(test_list)
print("共%d 个视频"%(len(test_list)))

训练集为：
['v_ApplyEyeMakeup_g09_c06', 'v_ApplyEyeMakeup_g24_c05', 'v_ApplyEyeMakeup_g09_c05', 'v_ApplyEyeMakeup_g07_c01', 'v_ApplyEyeMakeup_g17_c05', 'v_ApplyEyeMakeup_g18_c04', 'v_ApplyEyeMakeup_g10_c03', 'v_ApplyEyeMakeup_g10_c01', 'v_ApplyEyeMakeup_g05_c01', 'v_ApplyEyeMakeup_g18_c01', 'v_ApplyEyeMakeup_g16_c02', 'v_ApplyEyeMakeup_g15_c02', 'v_ApplyEyeMakeup_g16_c03', 'v_ApplyEyeMakeup_g12_c04', 'v_ApplyEyeMakeup_g09_c03', 'v_ApplyEyeMakeup_g23_c01', 'v_ApplyEyeMakeup_g19_c01', 'v_ApplyEyeMakeup_g04_c07', 'v_ApplyEyeMakeup_g02_c02', 'v_ApplyEyeMakeup_g23_c06', 'v_ApplyEyeMakeup_g08_c05', 'v_ApplyEyeMakeup_g13_c03', 'v_ApplyEyeMakeup_g12_c01', 'v_ApplyEyeMakeup_g17_c04', 'v_ApplyEyeMakeup_g10_c02', 'v_ApplyEyeMakeup_g14_c05', 'v_ApplyEyeMakeup_g07_c07', 'v_ApplyEyeMakeup_g22_c02', 'v_ApplyEyeMakeup_g25_c04', 'v_ApplyEyeMakeup_g19_c03', 'v_ApplyEyeMakeup_g12_c02', 'v_ApplyEyeMakeup_g15_c06', 'v_ApplyEyeMakeup_g20_c01', 'v_ApplyEyeMakeup_g09_c02', 'v_ApplyEyeMakeup_g21_c03', 'v_ApplyEyeMa

In [13]:
# 保存数据
def writeData(datalist,savepath):
    index = 1
    for data in datalist:
        with open(savepath, mode="a", encoding="utf-8") as f:
            f.write(str(index) + ' ' + data)   #写数据
            f.write("\n")   #换行
        index = index + 1
    f.close()
writeData(cates,"E:\\train_data\\classInd\\classInd.txt")

In [13]:
# file 是一个文件夹 包含一个短视频的所有帧的图片
def process_data(img_path, file, batch_size=16, train=True):
    # 一个batch包含16帧的图片信息数据
    batch = np.zeros((batch_size,16,112,112,3),dtype='float32')
    # 定义标签
    labels = np.zeros(batch_size,dtype=int)
    # 目录结构
    cate_list = os.listdir(img_path)
    # 读取类别词典
    def read_classes():
        path = "E:\\train_data\\classInd\\classInd.txt"
        with open(path, "r+") as f:
            lines = f.readlines()
        classes = {}
        for line in lines:
            c_id = line.split()[0]
            c_name = line.split()[1]
            classes[c_name] =c_id 
        return classes
    
    classes_dict = read_classes()
    # 对一个视频中的所有帧进行读取
    cate = file.split("_")[1]
    img_list = os.listdir(os.path.join(img_path, cate, file))    # E:\train_data\jpgs\ApplyEyeMakeup\v_ApplyEyeMakeup_g04_c01\...jpg
    img_list.sort()
    batch_img = []
    # # 分多少块 多少个16帧
    # symbol = len(img_list)//16
    index = 0
    path = os.path.join(img_path, cate, file)
    label =  int(classes_dict[cate])-1
    for i in range(batch_size):
        # symbol = len(img_list)//16
        if train:
            # 随机进行裁剪
            crop_x = random.randint(0, 15)
            crop_y = random.randint(0, 58)
            # 随机进行翻转
            is_flip = random.randint(0, 1)
            # 以16 帧为单位
            for j in range(16):
                if len(img_list) - index * 16 < 16:
                    break
                img = img_list[index * 16 + j]
                image = cv2.imread( path + '/' + img)
                image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
                image = cv2.resize(image, (171, 128))
                if is_flip == 1:
                    image = cv2.flip(image, 1)
                batch[i][j][:][:][:] = image[crop_x:crop_x + 112, crop_y:crop_y + 112, :]
                # symbol-=1
                # if symbol<0:
                #     break
        # 不是训练集则不进行旋转 随机裁剪等处理
        else:
            for j in range(16):
                if len(img_list) - index * 16 < 16:
                    break
                img = img_list[index * 16 + j]
                image = cv2.imread( path + '/' + img)
                image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
                image = cv2.resize(image, (171, 128))
                batch[i][j][:][:][:] = image[8:120, 30:142, :]
                # symbol-=1
                # if symbol<0:
                #     break
        labels[i] = label
        index+=1
    return batch, labels



该Process_data是处理一个batch的

In [15]:
# batch, labels = process_data(img_path, train_list)

# print("每个batch的形状为：%s"%(str(batch.shape)))
# print("每个label的形状为：%s"%(str(labels.shape)))

每个batch的形状为：(16, 16, 112, 112, 3)
每个label的形状为：(16,)


In [10]:
def preprocess(inputs):
    inputs[..., 0] -= 99.9
    inputs[..., 1] -= 92.1
    inputs[..., 2] -= 82.6
    inputs[..., 0] /= 65.8
    inputs[..., 1] /= 62.3
    inputs[..., 2] /= 60.3
    return inputs

In [11]:
def generator_train_batch(train_list, batch_size, num_classes, img_path):
    for file in train_list:
        # 读取一个batch的数据
        x_train, x_labels = process_data(img_path, file, batch_size,train=True) 
        x = preprocess(x_train)
        # 形成input要求的数据格式
        y = np_utils.to_categorical(np.array(x_labels), num_classes)
        # 将数据转置成模型所对应的shape
        x = np.transpose(x, (0,2,3,1,4))
        yield x, y
def generator_val_batch(test_list, batch_size, num_classes, img_path):
    for file in test_list:
        # 读取一个batch的数据
        y_test,y_labels = process_data(img_path, file, batch_size,train=False)
        x = preprocess(y_test)
        # 形成input要求的数据格式
        x = np.transpose(x,(0,2,3,1,4))
        y = np_utils.to_categorical(np.array(y_labels), num_classes)
        yield x, y

In [16]:
# 训练一个epoch大约需4分钟
# 类别数量
num_classes = 21
# batch大小
batch_size = 16
# epoch数量
epochs = 1
# 学习率大小
lr = 0.00001
# 优化器定义
sgd = SGD(lr=lr, momentum=0.9, nesterov=True)
model.compile(loss='categorical_crossentropy', optimizer=sgd, metrics=['accuracy'])

In [14]:
generator = generator_train_batch(train_list, batch_size, num_classes,img_path)
x , y = next(generator)
print(x.shape)
print(y.shape)
print('*' * 20)

(16, 112, 112, 16, 3)
(16, 21)
********************


In [17]:
# 开始训练
# steps_per_epochs:是指在每个epoch中生成器执行生成数据的次数
history = model.fit_generator(generator_train_batch(train_list, batch_size, num_classes,img_path),
                              steps_per_epoch= len(train_list) // batch_size,
                              epochs=epochs,
                              callbacks=None,
                              validation_data=generator_val_batch(test_list, batch_size,num_classes,img_path),
                              validation_steps= len(test_list) // batch_size,
                              verbose=1)
# 对训练结果进行保存
model.save_weights(os.path.join(results_path, 'weights_c3d.h5'))

  history = model.fit_generator(generator_train_batch(train_list, batch_size, num_classes,img_path),


  5/137 [>.............................] - ETA: 2:01:08 - loss: 332.0240 - accuracy: 1.0000