In [7]:
#在该版本中测试进行 采用变长输入的改进，即读取时间长度信息并作为变长输入的依据

In [1]:
# 第二版不同之处在于使用全连接层进行特征融合
#Step1.导入包并选定设备
import torch 
import torch.nn as nn
import torch.nn.functional as F
import torchvision
import torchvision.transforms as transforms
import torchvision.datasets as dsets
import time
from torch.nn.utils.rnn import pack_padded_sequence,pad_packed_sequence


# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')


  return torch._C._cuda_getDeviceCount() > 0


In [2]:
# Step2. Model hyper-parameters

sequence_length = 300   # frames per sample (the maximum is 300; the author notes this still needs revisiting)
input_size = 3 * 25     # features per frame: 3 coordinates (x, y, z) * 25 joints
hidden_size = 128       # size of the hidden state h_t at every time step
num_layers = 2          # number of stacked recurrent layers
num_classes = 60        # number of output classes
batch_size = 1000       # samples per batch
num_epochs = 50         # number of training epochs
learning_rate = 1e-3    # optimiser learning rate

In [3]:
# 新增代码
# 用于读取数据集
import pickle
import numpy as np
class Feeder(torch.utils.data.Dataset):
    """Dataset that serves skeleton sequences from ``.npy`` arrays plus a pickled label file.

    Args:
        data_path: path to the ``.npy`` sample array. The array must be 5-D.
            According to the comments elsewhere in this notebook the axes are
            (N, C, T, V, M) = (sample, x/y/z coordinate, frame, joint, body).
        seq_len_path: path to the ``.npy`` array holding one entry per sample
            with the number of valid frames of that sample.
        label_path: path to a pickle file containing the pair
            ``(sample_name, label)``.
        window_size: stored on the instance only; no code in this class reads it.
        debug: if True, keep only the first 10 samples.
        mmap: if True, open both ``.npy`` files with ``mmap_mode='r'`` so the
            (large) arrays are not loaded into memory at once.
    """

    def __init__(self,
                 data_path,
                 seq_len_path,
                 label_path,
                 window_size=-1,
                 debug=False,
                 mmap=True):
        self.debug = debug
        self.data_path = data_path
        self.seq_len_path = seq_len_path
        self.label_path = label_path
        self.window_size = window_size
        self.load_data(mmap)

    def load_data(self, mmap):
        """Load labels, samples and sequence lengths from the configured paths.

        Args:
            mmap: if True, the two ``.npy`` files are opened read-only as
                memory maps instead of being read fully into memory.
        """
        # Labels are stored as a pickled (sample_name, label) pair.
        # SECURITY: pickle.load can execute arbitrary code; only open label
        # files that come from a trusted source.
        with open(self.label_path, 'rb') as f:
            self.sample_name, self.label = pickle.load(f)

        # Samples and per-sample lengths.
        mmap_mode = 'r' if mmap else None
        self.data = np.load(self.data_path, mmap_mode=mmap_mode)
        self.seq_len = np.load(self.seq_len_path, mmap_mode=mmap_mode)

        # Debug mode: keep only the first 10 samples of every container.
        if self.debug:
            self.label = self.label[0:10]
            self.data = self.data[0:10]
            self.seq_len = self.seq_len[0:10]
            self.sample_name = self.sample_name[0:10]

        # Unpacking doubles as a check that the sample array is 5-D.
        self.N, self.C, self.T, self.V, self.M = self.data.shape

    def __len__(self):
        """Return the number of samples (length of the label list)."""
        return len(self.label)

    def __getitem__(self, index):
        """Return ``(data, label, seq_len)`` for the sample at ``index``.

        Both arrays are copied out of the backing storage. With ``mmap=True``
        the backing arrays are read-only memory maps, and handing read-only
        arrays to the DataLoader collate function triggers a
        "NumPy array is not writable" warning when they are turned into
        tensors.

        Any per-sample preprocessing (random cropping, padding to
        ``window_size``, random moves) would belong here; none is applied.
        """
        data_numpy = np.array(self.data[index])
        label = self.label[index]
        seq_len = np.array(self.seq_len[index])
        return data_numpy, label, seq_len

In [4]:
# 新增代码
# 用于读取数据
# Datasets are built from the files under ./cooked_data/xsub.
# Feeder signature: Feeder(data_path, seq_len_path, label_path, window_size=-1, debug=False, mmap=True)
train_set = Feeder(
    data_path='./cooked_data/xsub/train_data.npy',
    seq_len_path='./cooked_data/xsub/train_data_seqlen.npy',
    label_path='./cooked_data/xsub/train_label.pkl',
)

test_set = Feeder(
    data_path='./cooked_data/xsub/val_data.npy',
    seq_len_path='./cooked_data/xsub/val_data_seqlen.npy',
    label_path='./cooked_data/xsub/val_label.pkl',
)

# A small debug dataset (first 10 training samples) for inspecting the data.
debug_set = Feeder(
    data_path='./cooked_data/xsub/train_data.npy',
    seq_len_path='./cooked_data/xsub/train_data_seqlen.npy',
    label_path='./cooked_data/xsub/train_label.pkl',
    debug=True,
)

# Training batches are shuffled; the last incomplete batch is dropped.
train_loader = torch.utils.data.DataLoader(
    train_set,
    batch_size=batch_size,
    shuffle=True,
    num_workers=4,
    drop_last=True,
)

# Evaluation batches keep the dataset order; the last incomplete batch is
# dropped here as well, so those samples are not evaluated.
test_loader = torch.utils.data.DataLoader(
    test_set,
    batch_size=batch_size,
    num_workers=4,
    drop_last=True,
)

# All 10 debug samples arrive in a single batch, loaded in the main process.
debug_loader = torch.utils.data.DataLoader(
    debug_set,
    batch_size=10,
    num_workers=0,
)



In [5]:
# 第0维代表是第几个样本
# 第一维存的是关节的x,y,z坐标
# 第二维用于标识是哪一帧
# 第三维用于标识是哪个关节
# 第四维用于表示是哪个身体
# 1*3*300*25*2->1*3*300*25*1最后只取了一个身体方便计算
# batch_x = batch_x.view(-1,sequence_length,input_size)

# An earlier, now disabled probe printed the five sizes of batch_x, kept body 0
# and viewed it as (-1, 300, 75); it was written for a loader yielding two items.

# Show the labels and the per-sample sequence lengths of every debug batch.
for batch_x, batch_y, seq_len in debug_loader:
    print(batch_y, seq_len, sep='\n')

tensor([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
tensor([[103.],
        [158.],
        [104.],
        [ 99.],
        [ 54.],
        [ 75.],
        [ 86.],
        [ 71.],
        [ 67.],
        [ 69.]])


  return default_collate([torch.as_tensor(b) for b in batch])


In [6]:
#Step4.模型定义
# 数据类型
# 数据的第0维是序号即代表是哪一个样本
# 第一维存的是关节的x,y,z坐标
# 第二维用于标识是哪一帧
# 第三维用于标识是哪个关节
# 第四维用于表示是哪个身体
class HRNN(nn.Module):
    """Hierarchical bidirectional recurrent classifier for variable-length sequences.

    The per-frame feature vector is split into five equal parts. The forward
    pass then runs four recurrent stages with fully connected fusion layers
    in between:

    1. every part goes through the shared bidirectional ``rnn`` (packed, so
       padding frames are skipped);
    2. part 1 is concatenated with each of the other four parts, fused by
       ``fs1`` and fed to the shared bidirectional ``rnn2``;
    3. the four results are concatenated pairwise, fused by ``fs2`` and fed
       to ``rnn2`` again;
    4. the two results are concatenated, fused by ``fs2`` and fed to the
       bidirectional ``lstm``; ``fc`` turns LSTM states into class scores.

    Args:
        input_size: number of features per frame; must be divisible by 5.
        hidden_size: hidden width of the upper layers; must be divisible by 4
            (the first-layer RNN uses ``hidden_size // 4`` units per direction
            so that two concatenated bidirectional outputs are ``hidden_size`` wide).
        num_layers: number of stacked layers inside every recurrent module.
        num_classes: number of output classes.

    Raises:
        ValueError: if ``input_size`` is not divisible by 5 or ``hidden_size``
            is not divisible by 4.
    """

    def __init__(self, input_size, hidden_size, num_layers, num_classes):
        super(HRNN, self).__init__()
        if input_size % 5 != 0:
            raise ValueError('input_size must be divisible by 5, got %d' % input_size)
        if hidden_size % 4 != 0:
            raise ValueError('hidden_size must be divisible by 4, got %d' % hidden_size)

        self.hidden_size = hidden_size
        self.num_layers = num_layers

        # batch_first=True: tensors are (batch, seq, feature).
        # bidirectional=True: every recurrent output is twice the hidden width.
        self.rnn = nn.RNN(input_size // 5, hidden_size // 4, num_layers, batch_first=True, bidirectional=True)
        self.rnn2 = nn.RNN(hidden_size, hidden_size, num_layers, batch_first=True, bidirectional=True)
        self.lstm = nn.LSTM(hidden_size, hidden_size, num_layers, batch_first=True, bidirectional=True)

        # Classifier on the bidirectional LSTM output (hence hidden_size*2).
        self.fc = nn.Linear(hidden_size*2, num_classes)
        # Fusion after layer 1: two outputs of width hidden_size/2 -> hidden_size.
        self.fs1 = nn.Linear(hidden_size, hidden_size)
        # Fusion after layers 2 and 3: two outputs of width hidden_size*2 -> hidden_size.
        self.fs2 = nn.Linear(hidden_size*4, hidden_size)

    @staticmethod
    def _run_packed(rnn, x, lengths):
        """Run ``rnn`` over ``x`` while skipping padding, and return the re-padded output."""
        packed = pack_padded_sequence(x, lengths, batch_first=True, enforce_sorted=False)
        packed_out, _ = rnn(packed)
        out, _ = pad_packed_sequence(packed_out, batch_first=True)
        return out

    def _fuse(self, layer, left, right):
        """Concatenate two (batch, seq, feature) tensors on the feature axis and apply ``relu(layer(.))``."""
        # nn.Linear acts on the last dimension, so all time steps are fused in one call.
        return F.relu(layer(torch.cat((left, right), 2)))

    def forward(self, x, seq_len):
        """Compute class scores for a batch of padded sequences.

        Args:
            x: tensor of shape (batch, seq, input_size).
            seq_len: number of valid frames per sample. Any shape holding one
                entry per sample is accepted (for example (batch,) or
                (batch, 1)), on any device and with any numeric dtype.

        Returns:
            Tensor of shape (batch, num_classes) on the same device as ``x``.
        """
        # pack_padded_sequence needs a 1-D int64 CPU tensor. reshape(-1)
        # (instead of squeeze) keeps the batch axis when the batch size is 1.
        lengths = torch.as_tensor(seq_len).reshape(-1).to(device='cpu', dtype=torch.int64)

        # Layer 1: the five parts go through the same RNN independently.
        # The padded outputs are as long as the longest sequence in the batch.
        x_p1, x_p2, x_p3, x_p4, x_p5 = torch.chunk(x, 5, dim=2)
        out1_p1 = self._run_packed(self.rnn, x_p1, lengths)
        out1_p2 = self._run_packed(self.rnn, x_p2, lengths)
        out1_p3 = self._run_packed(self.rnn, x_p3, lengths)
        out1_p4 = self._run_packed(self.rnn, x_p4, lengths)
        out1_p5 = self._run_packed(self.rnn, x_p5, lengths)

        # Fusion 1: part 1 is paired with every other part.
        # NOTE(review): disabled notes in the original code describe part 5 as
        # the trunk and parts 1-4 as limbs; confirm that part 1 (not part 5)
        # is the intended common partner.
        in2_p1 = self._fuse(self.fs1, out1_p1, out1_p2)
        in2_p2 = self._fuse(self.fs1, out1_p1, out1_p3)
        in2_p3 = self._fuse(self.fs1, out1_p1, out1_p4)
        in2_p4 = self._fuse(self.fs1, out1_p1, out1_p5)

        # Layer 2: four fused parts through the second (shared) RNN.
        # NOTE(review): from here on the sequences are not packed, so padding
        # frames of shorter samples are processed like real frames.
        out2_p1, _ = self.rnn2(in2_p1)
        out2_p2, _ = self.rnn2(in2_p2)
        out2_p3, _ = self.rnn2(in2_p3)
        out2_p4, _ = self.rnn2(in2_p4)

        # Fusion 2: four parts -> two parts.
        in3_p1 = self._fuse(self.fs2, out2_p1, out2_p2)
        in3_p2 = self._fuse(self.fs2, out2_p3, out2_p4)

        # Layer 3: the two parts go through rnn2 again (same weights as layer 2).
        out3_p1, _ = self.rnn2(in3_p1)
        out3_p2, _ = self.rnn2(in3_p2)

        # Fusion 3: two parts -> one whole-body representation.
        in4_p1 = self._fuse(self.fs2, out3_p1, out3_p2)

        # Layer 4: whole-body sequence through the bidirectional LSTM.
        out4, _ = self.lstm(in4_p1)

        # Classification: sum the class scores of the first seqs // 20 frames.
        # Using only the last frame was rejected by the author because many
        # sequences end before the last padded frame.
        # NOTE(review): the original comment announces sampling every few
        # frames, but the frames used are 0 .. seqs // 20 - 1, not every 20th
        # frame; behaviour is kept as is - confirm the intent.
        seqs = out4.size(1)
        steps = seqs // 20
        if steps == 0:
            # Fewer than 20 frames: nothing is accumulated, scores are all zero.
            return out4.new_zeros(out4.size(0), self.fc.out_features)
        return self.fc(out4[:, :steps, :]).sum(dim=1)

In [7]:
# Disabled experiment: a baseline made of one bidirectional RNN followed by a
# fully connected layer over all time steps. It is kept as a bare string
# literal, so it is never executed; because the literal is the last expression
# of the cell, the notebook echoes it as the cell output.
'''
# 该模型仅用于测试
class TestFC(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, num_classes):
        super(TestFC, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.input_size = input_size
        self.num_classes = num_classes
        
        
        self.rnn = nn.RNN(input_size, hidden_size, num_layers, batch_first=True, bidirectional=True)
        self.fc = nn.Linear(hidden_size*2*sequence_length,num_classes)
        
        
    def forward(self, x):
        out1,_ = self.rnn(x)
        #print(out1.shape)
        #torch.Size([1000, 300, 128])
        out = self.fc(out1.reshape(out1.size(0),hidden_size*2*sequence_length))
        return out
'''

        

'\n# 该模型仅用于测试\nclass TestFC(nn.Module):\n    def __init__(self, input_size, hidden_size, num_layers, num_classes):\n        super(TestFC, self).__init__()\n        self.hidden_size = hidden_size\n        self.num_layers = num_layers\n        self.input_size = input_size\n        self.num_classes = num_classes\n        \n        \n        self.rnn = nn.RNN(input_size, hidden_size, num_layers, batch_first=True, bidirectional=True)\n        self.fc = nn.Linear(hidden_size*2*sequence_length,num_classes)\n        \n        \n    def forward(self, x):\n        out1,_ = self.rnn(x)\n        #print(out1.shape)\n        #torch.Size([1000, 300, 128])\n        out = self.fc(out1.reshape(out1.size(0),hidden_size*2*sequence_length))\n        return out\n'

In [8]:
# Step5.定义辅助函数用于模型评估
def eval(model,criterion,dataloader):
    """Compute the mean loss and mean accuracy of ``model`` over ``dataloader``.

    The function does not switch the model mode; callers are expected to call
    ``model.eval()`` beforehand. Note that the name shadows the built-in
    ``eval`` inside this notebook.

    Args:
        model: callable taking ``(batch_x, seq_len)`` and returning class scores.
        criterion: loss function taking ``(logits, batch_y)``.
        dataloader: iterable of ``(batch_x, batch_y, seq_len)`` batches that
            also supports ``len()``.

    Returns:
        ``(loss, accuracy)``: the average of the per-batch losses and the
        average of the per-batch accuracies in percent.

    Raises:
        ValueError: if ``dataloader`` yields no batch (for example when
            ``drop_last=True`` and the dataset is smaller than one batch).
    """
    num_batches = len(dataloader)
    if num_batches == 0:
        raise ValueError('eval() received a dataloader without any batch')

    loss = 0
    accuracy = 0
    # Evaluation needs no gradients; no_grad avoids building autograd graphs,
    # which saves memory and time.
    with torch.no_grad():
        for batch_x, batch_y, seq_len in dataloader:
            # Keep the first body only and view every sample as (frames, features).
            # NOTE(review): the data is documented as (N, C, T, V, M); a plain
            # view of (C, T, V) as (T, C*V) does not group the values of one
            # frame together - confirm whether a permute is missing here and in
            # the training loop (both must be changed together).
            batch_x = batch_x[:,:,:,:,0].view(-1,sequence_length,input_size)

            batch_x, batch_y = batch_x.to(device), batch_y.to(device)
            seq_len = seq_len.to(device)
            logits = model(batch_x, seq_len)
            loss += criterion(logits, batch_y).item()

            # Predicted class = index of the largest score.
            _, pred_y = logits.max(dim=1)
            accuracy += (pred_y == batch_y).sum().double() / batch_y.size(0)

    loss /= num_batches
    accuracy = accuracy * 100.0 / num_batches
    return loss, accuracy

In [9]:
#Step6.模型使用以及损失函数、优化函数使用
# Build the network and move it to the selected device.
model = HRNN(input_size, hidden_size, num_layers, num_classes)
model = model.to(device)
model.train()

# Objective: cross-entropy loss. Optimiser: Adam.
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

In [None]:

# Step7.模型训练
# One pass over train_loader per epoch, followed by an evaluation on both the
# training and the test loader.
for epoch in range(num_epochs):
    model.train()
    since = time.time()
    for batch_x, batch_y, seq_len in train_loader:
        # For now only the first body (index 0 on the last axis) is used.
        # NOTE(review): the data is documented as (N, C, T, V, M); viewing
        # (C, T, V) directly as (T, C*V) may mix frames and coordinates -
        # confirm together with the identical reshape in eval().
        batch_x = batch_x[:, :, :, :, 0].view(-1, sequence_length, input_size)
        batch_x = batch_x.to(device)
        batch_y = batch_y.to(device)
        seq_len = seq_len.to(device)

        optimizer.zero_grad()
        logit = model(batch_x, seq_len)
        E = criterion(logit, batch_y)
        E.backward()
        optimizer.step()
    # Only the optimisation part of the epoch is timed.
    now = time.time()

    model.eval()
    tr_loss, tr_acc = eval(model, criterion, train_loader)
    te_loss, te_acc = eval(model, criterion, test_loader)
    report = (epoch+1, num_epochs, now-since, tr_loss, tr_acc, te_loss, te_acc)
    print('[%d/%d,%.0f seconds],train error:%.1e, train acc:%.2f\t test error: %.1e,test acc: %.2f' % report)





In [None]:
# step8.用于测试
# Inspect the first debug sample and run the model on the debug batch.
# The loader yields three items (data, label, sequence length) and the model
# takes the sequence lengths as its second argument.
with torch.no_grad():  # inspection only, no gradients needed
    for batch_x, batch_y, seq_len in debug_loader:
        # Keep the first body only and view every sample as (frames, features).
        # NOTE(review): same reshape as in training/evaluation; see whether a
        # permute of the (C, T, V) axes is missing before the view.
        batch_x = batch_x[:, :, :, :, 0].view(-1, sequence_length, input_size)

        print(batch_x[0, :, :])
        # Indices of the frames of sample 0 whose features are all zero
        # (presumably the zero-padded frames after the end of the sequence).
        zero_frames = (batch_x[0] == 0).all(dim=1).nonzero(as_tuple=True)[0]
        print(zero_frames)

        batch_x, batch_y = batch_x.to(device), batch_y.to(device)
        logit = model(batch_x, seq_len)
        print(logit)
    

In [None]:
# Disabled legacy training/testing/saving script, kept as a bare string literal
# so it is never executed. It unpacks two items per batch and calls
# model(images) with a single argument, whereas the loaders in this notebook
# yield three items and HRNN.forward takes (x, seq_len); it cannot be
# re-enabled without adapting it.
'''
#Step6.模型训练、测试与保存
total_step = len(train_loader)
print(total_step)
for epoch in range(num_epochs):
    for i, (images, labels) in enumerate(train_loader):
        images = images.reshape(-1, sequence_length, input_size).to(device)
        labels = labels.to(device)
        # Forward pass
        outputs = model(images)
        loss = criterion(outputs, labels)
        
        # Backward and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
        if (i+1) % 100 == 0:
            print ('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}' 
                   .format(epoch+1, num_epochs, i+1, total_step, loss.item()))

# Test the model
with torch.no_grad():
    correct = 0
    total = 0
    for images, labels in test_loader:
        images = images.reshape(-1, sequence_length, input_size).to(device)
        labels = labels.to(device)
        outputs = model(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

    print('Test Accuracy of the model on the 10000 test images: {} %'.format(100 * correct / total)) 

# Save the model checkpoint
torch.save(model.state_dict(), 'model.ckpt')
'''


In [1]:
# Ask PyTorch to release the GPU memory it has cached but is not using.
# The import cell must have run first: the NameError recorded below this cell
# comes from executing it on a kernel where torch had not been imported.
torch.cuda.empty_cache()

NameError: name 'torch' is not defined