In [25]:
import torch
from torch import nn
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import torch
import matplotlib.pyplot as plt
import torchvision
from torchvision import datasets
from torch.utils.data import DataLoader, Dataset
from torch import optim
import time
import math
import os

use_cuda = torch.cuda.is_available()
# device = torch.device("cpu")
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
print(device)

cpu


In [21]:
data_path = "nturgb+d_skeletons/"
broken_files_path = "NTU_RGBD120_samples_with_missing_skeletons.txt"

In [22]:
# training_subjects = list(range(0, 28)) #количество людей выполняющих действия
training_classes = sorted([8, 10, 22, 23, 27, 21, 55, 2, 7]) #классы которые будем использовать для обучения, полный список прдставлен тут https://github.com/shahroudy/NTURGB-D
LABELS = {x: training_classes[x] for x in range(len(training_classes))}
training_cameras = [1, 2, 3] 

# max_body_true = 1
# max_body_kinect = 1

num_joint = 25
max_frame = 300

In [23]:
class Skeleton_Dataset(Dataset):
    def __init__(self, data_path, broken_files_path=None, training_classes=None,
                 num_joint = 25, max_frame = 300, transform=None):
        
        
        def read_data(data_path, broken_files_path):
            labels = []
            files = []
            action_classes = {}
            counter = 0
            files_counter = {}
            with open(broken_files_path, 'r') as f:
                broken_files = f.read().split("\n")

            raw_files = os.listdir(data_path)
            num_frames = 0

            for filename in raw_files:
                if filename not in broken_files:
                    action_class = int(filename[filename.find('A') + 1:filename.find('A') + 4])
                    subject_id = int(filename[filename.find('P') + 1:filename.find('P') + 4])
                    camera_id = int(filename[filename.find('C') + 1:filename.find('C') + 4])
                    if action_class in training_classes and camera_id in training_cameras: 
                        if action_class in action_classes:
                            if files_counter[action_class] < 120:
                                files.append([filename,action_classes[action_class]])
                                files_counter[action_class] = files_counter[action_class] + 1
                        else:
                            action_classes.update({action_class : counter})
                            files_counter.update({action_class : 1})
                            counter+=1
                            files.append([filename,action_classes[action_class]])
            print("action classes: ", action_classes)
            print("action files: ", files_counter)

            return files, action_classes
        
        
        def read_skeleton_filter(file):
            with open(file, 'r') as f:
                skeleton_sequence = {}
                skeleton_sequence['numFrame'] = int(f.readline())
                skeleton_sequence['frameInfo'] = []
                for t in range(skeleton_sequence['numFrame']):
                    frame_info = {}
                    frame_info['numBody'] = int(f.readline())
                    frame_info['bodyInfo'] = []

                    for m in range(frame_info['numBody']):
                        body_info = {}
                        body_info_key = [
                            'bodyID', 'clipedEdges', 'handLeftConfidence',
                            'handLeftState', 'handRightConfidence', 'handRightState',
                            'isResticted', 'leanX', 'leanY', 'trackingState'
                        ]
                        body_info = {
                            k: float(v)
                            for k, v in zip(body_info_key, f.readline().split())
                        }
                        body_info['numJoint'] = int(f.readline())
                        body_info['jointInfo'] = []
                        for v in range(body_info['numJoint']):
                            joint_info_key = [
                                'x', 'y', 'z', 'depthX', 'depthY', 'colorX', 'colorY',
                                'orientationW', 'orientationX', 'orientationY',
                                'orientationZ', 'trackingState'
                            ]
                            joint_info = {
                                k: float(v)
                                for k, v in zip(joint_info_key, f.readline().split())
                            }
                            body_info['jointInfo'].append(joint_info)
                        frame_info['bodyInfo'].append(body_info)
                    skeleton_sequence['frameInfo'].append(frame_info)

            return skeleton_sequence

        def read_xyz(file, max_body=1, num_joint=25):
            seq_info = read_skeleton_filter(file)
            data = np.zeros((max_body, seq_info['numFrame'], num_joint, 3))
            for n, f in enumerate(seq_info['frameInfo']):
                for m, b in enumerate(f['bodyInfo']):
                    for j, v in enumerate(b['jointInfo']):
                        if m < max_body and j < num_joint:
                            data[m, n, j, :] = [v['x'], v['y'], v['z']]

                        else:
                            pass

            return data
        
        
        def create_coords_blocks(test_file, chonk_len = 45):   
            frame_counter = 0
            new_labels = []
            new_frames = []
            blocks = []

            test_frames = read_xyz(data_path + test_file[0])[0]
            label = test_file[1]
            slice_len = chonk_len * int(len(test_frames)/chonk_len)


            for index in range(len(test_frames[:slice_len])):
                frame_counter += 1
                new_frames.append(test_frames[index].flatten())
                if frame_counter == chonk_len:
                    frame_counter = 0
                    blocks.append(np.array(new_frames))
                    new_labels = new_labels + [label]
                    new_frames = []


            return blocks, new_labels
        
        
        ##### список файлов с лейблами на каждый файл 
        working_files_with_labels, action_classes = read_data(data_path, broken_files_path)
        
        data = []
        labels = []
        ##########################################################################
        numbers = {x: 0 for x in range(len(action_classes))}  #####
        ##################################################################
        for file in working_files_with_labels:
            frames_blocks, label = create_coords_blocks(file)
            if label != [] and numbers[label[0]] <= 150:
                numbers[label[0]] = numbers[label[0]] + len(label)
                data = data + frames_blocks
                labels = labels + label
        data_np = np.asarray(data)
        labels_np = np.asarray(labels)

        data_sq = data_np.reshape(len(data_np), -1)
        data = pd.DataFrame(data_sq)
        labels = pd.DataFrame(labels_np)
        data['labels'] = labels
        

        self.data = data
        self.labels = data['labels'].astype('float32')
        self.transform = transform
        
           
    def __len__(self):
         return len(self.data)
        
        
    def __getitem__(self, idx):
        item = np.asarray(self.data.iloc[idx,:-1]).reshape(45,75)
        label = self.labels[idx]
        if self.transform != None:
            item = transform(item)
        return (item, label)

In [26]:
dataset = Skeleton_Dataset(data_path=data_path, broken_files_path=broken_files_path, 
                           training_classes=training_classes,num_joint = 25, 
                           max_frame = 300, transform=None)

action classes:  {10: 0, 27: 1, 55: 2, 8: 3, 2: 4, 23: 5, 22: 6, 7: 7, 21: 8}
action files:  {10: 120, 27: 120, 55: 120, 8: 120, 2: 120, 23: 120, 22: 120, 7: 120, 21: 120}


In [27]:
train_dataset, test_dataset = torch.utils.data.random_split(dataset, [int(0.75*len(dataset)),
                                                                      len(dataset) - int(0.75*len(dataset))])
train_loader = DataLoader(train_dataset, batch_size = 16, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size = 1, shuffle=True)

In [28]:
class LSTM_net(nn.Module):
    def __init__(self,input_dim,hidden_dim,output_dim,layer_num):
        super().__init__()
        self.hidden_dim = hidden_dim
        self.output_dim = output_dim
        self.lstm = torch.nn.LSTM(input_dim, hidden_dim,layer_num,batch_first=True)
        self.dr = torch.nn.Dropout2d(0.1)
        self.fc = torch.nn.Linear(hidden_dim,output_dim)
        
        
    def forward(self,inputs):
        x = inputs
        lstm_out,(hn,cn) = self.lstm(x)
        out = self.fc(lstm_out[:,-1,:])
        return out

In [29]:
n_hidden = 128
n_joints = 25*3
n_categories = len(LABELS)
n_layer = 2
rnn = LSTM_net(n_joints,n_hidden,n_categories,n_layer)
rnn.to(device)

LSTM_net(
  (lstm): LSTM(75, 128, num_layers=2, batch_first=True)
  (dr): Dropout2d(p=0.1, inplace=False)
  (fc): Linear(in_features=128, out_features=9, bias=True)
)

In [30]:
def categoryFromOutput(output):
    top_n, top_i = output.topk(1)
    category_i = top_i[0].item()
#     print(output.topk(5))
    return LABELS[category_i], category_i

def timeSince(since):
    now = time.time()
    s = now - since
    m = math.floor(s / 60)
    s -= m * 60
    return '%dm %ds' % (m, s)

In [31]:
criterion = nn.CrossEntropyLoss()
learning_rate = 0.0007
optimizer = optim.SGD(rnn.parameters(),lr=learning_rate,momentum=0.9)

all_losses = []
start = time.time()
counter = 0
for epoch in range(250):  
    current_loss = 0
    running_loss = 0.0
    for i, data in enumerate(train_loader, 0):
        
        inputs, labels = data[0].to(device), data[1].to(device)
        optimizer.zero_grad()
    
        output = rnn(inputs.float())
        labels = labels.type(torch.LongTensor).to(device)
        loss = criterion(output, labels)
        loss.backward()
        optimizer.step() 


        current_loss += loss.item()
        category = LABELS[int(labels[0])]

        if counter % 500 == 0:
            guess, guess_i = categoryFromOutput(output)
            correct = '✓' if guess == category else '✗ (%s)' % category
            print('epoch : %d iter : %d (%s) %.4f  / %s %s' % (epoch, i, timeSince(start), loss, guess, correct))

        
        counter = counter + 1
    if counter % 100 == 0:
        all_losses.append(current_loss / 25)
        current_loss = 0

epoch : 0 iter : 0 (0m 0s) 2.1945  / 27 ✓
epoch : 9 iter : 5 (1m 49s) 2.2103  / 10 ✗ (8)
epoch : 18 iter : 10 (3m 40s) 2.1837  / 10 ✗ (2)
epoch : 27 iter : 15 (5m 33s) 2.1119  / 23 ✓
epoch : 36 iter : 20 (7m 23s) 2.0063  / 23 ✓
epoch : 45 iter : 25 (9m 18s) 1.9265  / 23 ✗ (27)
epoch : 54 iter : 30 (11m 13s) 2.0999  / 10 ✓
epoch : 63 iter : 35 (13m 1s) 1.7146  / 23 ✓
epoch : 72 iter : 40 (14m 52s) 1.5551  / 21 ✓
epoch : 81 iter : 45 (16m 43s) 1.7394  / 2 ✗ (22)
epoch : 90 iter : 50 (18m 29s) 1.4394  / 7 ✗ (8)
epoch : 100 iter : 0 (20m 14s) 1.5019  / 21 ✗ (27)
epoch : 109 iter : 5 (22m 5s) 1.6171  / 22 ✓
epoch : 118 iter : 10 (23m 56s) 1.5789  / 7 ✗ (2)
epoch : 127 iter : 15 (25m 51s) 1.2610  / 2 ✗ (22)
epoch : 136 iter : 20 (27m 47s) 0.8872  / 7 ✓
epoch : 145 iter : 25 (29m 40s) 0.8922  / 10 ✓
epoch : 154 iter : 30 (31m 26s) 1.5808  / 21 ✓
epoch : 163 iter : 35 (33m 19s) 1.2158  / 10 ✗ (21)
epoch : 172 iter : 40 (35m 13s) 1.5505  / 23 ✓
epoch : 181 iter : 45 (37m 9s) 1.6344  / 21 ✓
epoc

In [32]:
total = 0
right = 0
counter = 0

rnn.eval()
with torch.no_grad():
    for i, data in enumerate(test_loader, 0):
        counter = counter + 1
        inputs, labels = data[0].to(device), data[1].to(device)  
        output = rnn(inputs.float())
        guess, guess_i = categoryFromOutput(output)
        category = LABELS[int(labels[0])]
        
        if guess == category:
            right = right + 1


print('Accuracy of the network:  ',  (100 * right / counter))

Accuracy of the network:   45.205479452054796


In [46]:
n_hidden = 128*3
n_joints = 25*3
n_categories = len(LABELS)
n_layer = 2
rnn = LSTM_net(n_joints,n_hidden,n_categories,n_layer)
rnn.to(device)

LSTM_net(
  (lstm): LSTM(75, 384, num_layers=2, batch_first=True)
  (dr): Dropout2d(p=0.1, inplace=False)
  (fc): Linear(in_features=384, out_features=9, bias=True)
)

In [47]:
class LSTM_net(nn.Module):
    def __init__(self,input_dim,hidden_dim,output_dim,layer_num):
        super().__init__()
        self.hidden_dim = hidden_dim
        self.output_dim = output_dim
        self.lstm = torch.nn.LSTM(input_dim, hidden_dim,layer_num,batch_first=True)
        self.dr = torch.nn.Dropout2d(0.1)
        self.fc = torch.nn.Linear(hidden_dim,output_dim)
        
        
    def forward(self,inputs):
        x = inputs
        lstm_out,(hn,cn) = self.lstm(x)
        out = self.fc(lstm_out[:,-1,:])
        return out


In [48]:
criterion = nn.CrossEntropyLoss()
learning_rate = 0.0007
optimizer = optim.SGD(rnn.parameters(),lr=learning_rate,momentum=0.9)

all_losses = []
start = time.time()
counter = 0
for epoch in range(250):  
    current_loss = 0
    running_loss = 0.0
    for i, data in enumerate(train_loader, 0):
        
        inputs, labels = data[0].to(device), data[1].to(device)
        optimizer.zero_grad()
    
        output = rnn(inputs.float())
        labels = labels.type(torch.LongTensor).to(device)
        loss = criterion(output, labels)
        loss.backward()
        optimizer.step() 


        current_loss += loss.item()
        category = LABELS[int(labels[0])]

        if counter % 500 == 0:
            guess, guess_i = categoryFromOutput(output)
            correct = '✓' if guess == category else '✗ (%s)' % category
            print('epoch : %d iter : %d (%s) %.4f  / %s %s' % (epoch, i, timeSince(start), loss, guess, correct))

        
        counter = counter + 1
    if counter % 100 == 0:
        all_losses.append(current_loss / 25)
        current_loss = 0

epoch : 0 iter : 0 (0m 0s) 2.1959  / 21 ✗ (8)
epoch : 9 iter : 5 (1m 0s) 2.1635  / 10 ✗ (2)
epoch : 18 iter : 10 (1m 57s) 2.1943  / 10 ✗ (8)
epoch : 27 iter : 15 (2m 56s) 2.1457  / 23 ✗ (27)
epoch : 36 iter : 20 (3m 53s) 1.8627  / 23 ✓
epoch : 45 iter : 25 (4m 59s) 1.8783  / 21 ✗ (7)
epoch : 54 iter : 30 (6m 7s) 1.4062  / 10 ✗ (8)
epoch : 63 iter : 35 (7m 7s) 1.5918  / 2 ✗ (55)
epoch : 72 iter : 40 (8m 6s) 1.3491  / 22 ✗ (55)
epoch : 81 iter : 45 (9m 5s) 1.6471  / 23 ✓
epoch : 90 iter : 50 (10m 4s) 1.7037  / 2 ✗ (22)
epoch : 100 iter : 0 (11m 2s) 1.1231  / 21 ✗ (7)
epoch : 109 iter : 5 (12m 0s) 1.6700  / 8 ✓
epoch : 118 iter : 10 (12m 59s) 1.1781  / 23 ✓
epoch : 127 iter : 15 (13m 57s) 1.1609  / 8 ✗ (2)
epoch : 136 iter : 20 (14m 56s) 1.7309  / 2 ✗ (8)
epoch : 145 iter : 25 (15m 54s) 0.7089  / 2 ✓
epoch : 154 iter : 30 (16m 53s) 1.1550  / 23 ✓
epoch : 163 iter : 35 (17m 51s) 1.2088  / 8 ✓
epoch : 172 iter : 40 (18m 50s) 1.3659  / 22 ✗ (21)
epoch : 181 iter : 45 (19m 48s) 1.0940  / 23 ✓

In [49]:
total = 0
right = 0
counter = 0

rnn.eval()
with torch.no_grad():
    for i, data in enumerate(test_loader, 0):
        counter = counter + 1
        inputs, labels = data[0].to(device), data[1].to(device)  
        output = rnn(inputs.float())
        guess, guess_i = categoryFromOutput(output)
        category = LABELS[int(labels[0])]
        
        if guess == category:
            right = right + 1


print('Accuracy of the network:  ',  (100 * right / counter))

Accuracy of the network:   59.24657534246575


In [40]:

training_classes = sorted([8, 10, 22, 23, 27, 21, 55, 2, 7]) #классы которые будем использовать для обучения, полный список прдставлен тут https://github.com/shahroudy/NTURGB-D
LABELS = {x: training_classes[x] for x in range(len(training_classes))}
training_cameras = [1, 2, 3] 


num_joint = 25
max_frame = 200

In [41]:
dataset = Skeleton_Dataset(data_path=data_path, broken_files_path=broken_files_path, 
                           training_classes=training_classes,num_joint = num_joint, 
                           max_frame = max_frame, transform=None)

action classes:  {10: 0, 27: 1, 55: 2, 8: 3, 2: 4, 23: 5, 22: 6, 7: 7, 21: 8}
action files:  {10: 120, 27: 120, 55: 120, 8: 120, 2: 120, 23: 120, 22: 120, 7: 120, 21: 120}


In [42]:
n_hidden = 128*3
n_joints = 25*3
n_categories = len(LABELS)
n_layer = 2
rnn = LSTM_net(n_joints,n_hidden,n_categories,n_layer)
rnn.to(device)

LSTM_net(
  (lstm): LSTM(75, 384, num_layers=2, batch_first=True)
  (dr): Dropout2d(p=0.1, inplace=False)
  (fc): Linear(in_features=384, out_features=9, bias=True)
)

In [43]:
criterion = nn.CrossEntropyLoss()
learning_rate = 0.0007
optimizer = optim.SGD(rnn.parameters(),lr=learning_rate,momentum=0.9)

all_losses = []
start = time.time()
counter = 0
for epoch in range(250):  
    current_loss = 0
    running_loss = 0.0
    for i, data in enumerate(train_loader, 0):
        
        inputs, labels = data[0].to(device), data[1].to(device)
        optimizer.zero_grad()
    
        output = rnn(inputs.float())
        labels = labels.type(torch.LongTensor).to(device)
        loss = criterion(output, labels)
        loss.backward()
        optimizer.step() 


        current_loss += loss.item()
        category = LABELS[int(labels[0])]

        if counter % 500 == 0:
            guess, guess_i = categoryFromOutput(output)
            correct = '✓' if guess == category else '✗ (%s)' % category
            print('epoch : %d iter : %d (%s) %.4f  / %s %s' % (epoch, i, timeSince(start), loss, guess, correct))

        
        counter = counter + 1
    if counter % 100 == 0:
        all_losses.append(current_loss / 25)
        current_loss = 0

epoch : 0 iter : 0 (0m 0s) 2.1997  / 27 ✓
epoch : 9 iter : 5 (1m 3s) 2.2343  / 10 ✗ (2)
epoch : 18 iter : 10 (2m 2s) 2.1974  / 10 ✗ (8)
epoch : 27 iter : 15 (3m 1s) 2.0792  / 10 ✗ (55)
epoch : 36 iter : 20 (4m 0s) 1.8060  / 21 ✗ (55)
epoch : 45 iter : 25 (4m 58s) 1.7076  / 21 ✗ (22)
epoch : 54 iter : 30 (5m 57s) 2.3444  / 10 ✗ (27)
epoch : 63 iter : 35 (6m 56s) 1.9741  / 2 ✗ (7)
epoch : 72 iter : 40 (7m 54s) 1.5439  / 22 ✓
epoch : 81 iter : 45 (8m 53s) 1.4507  / 21 ✗ (8)
epoch : 90 iter : 50 (9m 52s) 1.4633  / 22 ✗ (23)
epoch : 100 iter : 0 (10m 51s) 1.7222  / 2 ✓
epoch : 109 iter : 5 (11m 50s) 1.7205  / 21 ✗ (55)
epoch : 118 iter : 10 (12m 48s) 1.2955  / 2 ✓
epoch : 127 iter : 15 (13m 48s) 1.1215  / 10 ✓
epoch : 136 iter : 20 (14m 51s) 1.5828  / 21 ✗ (22)
epoch : 145 iter : 25 (15m 49s) 1.3729  / 27 ✓
epoch : 154 iter : 30 (16m 46s) 1.3438  / 21 ✗ (55)
epoch : 163 iter : 35 (17m 44s) 1.1932  / 2 ✗ (22)
epoch : 172 iter : 40 (18m 42s) 0.7531  / 7 ✓
epoch : 181 iter : 45 (19m 40s) 1.322

In [45]:
total = 0
right = 0
counter = 0

rnn.eval()
with torch.no_grad():
    for i, data in enumerate(test_loader, 0):
        counter = counter + 1
        inputs, labels = data[0].to(device), data[1].to(device)  
        output = rnn(inputs.float())
        guess, guess_i = categoryFromOutput(output)
        category = LABELS[int(labels[0])]
        
        if guess == category:
            right = right + 1


print('Accuracy of the network:  ',  (100 * right / counter))

Accuracy of the network:   53.42465753424658


Увеличение n_hidden повысило точность модели. Но уменьшение кадров понизило точность.