In [1]:
import torch
from torch import nn
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import torch
import matplotlib.pyplot as plt
import torchvision
from torchvision import datasets
from torch.utils.data import DataLoader, Dataset
from torch import optim
import time
import math

from pathlib import Path
import os

use_cuda = torch.cuda.is_available()
# device = torch.device("cpu")
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
print(device)

cuda


In [2]:
data_path = "data/nturgb+d_skeletons/"
#### список отсутсвующих элементов так же будет доступен 
broken_files_path = "data/NTU_RGBD_samples_with_missing_skeletons.txt"

In [3]:
training_subjects = list(range(0, 28)) #количество людей выполняющих действия
training_classes = [4, 9, 15, 16, 23, 24, 28, 31, 43, 55] #классы которые будем использовать для обучения, полный список прдставлен тут https://github.com/shahroudy/NTURGB-D
training_cameras = [1, 2, 3] 

num_joint = 25
max_frame = 300 # Длина отрезка которую мы вычленяем из большого датасета

In [4]:
class Datareader:
    def __init__(self, data_path, broken_files_path, training_classes, chunk_len):
        self.data_path = data_path
        self.broken_files_path = broken_files_path
        self.training_subjects = list(range(0, 28))
        self.training_cameras = [1, 2, 3]
        self.training_classes = training_classes
        self.chunk_len = chunk_len

        
    def read_data(self):
        labels = []
        files = []
        action_classes = {}
        counter = 0
        files_counter = {}

        with open(self.broken_files_path, 'r') as f:
            broken_files = f.read().split("\n")

        raw_files = os.listdir(self.data_path)
        num_frames = 0

        for filename in raw_files:
            if filename not in broken_files:
                action_class = int(filename[filename.find('A') + 1:filename.find('A') + 4])
                subject_id = int(filename[filename.find('P') + 1:filename.find('P') + 4])
                camera_id = int(filename[filename.find('C') + 1:filename.find('C') + 4])
                if action_class in self.training_classes and camera_id in self.training_cameras:  #and subject_id in training_subjects:
                    if action_class in action_classes:
                        if files_counter[action_class] < 120:
                            files.append([filename,action_classes[action_class]])
                            files_counter[action_class] = files_counter[action_class] + 1
                    else:
                        action_classes.update({action_class : counter})
                        files_counter.update({action_class : 1})
                        counter+=1
                        files.append([filename,action_classes[action_class]])
    #                     labels.append([action_class])
        print("action classes: ", action_classes)
        print("action files: ", files_counter)

        return files, action_classes
    


    def read_skeleton_filter(self, file):
        with open(file, 'r') as f:
            skeleton_sequence = {}
            skeleton_sequence['numFrame'] = int(f.readline())
            skeleton_sequence['frameInfo'] = []
            for t in range(skeleton_sequence['numFrame']):
                frame_info = {}
                frame_info['numBody'] = int(f.readline())
                frame_info['bodyInfo'] = []

                for m in range(frame_info['numBody']):
                    body_info = {}
                    body_info_key = [
                        'bodyID', 'clipedEdges', 'handLeftConfidence',
                        'handLeftState', 'handRightConfidence', 'handRightState',
                        'isResticted', 'leanX', 'leanY', 'trackingState'
                    ]
                    body_info = {
                        k: float(v)
                        for k, v in zip(body_info_key, f.readline().split())
                    }
                    body_info['numJoint'] = int(f.readline())
                    body_info['jointInfo'] = []
                    for v in range(body_info['numJoint']):
                        joint_info_key = [
                            'x', 'y', 'z', 'depthX', 'depthY', 'colorX', 'colorY',
                            'orientationW', 'orientationX', 'orientationY',
                            'orientationZ', 'trackingState'
                        ]
                        joint_info = {
                            k: float(v)
                            for k, v in zip(joint_info_key, f.readline().split())
                        }
                        body_info['jointInfo'].append(joint_info)
                    frame_info['bodyInfo'].append(body_info)
                skeleton_sequence['frameInfo'].append(frame_info)

        return skeleton_sequence
    

    # Здесь мы используем нашу функцию фильр и оформляем дату в x,y,z-cкоординаты
    def read_xyz(self, file, max_body=1, num_joint=25):
        seq_info = self.read_skeleton_filter(file)
        data = np.zeros((max_body, seq_info['numFrame'], num_joint, 3))
        for n, f in enumerate(seq_info['frameInfo']):
            for m, b in enumerate(f['bodyInfo']):
                for j, v in enumerate(b['jointInfo']):
                    if m < max_body and j < num_joint:
                        data[m, n, j, :] = [v['x'], v['y'], v['z']]

                    else:
                        pass

        return data
    
    def create_coords_blocks(self, test_file):   
        frame_counter = 0
        new_labels = []
        new_frames = []
        blocks = []

        test_frames = self.read_xyz(self.data_path + test_file[0])[0]
        label = test_file[1]
        slice_len = self.chunk_len * int(len(test_frames)/self.chunk_len)


        for index in range(len(test_frames[:slice_len])):
            frame_counter += 1
            new_frames.append(test_frames[index].flatten())
            if frame_counter == self.chunk_len:
                frame_counter = 0
                blocks.append(np.array(new_frames))
                new_labels = new_labels + [label]
                new_frames = []


        return blocks, new_labels
    
    def create_test_data(self):
        working_files_with_labels, action_classes = self.read_data()
        data = []
        labels = []
        ##########################################################################
        numbers = {x: 0 for x in range(len(action_classes))} #####
        ##################################################################
        for file in working_files_with_labels:
            frames_blocks, label = self.create_coords_blocks(file)
            if label != [] and numbers[label[0]] <= 150:
                numbers[label[0]] = numbers[label[0]] + len(label)
                data = data + frames_blocks
                labels = labels + label
        data_np = np.asarray(data)
        labels_np = np.asarray(labels)

        data_sq = data_np.reshape(len(data_np), -1)
        test_data = pd.DataFrame(data_sq)
        test_labels = pd.DataFrame(labels_np)
        test_data['labels'] = test_labels
        
        return test_data

In [5]:
test_data = Datareader(data_path, broken_files_path, training_classes, chunk_len=45).create_test_data()

action classes:  {4: 0, 9: 1, 15: 2, 16: 3, 23: 4, 24: 5, 28: 6, 31: 7, 43: 8, 55: 9}
action files:  {4: 120, 9: 120, 15: 120, 16: 120, 23: 120, 24: 120, 28: 120, 31: 120, 43: 120, 55: 120}


In [6]:
test_data.to_csv("data/skels_45.csv", index = False)


In [7]:
skeletons = pd.read_csv("data/skels_45.csv")

In [8]:
LABELS = {
    0: "brushing hair", 
    1: "standing up", 
    2: "take off jacket", 
    3: "wear a shoe", 
    4: "hand waving", 
    5: "kicking something", 
    6: "make a phone call/answer phone", 
    7: "pointing to something with finger", 
    8: "falling", 
    9: "hugging other person"}

In [9]:
skeletons.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,3366,3367,3368,3369,3370,3371,3372,3373,3374,labels
0,0.195193,0.19615,3.817392,0.208243,0.449287,3.735566,0.221046,0.697232,3.643296,0.191088,...,-0.050044,0.671995,3.6595,0.054728,0.618219,3.549659,0.09654,0.627727,3.503273,0
1,0.222701,0.179712,3.823472,0.217947,0.443189,3.743833,0.212921,0.700421,3.655576,0.174421,...,0.168651,0.544064,3.669563,0.079051,0.374522,3.622438,0.129491,0.39456,3.5846,0
2,0.224113,-0.031984,4.022326,0.217001,0.177784,3.932267,0.210569,0.384047,3.83198,0.188331,...,-0.114635,0.082521,3.7254,0.091566,-0.081738,3.656859,0.067479,-0.000107,3.618084,1
3,0.29915,0.148046,3.809126,0.300061,0.418474,3.727096,0.304327,0.684969,3.637994,0.288101,...,-0.015242,0.202249,3.749696,0.562961,0.149644,3.915798,0.609713,0.230888,3.8819,2
4,0.328908,0.162763,3.844153,0.28971,0.40743,3.733469,0.253113,0.646386,3.606434,0.218489,...,0.266303,0.250646,3.467945,0.096131,0.192195,3.363206,0.135501,0.271579,3.398533,2


In [10]:
skeletons.shape

(1383, 3376)

In [11]:
class Skeleton_Dataset(Dataset):
    def __init__(self, file_path, transform=None):
        self.data = pd.read_csv(file_path)
        self.transform = transform
        self.labels = self.data.iloc[:,-1]


    def __len__(self):
        return len(self.data) 
    
    
    def __getitem__(self, idx):
        item = np.asarray(self.data.iloc[idx,:-1]).reshape(45,25*3)
        label = self.labels[idx]
        if self.transform != None:
            item = transform(item)
            
        

        return (item, label) 

In [12]:
dataset = Skeleton_Dataset(file_path = "data/skels.csv", transform=None)

In [13]:
skel, lab = dataset.__getitem__(20)
skel, lab = dataset[20]

In [14]:
LABELS[lab]

'standing up'

In [15]:
skel.shape

(45, 75)

#### Оборачиваем в Даталоадер:

In [16]:
train_dataset, test_dataset = torch.utils.data.random_split(dataset, [int(0.75*len(dataset)),int(0.25*len(dataset)+1)])

In [17]:
train_loader = DataLoader(train_dataset, batch_size = 16, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size = 1, shuffle=True)

In [18]:
class LSTM_net(nn.Module):
    # Для того, что бы инициализировать LSTM нам нужно указать:
    # input_dim - размерность входного тензора. тензор входит в формате (seq_len, batch, input_dim)
    # (batch_size, seq, inp_dim) - if batch_first=True
    # hidden_dim - размерность вектора состояния h
    # output_dim - размерность выхода
    # layer_num - количество скрытых слоев в сети
    def __init__(self,input_dim,hidden_dim,output_dim,layer_num):
        super().__init__()
        self.hidden_dim = hidden_dim
        self.output_dim = output_dim
        self.lstm = torch.nn.LSTM(input_dim, hidden_dim,layer_num,batch_first=True)
        self.dr = torch.nn.Dropout2d(0.1)
        self.fc = torch.nn.Linear(hidden_dim,output_dim)
        
        
    def forward(self,inputs):
        x = inputs
        lstm_out,(hn,cn) = self.lstm(x)
        out = self.fc(lstm_out[:,-1,:])
        return out

Инициализируем наши переменные и модель:

In [19]:
n_hidden = 128
n_joints = 25*3
n_categories = len(LABELS)
n_layer = 2
rnn = LSTM_net(n_joints,n_hidden,n_categories,n_layer)
rnn.to(device)

LSTM_net(
  (lstm): LSTM(75, 128, num_layers=2, batch_first=True)
  (dr): Dropout2d(p=0.1, inplace=False)
  (fc): Linear(in_features=128, out_features=10, bias=True)
)

In [20]:
def categoryFromOutput(output):
    top_n, top_i = output.topk(1)
    category_i = top_i[0].item()
#     print(output.topk(5))
    return LABELS[category_i], category_i

def timeSince(since):
    now = time.time()
    s = now - since
    m = math.floor(s / 60)
    s -= m * 60
    return '%dm %ds' % (m, s)

In [21]:
criterion = nn.CrossEntropyLoss()
learning_rate = 0.0007
optimizer = optim.SGD(rnn.parameters(),lr=learning_rate,momentum=0.9)

all_losses = []
start = time.time()
counter = 0
for epoch in range(300):  
    current_loss = 0
    running_loss = 0.0
    for i, data in enumerate(train_loader, 0):
        
        inputs, labels = data[0].to(device), data[1].to(device)
        optimizer.zero_grad()
    
        output = rnn(inputs.float())
        loss = criterion(output, labels)
        loss.backward()
        optimizer.step() 


        current_loss += loss.item()
        category = LABELS[int(labels[0])]

        if counter % 500 == 0:
            guess, guess_i = categoryFromOutput(output)
            correct = '✓' if guess == category else '✗ (%s)' % category
            print('epoch : %d iter : %d (%s) %.4f  / %s %s' % (epoch, i, timeSince(start), loss, guess, correct))

        
        counter = counter + 1
    if counter % 100 == 0:
        all_losses.append(current_loss / 25)
        current_loss = 0

epoch : 0 iter : 0 (0m 0s) 2.3124  / brushing hair ✗ (make a phone call/answer phone)
epoch : 7 iter : 45 (0m 16s) 2.2574  / take off jacket ✗ (pointing to something with finger)
epoch : 15 iter : 25 (0m 31s) 2.2208  / wear a shoe ✗ (take off jacket)
epoch : 23 iter : 5 (0m 46s) 2.1471  / wear a shoe ✗ (falling)
epoch : 30 iter : 50 (1m 1s) 2.0132  / take off jacket ✓
epoch : 38 iter : 30 (1m 17s) 1.5570  / wear a shoe ✓
epoch : 46 iter : 10 (1m 32s) 1.8758  / take off jacket ✓
epoch : 53 iter : 55 (1m 47s) 1.1190  / hand waving ✓
epoch : 61 iter : 35 (2m 3s) 1.4280  / kicking something ✗ (hugging other person)
epoch : 69 iter : 15 (2m 18s) 1.8581  / kicking something ✓
epoch : 76 iter : 60 (2m 34s) 1.7058  / wear a shoe ✓
epoch : 84 iter : 40 (2m 49s) 1.0535  / brushing hair ✓
epoch : 92 iter : 20 (3m 5s) 1.4891  / hugging other person ✗ (make a phone call/answer phone)
epoch : 100 iter : 0 (3m 20s) 1.5435  / make a phone call/answer phone ✓
epoch : 107 iter : 45 (3m 35s) 0.9679  / ki

In [22]:
total = 0
right = 0
counter = 0

rnn.eval()
with torch.no_grad():
    for i, data in enumerate(test_loader, 0):
        counter = counter + 1
        inputs, labels = data[0].to(device), data[1].to(device)  
        output = rnn(inputs.float())
        guess, guess_i = categoryFromOutput(output)
        category = LABELS[int(labels[0])]
        
        if guess == category:
            right = right + 1

acc = 100 * right / counter
print('Accuracy of the network:  ',  acc)
results = pd.DataFrame({"chunks": [45], "layers": [2], "accuracy": [(100 * right / counter)]})

Accuracy of the network:   66.18497109826589


In [23]:
results

Unnamed: 0,chunks,layers,accuracy
0,45,2,66.184971


### Изменить модель: посмотреть зависимость от количества LSTM модулей в нашей модели  


In [24]:
n_hidden = 128
n_joints = 25*3
n_categories = len(LABELS)


In [25]:
num_layers = [1, 4, 6]

In [26]:
for n_layer in num_layers:
    
    rnn = LSTM_net(n_joints, n_hidden, n_categories, n_layer).to(device)
    criterion = nn.CrossEntropyLoss()
    learning_rate = 0.0007
    optimizer = optim.SGD(rnn.parameters(),lr=learning_rate,momentum=0.9)

    all_losses = []
    start = time.time()
    counter = 0
    for epoch in range(300):  
        current_loss = 0
        running_loss = 0.0
        
        for i, data in enumerate(train_loader, 0):

            inputs, labels = data[0].to(device), data[1].to(device)
            optimizer.zero_grad()

            output = rnn(inputs.float())
            loss = criterion(output, labels)
            loss.backward()
            optimizer.step() 


            current_loss += loss.item()
            category = LABELS[int(labels[0])]

            if counter % 500 == 0:
                guess, guess_i = categoryFromOutput(output)
                correct = '✓' if guess == category else '✗ (%s)' % category
                print('epoch : %d iter : %d (%s) %.4f  / %s %s' % (epoch, i, timeSince(start), loss, guess, correct))


            counter = counter + 1
        if counter % 100 == 0:
            all_losses.append(current_loss / 25)
            current_loss = 0
        
        total = 0
        right = 0
        counter = 0

    rnn.eval()
    with torch.no_grad():
        for i, data in enumerate(test_loader, 0):
            counter = counter + 1
            inputs, labels = data[0].to(device), data[1].to(device)  
            output = rnn(inputs.float())
            guess, guess_i = categoryFromOutput(output)
            category = LABELS[int(labels[0])]

            if guess == category:
                right = right + 1
                
                
    results = results.append({
        'chunks': 45,
        'layers': n_layer,
        'accuracy' : (100 * right / counter)}, ignore_index=True)    

epoch : 0 iter : 0 (0m 0s) 2.2612  / hand waving ✗ (kicking something)
epoch : 1 iter : 0 (0m 1s) 2.2565  / wear a shoe ✓
epoch : 2 iter : 0 (0m 3s) 2.2325  / take off jacket ✗ (brushing hair)
epoch : 3 iter : 0 (0m 5s) 2.1890  / hugging other person ✗ (make a phone call/answer phone)
epoch : 4 iter : 0 (0m 7s) 2.1180  / hand waving ✓
epoch : 5 iter : 0 (0m 9s) 2.1439  / hugging other person ✗ (brushing hair)
epoch : 6 iter : 0 (0m 11s) 2.2764  / kicking something ✗ (standing up)
epoch : 7 iter : 0 (0m 13s) 2.1427  / falling ✓
epoch : 8 iter : 0 (0m 15s) 2.1044  / wear a shoe ✗ (hand waving)
epoch : 9 iter : 0 (0m 17s) 1.9462  / wear a shoe ✗ (pointing to something with finger)
epoch : 10 iter : 0 (0m 19s) 2.1098  / brushing hair ✗ (pointing to something with finger)
epoch : 11 iter : 0 (0m 21s) 1.8475  / wear a shoe ✓
epoch : 12 iter : 0 (0m 23s) 1.9039  / wear a shoe ✓
epoch : 13 iter : 0 (0m 24s) 2.0662  / hugging other person ✓
epoch : 14 iter : 0 (0m 26s) 1.6544  / wear a shoe ✓
e

epoch : 122 iter : 0 (3m 53s) 1.0440  / falling ✓
epoch : 123 iter : 0 (3m 55s) 1.2628  / wear a shoe ✓
epoch : 124 iter : 0 (3m 56s) 1.0814  / take off jacket ✗ (hand waving)
epoch : 125 iter : 0 (3m 58s) 0.6128  / kicking something ✓
epoch : 126 iter : 0 (4m 0s) 1.0160  / take off jacket ✓
epoch : 127 iter : 0 (4m 2s) 1.1123  / pointing to something with finger ✓
epoch : 128 iter : 0 (4m 4s) 0.7524  / make a phone call/answer phone ✓
epoch : 129 iter : 0 (4m 6s) 1.3724  / brushing hair ✗ (pointing to something with finger)
epoch : 130 iter : 0 (4m 8s) 0.8025  / hand waving ✓
epoch : 131 iter : 0 (4m 10s) 1.0451  / make a phone call/answer phone ✗ (take off jacket)
epoch : 132 iter : 0 (4m 12s) 1.2990  / take off jacket ✓
epoch : 133 iter : 0 (4m 14s) 0.6486  / pointing to something with finger ✓
epoch : 134 iter : 0 (4m 15s) 0.7053  / wear a shoe ✓
epoch : 135 iter : 0 (4m 17s) 1.1208  / wear a shoe ✓
epoch : 136 iter : 0 (4m 19s) 1.0105  / brushing hair ✓
epoch : 137 iter : 0 (4m 21

epoch : 249 iter : 0 (7m 53s) 0.4037  / hand waving ✓
epoch : 250 iter : 0 (7m 55s) 0.6385  / take off jacket ✗ (make a phone call/answer phone)
epoch : 251 iter : 0 (7m 57s) 1.0564  / standing up ✓
epoch : 252 iter : 0 (7m 59s) 0.6563  / make a phone call/answer phone ✗ (take off jacket)
epoch : 253 iter : 0 (8m 1s) 0.6820  / brushing hair ✓
epoch : 254 iter : 0 (8m 3s) 0.6654  / hand waving ✓
epoch : 255 iter : 0 (8m 5s) 0.3302  / standing up ✓
epoch : 256 iter : 0 (8m 7s) 0.5084  / wear a shoe ✓
epoch : 257 iter : 0 (8m 9s) 0.3848  / take off jacket ✓
epoch : 258 iter : 0 (8m 10s) 0.6995  / wear a shoe ✓
epoch : 259 iter : 0 (8m 12s) 0.6738  / wear a shoe ✗ (falling)
epoch : 260 iter : 0 (8m 14s) 0.8483  / standing up ✓
epoch : 261 iter : 0 (8m 16s) 0.7091  / standing up ✓
epoch : 262 iter : 0 (8m 18s) 0.6852  / falling ✓
epoch : 263 iter : 0 (8m 20s) 0.2862  / make a phone call/answer phone ✓
epoch : 264 iter : 0 (8m 22s) 0.8409  / make a phone call/answer phone ✓
epoch : 265 iter 

epoch : 64 iter : 0 (2m 19s) 2.3142  / make a phone call/answer phone ✗ (brushing hair)
epoch : 65 iter : 0 (2m 21s) 2.2633  / take off jacket ✗ (wear a shoe)
epoch : 66 iter : 0 (2m 23s) 2.2745  / make a phone call/answer phone ✗ (standing up)
epoch : 67 iter : 0 (2m 26s) 2.2846  / make a phone call/answer phone ✗ (hugging other person)
epoch : 68 iter : 0 (2m 28s) 2.3021  / take off jacket ✗ (hand waving)
epoch : 69 iter : 0 (2m 30s) 2.2959  / take off jacket ✗ (brushing hair)
epoch : 70 iter : 0 (2m 32s) 2.2766  / make a phone call/answer phone ✗ (hugging other person)
epoch : 71 iter : 0 (2m 34s) 2.2740  / take off jacket ✗ (pointing to something with finger)
epoch : 72 iter : 0 (2m 36s) 2.3316  / make a phone call/answer phone ✗ (standing up)
epoch : 73 iter : 0 (2m 39s) 2.2894  / make a phone call/answer phone ✗ (brushing hair)
epoch : 74 iter : 0 (2m 41s) 2.3189  / take off jacket ✗ (pointing to something with finger)
epoch : 75 iter : 0 (2m 43s) 2.2802  / take off jacket ✓
epoc

epoch : 180 iter : 0 (6m 32s) 1.8958  / take off jacket ✓
epoch : 181 iter : 0 (6m 34s) 1.8128  / wear a shoe ✗ (falling)
epoch : 182 iter : 0 (6m 37s) 1.5395  / kicking something ✓
epoch : 183 iter : 0 (6m 39s) 1.8486  / make a phone call/answer phone ✗ (hand waving)
epoch : 184 iter : 0 (6m 41s) 1.6494  / wear a shoe ✗ (standing up)
epoch : 185 iter : 0 (6m 43s) 1.5569  / wear a shoe ✓
epoch : 186 iter : 0 (6m 45s) 1.5307  / wear a shoe ✗ (standing up)
epoch : 187 iter : 0 (6m 48s) 1.5003  / wear a shoe ✗ (standing up)
epoch : 188 iter : 0 (6m 50s) 1.6832  / wear a shoe ✗ (standing up)
epoch : 189 iter : 0 (6m 52s) 1.7315  / make a phone call/answer phone ✗ (pointing to something with finger)
epoch : 190 iter : 0 (6m 54s) 1.5964  / take off jacket ✗ (hugging other person)
epoch : 191 iter : 0 (6m 56s) 1.8751  / make a phone call/answer phone ✗ (brushing hair)
epoch : 192 iter : 0 (6m 58s) 1.6501  / take off jacket ✗ (wear a shoe)
epoch : 193 iter : 0 (7m 1s) 1.5726  / wear a shoe ✗ (

epoch : 296 iter : 0 (10m 47s) 0.9604  / standing up ✓
epoch : 297 iter : 0 (10m 49s) 0.7937  / kicking something ✓
epoch : 298 iter : 0 (10m 51s) 0.9615  / make a phone call/answer phone ✓
epoch : 299 iter : 0 (10m 54s) 1.1223  / falling ✓
epoch : 0 iter : 0 (0m 0s) 2.3111  / hand waving ✗ (wear a shoe)
epoch : 1 iter : 0 (0m 2s) 2.3052  / hand waving ✗ (standing up)
epoch : 2 iter : 0 (0m 4s) 2.3041  / make a phone call/answer phone ✓
epoch : 3 iter : 0 (0m 7s) 2.2883  / make a phone call/answer phone ✗ (hugging other person)
epoch : 4 iter : 0 (0m 9s) 2.3092  / make a phone call/answer phone ✗ (wear a shoe)
epoch : 5 iter : 0 (0m 12s) 2.2859  / make a phone call/answer phone ✓
epoch : 6 iter : 0 (0m 14s) 2.3261  / make a phone call/answer phone ✗ (take off jacket)
epoch : 7 iter : 0 (0m 16s) 2.3095  / make a phone call/answer phone ✗ (hand waving)
epoch : 8 iter : 0 (0m 19s) 2.3152  / make a phone call/answer phone ✗ (falling)
epoch : 9 iter : 0 (0m 21s) 2.3141  / make a phone call/

epoch : 90 iter : 0 (3m 36s) 2.2852  / make a phone call/answer phone ✗ (wear a shoe)
epoch : 91 iter : 0 (3m 38s) 2.2903  / make a phone call/answer phone ✗ (hugging other person)
epoch : 92 iter : 0 (3m 41s) 2.2986  / make a phone call/answer phone ✗ (hugging other person)
epoch : 93 iter : 0 (3m 43s) 2.3234  / make a phone call/answer phone ✗ (hand waving)
epoch : 94 iter : 0 (3m 46s) 2.2534  / make a phone call/answer phone ✓
epoch : 95 iter : 0 (3m 48s) 2.3063  / make a phone call/answer phone ✗ (falling)
epoch : 96 iter : 0 (3m 50s) 2.3509  / make a phone call/answer phone ✓
epoch : 97 iter : 0 (3m 53s) 2.3292  / make a phone call/answer phone ✗ (take off jacket)
epoch : 98 iter : 0 (3m 55s) 2.3129  / make a phone call/answer phone ✗ (take off jacket)
epoch : 99 iter : 0 (3m 58s) 2.3024  / make a phone call/answer phone ✗ (kicking something)
epoch : 100 iter : 0 (4m 0s) 2.3167  / make a phone call/answer phone ✗ (hugging other person)
epoch : 101 iter : 0 (4m 2s) 2.3390  / make a

epoch : 182 iter : 0 (7m 16s) 2.3288  / make a phone call/answer phone ✗ (hand waving)
epoch : 183 iter : 0 (7m 18s) 2.2949  / make a phone call/answer phone ✗ (standing up)
epoch : 184 iter : 0 (7m 21s) 2.3404  / make a phone call/answer phone ✗ (kicking something)
epoch : 185 iter : 0 (7m 23s) 2.3607  / make a phone call/answer phone ✗ (brushing hair)
epoch : 186 iter : 0 (7m 25s) 2.3184  / make a phone call/answer phone ✗ (take off jacket)
epoch : 187 iter : 0 (7m 28s) 2.3506  / make a phone call/answer phone ✗ (hand waving)
epoch : 188 iter : 0 (7m 30s) 2.2663  / make a phone call/answer phone ✗ (falling)
epoch : 189 iter : 0 (7m 33s) 2.3346  / make a phone call/answer phone ✗ (wear a shoe)
epoch : 190 iter : 0 (7m 35s) 2.3299  / make a phone call/answer phone ✗ (hand waving)
epoch : 191 iter : 0 (7m 37s) 2.2943  / make a phone call/answer phone ✗ (pointing to something with finger)
epoch : 192 iter : 0 (7m 40s) 2.3133  / make a phone call/answer phone ✗ (kicking something)
epoch :

epoch : 274 iter : 0 (10m 57s) 2.2603  / make a phone call/answer phone ✗ (take off jacket)
epoch : 275 iter : 0 (10m 59s) 2.2870  / make a phone call/answer phone ✗ (hand waving)
epoch : 276 iter : 0 (11m 2s) 2.2728  / make a phone call/answer phone ✗ (falling)
epoch : 277 iter : 0 (11m 4s) 2.2821  / make a phone call/answer phone ✗ (falling)
epoch : 278 iter : 0 (11m 6s) 2.2798  / make a phone call/answer phone ✗ (take off jacket)
epoch : 279 iter : 0 (11m 9s) 2.2684  / make a phone call/answer phone ✓
epoch : 280 iter : 0 (11m 11s) 2.2966  / make a phone call/answer phone ✗ (falling)
epoch : 281 iter : 0 (11m 13s) 2.2593  / make a phone call/answer phone ✗ (standing up)
epoch : 282 iter : 0 (11m 16s) 2.2801  / make a phone call/answer phone ✗ (standing up)
epoch : 283 iter : 0 (11m 18s) 2.3180  / make a phone call/answer phone ✗ (brushing hair)
epoch : 284 iter : 0 (11m 21s) 2.2781  / make a phone call/answer phone ✗ (kicking something)
epoch : 285 iter : 0 (11m 23s) 2.2872  / make 

In [27]:
results

Unnamed: 0,chunks,layers,accuracy
0,45.0,2.0,66.184971
1,45.0,1.0,64.739884
2,45.0,4.0,54.624277
3,45.0,6.0,9.82659


### Сгенерировать другой датасет с меньшим количеством “кадров” в серии и сравнить улучшилось или ухудшилось качество предсказания. Провести несколько таких итераций, дать свою оценку уменьшению и увеличению кадров, назвать оптимальное, на ваш взгляд, их количество. Желательно сделать так, чтобы длина последовательности передавалась как атрибут класса.

In [28]:
test_data =  Datareader(data_path, broken_files_path, training_classes, chunk_len=30).create_test_data()

action classes:  {4: 0, 9: 1, 15: 2, 16: 3, 23: 4, 24: 5, 28: 6, 31: 7, 43: 8, 55: 9}
action files:  {4: 120, 9: 120, 15: 120, 16: 120, 23: 120, 24: 120, 28: 120, 31: 120, 43: 120, 55: 120}


In [29]:
test_data.to_csv("data/skels_30.csv", index = False)


In [30]:
skeletons = pd.read_csv("data/skels_30.csv")

In [31]:
skeletons.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,2241,2242,2243,2244,2245,2246,2247,2248,2249,labels
0,0.195193,0.19615,3.817392,0.208243,0.449287,3.735566,0.221046,0.697232,3.643296,0.191088,...,-0.076086,0.352439,3.678333,0.072445,0.777312,3.474925,0.119312,0.732688,3.4598,0
1,0.207126,0.187448,3.819451,0.220117,0.454096,3.735067,0.232467,0.714655,3.638724,0.179146,...,0.082068,0.699071,3.676307,0.117671,0.479329,3.54589,0.167577,0.487943,3.501182,0
2,0.222104,0.223074,3.826334,0.219044,0.455948,3.751825,0.215,0.68452,3.667829,0.166631,...,0.168651,0.544064,3.669563,0.079051,0.374522,3.622438,0.129491,0.39456,3.5846,0
3,0.224113,-0.031984,4.022326,0.217001,0.177784,3.932267,0.210569,0.384047,3.83198,0.188331,...,-0.116302,-0.229616,3.850086,0.077438,-0.222702,3.700509,0.113458,-0.138128,3.685867,1
4,0.172138,-0.069111,3.985911,0.113765,0.08957,3.831272,0.055991,0.24631,3.665481,0.01208,...,-0.214219,0.080217,3.6691,0.06039,-0.003226,3.495363,0.041341,0.057401,3.4995,1


In [32]:
skeletons.shape

(1521, 2251)

In [33]:
class Skeleton_Dataset(Dataset):
    def __init__(self, file_path, transform=None):
        self.data = pd.read_csv(file_path)
        self.transform = transform
        self.labels = self.data.iloc[:,-1]


    def __len__(self):
        return len(self.data) 
    
    
    def __getitem__(self, idx):
        item = np.asarray(self.data.iloc[idx,:-1]).reshape(30,25*3)
        label = self.labels[idx]
        if self.transform != None:
            item = transform(item)
            
        

        return (item, label) 

In [34]:
dataset = Skeleton_Dataset(file_path = "data/skels_30.csv", transform=None)

In [35]:
skel, lab = dataset.__getitem__(20)
skel, lab = dataset[20]

In [36]:
LABELS[lab]

'make a phone call/answer phone'

In [37]:
skel.shape

(30, 75)

#### Оборачиваем в Даталоадер:

In [38]:
train_dataset, test_dataset = torch.utils.data.random_split(dataset, [int(0.75*len(dataset)),int(0.25*len(dataset)+1)])

In [39]:
train_loader = DataLoader(train_dataset, batch_size = 16, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size = 1, shuffle=True)

In [40]:
n_hidden = 128
n_joints = 25*3
n_categories = len(LABELS)


In [41]:
num_layers = [2, 4]

In [42]:
for n_layer in num_layers:
    
    rnn = LSTM_net(n_joints, n_hidden, n_categories, n_layer).to(device)
    criterion = nn.CrossEntropyLoss()
    learning_rate = 0.0007
    optimizer = optim.SGD(rnn.parameters(),lr=learning_rate,momentum=0.9)

    all_losses = []
    start = time.time()
    counter = 0
    for epoch in range(300):  
        current_loss = 0
        running_loss = 0.0
        
        for i, data in enumerate(train_loader, 0):

            inputs, labels = data[0].to(device), data[1].to(device)
            optimizer.zero_grad()

            output = rnn(inputs.float())
            loss = criterion(output, labels)
            loss.backward()
            optimizer.step() 


            current_loss += loss.item()
            category = LABELS[int(labels[0])]

            if counter % 500 == 0:
                guess, guess_i = categoryFromOutput(output)
                correct = '✓' if guess == category else '✗ (%s)' % category
                print('epoch : %d iter : %d (%s) %.4f  / %s %s' % (epoch, i, timeSince(start), loss, guess, correct))


            counter = counter + 1
        if counter % 100 == 0:
            all_losses.append(current_loss / 25)
            current_loss = 0
        
        total = 0
        right = 0
        counter = 0

    rnn.eval()
    with torch.no_grad():
        for i, data in enumerate(test_loader, 0):
            counter = counter + 1
            inputs, labels = data[0].to(device), data[1].to(device)  
            output = rnn(inputs.float())
            guess, guess_i = categoryFromOutput(output)
            category = LABELS[int(labels[0])]

            if guess == category:
                right = right + 1
                
    

    results = results.append({
        'chunks': 30,
        'layers': n_layer,
        'accuracy' : (100 * right / counter)}, ignore_index=True)    

epoch : 0 iter : 0 (0m 0s) 2.2995  / brushing hair ✗ (falling)
epoch : 1 iter : 0 (0m 1s) 2.3118  / hand waving ✗ (kicking something)
epoch : 2 iter : 0 (0m 3s) 2.2963  / hand waving ✗ (hugging other person)
epoch : 3 iter : 0 (0m 4s) 2.2990  / brushing hair ✗ (falling)
epoch : 4 iter : 0 (0m 6s) 2.2916  / wear a shoe ✓
epoch : 5 iter : 0 (0m 8s) 2.2853  / wear a shoe ✗ (hugging other person)
epoch : 6 iter : 0 (0m 9s) 2.2997  / wear a shoe ✗ (hugging other person)
epoch : 7 iter : 0 (0m 11s) 2.2656  / wear a shoe ✗ (hugging other person)
epoch : 8 iter : 0 (0m 12s) 2.2910  / wear a shoe ✗ (make a phone call/answer phone)
epoch : 9 iter : 0 (0m 14s) 2.2881  / wear a shoe ✗ (make a phone call/answer phone)
epoch : 10 iter : 0 (0m 16s) 2.3021  / wear a shoe ✗ (standing up)
epoch : 11 iter : 0 (0m 17s) 2.2825  / wear a shoe ✓
epoch : 12 iter : 0 (0m 19s) 2.2892  / wear a shoe ✗ (make a phone call/answer phone)
epoch : 13 iter : 0 (0m 20s) 2.2763  / wear a shoe ✗ (hugging other person)
epo

epoch : 122 iter : 0 (3m 10s) 0.9336  / wear a shoe ✓
epoch : 123 iter : 0 (3m 12s) 0.7198  / standing up ✓
epoch : 124 iter : 0 (3m 13s) 1.1604  / standing up ✗ (kicking something)
epoch : 125 iter : 0 (3m 15s) 0.9594  / pointing to something with finger ✓
epoch : 126 iter : 0 (3m 17s) 0.7516  / make a phone call/answer phone ✓
epoch : 127 iter : 0 (3m 18s) 0.9004  / hand waving ✓
epoch : 128 iter : 0 (3m 20s) 0.9816  / brushing hair ✗ (make a phone call/answer phone)
epoch : 129 iter : 0 (3m 21s) 1.3359  / take off jacket ✗ (make a phone call/answer phone)
epoch : 130 iter : 0 (3m 23s) 1.3939  / hugging other person ✗ (kicking something)
epoch : 131 iter : 0 (3m 25s) 1.0688  / hugging other person ✓
epoch : 132 iter : 0 (3m 26s) 0.7600  / hugging other person ✓
epoch : 133 iter : 0 (3m 28s) 1.6166  / pointing to something with finger ✗ (hugging other person)
epoch : 134 iter : 0 (3m 30s) 0.9040  / make a phone call/answer phone ✓
epoch : 135 iter : 0 (3m 31s) 1.5581  / kicking someth

epoch : 245 iter : 0 (6m 22s) 1.1447  / make a phone call/answer phone ✗ (take off jacket)
epoch : 246 iter : 0 (6m 23s) 0.6581  / make a phone call/answer phone ✗ (hugging other person)
epoch : 247 iter : 0 (6m 25s) 0.6208  / hugging other person ✓
epoch : 248 iter : 0 (6m 27s) 0.7159  / take off jacket ✓
epoch : 249 iter : 0 (6m 28s) 0.7494  / make a phone call/answer phone ✓
epoch : 250 iter : 0 (6m 30s) 0.7183  / falling ✓
epoch : 251 iter : 0 (6m 31s) 0.3320  / hand waving ✓
epoch : 252 iter : 0 (6m 33s) 1.1110  / brushing hair ✓
epoch : 253 iter : 0 (6m 34s) 0.8597  / kicking something ✗ (hugging other person)
epoch : 254 iter : 0 (6m 36s) 0.9414  / take off jacket ✓
epoch : 255 iter : 0 (6m 37s) 0.7395  / hugging other person ✓
epoch : 256 iter : 0 (6m 39s) 0.6159  / brushing hair ✓
epoch : 257 iter : 0 (6m 40s) 0.5281  / pointing to something with finger ✓
epoch : 258 iter : 0 (6m 42s) 0.4980  / take off jacket ✓
epoch : 259 iter : 0 (6m 43s) 0.5143  / make a phone call/answer 

epoch : 69 iter : 0 (2m 0s) 2.3030  / wear a shoe ✗ (take off jacket)
epoch : 70 iter : 0 (2m 2s) 2.2926  / wear a shoe ✗ (kicking something)
epoch : 71 iter : 0 (2m 3s) 2.3048  / wear a shoe ✗ (hand waving)
epoch : 72 iter : 0 (2m 5s) 2.2951  / wear a shoe ✗ (falling)
epoch : 73 iter : 0 (2m 7s) 2.2873  / wear a shoe ✓
epoch : 74 iter : 0 (2m 9s) 2.3178  / wear a shoe ✗ (falling)
epoch : 75 iter : 0 (2m 10s) 2.3043  / wear a shoe ✗ (hugging other person)
epoch : 76 iter : 0 (2m 12s) 2.2808  / wear a shoe ✓
epoch : 77 iter : 0 (2m 14s) 2.2965  / wear a shoe ✗ (brushing hair)
epoch : 78 iter : 0 (2m 15s) 2.3028  / wear a shoe ✗ (hugging other person)
epoch : 79 iter : 0 (2m 17s) 2.2928  / wear a shoe ✗ (standing up)
epoch : 80 iter : 0 (2m 19s) 2.2996  / wear a shoe ✗ (falling)
epoch : 81 iter : 0 (2m 20s) 2.3038  / wear a shoe ✗ (make a phone call/answer phone)
epoch : 82 iter : 0 (2m 22s) 2.3010  / wear a shoe ✗ (brushing hair)
epoch : 83 iter : 0 (2m 24s) 2.3126  / wear a shoe ✗ (mak

epoch : 185 iter : 0 (5m 21s) 1.6115  / wear a shoe ✓
epoch : 186 iter : 0 (5m 23s) 1.6281  / hugging other person ✗ (pointing to something with finger)
epoch : 187 iter : 0 (5m 24s) 1.5991  / hugging other person ✗ (brushing hair)
epoch : 188 iter : 0 (5m 26s) 1.7416  / make a phone call/answer phone ✗ (take off jacket)
epoch : 189 iter : 0 (5m 28s) 1.6891  / falling ✗ (standing up)
epoch : 190 iter : 0 (5m 30s) 1.7098  / take off jacket ✓
epoch : 191 iter : 0 (5m 32s) 1.5054  / kicking something ✓
epoch : 192 iter : 0 (5m 33s) 1.5462  / brushing hair ✓
epoch : 193 iter : 0 (5m 35s) 1.6451  / take off jacket ✓
epoch : 194 iter : 0 (5m 37s) 1.8559  / take off jacket ✓
epoch : 195 iter : 0 (5m 39s) 2.1929  / falling ✗ (hugging other person)
epoch : 196 iter : 0 (5m 40s) 1.6714  / standing up ✗ (take off jacket)
epoch : 197 iter : 0 (5m 42s) 1.5239  / kicking something ✗ (falling)
epoch : 198 iter : 0 (5m 44s) 1.4866  / wear a shoe ✓
epoch : 199 iter : 0 (5m 46s) 1.4365  / take off jacke

In [45]:
results

Unnamed: 0,chunks,layers,accuracy
0,45.0,2.0,66.184971
1,45.0,1.0,64.739884
2,45.0,4.0,54.624277
3,45.0,6.0,9.82659
4,30.0,2.0,56.955381
5,30.0,4.0,49.606299


Наилучший результат получен при начальных настройках: 2 внутренних слоя, 45 чанков. При увеличении количества слоев точность предсказаний падает, при уменьшении до 1 слоя падает незначительно. Уменьшение количества чанков также ухудшает работу модели.