# Our Dataset Image Classification

In [3]:
### Reference from: https://github.com/yysijie/st-gcn/blob/master/net/utils/graph.py

import os
import torch
import numpy as np


class Graph:
    """Skeleton graph whose adjacency tensor `A` feeds the graph convolutions.

    Args:
        - layout: (string) which skeleton to build, one of
            - coco_cut: 14 nodes, COCO format but cut 4 joints
                (L-R ears, L-R eyes) out.
            - mediapipe_KSL: 47 nodes; node 0 is linked through two arm
                chains to two 21-node hands rooted at nodes 5 and 26.
        - strategy: (string) how the adjacency is split into subsets, one of
            - uniform: Uniform Labeling,
            - distance: Distance Partitioning,
            - spatial: Spatial Configuration,
          see section 'Partition Strategies' of
          https://arxiv.org/abs/1801.07455.
        - max_hop: (int) the maximal distance between two connected nodes.
        - dilation: (int) controls the spacing between the kernel points.
    Attributes:
        - num_node, edge, center: filled in by `get_edge`.
        - hop_dis: (num_node, num_node) array from `get_hop_distance`.
        - A: (K, num_node, num_node) array filled in by `get_adjacency`.
    Raises:
        - ValueError: for an unknown `layout` or `strategy`.
    """
    def __init__(self,
                 layout='coco_cut',
                 strategy='uniform',
                 max_hop=1,
                 dilation=1):
        self.max_hop = max_hop
        self.dilation = dilation

        # Order matters: the edges define the hop distances, which in turn
        # define the adjacency partitions.
        self.get_edge(layout)
        self.hop_dis = get_hop_distance(self.num_node, self.edge, max_hop)
        self.get_adjacency(strategy)

    def get_edge(self, layout):
        """Set `num_node`, `edge` (self links + neighbour links) and `center`."""
        if layout == 'coco_cut':
            node_count, center_node = 14, 13
            links = [(6, 4), (4, 2), (2, 13), (13, 1), (5, 3), (3, 1), (12, 10),
                     (10, 8), (8, 2), (11, 9), (9, 7), (7, 1), (13, 0)]
        elif layout == 'mediapipe_KSL':
            node_count, center_node = 47, 0
            # used_key_points=
            # [0,11,12,13,14]+[i for i in range(33,33+21)] + [i for i in range(54,54+21)]
            # nose-arms-wrist
            links = [(0, 1), (0, 2), (1, 3), (2, 4), (3, 26), (4, 5)]
            # Each hand is a wrist node followed by five chains of four
            # consecutive nodes; every chain starts at the wrist.
            for wrist in (5, 26):
                for finger in range(5):
                    first = wrist + 1 + 4 * finger
                    links.append((wrist, first))
                    links.extend((j, j + 1) for j in range(first, first + 3))
        else:
            raise ValueError('This layout is not supported!')

        self.num_node = node_count
        self.edge = [(i, i) for i in range(node_count)] + links
        self.center = center_node

    def get_adjacency(self, strategy):
        """Build `self.A` from `hop_dis` according to the partition strategy."""
        size = self.num_node
        hops = range(0, self.max_hop + 1, self.dilation)

        # Binary adjacency over every admissible hop, then column-normalised.
        connected = np.zeros((size, size))
        for hop in hops:
            connected[self.hop_dis == hop] = 1
        norm_adj = normalize_digraph(connected)

        if strategy == 'uniform':
            # A single subset holding the whole normalised adjacency.
            self.A = np.stack([norm_adj])
        elif strategy == 'distance':
            # One subset per hop value.
            self.A = np.stack([np.where(self.hop_dis == hop, norm_adj, 0.0)
                               for hop in hops])
        elif strategy == 'spatial':
            # Compare, for every pair (j, i), the hop distance of j and of i
            # to the center node.
            to_center = self.hop_dis[:, self.center]
            dist_j = to_center[:, np.newaxis]
            dist_i = to_center[np.newaxis, :]
            same = dist_j == dist_i
            j_farther = dist_j > dist_i

            subsets = []
            for hop in hops:
                at_hop = self.hop_dis == hop
                a_root = np.where(at_hop & same, norm_adj, 0.0)
                a_close = np.where(at_hop & j_farther, norm_adj, 0.0)
                a_further = np.where(at_hop & ~same & ~j_farther, norm_adj, 0.0)
                if hop == 0:
                    subsets.append(a_root)
                else:
                    subsets.append(a_root + a_close)
                    subsets.append(a_further)
            self.A = np.stack(subsets)
        else:
            raise ValueError("This strategy is not supported!")


def get_hop_distance(num_node, edge, max_hop=1):
    """Return the pairwise hop-distance matrix of an undirected graph.

    Args:
        - num_node: (int) number of nodes.
        - edge: iterable of (i, j) index pairs; each pair is treated as
            undirected.
        - max_hop: (int) largest distance that is resolved.
    Returns:
        (num_node, num_node) float array holding the smallest d in
        [0, max_hop] for which a walk of length d joins the two nodes, and
        `np.inf` where no such walk exists.
    """
    adjacency = np.zeros((num_node, num_node))
    for src, dst in edge:
        adjacency[src, dst] = 1
        adjacency[dst, src] = 1

    # reachable[d][a, b] is True when some walk of exactly d steps joins a and b.
    reachable = np.stack([np.linalg.matrix_power(adjacency, steps)
                          for steps in range(max_hop + 1)]) > 0

    hop_dis = np.full((num_node, num_node), np.inf)
    # Write the largest distances first so that shorter ones overwrite them.
    for steps in reversed(range(max_hop + 1)):
        hop_dis[reachable[steps]] = steps
    return hop_dis


def normalize_digraph(A):
    """Scale every column of `A` by the inverse of its column sum.

    Columns whose sum is not positive are multiplied by zero.

    Args:
        - A: square 2-D array.
    Returns:
        The product `A @ D`, with `D` the diagonal matrix of inverse column sums.
    """
    column_sum = np.sum(A, 0)
    size = A.shape[0]
    inverse_degree = np.zeros((size, size))
    positive = [idx for idx in range(size) if column_sum[idx] > 0]
    for idx in positive:
        inverse_degree[idx, idx] = column_sum[idx]**(-1)
    return np.dot(A, inverse_degree)


def normalize_undigraph(A):
    """Symmetrically normalise `A` with its column sums.

    Nodes whose column sum is not positive get a zero scaling factor.

    Args:
        - A: square 2-D array.
    Returns:
        The product `D @ A @ D`, with `D` the diagonal matrix of column sums
        raised to the power -0.5.
    """
    column_sum = np.sum(A, 0)
    size = A.shape[0]
    inv_sqrt_degree = np.zeros((size, size))
    positive = [idx for idx in range(size) if column_sum[idx] > 0]
    for idx in positive:
        inv_sqrt_degree[idx, idx] = column_sum[idx]**(-0.5)
    left_scaled = np.dot(inv_sqrt_degree, A)
    return np.dot(left_scaled, inv_sqrt_degree)

In [4]:
### Reference from: https://github.com/yysijie/st-gcn/tree/master/net

import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np

# from Actionsrecognition.Utils import Graph


class GraphConvolution(nn.Module):
    """Graph convolution: a Conv2d projection followed by adjacency mixing.

    Args:
        - in_channels: (int) Number of channels in the input sequence data.
        - out_channels: (int) Number of channels produced by the convolution.
        - kernel_size: (int) Size of the graph convolving kernel, i.e. the
            number K of adjacency subsets.
        - t_kernel_size: (int) Size of the temporal convolving kernel.
        - t_stride: (int, optional) Stride of the temporal convolution. Default: 1
        - t_padding: (int, optional) Temporal zero-padding added to both sides of
            the input. Default: 0
        - t_dilation: (int, optional) Spacing between temporal kernel elements.
            Default: 1
        - bias: (bool, optional) If `True`, adds a learnable bias to the output.
            Default: `True`
    Shape:
        - Inputs x: Graph sequence in :math:`(N, in_channels, T_{in}, V)`,
                 A: Graph adjacency matrix in :math:`(K, V, V)`,
        - Output: Graph sequence out in :math:`(N, out_channels, T_{out}, V)`

            where
                :math:`N` is a batch size,
                :math:`K` is the spatial kernel size, as :math:`K == kernel_size`,
                :math:`T_{in}/T_{out}` is a length of input/output sequence,
                :math:`V` is the number of graph nodes.

    """
    def __init__(self, in_channels, out_channels, kernel_size,
                 t_kernel_size=1,
                 t_stride=1,
                 t_padding=0,
                 t_dilation=1,
                 bias=True):
        super().__init__()

        self.kernel_size = kernel_size
        # A single convolution emits the channels of all K subsets at once.
        self.conv = nn.Conv2d(
            in_channels,
            out_channels * kernel_size,
            kernel_size=(t_kernel_size, 1),
            padding=(t_padding, 0),
            stride=(t_stride, 1),
            dilation=(t_dilation, 1),
            bias=bias,
        )

    def forward(self, x, A):
        projected = self.conv(x)
        batch, stacked, frames, nodes = projected.size()
        # Split the stacked channel axis into (K, out_channels).
        per_subset = projected.view(batch, self.kernel_size,
                                    stacked // self.kernel_size, frames, nodes)
        # Sum over the subset axis k and the source-node axis v.
        mixed = torch.einsum('nkctv,kvw->nctw', per_subset, A)
        return mixed.contiguous()


class st_gcn(nn.Module):
    """One spatial temporal block: graph conv, temporal conv, residual, ReLU.

    Args:
        - in_channels: (int) Number of channels in the input sequence data.
        - out_channels: (int) Number of channels produced by the convolution.
        - kernel_size: (tuple) `(temporal_kernel, graph_kernel)`; the temporal
            kernel size must be odd.
        - stride: (int, optional) Stride of the temporal convolution. Default: 1
        - dropout: (float, optional) Dropout rate of the final output. Default: 0
        - residual: (bool, optional) If `True`, applies a residual mechanism.
            Default: `True`
    Shape:
        - Inputs x: Graph sequence in :math:`(N, in_channels, T_{in}, V)`,
                 A: Graph adjacency matrix in :math:`(K, V, V)`,
        - Output: Graph sequence out in :math:`(N, out_channels, T_{out}, V)`
            where
                :math:`N` is a batch size,
                :math:`K` is the spatial kernel size, as :math:`K == kernel_size[1]`,
                :math:`T_{in}/T_{out}` is a length of input/output sequence,
                :math:`V` is the number of graph nodes.
    """
    def __init__(self, in_channels, out_channels, kernel_size,
                 stride=1,
                 dropout=0,
                 residual=True):
        super().__init__()
        assert len(kernel_size) == 2
        assert kernel_size[0] % 2 == 1
        temporal_kernel = kernel_size[0]
        graph_kernel = kernel_size[1]
        # "Same" padding along time, none along the node axis,
        # e.g. kernel_size (9, 3) gives padding (4, 0).
        time_padding = ((temporal_kernel - 1) // 2, 0)

        self.gcn = GraphConvolution(in_channels, out_channels, graph_kernel)

        temporal_layers = [
            nn.BatchNorm2d(out_channels),
            nn.ReLU(inplace=True),
            nn.Conv2d(out_channels, out_channels,
                      (temporal_kernel, 1), (stride, 1), time_padding),
            nn.BatchNorm2d(out_channels),
            nn.Dropout(dropout, inplace=True),
        ]
        self.tcn = nn.Sequential(*temporal_layers)

        if not residual:
            # No shortcut: contributes a plain 0 to the sum in forward().
            self.residual = lambda _: 0
        elif in_channels != out_channels or stride != 1:
            # Shapes differ, so project the input with a 1x1 convolution.
            self.residual = nn.Sequential(
                nn.Conv2d(in_channels, out_channels,
                          kernel_size=1, stride=(stride, 1)),
                nn.BatchNorm2d(out_channels),
            )
        else:
            # Shapes already match: identity shortcut.
            self.residual = lambda inp: inp
        self.relu = nn.ReLU(inplace=True)

    def forward(self, x, A):
        shortcut = self.residual(x)
        features = self.gcn(x, A)
        features = self.tcn(features) + shortcut
        return self.relu(features)


class StreamSpatialTemporalGraph(nn.Module):
    """A stack of ten `st_gcn` blocks over one skeleton stream.

    Args:
        - in_channels: (int) Number of input channels.
        - graph_args: (dict) Keyword arguments passed to the `Graph` class.
        - num_class: (int) Number of class outputs. If `None` return pooling
            features of the last st-gcn layer instead.
        - edge_importance_weighting: (bool) If `True`, adds a learnable importance
            weighting to the edges of the graph.
        - **kwargs: (optional) Other parameters for graph convolution units;
            `dropout` is not forwarded to the first block.
    Shape:
        - Input: :math:`(N, in_channels, T_{in}, V_{in})`
        - Output: :math:`(N, num_class)` where
            :math:`N` is a batch size,
            :math:`T_{in}` is a length of input sequence,
            :math:`V_{in}` is the number of graph nodes,
        or If num_class is `None`: `(N, out_channels)`
            :math:`out_channels` is number of out_channels of the last layer.
    """
    def __init__(self, in_channels, graph_args, num_class=None,
                 edge_importance_weighting=True, **kwargs):
        super().__init__()
        # The adjacency is a fixed buffer: it follows .to(device) and is saved
        # in the state dict, but it is not trained.
        skeleton = Graph(**graph_args)
        A = torch.tensor(skeleton.A, dtype=torch.float32, requires_grad=False)
        self.register_buffer('A', A)

        # Kernel is (temporal size, number of adjacency subsets).
        kernel_size = (9, A.size(0))
        first_block_kwargs = {key: value for key, value in kwargs.items()
                              if key != 'dropout'}

        self.data_bn = nn.BatchNorm1d(in_channels * A.size(1))

        # (in_channels, out_channels, temporal stride) of blocks 2 to 10.
        later_blocks = [(64, 64, 1), (64, 64, 1), (64, 64, 1),
                        (64, 128, 2), (128, 128, 1), (128, 128, 1),
                        (128, 256, 2), (256, 256, 1), (256, 256, 1)]
        blocks = [st_gcn(in_channels, 64, kernel_size, 1, residual=False,
                         **first_block_kwargs)]
        for width_in, width_out, stride in later_blocks:
            blocks.append(st_gcn(width_in, width_out, kernel_size, stride, **kwargs))
        self.st_gcn_networks = nn.ModuleList(blocks)

        # One importance mask per block: learnable, or the constant 1.
        block_count = len(self.st_gcn_networks)
        if edge_importance_weighting:
            self.edge_importance = nn.ParameterList(
                [nn.Parameter(torch.ones(A.size())) for _ in range(block_count)])
        else:
            self.edge_importance = [1] * block_count

        if num_class is None:
            self.cls = lambda features: features
        else:
            self.cls = nn.Conv2d(256, num_class, kernel_size=1)

    def forward(self, x):
        # Data normalization: batch-norm over the flattened (node, channel) axis.
        N, C, T, V = x.size()
        flat = x.permute(0, 3, 1, 2).contiguous().view(N, V * C, T)
        flat = self.data_bn(flat)
        x = flat.view(N, V, C, T).permute(0, 2, 3, 1).contiguous()  # (N, C, T, V)

        # Every block sees the adjacency scaled by its own importance mask.
        for block, importance in zip(self.st_gcn_networks, self.edge_importance):
            x = block(x, self.A * importance)

        # Global average over time and nodes, then the optional classifier.
        pooled = F.avg_pool2d(x, x.size()[2:])
        scores = self.cls(pooled)
        return scores.view(scores.size(0), -1)


class TwoStreamSpatialTemporalGraph(nn.Module):
    """Classifier built on `StreamSpatialTemporalGraph` feature streams.

    Two streams (`pts_stream`, `mot_stream`) are constructed, each with 3 input
    channels, but `forward` only runs `pts_stream` on `inputs[0]` and feeds its
    256 features to the linear head. `mot_stream` and `inputs[1]` are not used
    by `forward`; the concatenation / attention fusion of both streams that
    used to sit here is disabled.

    Args:
        - graph_args: (dict) Keyword arguments passed to the `Graph` class.
        - num_class: (int) Number of class outputs.
        - edge_importance_weighting: (bool) If `True`, adds a learnable importance
            weighting to the edges of the graph.
        - **kwargs: (optional) Other parameters for graph convolution units.
    Shape:
        - Input: a sequence whose first element is the points tensor
            :math:`(N, 3, T, V)` where
            :math:`N` is a batch size,
            :math:`T` is a length of input sequence,
            :math:`V` is the number of graph nodes,
        - Output: :math:`(N, num_class)` raw scores (no sigmoid / softmax).
    """
    def __init__(self, graph_args, num_class, edge_importance_weighting=True,
                 **kwargs):
        super().__init__()
        stream_args = (3, graph_args, None, edge_importance_weighting)
        self.pts_stream = StreamSpatialTemporalGraph(*stream_args, **kwargs)
        self.mot_stream = StreamSpatialTemporalGraph(*stream_args, **kwargs)

        # Head sized for a single stream (256 features), not the 256 * 2
        # features of a two-stream concatenation.
        self.fcn = nn.Linear(256, num_class)

    def forward(self, inputs):
        pts_features = self.pts_stream(inputs[0])  # (N, 256)
        return self.fcn(pts_features)

In [8]:
# Smoke test: build the model and push one random sample of 40 frames and
# 47 nodes through it to check the output shape.
num_class = 20

graph_args = dict(layout='mediapipe_KSL', strategy='spatial')
model = TwoStreamSpatialTemporalGraph(graph_args, num_class)
a = torch.randn(1, 3, 40, 47)
b = torch.randn(1, 3, 40, 47)
# print(model)
out = model((a, b))
print(out.shape)


torch.Size([1, 20])


In [27]:
import os
import time
import torch
import pickle
import numpy as np
import torch.nn.functional as F
from shutil import copyfile
from tqdm import tqdm
from torch.utils import data
from torch.optim.adadelta import Adadelta
from sklearn.model_selection import train_test_split

#device = 'cuda'
# Use the GPU when one is visible to PyTorch, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("using", device, "device")

# Training hyper-parameters.
epochs = 30
batch_size = 32

def load_dataset(data_files, batch_size, split_size=0.2, used_key_points=None):
    """Load pickled (features, labels) files into torch DataLoaders.

    Args:
        - data_files: iterable of paths; every file must unpickle to a
            `(features, labels)` pair. All pairs are concatenated along axis 0.
        - batch_size: (int) batch size of the returned loaders.
        - split_size: (float) fraction held out for validation through a
            stratified `train_test_split` (random_state=0). A value <= 0
            disables the split.
        - used_key_points: (optional) indices kept from the last (4th) feature
            axis; a list or an index array. `None` keeps every entry.
    Returns:
        `(train_loader, valid_loader)`. The train loader shuffles, the valid
        loader does not; `valid_loader` is `None` when `split_size <= 0`.
    """
    features, labels = [], []
    for fil in data_files:
        # NOTE(security): pickle.load can execute arbitrary code stored in the
        # file - only point this at dataset files you trust.
        with open(fil, 'rb') as f:
            fts, lbs = pickle.load(f)
        features.append(fts)
        labels.append(lbs)
    features = np.concatenate(features, axis=0)
    labels = np.concatenate(labels, axis=0)

    # Identity check: `!= None` on an index array compares element-wise and
    # makes the `if` raise "truth value of an array is ambiguous".
    if used_key_points is not None:
        features = features[:, :, :, used_key_points]
    print(features.shape)
    print(labels.shape)

    def make_loader(x, y, shuffle):
        # Features become float32, labels int64 class indices.
        dataset = data.TensorDataset(torch.tensor(x, dtype=torch.float32),
                                     torch.tensor(y, dtype=torch.int64))
        return data.DataLoader(dataset, batch_size, shuffle=shuffle)

    if split_size > 0:
        x_train, x_valid, y_train, y_valid = train_test_split(
            features, labels, test_size=split_size, random_state=0,
            stratify=labels)
        train_loader = make_loader(x_train, y_train, True)
        valid_loader = make_loader(x_valid, y_valid, False)
    else:
        train_loader = make_loader(features, labels, True)
        valid_loader = None
    return train_loader, valid_loader

def accuracy_batch(y_pred, y_true):
    """Return the fraction of rows of `y_pred` whose arg-max equals `y_true`.

    `y_pred` holds one score per class along axis 1, `y_true` holds class
    indices (not one-hot vectors).
    """
    predicted = y_pred.argmax(1)
    hits = predicted == y_true
    return hits.mean()


def set_training(model, mode=True):
    """Switch `model` between training (`True`) and evaluation (`False`).

    Sets `requires_grad` of every parameter to `mode`, then calls
    `model.train(mode)`. Returns the model.
    """
    for param in model.parameters():
        param.requires_grad = mode
    return model.train(mode)

# Outputs and dataset live under the user's home directory. expanduser('~')
# honours $HOME when it is set and still resolves when it is not, where
# os.environ['HOME'] would raise KeyError.
home_dir = os.path.expanduser('~')
save_folder = os.path.join(home_dir, "KSL_V2/Outputs")
os.makedirs(save_folder, exist_ok=True)

# Indices kept from the last feature axis: five individual indices followed by
# two runs of 21 consecutive indices (47 in total, the node count of the
# 'mediapipe_KSL' graph layout).
used_key_points = [0, 11, 12, 13, 14] + list(range(33, 33 + 21)) + list(range(54, 54 + 21))
train_loader, valid_loader = load_dataset(
    [os.path.join(home_dir, "KSL_V2/Datasets/KSL_1_dataset.pkl")],
    batch_size, 0.2, used_key_points)
dataloader = {'train': train_loader, 'valid': valid_loader}
num_class = 20

graph_args = {'layout': 'mediapipe_KSL', 'strategy': 'spatial'}
model = TwoStreamSpatialTemporalGraph(graph_args, num_class).to(device)
n_parameters = sum(p.numel() for p in model.parameters() if p.requires_grad)
print(f"number of params: {n_parameters}")
optimizer = torch.optim.RMSprop(model.parameters(), lr=0.001)
losser = torch.nn.CrossEntropyLoss()

# Per-epoch history of the mean loss / accuracy of each phase.
loss_list = {'train': [], 'valid': []}
accu_list = {'train': [], 'valid': []}
best_acc = -1
for e in range(epochs):
    print('Epoch {}/{}'.format(e, epochs - 1))
    for phase in ['train', 'valid']:
        # Validation runs with frozen parameters and eval-mode layers.
        model = set_training(model, phase == 'train')

        # Sums are weighted by batch size so that a short final batch does not
        # count as much as a full one in the epoch average.
        run_loss = 0.0
        run_accu = 0.0
        n_samples = 0
        with tqdm(dataloader[phase], desc=phase) as iterator:
            for pts, lbs in iterator:
                # Create motion input by distance of points (x, y) of the same node
                # in two frames.
                mot = pts[:, :, 1:, :] - pts[:, :, :-1, :]

                mot = mot.to(device)
                pts = pts.to(device)
                lbs = lbs.to(device)

                # Forward.
                out = model((pts, mot))
                loss = losser(out, lbs)

                if phase == 'train':
                    # Backward.
                    model.zero_grad()
                    loss.backward()
                    optimizer.step()

                n_batch = lbs.size(0)
                accu = accuracy_batch(out.detach().cpu().numpy(),
                                      lbs.detach().cpu().numpy())
                run_loss += loss.item() * n_batch
                run_accu += accu * n_batch
                n_samples += n_batch

                # tqdm advances by itself while it is being iterated, so no
                # manual update() call is needed here.
                iterator.set_postfix_str(' loss: {:.4f}, accu: {:.4f}'.format(
                    loss.item(), accu))
        loss_list[phase].append(run_loss / max(n_samples, 1))
        accu_list[phase].append(run_accu / max(n_samples, 1))

    # Keep the weights of the epoch with the best validation accuracy so far.
    if best_acc < accu_list['valid'][-1]:
        best_acc = accu_list['valid'][-1]
        torch.save(model.state_dict(), os.path.join(save_folder, 'tsstg-model_best.pth'))

    print('Summary epoch:\n - Train loss: {:.4f}, accu: {:.4f}\n - Valid loss:'
          ' {:.4f}, accu: {:.4f}'.format(loss_list['train'][-1], accu_list['train'][-1],
                                         loss_list['valid'][-1], accu_list['valid'][-1]))
del model
# 0.9655696902654868

using cuda device
(129459, 3, 1, 47)
(129459,)
number of params: 6267268
Epoch 0/29


train: 100%|██████████| 3237/3237 [00:46<00:00, 69.28it/s,  loss: 0.1763, accu: 0.9333]
valid: 100%|██████████| 810/810 [00:03<00:00, 220.73it/s,  loss: 1.0722, accu: 0.7500]


Summary epoch:
 - Train loss: 0.6597, accu: 0.7754
 - Valid loss: 0.3473, accu: 0.8924
Epoch 1/29


train: 100%|██████████| 3237/3237 [00:46<00:00, 69.47it/s,  loss: 0.0760, accu: 1.0000]
valid: 100%|██████████| 810/810 [00:03<00:00, 220.69it/s,  loss: 0.7393, accu: 0.7500]


Summary epoch:
 - Train loss: 0.1868, accu: 0.9371
 - Valid loss: 0.2266, accu: 0.9503
Epoch 2/29


train: 100%|██████████| 3237/3237 [00:46<00:00, 69.35it/s,  loss: 0.0786, accu: 1.0000]
valid: 100%|██████████| 810/810 [00:03<00:00, 221.04it/s,  loss: 0.3984, accu: 0.7500]


Summary epoch:
 - Train loss: 0.1170, accu: 0.9616
 - Valid loss: 0.0795, accu: 0.9790
Epoch 3/29


train: 100%|██████████| 3237/3237 [00:46<00:00, 69.44it/s,  loss: 0.2121, accu: 0.9333]
valid: 100%|██████████| 810/810 [00:03<00:00, 220.46it/s,  loss: 1.4689, accu: 0.7500]


Summary epoch:
 - Train loss: 0.0883, accu: 0.9709
 - Valid loss: 0.1798, accu: 0.9430
Epoch 4/29


train: 100%|██████████| 3237/3237 [00:46<00:00, 69.33it/s,  loss: 0.0366, accu: 1.0000]
valid: 100%|██████████| 810/810 [00:03<00:00, 220.39it/s,  loss: 0.0181, accu: 1.0000]


Summary epoch:
 - Train loss: 0.0714, accu: 0.9761
 - Valid loss: 0.0959, accu: 0.9742
Epoch 5/29


train: 100%|██████████| 3237/3237 [00:46<00:00, 69.39it/s,  loss: 0.0004, accu: 1.0000]
valid: 100%|██████████| 810/810 [00:03<00:00, 220.33it/s,  loss: 0.0476, accu: 1.0000]


Summary epoch:
 - Train loss: 0.0609, accu: 0.9801
 - Valid loss: 0.0487, accu: 0.9917
Epoch 6/29


train: 100%|██████████| 3237/3237 [00:46<00:00, 69.17it/s,  loss: 0.0088, accu: 1.0000]
valid: 100%|██████████| 810/810 [00:03<00:00, 221.46it/s,  loss: 0.0026, accu: 1.0000]


Summary epoch:
 - Train loss: 0.0537, accu: 0.9827
 - Valid loss: 0.0551, accu: 0.9868
Epoch 7/29


train: 100%|██████████| 3237/3237 [00:46<00:00, 69.15it/s,  loss: 0.0057, accu: 1.0000]
valid: 100%|██████████| 810/810 [00:03<00:00, 221.67it/s,  loss: 0.4199, accu: 0.7500]


Summary epoch:
 - Train loss: 0.0464, accu: 0.9844
 - Valid loss: 0.0442, accu: 0.9880
Epoch 8/29


train: 100%|██████████| 3237/3237 [00:46<00:00, 69.26it/s,  loss: 0.0004, accu: 1.0000]
valid: 100%|██████████| 810/810 [00:03<00:00, 220.27it/s,  loss: 0.0017, accu: 1.0000]


Summary epoch:
 - Train loss: 0.0440, accu: 0.9852
 - Valid loss: 0.0372, accu: 0.9924
Epoch 9/29


train: 100%|██████████| 3237/3237 [00:46<00:00, 69.35it/s,  loss: 0.0001, accu: 1.0000]
valid: 100%|██████████| 810/810 [00:03<00:00, 219.45it/s,  loss: 0.7450, accu: 0.7500]


Summary epoch:
 - Train loss: 0.0403, accu: 0.9868
 - Valid loss: 0.0530, accu: 0.9823
Epoch 10/29


train: 100%|██████████| 3237/3237 [00:46<00:00, 69.54it/s,  loss: 0.3293, accu: 0.9333]
valid: 100%|██████████| 810/810 [00:03<00:00, 221.07it/s,  loss: 2.6464, accu: 0.7500]


Summary epoch:
 - Train loss: 0.0366, accu: 0.9880
 - Valid loss: 0.3617, accu: 0.9279
Epoch 11/29


train: 100%|██████████| 3237/3237 [00:46<00:00, 69.34it/s,  loss: 0.1618, accu: 0.9333]
valid: 100%|██████████| 810/810 [00:03<00:00, 220.62it/s,  loss: 1.2822, accu: 0.7500]


Summary epoch:
 - Train loss: 0.0348, accu: 0.9886
 - Valid loss: 0.0848, accu: 0.9752
Epoch 12/29


train: 100%|██████████| 3237/3237 [00:46<00:00, 69.24it/s,  loss: 0.0008, accu: 1.0000]
valid: 100%|██████████| 810/810 [00:03<00:00, 220.98it/s,  loss: 0.0083, accu: 1.0000]


Summary epoch:
 - Train loss: 0.0314, accu: 0.9899
 - Valid loss: 0.0265, accu: 0.9929
Epoch 13/29


train: 100%|██████████| 3237/3237 [00:46<00:00, 69.39it/s,  loss: 0.0618, accu: 1.0000]
valid: 100%|██████████| 810/810 [00:03<00:00, 220.96it/s,  loss: 0.0000, accu: 1.0000]


Summary epoch:
 - Train loss: 0.0302, accu: 0.9898
 - Valid loss: 0.0177, accu: 0.9953
Epoch 14/29


train: 100%|██████████| 3237/3237 [00:46<00:00, 69.52it/s,  loss: 0.0213, accu: 1.0000]
valid: 100%|██████████| 810/810 [00:03<00:00, 219.97it/s,  loss: 0.0138, accu: 1.0000]


Summary epoch:
 - Train loss: 0.0288, accu: 0.9906
 - Valid loss: 0.0373, accu: 0.9897
Epoch 15/29


train: 100%|██████████| 3237/3237 [00:46<00:00, 69.41it/s,  loss: 0.0321, accu: 1.0000]
valid: 100%|██████████| 810/810 [00:03<00:00, 220.18it/s,  loss: 0.4398, accu: 0.7500]


Summary epoch:
 - Train loss: 0.0268, accu: 0.9909
 - Valid loss: 0.0433, accu: 0.9867
Epoch 16/29


train: 100%|██████████| 3237/3237 [00:46<00:00, 69.44it/s,  loss: 0.3201, accu: 0.9333]
valid: 100%|██████████| 810/810 [00:03<00:00, 220.32it/s,  loss: 0.0033, accu: 1.0000]


Summary epoch:
 - Train loss: 0.0258, accu: 0.9916
 - Valid loss: 0.0287, accu: 0.9908
Epoch 17/29


train: 100%|██████████| 3237/3237 [00:46<00:00, 69.29it/s,  loss: 0.1812, accu: 0.9333]
valid: 100%|██████████| 810/810 [00:03<00:00, 221.26it/s,  loss: 0.0107, accu: 1.0000]


Summary epoch:
 - Train loss: 0.0253, accu: 0.9916
 - Valid loss: 0.1005, accu: 0.9701
Epoch 18/29


train: 100%|██████████| 3237/3237 [00:46<00:00, 69.40it/s,  loss: 0.0151, accu: 1.0000]
valid: 100%|██████████| 810/810 [00:03<00:00, 220.96it/s,  loss: 0.0005, accu: 1.0000]


Summary epoch:
 - Train loss: 0.0240, accu: 0.9920
 - Valid loss: 0.0261, accu: 0.9959
Epoch 19/29


train: 100%|██████████| 3237/3237 [00:46<00:00, 69.29it/s,  loss: 0.0312, accu: 1.0000]
valid: 100%|██████████| 810/810 [00:03<00:00, 220.11it/s,  loss: 0.0174, accu: 1.0000]


Summary epoch:
 - Train loss: 0.0234, accu: 0.9922
 - Valid loss: 0.0366, accu: 0.9877
Epoch 20/29


train: 100%|██████████| 3237/3237 [00:46<00:00, 69.25it/s,  loss: 0.0126, accu: 1.0000]
valid: 100%|██████████| 810/810 [00:03<00:00, 220.35it/s,  loss: 0.0068, accu: 1.0000]


Summary epoch:
 - Train loss: 0.0221, accu: 0.9923
 - Valid loss: 0.0245, accu: 0.9952
Epoch 21/29


train: 100%|██████████| 3237/3237 [00:46<00:00, 69.42it/s,  loss: 0.0060, accu: 1.0000]
valid: 100%|██████████| 810/810 [00:03<00:00, 220.36it/s,  loss: 0.0090, accu: 1.0000]


Summary epoch:
 - Train loss: 0.0224, accu: 0.9926
 - Valid loss: 0.0391, accu: 0.9864
Epoch 22/29


train: 100%|██████████| 3237/3237 [00:46<00:00, 69.34it/s,  loss: 0.0319, accu: 1.0000]
valid: 100%|██████████| 810/810 [00:03<00:00, 220.98it/s,  loss: 0.0142, accu: 1.0000]


Summary epoch:
 - Train loss: 0.0207, accu: 0.9933
 - Valid loss: 0.0257, accu: 0.9931
Epoch 23/29


train: 100%|██████████| 3237/3237 [00:46<00:00, 69.34it/s,  loss: 0.0157, accu: 1.0000]
valid: 100%|██████████| 810/810 [00:03<00:00, 220.52it/s,  loss: 0.0006, accu: 1.0000]


Summary epoch:
 - Train loss: 0.0208, accu: 0.9930
 - Valid loss: 0.0239, accu: 0.9961
Epoch 24/29


train: 100%|██████████| 3237/3237 [00:46<00:00, 69.48it/s,  loss: 0.0102, accu: 1.0000]
valid: 100%|██████████| 810/810 [00:03<00:00, 221.27it/s,  loss: 0.3277, accu: 0.7500]


Summary epoch:
 - Train loss: 0.0209, accu: 0.9932
 - Valid loss: 0.0263, accu: 0.9930
Epoch 25/29


train: 100%|██████████| 3237/3237 [00:46<00:00, 69.35it/s,  loss: 0.0007, accu: 1.0000]
valid: 100%|██████████| 810/810 [00:03<00:00, 221.38it/s,  loss: 0.0426, accu: 1.0000]


Summary epoch:
 - Train loss: 0.0186, accu: 0.9937
 - Valid loss: 0.0290, accu: 0.9922
Epoch 26/29


train: 100%|██████████| 3237/3237 [00:46<00:00, 69.52it/s,  loss: 0.0008, accu: 1.0000]
valid: 100%|██████████| 810/810 [00:03<00:00, 220.85it/s,  loss: 0.0000, accu: 1.0000]


Summary epoch:
 - Train loss: 0.0191, accu: 0.9939
 - Valid loss: 0.0474, accu: 0.9884
Epoch 27/29


train: 100%|██████████| 3237/3237 [00:46<00:00, 69.48it/s,  loss: 0.0001, accu: 1.0000]
valid: 100%|██████████| 810/810 [00:03<00:00, 220.11it/s,  loss: 0.0001, accu: 1.0000]


Summary epoch:
 - Train loss: 0.0185, accu: 0.9939
 - Valid loss: 0.0279, accu: 0.9925
Epoch 28/29


train: 100%|██████████| 3237/3237 [00:46<00:00, 69.51it/s,  loss: 0.0003, accu: 1.0000]
valid: 100%|██████████| 810/810 [00:03<00:00, 221.13it/s,  loss: 0.0199, accu: 1.0000]


Summary epoch:
 - Train loss: 0.0188, accu: 0.9940
 - Valid loss: 0.0283, accu: 0.9931
Epoch 29/29


train: 100%|██████████| 3237/3237 [00:46<00:00, 69.53it/s,  loss: 0.1341, accu: 0.9333]
valid: 100%|██████████| 810/810 [00:03<00:00, 220.09it/s,  loss: 0.0095, accu: 1.0000]

Summary epoch:
 - Train loss: 0.0180, accu: 0.9942
 - Valid loss: 0.0248, accu: 0.9927





# Our Dataset Video Classification

In [28]:
### Reference from: https://github.com/yysijie/st-gcn/blob/master/net/utils/graph.py

import os
import torch
import numpy as np


class Graph:
    """Skeleton graph and its partitioned adjacency tensor for ST-GCN layers.

    Args:
        - layout: (string) joint layout, must be one of
            - coco_cut: COCO format with 4 joints (L-R ears, L-R eyes) cut
              out, 14 nodes.
            - mediapipe_KSL: 47 nodes; per the index comment in `get_edge`
              these are 5 selected pose keypoints followed by two blocks of
              21 keypoints (presumably the two hands -- confirm against the
              dataset).
        - strategy: (string) partition strategy, must be one of
            - uniform: Uniform Labeling,
            - distance: Distance Partitioning,
            - spatial: Spatial Configuration,
          see the section 'Partition Strategies' of
          https://arxiv.org/abs/1801.07455.
        - max_hop: (int) the maximal distance between two connected nodes.
        - dilation: (int) controls the spacing between the kernel points.

    Attributes set during construction:
        - num_node, edge, center: graph definition (see `get_edge`).
        - hop_dis: (num_node, num_node) hop distances from `get_hop_distance`.
        - A: (K, num_node, num_node) stack of normalized adjacency partitions.
    """
    def __init__(self,
                 layout='coco_cut',
                 strategy='uniform',
                 max_hop=1,
                 dilation=1):
        self.max_hop, self.dilation = max_hop, dilation

        self.get_edge(layout)
        self.hop_dis = get_hop_distance(self.num_node, self.edge, max_hop)
        self.get_adjacency(strategy)

    def get_edge(self, layout):
        """Set `num_node`, `edge` (self links first, then bones) and `center`.

        Raises:
            ValueError: if `layout` is not a supported layout name.
        """
        if layout == 'coco_cut':
            node_count, center_node = 14, 13
            bones = [(6, 4), (4, 2), (2, 13), (13, 1), (5, 3), (3, 1), (12, 10),
                     (10, 8), (8, 2), (11, 9), (9, 7), (7, 1), (13, 0)]
        elif layout == 'mediapipe_KSL':
            # Nodes are the keypoints selected by
            # [0,11,12,13,14] + range(33, 33+21) + range(54, 54+21).
            node_count, center_node = 47, 0
            bones = [(0, 1), (0, 2), (1, 3), (2, 4), (3, 26), (4, 5)]  # nose-arms-wrist
            # Each 21-node block hangs off its first node (5 or 26): five
            # chains of four nodes, every chain rooted at that first node.
            for wrist in (5, 26):
                for finger in range(5):
                    base = wrist + 1 + 4 * finger
                    bones.append((wrist, base))
                    bones.extend((joint, joint + 1)
                                 for joint in range(base, base + 3))
        else:
            raise ValueError('This layout is not supported!')

        self.num_node = node_count
        self.edge = [(node, node) for node in range(node_count)] + bones
        self.center = center_node

    def get_adjacency(self, strategy):
        """Build `self.A` by partitioning the normalized adjacency matrix.

        Raises:
            ValueError: if `strategy` is not a supported strategy name.
        """
        size = self.num_node
        hops = range(0, self.max_hop + 1, self.dilation)

        # Binary adjacency over every admissible hop, then column-normalized.
        linked = np.zeros((size, size))
        for hop in hops:
            linked[self.hop_dis == hop] = 1
        weights = normalize_digraph(linked)

        if strategy == 'uniform':
            # A single partition holding every neighbour.
            partitions = np.zeros((1, size, size))
            partitions[0] = weights
            self.A = partitions
        elif strategy == 'distance':
            # One partition per hop distance.
            partitions = np.zeros((len(hops), size, size))
            for layer, hop in zip(partitions, hops):
                at_hop = self.hop_dis == hop
                layer[at_hop] = weights[at_hop]
            self.A = partitions
        elif strategy == 'spatial':
            # Compare, for every entry [j, i], how far j and i are from the
            # center node (rows index j, columns index i).
            to_center = self.hop_dis[:, self.center]
            dist_j = to_center[:, np.newaxis]
            dist_i = to_center[np.newaxis, :]
            same_distance = dist_j == dist_i
            j_is_farther = dist_j > dist_i

            partitions = []
            for hop in hops:
                at_hop = self.hop_dis == hop
                root = np.where(at_hop & same_distance, weights, 0.0)
                close = np.where(at_hop & ~same_distance & j_is_farther,
                                 weights, 0.0)
                further = np.where(at_hop & ~same_distance & ~j_is_farther,
                                   weights, 0.0)
                if hop == 0:
                    partitions.append(root)
                else:
                    partitions.append(root + close)
                    partitions.append(further)
            self.A = np.stack(partitions)
        else:
            raise ValueError("This strategy is not supported!")


def get_hop_distance(num_node, edge, max_hop=1):
    """Return the (num_node, num_node) matrix of hop distances between nodes.

    Edges are treated as undirected. Entry [i, j] is the smallest number of
    hops d <= max_hop for which a walk of length d joins i and j, and
    `np.inf` when no such walk exists within `max_hop` hops.
    """
    adjacency = np.zeros((num_node, num_node))
    for src, dst in edge:
        adjacency[dst, src] = 1
        adjacency[src, dst] = 1

    # reach[d] marks the pairs joined by at least one walk of exactly d hops.
    reach = np.stack([np.linalg.matrix_power(adjacency, steps)
                      for steps in range(max_hop + 1)]) > 0

    # Write the largest hop count first so smaller counts overwrite it.
    hop_dis = np.full((num_node, num_node), np.inf)
    for steps in reversed(range(max_hop + 1)):
        hop_dis[reach[steps]] = steps
    return hop_dis


def normalize_digraph(A):
    """Column-normalize an adjacency matrix, i.e. return ``A @ D^-1``.

    ``D`` is the diagonal matrix of column sums of ``A``. Every column with a
    positive sum is divided by that sum; columns whose sum is not positive
    are zeroed out rather than divided.

    Args:
        A: square 2-D array. Integer dtypes are accepted (the reciprocal is
           taken in floating point; ``int ** -1`` would raise in NumPy).

    Returns:
        Array with the same shape as ``A`` holding the normalized weights.
    """
    col_degree = np.sum(A, 0)
    inv_degree = np.zeros(A.shape[0])
    has_links = col_degree > 0
    # 1.0 / d instead of d ** (-1): NumPy refuses negative integer powers of
    # integers, which made integer adjacency matrices crash here.
    inv_degree[has_links] = 1.0 / col_degree[has_links]
    return np.dot(A, np.diag(inv_degree))


def normalize_undigraph(A):
    """Symmetrically normalize an adjacency matrix: ``D^-1/2 @ A @ D^-1/2``.

    ``D`` is the diagonal matrix of column sums of ``A``; entries whose
    column sum is not positive get a zero scaling factor.
    """
    col_sums = np.sum(A, 0)
    size = A.shape[0]
    inv_sqrt_degree = np.zeros(size)
    for node in range(size):
        degree = col_sums[node]
        if degree > 0:
            inv_sqrt_degree[node] = degree ** (-0.5)
    scaling = np.diag(inv_sqrt_degree)
    return np.dot(np.dot(scaling, A), scaling)

In [29]:
### Reference from: https://github.com/yysijie/st-gcn/tree/master/net

import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np

# from Actionsrecognition.Utils import Graph


class GraphConvolution(nn.Module):
    """Graph convolution: a (temporal x 1) Conv2d followed by adjacency mixing.

    The convolution produces `kernel_size` groups of `out_channels` feature
    maps; group `k` is multiplied with adjacency partition `A[k]` and the
    groups are summed.

    Args:
        - in_channels: (int) Number of channels in the input sequence data.
        - out_channels: (int) Number of channels produced by the convolution.
        - kernel_size: (int) Size of the graph convolving kernel, i.e. the
            number of adjacency partitions K.
        - t_kernel_size: (int) Size of the temporal convolving kernel. Default: 1
        - t_stride: (int, optional) Stride of the temporal convolution. Default: 1
        - t_padding: (int, optional) Temporal zero-padding added to both sides of
            the input. Default: 0
        - t_dilation: (int, optional) Spacing between temporal kernel elements.
            Default: 1
        - bias: (bool, optional) If `True`, adds a learnable bias to the output.
            Default: `True`
    Shape:
        - Inputs x: Graph sequence in :math:`(N, in_channels, T_{in}, V)`,
                 A: Graph adjacency matrix in :math:`(K, V, V)`,
        - Output: Graph sequence out in :math:`(N, out_channels, T_{out}, V)`

            where
                :math:`N` is a batch size,
                :math:`K` is the spatial kernel size (`kernel_size`),
                :math:`T_{in}/T_{out}` is a length of input/output sequence,
                :math:`V` is the number of graph nodes.

    """
    def __init__(self, in_channels, out_channels, kernel_size,
                 t_kernel_size=1,
                 t_stride=1,
                 t_padding=0,
                 t_dilation=1,
                 bias=True):
        super().__init__()

        self.kernel_size = kernel_size
        # Only the temporal axis is convolved; the node axis uses a size-1 kernel.
        temporal_options = dict(kernel_size=(t_kernel_size, 1),
                                padding=(t_padding, 0),
                                stride=(t_stride, 1),
                                dilation=(t_dilation, 1),
                                bias=bias)
        self.conv = nn.Conv2d(in_channels, out_channels * kernel_size,
                              **temporal_options)

    def forward(self, x, A):
        features = self.conv(x)
        batch, stacked_channels, frames, nodes = features.shape
        channels = stacked_channels // self.kernel_size
        # Split the stacked channels into one group per adjacency partition.
        features = features.view(batch, self.kernel_size, channels, frames, nodes)
        # Sum over partitions k and source nodes v.
        mixed = torch.einsum('nkctv,kvw->nctw', features, A)
        return mixed.contiguous()


def _zero_residual(x):
    """Residual branch used when the skip connection is disabled: adds nothing."""
    return 0


class st_gcn(nn.Module):
    """Applies a spatial temporal graph convolution over an input graph sequence.

    The block runs a graph convolution, then a temporal branch
    (BatchNorm -> ReLU -> temporal Conv2d -> BatchNorm -> Dropout), adds the
    residual branch and applies a final ReLU.

    Args:
        - in_channels: (int) Number of channels in the input sequence data.
        - out_channels: (int) Number of channels produced by the convolution.
        - kernel_size: (tuple) `(temporal_kernel_size, graph_kernel_size)`;
            the temporal size must be odd so the padding keeps the centre frame.
        - stride: (int, optional) Stride of the temporal convolution. Default: 1
        - dropout: (float, optional) Dropout rate of the final output. Default: 0
        - residual: (bool, optional) If `True`, applies a residual mechanism.
            Default: `True`
    Shape:
        - Inputs x: Graph sequence in :math: `(N, in_channels, T_{in}, V)`,
                 A: Graph adjacency matrix in :math: `(K, V, V)`,
        - Output: Graph sequence out in :math: `(N, out_channels, T_{out}, V)`
            where
                :math:`N` is a batch size,
                :math:`K` is the spatial kernel size, as :math:`K == kernel_size[1]`,
                :math:`T_{in}/T_{out}` is a length of input/output sequence,
                :math:`V` is the number of graph nodes.
    """
    def __init__(self, in_channels, out_channels, kernel_size,
                 stride=1,
                 dropout=0,
                 residual=True):
        super().__init__()
        assert len(kernel_size) == 2
        assert kernel_size[0] % 2 == 1
        # Pad the temporal axis only, e.g. kernel_size (9, 3) -> padding (4, 0).
        padding = ((kernel_size[0] - 1) // 2, 0)

        self.gcn = GraphConvolution(in_channels, out_channels, kernel_size[1])
        self.tcn = nn.Sequential(nn.BatchNorm2d(out_channels),
                                 nn.ReLU(inplace=True),
                                 nn.Conv2d(out_channels,
                                           out_channels,
                                           (kernel_size[0], 1),
                                           (stride, 1),
                                           padding),
                                 nn.BatchNorm2d(out_channels),
                                 nn.Dropout(dropout, inplace=True),
                                 )

        # The residual branch is a module-level function or an nn.Module
        # (never a local lambda) so the whole block stays picklable, e.g. for
        # torch.save(model).
        if not residual:
            self.residual = _zero_residual
        elif (in_channels == out_channels) and (stride == 1):
            # Shapes already match: pass the input through unchanged.
            self.residual = nn.Identity()
        else:
            # Project the input so channels and temporal length match the output.
            self.residual = nn.Sequential(nn.Conv2d(in_channels,
                                                    out_channels,
                                                    kernel_size=1,
                                                    stride=(stride, 1)),
                                          nn.BatchNorm2d(out_channels)
                                          )
        self.relu = nn.ReLU(inplace=True)

    def forward(self, x, A):
        res = self.residual(x)
        x = self.gcn(x, A)
        x = self.tcn(x) + res
        return self.relu(x)


class StreamSpatialTemporalGraph(nn.Module):
    """Spatial temporal graph convolutional networks.

    A stack of ten `st_gcn` blocks (64 -> 128 -> 256 channels, with temporal
    stride 2 at the two widening blocks), followed by global average pooling
    and an optional 1x1 classification convolution.

    Args:
        - in_channels: (int) Number of input channels.
        - graph_args: (dict) Keyword arguments forwarded to the `Graph` class.
        - num_class: (int) Number of class outputs. If `None` return pooling features of
            the last st-gcn layer instead.
        - edge_importance_weighting: (bool) If `True`, adds a learnable importance
            weighting to the edges of the graph.
        - **kwargs: (optional) Other parameters for graph convolution units.
            A `dropout` entry is not passed to the first block.
    Shape:
        - Input: :math:`(N, in_channels, T_{in}, V_{in})`
        - Output: :math:`(N, num_class)` where
            :math:`N` is a batch size,
            :math:`T_{in}` is a length of input sequence,
            :math:`V_{in}` is the number of graph nodes,
        or If num_class is `None`: `(N, out_channels)`
            :math:`out_channels` is number of out_channels of the last layer (256).
    """
    def __init__(self, in_channels, graph_args, num_class=None,
                 edge_importance_weighting=True, **kwargs):
        super().__init__()
        # Load graph. The adjacency stack is a buffer: it follows the module
        # across devices and into the state dict but is not trained.
        graph = Graph(**graph_args)
        A = torch.tensor(graph.A, dtype=torch.float32, requires_grad=False)
        self.register_buffer('A', A)

        # Networks.
        spatial_kernel_size = A.size(0)
        temporal_kernel_size = 9
        kernel_size = (temporal_kernel_size, spatial_kernel_size)
        # The first block is built without the `dropout` option.
        kwargs0 = {k: v for k, v in kwargs.items() if k != 'dropout'}

        self.data_bn = nn.BatchNorm1d(in_channels * A.size(1))
        self.st_gcn_networks = nn.ModuleList((
            st_gcn(in_channels, 64, kernel_size, 1, residual=False, **kwargs0),
            st_gcn(64, 64, kernel_size, 1, **kwargs),
            st_gcn(64, 64, kernel_size, 1, **kwargs),
            st_gcn(64, 64, kernel_size, 1, **kwargs),
            st_gcn(64, 128, kernel_size, 2, **kwargs),
            st_gcn(128, 128, kernel_size, 1, **kwargs),
            st_gcn(128, 128, kernel_size, 1, **kwargs),
            st_gcn(128, 256, kernel_size, 2, **kwargs),
            st_gcn(256, 256, kernel_size, 1, **kwargs),
            st_gcn(256, 256, kernel_size, 1, **kwargs)
        ))

        # Initialize parameters for edge importance weighting: one learnable
        # mask per block, or the constant 1 when weighting is disabled.
        if edge_importance_weighting:
            self.edge_importance = nn.ParameterList([
                nn.Parameter(torch.ones(A.size()))
                for _ in self.st_gcn_networks
            ])
        else:
            self.edge_importance = [1] * len(self.st_gcn_networks)

        if num_class is not None:
            self.cls = nn.Conv2d(256, num_class, kernel_size=1)
        else:
            # nn.Identity instead of a local lambda keeps the model picklable
            # (e.g. torch.save(model)); it adds no parameters or buffers.
            self.cls = nn.Identity()

    def forward(self, x):
        # Data normalization: batch-norm every (node, channel) pair over time.
        N, C, T, V = x.size()
        x = x.permute(0, 3, 1, 2).contiguous()  # (N, V, C, T)
        x = x.view(N, V * C, T)
        x = self.data_bn(x)
        x = x.view(N, V, C, T)
        x = x.permute(0, 2, 3, 1).contiguous()  # back to (N, C, T, V)

        # Forward through the st-gcn stack with per-block edge weighting.
        for gcn, importance in zip(self.st_gcn_networks, self.edge_importance):
            x = gcn(x, self.A * importance)

        # Global average pooling over the remaining (T, V) axes.
        x = F.avg_pool2d(x, x.size()[2:])
        x = self.cls(x)
        x = x.view(x.size(0), -1)

        return x


class TwoStreamSpatialTemporalGraph(nn.Module):
    """Two inputs spatial temporal graph convolutional networks.

    A points stream and a motion stream (each a feature-only
    `StreamSpatialTemporalGraph` built with 3 input channels) are run
    separately; their 256-dimensional outputs are concatenated and fed to a
    single linear classifier.

    Args:
        - graph_args: (dict) Keyword arguments forwarded to the `Graph` class.
        - num_class: (int) Number of class outputs.
        - edge_importance_weighting: (bool) If `True`, adds a learnable importance
            weighting to the edges of the graph.
        - **kwargs: (optional) Other parameters for graph convolution units.
    Shape:
        - Input: pair `(points, motions)`, indexed as `inputs[0]` and
            `inputs[1]`, each of shape :math:`(N, 3, T, V)` where
            :math:`N` is a batch size,
            :math:`T` is a length of the sequence (the two streams may differ
            in :math:`T`, since each is pooled over time),
            :math:`V` is the number of graph nodes.
        - Output: :math:`(N, num_class)` raw scores; no softmax or sigmoid is
            applied.
    """
    def __init__(self, graph_args, num_class, edge_importance_weighting=True,
                 **kwargs):
        super().__init__()
        # Both streams return pooled features (num_class=None), not scores.
        self.pts_stream = StreamSpatialTemporalGraph(
            3, graph_args, None, edge_importance_weighting, **kwargs)
        self.mot_stream = StreamSpatialTemporalGraph(
            3, graph_args, None, edge_importance_weighting, **kwargs)

        # Classifier over the concatenated 256 + 256 stream features.
        # NOTE: a gating branch over the fused features (Linear 512->128 ->
        # BatchNorm -> ReLU -> Linear 128->32 -> ReLU -> Linear 32->512 ->
        # Sigmoid, multiplied into the features) was tried and left disabled.
        self.fcn = nn.Linear(256 * 2, num_class)

    def forward(self, inputs):
        # Each stream yields (N, 256), e.g. torch.Size([32, 256]).
        stream_features = [self.pts_stream(inputs[0]),
                           self.mot_stream(inputs[1])]
        fused = torch.cat(stream_features, dim=-1)
        return self.fcn(fused)

In [30]:
import os
import time
import torch
import pickle
import numpy as np
import torch.nn.functional as F
from shutil import copyfile
from tqdm import tqdm
from torch.utils import data
from torch.optim.adadelta import Adadelta
from sklearn.model_selection import train_test_split

# Run on the GPU when PyTorch can see one, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

print("using", device, "device")

# Training schedule.
epochs = 30
batch_size = 32

def load_dataset(data_files, batch_size, split_size=0.2,used_key_points=None):#0.2
    """Load pickled (features, labels) files into torch DataLoaders.

    Args:
        data_files: iterable of paths; each file must unpickle to a
            `(features, labels)` pair. The pairs are concatenated along axis 0.
        batch_size: batch size used for both loaders.
        split_size: fraction held out for validation. When > 0 a stratified
            split with `random_state=0` is made; otherwise all data goes to
            the training loader.
        used_key_points: optional indices (list or array) selecting entries
            along the 4th feature axis; `None` keeps every entry.
            NOTE(review): features look like (N, C, T, V) with V = key
            points -- confirm against the dataset.

    Returns:
        `(train_loader, valid_loader)`; `valid_loader` is `None` when
        `split_size` is not positive. Features are float32, labels int64.
    """
    features, labels = [], []
    for path in data_files:
        # NOTE(review): pickle.load can execute arbitrary code; only pass
        # dataset files from a trusted source.
        with open(path, 'rb') as f:
            fts, lbs = pickle.load(f)
        features.append(fts)
        labels.append(lbs)
    features = np.concatenate(features, axis=0)
    labels = np.concatenate(labels, axis=0)

    # `is not None` rather than `!= None`: comparing an index array with
    # None is element-wise and has no single truth value.
    if used_key_points is not None:
        features = features[:, :, :, used_key_points]
    print(features.shape)
    print(labels.shape)

    def _to_dataset(x, y):
        # Wrap numpy arrays as a dataset of float32 inputs and int64 labels.
        return data.TensorDataset(torch.tensor(x, dtype=torch.float32),
                                  torch.tensor(y, dtype=torch.int64))

    if split_size > 0:
        x_train, x_valid, y_train, y_valid = train_test_split(
            features, labels, test_size=split_size, random_state=0,
            stratify=labels)
        train_loader = data.DataLoader(_to_dataset(x_train, y_train),
                                       batch_size, shuffle=True)
        valid_loader = data.DataLoader(_to_dataset(x_valid, y_valid),
                                       batch_size)
    else:
        train_loader = data.DataLoader(_to_dataset(features, labels),
                                       batch_size, shuffle=True)
        valid_loader = None
    return train_loader, valid_loader

def accuracy_batch(y_pred, y_true):
    """Return the fraction of rows of `y_pred` whose arg-max equals `y_true`.

    `y_pred` holds one row of class scores per sample and `y_true` the
    matching class indices (not one-hot vectors).
    """
    predicted = y_pred.argmax(1)
    hits = predicted == y_true
    return hits.mean()


def set_training(model, mode=True):
    """Switch `model` between training and evaluation and return it.

    Sets `requires_grad` of every parameter to `mode` and then calls
    `model.train(mode)`.
    """
    for param in model.parameters():
        param.requires_grad = mode
    model.train(mode)
    return model

# Training script for the two-stream ST-GCN on the KSL video dataset.
# expanduser('~') resolves the home directory even when $HOME is not set.
home_dir = os.path.expanduser('~')
save_folder = os.path.join(home_dir, "KSL_V2/Outputs")
os.makedirs(save_folder, exist_ok=True)

# 47 selected key points: 5 single indices plus two consecutive runs of 21,
# matching the 'mediapipe_KSL' graph layout.
used_key_points = [0, 11, 12, 13, 14] + list(range(33, 33 + 21)) + list(range(54, 54 + 21))
train_loader, valid_loader = load_dataset(
    [os.path.join(home_dir, "KSL_V2/Datasets/KSL_116_dataset.pkl")],
    batch_size, 0.2, used_key_points)
dataloader = {'train': train_loader, 'valid': valid_loader}
num_class = 20

graph_args = {'layout': 'mediapipe_KSL', 'strategy': 'spatial'}
model = TwoStreamSpatialTemporalGraph(graph_args, num_class).to(device)
n_parameters = sum(p.numel() for p in model.parameters() if p.requires_grad)
print(f"number of params: {n_parameters}")
optimizer = torch.optim.RMSprop(model.parameters(), lr=0.001)
losser = torch.nn.CrossEntropyLoss()

loss_list = {'train': [], 'valid': []}
accu_list = {'train': [], 'valid': []}
best_acc = -1
for e in range(epochs):
    print('Epoch {}/{}'.format(e, epochs - 1))
    for phase in ['train', 'valid']:
        is_train = phase == 'train'
        model = set_training(model, is_train)

        # Sums are weighted by batch size so that the shorter final batch
        # does not count as much as a full one in the epoch average.
        run_loss = 0.0
        run_accu = 0.0
        n_samples = 0
        with tqdm(dataloader[phase], desc=phase) as iterator:
            for pts, lbs in iterator:
                # Create motion input by distance of points (x, y) of the same node
                # in two frames.
                mot = pts[:, :, 1:, :] - pts[:, :, :-1, :]

                mot = mot.to(device)
                pts = pts.to(device)
                lbs = lbs.to(device)

                # Forward (autograd is only needed while training).
                with torch.set_grad_enabled(is_train):
                    out = model((pts, mot))
                    loss = losser(out, lbs)

                if is_train:
                    # Backward.
                    optimizer.zero_grad()
                    loss.backward()
                    optimizer.step()

                n_batch = lbs.size(0)
                accu = accuracy_batch(out.detach().cpu().numpy(),
                                      lbs.detach().cpu().numpy())
                run_loss += loss.item() * n_batch
                run_accu += accu * n_batch
                n_samples += n_batch

                # The bar advances by itself while iterating; calling
                # iterator.update() as well would count each batch twice.
                iterator.set_postfix_str(' loss: {:.4f}, accu: {:.4f}'.format(
                    loss.item(), accu))
        loss_list[phase].append(run_loss / n_samples)
        accu_list[phase].append(run_accu / n_samples)

    # Keep the weights of the epoch with the best validation accuracy so far.
    if best_acc < accu_list['valid'][-1]:
        best_acc = accu_list['valid'][-1]
        torch.save(model.state_dict(), os.path.join(save_folder, 'tsstg-model_best.pth'))

    print('Summary epoch:\n - Train loss: {:.4f}, accu: {:.4f}\n - Valid loss:'
          ' {:.4f}, accu: {:.4f}'.format(loss_list['train'][-1], accu_list['train'][-1],
                                         loss_list['valid'][-1], accu_list['valid'][-1]))
# Drop the reference so the model's memory can be reclaimed before later cells.
del model

using cuda device
(17983, 3, 116, 47)
(17983,)
number of params: 6272388
Epoch 0/29


train: 100%|██████████| 450/450 [01:43<00:00,  4.34it/s,  loss: 0.7064, accu: 0.7222]
valid: 100%|██████████| 113/113 [00:08<00:00, 12.66it/s,  loss: 0.9989, accu: 0.5385]


Summary epoch:
 - Train loss: 1.1090, accu: 0.6188
 - Valid loss: 0.7139, accu: 0.7365
Epoch 1/29


train: 100%|██████████| 450/450 [01:44<00:00,  4.31it/s,  loss: 0.5889, accu: 0.8333]
valid: 100%|██████████| 113/113 [00:08<00:00, 12.60it/s,  loss: 1.4659, accu: 0.6923]


Summary epoch:
 - Train loss: 0.3621, accu: 0.8714
 - Valid loss: 1.2937, accu: 0.6690
Epoch 2/29


train: 100%|██████████| 450/450 [01:44<00:00,  4.30it/s,  loss: 0.7709, accu: 0.8333]
valid: 100%|██████████| 113/113 [00:08<00:00, 12.62it/s,  loss: 0.0841, accu: 0.9231]


Summary epoch:
 - Train loss: 0.1945, accu: 0.9330
 - Valid loss: 0.2577, accu: 0.9028
Epoch 3/29


train: 100%|██████████| 450/450 [01:44<00:00,  4.30it/s,  loss: 0.1026, accu: 0.9444]
valid: 100%|██████████| 113/113 [00:08<00:00, 12.60it/s,  loss: 0.0993, accu: 0.9231]


Summary epoch:
 - Train loss: 0.1069, accu: 0.9645
 - Valid loss: 0.0718, accu: 0.9794
Epoch 4/29


train: 100%|██████████| 450/450 [01:44<00:00,  4.31it/s,  loss: 0.0146, accu: 1.0000]
valid: 100%|██████████| 113/113 [00:08<00:00, 12.64it/s,  loss: 0.0113, accu: 1.0000]


Summary epoch:
 - Train loss: 0.0819, accu: 0.9733
 - Valid loss: 0.0471, accu: 0.9798
Epoch 5/29


train: 100%|██████████| 450/450 [01:44<00:00,  4.31it/s,  loss: 0.0785, accu: 0.9444]
valid: 100%|██████████| 113/113 [00:08<00:00, 12.63it/s,  loss: 0.0142, accu: 1.0000]


Summary epoch:
 - Train loss: 0.0610, accu: 0.9802
 - Valid loss: 0.0794, accu: 0.9679
Epoch 6/29


train: 100%|██████████| 450/450 [01:44<00:00,  4.30it/s,  loss: 0.0069, accu: 1.0000]
valid: 100%|██████████| 113/113 [00:08<00:00, 12.62it/s,  loss: 0.0010, accu: 1.0000]


Summary epoch:
 - Train loss: 0.0526, accu: 0.9847
 - Valid loss: 0.0123, accu: 0.9970
Epoch 7/29


train: 100%|██████████| 450/450 [01:44<00:00,  4.30it/s,  loss: 0.0080, accu: 1.0000]
valid: 100%|██████████| 113/113 [00:08<00:00, 12.62it/s,  loss: 0.0005, accu: 1.0000]


Summary epoch:
 - Train loss: 0.0338, accu: 0.9888
 - Valid loss: 0.0046, accu: 0.9997
Epoch 8/29


train: 100%|██████████| 450/450 [01:44<00:00,  4.30it/s,  loss: 0.0084, accu: 1.0000]
valid: 100%|██████████| 113/113 [00:08<00:00, 12.62it/s,  loss: 0.0011, accu: 1.0000]


Summary epoch:
 - Train loss: 0.0329, accu: 0.9897
 - Valid loss: 0.0037, accu: 0.9997
Epoch 9/29


train: 100%|██████████| 450/450 [01:44<00:00,  4.30it/s,  loss: 0.4532, accu: 0.8333]
valid: 100%|██████████| 113/113 [00:08<00:00, 12.62it/s,  loss: 1.3023, accu: 0.6923]


Summary epoch:
 - Train loss: 0.0307, accu: 0.9905
 - Valid loss: 0.7841, accu: 0.8037
Epoch 10/29


train: 100%|██████████| 450/450 [01:44<00:00,  4.30it/s,  loss: 0.0025, accu: 1.0000]
valid: 100%|██████████| 113/113 [00:08<00:00, 12.62it/s,  loss: 0.0028, accu: 1.0000]


Summary epoch:
 - Train loss: 0.0290, accu: 0.9923
 - Valid loss: 0.0199, accu: 0.9934
Epoch 11/29


train: 100%|██████████| 450/450 [01:44<00:00,  4.30it/s,  loss: 0.0009, accu: 1.0000]
valid: 100%|██████████| 113/113 [00:08<00:00, 12.63it/s,  loss: 0.0003, accu: 1.0000]


Summary epoch:
 - Train loss: 0.0205, accu: 0.9929
 - Valid loss: 0.0133, accu: 0.9945
Epoch 12/29


train: 100%|██████████| 450/450 [01:44<00:00,  4.31it/s,  loss: 0.0279, accu: 1.0000]
valid: 100%|██████████| 113/113 [00:08<00:00, 12.62it/s,  loss: 0.0021, accu: 1.0000]


Summary epoch:
 - Train loss: 0.0218, accu: 0.9933
 - Valid loss: 0.0187, accu: 0.9950
Epoch 13/29


train: 100%|██████████| 450/450 [01:44<00:00,  4.31it/s,  loss: 0.0023, accu: 1.0000]
valid: 100%|██████████| 113/113 [00:08<00:00, 12.63it/s,  loss: 0.0005, accu: 1.0000]


Summary epoch:
 - Train loss: 0.0211, accu: 0.9938
 - Valid loss: 0.0017, accu: 1.0000
Epoch 14/29


train: 100%|██████████| 450/450 [01:44<00:00,  4.31it/s,  loss: 0.0008, accu: 1.0000]
valid: 100%|██████████| 113/113 [00:08<00:00, 12.62it/s,  loss: 0.0089, accu: 1.0000]


Summary epoch:
 - Train loss: 0.0167, accu: 0.9951
 - Valid loss: 0.3571, accu: 0.9533
Epoch 15/29


train: 100%|██████████| 450/450 [01:44<00:00,  4.31it/s,  loss: 0.0094, accu: 1.0000]
valid: 100%|██████████| 113/113 [00:08<00:00, 12.63it/s,  loss: 0.0023, accu: 1.0000]


Summary epoch:
 - Train loss: 0.0212, accu: 0.9942
 - Valid loss: 0.0097, accu: 0.9961
Epoch 16/29


train: 100%|██████████| 450/450 [01:44<00:00,  4.31it/s,  loss: 0.0034, accu: 1.0000]
valid: 100%|██████████| 113/113 [00:08<00:00, 12.63it/s,  loss: 0.0006, accu: 1.0000]


Summary epoch:
 - Train loss: 0.0147, accu: 0.9955
 - Valid loss: 0.0022, accu: 0.9994
Epoch 17/29


train: 100%|██████████| 450/450 [01:44<00:00,  4.31it/s,  loss: 0.0005, accu: 1.0000]
valid: 100%|██████████| 113/113 [00:08<00:00, 12.61it/s,  loss: 0.0000, accu: 1.0000]


Summary epoch:
 - Train loss: 0.0146, accu: 0.9962
 - Valid loss: 0.0017, accu: 1.0000
Epoch 18/29


train: 100%|██████████| 450/450 [01:44<00:00,  4.31it/s,  loss: 0.0009, accu: 1.0000]
valid: 100%|██████████| 113/113 [00:08<00:00, 12.63it/s,  loss: 0.0010, accu: 1.0000]


Summary epoch:
 - Train loss: 0.0182, accu: 0.9944
 - Valid loss: 0.0134, accu: 0.9942
Epoch 19/29


train: 100%|██████████| 450/450 [01:44<00:00,  4.30it/s,  loss: 0.0001, accu: 1.0000]
valid: 100%|██████████| 113/113 [00:08<00:00, 12.62it/s,  loss: 0.0002, accu: 1.0000]


Summary epoch:
 - Train loss: 0.0128, accu: 0.9964
 - Valid loss: 0.0039, accu: 0.9997
Epoch 20/29


train: 100%|██████████| 450/450 [01:44<00:00,  4.30it/s,  loss: 0.0045, accu: 1.0000]
valid: 100%|██████████| 113/113 [00:08<00:00, 12.63it/s,  loss: 0.0001, accu: 1.0000]


Summary epoch:
 - Train loss: 0.0085, accu: 0.9972
 - Valid loss: 0.0032, accu: 0.9986
Epoch 21/29


train: 100%|██████████| 450/450 [01:44<00:00,  4.31it/s,  loss: 0.0000, accu: 1.0000]
valid: 100%|██████████| 113/113 [00:08<00:00, 12.63it/s,  loss: 0.0003, accu: 1.0000]


Summary epoch:
 - Train loss: 0.0086, accu: 0.9972
 - Valid loss: 0.0005, accu: 1.0000
Epoch 22/29


train: 100%|██████████| 450/450 [01:44<00:00,  4.30it/s,  loss: 0.0045, accu: 1.0000]
valid: 100%|██████████| 113/113 [00:08<00:00, 12.62it/s,  loss: 0.0000, accu: 1.0000]


Summary epoch:
 - Train loss: 0.0115, accu: 0.9964
 - Valid loss: 0.0025, accu: 0.9994
Epoch 23/29


train: 100%|██████████| 450/450 [01:44<00:00,  4.30it/s,  loss: 0.0190, accu: 1.0000]
valid: 100%|██████████| 113/113 [00:08<00:00, 12.62it/s,  loss: 0.0001, accu: 1.0000]


Summary epoch:
 - Train loss: 0.0129, accu: 0.9967
 - Valid loss: 0.0062, accu: 0.9975
Epoch 24/29


train: 100%|██████████| 450/450 [01:44<00:00,  4.30it/s,  loss: 0.0002, accu: 1.0000]
valid: 100%|██████████| 113/113 [00:08<00:00, 12.62it/s,  loss: 0.0002, accu: 1.0000]


Summary epoch:
 - Train loss: 0.0134, accu: 0.9958
 - Valid loss: 0.0107, accu: 0.9967
Epoch 25/29


train: 100%|██████████| 450/450 [01:44<00:00,  4.31it/s,  loss: 0.0000, accu: 1.0000]
valid: 100%|██████████| 113/113 [00:08<00:00, 12.63it/s,  loss: 0.0000, accu: 1.0000]


Summary epoch:
 - Train loss: 0.0094, accu: 0.9968
 - Valid loss: 0.0002, accu: 1.0000
Epoch 26/29


train: 100%|██████████| 450/450 [01:44<00:00,  4.30it/s,  loss: 0.0004, accu: 1.0000]
valid: 100%|██████████| 113/113 [00:08<00:00, 12.63it/s,  loss: 0.0002, accu: 1.0000]


Summary epoch:
 - Train loss: 0.0119, accu: 0.9969
 - Valid loss: 0.0015, accu: 0.9997
Epoch 27/29


train: 100%|██████████| 450/450 [01:44<00:00,  4.31it/s,  loss: 0.0002, accu: 1.0000]
valid: 100%|██████████| 113/113 [00:08<00:00, 12.63it/s,  loss: 0.0001, accu: 1.0000]


Summary epoch:
 - Train loss: 0.0099, accu: 0.9972
 - Valid loss: 0.0026, accu: 0.9986
Epoch 28/29


train: 100%|██████████| 450/450 [01:44<00:00,  4.31it/s,  loss: 0.0014, accu: 1.0000]
valid: 100%|██████████| 113/113 [00:08<00:00, 12.63it/s,  loss: 0.0000, accu: 1.0000]


Summary epoch:
 - Train loss: 0.0057, accu: 0.9978
 - Valid loss: 0.0007, accu: 1.0000
Epoch 29/29


train: 100%|██████████| 450/450 [01:44<00:00,  4.30it/s,  loss: 0.0235, accu: 1.0000]
valid: 100%|██████████| 113/113 [00:08<00:00, 12.60it/s,  loss: 0.0007, accu: 1.0000]

Summary epoch:
 - Train loss: 0.0065, accu: 0.9978
 - Valid loss: 0.0020, accu: 0.9989





# KSL77 Dataset Image Classification

In [1]:
### Reference from: https://github.com/yysijie/st-gcn/blob/master/net/utils/graph.py

import os
import torch
import numpy as np


class Graph:
    """Skeleton graph that supplies the adjacency tensor for the ST-GCN layers.

    Args:
        - layout: (string) skeleton topology, one of
            - coco_cut: 14 nodes, COCO format with 4 joints (L-R ears, L-R eyes) cut out.
            - mediapipe_KSL: 47 nodes; node 0 links to nodes 1 and 2, which continue
                through nodes 3 and 4 to two 21-node hand sub-graphs rooted at
                nodes 26 and 5 (presumably MediaPipe upper-body + both hands).
        - strategy: (string) partition strategy, one of
            - uniform: Uniform Labeling,
            - distance: Distance Partitioning,
            - spatial: Spatial Configuration,
            see section 'Partition Strategies' of https://arxiv.org/abs/1801.07455.
        - max_hop: (int) the maximal distance between two connected nodes.
        - dilation: (int) controls the spacing between the kernel points.
    Attributes:
        - num_node: (int) number of nodes of the chosen layout.
        - edge: (list) self links followed by the neighbor links, as (i, j) tuples.
        - center: (int) index of the node used as center by the spatial strategy.
        - hop_dis: hop-distance matrix returned by `get_hop_distance`.
        - A: (numpy array) adjacency tensor in (K, num_node, num_node).
    """
    def __init__(self,
                 layout='coco_cut',
                 strategy='uniform',
                 max_hop=1,
                 dilation=1):
        self.max_hop, self.dilation = max_hop, dilation

        # Topology first, then pairwise hop distances, then the partitioned adjacency.
        self.get_edge(layout)
        self.hop_dis = get_hop_distance(self.num_node, self.edge, max_hop)
        self.get_adjacency(strategy)

    def get_edge(self, layout):
        """Set `num_node`, `edge` and `center` for the requested layout.

        Raises:
            ValueError: if `layout` is not one of the supported names.
        """
        if layout == 'coco_cut':
            node_count = 14
            links = [(6, 4), (4, 2), (2, 13), (13, 1), (5, 3), (3, 1), (12, 10),
                     (10, 8), (8, 2), (11, 9), (9, 7), (7, 1), (13, 0)]
            center = 13
        elif layout == 'mediapipe_KSL':
            node_count = 47
            # used_key_points=
            # [0,11,12,13,14]+[i for i in range(33,33+21)] + [i for i in range(54,54+21)]
            links = [(0, 1), (0, 2), (1, 3), (2, 4), (3, 26), (4, 5)]  # nose-arms-wrist
            # Each hand root (5 and 26) fans out into five 4-node chains.
            for root in (5, 26):
                for finger in range(5):
                    first = root + 1 + 4 * finger
                    links.append((root, first))
                    links.extend((joint, joint + 1)
                                 for joint in range(first, first + 3))
            center = 0
        else:
            raise ValueError('This layout is not supported!')

        self.num_node = node_count
        self.edge = [(node, node) for node in range(node_count)] + links
        self.center = center

    def get_adjacency(self, strategy):
        """Build `self.A` from `self.hop_dis` according to `strategy`.

        Raises:
            ValueError: if `strategy` is not 'uniform', 'distance' or 'spatial'.
        """
        hops = range(0, self.max_hop + 1, self.dilation)
        size = self.num_node

        # Binary adjacency of every pair whose hop distance is one of `hops`.
        reachable = np.zeros((size, size))
        for hop in hops:
            reachable[self.hop_dis == hop] = 1
        normalized = normalize_digraph(reachable)

        if strategy == 'uniform':
            stacked = np.zeros((1, size, size))
            stacked[0] = normalized
            self.A = stacked
        elif strategy == 'distance':
            # One slice per hop value, holding only the entries at that distance.
            stacked = np.zeros((len(hops), size, size))
            for index, hop in enumerate(hops):
                at_hop = self.hop_dis == hop
                stacked[index][at_hop] = normalized[at_hop]
            self.A = stacked
        elif strategy == 'spatial':
            # Distance of every node to the center, laid out so that entry
            # [j, i] compares node j (rows) against node i (columns).
            to_center = self.hop_dis[:, self.center]
            row_dist = to_center[:, np.newaxis]
            col_dist = to_center[np.newaxis, :]
            same = row_dist == col_dist
            row_farther = row_dist > col_dist

            slices = []
            for hop in hops:
                at_hop = self.hop_dis == hop
                a_root = np.where(at_hop & same, normalized, 0.0)
                a_close = np.where(at_hop & row_farther, normalized, 0.0)
                a_further = np.where(at_hop & ~same & ~row_farther, normalized, 0.0)
                if hop == 0:
                    slices.append(a_root)
                else:
                    slices.append(a_root + a_close)
                    slices.append(a_further)
            self.A = np.stack(slices)
        else:
            raise ValueError("This strategy is not supported!")


def get_hop_distance(num_node, edge, max_hop=1):
    """Return the matrix of hop distances between all node pairs.

    Args:
        - num_node: (int) number of nodes in the graph.
        - edge: iterable of (i, j) node-index pairs; every pair is treated as
            an undirected link.
        - max_hop: (int) largest distance that is resolved.
    Returns:
        (num_node, num_node) float array; entry [i, j] is the smallest number
        of steps (0..max_hop) in which j is reachable from i, or `inf` when it
        is not reachable within `max_hop` steps.
    """
    adjacency = np.zeros((num_node, num_node))
    for src, dst in edge:
        adjacency[dst, src] = 1
        adjacency[src, dst] = 1

    # reachable[d][i, j] is True when some walk of exactly d steps joins i and j.
    reachable = np.stack([np.linalg.matrix_power(adjacency, steps)
                          for steps in range(max_hop + 1)]) > 0

    hop_dis = np.full((num_node, num_node), np.inf)
    # Write the largest distance first so shorter walks overwrite longer ones.
    for steps in reversed(range(max_hop + 1)):
        hop_dis[reachable[steps]] = steps
    return hop_dis


def normalize_digraph(A):
    """Scale every column of `A` by the inverse of its column sum.

    Computes `A . D^-1`, where `D` is the diagonal matrix of column sums of
    `A`. Columns whose sum is not positive get a zero scale factor.

    Args:
        - A: square (V, V) float array.
    Returns:
        (V, V) array holding the column-normalized matrix.
    """
    size = A.shape[0]
    column_degree = np.sum(A, 0)
    inverse_degree = np.zeros((size, size))
    for node in range(size):
        degree = column_degree[node]
        if degree > 0:
            inverse_degree[node, node] = degree ** (-1)
    return np.dot(A, inverse_degree)


def normalize_undigraph(A):
    """Symmetrically normalize `A` by its column sums.

    Computes `D^-1/2 . A . D^-1/2`, where `D` is the diagonal matrix of column
    sums of `A`. Nodes whose column sum is not positive get a zero factor.

    Args:
        - A: square (V, V) float array.
    Returns:
        (V, V) array holding the symmetrically normalized matrix.
    """
    size = A.shape[0]
    column_degree = np.sum(A, 0)
    inv_sqrt_degree = np.zeros((size, size))
    for node in range(size):
        degree = column_degree[node]
        if degree > 0:
            inv_sqrt_degree[node, node] = degree ** (-0.5)
    left_scaled = np.dot(inv_sqrt_degree, A)
    return np.dot(left_scaled, inv_sqrt_degree)

In [2]:
### Reference from: https://github.com/yysijie/st-gcn/tree/master/net

import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np

# from Actionsrecognition.Utils import Graph


class GraphConvolution(nn.Module):
    """The basic module for applying a graph convolution.

    A single 2D convolution produces `kernel_size` groups of `out_channels`
    feature maps; each group is then mixed across nodes with its own slice of
    the adjacency tensor and the groups are summed.

    Args:
        - in_channels: (int) Number of channels in the input sequence data.
        - out_channels: (int) Number of channels produced by the convolution.
        - kernel_size: (int) Size of the graph convolving kernel, i.e. the
            number `K` of adjacency slices.
        - t_kernel_size: (int) Size of the temporal convolving kernel.
        - t_stride: (int, optional) Stride of the temporal convolution. Default: 1
        - t_padding: (int, optional) Temporal zero-padding added to both sides of
            the input. Default: 0
        - t_dilation: (int, optional) Spacing between temporal kernel elements. Default: 1
        - bias: (bool, optional) If `True`, adds a learnable bias to the output.
            Default: `True`
    Shape:
        - Inputs x: Graph sequence in :math:`(N, in_channels, T_{in}, V)`,
                 A: Graph adjacency matrix in :math:`(K, V, V)`,
        - Output: Graph sequence out in :math:`(N, out_channels, T_{out}, V)`

            where
                :math:`N` is a batch size,
                :math:`K` is the spatial kernel size, as :math:`K == kernel_size`,
                :math:`T_{in}/T_{out}` is a length of input/output sequence,
                :math:`V` is the number of graph nodes.

    """
    def __init__(self, in_channels, out_channels, kernel_size,
                 t_kernel_size=1,
                 t_stride=1,
                 t_padding=0,
                 t_dilation=1,
                 bias=True):
        super().__init__()

        self.kernel_size = kernel_size
        # The convolution only spans the temporal axis; the node axis has size-1 kernel.
        self.conv = nn.Conv2d(
            in_channels,
            out_channels * kernel_size,
            kernel_size=(t_kernel_size, 1),
            stride=(t_stride, 1),
            padding=(t_padding, 0),
            dilation=(t_dilation, 1),
            bias=bias,
        )

    def forward(self, x, A):
        features = self.conv(x)
        batch, stacked_channels, frames, nodes = features.size()
        # Split the stacked channel axis into (K, out_channels).
        per_kernel = stacked_channels // self.kernel_size
        features = features.view(batch, self.kernel_size, per_kernel, frames, nodes)
        # Contract over the kernel index k and the source node v.
        mixed = torch.einsum('nkctv,kvw->nctw', features, A)

        return mixed.contiguous()


class st_gcn(nn.Module):
    """One spatial temporal graph convolution unit.

    The unit applies a graph convolution (`gcn`), then a temporal block
    (`tcn`: BatchNorm -> ReLU -> temporal Conv2d -> BatchNorm -> Dropout),
    adds a residual branch and finishes with a ReLU.

    Args:
        - in_channels: (int) Number of channels in the input sequence data.
        - out_channels: (int) Number of channels produced by the convolution.
        - kernel_size: (tuple) `(temporal, spatial)` kernel sizes; the temporal
            size must be odd.
        - stride: (int, optional) Stride of the temporal convolution. Default: 1
        - dropout: (int, optional) Dropout rate of the final output. Default: 0
        - residual: (bool, optional) If `True`, applies a residual mechanism.
            Default: `True`
    Shape:
        - Inputs x: Graph sequence in :math: `(N, in_channels, T_{in}, V)`,
                 A: Graph adjacency matrix in :math: `(K, V, V)`,
        - Output: Graph sequence out in :math: `(N, out_channels, T_{out}, V)`
            where
                :math:`N` is a batch size,
                :math:`K` is the spatial kernel size, as :math:`K == kernel_size[1]`,
                :math:`T_{in}/T_{out}` is a length of input/output sequence,
                :math:`V` is the number of graph nodes.
    """
    def __init__(self, in_channels, out_channels, kernel_size,
                 stride=1,
                 dropout=0,
                 residual=True):
        super().__init__()
        assert len(kernel_size) == 2
        assert kernel_size[0] % 2 == 1
        temporal_size = kernel_size[0]
        spatial_size = kernel_size[1]
        # Half-kernel padding on the temporal axis, e.g. (4, 0) for a size-9 kernel.
        temporal_pad = ((temporal_size - 1) // 2, 0)

        self.gcn = GraphConvolution(in_channels, out_channels, spatial_size)
        self.tcn = nn.Sequential(
            nn.BatchNorm2d(out_channels),
            nn.ReLU(inplace=True),
            nn.Conv2d(out_channels,
                      out_channels,
                      (temporal_size, 1),
                      (stride, 1),
                      temporal_pad),
            nn.BatchNorm2d(out_channels),
            nn.Dropout(dropout, inplace=True),
        )

        if residual and (in_channels == out_channels) and (stride == 1):
            # Shapes already match: plain identity shortcut.
            self.residual = lambda x: x
        elif residual:
            # Shapes differ: project the input with a strided 1x1 convolution.
            self.residual = nn.Sequential(
                nn.Conv2d(in_channels,
                          out_channels,
                          kernel_size=1,
                          stride=(stride, 1)),
                nn.BatchNorm2d(out_channels),
            )
        else:
            # Residual disabled: contribute nothing to the sum.
            self.residual = lambda x: 0
        self.relu = nn.ReLU(inplace=True)

    def forward(self, x, A):
        # The shortcut is taken from the unit's input, before the graph convolution.
        shortcut = self.residual(x)
        out = self.tcn(self.gcn(x, A))
        return self.relu(out + shortcut)


class StreamSpatialTemporalGraph(nn.Module):
    """Single-stream spatial temporal graph convolutional network.

    Ten `st_gcn` units (64 -> 64 -> 64 -> 64 -> 128 -> 128 -> 128 -> 256 -> 256
    -> 256 channels, temporal stride 2 where the width changes to 128 and 256)
    followed by global average pooling and an optional 1x1 classifier.

    Args:
        - in_channels: (int) Number of input channels.
        - graph_args: (dict) Keyword arguments forwarded to the `Graph` class.
        - num_class: (int) Number of class outputs. If `None` return pooling features of
            the last st-gcn layer instead.
        - edge_importance_weighting: (bool) If `True`, adds a learnable importance
            weighting to the edges of the graph.
        - **kwargs: (optional) Other parameters for graph convolution units.
            A `dropout` entry is not passed to the first unit.
    Shape:
        - Input: :math:`(N, in_channels, T_{in}, V_{in})`
        - Output: :math:`(N, num_class)` where
            :math:`N` is a batch size,
            :math:`T_{in}` is a length of input sequence,
            :math:`V_{in}` is the number of graph nodes,
        or If num_class is `None`: `(N, out_channels)`
            :math:`out_channels` is number of out_channels of the last layer.
    """
    def __init__(self, in_channels, graph_args, num_class=None,
                 edge_importance_weighting=True, **kwargs):
        super().__init__()
        # Load graph; the adjacency is a fixed buffer, not a trainable parameter.
        skeleton = Graph(**graph_args)
        A = torch.tensor(skeleton.A, dtype=torch.float32, requires_grad=False)
        self.register_buffer('A', A)

        # Networks: temporal kernel of 9, one spatial kernel per adjacency slice.
        kernel_size = (9, A.size(0))
        first_kwargs = {key: val for key, val in kwargs.items() if key != 'dropout'}

        self.data_bn = nn.BatchNorm1d(in_channels * A.size(1))

        # (in_channels, out_channels, temporal stride) of the units after the first.
        later_units = (
            (64, 64, 1),
            (64, 64, 1),
            (64, 64, 1),
            (64, 128, 2),
            (128, 128, 1),
            (128, 128, 1),
            (128, 256, 2),
            (256, 256, 1),
            (256, 256, 1),
        )
        units = [st_gcn(in_channels, 64, kernel_size, 1, residual=False, **first_kwargs)]
        for width_in, width_out, stride in later_units:
            units.append(st_gcn(width_in, width_out, kernel_size, stride, **kwargs))
        self.st_gcn_networks = nn.ModuleList(units)

        # One importance mask per unit: learnable, or the constant 1.
        if edge_importance_weighting:
            self.edge_importance = nn.ParameterList(
                [nn.Parameter(torch.ones(A.size())) for _ in self.st_gcn_networks]
            )
        else:
            self.edge_importance = [1] * len(self.st_gcn_networks)

        if num_class is None:
            self.cls = lambda x: x
        else:
            self.cls = nn.Conv2d(256, num_class, kernel_size=1)

    def forward(self, x):
        # Data normalization: BatchNorm1d over the flattened (node, channel) axis.
        N, C, T, V = x.size()
        x = x.permute(0, 3, 1, 2).contiguous().view(N, V * C, T)  # (N, V*C, T)
        x = self.data_bn(x)
        x = x.view(N, V, C, T).permute(0, 2, 3, 1).contiguous()
        x = x.view(N, C, T, V)

        # Forward through the units, each with its own re-weighted adjacency.
        for unit, importance in zip(self.st_gcn_networks, self.edge_importance):
            x = unit(x, self.A * importance)

        # Global average pooling over the remaining (T, V) extent.
        x = F.avg_pool2d(x, x.size()[2:])
        x = self.cls(x)
        return x.view(x.size(0), -1)


class TwoStreamSpatialTemporalGraph(nn.Module):
    """Classifier built on the points stream of a two-stream ST-GCN.

    Two `StreamSpatialTemporalGraph` feature extractors are constructed
    (`pts_stream` and `mot_stream`, both with 3 input channels), but the
    forward pass currently uses only `pts_stream`; its 256 pooled features go
    through a single linear layer.

    NOTE(review): `mot_stream` is never called in `forward`, yet it is still
    part of `parameters()` and `state_dict()`. The original code also carried a
    commented-out variant that concatenated both streams (optionally gated by a
    small sigmoid attention MLP) in front of a `Linear(256 * 2, num_class)`.

    Args:
        - graph_args: (dict) Keyword arguments forwarded to the `Graph` class.
        - num_class: (int) Number of class outputs.
        - edge_importance_weighting: (bool) If `True`, adds a learnable importance
            weighting to the edges of the graph.
        - **kwargs: (optional) Other parameters for graph convolution units.
    Shape:
        - Input: indexable pair `(points, motions)`; only `inputs[0]` is read,
            a tensor in :math:`(N, 3, T, V)` where
            :math:`N` is a batch size,
            :math:`T` is a length of input sequence,
            :math:`V` is the number of graph nodes.
        - Output: :math:`(N, num_class)` raw scores (no softmax / sigmoid applied).
    """
    def __init__(self, graph_args, num_class, edge_importance_weighting=True,
                 **kwargs):
        super().__init__()
        # Both streams share the same constructor arguments.
        stream_args = (3, graph_args, None, edge_importance_weighting)
        self.pts_stream = StreamSpatialTemporalGraph(*stream_args, **kwargs)
        self.mot_stream = StreamSpatialTemporalGraph(*stream_args, **kwargs)

        # Single-stream head: 256 pooled features -> class scores.
        self.fcn = nn.Linear(256, num_class)

    def forward(self, inputs):
        # Only the points stream is evaluated; inputs[1] is ignored.
        pts_features = self.pts_stream(inputs[0])  # (N, 256)
        return self.fcn(pts_features)

In [3]:
import os
import time
import torch
import pickle
import numpy as np
import torch.nn.functional as F
from shutil import copyfile
from tqdm import tqdm
from torch.utils import data
from torch.optim.adadelta import Adadelta
from sklearn.model_selection import train_test_split

# Prefer the GPU when PyTorch can see one, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

print("using", device, "device")

# Number of training epochs and nominal mini-batch size.
# NOTE(review): the load_dataset call further down passes a literal 32, not
# this batch_size value.
epochs = 100
batch_size = 64  # 32

def load_dataset(data_files, batch_size, split_size=0.2,used_key_points=None):#0.2
    """Load pickled data files into torch DataLoaders, optionally split train/valid.

    Args:
        - data_files: iterable of paths; each file must unpickle to a
            `(features, labels)` pair of arrays that can be concatenated
            along axis 0 with those of the other files.
        - batch_size: (int) batch size of the returned loaders.
        - split_size: (float) fraction of the samples held out for validation
            (stratified by label, `random_state=0`). A value `<= 0` disables
            the split.
        - used_key_points: (optional) index list / array selecting entries on
            the 4th axis of `features` (presumably the keypoint axis of an
            (N, C, T, V) array -- confirm against the dataset). `None` keeps
            every entry.
    Returns:
        `(train_loader, valid_loader)`; `valid_loader` is `None` when no split
        is made. The training loader shuffles, the validation loader does not.
    """
    features, labels = [], []
    for fil in data_files:
        # NOTE(security): pickle.load can execute arbitrary code; only point
        # this at dataset files from a trusted source.
        with open(fil, 'rb') as f:
            fts, lbs = pickle.load(f)
        features.append(fts)
        labels.append(lbs)
    features = np.concatenate(features, axis=0)
    labels = np.concatenate(labels, axis=0)

    # `is not None` (rather than `!= None`) so that a NumPy index array is
    # accepted instead of raising "truth value of an array is ambiguous".
    if used_key_points is not None:
        features = features[:, :, :, used_key_points]
    print(features.shape)
    print(labels.shape)

    if split_size > 0:
        x_train, x_valid, y_train, y_valid = train_test_split(
            features, labels, test_size=split_size, random_state=0, stratify=labels)

        train_set = data.TensorDataset(torch.tensor(x_train, dtype=torch.float32),
                                       torch.tensor(y_train, dtype=torch.int64))
        valid_set = data.TensorDataset(torch.tensor(x_valid, dtype=torch.float32),
                                       torch.tensor(y_valid, dtype=torch.int64))
        train_loader = data.DataLoader(train_set, batch_size, shuffle=True)
        valid_loader = data.DataLoader(valid_set, batch_size)
    else:
        train_set = data.TensorDataset(torch.tensor(features, dtype=torch.float32),
                                       torch.tensor(labels, dtype=torch.int64))
        train_loader = data.DataLoader(train_set, batch_size, shuffle=True)
        valid_loader = None
    return train_loader, valid_loader

def accuracy_batch(y_pred, y_true):
    """Return the fraction of samples whose top-scoring class equals the label.

    Args:
        - y_pred: array of per-class scores, classes along axis 1.
        - y_true: array of integer class labels (not one-hot).
    """
    predicted = y_pred.argmax(1)
    hits = predicted == y_true
    return hits.mean()


def set_training(model, mode=True):
    """Switch `model` between training and evaluation and return it.

    Sets `requires_grad` of every parameter to `mode` and puts the module in
    train mode (`mode=True`) or eval mode (`mode=False`).
    """
    model.requires_grad_(mode)
    model.train(mode)
    return model

# Output / dataset locations live under the user's home directory.
home_dir = os.environ['HOME']
save_folder = os.path.join(home_dir, "KSL_V2/Outputs")
os.makedirs(save_folder, exist_ok=True)

# 5 + 21 + 21 = 47 keypoint indices, matching the 47-node 'mediapipe_KSL' layout
# (presumably pose landmarks 0, 11-14 followed by the two hands -- confirm
# against the dataset layout).
used_key_points = [0, 11, 12, 13, 14] + list(range(33, 33 + 21)) + list(range(54, 54 + 21))
dataset_path = os.path.join(home_dir, "KSL_V2/Datasets/KSL77_1_dataset.pkl")
# batch_size = 32, 20 % of the samples held out for validation.
train_loader, valid_loader = load_dataset([dataset_path], 32, 0.2, used_key_points)
dataloader = {'train': train_loader, 'valid': valid_loader}
num_class = 77

graph_args = {'layout': 'mediapipe_KSL', 'strategy': 'spatial'}
model = TwoStreamSpatialTemporalGraph(graph_args, num_class).to(device)
n_parameters = sum(param.numel() for param in model.parameters() if param.requires_grad)
print(f"number of params: {n_parameters}")
optimizer = torch.optim.RMSprop(model.parameters(), lr=0.0001)
losser = torch.nn.CrossEntropyLoss()

# Per-epoch history of the averaged metrics, and the best validation accuracy so far.
loss_list = {'train': [], 'valid': []}
accu_list = {'train': [], 'valid': []}
best_acc = -1
# Train / validate for `epochs` epochs, keeping the weights of the epoch with
# the best validation accuracy in `tsstg-model_best.pth`.
for e in range(epochs):
    print('Epoch {}/{}'.format(e, epochs - 1))
    for phase in ['train', 'valid']:
        # Train mode with gradients for 'train', eval mode without for 'valid'.
        model = set_training(model, phase == 'train')

        # Running sums are weighted by batch size so the epoch metrics are true
        # per-sample means; a plain mean of batch means over-weights the
        # (smaller) final batch.
        run_loss = 0.0
        run_accu = 0.0
        n_samples = 0
        with tqdm(dataloader[phase], desc=phase) as iterator:
            for pts, lbs in iterator:
                # Create motion input by distance of points (x, y) of the same node
                # in two frames.
                # NOTE(review): the model's forward currently ignores this
                # second input; it is still passed to keep the call signature.
                mot = pts[:, :, 1:, :] - pts[:, :, :-1, :]

                mot = mot.to(device)
                pts = pts.to(device)
                lbs = lbs.to(device)

                # Forward.
                out = model((pts, mot))
                loss = losser(out, lbs)

                if phase == 'train':
                    # Backward.
                    model.zero_grad()
                    loss.backward()
                    optimizer.step()

                batch_n = lbs.size(0)
                batch_loss = loss.item()  # mean loss over this batch
                accu = accuracy_batch(out.detach().cpu().numpy(),
                                      lbs.detach().cpu().numpy())
                run_loss += batch_loss * batch_n
                run_accu += accu * batch_n
                n_samples += batch_n

                # The tqdm wrapper advances itself on every iteration, so no
                # manual iterator.update() here (it would count batches twice).
                iterator.set_postfix_str(' loss: {:.4f}, accu: {:.4f}'.format(
                    batch_loss, accu))
        # max(..., 1) only guards against an empty loader.
        loss_list[phase].append(run_loss / max(n_samples, 1))
        accu_list[phase].append(run_accu / max(n_samples, 1))

    # Checkpoint whenever the validation accuracy improves.
    if best_acc < accu_list['valid'][-1]:
        best_acc = accu_list['valid'][-1]
        torch.save(model.state_dict(), os.path.join(save_folder, 'tsstg-model_best.pth'))

    print('Summary epoch:\n - Train loss: {:.4f}, accu: {:.4f}\n - Valid loss:'
          ' {:.4f}, accu: {:.4f}'.format(loss_list['train'][-1], accu_list['train'][-1],
                                         loss_list['valid'][-1], accu_list['valid'][-1]))
del model

using cuda device
(108373, 3, 1, 47)
(108373,)
number of params: 6281917
Epoch 0/99


train: 100%|██████████| 2710/2710 [00:45<00:00, 59.85it/s,  loss: 3.3436, accu: 0.1000]
valid: 100%|██████████| 678/678 [00:03<00:00, 190.58it/s,  loss: 1.9834, accu: 0.3636]


Summary epoch:
 - Train loss: 2.9000, accu: 0.2425
 - Valid loss: 2.3800, accu: 0.3538
Epoch 1/99


train: 100%|██████████| 2710/2710 [00:44<00:00, 60.55it/s,  loss: 2.3424, accu: 0.4000]
valid: 100%|██████████| 678/678 [00:03<00:00, 191.18it/s,  loss: 1.7863, accu: 0.4545]


Summary epoch:
 - Train loss: 2.1478, accu: 0.4137
 - Valid loss: 2.1252, accu: 0.4200
Epoch 2/99


train: 100%|██████████| 2710/2710 [00:44<00:00, 60.41it/s,  loss: 1.4630, accu: 0.6000]
valid: 100%|██████████| 678/678 [00:03<00:00, 190.60it/s,  loss: 1.0020, accu: 0.7273]


Summary epoch:
 - Train loss: 1.7351, accu: 0.5220
 - Valid loss: 1.2949, accu: 0.6395
Epoch 3/99


train: 100%|██████████| 2710/2710 [00:44<00:00, 60.48it/s,  loss: 2.7902, accu: 0.3000]
valid: 100%|██████████| 678/678 [00:03<00:00, 191.13it/s,  loss: 1.7586, accu: 0.6364]


Summary epoch:
 - Train loss: 1.4600, accu: 0.5906
 - Valid loss: 1.7154, accu: 0.5171
Epoch 4/99


train: 100%|██████████| 2710/2710 [00:44<00:00, 60.43it/s,  loss: 1.5890, accu: 0.6000]
valid: 100%|██████████| 678/678 [00:03<00:00, 190.87it/s,  loss: 0.5235, accu: 0.9091]


Summary epoch:
 - Train loss: 1.2659, accu: 0.6448
 - Valid loss: 1.0196, accu: 0.7128
Epoch 5/99


train: 100%|██████████| 2710/2710 [00:39<00:00, 69.30it/s,  loss: 2.1312, accu: 0.4000]
valid: 100%|██████████| 678/678 [00:03<00:00, 222.86it/s,  loss: 1.1388, accu: 0.4545]


Summary epoch:
 - Train loss: 1.1187, accu: 0.6824
 - Valid loss: 0.9741, accu: 0.7222
Epoch 6/99


train: 100%|██████████| 2710/2710 [00:38<00:00, 70.21it/s,  loss: 2.1703, accu: 0.4000]
valid: 100%|██████████| 678/678 [00:03<00:00, 223.12it/s,  loss: 0.5841, accu: 0.9091]


Summary epoch:
 - Train loss: 1.0088, accu: 0.7102
 - Valid loss: 0.8123, accu: 0.7717
Epoch 7/99


train: 100%|██████████| 2710/2710 [00:38<00:00, 70.43it/s,  loss: 1.6991, accu: 0.5000]
valid: 100%|██████████| 678/678 [00:03<00:00, 223.25it/s,  loss: 0.8531, accu: 0.6364]


Summary epoch:
 - Train loss: 0.9284, accu: 0.7311
 - Valid loss: 0.8148, accu: 0.7683
Epoch 8/99


train: 100%|██████████| 2710/2710 [00:38<00:00, 70.21it/s,  loss: 2.1276, accu: 0.4000]
valid: 100%|██████████| 678/678 [00:03<00:00, 223.99it/s,  loss: 1.7987, accu: 0.5455]


Summary epoch:
 - Train loss: 0.8580, accu: 0.7510
 - Valid loss: 1.5735, accu: 0.5567
Epoch 9/99


train: 100%|██████████| 2710/2710 [00:38<00:00, 70.44it/s,  loss: 2.2445, accu: 0.3000]
valid: 100%|██████████| 678/678 [00:03<00:00, 222.94it/s,  loss: 0.3580, accu: 0.9091]


Summary epoch:
 - Train loss: 0.8041, accu: 0.7656
 - Valid loss: 0.6062, accu: 0.8249
Epoch 10/99


train: 100%|██████████| 2710/2710 [00:38<00:00, 70.29it/s,  loss: 2.9110, accu: 0.3000]
valid: 100%|██████████| 678/678 [00:03<00:00, 223.64it/s,  loss: 0.1680, accu: 1.0000]


Summary epoch:
 - Train loss: 0.7573, accu: 0.7775
 - Valid loss: 0.5245, accu: 0.8514
Epoch 11/99


train: 100%|██████████| 2710/2710 [00:38<00:00, 70.35it/s,  loss: 2.0520, accu: 0.5000]
valid: 100%|██████████| 678/678 [00:03<00:00, 222.81it/s,  loss: 0.2512, accu: 1.0000]


Summary epoch:
 - Train loss: 0.7124, accu: 0.7906
 - Valid loss: 0.4538, accu: 0.8684
Epoch 12/99


train: 100%|██████████| 2710/2710 [00:38<00:00, 70.24it/s,  loss: 1.6595, accu: 0.5000]
valid: 100%|██████████| 678/678 [00:03<00:00, 223.64it/s,  loss: 0.3964, accu: 0.8182]


Summary epoch:
 - Train loss: 0.6736, accu: 0.8001
 - Valid loss: 0.5239, accu: 0.8455
Epoch 13/99


train: 100%|██████████| 2710/2710 [00:38<00:00, 70.30it/s,  loss: 2.9323, accu: 0.3000]
valid: 100%|██████████| 678/678 [00:03<00:00, 223.92it/s,  loss: 0.1104, accu: 1.0000]


Summary epoch:
 - Train loss: 0.6421, accu: 0.8084
 - Valid loss: 0.4840, accu: 0.8540
Epoch 14/99


train: 100%|██████████| 2710/2710 [00:38<00:00, 70.40it/s,  loss: 0.7315, accu: 0.8000]
valid: 100%|██████████| 678/678 [00:03<00:00, 223.39it/s,  loss: 0.1618, accu: 1.0000]


Summary epoch:
 - Train loss: 0.6174, accu: 0.8154
 - Valid loss: 0.4472, accu: 0.8650
Epoch 15/99


train: 100%|██████████| 2710/2710 [00:38<00:00, 70.32it/s,  loss: 1.0400, accu: 0.8000]
valid: 100%|██████████| 678/678 [00:03<00:00, 222.54it/s,  loss: 0.1784, accu: 1.0000]


Summary epoch:
 - Train loss: 0.5910, accu: 0.8225
 - Valid loss: 0.3724, accu: 0.8952
Epoch 16/99


train: 100%|██████████| 2710/2710 [00:38<00:00, 70.38it/s,  loss: 1.4912, accu: 0.5000]
valid: 100%|██████████| 678/678 [00:03<00:00, 222.90it/s,  loss: 0.2300, accu: 1.0000]


Summary epoch:
 - Train loss: 0.5748, accu: 0.8264
 - Valid loss: 0.4622, accu: 0.8668
Epoch 17/99


train: 100%|██████████| 2710/2710 [00:38<00:00, 70.19it/s,  loss: 3.7467, accu: 0.3000]
valid: 100%|██████████| 678/678 [00:03<00:00, 222.85it/s,  loss: 0.5743, accu: 0.8182]


Summary epoch:
 - Train loss: 0.5471, accu: 0.8351
 - Valid loss: 0.8949, accu: 0.7669
Epoch 18/99


train: 100%|██████████| 2710/2710 [00:38<00:00, 70.34it/s,  loss: 1.9030, accu: 0.4000]
valid: 100%|██████████| 678/678 [00:03<00:00, 223.70it/s,  loss: 0.1788, accu: 1.0000]


Summary epoch:
 - Train loss: 0.5420, accu: 0.8367
 - Valid loss: 0.3246, accu: 0.9060
Epoch 19/99


train: 100%|██████████| 2710/2710 [00:38<00:00, 70.39it/s,  loss: 0.8519, accu: 0.7000]
valid: 100%|██████████| 678/678 [00:03<00:00, 223.35it/s,  loss: 0.3225, accu: 0.9091]


Summary epoch:
 - Train loss: 0.5223, accu: 0.8431
 - Valid loss: 0.3260, accu: 0.9021
Epoch 20/99


train: 100%|██████████| 2710/2710 [00:38<00:00, 69.99it/s,  loss: 0.7988, accu: 0.8000]
valid: 100%|██████████| 678/678 [00:03<00:00, 223.07it/s,  loss: 0.0800, accu: 1.0000]


Summary epoch:
 - Train loss: 0.5040, accu: 0.8476
 - Valid loss: 0.2866, accu: 0.9174
Epoch 21/99


train: 100%|██████████| 2710/2710 [00:38<00:00, 70.33it/s,  loss: 2.3350, accu: 0.4000]
valid: 100%|██████████| 678/678 [00:03<00:00, 222.84it/s,  loss: 0.7983, accu: 0.8182]


Summary epoch:
 - Train loss: 0.4855, accu: 0.8525
 - Valid loss: 0.4936, accu: 0.8633
Epoch 22/99


train: 100%|██████████| 2710/2710 [00:38<00:00, 70.22it/s,  loss: 1.1049, accu: 0.8000]
valid: 100%|██████████| 678/678 [00:03<00:00, 223.58it/s,  loss: 0.1969, accu: 0.9091]


Summary epoch:
 - Train loss: 0.4735, accu: 0.8556
 - Valid loss: 0.2625, accu: 0.9263
Epoch 23/99


train: 100%|██████████| 2710/2710 [00:38<00:00, 70.34it/s,  loss: 1.1536, accu: 0.7000]
valid: 100%|██████████| 678/678 [00:03<00:00, 222.34it/s,  loss: 0.0298, accu: 1.0000]


Summary epoch:
 - Train loss: 0.4601, accu: 0.8596
 - Valid loss: 0.2695, accu: 0.9248
Epoch 24/99


train: 100%|██████████| 2710/2710 [00:38<00:00, 70.39it/s,  loss: 0.7743, accu: 0.8000]
valid: 100%|██████████| 678/678 [00:03<00:00, 222.92it/s,  loss: 0.2202, accu: 0.9091]


Summary epoch:
 - Train loss: 0.4505, accu: 0.8627
 - Valid loss: 0.2568, accu: 0.9265
Epoch 25/99


train: 100%|██████████| 2710/2710 [00:38<00:00, 70.21it/s,  loss: 0.7510, accu: 0.8000]
valid: 100%|██████████| 678/678 [00:03<00:00, 223.16it/s,  loss: 0.0803, accu: 1.0000]


Summary epoch:
 - Train loss: 0.4427, accu: 0.8645
 - Valid loss: 0.2734, accu: 0.9232
Epoch 26/99


train: 100%|██████████| 2710/2710 [00:38<00:00, 70.36it/s,  loss: 3.1884, accu: 0.4000]
valid: 100%|██████████| 678/678 [00:03<00:00, 223.41it/s,  loss: 0.0752, accu: 1.0000]


Summary epoch:
 - Train loss: 0.4272, accu: 0.8698
 - Valid loss: 0.3349, accu: 0.9016
Epoch 27/99


train: 100%|██████████| 2710/2710 [00:38<00:00, 70.39it/s,  loss: 1.2010, accu: 0.7000]
valid: 100%|██████████| 678/678 [00:03<00:00, 223.08it/s,  loss: 0.0486, accu: 1.0000]


Summary epoch:
 - Train loss: 0.4133, accu: 0.8724
 - Valid loss: 0.2342, accu: 0.9303
Epoch 28/99


train: 100%|██████████| 2710/2710 [00:38<00:00, 70.30it/s,  loss: 1.6026, accu: 0.6000]
valid: 100%|██████████| 678/678 [00:03<00:00, 222.45it/s,  loss: 0.0382, accu: 1.0000]


Summary epoch:
 - Train loss: 0.4151, accu: 0.8712
 - Valid loss: 0.3425, accu: 0.8991
Epoch 29/99


train: 100%|██████████| 2710/2710 [00:38<00:00, 70.23it/s,  loss: 0.7012, accu: 0.7000]
valid: 100%|██████████| 678/678 [00:03<00:00, 223.16it/s,  loss: 0.0733, accu: 1.0000]


Summary epoch:
 - Train loss: 0.4026, accu: 0.8753
 - Valid loss: 0.2276, accu: 0.9311
Epoch 30/99


train: 100%|██████████| 2710/2710 [00:38<00:00, 70.41it/s,  loss: 1.2063, accu: 0.6000]
valid: 100%|██████████| 678/678 [00:03<00:00, 223.44it/s,  loss: 1.3047, accu: 0.4545]


Summary epoch:
 - Train loss: 0.3926, accu: 0.8790
 - Valid loss: 0.8432, accu: 0.7623
Epoch 31/99


train: 100%|██████████| 2710/2710 [00:38<00:00, 70.23it/s,  loss: 0.2402, accu: 1.0000]
valid: 100%|██████████| 678/678 [00:03<00:00, 223.69it/s,  loss: 0.0476, accu: 1.0000]


Summary epoch:
 - Train loss: 0.3841, accu: 0.8814
 - Valid loss: 0.2020, accu: 0.9418
Epoch 32/99


train: 100%|██████████| 2710/2710 [00:38<00:00, 70.43it/s,  loss: 1.2749, accu: 0.6000]
valid: 100%|██████████| 678/678 [00:03<00:00, 223.35it/s,  loss: 0.2168, accu: 0.9091]


Summary epoch:
 - Train loss: 0.3793, accu: 0.8821
 - Valid loss: 0.2450, accu: 0.9287
Epoch 33/99


train: 100%|██████████| 2710/2710 [00:38<00:00, 70.12it/s,  loss: 1.3503, accu: 0.6000]
valid: 100%|██████████| 678/678 [00:03<00:00, 223.52it/s,  loss: 0.1809, accu: 1.0000]


Summary epoch:
 - Train loss: 0.3670, accu: 0.8872
 - Valid loss: 0.2647, accu: 0.9224
Epoch 34/99


train: 100%|██████████| 2710/2710 [00:38<00:00, 70.24it/s,  loss: 1.2089, accu: 0.4000]
valid: 100%|██████████| 678/678 [00:03<00:00, 223.30it/s,  loss: 2.3271, accu: 0.5455]


Summary epoch:
 - Train loss: 0.3645, accu: 0.8866
 - Valid loss: 1.1108, accu: 0.6937
Epoch 35/99


train: 100%|██████████| 2710/2710 [00:38<00:00, 70.30it/s,  loss: 0.9826, accu: 0.6000]
valid: 100%|██████████| 678/678 [00:03<00:00, 223.51it/s,  loss: 0.2727, accu: 0.9091]


Summary epoch:
 - Train loss: 0.3553, accu: 0.8889
 - Valid loss: 0.2586, accu: 0.9254
Epoch 36/99


train: 100%|██████████| 2710/2710 [00:38<00:00, 70.30it/s,  loss: 1.0177, accu: 0.6000]
valid: 100%|██████████| 678/678 [00:03<00:00, 223.12it/s,  loss: 1.3918, accu: 0.6364]


Summary epoch:
 - Train loss: 0.3561, accu: 0.8894
 - Valid loss: 0.9010, accu: 0.7515
Epoch 37/99


train: 100%|██████████| 2710/2710 [00:38<00:00, 70.28it/s,  loss: 1.8463, accu: 0.4000]
valid: 100%|██████████| 678/678 [00:03<00:00, 223.78it/s,  loss: 0.0371, accu: 1.0000]


Summary epoch:
 - Train loss: 0.3478, accu: 0.8909
 - Valid loss: 0.2431, accu: 0.9287
Epoch 38/99


train: 100%|██████████| 2710/2710 [00:38<00:00, 70.21it/s,  loss: 1.7115, accu: 0.6000]
valid: 100%|██████████| 678/678 [00:03<00:00, 223.81it/s,  loss: 0.1288, accu: 1.0000]


Summary epoch:
 - Train loss: 0.3379, accu: 0.8951
 - Valid loss: 0.4628, accu: 0.8656
Epoch 39/99


train: 100%|██████████| 2710/2710 [00:38<00:00, 70.32it/s,  loss: 2.0152, accu: 0.5000]
valid: 100%|██████████| 678/678 [00:03<00:00, 223.03it/s,  loss: 0.4778, accu: 0.7273]


Summary epoch:
 - Train loss: 0.3417, accu: 0.8936
 - Valid loss: 0.2061, accu: 0.9430
Epoch 40/99


train: 100%|██████████| 2710/2710 [00:38<00:00, 70.21it/s,  loss: 2.1741, accu: 0.6000]
valid: 100%|██████████| 678/678 [00:03<00:00, 222.10it/s,  loss: 0.0385, accu: 1.0000]


Summary epoch:
 - Train loss: 0.3246, accu: 0.8988
 - Valid loss: 0.2119, accu: 0.9400
Epoch 41/99


train: 100%|██████████| 2710/2710 [00:38<00:00, 70.39it/s,  loss: 1.4460, accu: 0.6000]
valid: 100%|██████████| 678/678 [00:03<00:00, 221.84it/s,  loss: 0.0520, accu: 1.0000]


Summary epoch:
 - Train loss: 0.3257, accu: 0.8983
 - Valid loss: 0.2065, accu: 0.9407
Epoch 42/99


train: 100%|██████████| 2710/2710 [00:38<00:00, 70.17it/s,  loss: 1.8447, accu: 0.4000]
valid: 100%|██████████| 678/678 [00:03<00:00, 222.98it/s,  loss: 0.2447, accu: 0.9091]


Summary epoch:
 - Train loss: 0.3241, accu: 0.8985
 - Valid loss: 0.2019, accu: 0.9437
Epoch 43/99


train: 100%|██████████| 2710/2710 [00:38<00:00, 70.32it/s,  loss: 0.5214, accu: 0.7000]
valid: 100%|██████████| 678/678 [00:03<00:00, 222.86it/s,  loss: 0.2143, accu: 0.8182]


Summary epoch:
 - Train loss: 0.3167, accu: 0.8999
 - Valid loss: 0.1890, accu: 0.9476
Epoch 44/99


train: 100%|██████████| 2710/2710 [00:38<00:00, 70.16it/s,  loss: 1.5972, accu: 0.5000]
valid: 100%|██████████| 678/678 [00:03<00:00, 222.71it/s,  loss: 0.2188, accu: 0.9091]


Summary epoch:
 - Train loss: 0.3123, accu: 0.9014
 - Valid loss: 0.2051, accu: 0.9413
Epoch 45/99


train: 100%|██████████| 2710/2710 [00:38<00:00, 70.21it/s,  loss: 1.2700, accu: 0.5000]
valid: 100%|██████████| 678/678 [00:03<00:00, 223.22it/s,  loss: 0.0266, accu: 1.0000]


Summary epoch:
 - Train loss: 0.3118, accu: 0.9025
 - Valid loss: 0.1889, accu: 0.9487
Epoch 46/99


train: 100%|██████████| 2710/2710 [00:38<00:00, 70.37it/s,  loss: 1.6142, accu: 0.6000]
valid: 100%|██████████| 678/678 [00:03<00:00, 223.23it/s,  loss: 2.2872, accu: 0.3636]


Summary epoch:
 - Train loss: 0.3095, accu: 0.9022
 - Valid loss: 1.0975, accu: 0.7027
Epoch 47/99


train: 100%|██████████| 2710/2710 [00:38<00:00, 70.51it/s,  loss: 0.8793, accu: 0.6000]
valid: 100%|██████████| 678/678 [00:03<00:00, 223.12it/s,  loss: 0.0330, accu: 1.0000]


Summary epoch:
 - Train loss: 0.3029, accu: 0.9046
 - Valid loss: 0.1923, accu: 0.9484
Epoch 48/99


train: 100%|██████████| 2710/2710 [00:38<00:00, 70.44it/s,  loss: 1.6992, accu: 0.7000]
valid: 100%|██████████| 678/678 [00:03<00:00, 223.44it/s,  loss: 0.0422, accu: 1.0000]


Summary epoch:
 - Train loss: 0.2986, accu: 0.9050
 - Valid loss: 0.2081, accu: 0.9421
Epoch 49/99


train: 100%|██████████| 2710/2710 [00:38<00:00, 70.50it/s,  loss: 0.0979, accu: 1.0000]
valid: 100%|██████████| 678/678 [00:03<00:00, 222.89it/s,  loss: 0.0374, accu: 1.0000]


Summary epoch:
 - Train loss: 0.2905, accu: 0.9079
 - Valid loss: 0.1578, accu: 0.9561
Epoch 50/99


train: 100%|██████████| 2710/2710 [00:38<00:00, 70.15it/s,  loss: 0.4378, accu: 0.9000]
valid: 100%|██████████| 678/678 [00:03<00:00, 223.00it/s,  loss: 0.0418, accu: 1.0000]


Summary epoch:
 - Train loss: 0.2863, accu: 0.9097
 - Valid loss: 0.1734, accu: 0.9529
Epoch 51/99


train: 100%|██████████| 2710/2710 [00:38<00:00, 70.23it/s,  loss: 1.9617, accu: 0.7000]
valid: 100%|██████████| 678/678 [00:03<00:00, 223.63it/s,  loss: 0.1471, accu: 0.9091]


Summary epoch:
 - Train loss: 0.2797, accu: 0.9114
 - Valid loss: 0.2380, accu: 0.9325
Epoch 52/99


train: 100%|██████████| 2710/2710 [00:38<00:00, 70.39it/s,  loss: 1.9343, accu: 0.4000]
valid: 100%|██████████| 678/678 [00:03<00:00, 222.97it/s,  loss: 0.1366, accu: 0.9091]


Summary epoch:
 - Train loss: 0.2833, accu: 0.9100
 - Valid loss: 0.2213, accu: 0.9361
Epoch 53/99


train: 100%|██████████| 2710/2710 [00:38<00:00, 70.54it/s,  loss: 1.3485, accu: 0.6000]
valid: 100%|██████████| 678/678 [00:03<00:00, 223.84it/s,  loss: 0.4431, accu: 0.8182]


Summary epoch:
 - Train loss: 0.2780, accu: 0.9124
 - Valid loss: 0.2505, accu: 0.9253
Epoch 54/99


train: 100%|██████████| 2710/2710 [00:38<00:00, 70.24it/s,  loss: 1.0994, accu: 0.7000]
valid: 100%|██████████| 678/678 [00:03<00:00, 223.61it/s,  loss: 1.1953, accu: 0.6364]


Summary epoch:
 - Train loss: 0.2715, accu: 0.9148
 - Valid loss: 0.6913, accu: 0.8028
Epoch 55/99


train: 100%|██████████| 2710/2710 [00:38<00:00, 70.41it/s,  loss: 1.0753, accu: 0.7000]
valid: 100%|██████████| 678/678 [00:03<00:00, 223.36it/s,  loss: 0.0288, accu: 1.0000]


Summary epoch:
 - Train loss: 0.2717, accu: 0.9135
 - Valid loss: 0.1633, accu: 0.9537
Epoch 56/99


train: 100%|██████████| 2710/2710 [00:38<00:00, 70.42it/s,  loss: 0.3575, accu: 0.8000]
valid: 100%|██████████| 678/678 [00:03<00:00, 223.76it/s,  loss: 2.7954, accu: 0.4545]


Summary epoch:
 - Train loss: 0.2669, accu: 0.9160
 - Valid loss: 1.8306, accu: 0.5687
Epoch 57/99


train: 100%|██████████| 2710/2710 [00:38<00:00, 70.37it/s,  loss: 1.1318, accu: 0.7000]
valid: 100%|██████████| 678/678 [00:03<00:00, 222.90it/s,  loss: 1.8620, accu: 0.6364]


Summary epoch:
 - Train loss: 0.2719, accu: 0.9144
 - Valid loss: 0.8630, accu: 0.7672
Epoch 58/99


train: 100%|██████████| 2710/2710 [00:38<00:00, 70.45it/s,  loss: 0.5297, accu: 0.7000]
valid: 100%|██████████| 678/678 [00:03<00:00, 224.13it/s,  loss: 0.0492, accu: 1.0000]


Summary epoch:
 - Train loss: 0.2670, accu: 0.9150
 - Valid loss: 0.1769, accu: 0.9508
Epoch 59/99


train: 100%|██████████| 2710/2710 [00:38<00:00, 70.15it/s,  loss: 0.5690, accu: 0.9000]
valid: 100%|██████████| 678/678 [00:03<00:00, 224.43it/s,  loss: 0.1314, accu: 0.9091]


Summary epoch:
 - Train loss: 0.2620, accu: 0.9173
 - Valid loss: 0.1673, accu: 0.9529
Epoch 60/99


train: 100%|██████████| 2710/2710 [00:38<00:00, 70.43it/s,  loss: 1.3025, accu: 0.6000]
valid: 100%|██████████| 678/678 [00:03<00:00, 222.88it/s,  loss: 0.0989, accu: 1.0000]


Summary epoch:
 - Train loss: 0.2624, accu: 0.9173
 - Valid loss: 0.1632, accu: 0.9553
Epoch 61/99


train: 100%|██████████| 2710/2710 [00:38<00:00, 70.14it/s,  loss: 0.5111, accu: 0.9000]
valid: 100%|██████████| 678/678 [00:03<00:00, 221.47it/s,  loss: 0.0246, accu: 1.0000]


Summary epoch:
 - Train loss: 0.2591, accu: 0.9184
 - Valid loss: 0.1579, accu: 0.9563
Epoch 62/99


train: 100%|██████████| 2710/2710 [00:38<00:00, 70.41it/s,  loss: 1.7214, accu: 0.7000]
valid: 100%|██████████| 678/678 [00:03<00:00, 222.55it/s,  loss: 0.0277, accu: 1.0000]


Summary epoch:
 - Train loss: 0.2561, accu: 0.9188
 - Valid loss: 0.2039, accu: 0.9402
Epoch 63/99


train: 100%|██████████| 2710/2710 [00:38<00:00, 70.49it/s,  loss: 1.3985, accu: 0.7000]
valid: 100%|██████████| 678/678 [00:03<00:00, 222.61it/s,  loss: 0.2536, accu: 0.9091]


Summary epoch:
 - Train loss: 0.2497, accu: 0.9213
 - Valid loss: 0.4665, accu: 0.8634
Epoch 64/99


train: 100%|██████████| 2710/2710 [00:38<00:00, 70.44it/s,  loss: 0.6861, accu: 0.8000]
valid: 100%|██████████| 678/678 [00:03<00:00, 224.26it/s,  loss: 0.0183, accu: 1.0000]


Summary epoch:
 - Train loss: 0.2515, accu: 0.9201
 - Valid loss: 0.1735, accu: 0.9532
Epoch 65/99


train: 100%|██████████| 2710/2710 [00:38<00:00, 70.43it/s,  loss: 3.1724, accu: 0.3000]
valid: 100%|██████████| 678/678 [00:03<00:00, 223.70it/s,  loss: 1.6129, accu: 0.6364]


Summary epoch:
 - Train loss: 0.2393, accu: 0.9237
 - Valid loss: 0.8831, accu: 0.7654
Epoch 66/99


train: 100%|██████████| 2710/2710 [00:38<00:00, 70.39it/s,  loss: 1.2788, accu: 0.5000]
valid: 100%|██████████| 678/678 [00:03<00:00, 223.08it/s,  loss: 1.4234, accu: 0.6364]


Summary epoch:
 - Train loss: 0.2373, accu: 0.9243
 - Valid loss: 1.0058, accu: 0.7311
Epoch 67/99


train: 100%|██████████| 2710/2710 [00:38<00:00, 70.55it/s,  loss: 3.0218, accu: 0.5000]
valid: 100%|██████████| 678/678 [00:03<00:00, 223.30it/s,  loss: 0.1388, accu: 0.9091]


Summary epoch:
 - Train loss: 0.2447, accu: 0.9232
 - Valid loss: 0.2308, accu: 0.9342
Epoch 68/99


train: 100%|██████████| 2710/2710 [00:38<00:00, 70.33it/s,  loss: 1.7773, accu: 0.6000]
valid: 100%|██████████| 678/678 [00:03<00:00, 222.66it/s,  loss: 0.0347, accu: 1.0000]


Summary epoch:
 - Train loss: 0.2369, accu: 0.9255
 - Valid loss: 0.1742, accu: 0.9508
Epoch 69/99


train: 100%|██████████| 2710/2710 [00:38<00:00, 70.39it/s,  loss: 0.8529, accu: 0.8000]
valid: 100%|██████████| 678/678 [00:03<00:00, 222.77it/s,  loss: 0.0062, accu: 1.0000]


Summary epoch:
 - Train loss: 0.2367, accu: 0.9252
 - Valid loss: 0.1618, accu: 0.9558
Epoch 70/99


train: 100%|██████████| 2710/2710 [00:38<00:00, 70.45it/s,  loss: 0.5748, accu: 0.8000]
valid: 100%|██████████| 678/678 [00:03<00:00, 223.24it/s,  loss: 0.0142, accu: 1.0000]


Summary epoch:
 - Train loss: 0.2330, accu: 0.9261
 - Valid loss: 0.1571, accu: 0.9558
Epoch 71/99


train: 100%|██████████| 2710/2710 [00:38<00:00, 70.49it/s,  loss: 2.0335, accu: 0.6000]
valid: 100%|██████████| 678/678 [00:03<00:00, 221.91it/s,  loss: 0.0631, accu: 1.0000]


Summary epoch:
 - Train loss: 0.2370, accu: 0.9253
 - Valid loss: 0.1644, accu: 0.9535
Epoch 72/99


train: 100%|██████████| 2710/2710 [00:38<00:00, 70.43it/s,  loss: 1.9293, accu: 0.8000]
valid: 100%|██████████| 678/678 [00:03<00:00, 224.41it/s,  loss: 0.0096, accu: 1.0000]


Summary epoch:
 - Train loss: 0.2295, accu: 0.9270
 - Valid loss: 0.1604, accu: 0.9565
Epoch 73/99


train: 100%|██████████| 2710/2710 [00:38<00:00, 70.46it/s,  loss: 1.7938, accu: 0.5000]
valid: 100%|██████████| 678/678 [00:03<00:00, 222.57it/s,  loss: 0.0441, accu: 1.0000]


Summary epoch:
 - Train loss: 0.2324, accu: 0.9259
 - Valid loss: 0.2783, accu: 0.9184
Epoch 74/99


train: 100%|██████████| 2710/2710 [00:38<00:00, 70.53it/s,  loss: 1.9306, accu: 0.5000]
valid: 100%|██████████| 678/678 [00:03<00:00, 222.75it/s,  loss: 0.0400, accu: 1.0000]


Summary epoch:
 - Train loss: 0.2246, accu: 0.9276
 - Valid loss: 0.2106, accu: 0.9432
Epoch 75/99


train: 100%|██████████| 2710/2710 [00:38<00:00, 70.46it/s,  loss: 0.3817, accu: 0.9000]
valid: 100%|██████████| 678/678 [00:03<00:00, 223.78it/s,  loss: 0.0136, accu: 1.0000]


Summary epoch:
 - Train loss: 0.2221, accu: 0.9288
 - Valid loss: 0.1497, accu: 0.9592
Epoch 76/99


train: 100%|██████████| 2710/2710 [00:38<00:00, 70.39it/s,  loss: 1.2420, accu: 0.7000]
valid: 100%|██████████| 678/678 [00:03<00:00, 223.46it/s,  loss: 0.0454, accu: 1.0000]


Summary epoch:
 - Train loss: 0.2219, accu: 0.9301
 - Valid loss: 0.1825, accu: 0.9487
Epoch 77/99


train: 100%|██████████| 2710/2710 [00:38<00:00, 70.28it/s,  loss: 0.5465, accu: 0.7000]
valid: 100%|██████████| 678/678 [00:03<00:00, 224.22it/s,  loss: 0.0121, accu: 1.0000]


Summary epoch:
 - Train loss: 0.2285, accu: 0.9268
 - Valid loss: 0.1529, accu: 0.9582
Epoch 78/99


train: 100%|██████████| 2710/2710 [00:38<00:00, 70.47it/s,  loss: 0.9699, accu: 0.6000]
valid: 100%|██████████| 678/678 [00:03<00:00, 223.27it/s,  loss: 0.0174, accu: 1.0000]


Summary epoch:
 - Train loss: 0.2189, accu: 0.9304
 - Valid loss: 0.1516, accu: 0.9579
Epoch 79/99


train: 100%|██████████| 2710/2710 [00:38<00:00, 70.56it/s,  loss: 0.9718, accu: 0.7000]
valid: 100%|██████████| 678/678 [00:03<00:00, 224.18it/s,  loss: 2.0938, accu: 0.5455]


Summary epoch:
 - Train loss: 0.2189, accu: 0.9300
 - Valid loss: 1.3549, accu: 0.6628
Epoch 80/99


train: 100%|██████████| 2710/2710 [00:38<00:00, 70.56it/s,  loss: 1.0795, accu: 0.6000]
valid: 100%|██████████| 678/678 [00:03<00:00, 224.07it/s,  loss: 0.0497, accu: 1.0000]


Summary epoch:
 - Train loss: 0.2146, accu: 0.9312
 - Valid loss: 0.1584, accu: 0.9564
Epoch 81/99


train: 100%|██████████| 2710/2710 [00:38<00:00, 70.40it/s,  loss: 0.2472, accu: 0.9000]
valid: 100%|██████████| 678/678 [00:03<00:00, 224.06it/s,  loss: 2.3681, accu: 0.6364]


Summary epoch:
 - Train loss: 0.2108, accu: 0.9324
 - Valid loss: 1.2893, accu: 0.6928
Epoch 82/99


train: 100%|██████████| 2710/2710 [00:38<00:00, 70.55it/s,  loss: 3.7780, accu: 0.3000]
valid: 100%|██████████| 678/678 [00:03<00:00, 222.73it/s,  loss: 3.4830, accu: 0.4545]


Summary epoch:
 - Train loss: 0.2112, accu: 0.9324
 - Valid loss: 2.0759, accu: 0.5517
Epoch 83/99


train: 100%|██████████| 2710/2710 [00:38<00:00, 70.53it/s,  loss: 0.1936, accu: 0.9000]
valid: 100%|██████████| 678/678 [00:03<00:00, 223.44it/s,  loss: 0.1146, accu: 0.9091]


Summary epoch:
 - Train loss: 0.2139, accu: 0.9311
 - Valid loss: 0.1364, accu: 0.9638
Epoch 84/99


train: 100%|██████████| 2710/2710 [00:38<00:00, 70.43it/s,  loss: 0.3663, accu: 0.9000]
valid: 100%|██████████| 678/678 [00:03<00:00, 223.22it/s,  loss: 0.0042, accu: 1.0000]


Summary epoch:
 - Train loss: 0.2068, accu: 0.9336
 - Valid loss: 0.1427, accu: 0.9622
Epoch 85/99


train: 100%|██████████| 2710/2710 [00:38<00:00, 70.57it/s,  loss: 3.3459, accu: 0.3000]
valid: 100%|██████████| 678/678 [00:03<00:00, 223.47it/s,  loss: 0.1184, accu: 0.9091]


Summary epoch:
 - Train loss: 0.2058, accu: 0.9340
 - Valid loss: 0.3111, accu: 0.9173
Epoch 86/99


train: 100%|██████████| 2710/2710 [00:38<00:00, 70.56it/s,  loss: 0.3515, accu: 1.0000]
valid: 100%|██████████| 678/678 [00:03<00:00, 223.64it/s,  loss: 0.1022, accu: 1.0000]


Summary epoch:
 - Train loss: 0.2082, accu: 0.9334
 - Valid loss: 0.2278, accu: 0.9340
Epoch 87/99


train: 100%|██████████| 2710/2710 [00:38<00:00, 70.52it/s,  loss: 0.5623, accu: 0.7000]
valid: 100%|██████████| 678/678 [00:03<00:00, 222.73it/s,  loss: 0.1765, accu: 1.0000]


Summary epoch:
 - Train loss: 0.2042, accu: 0.9341
 - Valid loss: 0.3018, accu: 0.9122
Epoch 88/99


train: 100%|██████████| 2710/2710 [00:38<00:00, 70.46it/s,  loss: 0.6045, accu: 0.8000]
valid: 100%|██████████| 678/678 [00:03<00:00, 223.52it/s,  loss: 0.0067, accu: 1.0000]


Summary epoch:
 - Train loss: 0.1985, accu: 0.9365
 - Valid loss: 0.1474, accu: 0.9609
Epoch 89/99


train: 100%|██████████| 2710/2710 [00:38<00:00, 70.36it/s,  loss: 0.0591, accu: 1.0000]
valid: 100%|██████████| 678/678 [00:03<00:00, 223.69it/s,  loss: 0.0678, accu: 0.9091]


Summary epoch:
 - Train loss: 0.1985, accu: 0.9361
 - Valid loss: 0.1420, accu: 0.9641
Epoch 90/99


train: 100%|██████████| 2710/2710 [00:38<00:00, 70.39it/s,  loss: 2.4193, accu: 0.2000]
valid: 100%|██████████| 678/678 [00:03<00:00, 223.46it/s,  loss: 0.0364, accu: 1.0000]


Summary epoch:
 - Train loss: 0.2011, accu: 0.9351
 - Valid loss: 0.2279, accu: 0.9380
Epoch 91/99


train: 100%|██████████| 2710/2710 [00:38<00:00, 70.49it/s,  loss: 1.1122, accu: 0.7000]
valid: 100%|██████████| 678/678 [00:03<00:00, 224.06it/s,  loss: 0.0900, accu: 1.0000]


Summary epoch:
 - Train loss: 0.2016, accu: 0.9352
 - Valid loss: 0.1820, accu: 0.9512
Epoch 92/99


train: 100%|██████████| 2710/2710 [00:38<00:00, 70.51it/s,  loss: 1.3285, accu: 0.6000]
valid: 100%|██████████| 678/678 [00:03<00:00, 222.56it/s,  loss: 0.0163, accu: 1.0000]


Summary epoch:
 - Train loss: 0.1945, accu: 0.9382
 - Valid loss: 0.1562, accu: 0.9569
Epoch 93/99


train: 100%|██████████| 2710/2710 [00:38<00:00, 70.44it/s,  loss: 1.4843, accu: 0.7000]
valid: 100%|██████████| 678/678 [00:03<00:00, 223.97it/s,  loss: 0.3289, accu: 0.9091]


Summary epoch:
 - Train loss: 0.1930, accu: 0.9383
 - Valid loss: 0.1361, accu: 0.9620
Epoch 94/99


train: 100%|██████████| 2710/2710 [00:38<00:00, 70.53it/s,  loss: 1.2258, accu: 0.7000]
valid: 100%|██████████| 678/678 [00:03<00:00, 223.33it/s,  loss: 1.1643, accu: 0.6364]


Summary epoch:
 - Train loss: 0.1870, accu: 0.9398
 - Valid loss: 0.7101, accu: 0.8072
Epoch 95/99


train: 100%|██████████| 2710/2710 [00:38<00:00, 70.55it/s,  loss: 1.3707, accu: 0.7000]
valid: 100%|██████████| 678/678 [00:03<00:00, 224.27it/s,  loss: 0.0261, accu: 1.0000]


Summary epoch:
 - Train loss: 0.1940, accu: 0.9372
 - Valid loss: 0.1654, accu: 0.9527
Epoch 96/99


train: 100%|██████████| 2710/2710 [00:38<00:00, 70.34it/s,  loss: 0.4672, accu: 0.8000]
valid: 100%|██████████| 678/678 [00:03<00:00, 224.63it/s,  loss: 1.1900, accu: 0.7273]


Summary epoch:
 - Train loss: 0.1956, accu: 0.9372
 - Valid loss: 0.5423, accu: 0.8561
Epoch 97/99


train: 100%|██████████| 2710/2710 [00:38<00:00, 70.52it/s,  loss: 1.0129, accu: 0.6000]
valid: 100%|██████████| 678/678 [00:03<00:00, 223.46it/s,  loss: 0.0555, accu: 1.0000]


Summary epoch:
 - Train loss: 0.1795, accu: 0.9420
 - Valid loss: 0.1534, accu: 0.9573
Epoch 98/99


train: 100%|██████████| 2710/2710 [00:38<00:00, 70.37it/s,  loss: 0.4786, accu: 0.8000]
valid: 100%|██████████| 678/678 [00:03<00:00, 222.60it/s,  loss: 0.0106, accu: 1.0000]


Summary epoch:
 - Train loss: 0.1914, accu: 0.9387
 - Valid loss: 0.1898, accu: 0.9502
Epoch 99/99


train: 100%|██████████| 2710/2710 [00:38<00:00, 70.49it/s,  loss: 1.3843, accu: 0.7000]
valid: 100%|██████████| 678/678 [00:03<00:00, 222.87it/s,  loss: 0.8458, accu: 0.7273]

Summary epoch:
 - Train loss: 0.1830, accu: 0.9406
 - Valid loss: 0.5291, accu: 0.8542





# KSL77 Dataset Video Classification

In [4]:
### Reference from: https://github.com/yysijie/st-gcn/blob/master/net/utils/graph.py

import os
import torch
import numpy as np


class Graph:
    """Skeleton graph whose partitioned adjacency feeds the ST-GCN layers.

    Construction picks the joint connectivity for ``layout``, measures the
    pairwise hop distances and stores the normalized, partitioned adjacency
    stack in ``self.A`` (shape ``(K, num_node, num_node)``).

    Args:
        - layout: (string) skeleton to build, one of
            - coco_cut: COCO format with 4 joints (L-R ears, L-R eyes) cut
                out, 14 nodes.
            - mediapipe_KSL: 47 nodes; nodes 0-4 are the nose/arm chain and
                nodes 5-25 / 26-46 are two 21-node groups (presumably the two
                hands of the MediaPipe output -- TODO confirm).
        - strategy: (string) neighbour partitioning, one of
            - uniform: Uniform Labeling,
            - distance: Distance Partitioning,
            - spatial: Spatial Configuration.
          See the section 'Partition Strategies' of
          https://arxiv.org/abs/1801.07455 for details.
        - max_hop: (int) the maximal distance between two connected nodes.
        - dilation: (int) controls the spacing between the kernel points.

    Raises:
        ValueError: if ``layout`` or ``strategy`` is not one of the above.
    """
    def __init__(self,
                 layout='coco_cut',
                 strategy='uniform',
                 max_hop=1,
                 dilation=1):
        self.max_hop = max_hop
        self.dilation = dilation

        # Order matters: edges -> hop distances -> adjacency partitions.
        self.get_edge(layout)
        self.hop_dis = get_hop_distance(self.num_node, self.edge, max_hop)
        self.get_adjacency(strategy)

    def get_edge(self, layout):
        """Set ``num_node``, ``edge`` (self links + bones) and ``center``."""
        if layout == 'coco_cut':
            joint_count = 14
            bones = [(6, 4), (4, 2), (2, 13), (13, 1), (5, 3), (3, 1), (12, 10),
                     (10, 8), (8, 2), (11, 9), (9, 7), (7, 1), (13, 0)]
            root = 13
        elif layout == 'mediapipe_KSL':
            joint_count = 47
            # used_key_points=
            # [0,11,12,13,14]+[i for i in range(33,33+21)] + [i for i in range(54,54+21)]
            bones = [(0, 1), (0, 2), (1, 3), (2, 4), (3, 26), (4, 5)]  # nose-arms-wrist
            # Nodes 5 and 26 each anchor five chains of four consecutive
            # nodes (presumably wrist -> finger joints).
            for anchor in (5, 26):
                for chain in range(5):
                    first = anchor + 1 + 4 * chain
                    bones += [(anchor, first),
                              (first, first + 1),
                              (first + 1, first + 2),
                              (first + 2, first + 3)]
            root = 0
        else:
            raise ValueError('This layout is not supported!')

        self.num_node = joint_count
        self.edge = [(i, i) for i in range(joint_count)] + bones
        self.center = root

    def get_adjacency(self, strategy):
        """Build ``self.A`` from ``self.hop_dis`` according to ``strategy``."""
        hops = range(0, self.max_hop + 1, self.dilation)
        size = (self.num_node, self.num_node)

        # Binary adjacency of every pair whose hop distance is a kept hop.
        reachable = np.zeros(size)
        for hop in hops:
            reachable[self.hop_dis == hop] = 1
        weights = normalize_digraph(reachable)

        if strategy == 'uniform':
            # A single partition holding the whole normalized adjacency.
            self.A = weights[np.newaxis, ...].copy()
        elif strategy == 'distance':
            # One partition per kept hop distance.
            layers = np.zeros((len(hops),) + size)
            for k, hop in enumerate(hops):
                at_hop = self.hop_dis == hop
                layers[k][at_hop] = weights[at_hop]
            self.A = layers
        elif strategy == 'spatial':
            # Compare each pair's hop distance to the center node; rows follow
            # index j and columns follow index i of hop_dis[j, i].
            to_center = self.hop_dis[:, self.center]
            row_dist = to_center[:, np.newaxis]
            col_dist = to_center[np.newaxis, :]
            same_ring = row_dist == col_dist
            row_farther = row_dist > col_dist
            row_nearer = ~(same_ring | row_farther)

            partitions = []
            for hop in hops:
                at_hop = self.hop_dis == hop
                a_root = np.where(at_hop & same_ring, weights, 0.0)
                a_close = np.where(at_hop & row_farther, weights, 0.0)
                a_further = np.where(at_hop & row_nearer, weights, 0.0)
                if hop == 0:
                    partitions.append(a_root)
                else:
                    partitions.append(a_root + a_close)
                    partitions.append(a_further)
            self.A = np.stack(partitions)
        else:
            raise ValueError("This strategy is not supported!")


def get_hop_distance(num_node, edge, max_hop=1):
    """Return the pairwise hop-distance matrix of an undirected graph.

    Args:
        - num_node: (int) number of graph nodes.
        - edge: iterable of ``(i, j)`` index pairs; each pair is treated as
            undirected.
        - max_hop: (int) largest walk length to look for.

    Returns:
        ``(num_node, num_node)`` float array. Entry ``[a, b]`` is the smallest
        ``d <= max_hop`` for which a walk of length ``d`` joins ``a`` and
        ``b``, or ``inf`` when there is none.
    """
    links = np.zeros((num_node, num_node))
    for src, dst in edge:
        links[dst, src] = 1
        links[src, dst] = 1

    # links**d has a positive entry wherever a walk of exactly d steps exists.
    walk_counts = np.stack(
        [np.linalg.matrix_power(links, d) for d in range(max_hop + 1)])
    reachable = walk_counts > 0

    hop_dis = np.full((num_node, num_node), np.inf)
    # Go from the longest walk down so shorter distances overwrite longer ones.
    for d in reversed(range(max_hop + 1)):
        hop_dis[reachable[d]] = d
    return hop_dis


def normalize_digraph(A):
    """Scale every column of ``A`` by the reciprocal of its column sum.

    Computes ``A @ D^-1`` where ``D`` is the diagonal matrix of column sums.
    Columns whose sum is not positive are multiplied by zero.

    Args:
        - A: square 2-D array (adjacency matrix).

    Returns:
        Array of the same shape as ``A``.
    """
    size = A.shape[0]
    col_sum = np.sum(A, 0)
    inv_degree = np.zeros((size, size))
    for node in range(size):
        degree = col_sum[node]
        if degree > 0:
            inv_degree[node, node] = degree ** (-1)
    return np.dot(A, inv_degree)


def normalize_undigraph(A):
    """Symmetrically normalize ``A`` as ``D^-1/2 @ A @ D^-1/2``.

    ``D`` is the diagonal matrix of column sums of ``A``; entries whose sum
    is not positive get a zero scaling factor.

    Args:
        - A: square 2-D array (adjacency matrix).

    Returns:
        Array of the same shape as ``A``.
    """
    size = A.shape[0]
    col_sum = np.sum(A, 0)
    inv_sqrt_degree = np.zeros((size, size))
    for node in range(size):
        degree = col_sum[node]
        if degree > 0:
            inv_sqrt_degree[node, node] = degree ** (-0.5)
    half_scaled = np.dot(inv_sqrt_degree, A)
    return np.dot(half_scaled, inv_sqrt_degree)

In [5]:
### Reference from: https://github.com/yysijie/st-gcn/tree/master/net

import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np

# from Actionsrecognition.Utils import Graph


class GraphConvolution(nn.Module):
    """Graph convolution: a temporal-by-1 convolution followed by adjacency mixing.

    Args:
        - in_channels: (int) Number of channels in the input sequence data.
        - out_channels: (int) Number of channels produced by the convolution.
        - kernel_size: (int) Size of the graph convolving kernel, i.e. the
            number ``K`` of adjacency partitions.
        - t_kernel_size: (int) Size of the temporal convolving kernel.
        - t_stride: (int, optional) Stride of the temporal convolution. Default: 1
        - t_padding: (int, optional) Temporal zero-padding added to both sides of
            the input. Default: 0
        - t_dilation: (int, optional) Spacing between temporal kernel elements.
            Default: 1
        - bias: (bool, optional) If `True`, adds a learnable bias to the output.
            Default: `True`
    Shape:
        - Inputs x: Graph sequence in :math:`(N, in_channels, T_{in}, V)`,
                 A: Graph adjacency matrix in :math:`(K, V, V)`,
        - Output: Graph sequence out in :math:`(N, out_channels, T_{out}, V)`

            where
                :math:`N` is a batch size,
                :math:`K` is the spatial kernel size, as :math:`K == kernel_size`,
                :math:`T_{in}/T_{out}` is a length of input/output sequence,
                :math:`V` is the number of graph nodes.

    """
    def __init__(self, in_channels, out_channels, kernel_size,
                 t_kernel_size=1,
                 t_stride=1,
                 t_padding=0,
                 t_dilation=1,
                 bias=True):
        super().__init__()

        self.kernel_size = kernel_size
        # One group of out_channels per adjacency partition, produced by a
        # convolution that only spans the temporal axis.
        self.conv = nn.Conv2d(
            in_channels,
            out_channels * kernel_size,
            kernel_size=(t_kernel_size, 1),
            stride=(t_stride, 1),
            padding=(t_padding, 0),
            dilation=(t_dilation, 1),
            bias=bias,
        )

    def forward(self, x, A):
        features = self.conv(x)
        batch, stacked, frames, joints = features.size()
        per_partition = stacked // self.kernel_size
        # Split the stacked channels into (K, out_channels).
        features = features.view(batch, self.kernel_size, per_partition,
                                 frames, joints)
        # Sum over partitions k and source nodes v, weighted by A[k, v, w].
        mixed = torch.einsum('nkctv,kvw->nctw', features, A)

        return mixed.contiguous()


class _ZeroResidual(nn.Module):
    """Parameter-free residual branch that contributes nothing (returns 0)."""

    def forward(self, x):
        return 0


class st_gcn(nn.Module):
    """Applies a spatial temporal graph convolution over an input graph sequence.

    The block is ``GraphConvolution -> BN -> ReLU -> temporal Conv2d -> BN ->
    Dropout``, summed with a residual branch and passed through a final ReLU.

    Args:
        - in_channels: (int) Number of channels in the input sequence data.
        - out_channels: (int) Number of channels produced by the convolution.
        - kernel_size: (tuple) ``(temporal_kernel, graph_kernel)``; the
            temporal kernel size must be odd so the padding keeps the
            sequence length (for stride 1).
        - stride: (int, optional) Stride of the temporal convolution. Default: 1
        - dropout: (float, optional) Dropout probability of the final output.
            Default: 0
        - residual: (bool, optional) If `True`, applies a residual mechanism.
            Default: `True`
    Shape:
        - Inputs x: Graph sequence in :math: `(N, in_channels, T_{in}, V)`,
                 A: Graph adjacency matrix in :math: `(K, V, V)`,
        - Output: Graph sequence out in :math: `(N, out_channels, T_{out}, V)`
            where
                :math:`N` is a batch size,
                :math:`K` is the spatial kernel size, as :math:`K == kernel_size[1]`,
                :math:`T_{in}/T_{out}` is a length of input/output sequence,
                :math:`V` is the number of graph nodes.
    """
    def __init__(self, in_channels, out_channels, kernel_size,
                 stride=1,
                 dropout=0,
                 residual=True):
        super().__init__()
        assert len(kernel_size) == 2
        assert kernel_size[0] % 2 == 1
        # e.g. kernel_size == (9, 3) gives padding == (4, 0): pad only in time.
        padding = ((kernel_size[0] - 1) // 2, 0)

        self.gcn = GraphConvolution(in_channels, out_channels, kernel_size[1])
        self.tcn = nn.Sequential(nn.BatchNorm2d(out_channels),
                                 nn.ReLU(inplace=True),
                                 nn.Conv2d(out_channels,
                                           out_channels,
                                           (kernel_size[0], 1),
                                           (stride, 1),
                                           padding),
                                 nn.BatchNorm2d(out_channels),
                                 nn.Dropout(dropout, inplace=True),
                                 )

        # The residual branch is always an nn.Module (never a lambda) so the
        # whole network stays picklable; the parameter-free variants add no
        # entries to the state_dict.
        if not residual:
            self.residual = _ZeroResidual()
        elif (in_channels == out_channels) and (stride == 1):
            self.residual = nn.Identity()
        else:
            # Shapes differ: project channels / subsample time with a 1x1 conv.
            self.residual = nn.Sequential(nn.Conv2d(in_channels,
                                                    out_channels,
                                                    kernel_size=1,
                                                    stride=(stride, 1)),
                                          nn.BatchNorm2d(out_channels)
                                          )
        self.relu = nn.ReLU(inplace=True)

    def forward(self, x, A):
        # Take the residual from the block input before it is transformed.
        res = self.residual(x)
        x = self.gcn(x, A)
        x = self.tcn(x) + res
        return self.relu(x)


class StreamSpatialTemporalGraph(nn.Module):
    """Spatial temporal graph convolutional networks.

    A stack of ten ``st_gcn`` blocks (64 -> 128 -> 256 channels) over a fixed
    skeleton graph, followed by global average pooling and an optional
    1x1-convolution classifier.

    Args:
        - in_channels: (int) Number of input channels.
        - graph_args: (dict) Keyword arguments passed to the `Graph` class.
        - num_class: (int) Number of class outputs. If `None` return pooling features of
            the last st-gcn layer instead.
        - edge_importance_weighting: (bool) If `True`, adds a learnable importance
            weighting to the edges of the graph.
        - **kwargs: (optional) Other parameters for graph convolution units.
            A `dropout` entry is not applied to the first unit.
    Shape:
        - Input: :math:`(N, in_channels, T_{in}, V_{in})`
        - Output: :math:`(N, num_class)` where
            :math:`N` is a batch size,
            :math:`T_{in}` is a length of input sequence,
            :math:`V_{in}` is the number of graph nodes,
        or If num_class is `None`: `(N, out_channels)`
            :math:`out_channels` is number of out_channels of the last layer.
    """
    def __init__(self, in_channels, graph_args, num_class=None,
                 edge_importance_weighting=True, **kwargs):
        super().__init__()
        # Load graph; the adjacency is a buffer so it follows .to()/.cuda()
        # and is saved with the model without being trained.
        graph = Graph(**graph_args)
        A = torch.tensor(graph.A, dtype=torch.float32, requires_grad=False)
        self.register_buffer('A', A)

        # Networks.
        spatial_kernel_size = A.size(0)
        temporal_kernel_size = 9
        kernel_size = (temporal_kernel_size, spatial_kernel_size)
        # The first unit receives every extra option except dropout.
        kwargs0 = {k: v for k, v in kwargs.items() if k != 'dropout'}

        # Normalizes each (node, channel) pair over batch and time.
        self.data_bn = nn.BatchNorm1d(in_channels * A.size(1))
        self.st_gcn_networks = nn.ModuleList((
            st_gcn(in_channels, 64, kernel_size, 1, residual=False, **kwargs0),
            st_gcn(64, 64, kernel_size, 1, **kwargs),
            st_gcn(64, 64, kernel_size, 1, **kwargs),
            st_gcn(64, 64, kernel_size, 1, **kwargs),
            st_gcn(64, 128, kernel_size, 2, **kwargs),
            st_gcn(128, 128, kernel_size, 1, **kwargs),
            st_gcn(128, 128, kernel_size, 1, **kwargs),
            st_gcn(128, 256, kernel_size, 2, **kwargs),
            st_gcn(256, 256, kernel_size, 1, **kwargs),
            st_gcn(256, 256, kernel_size, 1, **kwargs)
        ))

        # initialize parameters for edge importance weighting.
        if edge_importance_weighting:
            # One learnable mask, shaped like A, per st-gcn unit.
            self.edge_importance = nn.ParameterList([
                nn.Parameter(torch.ones(A.size()))
                for _ in self.st_gcn_networks
            ])
        else:
            self.edge_importance = [1] * len(self.st_gcn_networks)

        if num_class is not None:
            self.cls = nn.Conv2d(256, num_class, kernel_size=1)
        else:
            # nn.Identity rather than a lambda keeps the module picklable.
            self.cls = nn.Identity()

    def forward(self, x):
        # data normalization.
        N, C, T, V = x.size()
        x = x.permute(0, 3, 1, 2).contiguous()  # (N, V, C, T)
        x = x.view(N, V * C, T)
        x = self.data_bn(x)
        x = x.view(N, V, C, T)
        x = x.permute(0, 2, 3, 1).contiguous()  # back to (N, C, T, V)

        # forward.
        for gcn, importance in zip(self.st_gcn_networks, self.edge_importance):
            x = gcn(x, self.A * importance)

        # Global average over the remaining time and node axes.
        x = F.avg_pool2d(x, x.size()[2:])
        x = self.cls(x)
        x = x.view(x.size(0), -1)

        return x


class TwoStreamSpatialTemporalGraph(nn.Module):
    """Two-stream spatial temporal graph convolutional classifier.

    One stream consumes the key points, the other their frame-to-frame
    motion; the two stream embeddings are concatenated and classified by a
    single linear layer.
    Args:
        - graph_args: (dict) Arguments forwarded to each stream for building
            its graph (e.g. `layout`, `strategy`).
        - num_class: (int) Number of class outputs.
        - edge_importance_weighting: (bool) If `True`, adds a learnable importance
            weighting to the edges of the graph.
        - **kwargs: (optional) Other parameters for graph convolution units.
    Shape:
        - Input: a pair `(points, motions)` indexed as `inputs[0]` and
            `inputs[1]`. Both streams are constructed with 3 input channels,
            so each tensor is expected as `(N, 3, T, V)`; the two `T` values
            may differ.
        - Output: `(N, num_class)` raw scores (no softmax / sigmoid applied).
    """
    def __init__(self, graph_args, num_class, edge_importance_weighting=True,
                 **kwargs):
        super().__init__()
        # Identical configuration for both streams: 3 input channels and
        # `None` in the third slot, so each stream emits features rather
        # than class scores.
        stream_args = (3, graph_args, None, edge_importance_weighting)
        self.pts_stream = StreamSpatialTemporalGraph(*stream_args, **kwargs)
        self.mot_stream = StreamSpatialTemporalGraph(*stream_args, **kwargs)

        # Linear head over the two concatenated 256-feature stream outputs.
        # (A bottleneck attention gate over the concatenated features was
        # previously sketched here in comments; it is not part of the model.)
        self.fcn = nn.Linear(256 * 2, num_class)

    def forward(self, inputs):
        pts_feat = self.pts_stream(inputs[0])
        mot_feat = self.mot_stream(inputs[1])

        # Fuse along the feature axis and classify; logits are returned as-is.
        fused = torch.cat((pts_feat, mot_feat), dim=-1)
        return self.fcn(fused)

In [6]:
import os
import time
import torch
import pickle
import numpy as np
import torch.nn.functional as F
from shutil import copyfile
from tqdm import tqdm
from torch.utils import data
from torch.optim.adadelta import Adadelta
from sklearn.model_selection import train_test_split

# Use the GPU when PyTorch can see one, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

print("using", device, "device")

# Training-schedule constants.
epochs = 100
# NOTE(review): the load_dataset call further down passes a literal 32
# rather than this value.
batch_size = 64

def load_dataset(data_files, batch_size, split_size=0.2, used_key_points=None):
    """Load pickled data files into torch DataLoaders, optionally split.

    Args:
        - data_files: iterable of paths. Each file must unpickle to a
            `(features, labels)` pair; the pairs are concatenated on axis 0.
        - batch_size: (int) batch size used for every returned loader.
        - split_size: (float) passed to `train_test_split` as `test_size`
            (stratified on the labels, `random_state=0`). A value `<= 0`
            disables the split.
        - used_key_points: optional list/array of indices selected on the
            4th axis of the features; `None` keeps every index.
    Returns:
        - `(train_loader, valid_loader)`; `valid_loader` is `None` when no
            split is made. Only the training loader shuffles.
    """
    features, labels = [], []
    for fil in data_files:
        # NOTE: pickle.load can execute arbitrary code; only pass files that
        # come from a trusted source.
        with open(fil, 'rb') as f:
            fts, lbs = pickle.load(f)
        features.append(fts)
        labels.append(lbs)
    features = np.concatenate(features, axis=0)
    labels = np.concatenate(labels, axis=0)

    # Identity check: `!= None` on an ndarray compares element-wise and makes
    # the `if` raise "truth value of an array is ambiguous".
    if used_key_points is not None:
        features = features[:, :, :, used_key_points]
    print(features.shape)
    print(labels.shape)

    def _as_loader(x, y, shuffle):
        # Features become float32 and labels int64 before batching.
        dataset = data.TensorDataset(torch.tensor(x, dtype=torch.float32),
                                     torch.tensor(y, dtype=torch.int64))
        return data.DataLoader(dataset, batch_size, shuffle=shuffle)

    if split_size > 0:
        x_train, x_valid, y_train, y_valid = train_test_split(
            features, labels, test_size=split_size, random_state=0,
            stratify=labels)
        train_loader = _as_loader(x_train, y_train, True)
        valid_loader = _as_loader(x_valid, y_valid, False)
    else:
        train_loader = _as_loader(features, labels, True)
        valid_loader = None
    return train_loader, valid_loader

def accuracy_batch(y_pred, y_true):
    """Return the fraction of rows whose arg-max over axis 1 equals the label.

    `y_pred` holds one row of scores per sample and `y_true` the matching
    class indices (not one-hot vectors).
    """
    predicted = y_pred.argmax(1)
    hits = predicted == y_true
    return hits.mean()


def set_training(model, mode=True):
    """Switch `model` between training and frozen-evaluation behaviour.

    Sets `requires_grad` to `mode` on every parameter and puts the module in
    train mode (`mode=True`) or eval mode (`mode=False`). The same model
    object is returned.
    """
    for param in model.parameters():
        param.requires_grad_(mode)
    model.train(mode)
    return model

# Checkpoints are written under $HOME/KSL_V2/Outputs (created when missing).
save_folder = os.path.join(os.environ['HOME'],"KSL_V2/Outputs")
os.makedirs(save_folder,exist_ok=True)

# 47 indices kept on the key-point axis: five single points followed by two
# consecutive 21-point ranges (33..53 and 54..74).
used_key_points = [0, 11, 12, 13, 14] + list(range(33, 33 + 21)) + list(range(54, 54 + 21))
train_loader, valid_loader = load_dataset(
    [os.path.join(os.environ['HOME'],"KSL_V2/Datasets/KSL77_25_dataset.pkl")],
    32, 0.2, used_key_points)  # batch_size = 32
dataloader = {'train': train_loader, 'valid': valid_loader}
num_class = 77

graph_args = {'layout':'mediapipe_KSL','strategy': 'spatial'}
model = TwoStreamSpatialTemporalGraph(graph_args, num_class).to(device)
n_parameters = sum(p.numel() for p in model.parameters() if p.requires_grad)
print(f"number of params: {n_parameters}")
optimizer = torch.optim.RMSprop(model.parameters(), lr=0.001)
losser = torch.nn.CrossEntropyLoss()

# Per-epoch history of the mean loss / accuracy for each phase.
loss_list = {'train': [], 'valid': []}
accu_list = {'train': [], 'valid': []}
best_acc = -1
for e in range(epochs):
    print('Epoch {}/{}'.format(e, epochs - 1))
    for phase in ['train', 'valid']:
        is_train = phase == 'train'
        model = set_training(model, is_train)

        # Sample-weighted sums, so a smaller final batch does not get the
        # same weight as a full one in the epoch averages.
        run_loss = 0.0
        run_accu = 0.0
        n_seen = 0
        with tqdm(dataloader[phase], desc=phase) as iterator:
            for pts, lbs in iterator:
                # Motion input: difference between consecutive frames of
                # every channel of the same node (one frame shorter than pts).
                mot = pts[:, :, 1:, :] - pts[:, :, :-1, :]

                mot = mot.to(device)
                pts = pts.to(device)
                lbs = lbs.to(device)

                # Forward; no autograd graph is recorded while validating.
                with torch.set_grad_enabled(is_train):
                    out = model((pts, mot))
                    loss = losser(out, lbs)

                if is_train:
                    # Backward.
                    optimizer.zero_grad()
                    loss.backward()
                    optimizer.step()

                n_batch = lbs.size(0)
                accu = accuracy_batch(out.detach().cpu().numpy(),
                                      lbs.detach().cpu().numpy())
                run_loss += loss.item() * n_batch
                run_accu += accu * n_batch
                n_seen += n_batch

                # Iterating the tqdm object already advances the bar, so no
                # manual update() is issued; refresh=False lets tqdm redraw
                # on its own schedule instead of on every batch.
                iterator.set_postfix_str(' loss: {:.4f}, accu: {:.4f}'.format(
                    loss.item(), accu), refresh=False)
        loss_list[phase].append(run_loss / max(n_seen, 1))
        accu_list[phase].append(run_accu / max(n_seen, 1))

    # Keep the weights of the epoch with the best validation accuracy so far.
    if best_acc < accu_list['valid'][-1]:
        best_acc = accu_list['valid'][-1]
        torch.save(model.state_dict(), os.path.join(save_folder, 'tsstg-model_best.pth'))

    print('Summary epoch:\n - Train loss: {:.4f}, accu: {:.4f}\n - Valid loss:'
          ' {:.4f}, accu: {:.4f}'.format(loss_list['train'][-1], accu_list['train'][-1],
                                         loss_list['valid'][-1], accu_list['valid'][-1]))
del model
#0.996577079107505

using cuda device
(78877, 3, 25, 47)
(78877,)
number of params: 6301629
Epoch 0/99


train: 100%|██████████| 1972/1972 [01:58<00:00, 16.65it/s,  loss: 2.4502, accu: 0.3793]
valid: 100%|██████████| 493/493 [00:09<00:00, 53.01it/s,  loss: 2.2034, accu: 0.3750]


Summary epoch:
 - Train loss: 2.5456, accu: 0.2990
 - Valid loss: 1.8127, accu: 0.4618
Epoch 1/99


train: 100%|██████████| 1972/1972 [01:59<00:00, 16.47it/s,  loss: 0.8237, accu: 0.6552]
valid: 100%|██████████| 493/493 [00:09<00:00, 52.69it/s,  loss: 1.8028, accu: 0.4688]


Summary epoch:
 - Train loss: 1.3821, accu: 0.5894
 - Valid loss: 1.4313, accu: 0.5927
Epoch 2/99


train: 100%|██████████| 1972/1972 [02:00<00:00, 16.36it/s,  loss: 0.3319, accu: 0.9655]
valid: 100%|██████████| 493/493 [00:09<00:00, 52.52it/s,  loss: 1.1674, accu: 0.7188]


Summary epoch:
 - Train loss: 0.8636, accu: 0.7388
 - Valid loss: 0.8292, accu: 0.7608
Epoch 3/99


train: 100%|██████████| 1972/1972 [02:00<00:00, 16.32it/s,  loss: 0.2140, accu: 0.9655]
valid: 100%|██████████| 493/493 [00:09<00:00, 52.39it/s,  loss: 1.1643, accu: 0.6562]


Summary epoch:
 - Train loss: 0.5746, accu: 0.8248
 - Valid loss: 0.8759, accu: 0.7551
Epoch 4/99


train: 100%|██████████| 1972/1972 [02:00<00:00, 16.31it/s,  loss: 0.2298, accu: 0.9655]
valid: 100%|██████████| 493/493 [00:09<00:00, 52.51it/s,  loss: 0.5384, accu: 0.7812]


Summary epoch:
 - Train loss: 0.4033, accu: 0.8756
 - Valid loss: 0.5541, accu: 0.8434
Epoch 5/99


train: 100%|██████████| 1972/1972 [02:00<00:00, 16.31it/s,  loss: 0.1006, accu: 0.9655]
valid: 100%|██████████| 493/493 [00:09<00:00, 52.48it/s,  loss: 0.6275, accu: 0.8125]


Summary epoch:
 - Train loss: 0.3002, accu: 0.9058
 - Valid loss: 0.4210, accu: 0.8763
Epoch 6/99


train: 100%|██████████| 1972/1972 [02:00<00:00, 16.31it/s,  loss: 0.0828, accu: 1.0000]
valid: 100%|██████████| 493/493 [00:09<00:00, 52.44it/s,  loss: 0.5091, accu: 0.8438]


Summary epoch:
 - Train loss: 0.2348, accu: 0.9265
 - Valid loss: 0.4455, accu: 0.9192
Epoch 7/99


train: 100%|██████████| 1972/1972 [02:00<00:00, 16.32it/s,  loss: 0.0847, accu: 1.0000]
valid: 100%|██████████| 493/493 [00:09<00:00, 52.48it/s,  loss: 0.2559, accu: 0.9062]


Summary epoch:
 - Train loss: 0.1845, accu: 0.9423
 - Valid loss: 0.2466, accu: 0.9404
Epoch 8/99


train: 100%|██████████| 1972/1972 [02:00<00:00, 16.32it/s,  loss: 0.1163, accu: 0.9310]
valid: 100%|██████████| 493/493 [00:09<00:00, 52.39it/s,  loss: 0.3857, accu: 0.8750]


Summary epoch:
 - Train loss: 0.1582, accu: 0.9494
 - Valid loss: 0.2570, accu: 0.9359
Epoch 9/99


train: 100%|██████████| 1972/1972 [02:00<00:00, 16.32it/s,  loss: 0.3732, accu: 0.9310]
valid: 100%|██████████| 493/493 [00:09<00:00, 52.43it/s,  loss: 0.5921, accu: 0.7812]


Summary epoch:
 - Train loss: 0.1374, accu: 0.9564
 - Valid loss: 0.5739, accu: 0.8469
Epoch 10/99


train: 100%|██████████| 1972/1972 [02:00<00:00, 16.31it/s,  loss: 0.1633, accu: 0.9310]
valid: 100%|██████████| 493/493 [00:09<00:00, 52.35it/s,  loss: 0.1233, accu: 0.9375]


Summary epoch:
 - Train loss: 0.1171, accu: 0.9621
 - Valid loss: 0.4863, accu: 0.9311
Epoch 11/99


train: 100%|██████████| 1972/1972 [02:00<00:00, 16.31it/s,  loss: 0.1967, accu: 0.9655]
valid: 100%|██████████| 493/493 [00:09<00:00, 52.36it/s,  loss: 0.4472, accu: 0.8438]


Summary epoch:
 - Train loss: 0.1030, accu: 0.9673
 - Valid loss: 0.5918, accu: 0.8804
Epoch 12/99


train: 100%|██████████| 1972/1972 [02:00<00:00, 16.31it/s,  loss: 0.1015, accu: 0.9655]
valid: 100%|██████████| 493/493 [00:09<00:00, 52.36it/s,  loss: 0.2081, accu: 0.9375]


Summary epoch:
 - Train loss: 0.0939, accu: 0.9699
 - Valid loss: 0.2986, accu: 0.9110
Epoch 13/99


train: 100%|██████████| 1972/1972 [02:00<00:00, 16.31it/s,  loss: 0.0117, accu: 1.0000]
valid: 100%|██████████| 493/493 [00:09<00:00, 52.41it/s,  loss: 0.0722, accu: 0.9688]


Summary epoch:
 - Train loss: 0.0867, accu: 0.9727
 - Valid loss: 0.2224, accu: 0.9601
Epoch 14/99


train: 100%|██████████| 1972/1972 [02:00<00:00, 16.31it/s,  loss: 0.1135, accu: 0.9655]
valid: 100%|██████████| 493/493 [00:09<00:00, 52.42it/s,  loss: 0.2801, accu: 0.8750]


Summary epoch:
 - Train loss: 0.0783, accu: 0.9756
 - Valid loss: 0.3887, accu: 0.9199
Epoch 15/99


train: 100%|██████████| 1972/1972 [02:00<00:00, 16.31it/s,  loss: 0.3661, accu: 0.8276]
valid: 100%|██████████| 493/493 [00:09<00:00, 52.39it/s,  loss: 1.6508, accu: 0.6250]


Summary epoch:
 - Train loss: 0.0695, accu: 0.9774
 - Valid loss: 1.1863, accu: 0.7529
Epoch 16/99


train: 100%|██████████| 1972/1972 [02:00<00:00, 16.32it/s,  loss: 0.0025, accu: 1.0000]
valid: 100%|██████████| 493/493 [00:09<00:00, 52.42it/s,  loss: 0.0339, accu: 1.0000]


Summary epoch:
 - Train loss: 0.0654, accu: 0.9789
 - Valid loss: 0.1270, accu: 0.9627
Epoch 17/99


train: 100%|██████████| 1972/1972 [02:00<00:00, 16.31it/s,  loss: 0.0058, accu: 1.0000]
valid: 100%|██████████| 493/493 [00:09<00:00, 52.44it/s,  loss: 0.4816, accu: 0.8750]


Summary epoch:
 - Train loss: 0.0622, accu: 0.9800
 - Valid loss: 0.1650, accu: 0.9733
Epoch 18/99


train: 100%|██████████| 1972/1972 [02:00<00:00, 16.31it/s,  loss: 0.1415, accu: 0.9655]
valid: 100%|██████████| 493/493 [00:09<00:00, 52.39it/s,  loss: 0.1046, accu: 0.9688]


Summary epoch:
 - Train loss: 0.0553, accu: 0.9827
 - Valid loss: 0.3468, accu: 0.9163
Epoch 19/99


train: 100%|██████████| 1972/1972 [02:00<00:00, 16.31it/s,  loss: 0.0015, accu: 1.0000]
valid: 100%|██████████| 493/493 [00:09<00:00, 52.42it/s,  loss: 0.2024, accu: 0.9375]


Summary epoch:
 - Train loss: 0.0527, accu: 0.9837
 - Valid loss: 0.2259, accu: 0.9457
Epoch 20/99


train: 100%|██████████| 1972/1972 [02:00<00:00, 16.31it/s,  loss: 0.1771, accu: 0.9310]
valid: 100%|██████████| 493/493 [00:09<00:00, 52.40it/s,  loss: 0.6000, accu: 0.8125]


Summary epoch:
 - Train loss: 0.0478, accu: 0.9851
 - Valid loss: 0.5094, accu: 0.8916
Epoch 21/99


train: 100%|██████████| 1972/1972 [02:00<00:00, 16.31it/s,  loss: 0.0515, accu: 0.9655]
valid: 100%|██████████| 493/493 [00:09<00:00, 52.42it/s,  loss: 0.2656, accu: 0.9375]


Summary epoch:
 - Train loss: 0.0463, accu: 0.9853
 - Valid loss: 0.5792, accu: 0.8983
Epoch 22/99


train: 100%|██████████| 1972/1972 [02:00<00:00, 16.31it/s,  loss: 0.0026, accu: 1.0000]
valid: 100%|██████████| 493/493 [00:09<00:00, 52.45it/s,  loss: 0.0950, accu: 0.9375]


Summary epoch:
 - Train loss: 0.0440, accu: 0.9858
 - Valid loss: 0.1943, accu: 0.9504
Epoch 23/99


train: 100%|██████████| 1972/1972 [02:00<00:00, 16.31it/s,  loss: 0.0002, accu: 1.0000]
valid: 100%|██████████| 493/493 [00:09<00:00, 52.45it/s,  loss: 0.1045, accu: 0.9375]


Summary epoch:
 - Train loss: 0.0431, accu: 0.9861
 - Valid loss: 0.1805, accu: 0.9631
Epoch 24/99


train: 100%|██████████| 1972/1972 [02:00<00:00, 16.32it/s,  loss: 0.0102, accu: 1.0000]
valid: 100%|██████████| 493/493 [00:09<00:00, 52.44it/s,  loss: 0.1201, accu: 0.9375]


Summary epoch:
 - Train loss: 0.0382, accu: 0.9879
 - Valid loss: 0.2000, accu: 0.9499
Epoch 25/99


train: 100%|██████████| 1972/1972 [02:00<00:00, 16.31it/s,  loss: 0.0050, accu: 1.0000]
valid: 100%|██████████| 493/493 [00:09<00:00, 52.40it/s,  loss: 0.1862, accu: 0.9688]


Summary epoch:
 - Train loss: 0.0394, accu: 0.9877
 - Valid loss: 0.1216, accu: 0.9643
Epoch 26/99


train: 100%|██████████| 1972/1972 [02:01<00:00, 16.30it/s,  loss: 0.0005, accu: 1.0000]
valid: 100%|██████████| 493/493 [00:09<00:00, 52.39it/s,  loss: 0.8294, accu: 0.8750]


Summary epoch:
 - Train loss: 0.0340, accu: 0.9892
 - Valid loss: 0.5182, accu: 0.8927
Epoch 27/99


train: 100%|██████████| 1972/1972 [02:00<00:00, 16.30it/s,  loss: 0.0011, accu: 1.0000]
valid: 100%|██████████| 493/493 [00:09<00:00, 52.41it/s,  loss: 0.1049, accu: 0.9688]


Summary epoch:
 - Train loss: 0.0359, accu: 0.9884
 - Valid loss: 0.1330, accu: 0.9657
Epoch 28/99


train: 100%|██████████| 1972/1972 [02:00<00:00, 16.30it/s,  loss: 0.1305, accu: 0.9655]
valid: 100%|██████████| 493/493 [00:09<00:00, 52.43it/s,  loss: 0.2261, accu: 0.9375]


Summary epoch:
 - Train loss: 0.0328, accu: 0.9895
 - Valid loss: 0.4336, accu: 0.9019
Epoch 29/99


train: 100%|██████████| 1972/1972 [02:00<00:00, 16.31it/s,  loss: 0.0357, accu: 0.9655]
valid: 100%|██████████| 493/493 [00:09<00:00, 52.44it/s,  loss: 0.0838, accu: 1.0000]


Summary epoch:
 - Train loss: 0.0318, accu: 0.9896
 - Valid loss: 0.2206, accu: 0.9563
Epoch 30/99


train: 100%|██████████| 1972/1972 [02:00<00:00, 16.31it/s,  loss: 0.0038, accu: 1.0000]
valid: 100%|██████████| 493/493 [00:09<00:00, 52.46it/s,  loss: 0.2611, accu: 0.9062]


Summary epoch:
 - Train loss: 0.0311, accu: 0.9907
 - Valid loss: 0.3100, accu: 0.9295
Epoch 31/99


train: 100%|██████████| 1972/1972 [02:00<00:00, 16.30it/s,  loss: 0.0265, accu: 1.0000]
valid: 100%|██████████| 493/493 [00:09<00:00, 52.40it/s,  loss: 0.0444, accu: 0.9688]


Summary epoch:
 - Train loss: 0.0305, accu: 0.9904
 - Valid loss: 0.1178, accu: 0.9665
Epoch 32/99


train: 100%|██████████| 1972/1972 [02:00<00:00, 16.31it/s,  loss: 0.0007, accu: 1.0000]
valid: 100%|██████████| 493/493 [00:09<00:00, 52.43it/s,  loss: 0.0481, accu: 0.9688]


Summary epoch:
 - Train loss: 0.0284, accu: 0.9909
 - Valid loss: 0.2998, accu: 0.9741
Epoch 33/99


train: 100%|██████████| 1972/1972 [02:00<00:00, 16.31it/s,  loss: 0.0003, accu: 1.0000]
valid: 100%|██████████| 493/493 [00:09<00:00, 52.39it/s,  loss: 0.0369, accu: 0.9688]


Summary epoch:
 - Train loss: 0.0254, accu: 0.9921
 - Valid loss: 0.3839, accu: 0.9600
Epoch 34/99


train: 100%|██████████| 1972/1972 [02:00<00:00, 16.31it/s,  loss: 0.0053, accu: 1.0000]
valid: 100%|██████████| 493/493 [00:09<00:00, 52.48it/s,  loss: 0.0881, accu: 0.9688]


Summary epoch:
 - Train loss: 0.0281, accu: 0.9914
 - Valid loss: 0.1969, accu: 0.9634
Epoch 35/99


train: 100%|██████████| 1972/1972 [02:00<00:00, 16.32it/s,  loss: 0.0007, accu: 1.0000]
valid: 100%|██████████| 493/493 [00:09<00:00, 52.52it/s,  loss: 0.0065, accu: 1.0000]


Summary epoch:
 - Train loss: 0.0222, accu: 0.9926
 - Valid loss: 0.1163, accu: 0.9778
Epoch 36/99


train: 100%|██████████| 1972/1972 [02:00<00:00, 16.33it/s,  loss: 0.0010, accu: 1.0000]
valid: 100%|██████████| 493/493 [00:09<00:00, 52.45it/s,  loss: 0.2758, accu: 0.9062]


Summary epoch:
 - Train loss: 0.0260, accu: 0.9918
 - Valid loss: 0.2712, accu: 0.9515
Epoch 37/99


train:   2%|▏         | 49/1972 [00:02<01:28, 21.82it/s,  loss: 0.0083, accu: 1.0000]IOPub message rate exceeded.
The Jupyter server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--ServerApp.iopub_msg_rate_limit`.

Current values:
ServerApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
ServerApp.rate_limit_window=3.0 (secs)

train: 100%|██████████| 1972/1972 [02:00<00:00, 16.32it/s,  loss: 0.0008, accu: 1.0000]
valid: 100%|██████████| 493/493 [00:09<00:00, 52.48it/s,  loss: 0.1930, accu: 0.9375]


Summary epoch:
 - Train loss: 0.0221, accu: 0.9932
 - Valid loss: 0.2855, accu: 0.9331
Epoch 42/99


train:  23%|██▎       | 445/1972 [00:24<01:19, 19.32it/s,  loss: 0.0112, accu: 1.0000]IOPub message rate exceeded.
The Jupyter server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--ServerApp.iopub_msg_rate_limit`.

Current values:
ServerApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
ServerApp.rate_limit_window=3.0 (secs)

train: 100%|██████████| 1972/1972 [02:00<00:00, 16.32it/s,  loss: 0.1957, accu: 0.9655]
valid: 100%|██████████| 493/493 [00:09<00:00, 52.47it/s,  loss: 0.0072, accu: 1.0000]


Summary epoch:
 - Train loss: 0.0214, accu: 0.9934
 - Valid loss: 0.2186, accu: 0.9752
Epoch 46/99


train: 100%|██████████| 1972/1972 [02:00<00:00, 16.33it/s,  loss: 0.0069, accu: 1.0000]
valid: 100%|██████████| 493/493 [00:09<00:00, 52.52it/s,  loss: 0.0360, accu: 1.0000]


Summary epoch:
 - Train loss: 0.0196, accu: 0.9939
 - Valid loss: 0.1523, accu: 0.9674
Epoch 47/99


train:   9%|▉         | 179/1972 [00:09<01:42, 17.48it/s,  loss: 0.0130, accu: 1.0000]IOPub message rate exceeded.
The Jupyter server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--ServerApp.iopub_msg_rate_limit`.

Current values:
ServerApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
ServerApp.rate_limit_window=3.0 (secs)

train: 100%|██████████| 1972/1972 [02:00<00:00, 16.32it/s,  loss: 0.0000, accu: 1.0000]
valid: 100%|██████████| 493/493 [00:09<00:00, 52.47it/s,  loss: 0.0800, accu: 0.9688]


Summary epoch:
 - Train loss: 0.0180, accu: 0.9946
 - Valid loss: 0.1328, accu: 0.9871
Epoch 51/99


train: 100%|██████████| 1972/1972 [02:00<00:00, 16.32it/s,  loss: 0.0041, accu: 1.0000]
valid: 100%|██████████| 493/493 [00:09<00:00, 52.52it/s,  loss: 0.0111, accu: 1.0000]


Summary epoch:
 - Train loss: 0.0174, accu: 0.9947
 - Valid loss: 0.1462, accu: 0.9739
Epoch 52/99


train:  22%|██▏       | 430/1972 [00:23<01:34, 16.34it/s,  loss: 0.0004, accu: 1.0000]IOPub message rate exceeded.
The Jupyter server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--ServerApp.iopub_msg_rate_limit`.

Current values:
ServerApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
ServerApp.rate_limit_window=3.0 (secs)

train: 100%|██████████| 1972/1972 [02:00<00:00, 16.32it/s,  loss: 0.0009, accu: 1.0000]
valid: 100%|██████████| 493/493 [00:09<00:00, 52.47it/s,  loss: 0.2474, accu: 0.9375]


Summary epoch:
 - Train loss: 0.0187, accu: 0.9943
 - Valid loss: 0.2137, accu: 0.9576
Epoch 54/99


train:  86%|████████▌ | 1693/1972 [01:38<00:16, 17.12it/s,  loss: 0.0039, accu: 1.0000]IOPub message rate exceeded.
The Jupyter server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--ServerApp.iopub_msg_rate_limit`.

Current values:
ServerApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
ServerApp.rate_limit_window=3.0 (secs)

train: 100%|██████████| 1972/1972 [02:00<00:00, 16.32it/s,  loss: 0.0001, accu: 1.0000]
valid: 100%|██████████| 493/493 [00:09<00:00, 52.48it/s,  loss: 0.0037, accu: 1.0000]


Summary epoch:
 - Train loss: 0.0149, accu: 0.9951
 - Valid loss: 0.1137, accu: 0.9859
Epoch 56/99


train: 100%|██████████| 1972/1972 [02:00<00:00, 16.32it/s,  loss: 0.0160, accu: 1.0000]
valid: 100%|██████████| 493/493 [00:09<00:00, 52.50it/s,  loss: 0.0483, accu: 0.9688]


Summary epoch:
 - Train loss: 0.0157, accu: 0.9950
 - Valid loss: 0.1542, accu: 0.9797
Epoch 57/99


train: 100%|██████████| 1972/1972 [02:00<00:00, 16.32it/s,  loss: 0.0002, accu: 1.0000]
valid: 100%|██████████| 493/493 [00:09<00:00, 52.46it/s,  loss: 0.1499, accu: 0.9375]


Summary epoch:
 - Train loss: 0.0160, accu: 0.9948
 - Valid loss: 0.1434, accu: 0.9770
Epoch 58/99


train: 100%|██████████| 1972/1972 [02:00<00:00, 16.31it/s,  loss: 0.0007, accu: 1.0000]
valid: 100%|██████████| 493/493 [00:09<00:00, 52.38it/s,  loss: 0.0013, accu: 1.0000]


Summary epoch:
 - Train loss: 0.0147, accu: 0.9951
 - Valid loss: 0.1374, accu: 0.9898
Epoch 59/99


train: 100%|██████████| 1972/1972 [02:00<00:00, 16.30it/s,  loss: 0.1220, accu: 0.9655]
valid: 100%|██████████| 493/493 [00:09<00:00, 52.40it/s,  loss: 0.0009, accu: 1.0000] 


Summary epoch:
 - Train loss: 0.0151, accu: 0.9952
 - Valid loss: 0.7283, accu: 0.9914
Epoch 60/99


train: 100%|██████████| 1972/1972 [02:00<00:00, 16.30it/s,  loss: 0.0001, accu: 1.0000]
valid: 100%|██████████| 493/493 [00:09<00:00, 52.42it/s,  loss: 0.0009, accu: 1.0000]


Summary epoch:
 - Train loss: 0.0157, accu: 0.9952
 - Valid loss: 0.2255, accu: 0.9893
Epoch 61/99


train: 100%|██████████| 1972/1972 [02:00<00:00, 16.30it/s,  loss: 0.0000, accu: 1.0000]
valid: 100%|██████████| 493/493 [00:09<00:00, 52.40it/s,  loss: 0.0638, accu: 0.9688]


Summary epoch:
 - Train loss: 0.0133, accu: 0.9958
 - Valid loss: 0.0985, accu: 0.9888
Epoch 62/99


train: 100%|██████████| 1972/1972 [02:00<00:00, 16.30it/s,  loss: 0.0003, accu: 1.0000]
valid: 100%|██████████| 493/493 [00:09<00:00, 52.35it/s,  loss: 0.0039, accu: 1.0000]


Summary epoch:
 - Train loss: 0.0146, accu: 0.9954
 - Valid loss: 0.2447, accu: 0.9897
Epoch 63/99


train: 100%|██████████| 1972/1972 [02:00<00:00, 16.31it/s,  loss: 0.2109, accu: 0.9655]
valid: 100%|██████████| 493/493 [00:09<00:00, 52.46it/s,  loss: 0.2009, accu: 0.9375]


Summary epoch:
 - Train loss: 0.0131, accu: 0.9960
 - Valid loss: 0.2590, accu: 0.9716
Epoch 64/99


train: 100%|██████████| 1972/1972 [02:00<00:00, 16.31it/s,  loss: 0.0039, accu: 1.0000]
valid: 100%|██████████| 493/493 [00:09<00:00, 52.44it/s,  loss: 0.0013, accu: 1.0000] 


Summary epoch:
 - Train loss: 0.0137, accu: 0.9959
 - Valid loss: 0.6978, accu: 0.9959
Epoch 65/99


train: 100%|██████████| 1972/1972 [02:00<00:00, 16.31it/s,  loss: 0.0002, accu: 1.0000]
valid: 100%|██████████| 493/493 [00:09<00:00, 52.50it/s,  loss: 0.0724, accu: 0.9688]


Summary epoch:
 - Train loss: 0.0139, accu: 0.9956
 - Valid loss: 0.1108, accu: 0.9814
Epoch 66/99


train: 100%|██████████| 1972/1972 [02:00<00:00, 16.31it/s,  loss: 0.0000, accu: 1.0000]
valid: 100%|██████████| 493/493 [00:09<00:00, 52.45it/s,  loss: 0.0192, accu: 1.0000]


Summary epoch:
 - Train loss: 0.0126, accu: 0.9960
 - Valid loss: 0.2781, accu: 0.9947
Epoch 67/99


train: 100%|██████████| 1972/1972 [02:00<00:00, 16.32it/s,  loss: 0.0037, accu: 1.0000]
valid: 100%|██████████| 493/493 [00:09<00:00, 52.47it/s,  loss: 0.0002, accu: 1.0000] 


Summary epoch:
 - Train loss: 0.0118, accu: 0.9963
 - Valid loss: 0.4088, accu: 0.9967
Epoch 68/99


train: 100%|██████████| 1972/1972 [02:00<00:00, 16.32it/s,  loss: 0.0000, accu: 1.0000]
valid: 100%|██████████| 493/493 [00:09<00:00, 52.44it/s,  loss: 0.0117, accu: 1.0000]


Summary epoch:
 - Train loss: 0.0136, accu: 0.9958
 - Valid loss: 0.1328, accu: 0.9900
Epoch 69/99


train: 100%|██████████| 1972/1972 [02:00<00:00, 16.32it/s,  loss: 0.3555, accu: 0.9655]
valid: 100%|██████████| 493/493 [00:09<00:00, 52.42it/s,  loss: 0.1229, accu: 0.9688] 


Summary epoch:
 - Train loss: 0.0131, accu: 0.9958
 - Valid loss: 0.3951, accu: 0.9622
Epoch 70/99


train: 100%|██████████| 1972/1972 [02:00<00:00, 16.32it/s,  loss: 0.0002, accu: 1.0000]
valid: 100%|██████████| 493/493 [00:09<00:00, 52.43it/s,  loss: 0.0038, accu: 1.0000]


Summary epoch:
 - Train loss: 0.0121, accu: 0.9962
 - Valid loss: 0.0984, accu: 0.9800
Epoch 71/99


train: 100%|██████████| 1972/1972 [02:00<00:00, 16.32it/s,  loss: 0.0005, accu: 1.0000]
valid: 100%|██████████| 493/493 [00:09<00:00, 52.40it/s,  loss: 0.0052, accu: 1.0000]


Summary epoch:
 - Train loss: 0.0110, accu: 0.9967
 - Valid loss: 0.3035, accu: 0.9949
Epoch 72/99


train: 100%|██████████| 1972/1972 [02:00<00:00, 16.32it/s,  loss: 0.0004, accu: 1.0000]
valid: 100%|██████████| 493/493 [00:09<00:00, 52.44it/s,  loss: 0.0037, accu: 1.0000]


Summary epoch:
 - Train loss: 0.0119, accu: 0.9963
 - Valid loss: 0.1888, accu: 0.9946
Epoch 73/99


train: 100%|██████████| 1972/1972 [02:00<00:00, 16.32it/s,  loss: 0.0005, accu: 1.0000]
valid: 100%|██████████| 493/493 [00:09<00:00, 52.39it/s,  loss: 0.0385, accu: 0.9688]


Summary epoch:
 - Train loss: 0.0118, accu: 0.9962
 - Valid loss: 0.1542, accu: 0.9887
Epoch 74/99


train: 100%|██████████| 1972/1972 [02:00<00:00, 16.32it/s,  loss: 0.0080, accu: 1.0000]
valid: 100%|██████████| 493/493 [00:09<00:00, 52.47it/s,  loss: 0.2219, accu: 0.9375]


Summary epoch:
 - Train loss: 0.0131, accu: 0.9963
 - Valid loss: 0.3607, accu: 0.9549
Epoch 75/99


train: 100%|██████████| 1972/1972 [02:00<00:00, 16.33it/s,  loss: 0.0006, accu: 1.0000]
valid: 100%|██████████| 493/493 [00:09<00:00, 52.48it/s,  loss: 0.0126, accu: 1.0000]


Summary epoch:
 - Train loss: 0.0114, accu: 0.9964
 - Valid loss: 0.1660, accu: 0.9792
Epoch 76/99


train: 100%|██████████| 1972/1972 [02:00<00:00, 16.33it/s,  loss: 0.0004, accu: 1.0000]
valid: 100%|██████████| 493/493 [00:09<00:00, 52.51it/s,  loss: 0.0369, accu: 0.9688]


Summary epoch:
 - Train loss: 0.0112, accu: 0.9967
 - Valid loss: 0.2361, accu: 0.9518
Epoch 77/99


train: 100%|██████████| 1972/1972 [02:00<00:00, 16.33it/s,  loss: 0.0002, accu: 1.0000]
valid: 100%|██████████| 493/493 [00:09<00:00, 52.46it/s,  loss: 0.3671, accu: 0.9062]


Summary epoch:
 - Train loss: 0.0107, accu: 0.9964
 - Valid loss: 0.4435, accu: 0.9262
Epoch 78/99


train: 100%|██████████| 1972/1972 [02:00<00:00, 16.32it/s,  loss: 0.1455, accu: 0.9655]
valid: 100%|██████████| 493/493 [00:09<00:00, 52.43it/s,  loss: 0.0320, accu: 1.0000]


Summary epoch:
 - Train loss: 0.0124, accu: 0.9964
 - Valid loss: 0.0537, accu: 0.9852
Epoch 79/99


train: 100%|██████████| 1972/1972 [02:00<00:00, 16.33it/s,  loss: 0.0001, accu: 1.0000]
valid: 100%|██████████| 493/493 [00:09<00:00, 52.47it/s,  loss: 0.3385, accu: 0.9375]


Summary epoch:
 - Train loss: 0.0109, accu: 0.9964
 - Valid loss: 0.1375, accu: 0.9864
Epoch 80/99


train: 100%|██████████| 1972/1972 [02:00<00:00, 16.33it/s,  loss: 0.0029, accu: 1.0000]
valid: 100%|██████████| 493/493 [00:09<00:00, 52.41it/s,  loss: 0.0096, accu: 1.0000]


Summary epoch:
 - Train loss: 0.0104, accu: 0.9967
 - Valid loss: 0.1307, accu: 0.9960
Epoch 81/99


train: 100%|██████████| 1972/1972 [02:00<00:00, 16.32it/s,  loss: 0.0003, accu: 1.0000]
valid: 100%|██████████| 493/493 [00:09<00:00, 52.46it/s,  loss: 0.0147, accu: 1.0000]


Summary epoch:
 - Train loss: 0.0099, accu: 0.9969
 - Valid loss: 0.2091, accu: 0.9965
Epoch 82/99


train: 100%|██████████| 1972/1972 [02:00<00:00, 16.32it/s,  loss: 0.0004, accu: 1.0000]
valid: 100%|██████████| 493/493 [00:09<00:00, 52.37it/s,  loss: 0.0345, accu: 0.9688]


Summary epoch:
 - Train loss: 0.0098, accu: 0.9968
 - Valid loss: 0.0931, accu: 0.9898
Epoch 83/99


train: 100%|██████████| 1972/1972 [02:00<00:00, 16.31it/s,  loss: 0.0001, accu: 1.0000]
valid: 100%|██████████| 493/493 [00:09<00:00, 52.42it/s,  loss: 0.0290, accu: 1.0000]


Summary epoch:
 - Train loss: 0.0105, accu: 0.9965
 - Valid loss: 0.1286, accu: 0.9676
Epoch 84/99


train: 100%|██████████| 1972/1972 [02:00<00:00, 16.31it/s,  loss: 0.0162, accu: 1.0000]
valid: 100%|██████████| 493/493 [00:09<00:00, 52.46it/s,  loss: 0.1912, accu: 0.9688]


Summary epoch:
 - Train loss: 0.0088, accu: 0.9971
 - Valid loss: 0.1025, accu: 0.9746
Epoch 85/99


train: 100%|██████████| 1972/1972 [02:00<00:00, 16.31it/s,  loss: 0.0000, accu: 1.0000]
valid: 100%|██████████| 493/493 [00:09<00:00, 52.39it/s,  loss: 0.0011, accu: 1.0000]


Summary epoch:
 - Train loss: 0.0104, accu: 0.9968
 - Valid loss: 0.0692, accu: 0.9933
Epoch 86/99


train: 100%|██████████| 1972/1972 [02:00<00:00, 16.32it/s,  loss: 0.0531, accu: 0.9655]
valid: 100%|██████████| 493/493 [00:09<00:00, 52.46it/s,  loss: 0.0002, accu: 1.0000]


Summary epoch:
 - Train loss: 0.0093, accu: 0.9971
 - Valid loss: 0.1105, accu: 0.9886
Epoch 87/99


train: 100%|██████████| 1972/1972 [02:00<00:00, 16.32it/s,  loss: 0.0001, accu: 1.0000]
valid: 100%|██████████| 493/493 [00:09<00:00, 52.39it/s,  loss: 0.0157, accu: 1.0000]


Summary epoch:
 - Train loss: 0.0102, accu: 0.9968
 - Valid loss: 0.0814, accu: 0.9811
Epoch 88/99


train: 100%|██████████| 1972/1972 [02:00<00:00, 16.32it/s,  loss: 0.0003, accu: 1.0000]
valid: 100%|██████████| 493/493 [00:09<00:00, 52.46it/s,  loss: 0.0004, accu: 1.0000]


Summary epoch:
 - Train loss: 0.0112, accu: 0.9963
 - Valid loss: 0.0951, accu: 0.9741
Epoch 89/99


train: 100%|██████████| 1972/1972 [02:00<00:00, 16.32it/s,  loss: 0.0000, accu: 1.0000]
valid: 100%|██████████| 493/493 [00:09<00:00, 52.45it/s,  loss: 0.3591, accu: 0.9375]


Summary epoch:
 - Train loss: 0.0101, accu: 0.9970
 - Valid loss: 0.2690, accu: 0.9451
Epoch 90/99


train: 100%|██████████| 1972/1972 [02:00<00:00, 16.31it/s,  loss: 0.0000, accu: 1.0000]
valid: 100%|██████████| 493/493 [00:09<00:00, 52.42it/s,  loss: 0.0223, accu: 1.0000]


Summary epoch:
 - Train loss: 0.0084, accu: 0.9975
 - Valid loss: 0.1487, accu: 0.9663
Epoch 91/99


train: 100%|██████████| 1972/1972 [02:00<00:00, 16.30it/s,  loss: 0.0069, accu: 1.0000]
valid: 100%|██████████| 493/493 [00:09<00:00, 52.46it/s,  loss: 0.0138, accu: 1.0000]


Summary epoch:
 - Train loss: 0.0098, accu: 0.9968
 - Valid loss: 0.1049, accu: 0.9760
Epoch 92/99


train: 100%|██████████| 1972/1972 [02:00<00:00, 16.30it/s,  loss: 0.0000, accu: 1.0000]
valid: 100%|██████████| 493/493 [00:09<00:00, 52.44it/s,  loss: 0.0109, accu: 1.0000]


Summary epoch:
 - Train loss: 0.0103, accu: 0.9969
 - Valid loss: 0.0923, accu: 0.9793
Epoch 93/99


train: 100%|██████████| 1972/1972 [02:00<00:00, 16.30it/s,  loss: 0.0002, accu: 1.0000]
valid: 100%|██████████| 493/493 [00:09<00:00, 52.43it/s,  loss: 0.0091, accu: 1.0000]


Summary epoch:
 - Train loss: 0.0092, accu: 0.9972
 - Valid loss: 0.1683, accu: 0.9643
Epoch 94/99


train: 100%|██████████| 1972/1972 [02:00<00:00, 16.30it/s,  loss: 0.0000, accu: 1.0000]
valid: 100%|██████████| 493/493 [00:09<00:00, 52.42it/s,  loss: 0.0011, accu: 1.0000]


Summary epoch:
 - Train loss: 0.0100, accu: 0.9970
 - Valid loss: 0.1520, accu: 0.9815
Epoch 95/99


train: 100%|██████████| 1972/1972 [02:00<00:00, 16.30it/s,  loss: 0.0000, accu: 1.0000]
valid: 100%|██████████| 493/493 [00:09<00:00, 52.52it/s,  loss: 0.0101, accu: 1.0000]


Summary epoch:
 - Train loss: 0.0088, accu: 0.9971
 - Valid loss: 0.1476, accu: 0.9831
Epoch 96/99


train: 100%|██████████| 1972/1972 [02:00<00:00, 16.31it/s,  loss: 0.0000, accu: 1.0000]
valid: 100%|██████████| 493/493 [00:09<00:00, 52.47it/s,  loss: 0.0126, accu: 1.0000]


Summary epoch:
 - Train loss: 0.0080, accu: 0.9974
 - Valid loss: 0.0998, accu: 0.9783
Epoch 97/99


train: 100%|██████████| 1972/1972 [02:00<00:00, 16.30it/s,  loss: 0.0000, accu: 1.0000]
valid: 100%|██████████| 493/493 [00:09<00:00, 52.45it/s,  loss: 0.3191, accu: 0.9062]


Summary epoch:
 - Train loss: 0.0087, accu: 0.9974
 - Valid loss: 0.3692, accu: 0.9261
Epoch 98/99


train: 100%|██████████| 1972/1972 [02:01<00:00, 16.30it/s,  loss: 0.0000, accu: 1.0000]
valid: 100%|██████████| 493/493 [00:09<00:00, 52.39it/s,  loss: 0.0015, accu: 1.0000]


Summary epoch:
 - Train loss: 0.0089, accu: 0.9972
 - Valid loss: 0.0924, accu: 0.9821
Epoch 99/99


train: 100%|██████████| 1972/1972 [02:00<00:00, 16.30it/s,  loss: 0.0062, accu: 1.0000]
valid: 100%|██████████| 493/493 [00:09<00:00, 52.43it/s,  loss: 0.0023, accu: 1.0000]

Summary epoch:
 - Train loss: 0.0082, accu: 0.9974
 - Valid loss: 0.0690, accu: 0.9860



