In [1]:
import random
import torch
import json
import numpy as np
import pandas as pd
from datasets.czech_slr_dataset import load_dataset, tensor_to_dictionary, dictionary_to_tensor
from normalization.body_normalization import normalize_single_dict as normalize_single_body_dict
from normalization.hand_normalization import normalize_single_dict as normalize_single_hand_dict

In [2]:
# Prefer the GPU when one is available; otherwise run everything on the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Raw string: the Windows path contains backslash sequences (\S, \o, \l, \c) that
# are invalid escapes in a normal literal and trigger a SyntaxWarning/DeprecationWarning.
# map_location lets a checkpoint that was saved on a CUDA machine load on a CPU-only one.
# NOTE(review): torch.load unpickles arbitrary objects - only load checkpoints you trust.
model = torch.load(
    r'D:\Spoter_transformer_SLR\out-checkpoints\lsa_64_spoter\checkpoint_t_10.pth',
    map_location=device,
).to(device)

# Switch to inference mode (equivalent to train(False)); as the last expression of
# the cell this also displays the module structure.
model.eval()

SPOTER(
  (transformer): Transformer(
    (encoder): TransformerEncoder(
      (layers): ModuleList(
        (0): TransformerEncoderLayer(
          (self_attn): MultiheadAttention(
            (out_proj): _LinearWithBias(in_features=108, out_features=108, bias=True)
          )
          (linear1): Linear(in_features=108, out_features=2048, bias=True)
          (dropout): Dropout(p=0.1, inplace=False)
          (linear2): Linear(in_features=2048, out_features=108, bias=True)
          (norm1): LayerNorm((108,), eps=1e-05, elementwise_affine=True)
          (norm2): LayerNorm((108,), eps=1e-05, elementwise_affine=True)
          (dropout1): Dropout(p=0.1, inplace=False)
          (dropout2): Dropout(p=0.1, inplace=False)
        )
        (1): TransformerEncoderLayer(
          (self_attn): MultiheadAttention(
            (out_proj): _LinearWithBias(in_features=108, out_features=108, bias=True)
          )
          (linear1): Linear(in_features=108, out_features=2048, bias=True)
     

In [8]:
def normalise_a_single_input(test_input):
    """Normalise one sample before it is passed to the model.

    The input is copied into a tensor, converted to its dictionary form,
    run through the body normalisation followed by the hand normalisation,
    and converted back to a tensor. Finally 0.5 is subtracted from every value.

    Args:
        test_input: a single sample accepted by ``np.copy`` (the original
            object is not modified, since the work is done on a copy).

    Returns:
        The normalised tensor, shifted by -0.5.
    """
    landmarks = torch.from_numpy(np.copy(test_input))
    landmark_dict = tensor_to_dictionary(landmarks)

    # Body normalisation first, then hand normalisation - the order is preserved.
    for normalise in (normalize_single_body_dict, normalize_single_hand_dict):
        landmark_dict = normalise(landmark_dict)

    # Move the landmark position interval to improve performance
    return dictionary_to_tensor(landmark_dict) - 0.5

In [5]:
# Read the LSA64_60fps CSV into a DataFrame, then hand the frame (rather than a
# file path) to the project's load_dataset helper, which returns two values:
# the per-sample inputs and - presumably - their matching labels.
inputs = pd.read_csv(
    r'D:\Spoter_transformer_SLR\datasets\LSA64_60fps.csv',
    encoding="utf-8",
)
test_inputs, act_lables = load_dataset(dataframe=inputs, file_location=None)

In [6]:
# Load the sign -> prediction-index mapping. The context manager guarantees the
# file handle is closed instead of being left for the garbage collector.
with open(r"E:\Muxin\LSA64-ARSL\sign_to_prediction_index_map.json") as mapping_file:
    SIGN_2_ORD = json.load(mapping_file)

# Inverse lookup: prediction index -> sign.
ORD_2_SIGN = {value: key for key, value in SIGN_2_ORD.items()}

In [12]:
# Spot-check the model: draw a few random samples, run them through the network
# and print the true gloss, the top-1 predicted gloss and the top-k probabilities.
top_num = 5  # how many of the highest-probability classes to report per sample

for i in range(5):
    # randrange(len(...)) can reach every valid index (including 0) and can never
    # run past the end of the dataset, unlike a hard-coded randint(1, 3000).
    x = random.randrange(len(test_inputs))
    test_input = test_inputs[x]
    actual_sign = act_lables[x]

    test_input = normalise_a_single_input(test_input)
    test_input = torch.tensor(np.array(test_input)).squeeze(0).to(device)

    # Inference only: disable autograd so no graph is built (and the printed
    # tensors no longer carry a grad_fn).
    with torch.no_grad():
        output = model(test_input).expand(1, -1, -1)
        # Class probabilities along the last axis; computed once and reused below.
        prediction = torch.nn.functional.softmax(output, dim=2)

    top_1_prediction = int(torch.argmax(prediction))

    print("Actual sign gloss: " + str(ORD_2_SIGN[actual_sign]))
    print("Pred sign gloass: " + str(ORD_2_SIGN[top_1_prediction]))

    top_valves, top_indices = torch.topk(prediction, k=top_num)
    print("Top {} acc & value:".format(top_num) + str(top_valves) + " " + str(top_indices) + "\n")

Actual sign gloss: to land
Pred sign gloass: to land
Top 5 acc & value:tensor([[[9.9915e-01, 1.9271e-04, 9.7318e-05, 7.5792e-05, 5.7947e-05]]],
       grad_fn=<TopkBackward>) tensor([[[54, 43, 33, 35, 34]]])

Actual sign gloss: yellow
Pred sign gloass: yellow
Top 5 acc & value:tensor([[[9.9947e-01, 1.2258e-04, 9.3704e-05, 4.6333e-05, 4.2250e-05]]],
       grad_fn=<TopkBackward>) tensor([[[ 4, 17, 39, 46, 38]]])

Actual sign gloss: colors
Pred sign gloass: colors
Top 5 acc & value:tensor([[[9.9879e-01, 4.2851e-04, 2.1218e-04, 1.3728e-04, 1.0531e-04]]],
       grad_fn=<TopkBackward>) tensor([[[ 7, 16, 14, 57, 29]]])

Actual sign gloss: mock
Pred sign gloass: mock
Top 5 acc & value:tensor([[[9.9639e-01, 2.0043e-03, 6.0046e-04, 2.4084e-04, 1.8708e-04]]],
       grad_fn=<TopkBackward>) tensor([[[29, 18, 25, 38, 12]]])

Actual sign gloss: last name
Pred sign gloass: last name
Top 5 acc & value:tensor([[[9.9918e-01, 2.0543e-04, 1.0771e-04, 4.5389e-05, 4.5192e-05]]],
       grad_fn=<TopkBackwa