### import libs

In [2]:
import csv, importlib, torch, cv2, time
from demo_utils import *

### load demo configurations

In [6]:
# Demo-wide settings shared by the cells below.
# CFG is not defined in this notebook; presumably it comes from the
# `from demo_utils import *` star import — TODO confirm.
cfg = CFG()
# Device the model is moved to and the input batch is sent to.
device = cfg.device
# Number of highest-ranked predictions returned by recognise().
topk = 5
# Path the webcam capture is written to and that recognise() is run on.
video_file = 'uploads/livedemo.avi'
# When True, the capture cell writes the recorded frames to `video_file`.
store_vid = True

Running on device =  cpu


### load sign classes

In [7]:
# Build the lookup from integer class id (column 0) to sign label (column 1).
# The file is opened in a `with` block so the handle is closed once the map is
# built, and with newline='' as the csv module recommends for reader input.
with open(cfg.class_map_path, newline='') as class_map_file:
    reader = csv.reader(class_map_file)
    header = next(reader)  # first row is the column header, not a class entry
    # Blank lines (e.g. a trailing newline) come back as empty rows; skip them
    # instead of failing on row[0].
    class_map = {int(row[0]): row[1] for row in reader if row}

### load inference model

In [8]:
# Resolve the `Model` class from the `models.<model_type>` module named in the config.
module = importlib.import_module('.'.join(('models', cfg.model_type)))
model_cls = module.Model
model = model_cls(*cfg.model_params.get_model_params())

# Restore the saved weights, remapping stored tensors onto the demo device.
checkpoint = torch.load(cfg.save_model_path, map_location=device)
model.load_state_dict(checkpoint['model_state_dict'])

# Move to the target device and switch to inference mode; the trailing
# expression also displays the model summary as the cell output.
model.to(device)
model.eval()

Model(
  (pos_encoder): PositionalEncoding(
    (dropout): Dropout(p=0.1, inplace=False)
  )
  (layers): ModuleList(
    (0): PartAttentionLayer(
      (drop_part): DropPart()
      (blocks): ModuleList(
        (0): PartAttentionBlock(
          (norm1): LayerNorm((128,), eps=1e-05, elementwise_affine=True)
          (attn): MSA(
            (qkv): Linear(in_features=128, out_features=384, bias=True)
            (attn_drop): Dropout(p=0.0, inplace=False)
            (proj): Linear(in_features=128, out_features=128, bias=True)
            (proj_drop): Dropout(p=0.1, inplace=False)
            (softmax): Softmax(dim=-1)
            (attn_act): ReLU()
          )
          (norm2): LayerNorm((128,), eps=1e-05, elementwise_affine=True)
          (ff): FeedForward(
            (fc1): Linear(in_features=128, out_features=256, bias=True)
            (act): GELU(approximate='none')
            (fc2): Linear(in_features=256, out_features=128, bias=True)
            (drop): Dropout(p=0.1, inpla

### recognise video function

In [9]:
def recognise(video):
    """Run the loaded model on one video and return its top-ranked sign labels.

    Args:
        video: Whatever `get_video_data` accepts as input; the demo passes the
            path of the recorded clip.

    Returns:
        dict: Keys 'word1' .. 'word{topk}' mapped to labels from `class_map`,
        where 'word1' is the class with the highest model output.
    """
    features = cfg.test_transform(get_video_data(video))
    batch = torch.tensor(features, dtype=torch.float32).unsqueeze(0)
    with torch.no_grad():
        outputs = model(batch.to(device)).squeeze(0)
        # argsort is ascending, so the best classes sit at the end of the list.
        ranked = torch.argsort(outputs).cpu().tolist()
    return {f'word{rank}': class_map[ranked[-rank]] for rank in range(1, topk + 1)}

### capture video and recognise

In [33]:
# Capture a short webcam clip, store it in `video_file`, then classify it.
#
# Flow: show a dimmed preview with a countdown for `wait_time` seconds, then
# record for `record_time` seconds (frames are written only when `store_vid`
# is True), release the camera and run recognise() on `video_file`.
record_time = 5   # seconds of footage to record
wait_time = 3     # seconds of countdown before recording starts
record_start = False

font = cv2.FONT_HERSHEY_SIMPLEX
window_width, window_height = 1280, (1280//4)*3
overlay_alpha = 0.5   # strength of the white wash shown during the countdown
fallback_fps = 30.0   # used when the camera backend does not report a frame rate

cap = cv2.VideoCapture(0)
out = None
try:
    if not cap.isOpened():
        # Raising stops the cell; exit() would shut down the notebook kernel.
        raise RuntimeError("Cannot open camera")

    cv2.namedWindow("Video", cv2.WINDOW_NORMAL)
    cv2.resizeWindow("Video", window_width, window_height)

    if store_vid:
        frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
        fps = cap.get(cv2.CAP_PROP_FPS)
        if fps <= 0:
            # Unsupported properties are reported as 0, which is not a usable
            # frame rate for the writer.
            fps = fallback_fps
        out = cv2.VideoWriter(video_file, cv2.VideoWriter_fourcc(*'MJPG'),
                              fps, (frame_width, frame_height))

    start_time = time.time()

    while cap.isOpened():
        is_read, image = cap.read()
        if not is_read:
            break

        # NOTE(review): when store_vid is False this branch never runs, so the
        # loop only ends on 'q' and recognise() below reads whatever file is
        # already at `video_file` — confirm that is intended.
        if store_vid and record_start:
            # NOTE(review): every frame is written twice; presumably this
            # compensates for the loop running slower than the reported fps —
            # confirm before changing, as it affects the clip fed to the model.
            out.write(image)
            out.write(image)
            # The countdown is drawn after writing, so it is not part of the clip.
            elapsed = time.time() - rec_start_time
            cv2.putText(image, str(int(record_time + 1 - elapsed)), org=(50, 80), fontFace=font, fontScale=1, color=(255, 255, 255), thickness=2)
            if elapsed > record_time:
                break

        if not record_start:
            # Wash the preview out with white so it is clear recording has not begun.
            white_image = np.full_like(image, 255)
            image = cv2.addWeighted(image, 1 - overlay_alpha, white_image, overlay_alpha, 0)

            elapsed = time.time() - start_time
            cv2.putText(image, str(int(wait_time + 1 - elapsed)), org=(50, 100), fontFace=font, fontScale=3, color=(0, 0, 255), thickness=10)
            if elapsed > wait_time:
                record_start = True
                rec_start_time = time.time()

        cv2.imshow('Video', image)

        # Press Q on the keyboard to exit early.
        if cv2.waitKey(25) & 0xFF == ord('q'):
            break
finally:
    # Always free the camera, the writer and the window, even if the loop raised.
    cap.release()
    if out is not None:
        out.release()
    cv2.destroyAllWindows()

print(f'Top {topk} results - ')
results = recognise(video_file)
for label, word in results.items():
    print(label+' : '+word)

Top 5 results - 
word1 : A
word2 : Funny
word3 : Thursday
word4 : Ghee
word5 : Swimming
