# Imports

In [1]:
# Allow importing items in src folder
import sys

# NOTE: machine-specific absolute path - point this at your local checkout of
# the project (the folder that contains `src`) before running the notebook.
SCRIPT_DIR = "/home/shane/Projects/bsltranslate-model"

# Guarded so that re-running this cell does not keep appending duplicate entries.
if SCRIPT_DIR not in sys.path:
    sys.path.append(SCRIPT_DIR)

In [2]:
from src.files import load_pickle, read_vocab_file
from src.models import Cnn_3d, load_model
from src.inference import load_video, predict_signs
from src.utilities import save_to_csv

# Variables

In [3]:
# Files saved alongside the trained model; all of them live in `model_dir`.
model_dir = '../models/20220620212654/desktop/'
model_path = f'{model_dir}stream_cnn_3d.pt'
normalization_stats_path = f'{model_dir}stream_cnn_3d_norm_stats.pkl'
vocab_file = f'{model_dir}stream_cnn_3d_vocab.csv'

In [4]:
# Input video to label, and the folder the generated label CSV is written to.
dataset_dir = '../../bsltranslate-full-dataset/'
video_fname = 'Alphabet50.mp4'
video_path = f'{dataset_dir}videos/{video_fname}'
# Labels go directly into the dataset root (the 'video_labels/' subfolder is not used).
label_folder = dataset_dir

In [5]:
# Passed to predict_signs as `frames_per_sign` and `number_of_coords` respectively.
model_fps, model_coords_per_sign = 7, 126

# The object imported from src.models (not a string); handed to load_model.
model_name = Cnn_3d

# Code

In [6]:
def group_predictions(data, min_frames=12, ignore_label='NaS'):
    """Collapse per-frame predictions into runs of consecutive identical signs.

    Args:
        data: Indexable sequence of frames where ``frame[0]`` is the frame's
            timestamp and ``frame[1]`` is the predicted sign label. Timestamps
            are divided by 1000 in the output (presumably milliseconds ->
            seconds; confirm against ``predict_signs``).
        min_frames: A run is kept only if it spans MORE than this many frames.
        ignore_label: Runs carrying this label are never emitted.

    Returns:
        A list of ``[start, end, sign]`` rows, one per kept run, where
        ``start``/``end`` are the first/last timestamps of the run divided by
        1000 and rounded to 2 decimal places. Empty input yields ``[]``.

    Notes:
        Every run is measured by the number of frames it actually contains,
        and the trailing run (which has no following label change) is emitted
        like any other.
    """
    grouped_data = []
    if len(data) == 0:
        return grouped_data

    def _emit(run_start, run_end, sign, n_frames):
        # Keep only real signs that were held for long enough.
        if sign != ignore_label and n_frames > min_frames:
            grouped_data.append([round(run_start / 1000, 2), round(run_end / 1000, 2), sign])

    start_time = data[0][0]
    end_time = start_time
    cur_sign = data[0][1]
    sign_length = 0
    for frame in data:
        timestamp, sign = frame[0], frame[1]
        if sign != cur_sign:
            # Label changed: close the previous run before counting this frame.
            _emit(start_time, end_time, cur_sign, sign_length)
            start_time, cur_sign, sign_length = timestamp, sign, 0
        sign_length += 1
        end_time = timestamp

    # The last run is not followed by a label change, so close it explicitly.
    _emit(start_time, end_time, cur_sign, sign_length)
    return grouped_data

# Get Video Predictions

In [7]:
# Normalization statistics saved with the model.
# NOTE(review): load_pickle presumably unpickles this file - only use files you trust.
normalization_stats = load_pickle(normalization_stats_path)

# Vocabulary; its length is passed to load_model below.
vocab = read_vocab_file(vocab_file)

# Build the model object from the saved model file.
model = load_model(model_name, len(vocab), model_path)

# Open the input video.
video = load_video(video_path)

# Run the model over the video to get the predictions (no on-screen display).
predictions = predict_signs(
    video,
    model,
    vocab,
    normalization_stats,
    frames_per_sign=model_fps,
    number_of_coords=model_coords_per_sign,
    display_pred=False,
)

INFO: Created TensorFlow Lite XNNPACK delegate for CPU.


In [8]:
# Collapse the per-frame predictions into rows of [start, end, sign].
grouped_preds = group_predictions(predictions)

In [9]:
# The label CSV is named after the video. Strip only the final extension so
# that file names containing extra dots (e.g. "clip.v2.mp4") keep their full stem.
label_file_name = label_folder + video_fname.rsplit(".", 1)[0] + ".csv"
label_file_name

'../../bsltranslate-full-dataset/Alphabet50.csv'

In [10]:
# Save the grouped rows to label_file_name; the headers follow the
# [start, end, sign] layout of each row produced by group_predictions.
save_to_csv(grouped_preds, label_file_name, headers=['start_time', 'end_time', 'word'])

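Make sure to manually check the saved labels against the video - the model's predictions aren't very accurate.

To do that, play the video with `ffplay <video_path>`, substituting the value of the `video_path` variable (or run `!ffplay {video_path}` from a notebook cell).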