# Concatenative Synthesis (Rule-Based Translator)

In [1]:
import sign_language_translator as slt

## Translate

In [None]:
# Build the rule-based translator for Urdu text, using the "psl" sign language
# and the "vid" sign format.
model = slt.models.ConcatenativeSynthesis(
    text_language="urdu",
    sign_language="psl",
    sign_format="vid",
)

# Sentence to translate. Other inputs to try:
#   "ایک سیب اچھا ہے۔"
#   "تربوز لال ہے۔"
text = "یہ مگرمچھ نیلا ہے۔"

# Translate, then play the result inside the notebook with the HTML5 player.
sentence = model.translate(text)
sentence.show(inline_player="html5")

In [None]:
# Store the translation under outputs/, using the input sentence as the file name.
output_path = f"outputs/{text}.mp4"
sentence.save(output_path)

## Custom Translator

In [3]:
import json
import os

import jsonschema
import numpy as np

import sign_language_translator as slt

`configure dataset directory`

In [4]:
# Local folder for the custom-translator demo; later cells write the mapping
# JSON into it and pass it to the vocabulary reader.
data_dir = "./temp_dataset"
# Register that folder as the root directory of the library's asset manager.
slt.Assets.set_root_dir(data_dir)

`create vocabulary object`

In [None]:
# our mapping datasets: https://github.com/sign-language-translator/sign-language-datasets/blob/main/parallel_texts
# define mapping: each entry links a video label to the words (per text language) it stands for
mapping_dataset = [
    {
        "country": "xx",
        "description": "custom sign language videos and corresponding words.",
        "mapping": [
            {
                "label": "xx-yy-1_greeting",
                "token": {
                    "en": ["hello", "hey"],
                    "hi": ["नमस्ते"],
                },
            },
            {
                "label": "xx-yy-1_world",
                "token": {
                    "en": ["world", "globe", "earth"],
                    "ur": ["دنیا"],
                },
            },
        ],
        "organization": "yy",
        "url": "https://www.example.com",
    }
]

# validate the dataset format
# (the first item returned by download() is used as the local path of the schema file)
schema_path = slt.Assets.download("mapping-schema.json")[0]
# read with an explicit encoding so the result does not depend on the platform's locale default
with open(schema_path, "r", encoding="utf-8") as f:
    mapping_schema = json.load(f)
jsonschema.validate(mapping_dataset, mapping_schema)

# store in data_dir so it can be retrieved whenever it's needed
# (ensure_ascii=False keeps the non-Latin tokens human-readable in the file)
with open(os.path.join(data_dir, "xx-dictionary-mapping.json"), "w", encoding="utf-8") as f:
    json.dump(mapping_dataset, f, indent=2, ensure_ascii=False)

# read with our mapping dataset reader
vocab = slt.languages.Vocab(
    language="en",
    country="xx",
    organization="yy",
    data_root_dir=data_dir,
    arg_is_regex=False,
)
vocab.supported_words

`demo videos`

In [3]:
import sign_language_translator as slt

In [None]:
# local storage: save a short all-zero clip under <asset root>/videos for the "greeting" label
frames = np.zeros(shape=(10, 256, 256, 3), dtype=np.uint8)  # (frames, height, width, channels)
greeting_video = slt.Video(frames, fps=5)
greeting_path = os.path.join(slt.Assets.ROOT_DIR, "videos", "xx-yy-1_greeting.mp4")
greeting_video.save(greeting_path)

# hosted online (auto-downloaded when needed): register the URL for the "world" label
world_video_url = "https://github.com/sign-language-translator/sign-language-datasets/releases/download/v0.0.2/wordless_wordless.mp4"
slt.Assets.FILE_TO_URL.update(
    {
        "videos/xx-yy-1_world.mp4": world_video_url,
    }
)

# videos can also be stored inside zip archives (data_dir/datasets/xx-yy-1_videos-mp4.zip)
# the needed videos will be extracted automatically to data_dir/videos
# slt.Assets.download(...), slt.Assets.extract(...), slt.Assets.fetch(...)

`create languages`

In [None]:
class MySignLang(slt.languages.SignLanguage):
    """Custom sign language for this demo; adds nothing to ``slt.languages.SignLanguage``.

    NOTE(review): if the base class declares abstract members, they must be
    implemented here before the class can be instantiated — confirm.
    """

class MyTextLang(slt.languages.TextLanguage):
    """Custom text language for this demo; adds nothing to ``slt.languages.TextLanguage``.

    NOTE(review): if the base class declares abstract members, they must be
    implemented here before the class can be instantiated — confirm.
    """

# TODO: class MyEnglish

`create model`

In [None]:
# Instantiate the custom languages first, then hand them to the translator.
custom_text_language = MyTextLang()
custom_sign_language = MySignLang()

model = slt.models.ConcatenativeSynthesis(
    text_language=custom_text_language, sign_language=custom_sign_language, sign_format="video"
)

`translate`

In [None]:
# Translate a sentence built from "hello" and "world", both listed in the custom mapping,
# and display the result.
demo_sentence = "Hello world!"
sign = model.translate(demo_sentence)
sign.show()

In [None]:
# translate (step-by-step)
# Intended pipeline: text -> tokens -> sign file names -> sign objects -> one concatenated sign.
# NOTE(review): `blah_blah_blah`, `blah_blah` and `blah` look like placeholder names, not
# actual model methods — replace them with the real method names before running this cell.
# NOTE(review): "Hi" is not among the "en" tokens defined in the custom mapping
# ("hello", "hey") — verify the word is supported or change the example sentence.
text = "Hi earth."
tokens = model.blah_blah_blah(text)
sign_filenames = model.blah_blah(tokens)
sign_list = model.blah(sign_filenames)
sign = model.concatenate_signs(sign_list)