<a href="https://colab.research.google.com/github/sign-language-processing/datasets/blob/master/examples/load.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
%%capture
! pip install git+https://github.com/sign-language-processing/datasets.git

In [None]:
import tensorflow_datasets as tfds
import sign_language_datasets.datasets
from sign_language_datasets.datasets.config import SignDatasetConfig

import itertools

# RWTH Phoenix 2014 T

In [None]:
# Annotation-only configuration: video loading is disabled.
config = SignDatasetConfig(
  name="only-annotations",
  version="3.0.0",
  include_video=False,
)
rwth_phoenix2014_t = tfds.load(name='rwth_phoenix2014_t', builder_kwargs={"config": config})

# Print the gloss and text fields of the first 10 training examples,
# each example followed by a blank line.
for datum in itertools.islice(rwth_phoenix2014_t["train"], 10):
  for field in ('gloss', 'text'):
    print(datum[field].numpy().decode('utf-8'))
  print()

# Dicta Sign

In [None]:
# Annotation-only configuration: no video, and include_pose is set to None.
config = SignDatasetConfig(
  name="only-annotations",
  version="1.0.0",
  include_video=False,
  include_pose=None,
)
dicta_sign = tfds.load(name='dicta_sign', builder_kwargs=dict(config=config))

# Print the HamNoSys field and the text of the first 10 training examples.
for datum in itertools.islice(dicta_sign["train"], 10):
  hamnosys, text = (datum[field].numpy().decode('utf-8') for field in ('hamnosys', 'text'))
  print(hamnosys, text)

# ChicagoFSWild+

In [None]:
# Version 2.0.0 selects ChicagoFSWild+; version 1.0.0 selects ChicagoFSWild.
config = SignDatasetConfig(
  name="only-annotations",
  version="2.0.0",
  include_video=False,
)
chicagofswild = tfds.load(name='chicago_fs_wild', builder_kwargs={"config": config})

# Print the text field of the first 10 training examples.
for datum in itertools.islice(chicagofswild["train"], 10):
  text = datum['text'].numpy().decode('utf-8')
  print(text)

# AUTSL

In [None]:
# Annotation-only configuration: video loading is disabled.
config = SignDatasetConfig(
  name="only-annotations",
  version="1.0.0",
  include_video=False,
)
autsl = tfds.load(name='autsl', builder_kwargs=dict(config=config))

# Print the decoded id and the raw gloss_id value of the first 10 training examples.
for datum in itertools.islice(autsl["train"], 10):
  sample_id = datum['id'].numpy().decode('utf-8')
  gloss_id = datum['gloss_id'].numpy()
  print(sample_id, gloss_id)

# SignBank

In [None]:
# Loaded without an explicit config, i.e. with the builder's defaults.
signbank = tfds.load(name='sign_bank')

# Print id, sign_writing and the list of decoded terms for the first 10 training examples.
for datum in itertools.islice(signbank["train"], 10):
  entry_id = datum['id'].numpy().decode('utf-8')
  sign_writing = datum['sign_writing'].numpy().decode('utf-8')
  terms = [term.decode('utf-8') for term in datum['terms'].numpy()]
  print(entry_id, sign_writing, terms)

# SignTyp (https://signtyp.uconn.edu/signpuddle/index.php?ui=1&sgn=9032)


In [None]:
import os

# NOTE(review): "PHPSESSID" is presumably a session cookie for the SignTyp
# SignPuddle site and may expire -- confirm against the sign_typ builder.
# Set the SIGNTYP_PHPSESSID environment variable to supply your own value
# without editing this cell; the original value remains the fallback.
signtyp_session_id = os.environ.get("SIGNTYP_PHPSESSID") or "hj9co07ct7f5noq529no9u09l4"

config = SignDatasetConfig(
  name="only-annotations",
  version="1.0.0",
  include_video=False,
  extra={"PHPSESSID": signtyp_session_id},
)
signtyp = tfds.load(name='sign_typ', builder_kwargs=dict(config=config))

# Print the video and sign_writing fields of the first 10 training examples.
for datum in itertools.islice(signtyp["train"], 0, 10):
  print(datum['video'].numpy().decode('utf-8'), datum['sign_writing'].numpy().decode('utf-8'))

# Sign2Mint

In [None]:
# Annotation-only configuration: video loading is disabled.
config = SignDatasetConfig(
  name="only-annotations",
  version="1.0.0",
  include_video=False,
)
sign2mint = tfds.load(name='sign2_mint', builder_kwargs=dict(config=config))

# Print the fachbegriff, video and gebaerdenschrift url fields of the
# first 10 training examples.
for datum in itertools.islice(sign2mint["train"], 10):
  fachbegriff = datum['fachbegriff'].numpy().decode('utf-8')
  video = datum['video'].numpy().decode('utf-8')
  gebaerdenschrift_url = datum['gebaerdenschrift']['url'].numpy().decode('utf-8')
  print(fachbegriff, video, gebaerdenschrift_url)

# SWOJS Glossário

In [None]:
# Annotation-only configuration: video loading is disabled.
config = SignDatasetConfig(
  name="only-annotations",
  version="1.0.0",
  include_video=False,
)
swojs_glossario = tfds.load(name='swojs_glossario', builder_kwargs=dict(config=config))

def decode(tl):
  """Return the UTF-8 decoded strings held by `tl`.

  `tl` must expose a `.numpy()` method yielding an iterable of `bytes`
  objects; each element is decoded and the results are returned as a list.
  """
  return [raw.decode('utf-8') for raw in tl.numpy()]

# Print the decoded sign_writing list and the video field of the first
# 10 training examples.
for datum in itertools.islice(swojs_glossario["train"], 10):
  sign_writing = decode(datum['sign_writing'])
  video = datum['video'].numpy().decode('utf-8')
  print(sign_writing, video)

# DGS Corpus

In [None]:
%%capture
! pip install pympi-ling

In [None]:
from sign_language_datasets.datasets.dgs_corpus.dgs_utils import get_elan_sentences

# Annotation-only configuration: no video, and include_pose is set to None.
config = SignDatasetConfig(name="only-annotations", version="1.0.0", include_video=False, include_pose=None)
dgs_corpus = tfds.load('dgs_corpus', builder_kwargs=dict(config=config))

# For each of the first 10 training examples, print the glosses and the
# German text of the first sentence found in its ELAN (.eaf) file.
for datum in itertools.islice(dgs_corpus["train"], 0, 10):
  elan_path = datum["paths"]["eaf"].numpy().decode('utf-8')
  sentences = get_elan_sentences(elan_path)

  # Guard only the `next` call: an exhausted iterator means no sentence was
  # yielded for this file, so the example is skipped. Errors raised while
  # printing are no longer at risk of being swallowed.
  try:
    sentence = next(sentences)
  except StopIteration:
    continue

  print(" ".join([s["gloss"] for s in sentence["glosses"]]))
  print(sentence["german"])
  print()

# NGT Corpus

In [None]:
%%capture
! pip install pympi-ling

In [None]:
from sign_language_datasets.datasets.ngt_corpus.ngt_corpus_utils import get_elan_sentences_ngt_corpus

# Annotation-only configuration: video loading is disabled.
config = SignDatasetConfig(
  name="only-annotations",
  version="1.0.0",
  include_video=False,
)
ngt = tfds.load(name='ngt_corpus', builder_kwargs=dict(config=config))

# For each of the first 10 training examples, print its id followed by every
# sentence extracted from its ELAN (.eaf) file.
for datum in itertools.islice(ngt["train"], 10):
  print(datum['id'].numpy().decode('utf-8'))
  elan_path = datum["paths"]["eaf"].numpy().decode('utf-8')

  for sentence in get_elan_sentences_ngt_corpus(elan_path):
    print(sentence)