In [None]:
# !pip install tensorflow

In [2]:
import os
import tensorflow as tf

In [73]:
# Words-to-Phonemes
import lirecouleur.word

In [44]:
# ! cd .. && tree 

# Understand English - Alignments by nicknochnack

In [22]:
# https://github.com/nicknochnack/LipNet/blob/main/LipNet.ipynb

# Character inventory used for the labels: lowercase letters, the three
# punctuation marks ' ? !, the digits 1-9 (no 0) and the space.
vocab = list("abcdefghijklmnopqrstuvwxyz'?!123456789 ")

# Forward lookup layer: character -> integer id, with "" as the out-of-vocabulary token.
char_to_num = tf.keras.layers.StringLookup(oov_token="", vocabulary=vocab)

# Reverse lookup layer built from the forward layer's own vocabulary: integer id -> character.
num_to_char = tf.keras.layers.StringLookup(
    invert=True,
    oov_token="",
    vocabulary=char_to_num.get_vocabulary(),
)

def load_alignments(path:str):
    """Read a word-alignment file and encode its transcript as character ids.

    Each useful line of the file holds three whitespace-separated fields and
    the third one is a word (e.g. ``23750 29500 bin``). Words equal to
    ``'sil'`` are dropped, the remaining words are joined with single spaces,
    and every character of the resulting string is mapped to an integer id
    with ``char_to_num``.

    Args:
        path: Path of the alignment file to read.

    Returns:
        A 1-D tensor holding one integer id per character of the transcript.
        It is expected to be empty when the file has no non-silence word.
    """
    words = []
    with open(path, 'r') as f:
        for line in f:
            fields = line.split()
            # Blank or truncated lines (e.g. a trailing empty line) carry no
            # word; skip them instead of failing with an IndexError.
            if len(fields) < 3:
                continue
            if fields[2] != 'sil':
                words.append(fields[2])
    # Joining the words once replaces the "prepend a space to every word, then
    # strip the first id" approach and yields the same character sequence.
    transcript = ' '.join(words)
    # A scalar string is split into a flat 1-D tensor of characters.
    return char_to_num(tf.strings.unicode_split(transcript, input_encoding='UTF-8'))

In [51]:
# Show every vocabulary character next to its 1-based position in the list.
print(list(enumerate(vocab, start=1)))

[(1, 'a'), (2, 'b'), (3, 'c'), (4, 'd'), (5, 'e'), (6, 'f'), (7, 'g'), (8, 'h'), (9, 'i'), (10, 'j'), (11, 'k'), (12, 'l'), (13, 'm'), (14, 'n'), (15, 'o'), (16, 'p'), (17, 'q'), (18, 'r'), (19, 's'), (20, 't'), (21, 'u'), (22, 'v'), (23, 'w'), (24, 'x'), (25, 'y'), (26, 'z'), (27, "'"), (28, '?'), (29, '!'), (30, '1'), (31, '2'), (32, '3'), (33, '4'), (34, '5'), (35, '6'), (36, '7'), (37, '8'), (38, '9'), (39, ' ')]


In [None]:
# 0 23750 sil
# 23750 29500 bin
# 29500 34000 blue
# 34000 35500 at
# 35500 41000 f
# 41000 47250 two
# 47250 53000 now
# 53000 74500 sil

In [45]:
# Sample alignment file, located relative to the current working directory.
alignment_path = os.path.join(
    os.getcwd(),
    "..",
    "raw_data",
    "Sample_English",
    "align",
    "bbaf2n.align",
)
# Last expression of the cell: display the encoded transcript.
load_alignments(alignment_path)

<tf.Tensor: shape=(21,), dtype=int64, numpy=
array([ 2,  9, 14, 39,  2, 12, 21,  5, 39,  1, 20, 39,  6, 39, 20, 23, 15,
       39, 14, 15, 23])>

In [43]:
# Verbose walkthrough of how an alignment file is turned into character ids:
# every intermediate value is printed.
with open(alignment_path, 'r') as f:
    lines = list(f)

# Raw lines exactly as read (newline characters included).
print(lines, "-----------------------", sep="\n")

tokens = []

print(tokens, "-----------------------", sep="\n")

for line in lines:
    line = line.split()
    print(f"line --> {line}")
    # Keep only non-silence words, each one preceded by a space separator.
    if line[2] != 'sil':
        tokens += [' ', line[2]]
    print(f"all tokens from begining --> {tokens}", "-------", sep="\n")

print("-----------------------")

# Split the tokens into characters, flatten, map to ids, then drop the first id
# (the space that was inserted before the first word).
final_result = char_to_num(
    tf.reshape(
        tf.strings.unicode_split(tokens, input_encoding='UTF-8'),
        (-1),
    )
)[1:]
print(final_result)

['0 23750 sil\n', '23750 29500 bin\n', '29500 34000 blue\n', '34000 35500 at\n', '35500 41000 f\n', '41000 47250 two\n', '47250 53000 now\n', '53000 74500 sil\n']
-----------------------
[]
-----------------------
line --> ['0', '23750', 'sil']
all tokens from begining --> []
-------
line --> ['23750', '29500', 'bin']
all tokens from begining --> [' ', 'bin']
-------
line --> ['29500', '34000', 'blue']
all tokens from begining --> [' ', 'bin', ' ', 'blue']
-------
line --> ['34000', '35500', 'at']
all tokens from begining --> [' ', 'bin', ' ', 'blue', ' ', 'at']
-------
line --> ['35500', '41000', 'f']
all tokens from begining --> [' ', 'bin', ' ', 'blue', ' ', 'at', ' ', 'f']
-------
line --> ['41000', '47250', 'two']
all tokens from begining --> [' ', 'bin', ' ', 'blue', ' ', 'at', ' ', 'f', ' ', 'two']
-------
line --> ['47250', '53000', 'now']
all tokens from begining --> [' ', 'bin', ' ', 'blue', ' ', 'at', ' ', 'f', ' ', 'two', ' ', 'now']
-------
line --> ['53000', '74500', 'sil

# French

In [14]:
# Both transcript folders share the same parent, resolved from the current working directory.
transcripts_root = os.path.join(os.getcwd(), "..", "raw_data", "sample", "transcripts")
phon_transcripts = os.path.join(transcripts_root, "phonetique")
full_transcripts = os.path.join(transcripts_root, "complet")

In [93]:
# Sample "18": its complete transcript and the matching phonetic transcript.
test_full = os.path.join(full_transcripts, "18.wav.txt")
test_phon = os.path.join(phon_transcripts, "18.txt")

## Test words to phonemes with LireCouleur

In [99]:
# Compare the phonemes returned for three spellings of the same word
# (ligature, plural without ligature, singular without ligature).
for spelling in ("œuf", "oeufs", "oeuf"):
    print(lirecouleur.word.phonemes(spelling))




[('x^', 'œu'), ('f', 'f')]
[('x^', 'oeu'), ('#', 'f'), ('#', 's')]
[('x', 'oeu'), ('f', 'f')]


In [94]:
# Parse the complete transcript into one list of whitespace-separated fields per line.
with open(test_full, "r") as f:
    lines = [raw.split() for raw in f]

print(lines)
print()

# The second line of the file is taken as the list of spoken words.
words = lines[1]
print(words)

# Convert each word to phonemes independently.
for word in words:
    print(lirecouleur.word.phonemes(word))

[['0', '0.0', 'sil'], ['examen', 'du', 'CAC'], ['1.32', '1.32', 'sil']]

['examen', 'du', 'CAC']
[('e^', 'e'), ('gz', 'x'), ('a', 'a'), ('m', 'm'), ('e~', 'en')]
[('d', 'd'), ('y', 'u')]
[('k', 'C'), ('a', 'A'), ('k', 'C')]


In [95]:
# Parse the phonetic transcript into one list of whitespace-separated fields per line.
with open(test_phon, "r") as f:
    lines = [raw.split() for raw in f]

# Last expression of the cell: display the parsed lines.
lines
# words = lines[1]
# print(words)

# for word in words:
#     print(lirecouleur.word.phonemes(word))

[['0'],
 ['0'],
 ['0', '</s>'],
 ['0.12', 'l'],
 ['0.18', 'E'],
 ['0.22', 'g'],
 ['0.28', 'z'],
 ['0.35', 'a'],
 ['0.41', 'm'],
 ['0.49', 'U~'],
 ['0.58', 'd'],
 ['0.63', 'y'],
 ['0.68', 'k'],
 ['0.83', 'a'],
 ['0.86', '</s>'],
 ['0']]

- https://keras.io/examples/audio/ctc_asr/#:~:text=CTC%20is%20an%20algorithm%20used,transcript%20align%20to%20the%20audio

# 