In [2]:
# !pip install tensorflow

In [3]:
import os
import tensorflow as tf

In [4]:
# Words-to-phonemes conversion (LireCouleur)
import lirecouleur.word
import lirecouleur.text

In [5]:
# ! cd .. && tree 

# Understand English - Alignments by nicknochnack

In [6]:
# https://github.com/nicknochnack/LipNet/blob/main/LipNet.ipynb

# Character vocabulary: lowercase letters, apostrophe, '?', '!', digits 1-9 and the space.
vocab = list("abcdefghijklmnopqrstuvwxyz'?!123456789 ")

# Forward lookup (character -> integer id) and its inverse (integer id -> character).
# Both use the empty string as the out-of-vocabulary token.
char_to_num = tf.keras.layers.StringLookup(vocabulary=vocab, oov_token="")
num_to_char = tf.keras.layers.StringLookup(
    vocabulary=char_to_num.get_vocabulary(), oov_token="", invert=True
)

def load_alignments(path: str):
    """Read an alignment file and encode its spoken words as character ids.

    Each line is split on whitespace and its third field is taken as the word.
    Words equal to 'sil' are skipped. The remaining words are joined with a
    single space between them, split into characters, and mapped to integer
    ids with the module-level `char_to_num` lookup.

    Args:
        path: Path of the alignment file to read.

    Returns:
        The tensor produced by `char_to_num` for the character sequence, with
        the leading separator removed.
    """
    with open(path, 'r') as f:
        lines = f.readlines()

    tokens = []
    for line in lines:
        fields = line.split()
        # Blank or truncated lines have no third field; skip them instead of
        # raising IndexError (e.g. a trailing empty line at the end of the file).
        if len(fields) < 3:
            continue
        word = fields[2]
        if word != 'sil':
            # Grow the list in place rather than rebuilding it for every word.
            tokens.extend([' ', word])

    # Every word is preceded by a ' ' token, so the flattened character sequence
    # starts with a separator; the final [1:] drops it.
    return char_to_num(tf.reshape(tf.strings.unicode_split(tokens, input_encoding='UTF-8'), (-1)))[1:]

In [7]:
# Show each vocabulary character next to its 1-based position in `vocab`.
print(list(enumerate(vocab, start=1)))

[(1, 'a'), (2, 'b'), (3, 'c'), (4, 'd'), (5, 'e'), (6, 'f'), (7, 'g'), (8, 'h'), (9, 'i'), (10, 'j'), (11, 'k'), (12, 'l'), (13, 'm'), (14, 'n'), (15, 'o'), (16, 'p'), (17, 'q'), (18, 'r'), (19, 's'), (20, 't'), (21, 'u'), (22, 'v'), (23, 'w'), (24, 'x'), (25, 'y'), (26, 'z'), (27, "'"), (28, '?'), (29, '!'), (30, '1'), (31, '2'), (32, '3'), (33, '4'), (34, '5'), (35, '6'), (36, '7'), (37, '8'), (38, '9'), (39, ' ')]


In [8]:
# 0 23750 sil
# 23750 29500 bin
# 29500 34000 blue
# 34000 35500 at
# 35500 41000 f
# 41000 47250 two
# 47250 53000 now
# 53000 74500 sil

In [9]:
# Locate the sample English alignment file relative to the current working directory,
# then encode it (the bare last expression displays the resulting tensor).
alignment_parts = ("..", "raw_data", "Sample_English", "align", "bbaf2n.align")
alignment_path = os.path.join(os.getcwd(), *alignment_parts)
load_alignments(alignment_path)

<tf.Tensor: shape=(21,), dtype=int64, numpy=
array([ 2,  9, 14, 39,  2, 12, 21,  5, 39,  1, 20, 39,  6, 39, 20, 23, 15,
       39, 14, 15, 23])>

In [10]:
# Manual replay of load_alignments on the sample file, printing every intermediate state.
with open(alignment_path, 'r') as f:
    lines = f.readlines()

print(lines)
print("-----------------------")

tokens = []

print(tokens)
print("-----------------------")

for raw_line in lines:
    line = raw_line.split()
    print(f"line --> {line}")
    word = line[2]
    if word != 'sil':
        # Each kept word is preceded by a single-space separator token.
        tokens.extend([' ', word])
    print(f"all tokens from begining --> {tokens}")
    print("-------")

print("-----------------------")

# Split the tokens into characters, flatten, map to ids, and drop the leading separator.
token_chars = tf.strings.unicode_split(tokens, input_encoding='UTF-8')
flat_chars = tf.reshape(token_chars, (-1))
final_result = char_to_num(flat_chars)[1:]
print(final_result)

['0 23750 sil\n', '23750 29500 bin\n', '29500 34000 blue\n', '34000 35500 at\n', '35500 41000 f\n', '41000 47250 two\n', '47250 53000 now\n', '53000 74500 sil\n']
-----------------------
[]
-----------------------
line --> ['0', '23750', 'sil']
all tokens from begining --> []
-------
line --> ['23750', '29500', 'bin']
all tokens from begining --> [' ', 'bin']
-------
line --> ['29500', '34000', 'blue']
all tokens from begining --> [' ', 'bin', ' ', 'blue']
-------
line --> ['34000', '35500', 'at']
all tokens from begining --> [' ', 'bin', ' ', 'blue', ' ', 'at']
-------
line --> ['35500', '41000', 'f']
all tokens from begining --> [' ', 'bin', ' ', 'blue', ' ', 'at', ' ', 'f']
-------
line --> ['41000', '47250', 'two']
all tokens from begining --> [' ', 'bin', ' ', 'blue', ' ', 'at', ' ', 'f', ' ', 'two']
-------
line --> ['47250', '53000', 'now']
all tokens from begining --> [' ', 'bin', ' ', 'blue', ' ', 'at', ' ', 'f', ' ', 'two', ' ', 'now']
-------
line --> ['53000', '74500', 'sil

# French

In [11]:
# Both transcript folders live under the same root, resolved from the working directory.
transcripts_root = os.path.join(os.getcwd(), "..", "raw_data", "sample", "transcripts")
phon_transcripts = os.path.join(transcripts_root, "phonetique")
full_transcripts = os.path.join(transcripts_root, "complet")

In [18]:
# Sample 28, once in the phonetic transcript folder and once in the full transcript folder.
test_phon = os.path.join(phon_transcripts, "28.txt")
test_full = os.path.join(full_transcripts, "28.wav.txt")

## Test word-to-phoneme conversion with LireCouleur

In [19]:
# Compare LireCouleur's phoneme decoding for several spellings of "œuf" and for "maison".
for sample_word in ("œuf", "oeufs", "oeuf", "maison"):
    print(lirecouleur.word.phonemes(sample_word))



[('x^', 'œu'), ('f', 'f')]
[('x^', 'oeu'), ('#', 'f'), ('#', 's')]
[('x', 'oeu'), ('f', 'f')]
[('m', 'm'), ('e^_comp', 'ai'), ('z_s', 's'), ('o~', 'on')]


In [20]:
# Same sample words as the phoneme check, this time split into syllables.
for sample_word in ("œuf", "oeufs", "oeuf", "maison"):
    print(lirecouleur.word.syllables(sample_word))

['œuf']
['oeufs']
['oeuf']
['mai', 'son']


In [21]:
# Decode a whole sentence with the text-level API and print the result.
sentence_phonemes = lirecouleur.text.phonemes("la maison est grande")
print(sentence_phonemes)

[[('l', 'l'), ('a', 'a')], ' ', [('m', 'm'), ('e^_comp', 'ai'), ('z_s', 's'), ('o~', 'on')], ' ', [('e^_comp', 'est')], ' ', [('g', 'g'), ('r', 'r'), ('a~', 'an'), ('d', 'd'), ('q_caduc', 'e')]]


In [22]:
# Tokenise every line of the full transcript on whitespace.
with open(test_full, "r") as f:
    lines = [raw_line.split() for raw_line in f]

print(lines)
print("")

# The second line of the file is taken as the list of words to decode.
words = lines[1]
print(words)

# Decode each word to phonemes with LireCouleur, one printed result per word.
for word_phonemes in map(lirecouleur.word.phonemes, words):
    print(word_phonemes)

[['0', '0.0', 'sil'], ['Gilles', 'mataka'], ['1.16', '1.16', 'sil']]

['Gilles', 'mataka']
[('z^_g', 'G'), ('i', 'i'), ('j_q_caduc', 'lle'), ('#', 's')]
[('m', 'm'), ('a', 'a'), ('t', 't'), ('a', 'a'), ('k', 'k'), ('a', 'a')]


In [23]:
# Tokenise every line of the phonetic transcript on whitespace.
with open(test_phon, "r") as f:
    lines = [raw_line.split() for raw_line in f]

# Keep the second field of each multi-field line, remove every "</s>" substring,
# and concatenate the pieces into one string.
phonems = "".join(
    fields[1].replace("</s>", "")
    for fields in lines
    if len(fields) > 1
)
phonems

# words = lines[1]
# print(words)

# for word in words:
#     print(lirecouleur.word.phonemes(word))

'Zilmataka'

In [30]:
def get_phonems(phonem_transcript: str) -> list:
    """Read a phoneme transcript file and return its phoneme labels in file order.

    Each line is split on whitespace and lines with fewer than two fields are
    ignored. The second field is used as the label, every "</s>" substring is
    removed from it, and labels left empty by that removal are dropped.

    Args:
        phonem_transcript: Path of the phoneme transcript file.

    Returns:
        A list of non-empty label strings.
    """
    with open(phonem_transcript, "r") as f:
        rows = [raw_line.split() for raw_line in f]

    phonems = []
    for fields in rows:
        if len(fields) <= 1:
            continue
        label = fields[1].replace("</s>", "")
        if label:
            phonems.append(label)

    return phonems

# Try the helper on the sample phonetic transcript; the bare expression displays the returned list.
get_phonems(test_phon)

['Z', 'i', 'l', 'm', 'a', 't', 'a', 'k', 'a']

In [56]:
# Root of the corpus. Set the SILENTSPEAK_DIR environment variable to point at
# another copy instead of editing this machine-specific default.
full_db_path = os.environ.get("SILENTSPEAK_DIR", "/Users/ArthurPech/Downloads/SilentSpeak/")
all_transcripts = []

sessions = ["I", "II"]
files = [f"{i+1}.txt" for i in range(238)]

for session in sessions:
    locutors = [f"Locuteur_{session}_{i+1}" for i in range(8)]

    for locutor in locutors:
        session_path = os.path.join(full_db_path, f"Session {session}", locutor)

        # Find the label folder (its name is not always written consistently).
        # This is resolved once per speaker instead of once per file, because
        # the directory listing does not depend on the file being read.
        lab_dirs = [name for name in os.listdir(session_path) if "phone" in name.lower()]
        if not lab_dirs:
            raise FileNotFoundError(
                f"No folder containing 'phone' found in {session_path}"
            )
        lab_dir = lab_dirs[0]

        for file in files:
            print(f"{session}-{locutor}-{file}")

            phonem_transcript = os.path.join(session_path, lab_dir, file)
            phonems = get_phonems(phonem_transcript)

            # One record per (session, speaker, file) with its phoneme labels.
            transcript = {
                "session" : session,
                "locutor" : locutor,
                "file" : file,
                "phonem_transcript" : phonems
            }

            all_transcripts.append(transcript)

I-Locuteur_I_1-1.txt
I-Locuteur_I_1-2.txt
I-Locuteur_I_1-3.txt
I-Locuteur_I_1-4.txt
I-Locuteur_I_1-5.txt
I-Locuteur_I_1-6.txt
I-Locuteur_I_1-7.txt
I-Locuteur_I_1-8.txt
I-Locuteur_I_1-9.txt
I-Locuteur_I_1-10.txt
I-Locuteur_I_1-11.txt
I-Locuteur_I_1-12.txt
I-Locuteur_I_1-13.txt
I-Locuteur_I_1-14.txt
I-Locuteur_I_1-15.txt
I-Locuteur_I_1-16.txt
I-Locuteur_I_1-17.txt
I-Locuteur_I_1-18.txt
I-Locuteur_I_1-19.txt
I-Locuteur_I_1-20.txt
I-Locuteur_I_1-21.txt
I-Locuteur_I_1-22.txt
I-Locuteur_I_1-23.txt
I-Locuteur_I_1-24.txt
I-Locuteur_I_1-25.txt
I-Locuteur_I_1-26.txt
I-Locuteur_I_1-27.txt
I-Locuteur_I_1-28.txt
I-Locuteur_I_1-29.txt
I-Locuteur_I_1-30.txt
I-Locuteur_I_1-31.txt
I-Locuteur_I_1-32.txt
I-Locuteur_I_1-33.txt
I-Locuteur_I_1-34.txt
I-Locuteur_I_1-35.txt
I-Locuteur_I_1-36.txt
I-Locuteur_I_1-37.txt
I-Locuteur_I_1-38.txt
I-Locuteur_I_1-39.txt
I-Locuteur_I_1-40.txt
I-Locuteur_I_1-41.txt
I-Locuteur_I_1-42.txt
I-Locuteur_I_1-43.txt
I-Locuteur_I_1-44.txt
I-Locuteur_I_1-45.txt
I-Locuteur_I_1-46.t

In [57]:
import pandas as pd

In [58]:
# Build one row per transcript record; the dict keys become the columns.
transcripts_df = pd.DataFrame(all_transcripts)

In [68]:
# Display the assembled transcript table.
transcripts_df

Unnamed: 0,session,locutor,file,phonem_transcript
0,I,Locuteur_I_1,1.txt,"[m, a, S, deux, m, i, z, E, R, u, s, i]"
1,I,Locuteur_I_1,2.txt,"[v, w, a, l, a, d, e, b, u, Z, i]"
2,I,Locuteur_I_1,3.txt,"[d, O, n, U~, p, deux, t, i, k, u]"
3,I,Locuteur_I_1,4.txt,"[i, l, a, d, y, g, u]"
4,I,Locuteur_I_1,5.txt,"[E, l, m, e, t, R, i, p, a]"
...,...,...,...,...
3803,II,Locuteur_II_8,234.txt,"[y, n, k, e, b, e, k, w, a, z, p, l, neuf, R, ..."
3804,II,Locuteur_II_8,235.txt,"[U~, k, u, deux, R, deux, e, U~, p, e, t, y, d..."
3805,II,Locuteur_II_8,236.txt,"[s, y, R, l, deux, z, i, N, g, S, a, k, U~, U~..."
3806,II,Locuteur_II_8,237.txt,"[a, Z, deux, n, a~, t, w, a, n, l, deux, neuf,..."


In [67]:
# Flatten the per-file phoneme lists into one list covering every transcript.
# Iterates over `all_transcripts` (the list built by the corpus crawl) because
# no variable named `transcripts` is defined anywhere in this notebook.
all_phonems = []
for transcript in all_transcripts:
    phonem_transcript = transcript["phonem_transcript"]
    all_phonems.extend(phonem_transcript)

[{'session': 'I',
  'locutor': 'Locuteur_I_1',
  'file': '1.txt',
  'phonem_transcript': ['m',
   'a',
   'S',
   'deux',
   'm',
   'i',
   'z',
   'E',
   'R',
   'u',
   's',
   'i']},
 {'session': 'I',
  'locutor': 'Locuteur_I_1',
  'file': '2.txt',
  'phonem_transcript': ['v',
   'w',
   'a',
   'l',
   'a',
   'd',
   'e',
   'b',
   'u',
   'Z',
   'i']},
 {'session': 'I',
  'locutor': 'Locuteur_I_1',
  'file': '3.txt',
  'phonem_transcript': ['d', 'O', 'n', 'U~', 'p', 'deux', 't', 'i', 'k', 'u']},
 {'session': 'I',
  'locutor': 'Locuteur_I_1',
  'file': '4.txt',
  'phonem_transcript': ['i', 'l', 'a', 'd', 'y', 'g', 'u']},
 {'session': 'I',
  'locutor': 'Locuteur_I_1',
  'file': '5.txt',
  'phonem_transcript': ['E', 'l', 'm', 'e', 't', 'R', 'i', 'p', 'a']},
 {'session': 'I',
  'locutor': 'Locuteur_I_1',
  'file': '6.txt',
  'phonem_transcript': ['y',
   'n',
   'R',
   'e',
   'p',
   'o~',
   's',
   'a~',
   'b',
   'i',
   'g',
   'y']},
 {'session': 'I',
  'locutor': 'Locute

- https://keras.io/examples/audio/ctc_asr/#:~:text=CTC%20is%20an%20algorithm%20used,transcript%20align%20to%20the%20audio

# 