In [2]:
import ast
import csv
import os

import librosa
import pandas as pd
import IPython.display as ipd
from pydub.utils import mediainfo

# Make TIMIT Great Again

In [3]:
def load_phn_file(phn_path):
    """Read a ``.PHN`` alignment file into ``(start, end, phoneme)`` tuples.

    Every non-blank line is expected to hold three whitespace-separated
    fields: start offset, end offset and the phoneme label.

    Args:
        phn_path: Path to the ``.PHN`` file.

    Returns:
        List of ``(int, int, str)`` tuples in file order.

    Raises:
        ValueError: If a non-blank line does not have exactly three fields,
            or its first two fields are not integers.
    """
    segments = []
    with open(phn_path, 'r') as file:
        for line in file:
            fields = line.split()
            if not fields:
                # Blank (typically trailing) lines carry no segment; skip
                # them instead of failing on the three-way unpacking below.
                continue
            start, end, phoneme = fields
            segments.append((int(start), int(end), phoneme))
    return segments

def load_wrd_file(wrd_path):
    """Read a ``.WRD`` alignment file into ``(start, end, word)`` tuples.

    Every non-blank line is expected to hold three whitespace-separated
    fields: start offset, end offset and the word.

    Args:
        wrd_path: Path to the ``.WRD`` file.

    Returns:
        List of ``(int, int, str)`` tuples in file order.

    Raises:
        ValueError: If a non-blank line does not have exactly three fields,
            or its first two fields are not integers.
    """
    segments = []
    with open(wrd_path, 'r') as file:
        for line in file:
            fields = line.split()
            if not fields:
                # Blank (typically trailing) lines carry no word; skip them
                # instead of failing on the three-way unpacking below.
                continue
            start, end, word = fields
            segments.append((int(start), int(end), word))
    return segments

def load_txt_file(txt_path):
    """Read the sentence text from a ``.TXT`` transcript file.

    The file is expected to start with up to two numeric fields (start and
    end offsets) followed by the sentence. Only those leading numeric fields
    are removed; digits that occur inside the sentence itself are preserved.

    Args:
        txt_path: Path to the ``.TXT`` file.

    Returns:
        The sentence with the leading numeric fields and surrounding
        whitespace removed (an empty string if nothing else is present).
    """
    with open(txt_path, 'r') as file:
        remaining = file.read().strip()

    # Peel off at most two leading all-digit tokens. Filtering every digit
    # out of the text would also damage sentences that contain numbers.
    for _ in range(2):
        parts = remaining.split(None, 1)
        if not parts or not parts[0].isdigit():
            break
        remaining = parts[1] if len(parts) > 1 else ''
    return remaining.strip()

def extract_phonemes_for_word(word_start, word_end, phn_data):
    """Collect the phoneme labels whose segments lie fully inside a word span.

    Args:
        word_start: Start offset of the word.
        word_end: End offset of the word.
        phn_data: Iterable of ``(start, end, phoneme)`` tuples.

    Returns:
        List of phoneme labels, in input order, for every segment with
        ``start >= word_start`` and ``end <= word_end``.
    """
    inside = []
    for seg_start, seg_end, label in phn_data:
        # Segments that begin before the word or end after it are left out.
        if seg_start < word_start or seg_end > word_end:
            continue
        inside.append(label)
    return inside

def process_timit_sentence(audio_path, phn_path, wrd_path, txt_path):
    """Assemble the sentence text and per-word phoneme lists for one utterance.

    Args:
        audio_path: Path of the audio file; returned unchanged.
        phn_path: Path of the phoneme alignment file.
        wrd_path: Path of the word alignment file.
        txt_path: Path of the transcript file.

    Returns:
        Tuple ``(audio_path, sentence, word_phonemes)`` where
        ``word_phonemes`` has one list of phoneme labels per entry of the
        word alignment file, in file order.
    """
    phoneme_segments = load_phn_file(phn_path)
    word_segments = load_wrd_file(wrd_path)
    transcript = load_txt_file(txt_path)

    # Only the word boundaries are needed here; the word text is not used.
    per_word = [
        extract_phonemes_for_word(begin, finish, phoneme_segments)
        for begin, finish, _word in word_segments
    ]
    return audio_path, transcript, per_word

def process_timit_dataset(timit_dir, csv_output_path):
    """Walk a corpus directory and write one CSV row per complete utterance.

    A file is treated as an utterance when its name ends with ``.wav.wav``
    (case-insensitive) and sibling ``.PHN``, ``.WRD`` and ``.TXT`` files with
    the same base name exist. Utterances with a missing companion file are
    skipped.

    Args:
        timit_dir: Root directory to search recursively.
        csv_output_path: Destination CSV path. Its parent directory is
            created if it does not exist yet.

    Returns:
        None. Writes a CSV with the columns ``audio_path``, ``sentence`` and
        ``word_phonemes`` and prints one progress line per processed file.
    """
    audio_suffix = '.wav.wav'
    data = []
    for root, _, files in os.walk(timit_dir):
        for file in files:
            if not file.lower().endswith(audio_suffix):
                continue

            audio_path = os.path.join(root, file)
            # Strip the suffix by length: the check above is case-insensitive,
            # so a case-sensitive str.replace could leave the suffix in place
            # (or alter a directory name that happens to contain it).
            base_path = audio_path[:-len(audio_suffix)]

            phn_path = base_path + '.PHN'
            wrd_path = base_path + '.WRD'
            txt_path = base_path + '.TXT'

            if os.path.exists(phn_path) and os.path.exists(wrd_path) and os.path.exists(txt_path):
                print(f"Processing: {audio_path}")
                sentence_audio_path, sentence, word_phonemes = process_timit_sentence(
                    audio_path, phn_path, wrd_path, txt_path)
                data.append([sentence_audio_path, sentence, word_phonemes])

    # Make sure the destination folder exists so a long scan is not lost to a
    # FileNotFoundError at write time.
    output_dir = os.path.dirname(csv_output_path)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    with open(csv_output_path, 'w', newline='', encoding='utf-8') as f:
        writer = csv.writer(f)
        writer.writerow(['audio_path', 'sentence', 'word_phonemes'])
        writer.writerows(data)

# Root of the local corpus copy and the destination CSV for the extracted dataset.
# NOTE(review): absolute, machine-specific paths — adjust before running elsewhere.
timit_dir = '/home/morph/Desktop/FINAL/TIMIT'
csv_output_path = '/home/morph/Desktop/FINAL/TIMIToutput/timit_dataset.csv'

# Walk the corpus directory and write one CSV row per utterance.
process_timit_dataset(timit_dir, csv_output_path)

# The message reads: "Processing finished. Dataset saved to <path>".
print("Обработка завершена. Датасет сохранен в", csv_output_path)


Processing: /home/morph/Desktop/FINAL/TIMIT/TRAIN/DR1/FDAW0/SI1271.WAV.wav
Processing: /home/morph/Desktop/FINAL/TIMIT/TRAIN/DR1/FDAW0/SX326.WAV.wav
Processing: /home/morph/Desktop/FINAL/TIMIT/TRAIN/DR1/FDAW0/SX146.WAV.wav
Processing: /home/morph/Desktop/FINAL/TIMIT/TRAIN/DR1/FDAW0/SI1406.WAV.wav
Processing: /home/morph/Desktop/FINAL/TIMIT/TRAIN/DR1/FDAW0/SA2.WAV.wav
Processing: /home/morph/Desktop/FINAL/TIMIT/TRAIN/DR1/FDAW0/SI2036.WAV.wav
Processing: /home/morph/Desktop/FINAL/TIMIT/TRAIN/DR1/FDAW0/SX416.WAV.wav
Processing: /home/morph/Desktop/FINAL/TIMIT/TRAIN/DR1/FDAW0/SX236.WAV.wav
Processing: /home/morph/Desktop/FINAL/TIMIT/TRAIN/DR1/FDAW0/SX56.WAV.wav
Processing: /home/morph/Desktop/FINAL/TIMIT/TRAIN/DR1/FDAW0/SA1.WAV.wav
Processing: /home/morph/Desktop/FINAL/TIMIT/TRAIN/DR1/MMRP0/SX54.WAV.wav
Processing: /home/morph/Desktop/FINAL/TIMIT/TRAIN/DR1/MMRP0/SX144.WAV.wav
Processing: /home/morph/Desktop/FINAL/TIMIT/TRAIN/DR1/MMRP0/SA2.WAV.wav
Processing: /home/morph/Desktop/FINAL/TIMIT

In [4]:
# Load the CSV written by process_timit_dataset. The nested phoneme lists were
# stored as text, so the word_phonemes column holds strings at this point.
df = pd.read_csv('/home/morph/Desktop/FINAL/TIMIToutput/timit_dataset.csv')
df.head()

Unnamed: 0,audio_path,sentence,word_phonemes
0,/home/morph/Desktop/FINAL/TIMIT/TRAIN/DR1/FDAW...,This has been attributed to helium film flow i...,"[['dh', 'ih', 's'], ['hh', 'eh', 'z'], ['bcl',..."
1,/home/morph/Desktop/FINAL/TIMIT/TRAIN/DR1/FDAW...,Steve collects rare and novel coins.,"[['s', 'tcl', 't', 'iy', 'v'], ['kcl', 'k', 'a..."
2,/home/morph/Desktop/FINAL/TIMIT/TRAIN/DR1/FDAW...,Cyclical programs will never compile.,"[['s', 'ih', 'kcl', 'k', 'l', 'ix', 'kcl', 'k'..."
3,/home/morph/Desktop/FINAL/TIMIT/TRAIN/DR1/FDAW...,"Receiving no answer, they set the fire.","[['r', 'iy', 's', 'iy', 'v', 'ih', 'ng'], ['n'..."
4,/home/morph/Desktop/FINAL/TIMIT/TRAIN/DR1/FDAW...,Don't ask me to carry an oily rag like that.,"[['d', 'ow', 'n'], ['ae', 's', 'kcl'], ['m', '..."


In [5]:
def get_duration_in_minutes(audio_path):
    """Return the duration of an audio file in minutes.

    Args:
        audio_path: Path of the audio file handed to ``mediainfo``.

    Returns:
        The ``'duration'`` entry reported by ``mediainfo`` (seconds),
        converted to ``float`` and divided by 60.
    """
    # mediainfo reports the stream duration in seconds.
    seconds = float(mediainfo(audio_path)['duration'])
    # Convert seconds to minutes.
    return seconds / 60

# Probe every audio file once and store its length, in minutes, in a new column.
df['duration'] = df['audio_path'].apply(get_duration_in_minutes)

# Sum of the per-file durations, i.e. the total audio length in minutes.
total_duration = df['duration'].sum()

# The message reads: "Total duration of the whole dataset: <N> minutes".
print(f'Общая длительность всего датасета: {total_duration:.2f} минут')

Общая длительность всего датасета: 323.00 минут


In [6]:
# Spot-check a single utterance (row label 122): the sentence text followed by
# its per-word phoneme lists.
print(df['sentence'][122])
print(df['word_phonemes'][122])

The kid has no manners, boys.
[['dh', 'eh'], ['kcl', 'k', 'ih', 'dcl', 'd'], ['hv', 'ae', 's'], ['n', 'ow'], ['m', 'ae', 'n', 'er', 's'], ['bcl', 'b', 'oy', 's']]


In [7]:
# Render an inline audio player for the recording at row label 122.
ipd.Audio(df['audio_path'][122])

In [8]:
# Mapping from TIMIT phone labels to the IPA symbols used for the gruut-style
# transcription. Labels mapped to '' are dropped from the output.
TIMIT_TO_GRUUT = {
    'iy': 'i',     # as in "beat"
    'ih': 'ɪ',     # as in "bit"
    'eh': 'ɛ',     # as in "bet"
    'ae': 'æ',     # as in "bat"
    'uh': 'ʊ',     # as in "book"
    'ah': 'ʌ',     # as in "but"
    'uw': 'u',     # as in "boot"
    'aa': 'ɑ',     # as in "father"
    'ey': 'e',     # as in "bait"
    'ay': 'aɪ',    # as in "bite"
    'oy': 'ɔɪ',    # as in "boy"
    'aw': 'aʊ',    # as in "cow"
    'ow': 'o',     # as in "boat"
    'l': 'l',      # as in "lamp"
    'r': 'ɹ',      # as in "run"
    'y': 'j',      # as in "yes"
    'w': 'w',      # as in "wet"
    'er': 'ɚ',     # as in "butter"
    'm': 'm',      # as in "man"
    'n': 'n',      # as in "no"
    'ng': 'ŋ',     # as in "song"
    'ch': 't͡ʃ',   # as in "chop"
    'jh': 'd͡ʒ',   # as in "judge"
    'dh': 'ð',     # as in "this"
    'b': 'b',      # as in "bat"
    'd': 'd',      # as in "dog"
    'dx': 'ɾ',     # as in "butter" (flap)
    'g': 'ɡ',      # as in "go"
    'p': 'p',      # as in "pat"
    't': 't',      # as in "top"
    'k': 'k',      # as in "cat"
    'z': 'z',      # as in "zoo"
    'v': 'v',      # as in "van"
    'f': 'f',      # as in "fan"
    'th': 'θ',     # as in "think"
    's': 's',      # as in "see"
    'sh': 'ʃ',     # as in "shoe"
    'hh': 'h',     # as in "hat"
    'ix': 'ɪ',     # as in an unstressed syllable
    'ax': 'ə',     # as in "sofa"
    'ux': 'u',     # variant of "uw"
    'ao': 'ɔ',     # as in "thought"
    'el': 'l̩',    # syllabic "l"
    'axr': 'ɚ',    # as in "butter"
    'em': 'm̩',    # syllabic "m"
    # "nx" is the nasal flap (as in "winner"), not a syllabic consonant, so it
    # is folded to plain "n" rather than sharing the symbol used for "en".
    'nx': 'n',
    'en': 'n̩',    # syllabic "n"
    'eng': 'ŋ̩',   # syllabic "ng"
    'zh': 'ʒ',    # as in "measure"
    'hv': 'h',     # variant of "hh"
    'ax-h': 'ə',   # unstressed (devoiced) schwa

    # Closures, pauses and noise: dropped from the output
    'pcl': '',   # plosive closure
    'tcl': '',   # plosive closure
    'kcl': '',   # plosive closure
    'qcl': '',   # plosive closure
    'bcl': '',   # plosive closure
    'dcl': '',   # plosive closure
    'gcl': '',   # plosive closure
    '#h': '',    # noise
    'pau': '',   # pause
    'epi': '',   # epenthetic silence
    'h#': '',    # silence
    'q': ''      # glottal stop
}


def convert_timit_to_gruut(word_phonemes):
    """Turn per-word TIMIT phone lists into per-word strings via TIMIT_TO_GRUUT.

    Args:
        word_phonemes: Iterable of words, each an iterable of TIMIT labels.

    Returns:
        List with one string per word: the concatenation of the mapped
        symbols. Labels missing from ``TIMIT_TO_GRUUT`` contribute nothing.
    """
    return [
        ''.join(TIMIT_TO_GRUUT.get(symbol, '') for symbol in timit_word)
        for timit_word in word_phonemes
    ]

# word_phonemes was read back from CSV as text. Parse it with
# ast.literal_eval, which only accepts Python literals, instead of eval,
# which would execute any expression found in the file.
df['gruut_phonemes'] = (
    df['word_phonemes']
    .apply(ast.literal_eval)
    .apply(convert_timit_to_gruut)
)


df.head()

Unnamed: 0,audio_path,sentence,word_phonemes,duration,gruut_phonemes
0,/home/morph/Desktop/FINAL/TIMIT/TRAIN/DR1/FDAW...,This has been attributed to helium film flow i...,"[['dh', 'ih', 's'], ['hh', 'eh', 'z'], ['bcl',...",0.081067,"[ðɪs, hɛz, bɛn, ɪt͡ʃɪbuɾɪ, tu, hɪliɪm, fɪlm, f..."
1,/home/morph/Desktop/FINAL/TIMIT/TRAIN/DR1/FDAW...,Steve collects rare and novel coins.,"[['s', 'tcl', 't', 'iy', 'v'], ['kcl', 'k', 'a...",0.047894,"[stiv, kəlɛs, ɹɛɹ, ɪn, nɑvl̩, kɔɪns]"
2,/home/morph/Desktop/FINAL/TIMIT/TRAIN/DR1/FDAW...,Cyclical programs will never compile.,"[['s', 'ih', 'kcl', 'k', 'l', 'ix', 'kcl', 'k'...",0.043841,"[sɪklɪkl̩, pɹoɡɹæmz, wl̩, nɛvɚ, kəmpɑl]"
3,/home/morph/Desktop/FINAL/TIMIT/TRAIN/DR1/FDAW...,"Receiving no answer, they set the fire.","[['r', 'iy', 's', 'iy', 'v', 'ih', 'ng'], ['n'...",0.042347,"[ɹisivɪŋ, no, ænsɚ, ðe, sɛ, ðɪ, faɪɚ]"
4,/home/morph/Desktop/FINAL/TIMIT/TRAIN/DR1/FDAW...,Don't ask me to carry an oily rag like that.,"[['d', 'ow', 'n'], ['ae', 's', 'kcl'], ['m', '...",0.044588,"[don, æs, mɪ, ɾɪ, kɛɹi, ɪn, ɔli, ɹæɡ, laɪ, ðæ]"


In [9]:
# Mapping from TIMIT phone labels to the IPA symbols used for the
# phonemizer-style transcription. Labels mapped to '' are dropped; pauses and
# silence become a space and the glottal stop is kept.
TIMIT_TO_PHONEMIZER = {
    'iy': 'i',     # as in "beat"
    'ih': 'ɪ',     # as in "bit"
    'eh': 'ɛ',     # as in "bet"
    'ae': 'æ',     # as in "bat"
    'uh': 'ʊ',     # as in "book"
    'ah': 'ʌ',     # as in "but"
    'uw': 'u',     # as in "boot"
    'aa': 'ɑ',     # as in "father"
    'ey': 'e',     # as in "bait"
    'ay': 'aɪ',    # as in "bite"
    'oy': 'ɔɪ',    # as in "boy"
    'aw': 'aʊ',    # as in "cow"
    'ow': 'o',     # as in "boat"
    'l': 'l',      # as in "lamp"
    'r': 'ɹ',      # as in "run"
    'y': 'j',      # as in "yes"
    'w': 'w',      # as in "wet"
    'er': 'ɚ',     # as in "butter"
    'm': 'm',      # as in "man"
    'n': 'n',      # as in "no"
    'ng': 'ŋ',     # as in "song"
    'ch': 'tʃ',    # as in "chop"
    'jh': 'dʒ',    # as in "judge"
    'dh': 'ð',     # as in "this"
    'b': 'b',      # as in "bat"
    'd': 'd',      # as in "dog"
    'dx': 'ɾ',     # as in "butter" (flap)
    'g': 'ɡ',      # as in "go"
    'p': 'p',      # as in "pat"
    't': 't',      # as in "top"
    'k': 'k',      # as in "cat"
    'z': 'z',      # as in "zoo"
    'v': 'v',      # as in "van"
    'f': 'f',      # as in "fan"
    'th': 'θ',     # as in "think"
    's': 's',      # as in "see"
    'sh': 'ʃ',     # as in "shoe"
    'hh': 'h',     # as in "hat"
    'ix': 'ɪ',     # as in an unstressed syllable
    'ax': 'ə',     # as in "sofa"
    'ux': 'u',     # variant of "uw"
    'ao': 'ɔ',     # as in "thought"
    'el': 'l̩',    # syllabic "l"
    'axr': 'ɚ',    # as in "butter"
    'em': 'm̩',    # syllabic "m"
    # "nx" is the nasal flap (as in "winner"), not a syllabic consonant, so it
    # is folded to plain "n" rather than sharing the symbol used for "en".
    'nx': 'n',
    'en': 'n̩',    # syllabic "n"
    'eng': 'ŋ̩',   # syllabic "ng"
    'zh': 'ʒ',    # as in "measure"
    'hv': 'h',     # variant of "hh"
    'ax-h': 'ə',   # unstressed (devoiced) schwa
    'pcl': '',     # plosive closure
    'tcl': '',     # plosive closure
    'kcl': '',     # plosive closure
    'qcl': '',     # plosive closure
    'bcl': '',     # plosive closure
    'dcl': '',     # plosive closure
    'gcl': '',     # plosive closure
    '#h': '',      # noise
    'pau': ' ',    # pause
    'epi': '',     # epenthetic silence
    'h#': ' ',     # silence
    'q': 'ʔ'       # glottal stop
}

def convert_timit_to_phonemizer(word_phonemes):
    """Turn per-word TIMIT phone lists into per-word strings via TIMIT_TO_PHONEMIZER.

    Args:
        word_phonemes: Iterable of words, each an iterable of TIMIT labels.

    Returns:
        List with one string per word: the concatenation of the mapped
        symbols. Labels missing from ``TIMIT_TO_PHONEMIZER`` contribute
        nothing.
    """
    return [
        ''.join(TIMIT_TO_PHONEMIZER.get(symbol, '') for symbol in timit_word)
        for timit_word in word_phonemes
    ]

# word_phonemes was read back from CSV as text. Parse it with
# ast.literal_eval, which only accepts Python literals, instead of eval,
# which would execute any expression found in the file.
df['phonemizer_phonemes'] = (
    df['word_phonemes']
    .apply(ast.literal_eval)
    .apply(convert_timit_to_phonemizer)
)


In [10]:
# Print the sentence, raw TIMIT phones and both converted transcriptions for
# the first five rows, with an empty line after each row.
for row_label in range(5):
    for column in ('sentence', 'word_phonemes', 'gruut_phonemes', 'phonemizer_phonemes'):
        print(df[column][row_label])
    print('')

This has been attributed to helium film flow in the vapor pressure thermometer.
[['dh', 'ih', 's'], ['hh', 'eh', 'z'], ['bcl', 'b', 'eh', 'n'], ['ih', 'tcl', 'ch', 'ih', 'bcl', 'b', 'ux', 'dx', 'ih', 'dcl'], ['t', 'ux'], ['hv', 'ih', 'l', 'iy', 'ix', 'm'], ['f', 'ih', 'l', 'm'], ['f', 'l', 'ow'], ['ax', 'n'], ['dh', 'ix'], ['v', 'ey', 'pcl', 'p', 'ax'], ['pcl', 'p', 'r', 'eh', 'sh', 'er'], ['th', 'ax', 'm', 'aa', 'm', 'ix', 'dx', 'axr']]
['ðɪs', 'hɛz', 'bɛn', 'ɪt͡ʃɪbuɾɪ', 'tu', 'hɪliɪm', 'fɪlm', 'flo', 'ən', 'ðɪ', 'vepə', 'pɹɛʃɚ', 'θəmɑmɪɾɚ']
['ðɪs', 'hɛz', 'bɛn', 'ɪtʃɪbuɾɪ', 'tu', 'hɪliɪm', 'fɪlm', 'flo', 'ən', 'ðɪ', 'vepə', 'pɹɛʃɚ', 'θəmɑmɪɾɚ']

Steve collects rare and novel coins.
[['s', 'tcl', 't', 'iy', 'v'], ['kcl', 'k', 'ax', 'l', 'eh', 'kcl', 's'], ['r', 'eh', 'r'], ['q', 'ix', 'n'], ['n', 'aa', 'v', 'el'], ['kcl', 'k', 'oy', 'n', 's']]
['stiv', 'kəlɛs', 'ɹɛɹ', 'ɪn', 'nɑvl̩', 'kɔɪns']
['stiv', 'kəlɛs', 'ɹɛɹ', 'ʔɪn', 'nɑvl̩', 'kɔɪns']

Cyclical programs will never compile.
[['s'

In [11]:
# Persist the enriched frame without writing the index column.
# NOTE(review): this overwrites the same CSV that was loaded earlier in the
# notebook, so the file on disk gains the extra columns added since then.
df.to_csv('/home/morph/Desktop/FINAL/TIMIToutput/timit_dataset.csv', index=False)

In [12]:
# Display the final DataFrame; the rendered output truncates the middle rows.
df

Unnamed: 0,audio_path,sentence,word_phonemes,duration,gruut_phonemes,phonemizer_phonemes
0,/home/morph/Desktop/FINAL/TIMIT/TRAIN/DR1/FDAW...,This has been attributed to helium film flow i...,"[['dh', 'ih', 's'], ['hh', 'eh', 'z'], ['bcl',...",0.081067,"[ðɪs, hɛz, bɛn, ɪt͡ʃɪbuɾɪ, tu, hɪliɪm, fɪlm, f...","[ðɪs, hɛz, bɛn, ɪtʃɪbuɾɪ, tu, hɪliɪm, fɪlm, fl..."
1,/home/morph/Desktop/FINAL/TIMIT/TRAIN/DR1/FDAW...,Steve collects rare and novel coins.,"[['s', 'tcl', 't', 'iy', 'v'], ['kcl', 'k', 'a...",0.047894,"[stiv, kəlɛs, ɹɛɹ, ɪn, nɑvl̩, kɔɪns]","[stiv, kəlɛs, ɹɛɹ, ʔɪn, nɑvl̩, kɔɪns]"
2,/home/morph/Desktop/FINAL/TIMIT/TRAIN/DR1/FDAW...,Cyclical programs will never compile.,"[['s', 'ih', 'kcl', 'k', 'l', 'ix', 'kcl', 'k'...",0.043841,"[sɪklɪkl̩, pɹoɡɹæmz, wl̩, nɛvɚ, kəmpɑl]","[sɪklɪkl̩, pɹoɡɹæmz, wl̩, nɛvɚ, kəmpɑl]"
3,/home/morph/Desktop/FINAL/TIMIT/TRAIN/DR1/FDAW...,"Receiving no answer, they set the fire.","[['r', 'iy', 's', 'iy', 'v', 'ih', 'ng'], ['n'...",0.042347,"[ɹisivɪŋ, no, ænsɚ, ðe, sɛ, ðɪ, faɪɚ]","[ɹisivɪŋ, no, ænsɚ, ðe, sɛʔ, ðɪ, faɪɚ]"
4,/home/morph/Desktop/FINAL/TIMIT/TRAIN/DR1/FDAW...,Don't ask me to carry an oily rag like that.,"[['d', 'ow', 'n'], ['ae', 's', 'kcl'], ['m', '...",0.044588,"[don, æs, mɪ, ɾɪ, kɛɹi, ɪn, ɔli, ɹæɡ, laɪ, ðæ]","[don, æs, mɪ, ɾɪ, kɛɹi, ɪn, ʔɔli, ɹæɡ, laɪ, ðæ]"
...,...,...,...,...,...,...
6295,/home/morph/Desktop/FINAL/TIMIT/TEST/DR4/FSEM0...,It is awe-inspiring.,"[['q', 'ix', 'dx'], ['ix', 'z'], ['q', 'aa'], ...",0.044588,"[ɪɾ, ɪz, ɑ, ɪnspaɪɚɪŋ]","[ʔɪɾ, ɪz, ʔɑ, ɪnspaɪɚɪŋ]"
6296,/home/morph/Desktop/FINAL/TIMIT/TEST/DR4/FSEM0...,Rob sat by the pond and sketched the stray geese.,"[['r', 'aa', 'bcl', 'b'], ['s', 'eh', 'tcl'], ...",0.051841,"[ɹɑb, sɛ, baɪ, ðə, pɑn, ən, skɛt͡ʃ, ðɪ, stɹe, ...","[ɹɑb, sɛ, baɪ, ðə, pɑn, ən, skɛtʃ, ðɪ, stɹe, ɡis]"
6297,/home/morph/Desktop/FINAL/TIMIT/TEST/DR4/FSEM0...,At right is a casual style in a crushed unline...,"[['q', 'ae', 'q'], ['r', 'ay', 'q'], ['ix', 'z...",0.062187,"[æ, ɹaɪ, ɪz, ɪ, kæʒl̩, staɪl, ən, ə, kɹʌʃt, ʌn...","[ʔæʔ, ɹaɪʔ, ɪz, ɪ, kæʒl̩, staɪl, ən, ə, kɹʌʃt,..."
6298,/home/morph/Desktop/FINAL/TIMIT/TEST/DR4/FSEM0...,Beg that guard for one gallon of gas.,"[['b', 'ey', 'gcl'], ['dh', 'ae', 'tcl'], ['g'...",0.050347,"[be, ðæ, ɡɑɹ, fɚ, wʌn, ɡælən, ʌv, ɡæs]","[be, ðæ, ɡɑɹ, fɚ, wʌn, ɡælən, ʌv, ɡæs]"
