# Run IBM ASR

In [188]:
import stt.ibm.stt as ibm_stt
import tools.txtgrid_master.TextGrid_Master as tgm
import librosa
import soundfile as sf
import json
import pandas as pd
import numpy as np
from collections import defaultdict

In [190]:
wav_file = 'samples/325/325_Primary_34-01.wav'
wav_16_file = 'samples/325/325_Primary_34-01_16k.wav'
wav_44_file = 'samples/325/325_Primary_34-01_44k.wav'
json_file = 'samples/325/325_Primary_34-01.json'
txtgrid_file = 'samples/325/325_Primary_34-01.TextGrid'

In [75]:
#Get duration
dur_in_secs = librosa.get_duration(filename=wav_16_file)

In [7]:
#Convert file to 16k mono
y, sr = librosa.load(wav_file, sr=16000, mono=True)
sf.write(wav_16_file,y,sr)

In [None]:
#Run ibm asr
#model_str could be 'Multimedia' or ['BroadbandModel']
results = ibm_stt.stt_audio_file(wav_16_file,model_str='BroadbandModel')
with open(json_file,'w') as fjson:
    json.dump(results, fjson)

In [9]:
#Load results from json
with open(json_file) as fjson:
    results = json.load(fjson)

In [11]:
def get_speakers_number(results):
    if isinstance(results, list):
        results = results[0]
    return len(set([x['speaker'] for x in results['speaker_labels']]))

In [12]:
get_speakers_number(results)

3

In [24]:
df_spkrs = pd.DataFrame.from_records(results[0]['speaker_labels'])

In [43]:
timestamps=[]
_ = [timestamps.extend(x['alternatives'][0]['timestamps']) for x in results[0]['results']]
df_words = pd.DataFrame.from_records(timestamps,columns=['word','from','to'])

In [47]:
df_results = pd.merge(df_spkrs, df_words, on=['from','to'])

In [82]:
n_spkrs = df_results.speaker.unique().shape[0]
tiersDict = {}
for spk in df_results.speaker.unique():
    tierName = 'spk{}'.format(spk)
    df_results_spk = df_results[df_results.speaker==spk]
    stTime, endTime, labels = [a.squeeze() for a in np.split(df_results_spk[['from','to','word']].values,3,1)]
    tiersDict[tierName] = (list(stTime), list(endTime), list(labels))    

In [83]:
tgm.WriteTxtGrdFromDict(txtgrid_file,tiersDict,0,dur_in_secs,sFilGab='')

# Kaldi ASR

In [1]:
import tools.txtgrid_master.TextGrid_Master as tgm
import librosa
import soundfile as sf
import tools.comm_asr_from_txtgrid as cm
from importlib import reload
reload(cm)

<module 'tools.comm_asr_from_txtgrid' from '/media/Windows/root/AusKidTalkv2/tools/comm_asr_from_txtgrid.py'>

In [6]:
spkrID = '378'
ibm_txtgrid  = '/media/Windows/root/AusKidTalkv2/samples/children/'+spkrID+'/primary_16b_task1_'+spkrID+'_ibm.TextGrid'
init_txtgrid = '/media/Windows/root/AusKidTalkv2/samples/children/'+spkrID+'/primary_16b_task1_'+spkrID+'.TextGrid'
m_txtgrid = '/media/Windows/root/AusKidTalkv2/samples/children/'+spkrID+'/primary_16b_task1_'+spkrID+'_m.TextGrid' #merge ibm+init
wav_44 = '/media/Windows/root/AusKidTalkv2/samples/children/'+spkrID+'/primary_16b_task1_'+spkrID+'.wav'
wav_16 = '/media/Windows/root/AusKidTalkv2/samples/children/'+spkrID+'/primary_16b_task1_'+spkrID+'_16k.wav'

In [None]:
#Convert from 44k to 16 k
y, sr = librosa.load(wav_44, sr=16000, mono=True)
sf.write(wav_16,y,sr)

Note that textgrid from ibm output could be on transcript or word levels

In [27]:
#Merge ibm txtgrid
tgm.MergeTxtGrids([ibm_txtgrid, init_txtgrid], sOutputFile=m_txtgrid, sWavFile=wav_44)

AssertionError: Either End times or Start Times of tier 0-s1 not in order

TODO:
- relax the time interval boundries of the ibm before kaldi
- do kaldi 4 times:
-- 1st on the init intervals
-- 2nd on the intervals of the 3 text grids of ibm
-- if intervals from init not overlabed with any interval of the 3 ibm, copy it to all of them, could be marked with **
-- Note *RELAX THE TIME BOUNDRY OF IBM*

In [11]:
#Merged
data_I = cm.get_valid_data(init_txtgrid)
data_s0_m = cm.get_valid_data(ibm_txtgrid, sPromptTier='s0',offset=0.5,bMerge=True)
data_s1_m = cm.get_valid_data(ibm_txtgrid, sPromptTier='s1', offset=0.5,bMerge=True)
data_s2_m = cm.get_valid_data(ibm_txtgrid, sPromptTier='s2', offset=0.5,bMerge=True)

In [19]:
for r in data_I.iterrows():
    if (data_s0_m[((data_s0_m.start_time > r[1].start_time) & (data_s0_m.start_time < r[1].end_time)) | 
                 ((data_s0_m.start_time < r[1].start_time) & (data_s0_m.end_time > r[1].start_time))].size == 0 and
       data_s1_m[((data_s1_m.start_time > r[1].start_time) & (data_s1_m.start_time < r[1].end_time)) | 
                 ((data_s1_m.start_time < r[1].start_time) & (data_s1_m.end_time > r[1].start_time))].size == 0 and
       data_s2_m[((data_s2_m.start_time > r[1].start_time) & (data_s2_m.start_time < r[1].end_time)) | 
                 ((data_s2_m.start_time < r[1].start_time) & (data_s2_m.end_time > r[1].start_time))].size == 0):
        data_s0_m.loc[-1] = r[1]
        data_s1_m.loc[-1] = r[1]
        data_s2_m.loc[-1] = r[1]
        data_s0_m.index = data_s0_m.index+1
        data_s1_m.index = data_s1_m.index+1
        data_s2_m.index = data_s2_m.index+1
        #print(r[1].label)
data_s0_m.sort_values('start_time',inplace=True)
data_s1_m.sort_values('start_time',inplace=True)
data_s2_m.sort_values('start_time',inplace=True)

In [5]:
data_s0 = cm.get_valid_data(ibm_txtgrid, sPromptTier='s0',offset=0.5,bMerge=False)
data_s1 = cm.get_valid_data(ibm_txtgrid, sPromptTier='s1', offset=0.5,bMerge=False)
data_s2 = cm.get_valid_data(ibm_txtgrid, sPromptTier='s2', offset=0.5,bMerge=False)

In [None]:
lang = 'en-AU'
taskID = 'task1'
resDir = '/media/Windows/root/AusKidTalkv2/output/init_'+spkrID
asr_engine='kaldi'
kaldi_model='model5'
cm.process_data(sWaveFile=wav_16,data=data_I, lang= lang, spkr_ID=spkrID,rcrd_ID=taskID, out_dir=resDir, asr_engine=asr_engine, forced_upload=False, kaldi_model=kaldi_model)

In [None]:
lang = 'en-AU'
taskID = 'task1'
resDir = '/media/Windows/root/AusKidTalkv2/output/s0_'+spkrID
asr_engine='kaldi'
kaldi_model='model5'
cm.process_data(sWaveFile=wav_16,data=data_s0_m, lang= lang, spkr_ID=spkrID,rcrd_ID=taskID, out_dir=resDir, asr_engine=asr_engine, forced_upload=False, kaldi_model=kaldi_model)

In [None]:
lang = 'en-AU'
taskID = 'task1'
resDir = '/media/Windows/root/AusKidTalkv2/output/s1_'+spkrID
asr_engine='kaldi'
kaldi_model='model5'
cm.process_data(sWaveFile=wav_16,data=data_s1_m, lang= lang, spkr_ID=spkrID,rcrd_ID=taskID, out_dir=resDir, asr_engine=asr_engine, forced_upload=False, kaldi_model=kaldi_model)

In [None]:
lang = 'en-AU'
taskID = 'task1'
resDir = '/media/Windows/root/AusKidTalkv2/output/s2_'+spkrID
asr_engine='kaldi'
kaldi_model='model5'
cm.process_data(sWaveFile=wav_16,data=data_s2_m, lang= lang, spkr_ID=spkrID,rcrd_ID=taskID, out_dir=resDir, asr_engine=asr_engine, forced_upload=False, kaldi_model=kaldi_model)

In [25]:
import shutil
s0_txtgrid = '/media/Windows/root/AusKidTalkv2/output/s0_{0}/{0}_{1}_{2}_{3}_concat.TextGrid'.format(spkrID, taskID, asr_engine, lang)
s1_txtgrid = '/media/Windows/root/AusKidTalkv2/output/s1_{0}/{0}_{1}_{2}_{3}_concat.TextGrid'.format(spkrID, taskID, asr_engine, lang)
s2_txtgrid = '/media/Windows/root/AusKidTalkv2/output/s2_{0}/{0}_{1}_{2}_{3}_concat.TextGrid'.format(spkrID, taskID, asr_engine, lang)
s0_txtgrid_dest = '/media/Windows/root/AusKidTalkv2/output/{0}_s0_{1}_{2}.TextGrid'.format(spkrID, taskID, asr_engine)
s1_txtgrid_dest = '/media/Windows/root/AusKidTalkv2/output/{0}_s1_{1}_{2}.TextGrid'.format(spkrID, taskID, asr_engine)
s2_txtgrid_dest = '/media/Windows/root/AusKidTalkv2/output/{0}_s2_{1}_{2}.TextGrid'.format(spkrID, taskID, asr_engine)
shutil.copyfile(s0_txtgrid,s0_txtgrid_dest)
shutil.copyfile(s1_txtgrid,s1_txtgrid_dest)
shutil.copyfile(s2_txtgrid,s2_txtgrid_dest)

'/media/Windows/root/AusKidTalkv2/output/328_s2_task1_kaldi.TextGrid'

In [7]:
#Merge s0, s1, s2 in one file
taskID = 'task1'
asr_engine='kaldi'
lang = 'en-AU'
s0_txtgrid = '/media/Windows/root/AusKidTalkv2/samples/children/output/s0_{0}/{0}_{1}_{2}_{3}_concat.TextGrid'.format(spkrID, taskID, asr_engine, lang)
s1_txtgrid = '/media/Windows/root/AusKidTalkv2/samples/children/output/s1_{0}/{0}_{1}_{2}_{3}_concat.TextGrid'.format(spkrID, taskID, asr_engine, lang)
s2_txtgrid = '/media/Windows/root/AusKidTalkv2/samples/children/output/s2_{0}/{0}_{1}_{2}_{3}_concat.TextGrid'.format(spkrID, taskID, asr_engine, lang)
dest_file = '/media/Windows/root/AusKidTalkv2/samples/children/output/{0}_{1}_{2}_fixed.TextGrid'.format(spkrID, taskID, asr_engine)
tgm.MergeTxtGrids([s0_txtgrid,s1_txtgrid,s2_txtgrid],dest_file,sWavFile=wav_44, aSlctdTiers=[{'kaldi-words':'s0-kaldi-words'},{'kaldi-words':'s1-kaldi-words'},{'kaldi-words':'s2-kaldi-words'}])

In [25]:
s0_txtgrid = '/media/Windows/root/AusKidTalkv2/output/s0_{0}/{0}_{1}_{2}_{3}_concat.TextGrid'.format(spkrID, taskID, asr_engine, lang)
s1_txtgrid = '/media/Windows/root/AusKidTalkv2/output/s1_{0}/{0}_{1}_{2}_{3}_concat.TextGrid'.format(spkrID, taskID, asr_engine, lang)
s2_txtgrid = '/media/Windows/root/AusKidTalkv2/output/s2_{0}/{0}_{1}_{2}_{3}_concat.TextGrid'.format(spkrID, taskID, asr_engine, lang)
init_txtgrid = '/media/Windows/root/AusKidTalkv2/output/init_{0}/{0}_{1}_{2}_{3}_concat.TextGrid'.format(spkrID, taskID, asr_engine, lang)
s0_dTiers = tgm.ParseTxtGrd(s0_txtgrid)
s1_dTiers = tgm.ParseTxtGrd(s1_txtgrid)
s2_dTiers = tgm.ParseTxtGrd(s2_txtgrid)
init_dTiers = tgm.ParseTxtGrd(init_txtgrid)
import pandas as pd
df_s0 = pd.DataFrame.from_dict({'st':s0_dTiers['kaldi-words'][0],'et':s0_dTiers['kaldi-words'][1],'label':s0_dTiers['kaldi-words'][2]})
df_s1 = pd.DataFrame.from_dict({'st':s1_dTiers['kaldi-words'][0],'et':s1_dTiers['kaldi-words'][1],'label':s1_dTiers['kaldi-words'][2]})
df_s2 = pd.DataFrame.from_dict({'st':s2_dTiers['kaldi-words'][0],'et':s2_dTiers['kaldi-words'][1],'label':s2_dTiers['kaldi-words'][2]})
df_init = pd.DataFrame.from_dict({'st':init_dTiers['kaldi-words'][0],'et':init_dTiers['kaldi-words'][1],'label':init_dTiers['kaldi-words'][2]})

In [6]:
dTiers = tgm.ParseTxtGrd(s0_txtgrid)

In [8]:
{k:k for k in dTiers.keys()}

{'kaldi-words': 'kaldi-words'}

In [42]:
for r in data_I.iterrows():
    if (data_s0_m[((data_s0_m.start_time > r[1].start_time) & (data_s0_m.start_time < r[1].end_time)) | 
                 ((data_s0_m.start_time < r[1].start_time) & (data_s0_m.end_time > r[1].start_time))].size == 0 and
       data_s1_m[((data_s1_m.start_time > r[1].start_time) & (data_s1_m.start_time < r[1].end_time)) | 
                 ((data_s1_m.start_time < r[1].start_time) & (data_s1_m.end_time > r[1].start_time))].size == 0 and
       data_s2_m[((data_s2_m.start_time > r[1].start_time) & (data_s2_m.start_time < r[1].end_time)) | 
                 ((data_s2_m.start_time < r[1].start_time) & (data_s2_m.end_time > r[1].start_time))].size == 0):
        print(r[1].label)

In [34]:
data_s1_m

Unnamed: 0,start_time,end_time,label
1,20.34,24.89,sure there is not working seventeen months
3,33.10,35.15,a dark
5,36.16,38.18,dark
7,39.65,41.62,fish
9,42.35,44.42,car
...,...,...,...
285,1083.10,1085.25,to it
287,1086.91,1089.23,three X.
289,1090.24,1092.57,before it
291,1094.78,1097.09,on the clock


In [35]:
data_I

Unnamed: 0,start_time,end_time,label
1,28.6455,39.206,duck
3,39.2325,42.608,fish
5,42.6255,45.034,car
7,45.0600,48.729,key
9,48.7460,51.770,boat
...,...,...,...
253,1086.7335,1089.131,Three eggs
255,1090.3165,1093.537,Four eggs
257,1094.6565,1097.208,One o clock
259,1098.3300,1100.705,Two o clock


In [21]:
1.19+0.24

1.43

In [20]:
data_s1_m[(data_s1_m.start_time>2060) & (data_s1_m.start_time<2080)]

Unnamed: 0,start_time,end_time,label
401,2066.5,2068.91,it kept up
403,2073.56,2076.32,branch issues
405,2079.66,2081.83,leave


In [7]:
from os.path import join
import glob
import tools.txtgrid_master.TextGrid_Master as tgm
resDir = '/media/Windows/root/AusKidTalkv2/samples/children/output/s1_343/'
taskID = 'task1'
asr_engine = 'kaldi'
lang= 'en-AU'
results_pattern = join(resDir,'{0}_{1}_*_{2}_{3}_relative.TextGrid'.format(spkrID, taskID, asr_engine, lang))
lTxtGrids = glob.glob(results_pattern)
dConcatTiers = tgm.ConcatTxtGrids(lTxtGrids)

Get tierNames from texgrid...
Merging kaldi-words


In [9]:
import pandas as pd
d = pd.DataFrame.from_dict({'st':dConcatTiers['kaldi-words'][0],'et':dConcatTiers['kaldi-words'][1],'label':dConcatTiers['kaldi-words'][2]})

Unnamed: 0,st,et,label
0,28.35,29.21,sil
1,29.21,29.28,
2,29.28,30.11,snake
3,30.11,30.12,
4,30.12,30.72,sil
...,...,...,...
1579,2527.51,2527.54,o
1580,2527.54,2527.56,
1581,2527.56,2528.50,clock
1582,2528.50,2528.98,sil


In [14]:
d_st = d.sort_values('st')

In [27]:
import numpy as np
diff = d_st.et.values[0:d_st.shape[0]-1] - d_st.et.values[1:]
idx = np.where(diff > 0)[0]
idx = np.r_[idx,idx-1,idx+1]
idx.sort()

In [None]:
s_t = 0
e_t = 0
l = ''
for r in d_st.iterrows():
    if r.label != l:
        s_t

In [30]:
d_st.reset_index()

Unnamed: 0,index,st,et,label
0,0,232.23,232.87,sil
1,1,232.87,233.46,bead
2,2,233.45,234.01,sil
3,3,234.01,234.21,sil
4,4,302.93,303.62,ways
...,...,...,...,...
673,673,1587.47,1587.76,four
674,674,1587.76,1587.80,sil
675,675,1587.80,1588.61,o'clock
676,676,1588.61,1589.36,sil


In [26]:
np.r_[idx,idx-1].sort()

In [14]:
tgm.WriteTxtGrdFromDict('tmp.txtgrid', dConcatTiers_msil, 0, dConcatTiers_msil['{0}-words'.format(asr_engine)][1][-1])

AssertionError: Either End times or Start Times of tier kaldi-words not in order

In [13]:
dConcatTiers_msil = tgm.Merge_sil(dConcatTiers)

In [None]:
dConcatTiers_msil

In [16]:
import pandas as pd
d_msil = pd.DataFrame.from_dict({'st':dConcatTiers_msil['kaldi-words'][0],'et':dConcatTiers_msil['kaldi-words'][1],'label':dConcatTiers_msil['kaldi-words'][2]})

In [20]:
import numpy as np
d_st = d_msil.sort_values('st')
diff = d_st.et.values[0:d_st.shape[0]-1] - d_st.et.values[1:]
idx = np.where(diff > 0)[0]
idx = np.r_[idx,idx-1,idx+1,idx+2]
idx.sort()

In [21]:
d_st.loc[idx]

Unnamed: 0,st,et,label
318,563.81,563.86,
319,563.86,564.19,fifteen
320,563.9,564.05,if
321,564.05,564.08,sil
505,918.61,919.21,sil
506,919.2,919.43,no
507,919.21,919.22,sil
508,919.43,919.83,longer
582,1068.27,1070.09,sil
583,1069.93,1070.41,yeah


In [24]:
import tools.stt.kaldi.stt as k_stt
response, response_ph = k_stt.stt_audio_file('/media/Windows/root/AusKidTalkv2/samples/children/output/s1_343/343_task1_407.wav', model='model5')

In [30]:
response

Unnamed: 0,fileName,channel,startTime,dur,symb,conf
0,343_task1_407.wav,1,0.01,1.17,sil,0.74
1,343_task1_407.wav,1,1.19,0.24,the,0.56
2,343_task1_407.wav,1,1.42,0.98,sil,1.0


In [31]:
dTiers = {}
response = response[response.dur > 0]

In [33]:
response = response.drop_duplicates(subset='startTime', keep='last')

In [48]:
dTiers = cm.kaldi_words_to_dict(response)

In [49]:
dTiers_fixed = {}
for tierName in dTiers:
    df = pd.DataFrame.from_dict({'st':dTiers[tier][0],'et':dTiers[tier][1],'label':dTiers[tier][2]})
    for i in range(df.shape[0]-1):
        if df.loc[i].et > df.loc[i+1].st:
            df.loc[i,'et'] = df.loc[i+1].st
    dTiers_fixed[tierName] = [df.st.values, df.et.values, df.label.values]    

In [50]:
dTiers_fixed

{'kaldi-words': [array([0.01, 1.19, 1.42]),
  array([1.18, 1.42, 2.4 ]),
  array(['sil', 'the', 'sil'], dtype=object)]}

In [51]:
dTiers

{'kaldi-words': [array([0.01, 1.19, 1.42]),
  array([1.18, 1.43, 2.4 ]),
  array(['sil', 'the', 'sil'], dtype=object)]}

# Beep detection and Timestamps

In [191]:
#Get Beep times
config = 'scripts/beep.ini'
#Convert file to 44k mono and 16 bit for beep detection
y, sr = librosa.load(wav_file, sr=44000, mono=True)
sf.write(wav_44_file,y,sr)
lBeepTimes = init.GetBeepTimesML(config, wav_44_file)

100%|████████████████████████████████████████████████████████████████████████████████████▉| 393582/393583 [47:02<00:00, 139.43it/s]


In [192]:
lBeepTimes

array([  55.34,  703.93, 2486.5 , 2863.83, 3502.47])

# SQL

In [137]:
#MySQL database connection configuration
UserName = 'unsw'
Password = 'UNSWspeech'
HostIP = '184.168.98.156'
#DatabaseName = 'auskidtalk_prod'
DatabaseName = 'auskid_talk_prod_2'

In [138]:
import mysql.connector
connector = mysql.connector.connect(user=UserName, password=Password,
                              host=HostIP,
                              database=DatabaseName,buffered=True)
cursor = connector.cursor(buffered=True, dictionary=True)
query = ("SHOW TABLES")
cursor.execute(query)
results = cursor.fetchall()

In [139]:
results

[{'Tables_in_auskid_talk_prod_2': 'SESSION_PROGRESS'},
 {'Tables_in_auskid_talk_prod_2': 'Version_history'},
 {'Tables_in_auskid_talk_prod_2': 'child'},
 {'Tables_in_auskid_talk_prod_2': 'experiment'},
 {'Tables_in_auskid_talk_prod_2': 'location'},
 {'Tables_in_auskid_talk_prod_2': 'minigames_data'},
 {'Tables_in_auskid_talk_prod_2': 'ra'},
 {'Tables_in_auskid_talk_prod_2': 'review_questions_list'},
 {'Tables_in_auskid_talk_prod_2': 'task1'},
 {'Tables_in_auskid_talk_prod_2': 'task2_random_sequence'},
 {'Tables_in_auskid_talk_prod_2': 'task_start_end_times'},
 {'Tables_in_auskid_talk_prod_2': 'tasks_data'},
 {'Tables_in_auskid_talk_prod_2': 'words'}]

In [159]:
connector = mysql.connector.connect(user=UserName, password=Password,
                              host=HostIP,
                              database=DatabaseName,buffered=True)
cursor = connector.cursor(buffered=True, dictionary=True)
iChildID = 67
query = ("SELECT * FROM task_start_end_times WHERE child_id={0}".format(iChildID))
cursor.execute(query)
results = cursor.fetchall()

In [160]:
pdChild_Task = pd.DataFrame.from_dict(results)

In [161]:
pdChild_Task

Unnamed: 0,child_id,ra_id,task1_start_time,task1_end_time,task1_start_time_2,task1_end_time_2,task2_start_time,task2_end_time,task3_start_time,task3_end_time,task4_start_time,task4_end_time,task5_start_time,task5_end_time
0,67,18,"2022-02-09 13:40:37.049,2022-02-09 14:23:32.605,0","2022-02-09 14:05:10.641,2022-02-09 14:23:32.606,0","2022-02-09 14:05:52.838,2022-02-09 14:23:32.606,0","0,2022-02-09 14:28:39.132,0","0,2022-02-09 15:06:51.994,0","0,2022-02-09 15:16:59.165,0","0,2022-02-09 15:18:22.043,0","0,2022-02-09 15:22:09.437,0","0,2022-02-09 15:22:48.263,2022-02-09 15:38:00.469","0,0,2022-02-09 15:38:08.765","0,0,2022-02-09 15:39:11.964","0,0,2022-02-09 15:47:55.545"


In [165]:
connector = mysql.connector.connect(user=UserName, password=Password,
                              host=HostIP,
                              database=DatabaseName,buffered=True)
cursor = connector.cursor(buffered=True, dictionary=True)
query = ("SELECT * FROM experiment WHERE id={0} AND task_id=1".format(iChildID))
cursor.execute(query)
results = cursor.fetchall()

In [166]:
pdChild = pd.DataFrame.from_dict(results)

In [167]:
pdChild

Unnamed: 0,id,task_id,word_id,answer_value,answer_time,task1_attempt_count,task1_audio_cue_offset,audio_cue_onset,task2_sentence_offset,task2_start_signal_time_used,task2_child_starts_speaking,task3_time_spent_on_picture,task4_button_tap_time,task4_child_start_speaking,task4_audio_cue_offset,task1_retry1_timestamp,task1_retry2_timestamp,is_repeated,version_number
0,67,1,1,1,2022-02-09 13:43:39.965,0,2022-02-09 13:43:28.658,2022-02-09 13:43:28.613,,0,,0,,,,,,0,1
1,67,1,2,3,2022-02-09 13:47:54.859,2,2022-02-09 13:47:48.479,2022-02-09 13:43:39.976,,0,,0,,,,2022-02-09 13:44:42.237,2022-02-09 13:47:46.514,0,1
2,67,1,3,1,2022-02-09 13:53:39.248,2,2022-02-09 13:48:53.319,2022-02-09 13:47:54.871,,0,,0,,,,2022-02-09 13:48:17.058,2022-02-09 13:48:51.860,0,1
3,67,1,4,1,2022-02-09 13:53:41.280,0,2022-02-09 13:53:39.293,2022-02-09 13:53:39.254,,0,,0,,,,,,0,1
4,67,1,5,1,2022-02-09 13:53:42.991,0,2022-02-09 13:53:41.331,2022-02-09 13:53:41.284,,0,,0,,,,,,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
127,67,1,126,1,2022-02-09 14:27:19.418,0,2022-02-09 14:26:52.830,2022-02-09 14:26:50.429,,0,,0,,,,,,0,2
128,67,1,127,1,2022-02-09 14:28:23.355,0,2022-02-09 14:27:22.080,2022-02-09 14:27:19.421,,0,,0,,,,,,0,2
129,67,1,128,1,2022-02-09 14:28:27.010,0,2022-02-09 14:28:25.919,2022-02-09 14:28:23.366,,0,,0,,,,,,0,2
130,67,1,129,1,2022-02-09 14:28:34.583,0,2022-02-09 14:28:29.253,2022-02-09 14:28:27.015,,0,,0,,,,,,0,2


In [143]:
connector = mysql.connector.connect(user=UserName, password=Password,
                              host=HostIP,
                              database=DatabaseName,buffered=True)
cursor = connector.cursor(buffered=True, dictionary=True)
query = ("SELECT * FROM child")
cursor.execute(query)
results = cursor.fetchall()

In [144]:
df = pd.DataFrame.from_dict(results)

In [145]:
df

Unnamed: 0,id,ra_id,name,location_id,token
0,1,0,,0,
1,2,0,,0,
2,3,0,,0,
3,4,0,,0,
4,5,0,,0,
...,...,...,...,...,...
1316,1317,0,,0,
1317,1318,0,,3,eozL7Z3URUq4jKFCyD0a6O:APA91bESTAxR2nG0_69y8t3...
1318,1319,0,,1,
1319,1320,0,,1,


In [146]:
connector = mysql.connector.connect(user=UserName, password=Password,
                              host=HostIP,
                              database=DatabaseName,buffered=True)
cursor = connector.cursor(buffered=True, dictionary=True)
query = ("SELECT * FROM SESSION_PROGRESS WHERE current_task=5 AND current_question=41")
cursor.execute(query)
results = cursor.fetchall()
df = pd.DataFrame.from_dict(results)

In [157]:
df.child_id

0       733
1        65
2        81
3       460
4       461
       ... 
267    1147
268    1089
269    1318
270      62
271      67
Name: child_id, Length: 272, dtype: int64

In [148]:
connector = mysql.connector.connect(user=UserName, password=Password,
                              host=HostIP,
                              database=DatabaseName,buffered=True)
cursor = connector.cursor(buffered=True, dictionary=True)
iChildID = 719
query = ("SELECT * FROM task_start_end_times")
cursor.execute(query)
results = cursor.fetchall()
df = pd.DataFrame.from_dict(results)

In [158]:
df.iloc[271]

child_id                                                             67
ra_id                                                                18
task1_start_time      2022-02-09 13:40:37.049,2022-02-09 14:23:32.605,0
task1_end_time        2022-02-09 14:05:10.641,2022-02-09 14:23:32.606,0
task1_start_time_2    2022-02-09 14:05:52.838,2022-02-09 14:23:32.606,0
task1_end_time_2                            0,2022-02-09 14:28:39.132,0
task2_start_time                            0,2022-02-09 15:06:51.994,0
task2_end_time                              0,2022-02-09 15:16:59.165,0
task3_start_time                            0,2022-02-09 15:18:22.043,0
task3_end_time                              0,2022-02-09 15:22:09.437,0
task4_start_time      0,2022-02-09 15:22:48.263,2022-02-09 15:38:00.469
task4_end_time                              0,0,2022-02-09 15:38:08.765
task5_start_time                            0,0,2022-02-09 15:39:11.964
task5_end_time                              0,0,2022-02-09 15:47

In [156]:
df.iloc[]

Unnamed: 0,child_id,ra_id,task1_start_time,task1_end_time,task1_start_time_2,task1_end_time_2,task2_start_time,task2_end_time,task3_start_time,task3_end_time,task4_start_time,task4_end_time,task5_start_time,task5_end_time
0,733,8,2021-02-03 12:38:37.926,2021-02-03 12:45:36.928,2021-02-03 12:46:56.977,2021-02-03 12:53:37.236,2021-02-03 12:54:12.705,2021-02-03 13:01:44.392,2021-02-03 13:02:22.403,2021-02-03 13:04:41.652,2021-02-03 13:05:20.518,2021-02-03 13:10:18.636,2021-02-03 13:11:48.058,0
1,65,11,"2021-02-03 15:09:58.050,2021-02-03 15:13:07.99...","2021-04-07 14:23:32.759,0,2021-02-03 15:17:47....","2021-04-07 14:23:32.760,0,2021-02-03 15:18:24....","0,0,0,2021-02-03 15:25:39.769,0,0,0,0,0,2021-0...","0,0,0,2021-02-03 15:26:18.378,2021-02-03 15:27...","0,0,0,2021-02-03 15:26:35.570,0,2021-02-03 15:...","0,0,0,0,0,2021-02-03 15:33:03.279,0,0,0,2021-0...","0,0,0,0,0,2021-02-03 15:33:36.397,0,0,0,2021-0...","0,0,0,0,0,2021-02-03 15:34:15.567,2021-02-03 1...","0,0,0,0,0,2021-02-03 15:34:39.762,2021-02-03 1...","0,0,0,0,0,2021-02-03 15:35:18.451,2021-02-03 1...","0,0,0,0,0,2021-02-03 15:35:39.819,0,2021-02-03..."
2,81,4,"2021-02-03 17:06:05.591,0","2021-02-03 17:09:05.312,0","2021-02-03 17:44:05.343,0",00,00,00,00,00,00,00,00,00
3,460,13,2021-02-06 09:59:26.978,2021-02-06 10:06:59.157,2021-02-06 10:07:10.333,2021-02-06 10:18:43.904,2021-02-06 10:19:57.787,2021-02-06 10:25:30.675,2021-02-06 10:26:59.724,2021-02-06 10:29:48.323,2021-02-06 10:31:29.118,2021-02-06 10:36:51.911,2021-02-06 10:37:49.746,2021-02-06 10:43:04.249
4,461,13,2021-02-06 10:55:58.749,2021-02-06 11:01:47.540,2021-02-06 11:01:57.722,2021-02-06 11:11:43.373,2021-02-06 11:13:01.530,2021-02-06 11:18:37.708,2021-02-06 11:19:58.089,2021-02-06 11:22:24.944,2021-02-06 11:23:47.022,2021-02-06 11:28:46.113,2021-02-06 11:29:36.290,2021-02-06 11:34:40.612
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
267,1147,8,2022-01-19 11:18:03.957,2022-01-19 11:23:35.346,2022-01-19 11:24:17.025,2022-01-19 11:31:28.449,2022-01-19 11:32:09.421,2022-01-19 11:38:19.688,2022-01-19 11:39:04.632,2022-01-19 11:42:42.265,2022-01-19 11:43:24.860,2022-01-19 11:49:18.583,2022-01-19 11:50:02.196,2022-01-19 11:53:33.293
268,1089,8,2022-01-19 14:25:46.126,2022-01-19 14:30:26.879,2022-01-19 14:31:05.425,2022-01-19 14:38:54.876,2022-01-19 14:42:27.268,2022-01-19 14:48:40.637,2022-01-19 14:49:23.374,2022-01-19 14:52:28.448,2022-01-19 14:53:10.441,2022-01-19 14:58:15.254,2022-01-19 14:58:56.293,2022-01-19 15:02:07.217
269,1318,8,2022-01-25 14:42:02.540,2022-01-25 14:46:51.479,2022-01-25 14:47:29.501,2022-01-25 14:55:57.626,2022-01-25 14:58:00.553,2022-01-25 15:03:51.294,2022-01-25 15:04:32.247,2022-01-25 15:08:24.967,2022-01-25 15:09:03.396,2022-01-25 15:14:31.493,2022-01-25 15:15:17.361,2022-01-25 15:18:43.286
270,62,11,2022-02-02 12:20:17.849,2022-02-02 12:20:28.775,0,0,2022-02-02 12:20:31.588,2022-02-02 12:20:38.699,2022-02-02 12:20:41.137,2022-02-02 12:20:51.396,2022-02-02 12:20:54.306,2022-02-02 12:21:06.955,2022-02-02 12:21:10.297,2022-02-02 12:21:19.780


# Kaldi_code

In [14]:
import sys, os
sys.path.insert(0,os.path.abspath('tools/'))

import shutil
import txtgrid_master.TextGrid_Master as tgm
import librosa
import soundfile as sf
import comm_asr_from_txtgrid as cm
from importlib import reload
from collections import defaultdict

#TODO replace with argparse
dir = '/media/Windows/root/AusKidTalkv2/samples/test1/325/'
childID = '325'
taskID = 'task1'
lang = 'en-AU'
asr_engine='kaldi'

offset=0.5 #Amount used to relax the interval boudries of ibm Textgrid


tmp_dir = os.path.join(dir,'tmp','kaldi')
if not os.path.isdir(dir):
    #print("{} is not exist".format(dir))
    sys.exit("{} is not exist".format(dir))

#tmp_dir = os.path.join(dir,'tmp')
if not os.path.isdir(tmp_dir):
    os.makedirs(tmp_dir, exist_ok=True)

#TODO check wav file and ibm textgrid are exist
ibm_txtgrid  = os.path.join(dir,'{}_{}_ibm.TextGrid'.format(childID, taskID))
prompt_txtgrid = os.path.join(dir,'{}_{}_prompt.TextGrid'.format(childID, taskID))
wav_file = os.path.join(dir,'{}_{}.wav'.format(childID, taskID))

#Get number of detected speakers by ibm
dTiers = tgm.ParseTxtGrd(ibm_txtgrid)
speakers_tiers = dTiers.keys()

In [15]:
data_prompt = cm.get_valid_data(prompt_txtgrid) #Get non-sil intervals from prompt TextGrid

In [16]:
data_ibm = {}
for spk in speakers_tiers:
    data_ibm[spk] = cm.get_valid_data(ibm_txtgrid, sPromptTier=spk,offset=offset,bMerge=True)

In [17]:
nOverlaps = defaultdict(lambda :0)
for r in data_prompt.iterrows():
    bAdd = True
    for spk in speakers_tiers:
        df = data_ibm[spk]
        crnt_overlap =  df[((df.start_time > r[1].start_time) & (df.start_time < r[1].end_time)) |
                 ((df.start_time < r[1].start_time) & (df.end_time > r[1].start_time))].shape[0]
        nOverlaps[spk] += crnt_overlap
        if crnt_overlap > 0:
            bAdd = False
    if bAdd:
        for spk in speakers_tiers:
            data_ibm[spk].loc[-1] = r[1]
            data_ibm[spk].index = data_ibm[spk].index+1
for spk in speakers_tiers:
    data_ibm[spk].sort_values('start_time',inplace=True)

In [None]:
for spk in speakers_tiers:
    print('processing {}'.format(spk))
    df_data = data_ibm[spk]
    resDir = os.path.join(tmp_dir,spk)
    cm.process_data(sWaveFile=wav_file,data=df_data, lang= lang, spkr_ID=childID, rcrd_ID=taskID, out_dir=resDir, asr_engine=asr_engine, forced_upload=False)


In [23]:
textgrid_files = []
aSelectedTiers = []
dest_file = os.path.join(dir,'{0}_{1}_{2}.TextGrid'.format(childID, taskID, asr_engine))
for spk in speakers_tiers:
    textgrid_files.append(os.path.join(tmp_dir,spk,'{0}_{1}_{2}_{3}_concat.TextGrid'.format(childID, taskID, asr_engine, lang)))
    aSelectedTiers.append({'kaldi-words':'{}-kaldi-words'.format(spk)})
tgm.MergeTxtGrids(textgrid_files,dest_file,sWavFile=wav_file, aSlctdTiers=aSelectedTiers)

In [22]:
textgrid_files

['/media/Windows/root/AusKidTalkv2/samples/test1/325/tmp/kaldi/spk1/325_task1_kaldi_en-AU_concat.TextGrid',
 '/media/Windows/root/AusKidTalkv2/samples/test1/325/tmp/kaldi/spk2/325_task1_kaldi_en-AU_concat.TextGrid',
 '/media/Windows/root/AusKidTalkv2/samples/test1/325/tmp/kaldi/spk0/325_task1_kaldi_en-AU_concat.TextGrid']

In [None]:

tgm.MergeTxtGrids([s0_txtgrid,s1_txtgrid,s2_txtgrid],dest_file,sWavFile=wav_44, aSlctdTiers=[{'kaldi-words':'s0-kaldi-words'},{'kaldi-words':'s1-kaldi-words'},{'kaldi-words':'s2-kaldi-words'}])

In [13]:
nOverlaps

defaultdict(<function __main__.<lambda>()>,
            {'spk1': 108, 'spk2': 140, 'spk0': 27})

In [None]:
data_s0_m.loc[-1] = r[1]
        data_s1_m.loc[-1] = r[1]
        data_s2_m.loc[-1] = r[1]
        data_s0_m.index = data_s0_m.index+1
        data_s1_m.index = data_s1_m.index+1
        data_s2_m.index = data_s2_m.index+1

In [16]:
df = data_ibm['spk2']

In [17]:
df.shape

(196, 3)

# Draft

In [1]:
!pip install docker

Collecting docker
  Downloading docker-5.0.3-py2.py3-none-any.whl (146 kB)
[K     |████████████████████████████████| 146 kB 3.3 MB/s eta 0:00:01
Installing collected packages: docker
Successfully installed docker-5.0.3


In [3]:
import docker
client = docker.from_env()
client.images.list()

[<Image: 'kaldi-word:v1'>,
 <Image: ''>,
 <Image: '767753862555.dkr.ecr.us-east-2.amazonaws.com/wav2vecv4-gpu:latest', 'wav2vecv4-gpu:latest'>,
 <Image: '767753862555.dkr.ecr.us-east-2.amazonaws.com/wav2vecv4_debug:latest', 'wav2vecv4_debug:latest'>,
 <Image: '767753862555.dkr.ecr.us-east-2.amazonaws.com/wav2vecv4:latest', 'wav2vecv4:latest'>,
 <Image: 'huggingface/transformers-pytorch-latest-gpu:latest'>,
 <Image: '767753862555.dkr.ecr.us-east-2.amazonaws.com/wav2vec_long:latest', 'wav2vec_long:latest'>,
 <Image: ''>,
 <Image: 'wav2vec:latest'>,
 <Image: 'hugface:latest'>,
 <Image: 'ubuntu:18.04'>,
 <Image: 'huggingface/transformers-pytorch-cpu:latest'>,
 <Image: 'g2p:latest', 'g2p:v3.0'>,
 <Image: 'g2p:v2.0'>,
 <Image: 'g2p:v1.0'>,
 <Image: 'ubuntu:latest'>,
 <Image: 'kaldiasr/kaldi:latest'>]

In [6]:
speechfile = '325_task1_186.wav'
output = client.containers.run("kaldi-word:v1", "bash decode.sh /opt/tmp/{}".format(speechfile), volumes=['/media/Windows/root/AusKidTalkv2/samples/children/output/s0_325/:/opt/tmp'])

In [None]:
stdin_open
speechfile = '325_task1_186.wav'
output = client.containers.run("kaldi-word:v1", "bash decode.sh /opt/tmp/{}".format(speechfile), volumes=['/media/Windows/root/AusKidTalkv2/samples/children/output/s0_325/:/opt/tmp'])

In [12]:
import pandas as pd
from io import StringIO
ctmNames = ['fileName', 'channel', 'startTime', 'dur', 'symb', 'conf']
pd.read_csv(StringIO(output.decode('ascii')), delim_whitespace=True, names=ctmNames)

Unnamed: 0,fileName,channel,startTime,dur,symb,conf
0,325_task1_186.wav,1,0.0,0.53,sil,0.88
1,325_task1_186.wav,1,0.54,0.72,one,1.0
2,325_task1_186.wav,1,1.26,0.75,sil,1.0


In [84]:
import tools.Initiate_Alignment.InitAlign as init

In [85]:
tTasks, dTaskPrompts = init.GetTimeStampsSQL(325)

In [86]:
tTasks

TaskTimes(task1=(0.0, 1589.6789999008179), task2=(1697.8110001087189, 2361.9990000724792), task3=(2430.90700006485, 2656.6200001239777), task4=(2808.191999912262, 3120.755000114441), task5=(3446.805999994278, 3874.7139999866486))

In [87]:
dTaskPrompts

defaultdict(list,
            {1: [Prompt(taskID=1, wordID=1, word='duck', answerTime=54.77300000190735, cueOnset=44.25600004196167, cueOffset=44.28999996185303),
              Prompt(taskID=1, wordID=2, word='fish', answerTime=63.95199990272522, cueOnset=54.782999992370605, cueOffset=54.83500003814697),
              Prompt(taskID=1, wordID=3, word='car', answerTime=70.87199997901917, cueOnset=63.954999923706055, cueOffset=63.98300004005432),
              Prompt(taskID=1, wordID=4, word='key', answerTime=74.6930000782013, cueOnset=70.875, cueOffset=70.90300011634827),
              Prompt(taskID=1, wordID=5, word='boat', answerTime=135.16899991035461, cueOnset=74.69700002670288, cueOffset=110.34999990463257),
              Prompt(taskID=1, wordID=6, word='snake', answerTime=138.69799995422363, cueOnset=135.17499995231628, cueOffset=135.21000003814697),
              Prompt(taskID=1, wordID=7, word='cow', answerTime=142.5920000076294, cueOnset=138.70099997520447, cueOffset=138.7300000

In [70]:
stTime, endTime, labels = [a.squeeze() for a in np.split(df_results[df_results.speaker == 0][['from','to','word']].values,3,1)]

In [71]:
tgm.WriteTxtGrdFromDict()

array([1075.51, 1236.8, 1262.36, 1262.47, 1262.85, 1262.97, 1263.26,
       1271.6, 1274.73, 1274.83, 1275.16, 1275.34, 1275.83, 1300.31,
       1300.52, 1300.68, 1300.94, 1301.25, 1306.45, 1306.58, 1310.59,
       1310.77, 1311.0, 1311.36, 1312.18, 1322.95, 1326.06, 1326.19,
       1326.54, 1326.83, 1326.91, 1327.53, 1327.76, 1327.98, 1330.97,
       1331.1, 1331.47, 1331.59, 1332.05, 1332.56, 1340.36, 1340.7,
       1341.44, 1341.63, 1341.79, 1342.18, 1342.33, 1349.95, 1350.32,
       1350.56, 1359.91, 1360.02, 1364.32, 1364.5, 1364.71, 1364.98,
       1365.14, 1365.28, 1368.37, 1368.5, 1369.01, 1369.11, 1369.5,
       1369.94, 1380.06, 1380.26, 1380.74, 1381.17, 1381.37, 1383.91,
       1384.1, 1384.51, 1384.81, 1384.89, 1385.28, 1385.45, 1386.07,
       1386.41, 1386.54, 1390.81, 1391.34, 1391.54, 1391.6, 1392.25,
       1392.45, 1392.55, 1392.82, 1392.99, 1393.08, 1393.34, 1393.41,
       1398.11, 1398.25, 1398.87, 1399.19, 1402.14, 1402.41, 1402.49,
       1413.44, 1413.55, 1422.

In [44]:
df_words

Unnamed: 0,word,from,to
0,sorry,10.25,10.91
1,we,11.18,11.32
2,didn't,11.32,11.61
3,play,11.61,11.80
4,a,11.80,11.90
...,...,...,...
2243,no,3931.96,3932.15
2244,one,3932.15,3932.37
2245,level,3932.37,3932.70
2246,complete,3932.70,3933.61


Unnamed: 0,speaker,confidence,final,from,to,word
0,1,0.37,False,10.25,10.91,sorry
1,1,0.35,False,11.18,11.32,we
2,1,0.35,False,11.32,11.61,didn't
3,1,0.35,False,11.61,11.80,play
4,1,0.35,False,11.80,11.90,a
...,...,...,...,...,...,...
2243,1,0.28,False,3931.96,3932.15,no
2244,1,0.28,False,3932.15,3932.37,one
2245,1,0.28,False,3932.37,3932.70,level
2246,1,0.28,False,3932.70,3933.61,complete


In [30]:
results[0]['results'][0]['timestamps']

KeyError: 'timestamps'

In [42]:
pd.DataFrame.from_records(timestamps,columns=['word','to','from'])

Unnamed: 0,word,to,from
0,sorry,10.25,10.91
1,we,11.18,11.32
2,didn't,11.32,11.61
3,play,11.61,11.80
4,a,11.80,11.90
...,...,...,...
2243,no,3931.96,3932.15
2244,one,3932.15,3932.37
2245,level,3932.37,3932.70
2246,complete,3932.70,3933.61


In [36]:
l = results[0]['results'][0]['alternatives'][0]['timestamps']

In [37]:
l

[['sorry', 10.25, 10.91],
 ['we', 11.18, 11.32],
 ["didn't", 11.32, 11.61],
 ['play', 11.61, 11.8],
 ['a', 11.8, 11.9],
 ['few', 11.9, 12.63],
 ['games', 12.66, 13.15],
 ['and', 13.15, 13.38],
 ['you', 13.38, 13.49],
 ['know', 13.49, 13.68],
 ['that', 13.68, 13.93],
 ['this', 13.93, 14.15],
 ['can', 14.15, 14.37],
 ['grew', 14.37, 14.69],
 ['his', 14.69, 14.93],
 ['again', 14.93, 15.27],
 ['to', 15.27, 15.58],
 ['you', 15.58, 15.72],
 ['can', 15.72, 15.92],
 ['actually', 15.92, 16.38],
 ['press', 16.38, 16.78],
 ['different', 17.44, 17.78],
 ['parts', 17.78, 18.02],
 ['of', 18.02, 18.1],
 ['the', 18.1, 18.2],
 ['islands', 18.2, 18.67],
 ['making', 18.67, 18.98],
 ['jump', 18.98, 19.24],
 ['around', 19.24, 19.73],
 ['sometimes', 19.87, 20.77],
 ['sometimes', 21.15, 21.51],
 ['it', 21.51, 21.59],
 ["doesn't", 21.59, 21.85],
 ['get', 21.85, 21.97],
 ['replaced', 21.97, 22.29],
 ['other', 22.62, 22.88],
 ['times', 22.88, 23.17],
 ['from', 23.17, 23.31],
 ['very', 23.31, 23.67],
 ['good', 2

In [168]:
import pyAudioAnalysis.ShortTermFeatures as sF

In [169]:
wavfile = '/media/Windows/root/test.wav'

In [181]:
import wave,struct, os
#Flags
use_delta = False
fWindowSize = 0.02
fStepSize = 0.01
Context = (-2,-1,0,1,2)
working_dir = 'BeepDetModel/'
egs_dir = os.path.join(working_dir,'egs')
nFeatures = 68 if use_delta else 34
with wave.open(wavfile) as fWav:
    assert fWav.getsampwidth() == 2, 'Only 16 bit resolution supported'
    iFrameRate = fWav.getframerate()
    nTotalFrames = fWav.getnframes()
    data = fWav.readframes(nTotalFrames)
    data = [ i[0] for i in struct.iter_unpack('h',data)]
    data = np.asarray(data)

In [182]:
aFeatures, lFeatures_names = sF.feature_extraction(data,iFrameRate,int(fWindowSize*iFrameRate),int(fStepSize*iFrameRate),deltas=use_delta)

In [187]:
sF.energy(data)

27619427.0680353

In [1]:
import sys

In [13]:
from subprocess import check_output

In [18]:
f = open('/media/Windows/root/AusKidTalkv2/samples/children/output/s0_325/325_task1_186.wav','r')
d = check_output(['docker','run','-i','kaldi-word:v1','bash','decode.sh'], stdin=f)
f.close()

In [1]:
import tools.stt.kaldi_docker.stt as kaldi_stt

In [5]:
response, _ = kaldi_stt.stt_audio_file('/media/Windows/root/AusKidTalkv2/samples/children/output/s0_325/325_task1_186.wav')

In [7]:
response

Unnamed: 0,fileName,channel,startTime,dur,symb,conf
0,tmpSpeech.wav,1,0.0,0.53,sil,0.88
1,tmpSpeech.wav,1,0.54,0.72,one,1.0
2,tmpSpeech.wav,1,1.26,0.75,sil,1.0


In [2]:
sys.path

['/media/Windows/root/AusKidTalkv2',
 '/usr/lib/python39.zip',
 '/usr/lib/python3.9',
 '/usr/lib/python3.9/lib-dynload',
 '',
 '/home/mostafa/.local/lib/python3.9/site-packages',
 '/usr/local/lib/python3.9/dist-packages',
 '/usr/lib/python3/dist-packages',
 '/usr/lib/python3.9/dist-packages',
 '/home/mostafa/.local/lib/python3.9/site-packages/IPython/extensions',
 '/home/mostafa/.ipython']

In [3]:
import os

In [4]:
os.path.abspath('..')

'/media/Windows/root'

In [5]:
pwd

'/media/Windows/root/AusKidTalkv2'

In [6]:
c = [3,4]

In [7]:
c.append(0,8)

TypeError: list.append() takes exactly one argument (2 given)

In [8]:
import os

In [None]:
os.makedirs()