In [3]:
import io
import os
import re
import wave

from google.cloud import speech
from google.cloud import storage
def frame_rate_channel(audio_file_name):
    """Read the frame rate and channel count from a WAV file.

    The file name is echoed to stdout before the file is opened.

    Args:
        audio_file_name: Value passed straight to ``wave.open`` in read mode.

    Returns:
        A ``(frame_rate, channels)`` tuple taken from the WAV header.
    """
    print(audio_file_name)
    with wave.open(audio_file_name, "rb") as reader:
        header = (reader.getframerate(), reader.getnchannels())
    return header
def Config_GGC(sample_rate_hertz = 44100,
                audio_channel_count = 1,
                model = None,
                enable_automatic_punctuation=True):
    """Build a ``speech.RecognitionConfig`` for Vietnamese (``vi-VN``) audio.

    Args:
        sample_rate_hertz: Forwarded as ``sample_rate_hertz``.
        audio_channel_count: Forwarded as ``audio_channel_count``.
        model: Optional model name. When ``None`` the ``model`` field is not
            passed to ``RecognitionConfig`` at all.
        enable_automatic_punctuation: Forwarded as
            ``enable_automatic_punctuation`` whether or not ``model`` is set.

    Returns:
        The constructed ``speech.RecognitionConfig``.
    """
    options = {
        'sample_rate_hertz': sample_rate_hertz,
        # Honour the caller's choice in every case; the no-model path used to
        # force this to True regardless of the argument.
        'enable_automatic_punctuation': enable_automatic_punctuation,
        'language_code': 'vi-VN',
        'audio_channel_count': audio_channel_count,
    }
    if model is not None:
        options['model'] = model
    return speech.RecognitionConfig(**options)
# Variant of Config_GGC that never requests automatic punctuation
def Config_noPunc(sample_rate_hertz = 44100,
                audio_channel_count = 1,
                model = None,
                enable_automatic_punctuation=True):
    """Build a ``vi-VN`` ``speech.RecognitionConfig`` without a punctuation flag.

    Args:
        sample_rate_hertz: Forwarded as ``sample_rate_hertz``.
        audio_channel_count: Forwarded as ``audio_channel_count``.
        model: Optional model name; only forwarded when it is not ``None``.
        enable_automatic_punctuation: Accepted but not used by this function.

    Returns:
        The constructed ``speech.RecognitionConfig``.
    """
    fields = dict(
        sample_rate_hertz=sample_rate_hertz,
        language_code='vi-VN',
        audio_channel_count=audio_channel_count,
    )
    if model is not None:
        fields['model'] = model
    return speech.RecognitionConfig(**fields)
# Helper functions for post-processing the transcribed output text
def is_digit(word):
    """Tell whether ``int(word)`` succeeds.

    Returns:
        True when ``int(word)`` parses, False when it raises ``ValueError``.
        Any other exception raised by ``int`` propagates to the caller.
    """
    try:
        int(word)
    except ValueError:
        return False
    else:
        return True


def ConvertDate(text):
    """Collapse spoken Vietnamese date separators into slashes.

    Every occurrence of ``' tháng '`` or ``' năm '`` that has a digit directly
    before it and a digit directly after it is replaced by ``'/'``, so
    ``'5 tháng 6 năm 2022'`` becomes ``'5/6/2022'``. Occurrences without a
    digit on both sides are left untouched.

    Args:
        text: The string to rewrite.

    Returns:
        The rewritten string.
    """
    # The lookarounds require a digit on each side without consuming it, so
    # chained dates share their middle number. They also make a keyword at the
    # very start or end of the text a simple non-match: no index wrap-around
    # to the last character and no IndexError past the end.
    separator = r'(?<=\d) (?:tháng|năm) (?=\d)'
    return re.sub(separator, '/', text)
def Transcribe_Short_Audio(Audio_wav,config_wav_enhanced):
    """Run ``recognize`` on one local audio file and collect the transcripts.

    A new ``speech.SpeechClient`` is created on every call, the whole file is
    read into memory, and its bytes are sent in a single ``recognize`` request.

    Args:
        Audio_wav: Path of the audio file to read in binary mode.
        config_wav_enhanced: Passed as ``config`` to ``client.recognize``.

    Returns:
        A list with one string per entry of ``response.results``: the first
        alternative's transcript plus ``'\\n'``, passed through ``ConvertDate``.
    """
    speech_client = speech.SpeechClient()
    with io.open(Audio_wav, "rb") as wav_handle:
        raw_bytes = wav_handle.read()

    request_audio = speech.RecognitionAudio(content=raw_bytes)
    response = speech_client.recognize(config=config_wav_enhanced, audio=request_audio)

    # Only the first alternative of each result is kept.
    return [
        ConvertDate(result.alternatives[0].transcript + '\n')
        for result in response.results
    ]

In [4]:
# Single WAV clip that every benchmark request below re-sends.
audio_path = r'Splited_speaker\23-06-2022 14 12 36\2.wav'
# Number of identical requests issued per benchmark run.
N_REQUESTS = 50

# Key file for the Google client; presumably resolved relative to the
# notebook's working directory -- confirm before running elsewhere.
os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = 'Speech2Text.json'

# Take the recognition settings from the clip's own WAV header.
rate, channel = frame_rate_channel(audio_path)
config = Config_GGC(sample_rate_hertz=rate,
                    audio_channel_count=channel)

# The benchmark cells iterate over `path`, so it stays bound to the list of
# inputs; the single-file string now has its own name instead of being
# overwritten by the list.
path = [audio_path] * N_REQUESTS

Splited_speaker\23-06-2022 14 12 36\2.wav


In [5]:
from concurrent.futures import ThreadPoolExecutor
from threading import current_thread
import time

# Size of the worker pool for this run.
threadnum=4


def func_thread(path):
    """Transcribe one file and report which worker ran it and for how long.

    Args:
        path: Audio file path handed to ``Transcribe_Short_Audio`` together
            with the notebook-level ``config``.

    Returns:
        ``[thread_name, elapsed_seconds]``; the transcript is discarded.
    """
    started = time.time()
    Transcribe_Short_Audio(path, config)
    worker_name = current_thread().name
    elapsed = time.time() - started
    return [worker_name, elapsed]


# Wall-clock time of the whole batch, measured around the pool's `with` block.
now2 = time.time()
with ThreadPoolExecutor(threadnum) as executor:
    futures = executor.map(func_thread, path)
    Qlist = list(futures)
end_time2 = time.time() - now2

print(f'Kết quả threads = {threadnum}')
# `full_time` is the sum of the per-request durations, not wall-clock time.
full_time = 0
for worker_name, seconds in Qlist:
    print(worker_name, ' || ', seconds)
    full_time += seconds

print(f'Số lần đưa lên GGC : {len(Qlist)}, multi time: {end_time2}, full time: {full_time}')

Kết quả threads = 4
ThreadPoolExecutor-0_0  ||  9.3568115234375
ThreadPoolExecutor-0_1  ||  9.843492984771729
ThreadPoolExecutor-0_2  ||  11.759594678878784
ThreadPoolExecutor-0_3  ||  10.650372505187988
ThreadPoolExecutor-0_0  ||  8.815887928009033
ThreadPoolExecutor-0_1  ||  8.843299388885498
ThreadPoolExecutor-0_3  ||  8.325021982192993
ThreadPoolExecutor-0_2  ||  9.065549373626709
ThreadPoolExecutor-0_0  ||  8.91063928604126
ThreadPoolExecutor-0_1  ||  9.89406442642212
ThreadPoolExecutor-0_3  ||  9.872071981430054
ThreadPoolExecutor-0_2  ||  9.220657110214233
ThreadPoolExecutor-0_0  ||  8.869789361953735
ThreadPoolExecutor-0_1  ||  9.55379319190979
ThreadPoolExecutor-0_3  ||  9.1752188205719
ThreadPoolExecutor-0_2  ||  8.66318154335022
ThreadPoolExecutor-0_0  ||  8.743327617645264
ThreadPoolExecutor-0_3  ||  8.411436080932617
ThreadPoolExecutor-0_1  ||  9.637392282485962
ThreadPoolExecutor-0_2  ||  9.318017959594727
ThreadPoolExecutor-0_0  ||  10.14578127861023
ThreadPoolExecutor-0

In [6]:
from concurrent.futures import ThreadPoolExecutor
from threading import current_thread
import time

# Size of the worker pool for this run.
threadnum=8


def func_thread(path):
    """Transcribe one file and report which worker ran it and for how long.

    Args:
        path: Audio file path handed to ``Transcribe_Short_Audio`` together
            with the notebook-level ``config``.

    Returns:
        ``[thread_name, elapsed_seconds]``; the transcript is discarded.
    """
    started = time.time()
    Transcribe_Short_Audio(path, config)
    worker_name = current_thread().name
    elapsed = time.time() - started
    return [worker_name, elapsed]


# Wall-clock time of the whole batch, measured around the pool's `with` block.
now2 = time.time()
with ThreadPoolExecutor(threadnum) as executor:
    futures = executor.map(func_thread, path)
    Qlist = list(futures)
end_time2 = time.time() - now2

print(f'Kết quả threads = {threadnum}')
# `full_time` is the sum of the per-request durations, not wall-clock time.
full_time = 0
for worker_name, seconds in Qlist:
    print(worker_name, ' || ', seconds)
    full_time += seconds

print(f'Số lần đưa lên GGC : {len(Qlist)}, multi time: {end_time2}, full time: {full_time}')

Kết quả threads = 8
ThreadPoolExecutor-1_0  ||  9.942192792892456
ThreadPoolExecutor-1_1  ||  11.446874856948853
ThreadPoolExecutor-1_2  ||  12.879981994628906
ThreadPoolExecutor-1_3  ||  14.352685689926147
ThreadPoolExecutor-1_4  ||  12.835451364517212
ThreadPoolExecutor-1_5  ||  14.398686647415161
ThreadPoolExecutor-1_6  ||  13.752931594848633
ThreadPoolExecutor-1_7  ||  13.337661743164062
ThreadPoolExecutor-1_0  ||  7.981951475143433
ThreadPoolExecutor-1_1  ||  9.31909465789795
ThreadPoolExecutor-1_4  ||  9.84632658958435
ThreadPoolExecutor-1_2  ||  9.66334581375122
ThreadPoolExecutor-1_7  ||  10.117734432220459
ThreadPoolExecutor-1_6  ||  10.383197784423828
ThreadPoolExecutor-1_3  ||  12.334721326828003
ThreadPoolExecutor-1_5  ||  10.182599544525146
ThreadPoolExecutor-1_0  ||  8.38379693031311
ThreadPoolExecutor-1_1  ||  8.969838857650757
ThreadPoolExecutor-1_2  ||  8.889282703399658
ThreadPoolExecutor-1_4  ||  10.589187622070312
ThreadPoolExecutor-1_7  ||  8.822363138198853
Thread

In [7]:
from concurrent.futures import ThreadPoolExecutor
from threading import current_thread
import time

# Size of the worker pool for this run.
threadnum=10


def func_thread(path):
    """Transcribe one file and report which worker ran it and for how long.

    Args:
        path: Audio file path handed to ``Transcribe_Short_Audio`` together
            with the notebook-level ``config``.

    Returns:
        ``[thread_name, elapsed_seconds]``; the transcript is discarded.
    """
    started = time.time()
    Transcribe_Short_Audio(path, config)
    worker_name = current_thread().name
    elapsed = time.time() - started
    return [worker_name, elapsed]


# Wall-clock time of the whole batch, measured around the pool's `with` block.
now2 = time.time()
with ThreadPoolExecutor(threadnum) as executor:
    futures = executor.map(func_thread, path)
    Qlist = list(futures)
end_time2 = time.time() - now2

print(f'Kết quả threads = {threadnum}')
# `full_time` is the sum of the per-request durations, not wall-clock time.
full_time = 0
for worker_name, seconds in Qlist:
    print(worker_name, ' || ', seconds)
    full_time += seconds

print(f'Số lần đưa lên GGC : {len(Qlist)}, multi time: {end_time2}, full time: {full_time}')

Kết quả threads = 10
ThreadPoolExecutor-2_0  ||  9.490137815475464
ThreadPoolExecutor-2_1  ||  12.607067823410034
ThreadPoolExecutor-2_2  ||  14.196170568466187
ThreadPoolExecutor-2_3  ||  14.133343935012817
ThreadPoolExecutor-2_4  ||  13.428093910217285
ThreadPoolExecutor-2_5  ||  14.680651903152466
ThreadPoolExecutor-2_6  ||  13.662641048431396
ThreadPoolExecutor-2_7  ||  18.39099907875061
ThreadPoolExecutor-2_8  ||  14.961735010147095
ThreadPoolExecutor-2_9  ||  14.562702417373657
ThreadPoolExecutor-2_0  ||  8.992788076400757
ThreadPoolExecutor-2_1  ||  9.228157043457031
ThreadPoolExecutor-2_4  ||  9.784301280975342
ThreadPoolExecutor-2_6  ||  9.07339596748352
ThreadPoolExecutor-2_3  ||  14.608352422714233
ThreadPoolExecutor-2_2  ||  10.212292432785034
ThreadPoolExecutor-2_9  ||  10.561554193496704
ThreadPoolExecutor-2_5  ||  11.292017936706543
ThreadPoolExecutor-2_8  ||  10.236616849899292
ThreadPoolExecutor-2_7  ||  8.998309850692749
ThreadPoolExecutor-2_0  ||  9.591556072235107
T

In [8]:
from concurrent.futures import ThreadPoolExecutor
from threading import current_thread
import time

# Size of the worker pool for this run.
threadnum=16


def func_thread(path):
    """Transcribe one file and report which worker ran it and for how long.

    Args:
        path: Audio file path handed to ``Transcribe_Short_Audio`` together
            with the notebook-level ``config``.

    Returns:
        ``[thread_name, elapsed_seconds]``; the transcript is discarded.
    """
    started = time.time()
    Transcribe_Short_Audio(path, config)
    worker_name = current_thread().name
    elapsed = time.time() - started
    return [worker_name, elapsed]


# Wall-clock time of the whole batch, measured around the pool's `with` block.
now2 = time.time()
with ThreadPoolExecutor(threadnum) as executor:
    futures = executor.map(func_thread, path)
    Qlist = list(futures)
end_time2 = time.time() - now2

print(f'Kết quả threads = {threadnum}')
# `full_time` is the sum of the per-request durations, not wall-clock time.
full_time = 0
for worker_name, seconds in Qlist:
    print(worker_name, ' || ', seconds)
    full_time += seconds

print(f'Số lần đưa lên GGC : {len(Qlist)}, multi time: {end_time2}, full time: {full_time}')

Kết quả threads = 16
ThreadPoolExecutor-3_0  ||  10.798264026641846
ThreadPoolExecutor-3_1  ||  15.088694095611572
ThreadPoolExecutor-3_2  ||  12.004292488098145
ThreadPoolExecutor-3_3  ||  17.795286655426025
ThreadPoolExecutor-3_4  ||  16.93712091445923
ThreadPoolExecutor-3_5  ||  17.094093084335327
ThreadPoolExecutor-3_6  ||  14.561445474624634
ThreadPoolExecutor-3_7  ||  19.435311555862427
ThreadPoolExecutor-3_8  ||  19.10573172569275
ThreadPoolExecutor-3_9  ||  17.704259157180786
ThreadPoolExecutor-3_10  ||  19.96686887741089
ThreadPoolExecutor-3_11  ||  20.55033779144287
ThreadPoolExecutor-3_12  ||  19.53667163848877
ThreadPoolExecutor-3_13  ||  18.904133081436157
ThreadPoolExecutor-3_14  ||  18.50031590461731
ThreadPoolExecutor-3_15  ||  20.049274444580078
ThreadPoolExecutor-3_0  ||  8.610327959060669
ThreadPoolExecutor-3_2  ||  9.010387659072876
ThreadPoolExecutor-3_6  ||  9.079496145248413
ThreadPoolExecutor-3_1  ||  9.108426094055176
ThreadPoolExecutor-3_4  ||  9.0230917930603

In [9]:
from concurrent.futures import ThreadPoolExecutor
from threading import current_thread
import time

# Size of the worker pool for this run.
threadnum=20


def func_thread(path):
    """Transcribe one file and report which worker ran it and for how long.

    Args:
        path: Audio file path handed to ``Transcribe_Short_Audio`` together
            with the notebook-level ``config``.

    Returns:
        ``[thread_name, elapsed_seconds]``; the transcript is discarded.
    """
    started = time.time()
    Transcribe_Short_Audio(path, config)
    worker_name = current_thread().name
    elapsed = time.time() - started
    return [worker_name, elapsed]


# Wall-clock time of the whole batch, measured around the pool's `with` block.
now2 = time.time()
with ThreadPoolExecutor(threadnum) as executor:
    futures = executor.map(func_thread, path)
    Qlist = list(futures)
end_time2 = time.time() - now2

print(f'Kết quả threads = {threadnum}')
# `full_time` is the sum of the per-request durations, not wall-clock time.
full_time = 0
for worker_name, seconds in Qlist:
    print(worker_name, ' || ', seconds)
    full_time += seconds

print(f'Số lần đưa lên GGC : {len(Qlist)}, multi time: {end_time2}, full time: {full_time}')

Kết quả threads = 20
ThreadPoolExecutor-4_0  ||  17.900283813476562
ThreadPoolExecutor-4_1  ||  11.388800144195557
ThreadPoolExecutor-4_2  ||  15.22531509399414
ThreadPoolExecutor-4_3  ||  24.27676033973694
ThreadPoolExecutor-4_4  ||  14.579591989517212
ThreadPoolExecutor-4_5  ||  22.17240881919861
ThreadPoolExecutor-4_6  ||  16.83590841293335
ThreadPoolExecutor-4_7  ||  18.476003408432007
ThreadPoolExecutor-4_8  ||  17.389713525772095
ThreadPoolExecutor-4_9  ||  17.204346179962158
ThreadPoolExecutor-4_10  ||  22.185781002044678
ThreadPoolExecutor-4_11  ||  20.439692497253418
ThreadPoolExecutor-4_12  ||  23.934638738632202
ThreadPoolExecutor-4_13  ||  22.191534757614136
ThreadPoolExecutor-4_14  ||  21.000506162643433
ThreadPoolExecutor-4_15  ||  24.31283140182495
ThreadPoolExecutor-4_16  ||  20.603747606277466
ThreadPoolExecutor-4_17  ||  21.02185344696045
ThreadPoolExecutor-4_18  ||  20.50039267539978
ThreadPoolExecutor-4_19  ||  20.44745922088623
ThreadPoolExecutor-4_1  ||  12.100388

In [10]:
from concurrent.futures import ThreadPoolExecutor
from threading import current_thread
import time

# Size of the worker pool for this run.
threadnum=30


def func_thread(path):
    """Transcribe one file and report which worker ran it and for how long.

    Args:
        path: Audio file path handed to ``Transcribe_Short_Audio`` together
            with the notebook-level ``config``.

    Returns:
        ``[thread_name, elapsed_seconds]``; the transcript is discarded.
    """
    started = time.time()
    Transcribe_Short_Audio(path, config)
    worker_name = current_thread().name
    elapsed = time.time() - started
    return [worker_name, elapsed]


# Wall-clock time of the whole batch, measured around the pool's `with` block.
now2 = time.time()
with ThreadPoolExecutor(threadnum) as executor:
    futures = executor.map(func_thread, path)
    Qlist = list(futures)
end_time2 = time.time() - now2

print(f'Kết quả threads = {threadnum}')
# `full_time` is the sum of the per-request durations, not wall-clock time.
full_time = 0
for worker_name, seconds in Qlist:
    print(worker_name, ' || ', seconds)
    full_time += seconds

print(f'Số lần đưa lên GGC : {len(Qlist)}, multi time: {end_time2}, full time: {full_time}')

Kết quả threads = 30
ThreadPoolExecutor-5_0  ||  12.321434736251831
ThreadPoolExecutor-5_1  ||  27.7842538356781
ThreadPoolExecutor-5_2  ||  12.81221604347229
ThreadPoolExecutor-5_3  ||  14.373048782348633
ThreadPoolExecutor-5_4  ||  21.387300729751587
ThreadPoolExecutor-5_5  ||  23.893422603607178
ThreadPoolExecutor-5_6  ||  33.317320823669434
ThreadPoolExecutor-5_7  ||  17.841492176055908
ThreadPoolExecutor-5_8  ||  23.627087831497192
ThreadPoolExecutor-5_9  ||  27.492358922958374
ThreadPoolExecutor-5_10  ||  23.48190951347351
ThreadPoolExecutor-5_11  ||  21.27254009246826
ThreadPoolExecutor-5_12  ||  27.85570001602173
ThreadPoolExecutor-5_13  ||  24.28964138031006
ThreadPoolExecutor-5_14  ||  28.825579404830933
ThreadPoolExecutor-5_15  ||  30.37171244621277
ThreadPoolExecutor-5_16  ||  30.834768295288086
ThreadPoolExecutor-5_17  ||  31.054606914520264
ThreadPoolExecutor-5_18  ||  24.73774766921997
ThreadPoolExecutor-5_19  ||  30.377290964126587
ThreadPoolExecutor-5_20  ||  30.415505