In [1]:
# Imports the Google Cloud client library
import os
import re
import wave

from google.cloud import speech_v1
from google.cloud import speech_v1p1beta1
from google.cloud import storage
from google.cloud.speech_v1 import enums

# Notebook configuration: replace the placeholders before running.
PROJECT_NAME = "<your project name>"
KEY_FILE = "<key file.json>"
BUCKET_NAME = "<bucket name>"
RECOG_DATA = "<local recognition folder>"

# Instantiates a client.
# from_service_account_json is a classmethod factory, so it is called on the
# class itself: building storage.Client(...) first would try to resolve default
# credentials (and fail when none are configured) only to throw that client
# away, and the project passed to it would be lost.
storage_client = storage.Client.from_service_account_json(KEY_FILE, project=PROJECT_NAME)

def list_blobs(bucket_name):
    """Return the names of all blobs in the given bucket as a list.

    The bucket is looked up through the module-level ``storage_client``.
    """
    target_bucket = storage_client.get_bucket(bucket_name)
    return [entry.name for entry in target_bucket.list_blobs()]


def download_blob(bucket_name, source_blob_name, destination_file_name):
    """Downloads a blob from the bucket.

    Args:
        bucket_name: name of the bucket that holds the blob.
        source_blob_name: name (path inside the bucket) of the blob to fetch.
        destination_file_name: local path the blob content is written to.

    Raises:
        FileNotFoundError: if the bucket has no blob named source_blob_name.
    """
    bucket = storage_client.get_bucket(bucket_name)
    blob = bucket.get_blob(source_blob_name)

    if blob is None:
        # get_blob() returns None for a missing object; fail with a clear
        # message instead of an AttributeError on None further down.
        raise FileNotFoundError(
            'Blob {} not found in bucket {}.'.format(source_blob_name, bucket_name))

    blob.download_to_filename(destination_file_name)

    print('Blob {} downloaded to {}.'.format(
        source_blob_name,
        destination_file_name))


def join_waves(inputs, output):
    '''
    Concatenate several WAV files into a single WAV file.

    inputs : list of filenames, joined in the given order
    output : output filename

    The output takes its channel count, sample width and frame rate from the
    first input; the remaining inputs are not checked against those values.
    Errors are reported with print() rather than raised. Nothing is written
    when inputs is empty or when any input cannot be opened.
    '''
    if not inputs:
        # Nothing to join: report it without creating an empty output file.
        print('unexpected error -> no input files given')
        return

    readers = []
    writer = None
    try:
        # Open every input before creating the output, so a missing or invalid
        # input does not leave a partial output file behind.
        for path in inputs:
            readers.append(wave.open(path, 'r'))

        first = readers[0]
        writer = wave.open(output, 'w')
        writer.setnchannels(first.getnchannels())
        writer.setsampwidth(first.getsampwidth())
        writer.setframerate(first.getframerate())

        for reader in readers:
            writer.writeframes(reader.readframes(reader.getnframes()))

    except wave.Error as e:
        print(e)

    except Exception as e:
        print('unexpected error -> ' + str(e))

    finally:
        # Release every handle even when an error interrupted the copy.
        for reader in readers:
            reader.close()
        if writer is not None:
            writer.close()

In [None]:
# This cell must be run: all_blobs is used by the later processing cells.
# all_blobs(type list) : ['conversation_history/20191218012459_040c000111cf_V-01-101-R.wav', 'conversation_history/20191218012512_040c000111cf_V-02-501-R.wav', 'conversation_history/20191218012525_040c000111cf_V-03-101-R.wav'.....]
all_blobs = list_blobs(BUCKET_NAME)

In [3]:
# Run this cell when you want to transcribe audio files stored in GCS
# (multiple candidate transcripts are output).
# Function declarations.
# wavfile has the form gs://<bucketname>/<filename>

# Phrase hint passed as the speech context for the status codes below.
PHRASES_NUMBER = ['$OOV_CLASS_DIGIT_SEQUENCE']

# Recognition-status codes, as they appear in the blob file names.
ID_CHECK_STATUS_1 = 'V-02-101-R'
ID_CHECK_STATUS_2 = 'V-03-101-R'
ID_CHECK_STATUS_3 = 'V-04-101-R'
TEL_CHECK_STATUS_1 = 'V-02-201-R'
TEL_CHECK_STATUS_2 = 'V-03-201-R'

# A status mapped to this pattern has its candidates kept without filtering.
BYPASS_CONFIDENCE_CHECK = r".*"

# Status code -> regex a candidate transcript must match to be kept.
PATTERN_DIST = {
    ID_CHECK_STATUS_1: BYPASS_CONFIDENCE_CHECK,
    ID_CHECK_STATUS_2: BYPASS_CONFIDENCE_CHECK,
    ID_CHECK_STATUS_3: BYPASS_CONFIDENCE_CHECK,
    TEL_CHECK_STATUS_1: r"(?<!.)\D*0[0-9,-]{9,12}\D*(?!.)",
    TEL_CHECK_STATUS_2: r"(?<!.)\D*0[0-9,-]{9,12}\D*(?!.)",
}

def func_call_speechapi_client(wavfile, phrases):
    """Transcribe a GCS-hosted audio file with the Speech-to-Text v1 client.

    Every alternative of every result is formatted as one
    "candidate text / confidence" line. The lines are printed and also saved
    to a ``.txt`` file, named after the audio file, inside ``RECOG_DATA``.

    Args:
        wavfile: audio URI of the form gs://<bucketname>/<filename>.
        phrases: phrase hints sent as the speech context; pass "" (or any
            other empty value) to send no speech context.

    Returns:
        The formatted candidate lines as a single string ("" when the API
        returned no results).
    """
    # from_service_account_json is a classmethod: calling it on the class
    # avoids first building a throwaway client from default credentials,
    # which fails when no default credentials are configured.
    client = speech_v1.SpeechClient.from_service_account_json(KEY_FILE)

    # The request is fixed to 8 kHz LINEAR16 Japanese audio, up to 30 alternatives.
    config = {
        'encoding': enums.RecognitionConfig.AudioEncoding.LINEAR16,
        'sample_rate_hertz': 8000,
        'language_code': 'ja-JP',
        'max_alternatives': 30,
    }
    if phrases:
        print("phrases:{}".format(phrases))
        config['speech_contexts'] = [{'phrases': phrases}]

    audio = {'uri': wavfile}

    response = client.recognize(config, audio)

    text_result = "".join(
        "候補text: {}, 精度: {:.2f}\n".format(alternative.transcript, alternative.confidence)
        for result in response.results
        for alternative in result.alternatives
    )
    print(text_result)

    # Derive "<name>.txt" from the last URI segment whatever the extension
    # length, and join it to the folder with a proper path separator.
    base_name = os.path.splitext(wavfile.split("/")[-1])[0]
    out_path = os.path.join(RECOG_DATA, base_name + ".txt")
    # The text is Japanese: write UTF-8 explicitly instead of relying on the
    # platform default encoding.
    with open(out_path, mode='w', encoding='utf-8') as f:
        f.write(text_result)

    return text_result


def func_call_speechapi_client_beta(wavfile, phrases, rec_sts):
    """Transcribe a GCS-hosted audio file with the v1p1beta1 client, filtered by status.

    Each alternative is formatted as one "candidate text / confidence" line.
    An alternative is kept only when its transcript matches the regex that
    ``PATTERN_DIST`` holds for ``rec_sts``; a status mapped to
    ``BYPASS_CONFIDENCE_CHECK`` keeps every alternative. The lines are printed
    but, unlike func_call_speechapi_client, not written to a file.

    Args:
        wavfile: audio URI of the form gs://<bucketname>/<filename>.
        phrases: phrase hints sent as the speech context; pass "" (or any
            other empty value) to send no speech context.
        rec_sts: recognition-status code; must be a key of ``PATTERN_DIST``.

    Returns:
        The formatted lines of the kept candidates as a single string.

    Raises:
        KeyError: if rec_sts is not a key of ``PATTERN_DIST``.
    """
    # Resolve the filter first so an unknown status fails before the API call.
    pattern = PATTERN_DIST[rec_sts]
    keep_all = pattern == BYPASS_CONFIDENCE_CHECK

    # from_service_account_json is a classmethod: calling it on the class
    # avoids first building a throwaway client from default credentials,
    # which fails when no default credentials are configured.
    client = speech_v1p1beta1.SpeechClient.from_service_account_json(KEY_FILE)

    # The request is fixed to 8 kHz LINEAR16 Japanese audio, up to 30 alternatives.
    config = {
        'encoding': enums.RecognitionConfig.AudioEncoding.LINEAR16,
        'sample_rate_hertz': 8000,
        'language_code': 'ja-JP',
        'max_alternatives': 30,
    }
    if phrases:
        print("phrases:{}".format(phrases))
        config['speech_contexts'] = [{'phrases': phrases}]

    audio = {'uri': wavfile}

    response = client.recognize(config, audio)

    text_result = "".join(
        "候補text: {}, 精度: {:.2f}\n".format(alternative.transcript, alternative.confidence)
        for result in response.results
        for alternative in result.alternatives
        if keep_all or re.match(pattern, alternative.transcript)
    )
    print(text_result)
    return text_result

In [None]:
# Run this cell when you want to transcribe the audio files stored in GCS
# (multiple candidate transcripts are output).
# Text is saved under the recognition folder (RECOG_DATA).
# NOTE(review): only func_call_speechapi_client writes a file; the beta path
# just prints and returns its text - confirm whether that is intended.

for blob in all_blobs:
    wavfile_uri = "gs://{}/{}".format(BUCKET_NAME,blob)
    print(wavfile_uri)

    # The status code is the 4th "_"-separated field of the blob name, e.g.
    # conversation_history/20191218012459_040c000111cf_V-01-101-R.wav.
    # Names with fewer fields (such as a folder placeholder object) are
    # skipped instead of aborting the whole batch with an IndexError.
    name_fields = blob.split("_")
    if len(name_fields) < 4:
        print("skipped (no status field in name): {}".format(blob))
        continue
    rec_sts = name_fields[3].split(".")[0]

    # The statuses that need digit-sequence hints are exactly the keys of
    # PATTERN_DIST, so test membership rather than repeating the literals.
    if rec_sts in PATTERN_DIST:
        text = func_call_speechapi_client_beta(wavfile_uri, PHRASES_NUMBER, rec_sts)
    else:
        text = func_call_speechapi_client(wavfile_uri, '')