# Individual config of trigger and output bucket

In [1]:
# The buckets can be created by hand or with the cells below.
# If they already exist (or you create them manually), set the two bucket
# variables to the existing bucket ids instead of generating new ones.
import os
import uuid

# A random UUID suffix keeps each bucket id globally unique.
trigger_Bucket = f"trigger-{uuid.uuid4()}"
Output_Bucket = f"output-{uuid.uuid4()}"

PROJECT = "cloudfunction-268018"  # REPLACE WITH YOUR PROJECT ID
REGION = "us-central1"  # REPLACE WITH YOUR BUCKET REGION e.g. us-central1

In [4]:
# Export the configuration as environment variables so the %%bash cells can read it.
os.environ.update({
    'PROJECT': PROJECT,
    'REGION': REGION,
    'trigger_Bucket': trigger_Bucket,
    'Output_Bucket': Output_Bucket,
})

In [None]:
%%bash
# Point the gcloud CLI at the configured project and region.
gcloud config set project "${PROJECT}"
gcloud config set compute/region "${REGION}"

# Create the trigger bucket and the output bucket in that region.
for bucket in "${trigger_Bucket}" "${Output_Bucket}"; do
  gsutil mb -l "${REGION}" "gs://${bucket}"
done

In [None]:
#BUCKET = "cloudfunction-268018qwiklabs-gcp-02-19352bf770e7" # REPLACE WITH YOUR BUCKET NAME
#os.environ['BUCKET'] = BUCKET

In [None]:
#%%bash
#gsutil mb -l ${REGION} gs://${BUCKET}

# Create dependencies file (requirements.txt) and execution file (main.py) for deployment

In [6]:
%%writefile requirements.txt
pydub
google-cloud-translate
# main.py uses speech.enums / speech.types and the positional call
# long_running_recognize(config, audio); these only exist in the 1.x client.
google-cloud-speech<2.0.0
google-cloud-storage

Overwriting requirements.txt


IMPORTANT NOTE: Change OUTPUT_BUCKET in main.py to the output bucket you created. The following cell prints the id of the output bucket.

In [7]:
# Print the generated output-bucket id so it can be copied into OUTPUT_BUCKET in main.py.
print(Output_Bucket)

output-39f8058a-242b-44ff-b2b3-b86aa36643fc


In [117]:
%%writefile main.py
# --- User configuration ---
# Id of the bucket that receives the converted audio and the subtitle files.
OUTPUT_BUCKET = "output-39f8058a-242b-44ff-b2b3-b86aa36643fc"  ##CHANGE: Insert your output-bucket id

# Language code for the target language of the translation ("de" = German).
TARGET_LANGUAGE = "de"

# Set to False to skip creating the additional translated subtitle file.
FLAG_TRANSLATE = True

import datetime
from pydub import AudioSegment
from google.cloud import speech
from google.cloud.speech import enums
from google.cloud.speech import types
from google.cloud import storage
from google.cloud import translate_v2 as translate
from google.cloud import storage
import tempfile
from werkzeug.utils import secure_filename
import os
import sys

# Module-level Cloud Storage client, created once at import and used by the functions below.
client = storage.Client()

def get_file_path(filename):
  """
  Build the local scratch path for a file inside the cloud functions node.

  The name is sanitized with secure_filename and placed in the system temp
  directory (usually tmp/[audiofile]); the tmp folder is the only folder a
  cloud function can write to and read from.
  """
  safe_name = secure_filename(filename)
  tmp_dir = tempfile.gettempdir()
  return os.path.join(tmp_dir, safe_name)

def transcribe_gcs_with_word_time_offsets(event_metadata,INPUT_BUCKET,filename,punctuation):
  """
  Transcribe the uploaded audio file with the Speech-to-Text API.

  1. Download the audio file from the trigger bucket to the local tmp folder.
  2. Convert .wav input to .mp3, reduce the audio to 1 channel and read its frame rate.
  3. Upload the mono file as "post<filename>" to OUTPUT_BUCKET so the API can read it by URI.
  4. Start a long-running recognition with word time offsets and wait for the result.

  Args:
    event_metadata: storage event dict; 'bucket' and 'name' locate the uploaded object.
    INPUT_BUCKET: name of the trigger bucket. Kept for interface compatibility;
      the bucket is read from event_metadata.
    filename: object name of the uploaded audio file.
    punctuation: passed to the API as enable_automatic_punctuation.

  Returns:
    Tuple (result, filename): the finished recognition result and the name of the
    file that was transcribed (".mp3" is appended when a .wav file was converted).

  Note:
    Calls sys.exit(0) when the file is larger than MAX_FILE_BYTES, before or
    after the conversion.
  """
  MAX_FILE_BYTES = 10000000  # size limit for the audio file, checked before and after conversion

  tmp_path_name = get_file_path(filename)
  # Every local tmp file created below is registered here and deleted in the
  # finally block, also when a step fails or sys.exit is called.
  local_tmp_files = [tmp_path_name]
  try:
    #load bucket audio file to tmp folder
    bucket = client.get_bucket(event_metadata['bucket'])
    blob = bucket.get_blob(event_metadata['name'])
    blob.download_to_filename(tmp_path_name)
    print("filesize before convertion:",os.path.getsize(tmp_path_name))
    if (os.path.getsize(tmp_path_name)>MAX_FILE_BYTES):
      print("filesize to big->abort")
      sys.exit(0)

    # if .wav format, transform to .mp3
    if filename.lower().endswith('.wav'):
      sound = AudioSegment.from_wav(tmp_path_name)
      sound = sound.set_channels(1)
      # the .wav copy is no longer needed once it is loaded
      os.remove(tmp_path_name)
      tmp_path_name=tmp_path_name+".mp3"
      local_tmp_files.append(tmp_path_name)
      filename=filename+".mp3"
      sound.export(tmp_path_name, format="mp3")
      print("filesize after convertion:",os.path.getsize(tmp_path_name))
      if (os.path.getsize(tmp_path_name)>MAX_FILE_BYTES):
        print("filesize to big after conversion->abort")
        sys.exit(0)

    sound = AudioSegment.from_mp3(tmp_path_name)
    frame_rate=sound.frame_rate
    sound = sound.set_channels(1)
    # Read the channel count AFTER the mono conversion: the file sent to the API
    # is the mono export, so the config must describe that file.
    channels=sound.channels

    #upload to storage for URI access on audio file by google cloud speech api
    name_test="post"+filename
    bucket_tmp_path = get_file_path(name_test)
    local_tmp_files.append(bucket_tmp_path)
    sound.export(bucket_tmp_path, format="mp3")
    bucket = client.get_bucket(OUTPUT_BUCKET)
    newblob = bucket.blob(name_test)
    newblob.upload_from_filename(bucket_tmp_path)
  finally:
    #delete local tmp files (only deletes the cloud function tmp files)
    for local_path in local_tmp_files:
      if os.path.exists(local_path):
        os.remove(local_path)

  gcs_uri = 'gs://' + OUTPUT_BUCKET + '/' + name_test
  client_speech = speech.SpeechClient()
  audio = speech.types.RecognitionAudio(uri=gcs_uri)

  #general configuration of api NOTE! HERE YOU CAN CHANGE THE LANGUAGE_CODE, MODEL AND OTHER SETTINGS
  config = speech.types.RecognitionConfig(
        encoding=speech.enums.RecognitionConfig.AudioEncoding.ENCODING_UNSPECIFIED,
        sample_rate_hertz=frame_rate,
        audio_channel_count=channels,
        language_code='en-US',
        model='default',
        enable_word_time_offsets=True,
        enable_automatic_punctuation=punctuation)
  #send config and audio to the API to retrieve the text
  operation = client_speech.long_running_recognize(config, audio)

  print('2. Waiting for transcription to complete. For File:', filename)
  result = operation.result(timeout=400)
  return result, filename
  
def process_to_file(result,filename):
    """
    Write the recognized words as srt-style captions to a local tmp file.

    1. Creates the output file "<filename>output.txt" in the tmp folder.
    2. Walks through the recognition result word by word and builds sentences.
    3. A sentence represents one caption time-frame and is closed when
      a) the current word ends SUB_FRAME seconds or more after the caption start, or
      b) the current word is the last word of a result section.
      c) Amount of words: not implemented; count the words of the sentence and
         add a closing condition on e.g. MAX_LEN if needed.
    The word that closes a caption is part of that caption, so the caption's
    end time is the end time of its last word.

    Args:
        result: recognition result; result.results[i].alternatives[0].words is read.
        filename: name of the transcribed audio file, used to build the output name.

    Returns:
        Path of the written tmp file.
    """
    SUB_FRAME=4 #length of sentence in seconds

    def to_srt_time(seconds):
        # Format an offset in seconds as HH:MM:SS,mmm. UTC is fixed so that
        # 0.0 seconds is 00:00:00,000 whatever the timezone of the host is.
        stamp = datetime.datetime.fromtimestamp(seconds, datetime.timezone.utc)
        return stamp.strftime('%H:%M:%S,%f')[:-3]

    #set output file tmp path
    path_name_output = get_file_path(filename+"output.txt")
    timer=0.0       # Start time of the caption that is currently being built
    caption_index=1 # Current sentence caption number

    # "w" starts from an empty file, so a leftover file of an earlier failed run
    # is not appended to; the with-block closes the file on errors as well.
    with open(path_name_output, "w") as f:
        for section in result.results:
            alternative = section.alternatives[0]
            sentence=""     # Words collected for the current caption
            word_count=len(alternative.words)

            for position, word_info in enumerate(alternative.words, start=1):
                print("word_info.word",word_info.word)
                # True while the last word of this section is processed
                end_flag = position>=word_count

                #get word and its end-time; end_time_p contains transformation in seconds
                word = word_info.word
                end_time = word_info.end_time
                end_time_p=end_time.seconds + end_time.nanos * 1e-9
                print("first check timer:", timer,"<="," end-time:",end_time_p ,"sub_frame:",SUB_FRAME)

                sentence=sentence+word+" "
                #close the caption when the time frame is used up or on the last word
                if end_flag or end_time_p-timer>=SUB_FRAME:
                    print(caption_index, file=f)
                    print(to_srt_time(timer),"-->",to_srt_time(end_time_p), file=f)
                    print(sentence, file=f)
                    print("", file=f)
                    timer=end_time_p
                    sentence=""
                    caption_index+=1

    return path_name_output

def process_to_file_translate(result,filename):
    """
    Write translated srt-style captions to a local tmp file.

    Words are collected into a sentence until a word ends with a sentence
    terminator ('.', '?' or '!') or the last word of a result section is
    reached, so text without closing punctuation is not lost. Each caption
    holds the translation (to TARGET_LANGUAGE) followed by the original sentence.

    Args:
        result: recognition result; result.results[i].alternatives[0].words is read.
        filename: name of the transcribed audio file, used to build the output name.

    Returns:
        Path of the written tmp file ("<filename>output_translated.txt" in the tmp folder).
    """
    SENTENCE_ENDINGS=('.', '?', '!')

    def to_srt_time(seconds):
        # Format an offset in seconds as HH:MM:SS,mmm. UTC is fixed so that
        # 0.0 seconds is 00:00:00,000 whatever the timezone of the host is.
        stamp = datetime.datetime.fromtimestamp(seconds, datetime.timezone.utc)
        return stamp.strftime('%H:%M:%S,%f')[:-3]

    #set output file tmp path
    print('Waiting for translation to complete. For File:', filename)
    target_language=TARGET_LANGUAGE
    path_name_output_translate = get_file_path(filename+"output_translated.txt")
    timer=0.0       # Start time of the caption that is currently being built
    caption_index=1 # Current caption number

    # "w" starts from an empty file, so a leftover file of an earlier failed run
    # is not appended to; the with-block closes the file on errors as well.
    with open(path_name_output_translate, "w") as f:
        for section in result.results:
            alternative = section.alternatives[0]
            sentence=""
            last_position=len(alternative.words)
            for position, word_info in enumerate(alternative.words, start=1):
                word = word_info.word
                end_time_p=word_info.end_time.seconds + word_info.end_time.nanos * 1e-9
                sentence=sentence+word+" "
                #close the caption at the end of a sentence or on the last word
                if word.endswith(SENTENCE_ENDINGS) or position==last_position:
                    translated_sentence=translate_text(sentence, target_language)
                    print(caption_index, file=f)
                    print(to_srt_time(timer),"-->",to_srt_time(end_time_p), file=f)
                    print(translated_sentence, file=f)
                    print(sentence, file=f)
                    print("", file=f)
                    timer=end_time_p
                    sentence=""
                    caption_index+=1

    return path_name_output_translate
  
def make_subtitles(event_metadata, context):
    """
    Entry point of the cloud function: create subtitle files for an uploaded audio file.

    1. Transcribe the uploaded audio file (speech-to-text with word time offsets).
    2. Write the captions to a local tmp file and upload it to OUTPUT_BUCKET as "<name>.txt".
    3. If FLAG_TRANSLATE is set, also write the translated captions and upload
       them as "<name>translated.txt".
    4. Remove the local tmp files that were returned, also when an upload or
       the translation fails.

    Args:
        event_metadata: storage event dict; 'bucket' and 'name' identify the uploaded object.
        context: second argument passed by the runtime (not used).
    """
    INPUT_BUCKET = event_metadata['bucket']
    FILENAME = event_metadata['name']
    # Speech-To-Text function
    print('1. retrieved data:',FILENAME)
    punctuation=True # set False for transcription without translation. Less computing
    result_fetch, FILENAME=transcribe_gcs_with_word_time_offsets(event_metadata,INPUT_BUCKET,FILENAME, punctuation)
    print('transcription complete for:',FILENAME)

    # local tmp files to delete when the function is done
    local_files = []
    try:
        # Manipulating retrieved path of Text-to-.srt file
        foutput_file_name=process_to_file(result_fetch,FILENAME)
        local_files.append(foutput_file_name)

        # Upload the .srt file in output bucket
        bucket = client.get_bucket(OUTPUT_BUCKET)
        newblob = bucket.blob(FILENAME+".txt")
        newblob.upload_from_filename(foutput_file_name)
        if FLAG_TRANSLATE:
            print('3. init translation for:',FILENAME)
            foutput_file_name_translate=process_to_file_translate(result_fetch,FILENAME)
            local_files.append(foutput_file_name_translate)
            print('translation complete for:',FILENAME)
            newblob2 = bucket.blob(FILENAME+"translated.txt")
            newblob2.upload_from_filename(foutput_file_name_translate)
    finally:
        #remove local stored files
        for local_path in local_files:
            if os.path.exists(local_path):
                os.remove(local_path)


def translate_text(text, target_language_p):
    """
    Translate text into the target language with the Cloud Translation API (v2 client).

    Args:
        text: text to translate.
        target_language_p: language code of the target language, e.g. "de".

    Returns:
        The translated text. Empty or whitespace-only input is returned
        unchanged without calling the API.
    """
    if not text or not text.strip():
        return text

    # Create the client on first use and keep it on the function object, so it
    # is not rebuilt for every sentence.
    translate_client = getattr(translate_text, "_client", None)
    if translate_client is None:
        translate_client = translate.Client()
        translate_text._client = translate_client

    # format_='text': the input is plain text. Without it the API treats the
    # input as HTML and returns entities such as &#39; in the translation.
    result = translate_client.translate(
        text, target_language=target_language_p, format_='text')

    return result['translatedText']


Overwriting main.py


## Deploy cloud function

 The following command should show you the main.py and requirements.txt files. For the deployment, main.py and requirements.txt must be in the current working folder; otherwise you will get module import errors.

Now we have the two required files. Next we deploy the cloud function via bash command.

"make_subtitles"= name of the main execution function

"trigger-resource"= id of trigger_bucket to listen for incoming new audio files

"google.storage.object.finalize"= one of 4 options to trigger the cloud function. In this case on the creation of new files


In [118]:
%%bash
# main.py waits up to 400s for the transcription (operation.result(timeout=400)),
# so the function timeout has to be longer than that. 540s is the maximum for
# Cloud Functions (1st gen).
TARGET_STORAGE="gs://${trigger_Bucket}"
gcloud functions deploy make_subtitles --region "${REGION}" --runtime python37 --timeout 540s --trigger-resource "${TARGET_STORAGE}" --trigger-event google.storage.object.finalize

availableMemoryMb: 256
entryPoint: make_subtitles
eventTrigger:
  eventType: google.storage.object.finalize
  failurePolicy: {}
  resource: projects/_/buckets/trigger-caaef4f5-eccf-4405-bd3d-595622299e5f
  service: storage.googleapis.com
ingressSettings: ALLOW_ALL
labels:
  deployment-tool: cli-gcloud
name: projects/cloudfunction-268018/locations/us-central1/functions/make_subtitles
runtime: python37
serviceAccountEmail: cloudfunction-268018@appspot.gserviceaccount.com
sourceUploadUrl: https://storage.googleapis.com/gcf-upload-us-central1-af29b935-e25b-48ea-9946-84d6982eec89/c4d1e942-b9c3-4466-a982-6e134f8052b0.zip?GoogleAccessId=service-410169410576@gcf-admin-robot.iam.gserviceaccount.com&Expires=1581595370&Signature=FA3ullB1Yh1e2tP9DQvd93nO7x0gtK9E%2FDDIW1jVZriXxQfY0misMBMPnQcQ8NQXwpsXD6GGmCHaXFr7wz3bMzuD9jZBIYDaqi02c3np%2BbyGi54AB%2FwM1ZHZbKkaMkCdDvA5WQWcb7Wby5pVwL%2BClFxwS%2B2rRy69RLPmphL7NHdBf9VSsk0FGKbGe4KDLy2%2BQcNYKyHI%2FZRfiPJU%2FRaRIQ6K74auT7qnhSqB99jvgRK5JlusYCcShZAFv7IeufLj

Allow unauthenticated invocations of new function [make_subtitles]? 
(y/N)?  
Deploying function (may take a while - up to 2 minutes)...
...........................................done.


After the function is deployed (approx. 2 min), you can upload a file to the target folder. Upload your file and afterwards set the correct file name in the next cell.

In [119]:
# Name of the audio file to upload; exported so the %%bash cell below can read it.
File_name2 = "output2-1channel.mp3"
os.environ.update(File_name2=File_name2)

In [120]:
%%bash
# Replace an earlier upload of the file in the trigger bucket with a fresh copy.
TRIGGER_URI="gs://${trigger_Bucket}"
gsutil rm "${TRIGGER_URI}/${File_name2}"
gsutil cp "${File_name2}" "${TRIGGER_URI}"

Removing gs://trigger-caaef4f5-eccf-4405-bd3d-595622299e5f/output2-1channel.mp3...
/ [1 objects]                                                                   
Operation completed over 1 objects.                                              
Copying file://output2-1channel.mp3 [Content-Type=audio/mpeg]...
/ [1 files][  1.3 MiB/  1.3 MiB]                                                
Operation completed over 1 objects/1.3 MiB.                                      


In [106]:
!rm -r results
!mkdir results

In [None]:
import time
# Pause for 200 seconds before the results are copied from the output bucket.
# NOTE(review): presumably this gives the cloud function time to finish; confirm the value against the deployed function timeout.
time.sleep(200)

In [107]:
%%bash
# Copy the content of the output bucket into the local results/ folder.
gsutil -m cp -R "gs://${Output_Bucket}" results/

Copying gs://output-39f8058a-242b-44ff-b2b3-b86aa36643fc/output2-1channel.wav.mp3.txt...
Copying gs://output-39f8058a-242b-44ff-b2b3-b86aa36643fc/output2-1channellow.wav.mp3.txt...
Copying gs://output-39f8058a-242b-44ff-b2b3-b86aa36643fc/output2-1channel.mp3.txt...
Copying gs://output-39f8058a-242b-44ff-b2b3-b86aa36643fc/output2-2channel.mp3.txt...
Copying gs://output-39f8058a-242b-44ff-b2b3-b86aa36643fc/output2-2channel.wav.mp3.txt...
Copying gs://output-39f8058a-242b-44ff-b2b3-b86aa36643fc/output2-std-1c-24k.mp3.txt...
Copying gs://output-39f8058a-242b-44ff-b2b3-b86aa36643fc/output2.mp3.txt...
Copying gs://output-39f8058a-242b-44ff-b2b3-b86aa36643fc/output2.mp3translated.txt...
Copying gs://output-39f8058a-242b-44ff-b2b3-b86aa36643fc/postoutput2-1channellow.wav.mp3...
Copying gs://output-39f8058a-242b-44ff-b2b3-b86aa36643fc/postoutput2-1channel.mp3...
Copying gs://output-39f8058a-242b-44ff-b2b3-b86aa36643fc/postoutput2.mp3...     
Copying gs://output-39f8058a-242b-44ff-b2b3-b86aa3664

## [ALTERNATIVE] Following code creates a zipfile, which can be used for the manual deployment in google cloud functions via the google cloud console web UI

In [None]:
!pip install zipfile36

Defaulting to user installation because normal site-packages is not writeable
Collecting zipfile36
  Downloading zipfile36-0.1.3-py3-none-any.whl (20 kB)
Installing collected packages: zipfile36
Successfully installed zipfile36-0.1.3


In [None]:
from zipfile import ZipFile

# Bundle the two deployment files into one archive.
with ZipFile('cloud_function.zip', mode='w') as archive:
    for deploy_file in ('main.py', 'requirements.txt'):
        archive.write(deploy_file)