# Step1 - Download YouTube video and extract its audio track


In [1]:
# pip install pytube
# pip install moviepy
# conda install -c menpo ffmpeg 
import uuid
from pytube import YouTube
from moviepy.editor import VideoFileClip
from urllib.parse import urlparse, parse_qs
import os

# Test url Video - https://youtu.be/UzZFdEY4vJ0
# Strip surrounding whitespace so a pasted URL with a stray space or newline
# does not leak into the parsed video id and the derived filenames.
youtube_url = input("Enter a youtube url to be translated. Note: Video should be < 20 mins in duration. ").strip()
# Allowed codes for the original (spoken) language: 'en-US' (English USA) and 'es-US' (Spanish USA)
original_language_code = 'en-US'
# Allowed codes for the target language: en - English, es - Spanish, zh - Chinese, fr - French,
# ru - Russian, de - German, it - Italian, tr - Turkish, ja - Japanese, pt - Portuguese
translated_language_code = 'zh'
                                       

def get_id(url):
    """Extract the YouTube video id from a URL.

    The ``v`` query parameter wins when present (``.../watch?v=<id>``);
    otherwise the last non-empty path segment is used, which covers short
    (``youtu.be/<id>``) and embed (``.../embed/<id>``) links.

    Args:
        url: The video URL as a string. Surrounding whitespace is ignored.

    Returns:
        The video id, or an empty string when the URL carries neither a
        ``v`` parameter nor any path segment.
    """
    parsed = urlparse(url.strip())
    query_ids = parse_qs(parsed.query).get('v')
    if query_ids:
        return query_ids[0]
    # Drop empty segments so a trailing slash ("youtu.be/<id>/") still yields
    # the id instead of the empty string that follows the last "/".
    segments = [segment for segment in parsed.path.split('/') if segment]
    return segments[-1] if segments else ''
    
# Local filename layout: <video id>_<random uuid>__<source language>__<target language>.mp4
# The random uuid makes every run produce a distinct name.
youtube_id = get_id(youtube_url)
youtube_filename = "%s_%s__%s__%s.mp4" % (
    youtube_id,
    str(uuid.uuid4()),
    original_language_code,
    translated_language_code,
)
print(youtube_filename)

def download_video(url):
    """Download the video at ``url`` to ``youtube_filename`` unless it is already on disk.

    Args:
        url: URL of the YouTube video to fetch.

    Returns:
        The module-level ``youtube_filename`` (the local .mp4 name), whether
        or not a download was actually performed.
    """
    if not os.path.isfile(youtube_filename):
        # Use the caller-supplied URL rather than the module-level youtube_url.
        yt = YouTube(url)
        # pytube is given the name without its extension; splitext removes only
        # the final ".mp4" even if the video id part happens to contain a dot.
        # NOTE(review): presumably pytube appends the stream's own extension --
        # confirm against the installed pytube version.
        yt.streams.first().download(filename=os.path.splitext(youtube_filename)[0])
    return youtube_filename

# Fetch the video; download_video skips the transfer when the file is already present.
video_filename = download_video(youtube_url)
print("Finished downloading %s" % video_filename)

# The audio track is written under the same name with a .wav extension.
audio_file_name = video_filename.replace(".mp4", ".wav")
videoclip = VideoFileClip(video_filename)
if not os.path.isfile(audio_file_name):
    # '-ac 1' is forwarded to ffmpeg to request a single audio channel.
    videoclip.audio.write_audiofile(
        audio_file_name,
        ffmpeg_params=['-ac', '1'],
    )

print("Finished converting to audio %s" % audio_file_name)

Enter a youtube url to be translated. Note: Video should be < 20 mins in duration. https://youtu.be/UzZFdEY4vJ0
UzZFdEY4vJ0_ef21aeae-931f-455c-872f-dbee486091cf__en-US__zh.mp4
Finished downloading UzZFdEY4vJ0_ef21aeae-931f-455c-872f-dbee486091cf__en-US__zh.mp4
[MoviePy] Writing audio in UzZFdEY4vJ0_ef21aeae-931f-455c-872f-dbee486091cf__en-US__zh.wav


100%|██████████████████████████████████████████████████████████████████████████████| 783/783 [00:00<00:00, 2596.70it/s]


[MoviePy] Done.
Finished converting to audio UzZFdEY4vJ0_ef21aeae-931f-455c-872f-dbee486091cf__en-US__zh.wav


# Step2 - Upload audio file to S3 (once uploaded, a Lambda trigger generates the transcription from the S3 bucket)

In [2]:
import csv
# credentials
# Read the AWS access key pair from the first data row of the credentials CSV.
# 'utf-8-sig' drops a leading byte-order mark if the file has one, which would
# otherwise become part of the first column name and break the key lookup;
# newline='' is the mode the csv module documents for reading.
with open('video_translate_credentials.csv', 'r', encoding='utf-8-sig', newline='') as csvfile:
    credentials_reader = csv.DictReader(csvfile, delimiter=',')
    credentials_row = next(credentials_reader, None)

# Fail with a readable message instead of a bare StopIteration on an empty file.
if credentials_row is None:
    raise ValueError("video_translate_credentials.csv contains no credential rows")

access_key_id = credentials_row['Access key ID']
secret_access_key = credentials_row['Secret access key']
    
    

In [3]:
import boto3 
from botocore.exceptions import ClientError
# Upload audio file to S3 server if it doesn't exist already
# Name of the bucket that receives the extracted audio.
bucket_name = "video-translation"

# S3 client authenticated with the key pair loaded from the credentials CSV.
s3 = boto3.client(
    "s3",
    aws_access_key_id=access_key_id,
    aws_secret_access_key=secret_access_key,
    region_name='us-east-1',
)


def file_exists(s3, bucket, key):
    """Report whether ``key`` is present in ``bucket``.

    Args:
        s3: A boto3 S3 client (anything exposing ``head_object``).
        bucket: Name of the bucket to look in.
        key: Object key to look for.

    Returns:
        True when ``head_object`` succeeds, False when S3 reports the object
        as missing.

    Raises:
        ClientError: For any other failure (for example access denied), so a
            permissions or configuration problem is not mistaken for
            "the file exists".
    """
    try:
        s3.head_object(Bucket=bucket, Key=key)
    except ClientError as e:
        # Compare codes as strings: they are not always numeric
        # (e.g. 'AccessDenied'), so int() on them would raise ValueError.
        error_code = str(e.response.get('Error', {}).get('Code', ''))
        if error_code in ('404', 'NoSuchKey', 'NotFound'):
            return False
        raise
    return True

# Upload the extracted audio only when the bucket does not already hold it.
if not file_exists(s3, bucket_name, audio_file_name):
    print ("Uploading file .....  ",audio_file_name, " to S3")
    # The object key is the local file name.
    s3.upload_file(audio_file_name, bucket_name, audio_file_name)
    print ("Finished Uploading file .....  ",audio_file_name, "to S3" )
else:
    print (audio_file_name," already exists in bucket: ",bucket_name)

Uploading file .....   UzZFdEY4vJ0_ef21aeae-931f-455c-872f-dbee486091cf__en-US__zh.wav  to S3
Finished Uploading file .....   UzZFdEY4vJ0_ef21aeae-931f-455c-872f-dbee486091cf__en-US__zh.wav to S3


# Step3 - Check Transcription Status

In [4]:
import time
# Short pause before the first status lookup.
# NOTE(review): presumably this gives the S3-triggered Lambda time to create
# the transcription job -- confirm against the Lambda's behavior.
time.sleep(5)
# The job is looked up under the audio file's name without its extension.
job_name = audio_file_name.split('.')[0]
print (job_name)
transcribe = boto3.client('transcribe',
                     aws_access_key_id=access_key_id,
                    aws_secret_access_key=secret_access_key,
                    region_name='us-east-1')
# Poll every 5 seconds until the job reaches a terminal state.
while True:
    status = transcribe.get_transcription_job(TranscriptionJobName=job_name)
    job_status = status['TranscriptionJob']['TranscriptionJobStatus']
    if job_status in ['COMPLETED', 'FAILED']:
        break
    print("Transcription not ready yet...")
    time.sleep(5)
print("Job status: ", job_status)
# Stop here on failure: the following steps poll for files that are only
# produced after a successful transcription and would otherwise wait forever.
if job_status == 'FAILED':
    raise RuntimeError("Transcription job %s failed: %s" % (
        job_name,
        status['TranscriptionJob'].get('FailureReason', 'no failure reason reported'),
    ))

UzZFdEY4vJ0_ef21aeae-931f-455c-872f-dbee486091cf__en-US__zh
Transcription not ready yet...
Transcription not ready yet...
Transcription not ready yet...
Transcription not ready yet...
Transcription not ready yet...
Transcription not ready yet...
Transcription not ready yet...
Transcription not ready yet...
Transcription not ready yet...
Transcription not ready yet...
Transcription not ready yet...
Transcription not ready yet...
Transcription not ready yet...
Transcription not ready yet...
Transcription not ready yet...
Job status:  COMPLETED


# Step4 - Download subtitle file from S3 (once the transcription is ready, a new Lambda is triggered to generate the subtitle file)

In [5]:
# Buckets holding the generated subtitle files.
translated_bucket_name = "video-translation-output-translated-srt"
original_srt_bucket_name = "video-translation-output-original-srt"

# Subtitle object keys are derived from the video filename.
translated_srt_filename = youtube_filename.replace(".mp4","_translated.srt")
original_srt_filename = youtube_filename.replace(".mp4","_original.srt")
print (translated_srt_filename)

# Poll every 5 seconds until the translated subtitle file shows up.
while not file_exists(s3, translated_bucket_name, translated_srt_filename):
    print("Translated Subtitle file not ready yet...")
    time.sleep(5)
print ("Translated Subtitle file exists")

# Only the translated file is polled for; the original-language file is
# downloaded without a separate existence check.
s3.download_file(translated_bucket_name, translated_srt_filename, translated_srt_filename)
s3.download_file(original_srt_bucket_name, original_srt_filename, original_srt_filename)

UzZFdEY4vJ0_ef21aeae-931f-455c-872f-dbee486091cf__en-US__zh_translated.srt
Translated Subtitle file exists


# Step5 - Download final audio file (once the subtitle file is ready, a new Lambda is triggered to generate the final audio file)

In [6]:
# Bucket and object key of the generated (translated) audio track.
audio_bucket_name = "video-translation-final-audio-output"
final_audio_name = youtube_filename.replace(".mp4","_final.wav")
print (final_audio_name)

# Poll every 5 seconds until the final audio file shows up, then fetch it
# into the working directory under the same name.
while not file_exists(s3, audio_bucket_name, final_audio_name):
    print("final audio file not ready yet...")
    time.sleep(5)
print ("final audio file exists")

s3.download_file(audio_bucket_name, final_audio_name, final_audio_name)

UzZFdEY4vJ0_ef21aeae-931f-455c-872f-dbee486091cf__en-US__zh_final.wav
final audio file not ready yet...
final audio file exists


# Step6 - Stitch the downloaded (translated) audio file onto the original video

In [7]:
# Attach translated speech to original file
from moviepy.audio.io.AudioFileClip import AudioFileClip
# Open the downloaded source video and the translated audio track.
videoclip = VideoFileClip(youtube_filename)
translated_audio = AudioFileClip(final_audio_name)
# Bind the result of set_audio to a new name; that clip is what gets written out.
new_clip = videoclip.set_audio(translated_audio)
# Output name: the source filename with "_translated" inserted before the extension.
translated_video = youtube_filename.replace(".mp4","_translated.mp4")
new_clip.write_videofile(translated_video)

[MoviePy] >>>> Building video UzZFdEY4vJ0_ef21aeae-931f-455c-872f-dbee486091cf__en-US__zh_translated.mp4
[MoviePy] Writing audio in UzZFdEY4vJ0_ef21aeae-931f-455c-872f-dbee486091cf__en-US__zh_translatedTEMP_MPY_wvf_snd.mp3


100%|██████████████████████████████████████████████████████████████████████████████| 783/783 [00:00<00:00, 1367.55it/s]


[MoviePy] Done.
[MoviePy] Writing video UzZFdEY4vJ0_ef21aeae-931f-455c-872f-dbee486091cf__en-US__zh_translated.mp4


100%|███████████████████████████████████████████████████████████████████████████████▊| 426/427 [00:09<00:00, 44.73it/s]


[MoviePy] Done.
[MoviePy] >>>> Video ready: UzZFdEY4vJ0_ef21aeae-931f-455c-872f-dbee486091cf__en-US__zh_translated.mp4 

