In [59]:
# Amazon Transcribe Documentation
# https://docs.aws.amazon.com/transcribe/latest/APIReference/API_StartTranscriptionJob.html

In [60]:
#Add correct path to the Python libraries
import sys

# Library directories appended to the import search path, in this order.
_PYTHON_LIB_PATHS = (
    '/var/lang/lib/python37.zip',
    '/var/lang/lib/python3.7',
    '/var/lang/lib/python3.7/lib-dynload',
    '/var/lang/lib/python3.7/site-packages',
)
for _lib_path in _PYTHON_LIB_PATHS:
    # Skip entries that are already present so re-running this cell
    # does not keep growing sys.path with duplicates.
    if _lib_path not in sys.path:
        sys.path.append(_lib_path)

# Remove the SageMaker-internal conda site-packages entry, if present.
# An explicit membership check replaces the former bare `except: pass`,
# which silently swallowed every error (including KeyboardInterrupt).
_SAGEMAKER_SITE_PACKAGES = '/opt/.sagemakerinternal/conda/lib/python3.7/site-packages'
if _SAGEMAKER_SITE_PACKAGES in sys.path:
    sys.path.remove(_SAGEMAKER_SITE_PACKAGES)

In [61]:
#Import libraries
import boto3
import json
from time import gmtime, strftime, sleep
import uuid
import datetime as dt

In [62]:
#Create the boto3 session, pinned to the us-east-1 region
boto3_session = boto3.Session(region_name='us-east-1')

In [63]:
#Amazon Transcribe client, created from the boto3 session
transcribe = boto3_session.client('transcribe')

In [64]:
def transcribe_post(post_id, poll_interval=1):
    """Transcribe a post's video with Amazon Transcribe and save the result to DynamoDB.

    Starts a transcription job for ``s3://now-app-media-service/{post_id}.mp4``,
    waits for the job to reach a terminal state, reads the JSON transcript and
    the SRT subtitles the job wrote under ``transcripts/python/`` in the same
    bucket, prints the transcript, and writes an item to the ``prod-post`` table.

    Args:
        post_id: Identifier of the post; used to build the media key, the
            output keys, the job name and the DynamoDB item ``id``.
        poll_interval: Seconds to sleep between job status checks.

    Raises:
        RuntimeError: If the transcription job ends with status ``FAILED``.
    """
    bucket = 'now-app-media-service'
    transcript_key = f'transcripts/python/{post_id}.json'
    subtitles_key = f'transcripts/python/{post_id}.srt'
    # Random suffix so that re-running for the same post does not reuse a job name.
    job_name = f'post-ingestion-transcription-{post_id}-{uuid.uuid4()}'

    #Start transcription job
    transcribe.start_transcription_job(
        TranscriptionJobName=job_name,
        LanguageCode='en-US',
        MediaFormat='mp4',
        Media={
            'MediaFileUri': f's3://{bucket}/{post_id}.mp4'
        },
        OutputBucketName=bucket,
        OutputKey=transcript_key,
        Settings={
            'ShowSpeakerLabels': True,
            'MaxSpeakerLabels': 10,
            'ChannelIdentification': False,
            'ShowAlternatives': False
        },
        Subtitles={
            'Formats': [
                'srt'
            ],
            'OutputStartIndex': 1
        }
    )

    #wait until the job is finished
    while True:
        job = transcribe.get_transcription_job(
            TranscriptionJobName=job_name
        )['TranscriptionJob']
        if job['TranscriptionJobStatus'] in ('COMPLETED', 'FAILED'):
            break
        # Sleep only while the job is still running, not after the final check.
        sleep(poll_interval)

    # The output keys depend only on post_id, so after a failed job they may be
    # missing or still hold the output of an earlier run. Stop here instead of
    # reading (and persisting) data that does not belong to this job.
    if job['TranscriptionJobStatus'] == 'FAILED':
        failure_reason = job.get('FailureReason', 'no failure reason reported')
        raise RuntimeError(f'Transcription job {job_name} failed: {failure_reason}')

    #print the transcription
    s3 = boto3_session.resource('s3')
    transcript_body = s3.Object(bucket, transcript_key).get()['Body'].read().decode('utf-8')
    transcript_json = json.loads(transcript_body)
    transcript_text = transcript_json['results']['transcripts'][0]['transcript']

    print(f'{post_id}: {transcript_text}')
    print('-----------------------------------------------------------------------')

    #Read the subtitles in srt file
    subtitles_text = s3.Object(bucket, subtitles_key).get()['Body'].read().decode('utf-8')

    #Save to DynamoDB
    # NOTE(review): put_item writes the complete item; if 'prod-post' already
    # holds an item for this id, confirm that replacing it is intended.
    item = {
        'id': post_id,
        'postTranscript': transcript_text,
        'postSubtitles': subtitles_text,
        'postSpeakersNumber': transcript_json['results']['speaker_labels']['speakers'],
        # Naive local time in ISO 8601 format, as produced by datetime.now().
        'postedTimestamp': dt.datetime.now().isoformat(),
    }
    boto3_session.resource('dynamodb').Table('prod-post').put_item(Item=item)

In [None]:
# Posts to transcribe, processed one after another in this order.
post_ids = (
    'N26', 'N27', 'N28', 'N29',
    'N31', 'N32', 'N33', 'N34', 'N35',
    'N36', 'N37', 'N38', 'N39', 'N40',
    'N42', 'N43', 'N44', 'N45', 'N46', 'N47',
)
for post_id in post_ids:
    transcribe_post(post_id)

N26: Over the weekend here in Newport, police officers getting in a skirmish with several men here on the same street in town. Now video taken that was shared with AbC six, showing the officers pressing a man against a trolley, as well as pushing another one into a telephone pole, and then a third man getting punched in the face by police off Newport. Police tell me that the incidents stem from two arrests made that night with four local individuals involved. They do say that the chief has been made aware of these incidents and they are thoroughly investigating this. At this time, I am told that there is no word on if the officers in the video have been identified for any conduct that could come with them.
-----------------------------------------------------------------------
N27: More major renovations to the high school athletic fields in North Attleborough hit a snag back in 2019. The bleachers at the high school were found unsafe and closed. Since then, the North Attleborough High