Import the required modules and add your API key.
The key is stored in a separate file, api_secrets.py, in this format:
API_KEY_ASSEMBLYAI = 'key1'

In [1]:
import requests
import time
from api_secrets import API_KEY_ASSEMBLYAI

Add the endpoints and header data for the requests

In [2]:
# AssemblyAI v2 REST endpoints: one for raw media uploads, one for transcript jobs.
upload_endpoint = 'https://api.assemblyai.com/v2/upload'
transcript_endpoint = 'https://api.assemblyai.com/v2/transcript'

# Header set carrying only the API key (no content type).
headers_auth_only = dict(authorization=API_KEY_ASSEMBLYAI)

# Header set for JSON requests: the same API key plus a JSON content type.
headers = {**headers_auth_only, 'content-type': 'application/json'}

# Number of bytes read from disk per chunk while streaming an upload (5 MiB).
CHUNK_SIZE = 5 * 1024 * 1024


Upload a local file to AssemblyAI and return the URL of the uploaded file.

In [3]:
def upload(filename):
    """Upload a local file to AssemblyAI and return the URL it is stored under.

    The file is read in ``CHUNK_SIZE``-byte pieces through a generator, so it
    is never loaded into memory as a whole.

    Args:
        filename: Path of the local audio/video file to upload.

    Returns:
        The ``upload_url`` value from the JSON body of the upload response.

    Raises:
        requests.HTTPError: If the upload endpoint answers with a 4xx/5xx status.
        OSError: If the file cannot be opened or read.
    """
    def read_file(path):
        with open(path, 'rb') as f:
            # iter() with a sentinel keeps reading until the first empty
            # bytes object, i.e. end of file.
            yield from iter(lambda: f.read(CHUNK_SIZE), b'')

    upload_response = requests.post(
        upload_endpoint,
        headers=headers_auth_only,
        data=read_file(filename),
    )
    # Surface a rejected upload as an HTTP error instead of letting it show
    # up as an unrelated KeyError on 'upload_url' below.
    upload_response.raise_for_status()
    return upload_response.json()['upload_url']


Use the uploaded file's URL to request a transcription; the id of the new transcript is returned.

In [4]:
def transcribe(audio_url):
    """Submit a transcription request for ``audio_url`` and return its id.

    Args:
        audio_url: URL of the audio to transcribe, sent as the ``audio_url``
            field of the JSON request body.

    Returns:
        The ``id`` value from the JSON body of the response.

    Raises:
        requests.HTTPError: If the transcript endpoint answers with a 4xx/5xx
            status.
    """
    transcript_request = {'audio_url': audio_url}

    transcript_response = requests.post(
        transcript_endpoint,
        json=transcript_request,
        headers=headers,
    )
    # Report a rejected request as an HTTP error rather than a KeyError on 'id'.
    transcript_response.raise_for_status()
    return transcript_response.json()['id']

Use the transcript id to build the polling URL and request the transcript's current state from AssemblyAI.

Returns the polling response as parsed JSON.

In [5]:
def poll(transcript_id):
    """Fetch the current state of one transcript.

    Args:
        transcript_id: Id of the transcript; it is appended to
            ``transcript_endpoint`` to form the polling URL.

    Returns:
        The parsed JSON body of the polling response.

    Raises:
        requests.HTTPError: If the polling endpoint answers with a 4xx/5xx
            status.
    """
    polling_endpoint = f'{transcript_endpoint}/{transcript_id}'
    polling_response = requests.get(polling_endpoint, headers=headers)
    # Without this check an error payload would be returned as if it were a
    # transcript, and the caller would fail later on a missing 'status' key.
    polling_response.raise_for_status()
    return polling_response.json()

Request a transcription and poll its status until the transcription has completed or failed.

In [6]:
def get_transcription_result_url(url, poll_interval=30):
    """Start a transcription for ``url`` and block until it finishes.

    The transcript is polled repeatedly; between polls the function prints a
    progress message and sleeps for ``poll_interval`` seconds.

    Args:
        url: Audio URL passed to ``transcribe``.
        poll_interval: Seconds to wait between two polls. Defaults to 30.

    Returns:
        A ``(data, error)`` tuple. ``data`` is the last polling response.
        ``error`` is ``None`` when the status is ``'completed'`` and
        ``data['error']`` when the status is ``'error'``.
    """
    transcribe_id = transcribe(url)
    while True:
        data = poll(transcribe_id)
        status = data['status']
        if status == 'completed':
            return data, None
        if status == 'error':
            return data, data['error']

        # Any other status means the job is not finished yet: wait and retry.
        print(f"waiting for {poll_interval} seconds")
        time.sleep(poll_interval)

Run the transcription and save the resulting text to a file.

In [7]:
def save_transcript(url, title):
    """Transcribe ``url`` and write the transcript text to ``<title>.txt``.

    Args:
        url: Audio URL passed to ``get_transcription_result_url``.
        title: Output file name without extension; ``'.txt'`` is appended.

    Returns:
        None. On success the text is written and ``'Transcript saved'`` is
        printed; on failure the error is printed and no file is written.
    """
    data, error = get_transcription_result_url(url)

    # Check the error first: the response dict is returned alongside the
    # error as well, so testing ``data`` first would always take the
    # success path.
    if error:
        print("Error!!!", error)
    elif data:
        filename = title + '.txt'
        # Explicit UTF-8 so the write does not depend on the platform's
        # default encoding.
        with open(filename, 'w', encoding='utf-8') as f:
            f.write(data['text'])
        print('Transcript saved')

put it all together

In [9]:
# Local media file to transcribe.
filename = "test.mp4"
# Upload the file and keep the URL returned for it.
audio_url = upload(filename)

# Transcribe the uploaded file; on success the text is written to 'file_title.txt'.
save_transcript(audio_url, 'file_title')

waiting for 30 seconds
Transcript saved
