In [41]:
import argparse
import os
import utils
import json
import time
import requests

# AssemblyAI API key, read from the environment; None when the variable is unset.
AAI_API_KEY = os.environ.get("AAI_API_KEY")

def transcribe(audio_file):
    """Upload an audio file, wait for its transcription, and save the result.

    The file is uploaded with ``utils.upload_file``, a transcript is requested
    with ``utils.request_transcript``, and the resulting polling endpoint is
    queried every 5 seconds until the job finishes. The final JSON response is
    written to ``transcript.json`` in the current working directory.

    Args:
        audio_file: The audio file to transcribe; forwarded unchanged to
            ``utils.upload_file``.

    Returns:
        A ``(polling_response, polling_endpoint)`` tuple: the decoded JSON of
        the final polling response and the endpoint it was fetched from.

    Raises:
        RuntimeError: If ``AAI_API_KEY`` is not set, or if the API reports the
            transcription status as ``'error'``.
    """
    if AAI_API_KEY is None:
        raise RuntimeError("AAI_API_KEY environment variable not set. Try setting it now.")

    # Create header with authorization along with content-type
    header = {
        'authorization': AAI_API_KEY,
        'content-type': 'application/json'
    }

    upload_url = utils.upload_file(audio_file, header)

    # Request a transcription
    transcript_response = utils.request_transcript(upload_url, header)

    # Create a polling endpoint that will let us check when the transcription is complete
    polling_endpoint = utils.make_polling_endpoint(transcript_response)

    # Wait until the transcription is complete
    while True:
        polling_response = requests.get(polling_endpoint, headers=header).json()
        status = polling_response['status']

        if status == 'completed':
            break

        # A failed job never reaches 'completed'; stop instead of polling forever.
        if status == 'error':
            raise RuntimeError(
                f"Transcription failed: {polling_response.get('error')}"
            )

        time.sleep(5)

    # Save the transcript. json.dump writes to the open file handle; the
    # previous code rebound `f` to a string and left the file empty.
    with open('transcript.json', 'w') as f:
        json.dump(polling_response, f)

    return polling_response, polling_endpoint


In [43]:
# Transcribe 'audio.mp3'; transcribe() returns the final polling response
# together with the polling endpoint it was fetched from.
transcript_response, polling_endpoint = transcribe('audio.mp3')

In [44]:
# Display the full JSON response returned by transcribe().
transcript_response

{'id': 'r7l6gsk8al-449b-4cdf-9305-268aaf7a5d67',
 'language_model': 'assemblyai_default',
 'acoustic_model': 'assemblyai_default',
 'language_code': 'en_us',
 'status': 'completed',
 'audio_url': 'https://cdn.assemblyai.com/upload/c247a77e-e574-4c8a-a211-0625294a8c9c',
 'text': "AssemblyAI is a deep learning company that builds powerful APIs to help you transcribe and understand audio. The most common use case for the API is to automatically convert prerecorded audio and video files, as well as real time audio streams into text transcriptions. Our APIs convert audio and video into text using powerful deep learning models that we research and develop end to end in house. Millions of podcasts, zoom recordings, phone calls, or video files are being transcribed with AssemblyAI every single day. But where AssemblyAI really excels is with helping you understand your data. So let's say we transcribe Joe Biden's State of the Union using assembly. AI's API. With our Auto Chapter feature, you ca

In [45]:
# `header` inside transcribe() is a local variable and is not visible here,
# so build the request header at notebook scope before using it.
header = {
    'authorization': AAI_API_KEY,
    'content-type': 'application/json'
}

# Fetch the transcript split into paragraphs via the polling endpoint.
paragraphs = utils.get_paragraphs(polling_endpoint, header)

In [47]:
# Print the text of each paragraph on its own line.
for paragraph in paragraphs:
    paragraph_text = paragraph['text']
    print(paragraph_text)

AssemblyAI is a deep learning company that builds powerful APIs to help you transcribe and understand audio. The most common use case for the API is to automatically convert prerecorded audio and video files, as well as real time audio streams into text transcriptions. Our APIs convert audio and video into text using powerful deep learning models that we research and develop end to end in house. Millions of podcasts, zoom recordings, phone calls, or video files are being transcribed with AssemblyAI every single day. But where AssemblyAI really excels is with helping you understand your data.
So let's say we transcribe Joe Biden's State of the Union using assembly. AI's API. With our Auto Chapter feature, you can generate time coded summaries of the key moments of your audio file. For example, with the State of the Union address, we get chapter summaries like this auto Chapters automatically segments your audio or video files into chapters and provides a summary for each of these chapte

In [None]:
import cohere

# Read the Cohere key from its own environment variable. The previous code
# read "AAI_API_KEY", which holds the AssemblyAI key.
COHERE_API_KEY = os.getenv("COHERE_API_KEY")
if COHERE_API_KEY is None:
    raise RuntimeError("COHERE_API_KEY environment variable not set. Try setting it now.")

# Pass the key that was just loaded; `api_key` was never defined.
co = cohere.Client(COHERE_API_KEY)