## Introduction
We are going to use:

portaudio (mac) and pyaudio (python) for listening to any scanner source

SoX for removing silence from clips and creating new files for each clip

The section below records to .wav:

In [None]:

import pyaudio
import wave
from datetime import datetime 

# Timestamp of this run, formatted like "2024_01_31-09:15:00_PM".
# NOTE(review): DATE is not used anywhere in this cell.
DATE = datetime.now().strftime("%Y_%m_%d-%I:%M:%S_%p")

FRAMES_PER_BUFFER = 3200  # frames requested from the stream per read() call
FORMAT = pyaudio.paInt16
CHANNELS = 1
RATE = 44100  # sample rate handed to both the input stream and the .wav header
RECORD_SECONDS = 60
WAV_OUTPUT_FILENAME = "audio_output.wav"
p = pyaudio.PyAudio()

# Look up the sample width before PyAudio is terminated, so the .wav header
# never depends on a torn-down PyAudio instance.
sample_width = p.get_sample_size(FORMAT)

frames = []

try:
    # starts recording
    stream = p.open(
        format=FORMAT,
        channels=CHANNELS,
        rate=RATE,
        input=True,
        frames_per_buffer=FRAMES_PER_BUFFER,
    )
    try:
        print("* recording")

        # Number of whole buffers needed to cover RECORD_SECONDS of audio.
        for _ in range(int(RATE / FRAMES_PER_BUFFER * RECORD_SECONDS)):
            frames.append(stream.read(FRAMES_PER_BUFFER))

        print("* done recording")
    finally:
        # Release the input stream even if a read fails part-way through.
        stream.stop_stream()
        stream.close()
finally:
    p.terminate()

# The context manager closes the file even if writing the frames fails.
with wave.open(WAV_OUTPUT_FILENAME, 'wb') as wf:
    wf.setnchannels(CHANNELS)
    wf.setsampwidth(sample_width)
    wf.setframerate(RATE)
    wf.writeframes(b''.join(frames))

I recorded a file that has some silent gaps and white noise in it.

Now we want to use SoX to split it up into different files.

Awesome guide here: https://digitalcardboard.com/blog/2009/08/25/the-sox-of-silence/

sox audio_output.wav audio_output_processed_.wav silence 1 0.5 1% 1 5.0 1% : newfile : restart

Run this from within python:

In [None]:
import subprocess

# Use SoX's `silence` effect to split audio_output.wav into separate
# audio_output_processed_*.wav clips (see the guide linked above for what the
# individual `silence` arguments mean).
bashCommand = "sox audio_output.wav audio_output_processed_.wav silence 1 0.5 1% 1 5.0 1% : newfile : restart"
# Pipe stderr as well as stdout: without it `error` below is always None and
# anything SoX reports on stderr never reaches this code.
process = subprocess.Popen(
    bashCommand.split(),
    stdout=subprocess.PIPE,
    stderr=subprocess.PIPE,
)
output, error = process.communicate()
# Surface a failed split instead of silently carrying on without clips.
if process.returncode != 0:
    raise subprocess.CalledProcessError(
        process.returncode, process.args, output=output, stderr=error
    )

Now to send it to the AssemblyAI API for transcription:


In [None]:
import sys
from configure import auth_key
import requests
import pprint
from time import sleep

# Path of the recording that gets uploaded for transcription.
FILENAME = "/Users/stevenoneill/Documents/projects/python/audio_output.wav"

# store global constants
headers = {
    # Send the API key imported from `configure`; the quoted string
    # "auth_key" would be sent verbatim and never carry the real key.
    "authorization": auth_key,
    "content-type": "application/json",
}
transcript_endpoint = "https://api.assemblyai.com/v2/transcript"
upload_endpoint = 'https://api.assemblyai.com/v2/upload'
 
# Generator used to hand a local audio file to the upload endpoint in chunks.
def read_file(filename, chunk_size=5242880):
    """Yield the contents of *filename* as successive ``bytes`` chunks.

    Args:
        filename: Path of the file to read. It is opened in binary mode.
        chunk_size: Maximum number of bytes per chunk. Defaults to
            5242880 (5 MiB).

    Yields:
        Non-empty ``bytes`` objects in file order. An empty file yields
        nothing.
    """
    with open(filename, 'rb') as _file:
        # iter() with a sentinel stops at the first empty read (end of file).
        for data in iter(lambda: _file.read(chunk_size), b''):
            yield data
 
# Upload the audio file; read_file yields it in chunks.
upload_response = requests.post(
    upload_endpoint,
    headers=headers,
    data=read_file(FILENAME),
)
# Fail here with the HTTP status rather than later with a KeyError on
# 'upload_url'.
upload_response.raise_for_status()
print('Audio file uploaded')

# Send a request to transcribe the uploaded audio.
transcript_request = {'audio_url': upload_response.json()['upload_url']}
transcript_response = requests.post(transcript_endpoint, json=transcript_request, headers=headers)
transcript_response.raise_for_status()
print('Transcription Requested')
transcript_job = transcript_response.json()  # parse the body once and reuse it
pprint.pprint(transcript_job)

# Set up polling: the job is addressed by its id, and the transcript is saved
# to "<id>.txt".
polling_endpoint = transcript_endpoint + "/" + transcript_job['id']
filename = transcript_job['id'] + '.txt'
polling_response = requests.get(polling_endpoint, headers=headers)
polling_result = polling_response.json()

# Sleep and poll again until the job reaches a final state. Stopping on
# 'error' as well as 'completed' keeps a failed job from looping forever.
while polling_result['status'] not in ('completed', 'error'):
    sleep(30)
    polling_response = requests.get(polling_endpoint, headers=headers)
    polling_result = polling_response.json()
    print("File is", polling_result['status'])

if polling_result['status'] == 'error':
    raise RuntimeError(
        "Transcription failed: " + str(polling_result.get('error'))
    )

with open(filename, 'w') as f:
    f.write(polling_result['text'])
print('Transcript saved to', filename)

In [8]:
import subprocess
# Use SoX's `silence` effect to split audio_output.wav into separate
# audio_output_processed_*.mp3 clips.
bashCommand = "sox audio_output.wav audio_output_processed_.mp3 silence 1 0.1 1% 1 1.0 1% : newfile : restart"
# Pipe stderr as well as stdout: without it `error` below is always None and
# anything SoX reports on stderr never reaches this code.
process = subprocess.Popen(
    bashCommand.split(),
    stdout=subprocess.PIPE,
    stderr=subprocess.PIPE,
)
output, error = process.communicate()
# Surface a failed split instead of silently carrying on without clips.
if process.returncode != 0:
    raise subprocess.CalledProcessError(
        process.returncode, process.args, output=output, stderr=error
    )