create Sequence audio2text-input

scramjetorg · Feb 27, 2024 · 049d8af · 049d8af
1 parent cd1a91e
commit 049d8af
Show file tree

Hide file tree

Showing 4 changed files with 108 additions and 0 deletions.
diff --git a/python/audio2text-input/README.md b/python/audio2text-input/README.md
@@ -0,0 +1,46 @@
+# Audio to Text Sequence
+
+This Sequence demonstrates how to turn voice data into text transcripts with <a href="https://www.assemblyai.com/" target="_blank">AssemblyAI</a> Speech models. Audio is streamed to Scramjet Transform Hub as the Sequence input, and the audio transcript is returned as the output.
+
+
+## Requirements
+For this Sequence to run properly on your Linux machine use the following command to start <a href="https://docs.scramjet.org/transform-hub/installation" target="_blank">STH</a>.
+
+```bash
+$ DEVELOPMENT=true sth --runtime-adapter=process
+```
+
+**NOTE:** To run this Sequence, you'll need your <a href="https://www.assemblyai.com/" target="_blank">AssemblyAI</a> token, which must be passed as an argument when executing the start command.
+
+
+## Install and Run
+
+Install the <a href="https://docs.scramjet.org/platform/self-hosted-installation/" target="_blank">Scramjet Transform Hub </a> (STH) locally or use 
+<a href="https://docs.scramjet.org/platform/get-started/" target="_blank">Scramjet's Cloud Platform</a> environment for the Sequence deployment.
+For more information on the below commands check the 
+<a href="https://docs.scramjet.org/platform/cli-reference/#useful-commands" target="_blank">CLI reference</a> section on Scramjet's Website.
+
+On the Linux terminal execute the following commands:
+
+```bash
+# Create a directory __pypackages__ in the same directory as main.py
+~/audio2text-input$ mkdir __pypackages__
+
+# Install dependencies in the __pypackages__ folder. 
+~/audio2text-input$ pip3 install -t __pypackages__ -r requirements.txt
+
+# Pack the audio2text-input folder into a tar.gz archive
+~$ si sequence pack audio2text-input
+
+# Send the audio2text-input.tar.gz Sequence to Scramjet Transform Hub; the command returns a <Sequence-id>
+~$ si sequence send audio2text-input.tar.gz --progress
+
+# Start the Sequence, passing your AssemblyAI token as the argument
+~$ si seq start - --args=[\"token\"] 
+
+# Send the audio file as input
+~$ si instance input <Instance-id> local/path/to/audio.wav -e -t application/octet-stream
+
+# Read the audio transcript as output
+~$ si instance output <Instance-id>
+```
diff --git a/python/audio2text-input/main.py b/python/audio2text-input/main.py
@@ -0,0 +1,42 @@
+import requests
+import time
+from scramjet.streams import Stream
+import json
+
+async def run(context, input, args1):
+    audio_file = await input.reduce(lambda a, b: a+b)
+    base_url = "https://api.assemblyai.com/v2"
+
+    headers = {
+        "authorization": args1
+    }
+
+    response = requests.post(
+        base_url + "/upload",
+        headers=headers,
+        data=audio_file 
+    )
+    upload_url = response.json()["upload_url"]
+    data = {
+        "audio_url": upload_url  
+    }
+    url = base_url + "/transcript"
+    response = requests.post(url, json=data, headers=headers)
+
+    transcript_id = response.json()['id']
+    polling_endpoint = f"https://api.assemblyai.com/v2/transcript/{transcript_id}"
+
+    while True:
+        transcription_result = requests.get(polling_endpoint, headers=headers).json()
+
+        if transcription_result['status'] == 'completed':
+            break
+
+        elif transcription_result['status'] == 'error':
+            raise RuntimeError(f"Transcription failed: {transcription_result['error']}")
+
+        else:
+            time.sleep(3)
+
+
+    return Stream.read_from(f"{transcription_result['text']} \n")
diff --git a/python/audio2text-input/package.json b/python/audio2text-input/package.json
@@ -0,0 +1,15 @@
+{
+    "name": "py-transcript",
+    "version": "1.0.0",
+    "main": "./main.py",
+    "author": "Ray_Nawfal",
+    "license": "GPL-3.0",
+    "description": "Transcribe an audio file using the AssemblyAI API.",
+    "engines": {
+      "python3": "3.8.0"
+    },
+    "scripts": {
+      "build": "mkdir -p dist/__pypackages__/ && pip3 install -t dist/__pypackages__/ -r requirements.txt && cp -t ./dist/ *.py *.json *.wav", 
+      "clean": "rm -rf ./dist"
+    }
+}
diff --git a/python/audio2text-input/requirements.txt b/python/audio2text-input/requirements.txt
@@ -0,0 +1,5 @@
+scramjet-framework-py
+requests
+pyee
+urllib3==1.26.6
+pyOpenSSL