In [1]:
import json
import requests
import time

def transcribe_audio(auth_token, audio_file, use_disfluency_filter=False,
                     timeout=120, poll_interval=5):
    """Upload an audio file to the VITO transcribe endpoint and wait for the result.

    The file is posted to ``/v1/transcribe``; the returned task id is printed
    and then polled until the task completes, fails, or the deadline passes.

    Args:
        auth_token: Access token sent as a bearer token on every request.
        audio_file: Path of the audio file to upload.
        use_disfluency_filter: Value sent as the ``use_disfluency_filter``
            option of the request config.
        timeout: Maximum number of seconds to keep polling for a result.
        poll_interval: Seconds to sleep between two status polls.

    Returns:
        The ``results`` field of the task response once its status is
        ``completed``.

    Raises:
        requests.HTTPError: If a request comes back with an error status code.
        RuntimeError: If the task reports the ``failed`` status.
        TimeoutError: If the task has not finished when ``timeout`` elapses.
    """
    config = {
        "use_multi_channel": False,
        "use_itn": False,
        "use_disfluency_filter": use_disfluency_filter,
        "use_profanity_filter": False,
        "use_paragraph_splitter": True,
        "paragraph_splitter": {
            "max": 50
        }
    }
    headers = {'Authorization': f'bearer {auth_token}'}

    # Open the upload in a context manager so the handle is closed even when
    # the request raises; a bare open() would leak it.
    with open(audio_file, 'rb') as audio:
        resp = requests.post(
            'https://openapi.vito.ai/v1/transcribe',
            headers=headers,
            data={'config': json.dumps(config)},
            files={'file': audio}
        )
    resp.raise_for_status()
    task_id = resp.json()['id']
    print(f"작업 ID: {task_id}")

    # A monotonic clock keeps the deadline immune to wall-clock adjustments.
    deadline = time.monotonic() + timeout
    while True:
        resp = requests.get(
            f'https://openapi.vito.ai/v1/transcribe/{task_id}',
            headers=headers
        )
        resp.raise_for_status()
        body = resp.json()  # parse once per poll instead of once per field
        status = body['status']
        if status == 'completed':
            return body['results']
        if status == 'failed':
            # .get() so a response without an 'error' field still surfaces as
            # a transcription failure rather than a KeyError.
            raise RuntimeError(f"Transcription failed: {body.get('error')}")
        if time.monotonic() >= deadline:
            raise TimeoutError("Transcription timed out")
        time.sleep(poll_interval)

def save_transcription_results(results_without_filter, results_with_filter,
                               output_path="transcription_results.json"):
    """Write both transcription results to a single JSON file.

    Args:
        results_without_filter: Result stored under ``results_without_filter``.
        results_with_filter: Result stored under ``results_with_filter``.
        output_path: Destination file; defaults to
            ``transcription_results.json`` in the working directory.
    """
    combined_results = {
        "results_without_filter": results_without_filter,
        "results_with_filter": results_with_filter
    }
    # ensure_ascii=False writes non-ASCII text verbatim, so the file encoding
    # must be pinned; the platform default may not be UTF-8.
    with open(output_path, "w", encoding="utf-8") as f:
        json.dump(combined_results, f, indent=4, ensure_ascii=False)


# Obtain an access token.
# The API credentials are read from the VITO_CLIENT_ID / VITO_CLIENT_SECRET
# environment variables so the secret is never stored in the notebook; a
# missing variable raises KeyError naming it.
import os

resp = requests.post(
    'https://openapi.vito.ai/v1/authenticate',
    data={'client_id': os.environ['VITO_CLIENT_ID'],
          'client_secret': os.environ['VITO_CLIENT_SECRET']}
)
resp.raise_for_status()
auth_token = resp.json()['access_token']

# Upload the audio files and collect the transcription results.
# NOTE(review): the two passes use different files ('b.wav' vs 'B.m4a'), so the
# filtered and unfiltered outputs are not a like-for-like comparison — confirm
# this is intended.
results_without_filter = transcribe_audio(auth_token, 'b.wav', use_disfluency_filter=False)
results_with_filter = transcribe_audio(auth_token, 'B.m4a', use_disfluency_filter=True)

# Save the transcription results
save_transcription_results(results_without_filter, results_with_filter)


작업 ID: F5JvlvvKShKQIDX3pm_qOg
작업 ID: mXcwxYmXQKyisUzx4EJm7g


TypeError: save_transcription_results() takes 1 positional argument but 2 were given

In [None]:
import json
import requests
import time

def transcribe_audio(auth_token, audio_file, use_disfluency_filter=False,
                     timeout=120, poll_interval=5):
    """Upload an audio file to the VITO transcribe endpoint and wait for the result.

    The file is posted to ``/v1/transcribe``; the returned task id is printed
    and then polled until the task completes, fails, or the deadline passes.

    Args:
        auth_token: Access token sent as a bearer token on every request.
        audio_file: Path of the audio file to upload.
        use_disfluency_filter: Value sent as the ``use_disfluency_filter``
            option of the request config.
        timeout: Maximum number of seconds to keep polling for a result.
        poll_interval: Seconds to sleep between two status polls.

    Returns:
        The ``results`` field of the task response once its status is
        ``completed``.

    Raises:
        requests.HTTPError: If a request comes back with an error status code.
        RuntimeError: If the task reports the ``failed`` status.
        TimeoutError: If the task has not finished when ``timeout`` elapses.
    """
    config = {
        "use_multi_channel": False,
        "use_itn": False,
        "use_disfluency_filter": use_disfluency_filter,
        "use_profanity_filter": False,
        "use_paragraph_splitter": True,
        "paragraph_splitter": {
            "max": 50
        }
    }
    headers = {'Authorization': f'bearer {auth_token}'}

    # Open the upload in a context manager so the handle is closed even when
    # the request raises; a bare open() would leak it.
    with open(audio_file, 'rb') as audio:
        resp = requests.post(
            'https://openapi.vito.ai/v1/transcribe',
            headers=headers,
            data={'config': json.dumps(config)},
            files={'file': audio}
        )
    resp.raise_for_status()
    task_id = resp.json()['id']
    print(f"작업 ID: {task_id}")

    # Bound the polling: without a deadline and a 'failed' branch, a task that
    # never reaches 'completed' would keep this loop running forever.
    deadline = time.monotonic() + timeout
    while True:
        resp = requests.get(
            f'https://openapi.vito.ai/v1/transcribe/{task_id}',
            headers=headers
        )
        resp.raise_for_status()
        body = resp.json()  # parse once per poll instead of once per field
        status = body['status']
        if status == 'completed':
            return body['results']
        if status == 'failed':
            # .get() so a response without an 'error' field still surfaces as
            # a transcription failure rather than a KeyError.
            raise RuntimeError(f"Transcription failed: {body.get('error')}")
        if time.monotonic() >= deadline:
            raise TimeoutError("Transcription timed out")
        time.sleep(poll_interval)

def get_transcription_results(audio_file, client_id=None, client_secret=None):
    """Authenticate, then transcribe one audio file with and without the disfluency filter.

    Args:
        audio_file: Path of the audio file passed to ``transcribe_audio``.
        client_id: API client id; when ``None`` it is read from the
            ``VITO_CLIENT_ID`` environment variable.
        client_secret: API client secret; when ``None`` it is read from the
            ``VITO_CLIENT_SECRET`` environment variable.

    Returns:
        A dict with the keys ``results_without_filter`` and
        ``results_with_filter``, each holding the value returned by
        ``transcribe_audio`` for that setting.

    Raises:
        KeyError: If a credential is not passed and its environment variable
            is not set.
        requests.HTTPError: If the authentication request returns an error
            status code.
    """
    # Function-scope import keeps this definition self-contained in the notebook.
    import os

    # Credentials must not live in the notebook source; take them from the
    # caller or from the environment instead.
    if client_id is None:
        client_id = os.environ['VITO_CLIENT_ID']
    if client_secret is None:
        client_secret = os.environ['VITO_CLIENT_SECRET']

    # Obtain an access token
    resp = requests.post(
        'https://openapi.vito.ai/v1/authenticate',
        data={'client_id': client_id,
              'client_secret': client_secret}
    )
    resp.raise_for_status()
    auth_token = resp.json()['access_token']

    # Upload the audio file twice: first without, then with the filter
    results_without_filter = transcribe_audio(auth_token, audio_file, use_disfluency_filter=False)
    results_with_filter = transcribe_audio(auth_token, audio_file, use_disfluency_filter=True)

    return {
        "results_without_filter": results_without_filter,
        "results_with_filter": results_with_filter
    }


: 

In [5]:
# Path of the audio file selected by the user
audio_file = 'b.wav'

# Fetch the transcription results for that file; get_transcription_results
# must already be defined in the kernel when this cell runs.
transcription_results = get_transcription_results(audio_file)

# Dump the raw result to the cell output.
print(transcription_results)
def save_transcription_results(transcription_results,
                               output_path="transcription_results.json"):
    """Write the transcription results to a JSON file.

    Args:
        transcription_results: JSON-serialisable object to store.
        output_path: Destination file; defaults to
            ``transcription_results.json`` in the working directory.
    """
    # ensure_ascii=False writes non-ASCII text verbatim, so the file encoding
    # must be pinned; the platform default may not be UTF-8.
    with open(output_path, "w", encoding="utf-8") as f:
        json.dump(transcription_results, f, indent=4, ensure_ascii=False)


작업 ID: _K6xWCsLQHqa_0b9aX7TsA
작업 ID: BR0DiKHBT4SRLcul0gNVFw
{'results_without_filter': {'utterances': [{'start_at': 1038, 'duration': 9615, 'spk': 0, 'spk_type': 'NORMAL', 'msg': '아 음, 제가 음 집에 가려고 어 하는데 음 말이죠 음.'}, {'start_at': 13058, 'duration': 7720, 'spk': 0, 'spk_type': 'NORMAL', 'msg': '그 뭐냐, 부채가 제 눈앞에 떨어져서 음 에서 아팠어요.'}], 'verified': False}, 'results_with_filter': {'utterances': [{'start_at': 1038, 'duration': 8245, 'spk': 0, 'spk_type': 'NORMAL', 'msg': '제가 집에 가려고 하는데 말이죠.'}, {'start_at': 13058, 'duration': 7720, 'spk': 0, 'spk_type': 'NORMAL', 'msg': '뭐냐, 부채가 제 눈앞에 떨어져서 에서 아팠어요.'}], 'verified': False}}
