In [None]:
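# One-time setup: uncomment to install the packages this notebook needs.
# openai is pinned to 0.28 because the cells below use the legacy `openai.Audio` interface.
# ! pip install pyaudio --user
# ! pip install openai==0.28 --user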

In [20]:
import openai
import whisper

In [2]:
from dotenv import dotenv_values

In [3]:
# Read the key/value pairs from ../.env (path is relative to the current working directory).
config = dotenv_values("../.env")

In [4]:
# Fail fast with a readable message: a missing key would otherwise raise a bare
# KeyError, and an empty one would only show up as a failed API call in a later cell.
if not config.get("KEY_OPENAI"):
    raise KeyError("KEY_OPENAI is missing or empty in ../.env")
openai.api_key = config["KEY_OPENAI"]

In [5]:
# Record Some audio

import wave
import sys
import pyaudio

CHUNK = 1024  # frames requested from the input stream per read
FORMAT = pyaudio.paInt16  # sample format given to PyAudio; also determines the WAV sample width
CHANNELS = 1  # single channel; disabled variant was: 1 if sys.platform == "darwin" else 2
RATE = 44100  # sampling rate, used for both the input stream and the WAV header


def record_audio(seconds: int, filename: str):
    """Record audio from a PyAudio input stream into ``./audio/<filename>``.

    The recording uses the module-level CHUNK, FORMAT, CHANNELS and RATE
    settings and is written as a WAV file. Progress is printed once per
    recorded second.

    Args:
        seconds: Length of the recording in seconds.
        filename: Name of the WAV file to create inside ``./audio``.

    Returns:
        The path of the written file, i.e. ``"./audio/<filename>"``.
    """
    import os

    output_path = f"./audio/{filename}"
    # wave.open does not create missing directories, so make sure the target exists.
    os.makedirs(os.path.dirname(output_path), exist_ok=True)

    # Count frames instead of whole chunks: RATE is not a multiple of CHUNK, so
    # "RATE // CHUNK" chunks per second would make the file shorter than requested.
    total_frames = RATE * seconds

    p = pyaudio.PyAudio()
    try:
        # Open the device before creating the file, so a failed open leaves no empty WAV behind.
        stream = p.open(format=FORMAT, channels=CHANNELS, rate=RATE, input=True)
        try:
            with wave.open(output_path, "wb") as wf:
                wf.setnchannels(CHANNELS)
                wf.setsampwidth(p.get_sample_size(FORMAT))
                wf.setframerate(RATE)

                print("Recording...")
                frames_done = 0
                next_tick = 0  # next whole second to announce
                while frames_done < total_frames:
                    if frames_done >= next_tick * RATE:
                        print(f"{next_tick} / {seconds}s")
                        next_tick += 1
                    # The final read is shortened so the file holds exactly total_frames.
                    frames_wanted = min(CHUNK, total_frames - frames_done)
                    wf.writeframes(stream.read(frames_wanted))
                    frames_done += frames_wanted
                print("Done")
        finally:
            # Release the stream even when reading or writing fails.
            stream.close()
    finally:
        p.terminate()
    print(f"File saved at {output_path}")
    return output_path

In [66]:
# Capture a 10-second sample; the cell displays the path returned by record_audio.
record_audio(seconds=10, filename="russian_record.wav")

Recording...
0 / 10s
1 / 10s
2 / 10s
3 / 10s
4 / 10s
5 / 10s
6 / 10s
7 / 10s
8 / 10s
9 / 10s
Done
File saved at ./audio/russian_record.wav


'./audio/russian_record.wav'

In [67]:
# Send the recording to the hosted Whisper API.
# NOTE(review): the variables are named "german_*" although the file is
# russian_record.wav; the names are kept so other cells that may use them still work.
# The `with` block closes the file handle once the upload has finished.
with open("./audio/russian_record.wav", "rb") as german_file:
    german_response = openai.Audio.transcribe(
        model="whisper-1",
        file=german_file,
    )
german_response

<OpenAIObject at 0x28f86a1a1b0> JSON: {
  "text": "Hola, mi nombre es Juan"
}

In [68]:
# Load the local Whisper "base" model; `model` is reused by the transcribe/translate cells below.
model = whisper.load_model("base")

# Decoding settings shared by both tasks, with the language fixed to Russian.
options = {"language": "ru", "beam_size": 5, "best_of": 5, "verbose": True}
# Same settings, differing only in the task they request.
transcribe_options = {"task": "transcribe", **options}
translate_options = {"task": "translate", **options}

In [69]:
# Run the local model with the "transcribe" task and show the recognised text.
res = model.transcribe(audio="./audio/russian_record.wav", **transcribe_options)
res["text"]

[00:00.000 --> 00:04.600]  Привет, и меня зовут Хуан.


' Привет, и меня зовут Хуан.'

In [70]:
# Run the local model on the same file with the "translate" task and show the text.
res = model.transcribe(audio="./audio/russian_record.wav", **translate_options)
res["text"]

[00:00.000 --> 00:04.880]  I'm interested in business and I want to know one thing.


" I'm interested in business and I want to know one thing."

In [54]:
# Open the French sample in binary mode so it can be uploaded as raw bytes.
# NOTE(review): this handle is not explicitly closed in any visible cell; prefer
# opening it in a `with` block at the point where it is consumed.
french_file = open("./audio/french.wav", "rb")

In [55]:
# Open the sample inside this cell so every run uploads a fresh handle (a handle
# opened elsewhere would presumably be at end-of-file after one upload) and so
# the file is closed as soon as the request has finished.
with open("./audio/french.wav", "rb") as french_file:
    french_response = openai.Audio.transcribe(
        model="whisper-1",
        file=french_file,
    )

In [56]:
# Display the response object returned for the French sample.
french_response

<OpenAIObject at 0x28f83dce0f0> JSON: {
  "text": "Bonjour, je m'appelle Colt Steele et je voudrais un croissant."
}

In [57]:
# Open the Italian news sample in binary mode so it can be uploaded as raw bytes.
# NOTE(review): this handle is not explicitly closed in any visible cell; prefer
# opening it in a `with` block at the point where it is consumed.
italian_news = open("./audio/italian_news.wav", "rb")

In [58]:
# Open the sample inside this cell so every run uploads a fresh handle (a handle
# opened elsewhere would presumably be at end-of-file after one upload) and so
# the file is closed as soon as the request has finished.
with open("./audio/italian_news.wav", "rb") as italian_news:
    italian_response = openai.Audio.transcribe(
        model="whisper-1",
        file=italian_news,
    )

In [59]:
# Display the response object returned for the Italian sample.
italian_response

<OpenAIObject at 0x28f83dcebd0> JSON: {
  "text": "Mentre cresce l'allarme per il virus in Cina, in Italia soltanto la scorsa settimana sono stati segnalati quasi 500.000 casi di influenza stagionale, che a met\u00e0 gennaio sta dunque raggiungendo il suo picco, come era stato previsto dal Ministero della Salute. Finora gli italiani colpiti sono quasi 3 milioni."
}

In [60]:
# Capture a 10-second sample; the cell displays the path returned by record_audio.
record_audio(seconds=10, filename="spanish_record.wav")

Recording...
0 / 10s
1 / 10s
2 / 10s
3 / 10s
4 / 10s
5 / 10s
6 / 10s
7 / 10s
8 / 10s
9 / 10s
Done
File saved at ./audio/spanish_record.wav


'./audio/spanish_record.wav'

In [61]:
# Send the Spanish recording to the hosted Whisper API; the `with` block closes
# the file handle once the upload has finished.
with open("./audio/spanish_record.wav", "rb") as spanish_file:
    spanish_response = openai.Audio.transcribe(
        model="whisper-1",
        file=spanish_file,
    )
spanish_response

<OpenAIObject at 0x28f86a19bb0> JSON: {
  "text": "Hola, me llamo Federico, vivo en Londres y esto es una prueba que estoy haciendo para un v\u00eddeo en Youtube."
}

In [63]:
# Decoding settings shared by both tasks, with the language fixed to Spanish.
options = {"language": "es", "beam_size": 5, "best_of": 5, "verbose": True}
# Same settings, differing only in the task they request.
transcribe_options = {"task": "transcribe", **options}
translate_options = {"task": "translate", **options}

In [64]:
# Run the local model with the "transcribe" task and show the recognised text.
res = model.transcribe(audio="./audio/spanish_record.wav", **transcribe_options)
res["text"]

[00:00.000 --> 00:08.000]  Hola, me llamo Federico, vivo el Londres y esto es una prueba que estoy haciendo para un vídeo en Youtube


' Hola, me llamo Federico, vivo el Londres y esto es una prueba que estoy haciendo para un vídeo en Youtube'

In [65]:
# Run the local model on the same file with the "translate" task and show the text.
res = model.transcribe(audio="./audio/spanish_record.wav", **translate_options)
res["text"]

[00:00.000 --> 00:08.000]  Hello, my name is Federico, I live in Londres, and this is a proof that I am doing for a video on YouTube.


' Hello, my name is Federico, I live in Londres, and this is a proof that I am doing for a video on YouTube.'