<a href="https://colab.research.google.com/github/ryam14/Portfolio_website/blob/main/whisper.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
from IPython.display import HTML
from base64 import b64decode

In [None]:
# HTML/JavaScript recorder UI rendered into the cell output by record().
# The page defines a global JS function `record()` that returns a Promise; the
# Promise resolves with the recording as a data URL ("<header>,<base64>") once
# the user has clicked Start and then End.
RECORD = """
<!DOCTYPE html>
<html lang="en">
<head>
  <meta charset="UTF-8">
  <meta name="viewport" content="width=device-width, initial-scale=1.0">
  <title>Audio Recorder</title>
</head>
<body>
  <h1>Audio Recorder</h1>
  <button id="startButton">Start</button>
  <button id="endButton">End</button>
  <p id="log"></p>

  <script>
    // Read a Blob and resolve with its contents encoded as a data URL.
    const b2text = blob => new Promise((resolve, reject) => {
      const reader = new FileReader();
      reader.onload = () => resolve(reader.result);
      reader.onerror = () => reject(reader.error);
      reader.readAsDataURL(blob);
    });

    let recorder = null;
    let chunks = [];
    let stream = null;

    // Show a status line under the buttons.
    const setLog = message => {
      document.getElementById("log").innerText = message;
    };

    // Stop every track of the current stream so the microphone is released.
    const releaseStream = () => {
      if (stream) {
        stream.getTracks().forEach(track => track.stop());
        stream = null;
      }
    };

    const record = () => new Promise((resolve, reject) => {
      const startButton = document.getElementById("startButton");
      const endButton = document.getElementById("endButton");
      let starting = false;

      const startRecording = async () => {
        // Ignore repeated Start clicks: a second recorder would orphan the
        // first one while its microphone track is still live.
        if (starting || (recorder && recorder.state === "recording")) {
          setLog("Recording is already in progress.");
          return;
        }
        starting = true;
        try {
          stream = await navigator.mediaDevices.getUserMedia({ audio: true });
          recorder = new MediaRecorder(stream);
          chunks = [];

          recorder.ondataavailable = e => chunks.push(e.data);

          recorder.start();
          setLog("Recording started...");
        } catch (err) {
          console.error("Error accessing microphone: ", err);
          setLog("Error accessing microphone.");
          // The stream may already be open if MediaRecorder itself failed.
          releaseStream();
          reject(err);
        } finally {
          starting = false;
        }
      };

      const stopRecording = () => {
        // A stray End click must not settle the promise: rejecting here would
        // end the waiting caller and leave the buttons with nothing to report to.
        if (!recorder || recorder.state !== "recording") {
          setLog("No active recording to stop.");
          return;
        }

        recorder.onstop = async () => {
          releaseStream();
          try {
            // The Blob is left untyped so the data URL header stays the same
            // as before for whoever splits it on the comma.
            const text = await b2text(new Blob(chunks));
            console.log("Recording stopped");
            setLog("Recording stopped.");
            // The promise can only settle once; disable the buttons so the
            // microphone is not reopened for a recording nobody will receive.
            startButton.disabled = true;
            endButton.disabled = true;
            resolve(text);
          } catch (err) {
            console.error("Error encoding recording: ", err);
            setLog("Failed to encode the recording.");
            reject(err);
          }
        };

        recorder.stop();
      };

      startButton.addEventListener("click", startRecording);
      endButton.addEventListener("click", stopRecording);
    });
  </script>
</body>
</html>
"""

def record(fname='/content/recorded_audio.wav'):
  """Show the recorder UI, wait for a recording, and save it to ``fname``.

  The RECORD page is displayed in the cell output and its JavaScript
  ``record()`` function is evaluated through ``google.colab.output.eval_js``.
  The value it returns is expected to be a data URL, ``"<header>,<base64>"``;
  the base64 part is decoded and written to disk unchanged.

  Args:
    fname: Destination path for the decoded audio bytes. Defaults to the
      path the rest of this notebook reads from.

  Returns:
    ``fname`` on success. ``''`` when ``google.colab`` cannot be imported or
    when recording, decoding, or writing fails (the error is printed).
  """
  try:
    from google.colab import output
  except ImportError:
    print('No possible to import output from google.colab')
    return ''

  print('Recording')
  # Display HTML interface
  display(HTML(RECORD))

  # Use JavaScript to start and stop recording, and return Base64 audio data
  try:
    data_url = output.eval_js('record()')
    if not data_url:
      raise ValueError('the recorder returned no audio data')

    # Base64 text never contains a comma, so everything after the last comma
    # is the payload regardless of what the data URL header looks like.
    _, sep, payload = data_url.rpartition(',')
    if not sep:
      raise ValueError('the recorder did not return a data URL')
    audio_bytes = b64decode(payload)

    # NOTE(review): the bytes are written exactly as the browser encoded
    # them; the file extension does not convert anything. MediaRecorder
    # output is presumably WebM/Ogg rather than WAV -- confirm before handing
    # the file to tools that trust the extension.
    print('Saving to', fname)
    with open(fname, 'wb') as f:
      f.write(audio_bytes)
    return fname
  except Exception as e:
    # Best effort: report the problem and signal failure with ''.
    print(f"An error occurred: {e}")
    return ''

In [None]:
!pip install git+https://github.com/openai/whisper.git


Collecting git+https://github.com/openai/whisper.git
  Cloning https://github.com/openai/whisper.git to /tmp/pip-req-build-7dzg18p_
  Running command git clone --filter=blob:none --quiet https://github.com/openai/whisper.git /tmp/pip-req-build-7dzg18p_
  Resolved https://github.com/openai/whisper.git to commit 90db0de1896c23cbfaf0c58bc2d30665f709f170
  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone


In [None]:
import whisper

In [None]:
# Load the 'medium' Whisper checkpoint; the recorded output below shows a
# 1.42G download for it.
model = whisper.load_model('medium')

100%|█████████████████████████████████████| 1.42G/1.42G [00:19<00:00, 77.5MiB/s]
  checkpoint = torch.load(fp, map_location=device)


In [None]:
# Show the recorder UI and save the capture; the cell's value is the saved
# file path, or '' if recording failed.
record()

Recording


Saving to /content/recorded_audio.wav


'/content/recorded_audio.wav'

In [None]:
# Transcribe the file written by record(); with verbose=True the timestamped
# segments are printed to the cell output in addition to the final text.
result = model.transcribe('/content/recorded_audio.wav', verbose=True)
print(result['text'])

# Mode "a" keeps earlier transcriptions; end each entry with a newline so
# consecutive runs do not fuse into one unbroken line.
with open("/content/transcription.txt", "a", encoding="utf-8") as txt:
    txt.write(result["text"] + "\n")

Detecting language using up to the first 30 seconds. Use `--language` to specify the language




Detected language: Japanese
[00:00.000 --> 00:13.000] 視聴率はもしの結果、今でしょの名フレーズが、YOU CANの信号流行語大賞に選ばれてからすでに11年が経った。
視聴率はもしの結果、今でしょの名フレーズが、YOU CANの信号流行語大賞に選ばれてからすでに11年が経った。
