In [None]:
# 필요한 패키지 설치
!pip install flask flask-cors flask-sock openai-whisper pyngrok soundfile

Collecting flask-cors
  Downloading flask_cors-6.0.0-py3-none-any.whl.metadata (961 bytes)
Collecting flask-sock
  Downloading flask_sock-0.7.0-py3-none-any.whl.metadata (1.6 kB)
Collecting openai-whisper
  Downloading openai-whisper-20240930.tar.gz (800 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m800.5/800.5 kB[0m [31m21.2 MB/s[0m eta [36m0:00:00[0m
[?25h  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
Collecting pyngrok
  Downloading pyngrok-7.2.8-py3-none-any.whl.metadata (10 kB)
Collecting simple-websocket>=0.5.1 (from flask-sock)
  Downloading simple_websocket-1.1.0-py3-none-any.whl.metadata (1.5 kB)
Collecting wsproto (from simple-websocket>=0.5.1->flask-sock)
  Downloading wsproto-1.2.0-py3-none-any.whl.metadata (5.6 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch->openai-whisper)
  Downloading nvidia_cuda_nvrtc_cu12-12

In [None]:
# ngrok 인증 설정
!ngrok authtoken 2wQlh1yhomkbcOTbhwOdNCzDx99_7jecQV8VibPuWQXnH9eVX  # 여기에 ngrok 인증 토큰을 입력하세요

# ngrok 설치 확인
!ngrok --version

Authtoken saved to configuration file: /root/.config/ngrok/ngrok.yml
ngrok version 3.22.1
pyngrok version 7.2.8


In [None]:
# 프로젝트 디렉토리 구조 생성
!mkdir -p ./www

In [None]:
%%writefile app.py
from flask import Flask, render_template
from flask_cors import CORS
from flask_sock import Sock
import whisper
import io

app = Flask(__name__,
    template_folder='./www',
    static_folder='./www',
    static_url_path='/'
)
CORS(app)  # 모든 도메인에서의 접근을 허용
sock = Sock(app)
model = whisper.load_model("base")

@app.route('/')
def index():
    return render_template('index.html')

@sock.route('/audio')
def handle_audio(ws):
    while True:
        data = ws.receive()
        if data is None:
            break

        audio_stream = io.BytesIO(data)
        audio_stream.seek(0)  # 스트림의 시작으로 이동

        try:
            # 오디오 데이터를 .wav 파일로 저장
            with open('received_audio.wav', 'wb') as f:
                f.write(audio_stream.read())

            # Whisper 모델에 .wav 파일을 전달하여 인식
            result = model.transcribe('received_audio.wav')
            ws.send(result['text'])
        except Exception as e:
            print(f'Error: {e}')
            ws.send('Error processing audio')

if __name__ == "__main__":
    app.run(host="0.0.0.0", port=3000, debug=True)

Writing app.py


In [None]:
%%writefile www/index.html
<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>Audio Recorder</title>
    <script src="https://unpkg.com/react@17/umd/react.development.js"></script>
    <script src="https://unpkg.com/react-dom@17/umd/react-dom.development.js"></script>
    <script src="https://unpkg.com/@mui/material@5.0.0/umd/material-ui.development.js"></script>
    <script src="https://unpkg.com/@babel/standalone/babel.min.js"></script>
    <link rel="stylesheet" href="https://fonts.googleapis.com/css?family=Roboto:300,400,500,700&display=swap" />
    <link rel="stylesheet" href="styles.css">
</head>
<body>
    <div id="root"></div>
    <script type="text/babel" src="app.js"></script>
</body>
</html>

Writing www/index.html


In [None]:
%%writefile www/styles.css
body {
  margin: 0;
  font-family: 'Roboto', sans-serif;
}

.container {
  max-width: 800px;
  margin: 0 auto;
  padding: 16px;
}

.chat-bubble {
  padding: 16px;
  margin-bottom: 8px;
  background-color: #f5f5f5;
  border-radius: 4px;
  word-break: break-word;
}

.transcription-container {
  max-height: 300px;
  overflow-y: auto;
  margin-top: 16px;
}

Writing www/styles.css


In [None]:
%%writefile www/app.js
const { useState, useRef, useEffect } = React;
const {
  AppBar, Toolbar, Typography, Button, Container, Box, Paper, TextField
} = MaterialUI;

function App() {
  // 현재 접속한 URL을 기반으로 WebSocket URL 자동 생성
  const getDefaultWebsocketUrl = () => {
    const protocol = window.location.protocol === 'https:' ? 'wss:' : 'ws:';
    const host = window.location.host;
    return `${protocol}//${host}/audio`;
  };

  const [isRecording, setIsRecording] = useState(false);
  const [transcriptions, setTranscriptions] = useState([]);
  const [websocketUrl, setWebsocketUrl] = useState(getDefaultWebsocketUrl());
  const mediaRecorderRef = useRef(null);
  const audioChunksRef = useRef([]);
  const socketRef = useRef(null);

  const handleWebSocketUrlChange = (event) => {
    setWebsocketUrl(event.target.value);
  };

  const handleStartRecording = async () => {
    const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
    mediaRecorderRef.current = new MediaRecorder(stream);

    mediaRecorderRef.current.ondataavailable = (event) => {
      audioChunksRef.current.push(event.data);
    };

    mediaRecorderRef.current.onstop = () => {
      sendAudioData();
    };

    mediaRecorderRef.current.start();
    setIsRecording(true);
  };

  const handleStopRecording = () => {
    if (mediaRecorderRef.current) {
      mediaRecorderRef.current.stop();
      setIsRecording(false);
    }
  };

  const sendAudioData = () => {
    const audioBlob = new Blob(audioChunksRef.current, { type: 'audio/wav' });
    const reader = new FileReader();
    reader.onloadend = () => {
      const audioArrayBuffer = reader.result;
      if (socketRef.current && socketRef.current.readyState === WebSocket.OPEN) {
        socketRef.current.send(audioArrayBuffer);
      }
      audioChunksRef.current = [];
    };
    reader.readAsArrayBuffer(audioBlob);
  };

  const setupWebSocket = () => {
    socketRef.current = new WebSocket(websocketUrl);

    socketRef.current.onopen = () => {
      console.log('WebSocket is connected.');
    };

    socketRef.current.onmessage = (event) => {
      setTranscriptions((prev) => [...prev, event.data]);
    };

    socketRef.current.onclose = (event) => {
      console.log('WebSocket is closed.', event);
    };

    socketRef.current.onerror = (error) => {
      console.log('WebSocket error:', error);
    };
  };

  useEffect(() => {
    setupWebSocket();
    return () => {
      if (socketRef.current) {
        socketRef.current.close();
      }
    };
  }, [websocketUrl]);

  return React.createElement(
    Container,
    null,
    React.createElement(
      AppBar,
      { position: 'static' },
      React.createElement(
        Toolbar,
        null,
        React.createElement(Typography, { variant: 'h6' }, "Audio Recorder")
      )
    ),
    React.createElement(
      Box,
      { mt: 2 },
      React.createElement(TextField, {
        label: "WebSocket URL",
        variant: "outlined",
        fullWidth: true,
        value: websocketUrl,
        onChange: handleWebSocketUrlChange,
        style: { marginBottom: 16 }
      }),
      React.createElement(
        Button,
        {
          variant: 'contained',
          color: 'primary',
          onClick: handleStartRecording,
          disabled: isRecording
        },
        "Start Recording"
      ),
      React.createElement(
        Button,
        {
          variant: 'contained',
          color: 'secondary',
          onClick: handleStopRecording,
          disabled: !isRecording,
          style: { marginLeft: 16 }
        },
        "Stop Recording"
      )
    ),
    React.createElement(
      'div',
      { className: 'transcription-container' },
      transcriptions.map((text, index) =>
        React.createElement(
          'div',
          {
            key: index,
            className: 'chat-bubble'
          },
          text
        )
      )
    )
  );
}

ReactDOM.render(
  React.createElement(App),
  document.getElementById('root')
);

Writing www/app.js


In [None]:
# Python 스크립트로 서버 백그라운드에서 실행
%%writefile run_server.py
import subprocess
import time
from pyngrok import ngrok

# Flask 서버 시작
server_process = subprocess.Popen(["python", "app.py"])
print("Flask 서버가 시작되었습니다.")

# ngrok 터널 생성
http_tunnel = ngrok.connect(3000)
print(f"ngrok 터널이 생성되었습니다: {http_tunnel.public_url}")

try:
    # 앱이 계속 실행되도록 대기
    while True:
        time.sleep(1)
except KeyboardInterrupt:
    # 종료 시 프로세스 정리
    server_process.terminate()
    ngrok.kill()

# 이 코드는 Colab이 계속 실행 중일 때만 작동합니다

Writing run_server.py


In [None]:
!python run_server.py

Flask 서버가 시작되었습니다.
ngrok 터널이 생성되었습니다: https://85c3-34-34-89-80.ngrok-free.app
100%|███████████████████████████████████████| 139M/139M [00:02<00:00, 64.7MiB/s]
 * Serving Flask app 'app'
 * Debug mode: on
 * Running on all addresses (0.0.0.0)
 * Running on http://127.0.0.1:3000
 * Running on http://172.28.0.12:3000
[33mPress CTRL+C to quit[0m
 * Restarting with stat
 * Debugger is active!
 * Debugger PIN: 347-788-323
127.0.0.1 - - [27/May/2025 01:56:45] "GET / HTTP/1.1" 200 -
127.0.0.1 - - [27/May/2025 01:56:45] "GET /styles.css HTTP/1.1" 200 -
127.0.0.1 - - [27/May/2025 01:56:46] "GET /app.js HTTP/1.1" 200 -
127.0.0.1 - - [27/May/2025 01:56:46] "[33mGET /favicon.ico HTTP/1.1[0m" 404 -
Traceback (most recent call last):
  File "/content/run_server.py", line 16, in <module>
    time.sleep(1)
KeyboardInterrupt

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/content/run_server.py", line 20, in <module>
    ngrok.kill()
 