# In [1]:  (Jupyter notebook cell marker; the code below is the cell body)
import sounddevice as sd
import numpy as np
import librosa
import tensorflow as tf
import time
from PyQt5 import QtCore, QtGui, QtWidgets
from PyQt5.QtWidgets import QApplication, QWidget
from PyQt5.QtGui import QIcon
import webrtcvad
import pyaudio
import wave
import noisereduce as nr
from faster_whisper import WhisperModel
import datetime
import os
import sys

# Load the trained Keras sound-classification model from the current working
# directory once, at import time; SoundClassifierWorker uses it for inference.
model_path = os.path.join(os.getcwd(), 'my_model.h5')
model = tf.keras.models.load_model(model_path)

# Human-readable class names. They are matched to the model's output vector
# by position, so the order here must match the order used during training.
# NOTE(review): these look like the UrbanSound8K classes - confirm.
class_labels = [
    'air_conditioner',
    'car_horn',
    'children_playing',
    'dog_bark',
    'drilling',
    'engine_idling',
    'gun_shot',
    'jackhammer',
    'siren',
    'street_music',
]


def preprocess_audio(audio, sample_rate=22050, n_mfcc=40, max_frames=174):
    """Convert a mono audio signal into a fixed-width MFCC matrix.

    Args:
        audio: 1-D array of audio samples.
        sample_rate: Sampling rate of ``audio`` in Hz.
        n_mfcc: Number of MFCC coefficients to compute.
        max_frames: Exact number of frames (second axis) in the result.

    Returns:
        The MFCC array whose second axis has exactly ``max_frames`` entries:
        shorter inputs are zero-padded on the right, longer inputs are
        truncated.
    """
    mfccs = librosa.feature.mfcc(y=audio, sr=sample_rate, n_mfcc=n_mfcc)

    frame_count = mfccs.shape[1]
    if frame_count < max_frames:
        # Zero-pad the time axis so every clip has the same width.
        pad_width = ((0, 0), (0, max_frames - frame_count))
        mfccs = np.pad(mfccs, pad_width, mode='constant')
    elif frame_count > max_frames:
        # Longer clips previously passed through unchanged, producing a
        # variable-width result; trim them so the width is always fixed.
        mfccs = mfccs[:, :max_frames]

    return mfccs

class STTWorker(QtCore.QThread):
    """Background thread that records microphone audio and transcribes it.

    ``run()`` captures raw 16-bit mono frames until ``stop()`` is called.
    ``stop()`` then waits for the capture loop to exit, denoises and
    transcribes the recording (Korean), emits one ``stt_finished`` signal per
    transcribed segment, and releases the audio device.
    """

    text_update = QtCore.pyqtSignal(str)   # declared but not emitted by this class
    stt_finished = QtCore.pyqtSignal(str)  # emitted once per transcribed segment

    def __init__(self, model_size="small"):
        """Load the Whisper model and open the microphone input stream.

        Args:
            model_size: Model size name passed to ``WhisperModel``.
        """
        super(STTWorker, self).__init__()
        # NOTE(review): the VAD instance is created but not used by any
        # method of this class.
        self.vad = webrtcvad.Vad(3)
        self.model = WhisperModel(model_size, device="cpu")
        self.sample_rate = 16000
        self.frame_duration = 20  # milliseconds per captured frame
        self.frame_size = int(self.sample_rate * self.frame_duration / 1000)
        self.channels = 1
        self.running = False
        self.frames = []

        self.p = None
        self.stream = None
        self._open_audio()

    def _open_audio(self):
        """Create the PyAudio handle and input stream if not already open."""
        if self.p is None:
            self.p = pyaudio.PyAudio()
        if self.stream is None:
            self.stream = self.p.open(format=pyaudio.paInt16,
                                      channels=self.channels,
                                      rate=self.sample_rate, input=True,
                                      frames_per_buffer=self.frame_size)

    def _close_audio(self):
        """Release the input stream and the PyAudio handle (idempotent)."""
        if self.stream is not None:
            self.stream.stop_stream()
            self.stream.close()
            self.stream = None
        if self.p is not None:
            self.p.terminate()
            self.p = None

    def run(self):
        """Capture audio frames until ``stop()`` clears ``self.running``."""
        self.running = True
        self.frames = []
        # Re-open the device if a previous stop() released it.
        self._open_audio()
        while self.running:
            # Do not raise on input overflow; a dropped frame is preferable
            # to an unhandled exception inside the capture thread.
            audio_frame = self.stream.read(self.frame_size,
                                           exception_on_overflow=False)
            self.frames.append(audio_frame)

    def stop(self):
        """Stop capturing, transcribe the recording, release the device.

        Blocks the calling thread until the capture loop has exited and the
        transcription has finished.
        """
        self.running = False
        # Wait for run() to return so the frame list is complete and the
        # stream is no longer being read while we process and close it.
        self.wait()
        try:
            self.process_audio()
        finally:
            self._close_audio()

    def process_audio(self):
        """Denoise and transcribe the captured frames, emitting each segment."""
        if not self.frames:
            # Nothing was recorded; skip noise reduction and transcription.
            return
        audio_data = b''.join(self.frames)
        clean_segment = self.reduce_noise(audio_data)
        stt_result = self.transcribe_audio(clean_segment)
        for text_segment in stt_result:
            self.stt_finished.emit(text_segment.text)

    def reduce_noise(self, audio_data):
        """Return ``audio_data`` (int16 PCM bytes) after noise reduction."""
        audio_array = np.frombuffer(audio_data, dtype=np.int16).astype(np.float32)
        reduced_noise = nr.reduce_noise(y=audio_array, sr=self.sample_rate)
        # Clamp to the int16 range first so out-of-range samples saturate
        # instead of wrapping around during the integer cast.
        reduced_noise = np.clip(reduced_noise, -32768, 32767)
        return reduced_noise.astype(np.int16).tobytes()

    def transcribe_audio(self, audio_data):
        """Transcribe int16 PCM bytes as Korean; return the segment iterable."""
        # Scale int16 samples to floats in [-1.0, 1.0).
        audio_array = np.frombuffer(audio_data, dtype=np.int16).astype(np.float32) / 32768.0
        segments, _ = self.model.transcribe(audio_array, language='ko')
        return segments

class SoundClassifierWorker(QtCore.QThread):
    """Background thread that repeatedly records and classifies ambient sound.

    Each cycle records two seconds of audio, runs the module-level ``model``
    on its MFCC features and emits the formatted result through
    ``sound_classified``; cycles are separated by a two-second pause.
    """

    sound_classified = QtCore.pyqtSignal(str)  # formatted classification result

    def __init__(self, parent=None):
        """Create the worker; the loop flag starts out enabled."""
        super(SoundClassifierWorker, self).__init__(parent)
        self.running = True

    def run(self):
        """Classify in a loop until ``stop()`` is called."""
        while self.running:
            self.classify_sound()
            self._pause(2)

    def _pause(self, seconds, step=0.1):
        """Sleep for up to ``seconds``, returning early once stop() is requested."""
        deadline = time.monotonic() + seconds
        while self.running:
            remaining = deadline - time.monotonic()
            if remaining <= 0:
                break
            # Sleep in short slices so a stop request is noticed promptly
            # instead of after the full pause.
            time.sleep(min(step, remaining))

    def classify_sound(self):
        """Record one clip, classify it and emit the result text."""
        sample_rate = 22050
        duration = 2  # seconds

        # Record from the default input device and block until done.
        audio = sd.rec(int(sample_rate * duration), samplerate=sample_rate, channels=1)
        sd.wait()

        # Collapse the (frames, channels) recording into a 1-D signal.
        audio = audio.flatten()
        mfccs = preprocess_audio(audio, sample_rate)

        # Add trailing and leading axes before calling the model.
        # NOTE(review): presumably the channel and batch axes - confirm
        # against the model's input shape.
        mfccs = np.expand_dims(mfccs, axis=-1)
        mfccs = np.expand_dims(mfccs, axis=0)

        prediction = model.predict(mfccs)

        # Only one clip was submitted, so use the first (only) row.
        prediction_percentage = prediction[0] * 100

        current_time = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")

        # One line per class whose score exceeds 50%.
        result_lines = [
            f"{label}: {score:.2f}% ({current_time})\n"
            for label, score in zip(class_labels, prediction_percentage)
            if score > 50
        ]

        # Append a notice when even the best class is below 70%.
        if prediction_percentage.max() < 70:
            result_lines.append("No sound detected with more than 70% confidence.\n")

        self.sound_classified.emit("".join(result_lines))

    def stop(self):
        """Ask the loop to finish; returns immediately without waiting."""
        self.running = False


class Ui_STT(object):
    """Builds the main window and wires its buttons to the audio workers.

    The window has a text area for output plus buttons to start/stop speech
    recording, start sound classification, pick the Whisper model size,
    clear the output and stop whatever is running. ``self.mode`` records
    which worker is active ("STT", "classification" or ``None``).
    """

    @staticmethod
    def _button_style(color, hover_color):
        """Return the shared rounded-outline stylesheet for one button."""
        return f"""
            QPushButton {{
                color: {color};
                border: 2px solid {color};
                border-radius: 10px;
                background-color: transparent;
                padding: 10px;
            }}
            QPushButton:hover {{
                background-color: {hover_color};
            }}
        """

    @staticmethod
    def _add_button(parent, object_name, x, y):
        """Create a 101x61 push button at (x, y) with the given object name."""
        button = QtWidgets.QPushButton(parent)
        button.setGeometry(QtCore.QRect(x, y, 101, 61))
        button.setObjectName(object_name)
        return button

    def setupUi(self, STT):
        """Create all widgets on ``STT``, connect signals and apply styling."""
        STT.setObjectName("STT")
        STT.resize(800, 600)
        STT.setCursor(QtGui.QCursor(QtCore.Qt.ArrowCursor))

        # Output area for transcriptions, classification results and status.
        self.textEdit = QtWidgets.QTextEdit(STT)
        self.textEdit.setGeometry(QtCore.QRect(20, 30, 760, 400))
        self.textEdit.setObjectName("textEdit")

        self.pushButton = self._add_button(STT, "pushButton", 20, 450)
        self.stopButton = self._add_button(STT, "stopButton", 20, 520)
        self.pushButton_2 = self._add_button(STT, "pushButton_2", 140, 450)
        self.pushButton_3 = self._add_button(STT, "pushButton_3", 260, 450)
        self.pushButton_small = self._add_button(STT, "pushButton_small", 380, 450)
        self.pushButton_medium = self._add_button(STT, "pushButton_medium", 500, 450)
        self.newButton = self._add_button(STT, "newButton", 620, 450)

        self.retranslateUi(STT)
        QtCore.QMetaObject.connectSlotsByName(STT)

        self.pushButton.clicked.connect(self.start_recording)
        self.pushButton_2.clicked.connect(self.stop_recording)
        self.pushButton_3.clicked.connect(self.clear_text)
        self.pushButton_small.clicked.connect(self.set_small_model)
        self.pushButton_medium.clicked.connect(self.set_medium_model)
        self.newButton.clicked.connect(self.start_classification)
        self.stopButton.clicked.connect(self.stop_all)

        self.classifier_worker = None
        self.stt_worker = None
        self.model_size = "small"
        self.mode = None

        STT.setStyleSheet("""
            QWidget {
                border: 2px solid gray;
                border-radius: 10px;
                padding: 5px;
            }
        """)

        # (button, text/border colour, hover background)
        button_styles = (
            (self.pushButton, "red", "#ffcccc"),
            (self.pushButton_2, "blue", "#cceeff"),
            (self.newButton, "orange", "#ffcc99"),
            (self.pushButton_3, "gray", "#e0e0e0"),
            (self.pushButton_small, "black", "#e0e0e0"),
            (self.pushButton_medium, "black", "#e0e0e0"),
            (self.stopButton, "black", "#e0e0e0"),
        )
        for button, color, hover_color in button_styles:
            button.setStyleSheet(self._button_style(color, hover_color))

        # The window icon is optional; report its absence instead of failing.
        icon_path = os.path.join(os.getcwd(), 'resources', 'free-icon-font-play-3917517.png')
        if os.path.exists(icon_path):
            STT.setWindowIcon(QtGui.QIcon(icon_path))
        else:
            print(f"아이콘 경로를 찾을 수 없습니다: {icon_path}")

    def retranslateUi(self, STT):
        """Set the window title and the button captions."""
        _translate = QtCore.QCoreApplication.translate
        STT.setWindowTitle(_translate("STT", "PLAY"))
        self.pushButton.setText(_translate("STT", "녹음"))
        self.pushButton_2.setText(_translate("STT", "종료"))
        self.pushButton_3.setText(_translate("STT", "출력 초기화"))
        self.pushButton_small.setText(_translate("STT", "small"))
        self.pushButton_medium.setText(_translate("STT", "medium"))
        self.newButton.setText(_translate("STT", "소리 분석"))
        self.stopButton.setText(_translate("STT", "모든 작업 중지"))

    def start_classification(self):
        """Start the sound classifier, stopping speech recording first."""
        if self.mode == "STT":
            self.stop_recording()

        if self.classifier_worker is None or not self.classifier_worker.isRunning():
            self.textEdit.append("소리 분석을 시작합니다...")
            self.classifier_worker = SoundClassifierWorker()
            self.classifier_worker.sound_classified.connect(self.update_text)
            self.classifier_worker.start()
            self.mode = "classification"

    def start_recording(self):
        """Start speech recording, stopping sound classification first."""
        if self.mode == "classification":
            self.stop_classification()

        if self.stt_worker is None or not self.stt_worker.isRunning():
            self.textEdit.append(f"녹음을 시작합니다... (모델: {self.model_size})")
            self.stt_worker = STTWorker(model_size=self.model_size)
            self.stt_worker.text_update.connect(self.update_text)
            self.stt_worker.stt_finished.connect(self.update_text)
            self.stt_worker.start()
            self.mode = "STT"

    def stop_recording(self):
        """Stop the running speech worker and wait for its thread to end."""
        worker = self.stt_worker
        if worker is not None and worker.isRunning():
            worker.stop()
            # A QThread must not be destroyed while its thread is still
            # running, so wait for it before dropping the last reference.
            worker.wait()
            self.stt_worker = None
            self.mode = None

    def stop_classification(self):
        """Stop the classifier worker and wait for its thread to end.

        This blocks until the worker's current cycle has finished.
        """
        worker = self.classifier_worker
        if worker is not None:
            worker.stop()
            # A QThread must not be destroyed while its thread is still
            # running, so wait for it before dropping the last reference.
            worker.wait()
            self.classifier_worker = None
            self.textEdit.append("소리 분석이 중지되었습니다.")
            self.mode = None

    def stop_all(self):
        """Stop whichever worker the current mode says is active."""
        if self.mode == "STT":
            self.stop_recording()
        elif self.mode == "classification":
            self.stop_classification()

    def clear_text(self):
        """Erase the output area."""
        self.textEdit.clear()

    def set_small_model(self):
        """Use the 'small' Whisper model for the next recording."""
        self.model_size = "small"
        self.textEdit.append("모델이 'small'로 설정되었습니다.")

    def set_medium_model(self):
        """Use the 'medium' Whisper model for the next recording."""
        self.model_size = "medium"
        self.textEdit.append("모델이 'medium'로 설정되었습니다.")

    def update_text(self, text):
        """Append ``text`` to the output area (slot for worker signals)."""
        self.textEdit.append(f"{text}")

if __name__ == "__main__":
    # Script entry point: create the Qt application, build the UI on a plain
    # top-level widget, show it, and exit with the event loop's return code.
    app = QtWidgets.QApplication(sys.argv)

    main_window = QtWidgets.QWidget()
    stt_ui = Ui_STT()
    stt_ui.setupUi(main_window)
    main_window.show()

    exit_code = app.exec_()
    sys.exit(exit_code)

# --- Captured notebook output (not part of the program) ---
#   from .autonotebook import tqdm as notebook_tqdm
#
# SystemExit: 0
#
#   warn("To exit: use 'exit', 'quit', or Ctrl-D.", stacklevel=1)


