In [None]:
import librosa
import numpy as np
import matplotlib.pyplot as plt
import os
import glob
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, GRU, Dense, BatchNormalization, Dropout
from sklearn.model_selection import train_test_split
import tensorflow as tf
from tensorflow.keras.layers import Conv1D
from tensorflow.keras.callbacks import EarlyStopping

# Dataset locations (relative paths, resolved against the current working directory).
positive_audio_folder = '../S207DataSet/true'  # folder holding the positive clips
negative_audio_folder = '../S207DataSet/false'  # folder holding the negative clips

# Sampling rate used when loading audio.
sr = 16000  # set to the sampling rate the teammate prepared the data with

# Convert an audio file into a log-scaled mel-spectrogram.
def preprocess_audio(file_path, sr=16000, n_mels=128, hop_length=512):
    """Load an audio file and return its log-mel spectrogram, time axis first.

    Args:
        file_path: Path of the audio file, passed to ``librosa.load``.
        sr: Sampling rate the audio is loaded at.
        n_mels: Number of mel bands.
        hop_length: Hop length (in samples) between successive frames.

    Returns:
        The dB-scaled mel-spectrogram, transposed so that time is the
        leading axis.
    """
    waveform, _ = librosa.load(file_path, sr=sr)
    mel_power = librosa.feature.melspectrogram(
        y=waveform,
        sr=sr,
        n_mels=n_mels,
        hop_length=hop_length,
    )
    # ref=np.max expresses every bin in dB relative to this clip's own peak.
    mel_db = librosa.power_to_db(mel_power, ref=np.max)
    return mel_db.T

# Render a spectrogram as an image and save it to disk.
def save_spectrogram(spectrogram, save_path):
    """Plot ``spectrogram`` with a dB colorbar and write the figure to ``save_path``.

    Args:
        spectrogram: 2-D array handed to ``imshow``; drawn with the first
            row at the bottom (``origin='lower'``).
        save_path: Destination passed to ``Figure.savefig``.

    The figure is always closed, even when drawing or saving fails, so that
    calling this in a loop over many files does not accumulate open figures.
    """
    fig, ax = plt.subplots(figsize=(10, 4))
    try:
        image = ax.imshow(spectrogram, aspect='auto', origin='lower')
        fig.colorbar(image, ax=ax, format='%+2.0f dB')
        ax.set_title('Mel-Spectrogram')
        fig.tight_layout()
        fig.savefig(save_path)
    finally:
        # Release the figure regardless of success to avoid leaking memory.
        plt.close(fig)

# Collect the .wav files of each class.
# glob returns entries in arbitrary, filesystem-dependent order; sorting makes
# the sample order (and therefore the seeded train/val/test split that is
# derived from it) reproducible across machines and runs.
positive_audio_files = sorted(glob.glob(os.path.join(positive_audio_folder, '*.wav')))
negative_audio_files = sorted(glob.glob(os.path.join(negative_audio_folder, '*.wav')))

# Report the counts so that an empty list (e.g. a wrong folder path) is noticed early.
print(f"Number of positive audio files: {len(positive_audio_files)}")
print(f"Number of negative audio files: {len(negative_audio_files)}")

In [None]:
import librosa
import numpy as np
from sklearn.model_selection import train_test_split

# Load every clip as a raw waveform padded/trimmed to a fixed length.
# No spectrogram is computed or saved here: the model receives raw audio and
# derives the mel-spectrogram itself (see MelSpectrogramLayer).
sr = 16000
target_length = sr * 2  # number of samples in 2 seconds at `sr`


def _load_fixed_length_clips(audio_files, label, sample_rate, length, progress_tag):
    """Load audio files as fixed-length waveforms that all share one label.

    Args:
        audio_files: Iterable of audio file paths.
        label: Class label assigned to every clip (1 = positive, 0 = negative).
        sample_rate: Sampling rate passed to ``librosa.load``.
        length: Target number of samples; clips are fixed to this size with
            ``librosa.util.fix_length``.
        progress_tag: Prefix of the progress line printed every 100 files.

    Returns:
        ``(clips, labels)`` - two lists of equal length.
    """
    clips = []
    for count, file_path in enumerate(audio_files, start=1):
        waveform, _ = librosa.load(file_path, sr=sample_rate)
        clips.append(librosa.util.fix_length(waveform, size=length))
        if count % 100 == 0:
            print(f'{progress_tag}: {count}')
    return clips, [label] * len(clips)


positive_clips, positive_labels = _load_fixed_length_clips(
    positive_audio_files, 1, sr, target_length, 'xidx'
)
negative_clips, negative_labels = _load_fixed_length_clips(
    negative_audio_files, 0, sr, target_length, 'yidx'
)

print("데이터 준비 완료!")

# Stack into arrays: positives first, then negatives.
X = np.array(positive_clips + negative_clips)
y = np.array(positive_labels + negative_labels)

# Fail with an actionable message instead of an obscure error from the splitter.
if len(X) == 0:
    raise ValueError(
        "No audio clips were loaded; check positive_audio_folder / negative_audio_folder."
    )

# Split 60% train / 40% remainder (remainder = validation + test).
# NOTE(review): the split is not stratified; consider stratify=y if the classes are imbalanced.
X_train, X_temp, y_train, y_temp = train_test_split(X, y, test_size=0.4, random_state=42)

# Halve the remainder into validation and test (20% / 20% of the full set).
X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=0.5, random_state=42)

print(X_train.shape)
print(X_val.shape)

In [None]:
import tensorflow as tf
from tensorflow.keras.layers import Layer, Attention, Activation, Conv1D, BatchNormalization, Dropout, GRU, Dense, Input
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from sklearn.metrics import confusion_matrix

# Custom layer that converts raw audio into a log-mel spectrogram.
class MelSpectrogramLayer(Layer):
    """Keras layer computing a log-mel spectrogram from waveform input.

    Args:
        sample_rate: Sampling rate of the incoming audio in Hz.
        n_mels: Number of mel bins produced.
        hop_length: STFT frame step in samples.
        n_fft: STFT frame length in samples.
        **kwargs: Forwarded to the base ``Layer`` constructor.
    """

    def __init__(self, sample_rate=16000, n_mels=128, hop_length=512, n_fft=1024, **kwargs):
        super().__init__(**kwargs)
        self.sample_rate = sample_rate
        self.n_mels = n_mels
        self.hop_length = hop_length
        self.n_fft = n_fft

    def call(self, inputs):
        """Return the log-mel spectrogram of ``inputs``.

        ``inputs`` is framed along its last axis by ``tf.signal.stft``
        (presumably shaped (batch, samples) - confirm against the caller).
        The result has the mel bins on its last axis.
        """
        stfts = tf.signal.stft(inputs, frame_length=self.n_fft, frame_step=self.hop_length)
        # Power spectrogram: squared magnitude of the complex STFT.
        spectrogram = tf.abs(stfts) ** 2

        # Project the linear-frequency bins onto the mel scale.
        mel_filterbank = tf.signal.linear_to_mel_weight_matrix(
            num_mel_bins=self.n_mels,
            num_spectrogram_bins=spectrogram.shape[-1],
            sample_rate=self.sample_rate,
            lower_edge_hertz=0,
            upper_edge_hertz=self.sample_rate / 2
        )
        mel_spectrogram = tf.matmul(spectrogram, mel_filterbank)

        # Log scaling; the small offset keeps log() finite for silent frames.
        log_mel_spectrogram = tf.math.log(mel_spectrogram + 1e-6)
        return log_mel_spectrogram

    def get_config(self):
        """Return the constructor arguments so the layer can be re-created on load.

        Without this, a saved model restores the layer with default
        hyperparameters even if it was built with different ones.
        """
        config = super().get_config()
        config.update({
            'sample_rate': self.sample_rate,
            'n_mels': self.n_mels,
            'hop_length': self.hop_length,
            'n_fft': self.n_fft,
        })
        return config

# GRU classifier that computes the mel-spectrogram inside the model.
def build_gru_model_with_mel(input_shape=(32000,), learning_rate=0.001):
    """Build and compile the Conv1D + stacked-GRU binary classifier.

    Args:
        input_shape: Shape of one raw-audio example without the batch axis.
            A bare int ``n`` is accepted and treated as ``(n,)``.
        learning_rate: Learning rate of the Adam optimizer.

    Returns:
        A compiled Keras ``Model`` with a single sigmoid output, using
        binary cross-entropy loss and the accuracy metric.
    """
    # `(n)` is just an int, not a tuple; normalise it so callers that forget
    # the trailing comma still get a valid shape (not every Keras version
    # accepts a bare int here).
    if isinstance(input_shape, int):
        input_shape = (input_shape,)

    waveform_in = Input(shape=input_shape)

    # Raw audio -> log-mel spectrogram.
    features = MelSpectrogramLayer()(waveform_in)

    features = Conv1D(32, kernel_size=3, strides=1, padding='same', activation='relu')(features)
    features = BatchNormalization()(features)
    features = Dropout(0.3)(features)
    # First GRU keeps the full sequence so the second GRU can consume it.
    features = GRU(64, return_sequences=True)(features)
    features = BatchNormalization()(features)
    features = Dropout(0.4)(features)
    features = GRU(64)(features)
    features = BatchNormalization()(features)
    features = Dropout(0.4)(features)
    probability = Dense(1, activation='sigmoid')(features)

    model = Model(inputs=waveform_in, outputs=probability)
    model.compile(
        optimizer=Adam(learning_rate=learning_rate),
        loss='binary_crossentropy',
        metrics=['accuracy'],
    )
    return model



In [None]:
# Model training: one independent run per candidate batch size.
from sklearn.metrics import classification_report

for bs in [16, 32, 64]:
    # Trailing comma matters: (n) is a plain int, (n,) is the 1-tuple shape Keras expects.
    input_shape = (X_train.shape[1],)
    model = build_gru_model_with_mel(input_shape)
    model.summary()

    # Stop when val_loss has not improved for 20 epochs and restore the best weights.
    early_stopping = EarlyStopping(monitor='val_loss', patience=20, restore_best_weights=True)

    history = model.fit(
        X_train,
        y_train,
        epochs=500,
        batch_size=bs,
        validation_data=(X_val, y_val),
        callbacks=[early_stopping],
    )

    # Training vs. validation accuracy.
    plt.plot(history.history['accuracy'], label='Training Accuracy')
    plt.plot(history.history['val_accuracy'], label='Validation Accuracy')
    plt.xlabel('Epochs')
    plt.ylabel('Accuracy')
    plt.title('Training and Validation Accuracy')
    plt.legend()
    plt.show()

    # Training vs. validation loss.
    plt.plot(history.history['loss'], label='Training Loss')
    plt.plot(history.history['val_loss'], label='Validation Loss')
    plt.xlabel('Epochs')
    plt.ylabel('Loss')
    plt.title('Training and Validation Loss')
    plt.legend()
    plt.show()

    print("모델 학습 완료!")

    # Save the model for this batch size.
    # NOTE(review): the file name says "lr5e-4" but build_gru_model_with_mel
    # compiles with learning_rate=0.001. The name is kept unchanged because the
    # conversion cell loads the B64 file by this exact name.
    model.save(f"trigger_word_detection_model_largeDataSet_MelIntoLayer_sr16000_B{bs}_lr5e-4_pat20")
    print("모델이 저장되었습니다.")

    # Predict on the held-out test set and binarise the sigmoid outputs.
    predictions = model.predict(X_test)
    threshold = 0.5  # decision threshold
    # This assignment was previously swallowed by the comment on the line
    # above, which made the report below fail with a NameError.
    predicted_labels = (predictions > threshold).astype(int).flatten()

    # Per-class precision / recall / F1 on the test set.
    print(classification_report(y_test, predicted_labels))
    print("모델 평가 완료!")


In [None]:
import tensorflow as tf

# Reload the trained Keras model (batch-size-64 run) and export it to TFLite.
# Earlier checkpoint kept for reference:
# model = tf.keras.models.load_model("trigger_word_detection_model_MelIntoLayer_sr32000_B32_lr5e-5_pat30.h5")
saved_model_path = "trigger_word_detection_model_largeDataSet_MelIntoLayer_sr16000_B64_lr5e-4_pat20"
tflite_path = saved_model_path + ".tflite"

# The custom layer must be registered so Keras can rebuild the model.
model = tf.keras.models.load_model(
    saved_model_path,
    custom_objects={'MelSpectrogramLayer': MelSpectrogramLayer},
)

# Configure the converter: TFLite builtins plus SELECT_TF_OPS (Flex delegate).
converter = tf.lite.TFLiteConverter.from_keras_model(model)
allowed_op_sets = [tf.lite.OpsSet.TFLITE_BUILTINS, tf.lite.OpsSet.SELECT_TF_OPS]
converter.target_spec.supported_ops = allowed_op_sets

# Run the conversion.
tflite_model = converter.convert()

# Write the serialized TFLite model to disk.
with open(tflite_path, "wb") as out_file:
    out_file.write(tflite_model)
