In [1]:
### 라이브러리 import 
import matplotlib.pyplot as plt
import librosa.display
import librosa
import numpy as np
import glob
import os
import multiprocessing
import pandas as pd
import tensorflow as tf
import numpy as np
import parselmouth


from os.path import join
from tensorflow import keras

from keras.models import Sequential
from keras.layers import LSTM, Conv1D, Conv2D, MaxPooling1D, MaxPooling2D, Bidirectional, Flatten, GRU, Dense, Dropout, GlobalAveragePooling1D, BatchNormalization
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, BatchNormalization, Dropout, ReLU, Softmax




# 우울증 탐지 함수

In [2]:
# 데이터 전처리

def split_audio(filename, chunk_length=5, sr=44100):
    """Load an audio file and cut it into equal-length chunks.

    Args:
        filename: Audio source passed straight to ``librosa.load``.
        chunk_length: Chunk duration in seconds. May be fractional, but must
            cover at least one sample.
        sr: Sample rate requested from ``librosa.load``.

    Returns:
        list of 1-D arrays, each ``round(chunk_length * sr)`` samples long.
        A trailing partial chunk is zero-padded to the full length. The list
        is empty when the file holds no samples.

    Raises:
        ValueError: If ``chunk_length`` does not cover at least one sample.
    """
    y, sr = librosa.load(filename, sr=sr)

    # Work in whole samples so a fractional chunk_length can never produce
    # float slice indices.
    samples_per_chunk = int(round(chunk_length * sr))
    if samples_per_chunk <= 0:
        raise ValueError("chunk_length must cover at least one sample")

    # Full-length chunks
    audio_chunks = [
        y[start:start + samples_per_chunk]
        for start in range(0, len(y) - samples_per_chunk + 1, samples_per_chunk)
    ]

    # Leftover samples are zero-padded on the right and kept as a final chunk
    leftover = len(y) % samples_per_chunk
    if leftover:
        tail = y[len(y) - leftover:]
        audio_chunks.append(np.pad(tail, (0, samples_per_chunk - leftover)))

    return audio_chunks


In [3]:
# 특징 추출
def _min_max_scale(values, ignore_nan=False):
    """Rescale ``values`` to [0, 1]; return zeros when there is no usable range."""
    if values.size == 0 or (ignore_nan and np.isnan(values).all()):
        return np.zeros_like(values)
    lo = np.nanmin(values) if ignore_nan else np.min(values)
    hi = np.nanmax(values) if ignore_nan else np.max(values)
    span = hi - lo
    # A constant (e.g. silent) or non-finite track would divide by 0 / NaN
    # and poison the whole feature matrix, so emit zeros instead.
    if span == 0 or not np.isfinite(span):
        return np.zeros_like(values)
    return (values - lo) / span


def _stretch_to_frames(values, n_frames):
    """Linearly resample a 1-D track to ``n_frames`` points."""
    # The query grid runs up to len(values), one past the last index; np.interp
    # clamps there. Kept as-is so features stay identical to what the loaded
    # weights were presumably trained on -- TODO confirm before changing.
    return np.interp(np.linspace(0, len(values), n_frames), np.arange(len(values)), values)


def extract_features(audio_file, sr=44100):
    """Build the stacked spectral and prosodic feature matrix for one chunk.

    Args:
        audio_file: 1-D array of audio samples (despite the name, not a path).
        sr: Sample rate of ``audio_file`` in Hz.

    Returns:
        2-D array with ``64 + 128 + 2 = 194`` rows and one column per MFCC
        frame: 64 MFCC rows, 128 log-mel rows, one pitch row and one
        intensity row. Each group is min-max scaled on its own; a group
        with no usable range (constant or non-finite) becomes all zeros.
    """
    # Pre-emphasis is applied for the spectral features only; pitch and
    # intensity below are measured on the unfiltered samples.
    y_pre = librosa.effects.preemphasis(audio_file, coef=0.97)

    # MFCC and mel spectrogram parameters
    n_fft = 1024
    hop_length = 256
    win_length = 512
    window = 'hamming'
    n_mels = 128
    n_mfcc = 64

    mfccs = librosa.feature.mfcc(y=y_pre, sr=sr, n_mfcc=n_mfcc, n_mels=n_mels, n_fft=n_fft,
                                 hop_length=hop_length, win_length=win_length, window=window)

    mel_spectrogram = librosa.feature.melspectrogram(y=y_pre, sr=sr, n_fft=n_fft, hop_length=hop_length,
                                                     n_mels=n_mels, window=window, win_length=win_length)
    mel_spectrogram_db = librosa.power_to_db(mel_spectrogram)

    # Pitch and intensity via Parselmouth, stepped at the same hop as the
    # spectral frames. The sampling frequency is passed explicitly so it
    # follows ``sr`` instead of the constructor default.
    sound = parselmouth.Sound(audio_file, sampling_frequency=sr)
    time_step = hop_length / sr
    pitch_values = sound.to_pitch(time_step=time_step).selected_array['frequency']
    intensity_values = sound.to_intensity(time_step=time_step).values[0]

    # Bring both prosodic tracks to the MFCC frame count
    n_frames = mfccs.shape[1]
    pitch_interp = _stretch_to_frames(pitch_values, n_frames)
    intensity_interp = _stretch_to_frames(intensity_values, n_frames)

    # Normalise every feature group independently
    mfccs = _min_max_scale(mfccs)
    mel_spectrogram_db = _min_max_scale(mel_spectrogram_db)
    pitch_interp = _min_max_scale(pitch_interp, ignore_nan=True)
    intensity_interp = _min_max_scale(intensity_interp)

    # Stack MFCCs, mel spectrogram, pitch and intensity row-wise
    features = np.vstack([mfccs, mel_spectrogram_db, pitch_interp, intensity_interp])

    return features

In [4]:
# Binary (sigmoid) CNN for depression detection. The input is a single-channel
# 194 x 862 map; 194 matches the row count produced by extract_features.
model = Sequential([
    # Stage 1 (also declares the input shape)
    Conv2D(16, (3, 3), activation='relu', padding='same', input_shape=(194, 862, 1)),
    BatchNormalization(),
    MaxPooling2D(pool_size=(2, 2)),
    Dropout(0.3),

    # Stage 2
    Conv2D(32, (3, 3), activation='relu', padding='same'),
    BatchNormalization(),
    MaxPooling2D(pool_size=(2, 2)),
    Dropout(0.3),

    # Stage 3
    Conv2D(64, (3, 3), activation='relu', padding='same'),
    BatchNormalization(),
    MaxPooling2D(pool_size=(2, 2)),
    Dropout(0.3),

    # Stage 4
    Conv2D(128, (3, 3), activation='relu', padding='same'),
    BatchNormalization(),
    MaxPooling2D(pool_size=(2, 2)),
    Dropout(0.3),

    # Classifier head: flatten the last feature map into one sigmoid unit
    Flatten(),
    Dense(1, activation='sigmoid'),
])

model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])


In [5]:
# model_53.h5	0.703781903	0.910915911	1.434558749	0.830043495	0.080872416
# NOTE(review): the figures above are unlabeled -- presumably metrics recorded
# for this checkpoint; confirm and label them.

# Raw string: "\p" and "\m" are not valid escape sequences, so the plain
# literal only worked by accident and triggers an invalid-escape warning.
model.load_weights(r"C:\pknu_6\model_w2_weight/model_53.h5")

In [6]:
def audio_depress(user_audio):
    """Classify a recording as depressed or not depressed.

    The audio is chunked by ``split_audio``, each chunk is converted to a
    feature matrix by ``extract_features``, and the module-level ``model``
    scores all chunks. The chunk scores are averaged and thresholded at 0.5.

    Args:
        user_audio: Audio source forwarded to ``split_audio``.

    Returns:
        Tuple ``(label, sigmoid_value)``: ``label`` is '우울' when the mean
        score exceeds 0.5 and '비우울' otherwise; ``sigmoid_value`` is the
        mean score as an array of shape (1, 1).

    Raises:
        ValueError: If the audio yields no chunks (previously this surfaced
            as a ZeroDivisionError).
    """
    audio_chunks = split_audio(user_audio)
    if not audio_chunks:
        raise ValueError("audio contains no samples to analyse")

    # One (n_chunks, rows, frames, 1) batch: the channel axis the Conv2D input
    # expects is added explicitly, and predict runs once instead of per chunk.
    batch = np.stack([extract_features(chunk) for chunk in audio_chunks])[..., np.newaxis]
    predictions = np.asarray(model.predict(batch))

    # keepdims preserves the (1, 1) shape callers already rely on (.tolist())
    sigmoid_value = np.mean(predictions, axis=0, keepdims=True)
    predicted_class = int(sigmoid_value.item() > 0.5)

    dep_dict = {0: '비우울', 1: '우울'}

    return (dep_dict[predicted_class], sigmoid_value)

In [7]:
# user_audio = 'D:\pknu_6\daic-woz!\interim\P331\P331_no_silence.wav'

In [8]:
# audio_depress(user_audio)

# 감정 탐지 함수

In [9]:
# 데이터 전처리

def split_audio_35(filename, chunk_length=3, sr=22050):
    """Load an audio file and cut it into equal-length chunks.

    Args:
        filename: Audio source passed straight to ``librosa.load``.
        chunk_length: Chunk duration in seconds. May be fractional, but must
            cover at least one sample.
        sr: Sample rate requested from ``librosa.load``.

    Returns:
        list of 1-D arrays, each ``round(chunk_length * sr)`` samples long.
        A trailing partial chunk is zero-padded to the full length. The list
        is empty when the file holds no samples.

    Raises:
        ValueError: If ``chunk_length`` does not cover at least one sample.
    """
    y, sr = librosa.load(filename, sr=sr)

    # Work in whole samples so a fractional chunk_length can never produce
    # float slice indices.
    samples_per_chunk = int(round(chunk_length * sr))
    if samples_per_chunk <= 0:
        raise ValueError("chunk_length must cover at least one sample")

    # Full-length chunks
    audio_chunks = [
        y[start:start + samples_per_chunk]
        for start in range(0, len(y) - samples_per_chunk + 1, samples_per_chunk)
    ]

    # Leftover samples are zero-padded on the right and kept as a final chunk
    leftover = len(y) % samples_per_chunk
    if leftover:
        tail = y[len(y) - leftover:]
        audio_chunks.append(np.pad(tail, (0, samples_per_chunk - leftover)))

    return audio_chunks

### 정규화
def nor(audio_np):
    """Peak-normalise a waveform so its largest absolute sample becomes 1.

    Args:
        audio_np: Array-like of audio samples.

    Returns:
        ``audio_np`` divided by its peak absolute value. A silent (all-zero)
        or empty input is returned unscaled as a floating array instead of
        being turned into NaN by a division by zero.
    """
    audio_np = np.asarray(audio_np)
    # np.max is vectorised; the builtin max iterated the array in Python
    peak = np.max(np.abs(audio_np)) if audio_np.size else 0
    if peak == 0:
        # Dividing by 1 keeps the dtype that true division would give
        return np.true_divide(audio_np, 1)
    normed_wav = audio_np / peak
    return normed_wav

def extract_mfccs(audio_file, sr=22050):
    """Compute MFCCs for one audio chunk after pre-emphasis.

    Args:
        audio_file: 1-D array of audio samples (despite the name, not a path).
        sr: Sample rate of ``audio_file`` in Hz.

    Returns:
        The array returned by ``librosa.feature.mfcc`` for 64 coefficients
        (128 mel bands, 1024-point FFT, hop 256, 512-sample Hamming window).
    """
    y_pre = librosa.effects.preemphasis(audio_file, coef=0.97)

    n_fft = 1024
    hop_length = 256
    win_length = 512
    window = 'hamming'
    n_mels = 128
    n_mfcc = 64

    mfccs = librosa.feature.mfcc(y=y_pre, sr=sr, n_mfcc=n_mfcc, n_mels=n_mels, n_fft=n_fft,
                                 hop_length=hop_length, win_length=win_length, window=window)
    return mfccs

                     
                     

In [10]:
def preprocess_audio_file(file_path):
    """Turn one audio chunk into a fixed-width MFCC matrix.

    Args:
        file_path: The chunk's samples; it is handed to ``nor`` and then
            ``extract_mfccs``, so despite the name it is not a path.

    Returns:
        The float32 result of ``pad_sequences``: every MFCC row is
        zero-padded or cut at the end to exactly 300 frames.
    """
    return pad_sequences(
        extract_mfccs(nor(file_path)),
        maxlen=300,
        dtype='float32',
        padding='post',
        truncating='post',
    )


In [11]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, BatchNormalization, Dropout, ReLU, Softmax

# Seven-class (softmax) CNN for emotion recognition on a single-channel
# 64 x 300 map, the shape produced by preprocess_audio_file.
model1 = Sequential([
    # Stage 1 (also declares the input shape)
    Conv2D(16, (3, 3), activation='relu', padding='same', input_shape=(64, 300, 1)),
    BatchNormalization(),
    MaxPooling2D(pool_size=(2, 2)),
    Dropout(0.3),

    # Stage 2
    Conv2D(32, (3, 3), activation='relu', padding='same'),
    BatchNormalization(),
    MaxPooling2D(pool_size=(2, 2)),
    Dropout(0.3),

    # Stage 3
    Conv2D(64, (3, 3), activation='relu', padding='same'),
    BatchNormalization(),
    MaxPooling2D(pool_size=(2, 2)),
    Dropout(0.3),

    # Stage 4
    Conv2D(128, (3, 3), activation='relu', padding='same'),
    BatchNormalization(),
    MaxPooling2D(pool_size=(2, 2)),
    Dropout(0.3),

    # Classifier head: flatten the last feature map into 7 softmax units
    Flatten(),
    Dense(7, activation='softmax'),
])

model1.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])


In [12]:
# Load saved weights into the emotion model from a hard-coded local checkpoint path.
model1.load_weights("C:/pknu_6/model5_weight/model_15.h5")

In [13]:
def audio_emo(user_audio):
    """Estimate the dominant emotion in a recording.

    The audio is chunked by ``split_audio_35``, each chunk is converted by
    ``preprocess_audio_file``, and the module-level ``model1`` scores all
    chunks. The per-class scores are averaged over chunks.

    Args:
        user_audio: Audio source forwarded to ``split_audio_35``.

    Returns:
        Tuple ``(average_prediction, dominant_emotion)``:
        ``average_prediction`` is the mean class-score vector as an array of
        shape (1, 7), and ``dominant_emotion`` is the label with the highest
        mean score.

    Raises:
        ValueError: If the audio yields no chunks (previously the mean of an
            empty list became NaN and the first label was returned silently).
    """
    # Position i is the label for output unit i; the order may differ for
    # another model.
    emotions = ['기쁨', '슬픔', '분노', '불안', '상처', '당황', '중립']

    audio_chunks = split_audio_35(user_audio)
    if not audio_chunks:
        raise ValueError("audio contains no samples to analyse")

    # One (n_chunks, rows, frames, 1) batch: the channel axis the Conv2D input
    # expects is added explicitly, and predict runs once instead of per chunk.
    batch = np.stack([preprocess_audio_file(chunk) for chunk in audio_chunks])[..., np.newaxis]
    predictions = np.asarray(model1.predict(batch))

    # keepdims preserves the (1, 7) shape callers already rely on (.tolist())
    average_prediction = np.mean(predictions, axis=0, keepdims=True)

    # Emotion with the highest average score
    max_index = int(np.argmax(average_prediction))
    dominant_emotion = emotions[max_index]

    return average_prediction, dominant_emotion


In [14]:
# {'기쁨': 0, '슬픔': 1, '분노': 2, '불안': 3, '상처': 4, '당황': 5, '중립': 6}

In [15]:
# from flask import Flask, jsonify, request
# from flask_ngrok import run_with_ngrok
# import requests
# from pyngrok import ngrok

# app = Flask(__name__)
# app.config['JSONIFY_PRETTYPRINT_REGULAR'] = False
# ngrok.kill()
# run_with_ngrok(app)  # Start ngrok when app is run

# @app.route('/', methods=['POST'])
# def get_depress():
#     data = request.get_json()  # POST 요청으로 전달된 JSON 데이터를 가져옵니다.

#     user_input = data['user_input']  # 'text'는 POST 요청에서 전달된 텍스트 필드의 키입니다.

#     depress_list = audio_depress(user_input)   # ['일상', '우울', '피로', '무기력', '식욕저하', '우울'] 와 같은 리스트 형식
    
#     return jsonify({'depress': depress_list})  # 감정(emotions)을 JSON 형태로 반환합니다.

# if __name__ == '__main__':
#     app.run()  

# import threading
# threading.Thread(target=app.run, kwargs={'host':'0.0.0.0','port':80}).start() 

In [16]:
# from flask import Flask, jsonify, request
# from pyngrok import ngrok
# import requests

# app = Flask(__name__)
# app.config['JSONIFY_PRETTYPRINT_REGULAR'] = False
# ngrok.kill()


# @app.route('/', methods=['POST'])
# def get_depress():
#     data = request.get_json()  # POST 요청으로 전달된 JSON 데이터를 가져옵니다.

#     user_input = data['user_input']  # 'text'는 POST 요청에서 전달된 텍스트 필드의 키입니다.

#     depress_list = audio_depress(file_path)   # ['일상', '우울', '피로', '무기력', '식욕저하', '우울'] 와 같은 리스트 형식
    
#     return jsonify({'depress': depress_list})  # 감정(emotions)을 JSON 형태로 반환합니다.

# def start_ngrok():
#     ngrok_tunnel_url = ngrok.connect(80)
#     print('Public URL:', ngrok_tunnel_url)

# if __name__ == '__main__':
#     start_ngrok()
#     app.run(port=80)  

In [17]:
# from flask import Flask, jsonify, request
# from pyngrok import ngrok
# from datetime import datetime
# import os

# app = Flask(__name__)
# app.config['JSONIFY_PRETTYPRINT_REGULAR'] = False
# ngrok.kill()


# def generate_unique_filename():
#     current_date = datetime.now().strftime('%Y%m%d')
#     timestamp = datetime.now().strftime('%H%M%S')
#     return f'file_{current_date}_{timestamp}.wav'

# @app.route('/', methods=['POST'])
# def upload():
#     if 'file' not in request.files:
#         return jsonify({'error': 'No audio file found'}), 400

#     file = request.files['file']

#     # 저장할 파일 경로
#     file_name = generate_unique_filename()
#     file_path = os.path.join('./audiofiles', file_name)

#     # 파일 저장
#     file.save(file_path)

#     # 오디오 파일 처리 후 결과 반환
#     depress_list, sigmoid_value = audio_depress(file_path)
    
#     average_prediction, dominant_emotion = audio_emo(file_path)
    
    
#     return jsonify({'depress': depress_list,
#                    'sigmoid_value':sigmoid_value.tolist(),
#                     'emotion': dominant_emotion,
#                     'softmax_value':average_prediction.tolist()
#                    })

# def start_ngrok():
#     ngrok_tunnel_url = ngrok.connect(80)
#     print('Public URL:', ngrok_tunnel_url)

# if __name__ == '__main__':
#     start_ngrok()
#     app.run(port=80)


In [None]:
from flask import Flask, jsonify, request
from pyngrok import ngrok
from datetime import datetime
from flask import make_response
import os

# Flask application that serves the audio-analysis endpoint
app = Flask(__name__)
# NOTE(review): newer Flask releases appear to have dropped this config key -- confirm it still has an effect
app.config.update(JSONIFY_PRETTYPRINT_REGULAR=False)

# Terminate any ngrok process pyngrok currently has running
ngrok.kill()


def generate_unique_filename():
    """Return a fresh ``file_<YYYYMMDD>_<HHMMSS>_<8 hex chars>.wav`` name.

    The timestamp alone only has one-second resolution, so two uploads in
    the same second used to get the same name and overwrite each other; the
    random suffix keeps them apart.
    """
    import uuid  # local import: only this helper needs it

    # Single clock read so the date and time parts cannot straddle midnight
    stamp = datetime.now().strftime('%Y%m%d_%H%M%S')
    return f'file_{stamp}_{uuid.uuid4().hex[:8]}.wav'

@app.route('/', methods=['GET','POST'])
def upload():
    """Handle the root endpoint.

    GET returns a plain-text placeholder with caching disabled.

    POST expects a multipart upload under the form field ``file``. The file
    is saved under ``./audiofiles`` and analysed with ``audio_depress`` and
    ``audio_emo``.

    Returns:
        For POST, a JSON object with keys ``depress``, ``sigmoid_value``,
        ``emotion`` and ``softmax_value``; or a JSON error with status 400
        when no file was supplied.
    """
    if request.method != 'POST':
        response = make_response('This is a GET request.')
        response.headers['Cache-Control'] = 'no-cache, no-store, must-revalidate'
        return response

    # A form submitted without choosing a file still sends the field, with an
    # empty filename, so both cases are rejected.
    file = request.files.get('file')
    if file is None or not file.filename:
        return jsonify({'error': 'No audio file found'}), 400

    # Destination path; the directory is created on demand because saving
    # into a missing directory fails.
    upload_dir = './audiofiles'
    os.makedirs(upload_dir, exist_ok=True)
    file_path = os.path.join(upload_dir, generate_unique_filename())

    file.save(file_path)

    # Run both models on the saved file
    depress_list, sigmoid_value = audio_depress(file_path)
    average_prediction, dominant_emotion = audio_emo(file_path)

    return jsonify({'depress': depress_list,
                    'sigmoid_value': sigmoid_value.tolist(),
                    'emotion': dominant_emotion,
                    'softmax_value': average_prediction.tolist()
                    })

def start_ngrok():
    """Open an ngrok tunnel to local port 80 and print what ``ngrok.connect`` returns."""
    print('Public URL:', ngrok.connect(80))

# Entry point: the tunnel is opened first because app.run() blocks while
# serving; both use port 80.
if __name__ == '__main__':
    start_ngrok()
    app.run(port=80)


Public URL: http://257e-175-214-183-100.ngrok-free.app
 * Serving Flask app '__main__'
 * Debug mode: off


 * Running on http://127.0.0.1:80
Press CTRL+C to quit
127.0.0.1 - - [20/Jun/2023 09:13:13] "GET / HTTP/1.1" 200 -
127.0.0.1 - - [20/Jun/2023 09:13:13] "GET / HTTP/1.1" 200 -
127.0.0.1 - - [20/Jun/2023 09:13:15] "GET /favicon.ico HTTP/1.1" 404 -
127.0.0.1 - - [20/Jun/2023 09:13:16] "GET /favicon.ico HTTP/1.1" 404 -




127.0.0.1 - - [20/Jun/2023 09:18:04] "POST / HTTP/1.1" 200 -




127.0.0.1 - - [20/Jun/2023 09:22:16] "POST / HTTP/1.1" 200 -




127.0.0.1 - - [20/Jun/2023 09:23:14] "POST / HTTP/1.1" 200 -
127.0.0.1 - - [20/Jun/2023 10:18:02] "GET / HTTP/1.1" 200 -
127.0.0.1 - - [20/Jun/2023 10:18:03] "GET / HTTP/1.1" 200 -
127.0.0.1 - - [20/Jun/2023 11:26:38] "GET / HTTP/1.1" 200 -
t=2023-06-21T08:38:21+0900 lvl=eror msg="heartbeat timeout, terminating session" obj=csess id=95aa9b3cc471 clientid=67e86907d9155d4c7281908a22b81f66

t=2023-06-21T08:38:21+0900 lvl=eror msg="session closed, starting reconnect loop" obj=csess id=cbdf611dfbab err="session closed"

t=2023-06-21T08:38:21+0900 lvl=eror msg="failed to reconnect session" obj=csess id=cbdf611dfbab err="dial tcp: lookup tunnel.us.ngrok.com: no such host"

t=2023-06-21T08:38:21+0900 lvl=warn msg="failed to check for update" obj=updater err="Post \"https://update.equinox.io/check\": dial tcp: lookup update.equinox.io: no such host"

t=2023-06-21T08:38:22+0900 lvl=eror msg="failed to reconnect session" obj=csess id=cbdf611dfbab err="dial tcp: lookup tunnel.us.ngrok.com: no such