In [None]:
!pip install sentence_transformers

Collecting sentence_transformers
  Downloading sentence-transformers-2.2.2.tar.gz (85 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/86.0 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m86.0/86.0 kB[0m [31m3.2 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting transformers<5.0.0,>=4.6.0 (from sentence_transformers)
  Downloading transformers-4.30.2-py3-none-any.whl (7.2 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.2/7.2 MB[0m [31m68.6 MB/s[0m eta [36m0:00:00[0m
Collecting sentencepiece (from sentence_transformers)
  Downloading sentencepiece-0.1.99-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.3 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.3/1.3 MB[0m [31m33.9 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting huggingface-hub>=0.4.0 (from sentence_transformers)
  Downloading huggingface_

In [None]:
from google.colab import drive

# Mount Google Drive; the wav files, scalers and models used below are read
# from paths under this mount point.
mount_point = '/content/drive'
drive.mount(mount_point)

Mounted at /content/drive


# 라이브러리 불러오기

In [29]:
import os
import random

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from numpy import dot
from numpy.linalg import norm
import seaborn as sns
import matplotlib.pyplot as plt
import urllib.request

from sentence_transformers import SentenceTransformer
from imblearn.over_sampling import SMOTE
import librosa
import librosa.display
from IPython.display import Audio
from sklearn.preprocessing import OneHotEncoder
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler, OneHotEncoder
import keras
from keras.callbacks import ReduceLROnPlateau
from keras.models import Sequential
from keras.layers import Dense, Conv1D, MaxPooling1D, Flatten, Dropout, BatchNormalization
from keras.utils import np_utils, to_categorical
from keras.callbacks import ModelCheckpoint
from imblearn.over_sampling import SMOTE
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix

from keras.models import load_model
from IPython.display import SVG
from keras.utils.vis_utils import model_to_dot
import warnings
import re
import pickle
import joblib
warnings.filterwarnings('ignore')
%matplotlib inline

# 전처리 함수 정의

In [None]:
def noise(data):
    """Return a copy of `data` with additive Gaussian noise.

    The noise amplitude is a random fraction of the signal's maximum value:
    a uniform draw from [0, 1) scaled by 0.035. The input array is not
    modified; a new array is returned.

    Args:
        data: 1-D NumPy array (one noise sample is drawn per element of
            the first axis).

    Returns:
        Array with the same shape as `data`, with noise added.
    """
    peak = np.amax(data)
    # Draw order matters for reproducibility under a fixed seed:
    # first the uniform amplitude factor, then the normal samples.
    amplitude = 0.035 * np.random.uniform() * peak
    perturbation = np.random.normal(size=data.shape[0])
    return data + amplitude * perturbation

def extract_features(data, sample_rate):
    """Build one flat feature vector from an audio signal.

    Five librosa descriptors are each averaged and laid end to end in this
    order: zero-crossing rate, chroma (from the STFT magnitude), MFCC,
    RMS energy, mel spectrogram.

    Args:
        data: audio samples as accepted by librosa.
        sample_rate: sampling rate passed to the librosa feature functions.

    Returns:
        1-D float64 NumPy array (presumably 162 values with librosa's
        default settings -- confirm against the installed librosa version).
    """
    def _averaged(feature_matrix):
        # Transpose, then mean over axis 0: collapses the second axis of the
        # librosa output (presumably the frame axis -- confirm in librosa docs).
        return np.mean(feature_matrix.T, axis=0)

    magnitude = np.abs(librosa.stft(data))

    parts = [
        # Empty float64 seed: keeps the stacked result float64 regardless of
        # the dtype librosa returns for the individual features.
        np.array([]),
        _averaged(librosa.feature.zero_crossing_rate(y=data)),
        _averaged(librosa.feature.chroma_stft(S=magnitude, sr=sample_rate)),
        _averaged(librosa.feature.mfcc(y=data, sr=sample_rate)),
        _averaged(librosa.feature.rms(y=data)),
        _averaged(librosa.feature.melspectrogram(y=data, sr=sample_rate)),
    ]
    return np.hstack(parts)

def get_features(path):
    """Load an audio file and return clean + noise-augmented features.

    Only the first 2.5 seconds of the file are loaded. Features are extracted
    once from the raw signal and once from a noise-perturbed copy, and the two
    vectors are joined end to end into a single 1-D array.

    Args:
        path: path of the audio file to read with librosa.

    Returns:
        1-D NumPy array: features of the clean signal followed by features
        of the noisy signal.
    """
    signal, sr = librosa.load(path, duration=2.5, offset=0.0)

    # Clean signal first, then the noisy variant (noise() is random).
    clean_vec = np.array(extract_features(signal, sr))
    noisy_vec = extract_features(noise(signal), sr)

    return np.concatenate((clean_vec, noisy_vec), axis=0)

class text_embedding():
    """Transformer-style wrapper that appends sentence embeddings to a frame.

    `transform` encodes the 'sentence' column with a SentenceTransformer model
    and returns the remaining columns concatenated with the embedding vectors.

    The model is loaded lazily on the first `transform` call and cached on the
    instance, so repeated calls do not reload it.
    """

    def __init__(self, model_name):
        """Store the model name; the model itself is loaded on first use."""
        self.model_name = model_name
        self._model = None

    def __getstate__(self):
        """Exclude the cached model so pickled state is only the model name."""
        state = dict(self.__dict__)
        state.pop('_model', None)
        return state

    def _get_model(self):
        """Return the cached SentenceTransformer, loading it on first call."""
        # getattr: instances restored from a pickle have no `_model` attribute.
        model = getattr(self, '_model', None)
        if model is None:
            model = SentenceTransformer(self.model_name)
            self._model = model
        return model

    def fit(self, new_sent, y=None):
        """No-op; present for a fit/transform style interface. Returns self."""
        return self

    def transform(self, new_sent):
        """Return non-text columns followed by the sentence embeddings.

        Args:
            new_sent: DataFrame with a 'sentence' column; any other columns
                are passed through in front of the embedding columns.

        Returns:
            2-D NumPy array with one row per row of `new_sent`.
        """
        # Pass a plain list so encoding does not depend on the frame's index
        # labels (a Series would be indexed by label inside the encoder).
        sentences = list(new_sent['sentence'])
        embedding_vec = self._get_model().encode(sentences)
        other_features = new_sent.drop(['sentence'], axis=1)
        return np.concatenate((other_features, embedding_vec), axis=1)

## Split a paragraph into sentences
def split_into_sentences(paragraph):
    """Split `paragraph` into sentences at whitespace following '.', '!' or '?'.

    The terminating punctuation stays attached to its sentence. Leading and
    trailing whitespace of the paragraph is removed first, so a trailing
    space or newline does not produce an empty final sentence.

    Args:
        paragraph: text to split.

    Returns:
        List of sentence strings. An empty or whitespace-only paragraph
        yields [''] (never an empty list).
    """
    # Raw string: "\s" in a normal string literal is an invalid escape sequence.
    return re.split(r"(?<=[.!?])\s+", paragraph.strip())

# 모델 함수

In [53]:
def _predict_two_stage(x_test, model1_path, model2_path, output_dic, n_voice_cols=0):
    """Run the stage-1 negative/non-negative model, then the stage-2 classifier.

    Args:
        x_test: scaled feature matrix, one row per input text.
        model1_path: path of the stage-1 Keras model. Rows whose rounded
            output is 1 are labelled '부정'; rows with 0 go on to stage 2.
        model2_path: path of the stage-2 Keras model; the argmax of its output
            is looked up in `output_dic`.
        output_dic: mapping from stage-2 class index to label string.
        n_voice_cols: number of leading feature columns to drop before
            stage 2 (stage 2 is fed text features only).

    Returns:
        pandas Series of label strings, indexed by row position in `x_test`.
    """
    # Stage 1: negative vs. non-negative.
    model1 = load_model(model1_path)
    y_pred1 = model1.predict(x_test, verbose=0).round()

    pred1_df = pd.DataFrame(x_test)
    pred1_df['predict1'] = y_pred1

    # Explicit copies: new columns are added to these subsets below.
    pred_neg = pred1_df.loc[pred1_df['predict1'] == 1].copy()
    pred_neg['predict'] = '부정'

    pred_pos = pred1_df.loc[pred1_df['predict1'] == 0].copy()
    x_test2 = pred_pos.drop('predict1', axis=1)

    if len(x_test2) > 0:
        if n_voice_cols:
            # Remove the voice feature columns; stage 2 classifies on text only.
            x_test2 = x_test2.drop(list(range(n_voice_cols)), axis=1)

        # Stage 2: final label for the non-negative rows.
        model2 = load_model(model2_path)
        y_fin = np.argmax(model2.predict(x_test2, verbose=0), axis=1)
        pred_pos['predict'] = np.vectorize(output_dic.get)(y_fin)

    return pd.concat([pred_neg, pred_pos]).sort_index()['predict']


def classification_model(new_sentence, new_voice='file.wav', base_dir='/content/drive/MyDrive'):
    """Classify text (optionally with a voice recording) into label categories.

    Two modes:
      * voice + text: used when the wav file exists AND the text splits into
        fewer than 3 sentences. The whole text is classified as one item,
        using voice features plus the text embedding for stage 1.
      * text only: used otherwise (no wav file, or 3 or more sentences).
        Each sentence is classified separately.

    Args:
        new_sentence: text to classify.
        new_voice: wav file name, resolved relative to `base_dir`.
        base_dir: folder holding the wav file and the saved scalers/models.

    Returns:
        DataFrame with columns 'sentence' and 'predict'.
    """
    output_dic = {0:'관점변화', 1:'부정', 2:'인정', 3:'존중', 4:'판단'}
    new_sents = pd.DataFrame(split_into_sentences(new_sentence))

    # Full path of the wav file.
    new_wav = os.path.join(base_dir, new_voice)

    # Text preprocessing (sentence embedding).
    txt_embed = text_embedding(model_name = 'jhgan/ko-sroberta-multitask')

    if os.path.isfile(new_wav) and len(new_sents) < 3:
        # wav present and at most 2 sentences: use voice + text.
        sentences = pd.DataFrame({'sentence': [new_sentence]})
        voice_features = pd.DataFrame(get_features(new_wav)).transpose()
        features = pd.concat([voice_features, sentences], axis=1)
        # Width of the voice block = get_features output length; these leading
        # columns are dropped again before the text-only stage-2 model.
        n_voice_cols = voice_features.shape[1]
        artifact_prefix = '230628_voice'
    else:
        # No wav file, or 3+ sentences: use text only, one row per sentence.
        new_sents.columns = ['sentence']
        sentences = new_sents
        features = new_sents
        n_voice_cols = 0
        artifact_prefix = '230628_text'

    # Embed the text, then scale with the scaler fitted for this mode.
    X_test = txt_embed.transform(features)
    # NOTE: joblib.load unpickles the file; only load trusted artifacts.
    scaler = joblib.load(os.path.join(base_dir, artifact_prefix + '_scaler.pkl'))
    x_test = scaler.transform(X_test)

    predictions = _predict_two_stage(
        x_test,
        model1_path=os.path.join(base_dir, artifact_prefix + '_model1.h5'),
        model2_path=os.path.join(base_dir, '230628_result_model2.h5'),
        output_dic=output_dic,
        n_voice_cols=n_voice_cols,
    )

    final_result = pd.DataFrame()
    final_result['sentence'] = sentences['sentence']
    final_result['predict'] = predictions  # aligned on the row index

    return final_result

# 새로운 데이터에 대한 성능 확인

In [54]:
classification_model('이건 좀 아니지 않나요?. 이 부분은 다른 모델을 사용해서 해보세요. 그래도 잘 하고 있으니까 힘내시고.')

Unnamed: 0,sentence,predict
0,이건 좀 아니지 않나요?.,부정
1,이 부분은 다른 모델을 사용해서 해보세요.,판단
2,그래도 잘 하고 있으니까 힘내시고.,인정


In [15]:
classification_model('제 도움이 필요하면 언제든 요청하세요.')

Unnamed: 0,sentence,predict
0,제 도움이 필요하면 언제든 요청하세요.,존중


In [16]:
classification_model('그렇게 하지마세요.')

Unnamed: 0,sentence,predict
0,그렇게 하지마세요.,부정


In [17]:
classification_model('당신의 목표는 무엇입니까?', 'test_0.wav')

Unnamed: 0,sentence,predict
0,당신의 목표는 무엇입니까?,관점변화


In [18]:
classification_model('당신의 목표는 무엇입니까? 제가 이해하고 있는게 맞나요?', 'test_0.wav')

Unnamed: 0,sentence,predict
0,당신의 목표는 무엇입니까? 제가 이해하고 있는게 맞나요?,관점변화


In [19]:
classification_model('현재 성과향상에 대해 고민하고 있다 하셨죠? 제가 정확하게 이해하고 있나요? 제가 다르게 이해했다면 설명해주시겠어요?', 'test_0.wav')

Unnamed: 0,sentence,predict
0,현재 성과향상에 대해 고민하고 있다 하셨죠?,인정
1,제가 정확하게 이해하고 있나요?,부정
2,제가 다르게 이해했다면 설명해주시겠어요?,부정


In [20]:
classification_model('안녕하세요?')

Unnamed: 0,sentence,predict
0,안녕하세요?,존중


In [21]:
classification_model('오늘 기분은 어때요?')

Unnamed: 0,sentence,predict
0,오늘 기분은 어때요?,존중


In [22]:
classification_model('안녕하세요. 대화 시작할까요?')

Unnamed: 0,sentence,predict
0,안녕하세요.,존중
1,대화 시작할까요?,존중


In [27]:
classification_model('안녕하세요. 반갑습니다. 오늘 기분은 좀 어떠세요? 시작해볼까요. 본인이 생각하는 문제는 어떤건지 알려줄래요?')

Unnamed: 0,sentence,predict
0,안녕하세요.,존중
1,반갑습니다.,존중
2,오늘 기분은 좀 어떠세요?,존중
3,시작해볼까요.,존중
4,본인이 생각하는 문제는 어떤건지 알려줄래요?,관점변화


In [24]:
classification_model('오늘 기분은 어떠세요?')

Unnamed: 0,sentence,predict
0,오늘 기분은 어떠세요?,존중
