## VI 프로젝트 파형파트 augmentation 적용 패치입니다
 - 업데이트 : 2020.04.09


In [0]:
# Mount Google Drive into the Colab runtime at the mount point 'gdrive'.
# NOTE(review): the following cell changes into '/content/gdrive/...', so this
# presumably relies on the working directory being /content — confirm.
from google.colab import drive
drive.mount('gdrive') 

In [0]:
# Switch the working directory to the project folder on the mounted Drive.
%cd '/content/gdrive/My Drive/kisti_final_project/Waveform/jjw'

In [0]:
# Install the soundfile package, which is imported below and used to read audio.
!pip install soundfile

### 라이브러리 import (오디오 처리 · 데이터 분할 · Keras 모델 구성)

In [0]:
import json
import librosa
import soundfile
import os, glob
import numpy as np
from sklearn.model_selection import train_test_split
import keras
from keras.utils import to_categorical
from keras.models import Sequential
from keras.layers import *
from keras.optimizers import rmsprop
from keras.models import load_model

Using TensorFlow backend.


## Augmentation 적용하기 위한 클래스
 - 따로 빼서 .py 파일로 만들어도 된다.

In [0]:
import matplotlib.pyplot as plt

class AudioAugmentation:
    """Load, augment and save audio clips at a fixed 16 kHz sample rate.

    The class is stateless; it only groups the helper methods so that it can
    be moved into its own ``.py`` module if desired.
    """

    # Sample rate (Hz) used when loading; also the default rate when writing.
    SAMPLE_RATE = 16000
    # Minimum clip length in samples (one second at SAMPLE_RATE).
    INPUT_LENGTH = 16000

    @staticmethod
    def _pad_to_length(data, length):
        """Zero-pad *data* at the end so that it holds at least *length* samples.

        Inputs that are already long enough are returned with no padding added.
        """
        missing = max(0, length - len(data))
        return np.pad(data, (0, missing), "constant")

    def read_audio_file(self, file_path):
        """Read an audio file and zero-pad it to at least ``INPUT_LENGTH`` samples.

        Args:
            file_path: Path of the audio file to load.

        Returns:
            The decoded samples as a NumPy array. Clips longer than
            ``INPUT_LENGTH`` are NOT truncated.
        """
        # librosa resamples to 22050 Hz unless told otherwise; request the rate
        # that the length constants and write_audio_file() assume, so the
        # augmented files keep their original speed and pitch.
        data = librosa.load(file_path, sr=self.SAMPLE_RATE)[0]
        return self._pad_to_length(data, self.INPUT_LENGTH)

    def write_audio_file(self, file, data, sample_rate=16000):
        """Save augmented samples to *file* as a WAV file.

        Args:
            file: Destination path.
            data: Samples to write.
            sample_rate: Sample rate in Hz stored in the file header.
        """
        # librosa.output.write_wav was removed in librosa 0.8; soundfile is the
        # documented replacement. A floating-point subtype keeps the samples
        # un-quantised, as the old writer did for float input.
        soundfile.write(file, data, sample_rate, subtype="FLOAT")

    def plot_time_series(self, data):
        """Plot the raw waveform (optional, for visual inspection only)."""
        plt.figure(figsize=(14, 8))
        plt.title('Raw wave ')
        plt.ylabel('Amplitude')
        plt.plot(np.linspace(0, 1, len(data)), data)
        plt.show()

    def add_noise(self, data, noise_factor=0.005):
        """Return *data* with Gaussian white noise added.

        The noise is drawn anew on every call, so repeated calls produce
        different variants of the same clip.

        Args:
            data: Input samples.
            noise_factor: Multiplier applied to the standard-normal noise.
        """
        noise = np.random.randn(len(data))
        return data + noise_factor * noise

    def shift(self, data, shift_samples=1600):
        """Return *data* circularly shifted by *shift_samples* positions.

        Samples pushed past the end wrap around to the beginning (``np.roll``).
        """
        return np.roll(data, shift_samples)

    def stretch(self, data, rate=1):
        """Time-stretch *data* and force the result to ``INPUT_LENGTH`` samples.

        Args:
            data: Input samples.
            rate: Stretch factor passed to ``librosa.effects.time_stretch``.

        Returns:
            An array of exactly ``INPUT_LENGTH`` samples: longer results are
            truncated, shorter ones are zero-padded at the end.
        """
        # `rate` is keyword-only in recent librosa releases; the keyword form
        # works on older releases as well.
        stretched = librosa.effects.time_stretch(data, rate=rate)
        return self._pad_to_length(stretched[:self.INPUT_LENGTH], self.INPUT_LENGTH)

In [0]:
def extract_feature(file_name,mfcc=True,chroma=True,mel=True):
    """Extract a 1-D feature vector from a mono audio file.

    Each enabled feature is averaged over time and the results are
    concatenated in the order MFCC, chroma, mel spectrogram.

    Args:
        file_name: Path of the audio file to read.
        mfcc: Include the time-averaged 40 MFCC coefficients.
        chroma: Include the time-averaged chroma features of the STFT magnitude.
        mel: Include the time-averaged mel spectrogram.

    Returns:
        A 1-D NumPy array with the concatenated features, or an empty array
        when the file has more than one channel.
    """
    # The context manager closes the file on every path, including the early
    # return for multi-channel input below.
    with soundfile.SoundFile(file_name) as sf:
        X = sf.read(dtype='float32')
        sr = sf.samplerate
    if X.ndim > 1:
        return np.array([])

    # Start from an empty float array so the result keeps the same dtype and is
    # still an (empty) array when every feature is disabled.
    parts = [np.array([])]
    if mfcc:
        parts.append(np.mean(librosa.feature.mfcc(y=X, sr=sr, n_mfcc=40).T, axis=0))
    if chroma:
        stft = np.abs(librosa.stft(X))
        parts.append(np.mean(librosa.feature.chroma_stft(S=stft, sr=sr).T, axis=0))
    if mel:
        # `y` is keyword-only in recent librosa releases.
        parts.append(np.mean(librosa.feature.melspectrogram(y=X, sr=sr).T, axis=0))
    return np.hstack(parts)

In [0]:
# Glob pattern of the source audio files (placeholder - fill in before running).
path = '파일을 불러올 장소'
aa = AudioAugmentation()
for src_file in glob.glob(path):
    base_name = os.path.basename(src_file)
    samples = aa.read_audio_file(src_file)
    # Build all three variants first, then write each one next to a suffix that
    # names the augmentation applied.
    variants = (
        ('generated_noise.wav', aa.add_noise(samples)),
        ('generated_roll.wav', aa.shift(samples)),
        ('generated_stretch.wav', aa.stretch(samples, 0.8)),
    )
    for suffix, augmented in variants:
        # base_name[:-4] drops the last four characters (e.g. ".wav").
        aa.write_audio_file('파일 저장 위치' + base_name[:-4] + suffix, augmented)

### 아래부턴 변경사항 없음

In [0]:
# Label codes found in the file names. Three-class alternative:
#emotions = {'1':'negative','2':'neutral','3':'positive'}
emotions = {'1':'negative','2':'positive'}
x_data,y_data = [],[]
skipped = []
for f in glob.glob(path):
    fn = os.path.basename(f)
    # The third character of the file name is used as the class label.
    emo = fn[2]
    feature = extract_feature(f)
    # extract_feature returns an empty array for multi-channel files; keeping
    # it would make np.array(x_data) ragged, so skip those files instead.
    if feature.size == 0:
        skipped.append(fn)
        continue
    x_data.append(feature)
    y_data.append(emo)
if skipped:
    print('Skipped', len(skipped), 'file(s) without features:', skipped)
x_train,x_test,y_train,y_test= train_test_split(np.array(x_data), y_data, test_size=0.2, random_state=1229)

In [0]:
# Report the feature-matrix shapes and label counts of the train/test split.
print(
    'X_train : ', x_train.shape,
    'X_test : ', x_test.shape,
    '\n Y_train : ', len(y_train),
    'Y_test : ', len(y_test),
)

In [0]:
# One-hot encode the label lists for training.
# NOTE(review): to_categorical infers the class count as (max label + 1), so
# labels '1'/'2' would presumably yield 3 columns while the model ends in
# Dense(2) - confirm the label values used in the file names.
y_train_cat = to_categorical(np.asarray(y_train))
y_test_cat = to_categorical(np.asarray(y_test))

In [0]:
# Report the shapes again, now with the one-hot encoded label arrays.
print(
    'X_train : ', x_train.shape,
    ' X_test : ', x_test.shape,
    '\n Y_train : ', len(y_train_cat),
    '  Y_test : ', len(y_test_cat),
)

In [0]:
def new_CNN_al():
    """Build and compile the 1-D CNN classifier (model C in the report).

    The network takes inputs of shape (180, 1), stacks four same-padded
    Conv1D/ReLU stages with dropout and one max-pooling step, and ends in a
    2-unit softmax layer.

    Returns:
        The compiled Sequential model (Adam optimizer, binary cross-entropy
        loss, accuracy metric).
    """
    stack = [
        Conv1D(8, 5, padding='same', input_shape=(180, 1)),
        Activation('relu'),
        Conv1D(16, 5, padding='same'),
        Activation('relu'),
        Dropout(0.1),
        MaxPooling1D(pool_size=8),
        Conv1D(32, 5, padding='same'),
        Activation('relu'),
        Dropout(0.2),
        Conv1D(16, 5, padding='same'),
        Activation('relu'),
        Flatten(),
        Dense(2),
        Activation('softmax'),
    ]
    network = Sequential(stack)
    network.compile(
        loss='binary_crossentropy',
        optimizer=keras.optimizers.Adam(lr=0.0001, decay=1e-6),
        metrics=['accuracy'],
    )
    return network

In [0]:
# Build the CNN and train it; the test split doubles as validation data.
model_WCA = new_CNN_al()
# Conv1D expects a trailing channel axis, so add one to each feature matrix.
train_inputs = np.expand_dims(x_train, -1)
val_inputs = np.expand_dims(x_test, -1)
wc_fit = model_WCA.fit(
    train_inputs,
    y_train_cat,
    validation_data=(val_inputs, y_test_cat),
    epochs=350,
    shuffle=True,
)

In [0]:
# Print the model's layer-by-layer summary.
model_WCA.summary()

In [0]:
# Evaluate the trained model on (t2, emo_check) in batches of 100.
# NOTE(review): `t2` and `emo_check` are not defined in any cell visible here -
# presumably separately prepared features and one-hot labels; confirm they
# exist before running this cell.
s = model_WCA.evaluate(np.expand_dims(t2,-1),emo_check,batch_size=100)

In [0]:
# Print the second value returned by evaluate(); since the model is compiled
# with metrics=['accuracy'], this is presumably the accuracy (index 0 = loss).
print(s[1])