prep.ipynbで作成した入力データを用いて実際に学習させる

In [2]:
import numpy as np
import matplotlib.pyplot as plt
import wave
import pickle
import tensorflow as tf
import pickle
from sklearn.utils import shuffle
from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score

rng = np.random.RandomState(1234)
random_state = 42

%matplotlib inline

In [3]:
###########################################
# 説明変数用
###########################################

# 短時間フーリエ変換
def stft_core(x, win, step,IMG_SIZE):
    l = len(x) # 入力信号の長さ
    N = len(win) # 窓幅
    M = int(np.ceil(float(l - N + step) / step)) # スペクトログラムの時間フレーム数
    
    new_x = np.zeros(N + ((M - 1) * int(step)), dtype = float)
    new_x[: l] = x # 信号をいい感じの長さにする
    
    X = np.zeros([M, N], dtype = complex) # スペクトログラムの初期化(複素数型)
    X2 = np.zeros([(M-M%IMG_SIZE+IMG_SIZE), N], dtype = complex) # あとで使う
    
    for m in range(M):
        start = int(step * m)
        X[m, :] = np.fft.fft(new_x[start : start + N] * win)
        X2[m, :] = np.fft.fft(new_x[start : start + N] * win)
    return X,X2


def stft(filename,IMG_SIZE):
    # file read
    wf = wave.open(filename , "r" )
    fs = wf.getframerate()  # サンプリング周波数
    g = wf.readframes(wf.getnframes())
    wf.close()

    # -1～1に正規化
    g = np.frombuffer(g, dtype= "int16")/32768.0    

    fftLen = (IMG_SIZE-1)*2
    win = np.hamming(fftLen) # ハミング窓
    step = fftLen / 2                 

    spectrogram,spectrogram2 = stft_core(g, win, step,IMG_SIZE)

    return spectrogram2, fftLen

# 説明変数計算
def calc_data(spectrogram2, fftLen,IMG_SIZE):
    #"""
    def min_max_normalixstion(x):
        x_min = x.min()
        x_max = x.max()
        x_norm = ((x-x_min)/(x_max - x_min)*255).astype("uint8")
        return x_norm
    """
    def min_max_normalixstion(x):
        return x
    """

    data_tmp = min_max_normalixstion(abs(spectrogram2[:, : int(fftLen / 2 + 1)]))
    data = data_tmp.reshape(int((len(data_tmp)*IMG_SIZE)/(IMG_SIZE*IMG_SIZE)),IMG_SIZE,IMG_SIZE)
    
    data = data.reshape(data.shape[0],data.shape[1],data.shape[2],1)
    return data

In [4]:
###########################################
# 目的変数用
###########################################

# 目的変数計算
def calc_target(ydata_tmp,fftLen,data):
    ydata_ft = []
    for i in range(len(ydata_tmp)):
        ydata_ft.append(int(ydata_tmp[i]/fftLen*2))
     
    target = np.zeros([int(len(data)),2], dtype = "uint8")
    target[:,0]=1
    
    for i in range(len(ydata_ft)):
        tmp = int(ydata_ft[i]/256)
        target[tmp][0]=0
        target[tmp][1]=1
    
    return target


In [5]:
###########################################
# CNN用
###########################################

# 畳み込み
class Conv:
    def __init__(self, filter_shape, function=lambda x: x, strides=[1,1,1,1], padding='VALID'):
        # Xavier Initialization
        fan_in = np.prod(filter_shape[:3]) #配列の最初の３つの積
        fan_out = np.prod(filter_shape[:2]) * filter_shape[3]
        self.W = tf.Variable(rng.uniform(
                        low=-np.sqrt(6/(fan_in + fan_out)),
                        high=np.sqrt(6/(fan_in + fan_out)),
                        size=filter_shape
                    ).astype('float32'), name='W')
        self.b = tf.Variable(np.zeros((filter_shape[3]), dtype='float32'), name='b') 
        self.function = function
        self.strides = strides
        self.padding = padding

    def f_prop(self, x):
        u = tf.nn.conv2d(x, self.W, strides=self.strides, padding=self.padding) + self.b
        return self.function(u)
    
# プーリング
class Pooling:
    def __init__(self, ksize=[1,2,2,1], strides=[1,2,2,1], padding='VALID'):
        self.ksize = ksize
        self.strides = strides
        self.padding = padding
    
    def f_prop(self, x):
        return tf.nn.max_pool(x, ksize=self.ksize, strides=self.strides, padding=self.padding)

# 平滑化
class Flatten:
    def f_prop(self, x):
        return tf.reshape(x, (-1, np.prod(x.get_shape().as_list()[1:])))

# 全結合
class Dense:
    def __init__(self, in_dim, out_dim, function=lambda x: x):
        # Xavier Initialization
        self.W = tf.Variable(rng.uniform(
                        low=-np.sqrt(6/(in_dim + out_dim)),
                        high=np.sqrt(6/(in_dim + out_dim)),
                        size=(in_dim, out_dim)
                    ).astype('float32'), name='W')
        self.b = tf.Variable(np.zeros([out_dim]).astype('float32'))
        self.function = function

    def f_prop(self, x):
        return self.function(tf.matmul(x, self.W) + self.b)
    
    
def cnn(n_epochs, batch_size, n_batches, train_x, valid_x, train_y, valid_y,IMG_SIZE):

    layers = [                            
        Conv((5, 5, 1, 20), tf.nn.relu),  
        Pooling((1, 2, 2, 1)),            
        Conv((5, 5, 20, 50), tf.nn.relu), 
        Pooling((1, 2, 2, 1)),            

        Flatten(),
        Dense(int(((IMG_SIZE-4)/2-4)/2)*int(((IMG_SIZE-4)/2-4)/2)*50, 2, tf.nn.softmax)
    ]

    x = tf.placeholder(tf.float32, [None, IMG_SIZE, IMG_SIZE, 1])
    t = tf.placeholder(tf.float32, [None, 2])

    def f_props(layers, x):
        for layer in layers:
            x = layer.f_prop(x)
        return x

    y = f_props(layers, x)

    cost = -tf.reduce_mean(tf.reduce_sum(t * tf.log(tf.clip_by_value(y, 1e-10, 1.0)), axis=1)) # tf.log(0)によるnanを防ぐ
    train = tf.train.GradientDescentOptimizer(0.01).minimize(cost)
    valid = tf.argmax(y, 1)

    
    
    init = tf.global_variables_initializer()
    with tf.Session() as sess:
        sess.run(init)
        for epoch in range(n_epochs):
            train_x, train_y = shuffle(train_x, train_y, random_state=random_state)
            for i in range(n_batches):
                start = i * batch_size
                end = start + batch_size
                sess.run(train, feed_dict={x: train_x[start:end], t: train_y[start:end]})

            pred_y, valid_cost = sess.run([valid, cost], feed_dict={x: valid_x, t: valid_y})
            print('EPOCH:: %i, Validation cost: %.3f, Validation F1: %.3f' % (epoch + 1, valid_cost, f1_score(np.argmax(valid_y, 1).astype('int32'), pred_y, average='macro')))
            val = f1_score(np.argmax(valid_y, 1).astype('int32'), pred_y, average='macro')
            
    return val

In [6]:
###########################################
# ベイズ最適化で調べる関数
###########################################
def func(IMG_SIZE, n_epochs, batch_size):
    
    #入力ファイル
    filename = "sound1.wav"
    #上記入力ファイルを入れたときの求める音が発生した位置
    ydata_tmp = [228438,262174,320607,355446,648711,677596,726768,780349,802840,846279,1189377,1212529,1253101,1286397,1315282,
    1677564,1712623,1762456,1822212,1853743,1884834,1919673,2210953,2250864,2291877,2344797,2377210,2709283,2754706,2802775,
    2841583,2871792,2910379,2933311,2974324,3008061,3068478,3096261,3143889,3176082,3243114,3285891,3633399,3664269,3728434,
    4116514,4151353,4208463,4510107,4540536,4599850,4677687,4709439,4763461,5077894,5111851,5162125,5207548,5239962,5543370,
    5575342,5952397,5983488,6035967,6383034,6418755,6464178,6506073,6533856,6570459,6613897,6660423,6693939,6736054,7028217,
    7066363,7119283,7171321,7463263,7495897,7558519,7627315,7693465,7721910,7988935,8020908,8079561,8128732,8152105,8198631,
    8223988,8288374,8351878,8395978,8799052,8838081,8879976,9198378,9235422,9278640,9344128,9406089,9443133,9474885,9508180,
    9545886,9821731,9853263,9889425,10232302,10263393,10305288,10366587,10396134,10447731,10479483]

    IMG_SIZE = int(IMG_SIZE)#32#64#128
    n_epochs = int(n_epochs)#1#10
    batch_size = int(batch_size)#100    

    # 説明変数計算
    spectrogram2, fftLen = stft(filename,IMG_SIZE)
    data = calc_data(spectrogram2, fftLen,IMG_SIZE)

    # 目的変数計算
    target = calc_target(ydata_tmp,fftLen,data)

    train_x, valid_x, train_y, valid_y = train_test_split(data, target, test_size=0.3, random_state=42)

    n_batches = train_x.shape[0]//batch_size
    val = cnn(n_epochs, batch_size, n_batches, train_x, valid_x, train_y, valid_y,IMG_SIZE)
    
    return val

In [7]:
import sys
sys.path.append('/Users/sn/BayesianOptimization_bayes_opt')

from sklearn.cross_validation import cross_val_score
from sklearn.svm import SVC
from sklearn.datasets import load_iris
from bayes_opt import BayesianOptimization
from sklearn.gaussian_process.kernels import RBF
import warnings
warnings.filterwarnings('ignore')



In [8]:
###########################################
# ベイズ最適化で実際に調べる
###########################################
if __name__ == "__main__":

    funcBO = BayesianOptimization(
        func,                            # ブラックボックス関数
        pbounds = {
                    'IMG_SIZE': (32,256),
                    'n_epochs': (1,3),
                    'batch_size': (10,100),
                  },
        verbose = 1
                                ) 

    funcBO.explore({'IMG_SIZE': [32,256],'n_epochs': [1,2],'batch_size': [10,100]}) # 初期の探索位置

    funcBO.maximize(
        init_points = 0, # 最初にしておく数, 初期試行回数は3くらいがいい
        n_iter=2, # 何回評価するか
        acq = "ei", # 獲得関数: poi, ei, ucb
        kernel = RBF(2) # カーネル関数: matern5/2
        )

    print('-' * 53)
    print('Final Results')
    print('func: %f' % funcBO.res['max']['max_val'])


[31mInitialization[0m
[94m----------------------------------------------------------------------[0m
 Step |   Time |      Value |   IMG_SIZE |   batch_size |   n_epochs | 
EPOCH:: 1, Validation cost: 0.023, Validation F1: 0.499
    1 | 01m11s | [35m   0.49913[0m | [32m   32.0000[0m | [32m     10.0000[0m | [32m    1.0000[0m | 
EPOCH:: 1, Validation cost: 3.376, Validation F1: 0.459
EPOCH:: 2, Validation cost: 3.146, Validation F1: 0.459
    2 | 01m18s |    0.45946 |   256.0000 |     100.0000 |     2.0000 | 
[31mBayesian Optimization[0m
[94m----------------------------------------------------------------------[0m
 Step |   Time |      Value |   IMG_SIZE |   batch_size |   n_epochs | 
EPOCH:: 1, Validation cost: 0.042, Validation F1: 0.499
EPOCH:: 2, Validation cost: 0.040, Validation F1: 0.499
EPOCH:: 3, Validation cost: 0.036, Validation F1: 0.499
    3 | 01m59s |    0.49913 |    32.0000 |     100.0000 |     3.0000 | 
EPOCH:: 1, Validation cost: 0.043, Validation F1: 0.4