In [1]:
import os, librosa, glob, pickle
from tqdm import tqdm
import tensorflow as tf
from tensorflow.keras.layers import *
from tensorflow.keras import Model, Sequential
from tensorflow.keras.optimizers import *
from tensorflow.keras.losses import *
from tensorflow.keras.callbacks import *
import numpy as np

In [2]:
class arg:
    """Run configuration; every setting is a class-level attribute."""

    # Value written to CUDA_VISIBLE_DEVICES.
    gpus = '3'
    # Default feature type used by preprocessing().
    feature = 'mel'
    # Base learning rate and decay rate (only referenced by the disabled
    # ExponentialDecay schedule in the training cell).
    lr = 0.005
    decay = 0.98
    # Maximum number of training epochs passed to model.fit().
    epoch = 300
    # Batch size of the training and validation datasets.
    batch = 8
    # Selects which pickled feature set is loaded (2 or 3).
    channel = 2
config = arg()

# Restrict TensorFlow to the configured GPU(s), numbered in PCI bus order.
# Both variables are set before the distribution strategy is created.
os.environ['CUDA_DEVICE_ORDER'] = 'PCI_BUS_ID'
os.environ['CUDA_VISIBLE_DEVICES'] = config.gpus
AUTOTUNE = tf.data.experimental.AUTOTUNE
# NOTE(review): `strategy` is not referenced by any other cell visible here.
strategy = tf.distribute.MirroredStrategy()

path = './Music/6genres-100s'
sr = 22050  # sample rate get_data() expects every loaded clip to have

# Parse the manifest: the first line is skipped, every other line holds a
# file path followed by its genre name.
datapath, genre_names = [], []
with open('Music/6genres-100s/allgenres.mf') as f:
    for line in f.readlines()[1:]:
        fields = line.split()
        if len(fields) < 2:
            # Blank or malformed line: nothing to load.
            continue
        genre_names.append(fields[1])
        # The last character of the path field is dropped
        # (presumably a separator in the .mf format -- TODO confirm).
        datapath.append(path + fields[0][:-1])

# Sort the class names so the name -> index mapping is identical on every
# run; iterating a bare set of strings can change order between kernels.
classes = sorted(set(genre_names))
class_to_index = {name: index for index, name in enumerate(classes)}
# Integer class index for every entry of `datapath`.
label = [class_to_index[name] for name in genre_names]

INFO:tensorflow:Using MirroredStrategy with devices ('/job:localhost/replica:0/task:0/device:GPU:0',)


# data preprocessing

In [3]:
def get_data():
    """Load every file listed in ``datapath`` and return the waveforms.

    Each file is read with ``librosa.load``; when the returned sample rate
    differs from the module-level ``sr`` the file index and its rate are
    printed. Only the first 660000 samples of each waveform are kept.

    Returns:
        list: one waveform per entry of ``datapath``, in the same order.
    """
    clips = []
    for index, filename in enumerate(datapath):
        samples, rate = librosa.load(filename)
        if rate != sr:
            print(index, rate)
        clips.append(samples[:660000])
    return clips

def preprocessing(rawdata, label, feature=config.feature):
    """Convert waveforms to feature tensors and split them into train/validation.

    Within every consecutive group of 100 clips the first 80 are assigned to
    the training set and the remaining 20 to the validation set.

    Args:
        rawdata: sequence of 1-D waveforms, e.g. the list returned by
            ``get_data``. All clips must yield features of the same shape.
        label: integer class index per waveform, aligned with ``rawdata``.
        feature: ``'mel'`` (log-mel spectrograms of the harmonic and the
            percussive component, stacked as 2 channels), ``'stft'`` or
            ``'mfcc'`` (1 channel each).

    Returns:
        tuple: ``(data, val, y, val_y)`` -- training features, validation
        features, and the matching one-hot label arrays of width
        ``len(classes)``.

    Raises:
        ValueError: if ``feature`` is not one of the supported names.
    """
    if feature not in ('mel', 'stft', 'mfcc'):
        raise ValueError(f"unknown feature {feature!r}; expected 'mel', 'stft' or 'mfcc'")

    clips = list(rawdata)
    n_classes = len(classes)
    n_train = sum(1 for i in range(len(clips)) if i % 100 < 80)
    n_val = len(clips) - n_train

    # Feature buffers are allocated lazily from the first computed feature, so
    # their shape and channel count always match what was actually produced
    # (a fixed 3-channel buffer cannot hold the 2-channel mel feature).
    data = val = None
    y = np.zeros((n_train, n_classes))
    val_y = np.zeros((n_val, n_classes))
    x_index = 0
    val_index = 0

    for i, raw in enumerate(tqdm(clips)):
        if feature == 'mel':
            # Separate harmonic and percussive parts and describe each with
            # its own log-scaled mel spectrogram.
            y_harmonic, y_percussive = librosa.effects.hpss(raw)
            S_harmonic = librosa.feature.melspectrogram(y=y_harmonic)
            S_percussive = librosa.feature.melspectrogram(y=y_percussive)
            log_Sh = librosa.power_to_db(S_harmonic, ref=np.max)
            log_Sp = librosa.power_to_db(S_percussive, ref=np.max)
            _data = np.stack([log_Sh, log_Sp], axis=-1)
        elif feature == 'stft':
            _data = np.expand_dims(librosa.stft(raw), -1)
        else:  # 'mfcc'
            _data = np.expand_dims(librosa.feature.mfcc(y=raw), -1)

        if data is None:
            # At least float64 (as before); complex for the raw STFT.
            dtype = np.result_type(_data.dtype, np.float64)
            data = np.zeros((n_train,) + _data.shape, dtype=dtype)
            val = np.zeros((n_val,) + _data.shape, dtype=dtype)

        if i % 100 < 80:
            data[x_index] = _data
            y[x_index, label[i]] = 1.0
            x_index += 1
        else:
            val[val_index] = _data
            val_y[val_index, label[i]] = 1.0
            val_index += 1

    if data is None:
        # No input clips at all: return empty feature arrays.
        data = np.zeros((0,))
        val = np.zeros((0,))
    return (data, val, y, val_y)
# Load every clip listed in `datapath` once; get_data() keeps at most the
# first 660000 samples of each waveform.
raw = get_data()

In [117]:
# Compute the feature tensors. The two one-hot label arrays returned last are
# discarded here; labels are read from y.pickle / val_y.pickle further down.
x_2, x_val_2, _, _ = preprocessing(raw, label)

100%|██████████| 600/600 [24:38<00:00,  2.46s/it]


In [120]:
# Disabled: writes the freshly computed features to disk.
# NOTE(review): at this point `datapath` is still the list of audio file paths
# built in the setup cell; it is only rebound to a directory string in the
# next cell, so these lines would fail if re-enabled as written.
# pickle.dump(x_2,open(datapath + '/x_mel_2.pickle','wb'))
# pickle.dump(x_val_2,open(datapath + '/val_x_mel_2.pickle','wb'))

In [3]:
# Directory holding the cached (pickled) features and labels.
# Note: this rebinds `datapath`, which earlier held the list of audio files.
datapath = '/root/class'


def _load_pickle(filename):
    """Unpickle ``datapath + filename`` and close the file afterwards."""
    # NOTE: unpickling can execute arbitrary code; only load files produced by
    # this notebook or another trusted source.
    with open(datapath + filename, 'rb') as handle:
        return pickle.load(handle)


# Pick the feature set that matches the configured channel count.
if config.channel == 2:
    x = _load_pickle('/x_mel_2.pickle')
    val_x = _load_pickle('/val_x_mel_2.pickle')
elif config.channel == 3:
    x = _load_pickle('/x_mel.pickle')
    val_x = _load_pickle('/val_x_mel.pickle')
else:
    # Fail here with a clear message instead of a NameError on `x` below.
    raise ValueError(f'unsupported config.channel: {config.channel!r} (expected 2 or 3)')
y = _load_pickle('/y.pickle')
val_y = _load_pickle('/val_y.pickle')
# Features and labels must line up one-to-one in both splits.
assert (len(y) == len(x)) and (len(val_x) == len(val_y))

In [4]:
# Training pipeline: slice per sample, shuffle over the whole set, batch,
# then prefetch.
train_dataset = tf.data.Dataset.from_tensor_slices((x, y))
train_dataset = train_dataset.shuffle(len(x))
train_dataset = train_dataset.batch(config.batch)
train_dataset = train_dataset.prefetch(AUTOTUNE)

# Validation pipeline: same steps without the prefetch.
validation_dataset = tf.data.Dataset.from_tensor_slices((val_x, val_y))
validation_dataset = validation_dataset.shuffle(len(val_x))
validation_dataset = validation_dataset.batch(config.batch)

In [5]:
# Per-sample input shape for the model: the first three dimensions of one
# training sample.
shape = tuple(x[0].shape[axis] for axis in range(3))

# model definition

In [6]:
def inception_block(inputs):
    """Apply six parallel branches to ``inputs`` and concatenate the results.

    Every convolution is preceded by batch normalisation and uses 32 filters,
    stride 1, 'same' padding and ReLU. The branches are:
    1x1; 1x1 -> 3x3; 1x1 -> 5x5; 3x3 max-pool -> 1x1; 1x1 -> 7x7;
    3x3 average-pool -> 1x1.

    Returns:
        The concatenation of ``inputs`` with the six branch outputs.
    """
    def bn_conv(tensor, kernel):
        # Batch-normalise, then convolve with a square kernel.
        normalized = BatchNormalization()(tensor)
        conv = Conv2D(32, (kernel, kernel), strides=1, padding='same', activation='relu')
        return conv(normalized)

    branch_1x1 = bn_conv(inputs, 1)
    branch_3x3 = bn_conv(bn_conv(inputs, 1), 3)
    branch_5x5 = bn_conv(bn_conv(inputs, 1), 5)

    max_pooled = MaxPool2D((3, 3), strides=1, padding='same')(inputs)
    branch_max = bn_conv(max_pooled, 1)

    branch_7x7 = bn_conv(bn_conv(inputs, 1), 7)

    avg_pooled = AveragePooling2D((3, 3), strides=1, padding='same')(inputs)
    branch_avg = bn_conv(avg_pooled, 1)

    branches = [inputs, branch_1x1, branch_3x3, branch_5x5,
                branch_max, branch_7x7, branch_avg]
    return Concatenate()(branches)

def build_model():
    """Build the classifier: stem conv, three inception blocks, pooled softmax head.

    The input shape is taken from the module-level ``shape`` and the number of
    output units from ``len(classes)``.

    Returns:
        An uncompiled ``Model`` mapping one feature tensor to class
        probabilities.
    """
    model_input = Input(shape=shape)  # input size follows the loaded data
    # Swap the first two non-batch axes (same as transposing with
    # [0, 2, 1, 3]). A Keras layer is used instead of a raw tf.transpose call
    # so the graph is built purely from layers, which newer Keras requires.
    # Presumably this turns (mel bins, frames, channels) into
    # (frames, mel bins, channels) -- TODO confirm against the cached data.
    x = Permute((2, 1, 3))(model_input)
    x = Conv2D(32, (3, 3), strides=1, padding='same', activation='relu')(x)
    x = BatchNormalization()(x)
    x = MaxPool2D((4, 1))(x)

    # Three inception blocks chained one after another. (A variant feeding
    # each block the concatenation of all earlier outputs was tried and is
    # disabled.)
    x_1 = inception_block(x)
    x_2 = inception_block(x_1)
    x_3 = inception_block(x_2)

    # Squeeze back to 32 feature maps and halve the spatial resolution.
    x = BatchNormalization()(x_3)
    x = Conv2D(32, (1, 1), strides=1, padding='same', activation='relu')(x)
    x = AveragePooling2D((2, 2), strides=2)(x)

    x = BatchNormalization()(x)
    x = GlobalAveragePooling2D()(x)
    out = Dense(len(classes), activation='softmax')(x)
    return Model(inputs=model_input, outputs=out)

# training

In [7]:

# Hyper-parameters of this run; both are also written into the saved model's
# file name later on.
factor = 0.7
learning_rate = 0.01

model = build_model()

# Multiply the learning rate by `factor` once val_loss has stopped improving
# for 3 epochs. (An ExponentialDecay schedule and an SGD optimizer were tried
# earlier and are disabled.)
lr = tf.keras.callbacks.ReduceLROnPlateau(
    monitor='val_loss',
    factor=factor,
    patience=3,
    verbose=0,
    mode='auto',
)
optimizers = Adam(learning_rate=learning_rate)
model.compile(
    optimizer=optimizers,
    loss=tf.keras.losses.categorical_crossentropy,
    metrics=['acc', 'Precision', 'Recall'],
)

In [8]:
checkpoint_path = "model_checkpoint/cp-{epoch:04d}.ckpt"
checkpoint_dir = os.path.dirname(checkpoint_path)

# Save a weights-only checkpoint at the end of every epoch; the epoch number
# is zero-padded to four digits in the file name.
cp_callback = tf.keras.callbacks.ModelCheckpoint(
    filepath=checkpoint_path,
    save_weights_only=True,
    save_freq='epoch',
    verbose=0,
)
# Stop training once val_loss has not improved for 25 epochs.
early_stopping = EarlyStopping(monitor='val_loss', mode='min', patience=25)

callbacks = [cp_callback, lr, early_stopping]

In [9]:
# Train for at most config.epoch epochs, validating after each one; the
# returned history holds the per-epoch metrics.
hist = model.fit(
    train_dataset,
    epochs=config.epoch,
    validation_data=validation_dataset,
    callbacks=callbacks,
)

Train for 60 steps, validate for 15 steps
Epoch 1/300
Epoch 2/300
Epoch 3/300
Epoch 4/300
Epoch 5/300
Epoch 6/300
Epoch 7/300
Epoch 8/300
Epoch 9/300
Epoch 10/300
Epoch 11/300
Epoch 12/300
Epoch 13/300
Epoch 14/300
Epoch 15/300
Epoch 16/300
Epoch 17/300
Epoch 18/300
Epoch 19/300
Epoch 20/300
Epoch 21/300
Epoch 22/300
Epoch 23/300
Epoch 24/300
Epoch 25/300
Epoch 26/300
Epoch 27/300
Epoch 28/300
Epoch 29/300
Epoch 30/300
Epoch 31/300
Epoch 32/300
Epoch 33/300
Epoch 34/300
Epoch 35/300
Epoch 36/300
Epoch 37/300
Epoch 38/300
Epoch 39/300
Epoch 40/300
Epoch 41/300
Epoch 42/300
Epoch 43/300
Epoch 44/300
Epoch 45/300
Epoch 46/300
Epoch 47/300
Epoch 48/300
Epoch 49/300
Epoch 50/300
Epoch 51/300
Epoch 52/300
Epoch 53/300
Epoch 54/300
Epoch 55/300
Epoch 56/300
Epoch 57/300
Epoch 58/300
Epoch 59/300
Epoch 60/300
Epoch 61/300
Epoch 62/300
Epoch 63/300
Epoch 64/300
Epoch 65/300
Epoch 66/300
Epoch 67/300
Epoch 68/300
Epoch 69/300
Epoch 70/300
Epoch 71/300
Epoch 72/300
Epoch 73/300
Epoch 74/300
Epoch 75/300

Epoch 76/300
Epoch 77/300
Epoch 78/300
Epoch 79/300
Epoch 80/300
Epoch 81/300
Epoch 82/300


In [10]:
# Per-epoch validation precision / recall recorded by model.fit().
pre = hist.history['val_Precision']
rec = hist.history['val_Recall']

# F1 = 2PR / (P + R) for every epoch. Epochs where P + R is 0 get an F1 of 0
# instead of raising ZeroDivisionError.
_precision = np.asarray(pre, dtype=float)
_recall = np.asarray(rec, dtype=float)
_denominator = _precision + _recall
f1 = np.divide(
    2 * _precision * _recall,
    _denominator,
    out=np.zeros_like(_denominator),
    where=_denominator > 0,
).tolist()

# Epochs are numbered from 1; the first epoch reaching the best F1 wins.
max_epoch = int(np.argmax(f1)) + 1
print(f'f1_score: {np.nanmax(f1)}, epoch: {max_epoch}')

f1_score: 0.9411764661518751, epoch: 55


# evaluation

In [11]:
# Show the models already saved as .h5 files in the records directory.
for saved_name in os.listdir('./기록'):
    if saved_name.endswith('.h5'):
        print(saved_name)

82_adam_0.8,0.002,f10.9205021043027561.h5
82_adam_0.8,0.004, f10.911392365275926.h5
52_adam_0.7,0.001,f10.9367088385641422,channel2.h5
79_adam_0.75,0.001,f10.9198312696748169.h5
82_adam_0.8,0.004,f10.911392365275926.h5
78_adam_0.85,0.002,f10.911392365275926.h5


In [12]:
# Restore the weights of the best epoch. The epoch number is zero-padded to
# four digits exactly like the `cp-{epoch:04d}.ckpt` checkpoint names, so this
# also works for epochs below 10 or above 99.
check_point = f'model_checkpoint/cp-{max_epoch:04d}.ckpt'
model.load_weights(check_point)

# Encode the run settings and the best F1 in the file name, then save the
# full model under the records directory.
run_name = f'{max_epoch}_adam_{factor},{learning_rate},f1{np.max(f1)},channel{config.channel}'
os.makedirs('./기록', exist_ok=True)
model.save(f'./기록/{run_name}.h5')
print(run_name)

55_adam_0.7,0.01,f10.9411764661518751,channel2


In [13]:
# Reload a saved model for evaluation.
# NOTE(review): the file name is hard-coded to the name printed by the
# previous cell's recorded run; a new training run will produce a different
# name and this literal has to be updated by hand.
model_name = '55_adam_0.7,0.01,f10.9411764661518751,channel2' + '.h5'
eval_model = tf.keras.models.load_model(f'./기록/{model_name}')

In [14]:
# Confusion table: rows are the true class, columns the predicted class.
# Its size follows the model's output width instead of a fixed 6.
n_classes = eval_model.output_shape[-1]
confusion_table = np.zeros((n_classes, n_classes))
# Placeholders; both are recomputed from the table in the next cell.
precision = 0
recall = 0

# Predict one batch at a time rather than one sample per predict() call.
for features, targets in validation_dataset:
    predicted = np.argmax(eval_model.predict(features), axis=-1)
    actual = np.argmax(targets.numpy(), axis=-1)
    for true_class, predicted_class in zip(actual, predicted):
        confusion_table[true_class][predicted_class] += 1
print('confusion table')
print(confusion_table)

confusion table
[[18.  1.  1.  0.  0.  0.]
 [ 1. 18.  0.  0.  0.  1.]
 [ 0.  1. 18.  1.  0.  0.]
 [ 0.  0.  0. 20.  0.  0.]
 [ 0.  0.  0.  0. 20.  0.]
 [ 1.  0.  0.  0.  0. 19.]]


In [15]:
# Per-class counts from the confusion table (rows: true, columns: predicted).
# Works for any number of classes, not only 6.
tp = np.diag(confusion_table).astype(float)  # diagonal: correctly classified
fn = confusion_table.sum(axis=1) - tp        # row total minus diagonal: missed samples of the class
fp = confusion_table.sum(axis=0) - tp        # column total minus diagonal: samples wrongly assigned to the class


def _safe_divide(numerator, denominator):
    """Element-wise division that yields 0 where the denominator is 0."""
    return np.divide(
        numerator,
        denominator,
        out=np.zeros_like(numerator, dtype=float),
        where=denominator > 0,
    )


# A class that was never predicted (or never present) scores 0 instead of
# turning the averaged F1 into NaN.
precision = _safe_divide(tp, tp + fp)
recall = _safe_divide(tp, tp + fn)
f1_score = _safe_divide(2 * precision * recall, precision + recall)
# Macro average: unweighted mean of the per-class F1 scores.
print(f'f1_score is {np.mean(f1_score)}')

f1_score is 0.941447779862414
