In [13]:
import numpy as np
import pandas as pd
import os

import sklearn
from sklearn.model_selection import train_test_split as tts

import tensorflow as tf

from keras import models, layers
from keras import Input

from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import Dense, Dropout, Flatten, Conv2D, Activation, Conv1D, Embedding, \
                                    GlobalAveragePooling2D, GlobalAveragePooling1D, GlobalMaxPooling1D, MaxPooling2D, MaxPooling1D, BatchNormalization, Add, ReLU, ZeroPadding2D, ZeroPadding1D
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint

from keras.models import load_model
from keras import optimizers, initializers, regularizers, metrics

from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split

from tensorboard.plugins import projector

import warnings 
warnings.filterwarnings('ignore')

print('tensorflow version:', tf.__version__)

tensorflow version: 2.10.0


In [2]:
import random
import torch

random.seed(2022)
torch.manual_seed(2022)
np.random.seed(2022)

## TPU setting


In [14]:
# GCP에서는 TPU 접근에 TPU의 이름을 사용합니다.
resolver = tf.distribute.cluster_resolver.TPUClusterResolver(tpu='local')
tf.config.experimental_connect_to_cluster(resolver)

# This is the TPU initialization code that has to be at the beginning.
tf.tpu.experimental.initialize_tpu_system(resolver)
print("All devices: ", tf.config.list_logical_devices('TPU'))

## data load

In [24]:
train_data = pd.read_csv('dim512_ver2.csv')
train_data.head()

Unnamed: 0,index,0,1,2,3,4,5,6,7,8,...,503,504,505,506,507,508,509,510,511,id
0,331253,0.001577,0.001786,0.002293,0.001331,0.002044,0.001718,0.001849,0.001714,0.003423,...,0.002349,0.001559,0.003429,0.003649,0.00137,0.001879,0.001621,0.001325,0.001596,0
1,298529,0.001535,0.002137,0.001733,0.00161,0.001694,0.001519,0.004592,0.001504,0.001811,...,0.004395,0.001442,0.001636,0.001737,0.002694,0.001471,0.002017,0.001428,0.002755,8
2,29783,0.00102,0.001518,0.004622,0.000992,0.002492,0.001326,0.002638,0.00142,0.001999,...,0.003213,0.001009,0.003319,0.00322,0.001135,0.000974,0.001584,0.000973,0.001391,9
3,68771,0.001365,0.001746,0.002557,0.001189,0.002299,0.001473,0.004394,0.001452,0.002735,...,0.002916,0.001198,0.003772,0.002752,0.0013,0.001333,0.001802,0.001168,0.001573,19
4,84876,0.00174,0.002714,0.001644,0.001729,0.002369,0.001926,0.00257,0.001828,0.002637,...,0.001932,0.001921,0.001658,0.001803,0.001965,0.001693,0.001714,0.001689,0.002006,29


In [21]:
test_data = pd.read_csv('dim512_label.csv')
test_data.head()

Unnamed: 0,cat1,cat2,cat3,cat4,cat5,cat6
0,0.083862,0.034479,0.026788,0.74891,0.027604,0.078358
1,0.042663,0.043163,0.009062,0.767415,0.051414,0.086285
2,0.183374,0.00577,0.235337,0.551223,0.013165,0.011131
3,0.002565,0.006234,0.008222,0.973222,0.006509,0.003247
4,0.016899,0.152607,0.008368,0.773301,0.023471,0.025355


In [22]:
print(len(train_data))
print(len(test_data))

109050
109050


In [25]:
train_data = train_data.drop(['index', 'id'], axis=1)
train_data.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,502,503,504,505,506,507,508,509,510,511
0,0.001577,0.001786,0.002293,0.001331,0.002044,0.001718,0.001849,0.001714,0.003423,0.002265,...,0.002171,0.002349,0.001559,0.003429,0.003649,0.00137,0.001879,0.001621,0.001325,0.001596
1,0.001535,0.002137,0.001733,0.00161,0.001694,0.001519,0.004592,0.001504,0.001811,0.001942,...,0.001502,0.004395,0.001442,0.001636,0.001737,0.002694,0.001471,0.002017,0.001428,0.002755
2,0.00102,0.001518,0.004622,0.000992,0.002492,0.001326,0.002638,0.00142,0.001999,0.00101,...,0.002665,0.003213,0.001009,0.003319,0.00322,0.001135,0.000974,0.001584,0.000973,0.001391
3,0.001365,0.001746,0.002557,0.001189,0.002299,0.001473,0.004394,0.001452,0.002735,0.001285,...,0.002862,0.002916,0.001198,0.003772,0.002752,0.0013,0.001333,0.001802,0.001168,0.001573
4,0.00174,0.002714,0.001644,0.001729,0.002369,0.001926,0.00257,0.001828,0.002637,0.002073,...,0.00196,0.001932,0.001921,0.001658,0.001803,0.001965,0.001693,0.001714,0.001689,0.002006


In [26]:
train_data = train_data.to_numpy()
train_data.reshape(109050, 512, 1)
train_data.shape

(109050, 512)

In [27]:
test_data.to_numpy()

array([[0.08386171, 0.03447858, 0.0267883 , 0.74890965, 0.02760383,
        0.07835799],
       [0.04266268, 0.0431625 , 0.00906157, 0.76741463, 0.0514138 ,
        0.0862848 ],
       [0.18337384, 0.00576983, 0.235337  , 0.55122334, 0.01316476,
        0.01113121],
       ...,
       [0.01770848, 0.00980657, 0.0085134 , 0.89501047, 0.03046035,
        0.03850071],
       [0.00334057, 0.00461408, 0.0054453 , 0.9749468 , 0.00839448,
        0.0032588 ],
       [0.481758  , 0.05572337, 0.02762377, 0.14890303, 0.03492685,
        0.25106493]])

In [69]:
# df2 = pd.read_csv('dim512_ver2.tsv', sep='\t')
# # df2.to_csv('dim512_ver2.tsv', sep='\t', encoding='utf-8', index=False)
# len(df2)

109050

In [28]:
x_train, x_test, y_train, y_test = train_test_split(train_data, test_data, test_size=0.2)
print(x_train.shape)
print(y_train.shape)
print(x_test.shape)
print(y_test.shape)

(87240, 512)
(87240, 6)
(21810, 512)
(21810, 6)


In [29]:
x_train = x_train.reshape(87240, 512, 1)
x_test = x_test.reshape(21810, 512, 1)
print(x_train.shape)
print(x_test.shape)

(87240, 512, 1)
(21810, 512, 1)


In [25]:
# def data_spliter(train_data, test_data, ratio=None):
    
#     len_train = len(train_data)
#     len_test = len(test_data)
    
#     train_idx = int(len_train*(1-ratio))
#     test_idx = int(len_test*(1-ratio))
    
    
#     if len_train != len_test:
#         print("Error: data shapes are not match!")
        
#     else:
#         x_train, x_test = train_data[:train_idx], train_data[train_idx:]
#         y_train, y_test = test_data[:test_idx], test_data[test_idx:]
        
#         print(f"x_train shape: {x_train.shape}")
#         print(f"x_test shape: {x_test.shape}")
#         print(f"y_train shape: {y_train.shape}")
#         print(f"y_test shape: {y_test.shape}")
    
#     return x_train, x_test, y_train, y_test

In [26]:
# def get_main_emotion(labels):
#     temp = []
#     for i in range(len(labels)):
#         temp_max = np.argmax(labels[i])+1
#         temp.append(temp_max)
#     return temp

In [27]:
# main_emotion = get_main_emotion(categories)
# main_emotion[:10]

[6, 6, 1, 4, 2, 1, 4, 4, 4, 1]

In [28]:
# main_emotion = np.array(main_emotion)
# type(main_emotion)

numpy.ndarray

In [29]:
# len(categories)

112512

In [30]:
# len(main_emotion)

112512

In [31]:
# train_data = train_data.reshape(112512, 48, 1876)
# train_data.shape

(112512, 48, 1876)

In [33]:
# a_train, a_test, b_train, b_test = train_test_split(train_data, categories, test_size=0.2)
# print(a_train.shape)
# print(a_test.shape)
# print(b_train.shape)
# print(b_test.shape)

tcmalloc: large alloc 32420528128 bytes == 0x137d718000 @  0x7fa82986a680 0x7fa82988b824 0x7fa81ee1d064 0x7fa81ee1d7ff 0x7fa81ee7bf55 0x7fa81ee7c0b6 0x7fa81ef0e76a 0x7fa81ef0f1cb 0x5139cc 0x56bf28 0x569dba 0x5f6eb3 0x56cc1f 0x569dba 0x5f6eb3 0x56bacd 0x501923 0x524f74 0x5f15c4 0x5f745f 0x570d55 0x569dba 0x5f6eb3 0x56cc1f 0x569dba 0x6902a7 0x6023c4 0x5c6730 0x56bacd 0x501488 0x56d4d6
tcmalloc: large alloc 8105402368 bytes == 0x1b0bdc0000 @  0x7fa82986a680 0x7fa82988b824 0x7fa81ee1d064 0x7fa81ee1d7ff 0x7fa81ee7bf55 0x7fa81ee7c0b6 0x7fa81ef0e76a 0x7fa81ef0f1cb 0x5139cc 0x56bf28 0x569dba 0x5f6eb3 0x56cc1f 0x569dba 0x5f6eb3 0x56bacd 0x501923 0x524f74 0x5f15c4 0x5f745f 0x570d55 0x569dba 0x5f6eb3 0x56cc1f 0x569dba 0x6902a7 0x6023c4 0x5c6730 0x56bacd 0x501488 0x56d4d6


(90009, 48, 1876)
(22503, 48, 1876)
(90009, 6)
(22503, 6)


In [34]:
# x_train, x_test, y_train, y_test = data_spliter(train_data, test_dt, 0.2)

## resnet50

In [64]:
# # number of classes
# K = 6

# input_tensor = Input(shape=(None, ), dtype='float32', name='input')

# def conv1_layer(x):    
#     x = ZeroPadding1D(padding=3)(x)
#     x = Conv1D(64, 7, strides=2)(x)
#     x = BatchNormalization()(x)
#     x = Activation('relu')(x)
#     x = ZeroPadding1D(padding=1)(x)
    
#     return x   


# def conv2_layer(x):         
#     x = MaxPooling1D(2)(x)     
 
#     shortcut = x
 
#     for i in range(3):
#         if (i == 0):
#             x = Conv1D(64, 1, strides=1, padding='valid')(x)
#             x = BatchNormalization()(x)
#             x = Activation('relu')(x)
            
#             x = Conv1D(64, 3, strides=1, padding='same')(x)
#             x = BatchNormalization()(x)
#             x = Activation('relu')(x)
 
#             x = Conv1D(256, 1, strides=1, padding='valid')(x)
#             shortcut = Conv1D(256, 1, strides=1, padding='valid')(shortcut)            
#             x = BatchNormalization()(x)
#             shortcut = BatchNormalization()(shortcut)
 
#             x = Add()([x, shortcut])
#             x = Activation('relu')(x)
            
#             shortcut = x
 
#         else:
#             x = Conv1D(64, 1, strides=1, padding='valid')(x)
#             x = BatchNormalization()(x)
#             x = Activation('relu')(x)
            
#             x = Conv1D(64, 3, strides=1, padding='same')(x)
#             x = BatchNormalization()(x)
#             x = Activation('relu')(x)
 
#             x = Conv1D(256, 1, strides=1, padding='valid')(x)
#             x = BatchNormalization()(x)            
 
#             x = Add()([x, shortcut])   
#             x = Activation('relu')(x)  
 
#             shortcut = x        
    
#     return x
 
 
 
# def conv3_layer(x):        
#     shortcut = x    
    
#     for i in range(4):     
#         if(i == 0):            
#             x = Conv1D(128, 1, strides=2, padding='valid')(x)
#             x = BatchNormalization()(x)
#             x = Activation('relu')(x)        
            
#             x = Conv1D(128, 3, strides=1, padding='same')(x)
#             x = BatchNormalization()(x)
#             x = Activation('relu')(x)  
 
#             x = Conv1D(512, 1, strides=1, padding='valid')(x)
#             shortcut = Conv1D(512, 1, strides=2, padding='valid')(shortcut)
#             x = BatchNormalization()(x)
#             shortcut = BatchNormalization()(shortcut)            
 
#             x = Add()([x, shortcut])    
#             x = Activation('relu')(x)    
 
#             shortcut = x              
        
#         else:
#             x = Conv1D(128, 1, strides=1, padding='valid')(x)
#             x = BatchNormalization()(x)
#             x = Activation('relu')(x)
            
#             x = Conv1D(128, 3, strides=1, padding='same')(x)
#             x = BatchNormalization()(x)
#             x = Activation('relu')(x)
 
#             x = Conv1D(512, 1, strides=1, padding='valid')(x)
#             x = BatchNormalization()(x)            
 
#             x = Add()([x, shortcut])     
#             x = Activation('relu')(x)
 
#             shortcut = x      
            
#     return x
 
 
 
# def conv4_layer(x):
#     shortcut = x        
  
#     for i in range(6):     
#         if(i == 0):            
#             x = Conv1D(256, 1, strides=2, padding='valid')(x)
#             x = BatchNormalization()(x)
#             x = Activation('relu')(x)        
            
#             x = Conv1D(256, 3, strides=1, padding='same')(x)
#             x = BatchNormalization()(x)
#             x = Activation('relu')(x)  
 
#             x = Conv1D(1024, 1, strides=1, padding='valid')(x)
#             shortcut = Conv1D(1024, 1, strides=2, padding='valid')(shortcut)
#             x = BatchNormalization()(x)
#             shortcut = BatchNormalization()(shortcut)
 
#             x = Add()([x, shortcut]) 
#             x = Activation('relu')(x)
 
#             shortcut = x               
        
#         else:
#             x = Conv1D(256, 1, strides=1, padding='valid')(x)
#             x = BatchNormalization()(x)
#             x = Activation('relu')(x)
            
#             x = Conv1D(256, 3, strides=1, padding='same')(x)
#             x = BatchNormalization()(x)
#             x = Activation('relu')(x)
 
#             x = Conv1D(1024, 1, strides=1, padding='valid')(x)
#             x = BatchNormalization()(x)            
 
#             x = Add()([x, shortcut])    
#             x = Activation('relu')(x)
 
#             shortcut = x      
 
#     return x
 
 
 
# def conv5_layer(x):
#     shortcut = x    
  
#     for i in range(3):     
#         if(i == 0):            
#             x = Conv1D(512, 1, strides=2, padding='valid')(x)
#             x = BatchNormalization()(x)
#             x = Activation('relu')(x)        
            
#             x = Conv1D(512, 3, strides=1, padding='same')(x)
#             x = BatchNormalization()(x)
#             x = Activation('relu')(x)  
 
#             x = Conv1D(2048, 1, strides=1, padding='valid')(x)
#             shortcut = Conv1D(2048, 1, strides=2, padding='valid')(shortcut)
#             x = BatchNormalization()(x)
#             shortcut = BatchNormalization()(shortcut)            
 
#             x = Add()([x, shortcut])  
#             x = Activation('relu')(x)      
 
#             shortcut = x               
        
#         else:
#             x = Conv1D(512, 1, strides=1, padding='valid')(x)
#             x = BatchNormalization()(x)
#             x = Activation('relu')(x)
            
#             x = Conv1D(512, 3, strides=1, padding='same')(x)
#             x = BatchNormalization()(x)
#             x = Activation('relu')(x)
 
#             x = Conv1D(2048, 1, strides=1, padding='valid')(x)
#             x = BatchNormalization()(x)           
            
#             x = Add()([x, shortcut]) 
#             x = Activation('relu')(x)       
 
#             shortcut = x                  
 
#     return x
 
 
 
# x = conv1_layer(input_tensor)
# x = conv2_layer(x)
# x = conv3_layer(x)
# x = conv4_layer(x)
# x = conv5_layer(x)
 
# x = GlobalAveragePooling1D()(x)
# output_tensor = Dense(K, activation='softmax')(x)
 
# resnet50 = Model(input_tensor, output_tensor)
# resnet50.summary()


ValueError: Input 0 of layer "zero_padding1d" is incompatible with the layer: expected ndim=3, found ndim=2. Full shape received: (None, None)

## simple cnn

In [18]:
'''
input size = (N, 48, 1876, 1)

1876 = 2*2*7*67
48 = 2*2*2*2*3
'''

embedding_dim = 48 # 임베딩 벡터의 차원 // 여기서는 hz의 채널 수
# kernel_size = (4, 6)
kernel_size = (8, 6)
drop_ratio = 0.3
hidden_units = 768 # 뉴런의 수
epoch = 50

def build_cnn2():
    model = Sequential()
    # model.add(Conv2D(16, kernel_size, padding='same', activation='leaky_relu', input_shape=(48, 1876, 1)))
    # model.add(
    model.add(Conv1D(16, 3, padding='same', activation='leaky_relu', input_shape=(512,1)))
    model.add(MaxPooling1D(2))
    
    model.add(Conv1D(32, 3, padding='same', activation='leaky_relu'))
    model.add(MaxPooling1D(2))
    
    model.add(Conv1D(64, 3, padding='same', activation='leaky_relu'))
    model.add(GlobalAveragePooling1D())
    
    model.add(Flatten())
    model.add(Dense(hidden_units, activation='relu'))
    model.add(Dropout(drop_ratio))
    model.add(Dense(6, activation='softmax'))
    
    return model

def build_dense():
    model = Sequential()
    model.add(Dense(64, activation='relu', input_shape=(512,)))
    model.add(Dense(6, activation='softmax'))
    
    return model

## 평가지표

In [19]:
'''
가사 감성벡터 -> (1, 6) shape
mel 감성벡터 -> (1, 6) shape

대전제: 가사랑 mel이랑 동일한 감성을 지향한다.

따라서 가사의 감성벡터랑 mel의 감성벡터랑 완벽히 똑같을 필요는 없다.
BUT, 두 감성벡터가 표현하는 주 감정의 분포는 비슷해야 됨.

예시: (기쁨, 슬픔, 중립)
가사 (0.5, 0.3, 0.2)
mel (0.4, 0.5, 0.1)
-> 잘 분석했다고 판정

가사를 정답으로 줌. -> CNN 학습 -> 학습방향은 평가지표를 따라감.(라벨따라가는게 아님)
-> 따라서 평가지표에서 유사한 감정분포를 따라가도록 알려주면 된다!

가사 (0.5, 0.3, 0.2)
mel (0.1, 0.2, 0.7)
-> 잘 분석하지 못했다고 판정

<평가 매트릭스 알고리즘>
mel의 감성벡터에서 가장 큰 값 2개 추출(= 주 감정 2개) -> A
가사의 감성벡터에서 가장 큰 값 2개 추출(= 주 감정 2개) -> B

set(A) == set(B) 이면 맞춘 판정
아니면 틀린 판정
'''

# 둘 중에 하나 이상 맞추면 ok
def correct1(y_true, y_pred):
    true_set = np.argsort(y_true)[-2:]
    pred_set = np.argsort(y_pred)[-2:]
    if set(true_set) & set(pred_set):
        score = True
    else:
        score = False
    return score

# 순서 상관없이 둘 다 맞추면 ok
def correct2(y_true, y_pred):
    true_set = np.argsort(y_true)[-2:]
    pred_set = np.argsort(y_pred)[-2:]
    if set(true_set) == set(pred_set):
        score = True
    else:
        score = False
    return score

# 순서 상관없이 셋 다 맞추면 ok
def correct3(y_true, y_pred):
    true_set = np.argsort(y_true)[-3:]
    pred_set = np.argsort(y_pred)[-3:]
    if set(true_set) == set(pred_set):
        score = True
    else:
        score = False
    return score

# 셋 중 둘 이상 순서를 맞추면 ok
def correct4(y_true, y_pred):
    true_set = np.argsort(y_true)[-3:]
    pred_set = np.argsort(y_pred)[-3:]
    
    # 둘 이상 순서 상관 없이 맞추는 경우
    if len(set(true_set) & set(pred_set))>=2:
        score = True
    
    ## 둘 이상 순서까지 맞추는 경우
    # if sorted(a[-2:]) == sorted(b[-2:]):
    #     score = True
    
    else:
        score = False
    return score

def semy_metric(f):
    def my_metric(real, pred):
        N = len(real)
        count = 0 
        for i,j in zip(real, pred):
            count += f(i,j)
        return count/N
    return my_metric

def match1(y_true, y_pred):
    score = tf.py_function(func=semy_metric(correct1), inp=[y_true, y_pred], Tout=tf.float16, name='custom1') # tf 2.x
    return score

def match2(y_true, y_pred):
    score = tf.py_function(func=semy_metric(correct2), inp=[y_true, y_pred], Tout=tf.float16, name='custom2') # tf 2.x
    return score

def match3(y_true, y_pred):
    score = tf.py_function(func=semy_metric(correct3), inp=[y_true, y_pred], Tout=tf.float16, name='custom3') # tf 2.x
    return score

def match4(y_true, y_pred):
    score = tf.py_function(func=semy_metric(correct4), inp=[y_true, y_pred], Tout=tf.float16, name='custom4') # tf 2.x
    return score

## train

In [20]:
model_cnn =  build_cnn2()
model_cnn.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv1d (Conv1D)             (None, 512, 16)           64        
                                                                 
 max_pooling1d (MaxPooling1D  (None, 256, 16)          0         
 )                                                               
                                                                 
 conv1d_1 (Conv1D)           (None, 256, 32)           1568      
                                                                 
 max_pooling1d_1 (MaxPooling  (None, 128, 32)          0         
 1D)                                                             
                                                                 
 conv1d_2 (Conv1D)           (None, 128, 64)           6208      
                                                                 
 global_average_pooling1d (G  (None, 64)               0

In [21]:
es = EarlyStopping(monitor='val_loss', mode='min', verbose=1, patience=5)
mc = ModelCheckpoint('best_model.h5', monitor='val_acc', mode='max', verbose=1, save_best_only=True)

In [32]:
model = build_dense()
model.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_2 (Dense)             (None, 64)                32832     
                                                                 
 dense_3 (Dense)             (None, 6)                 390       
                                                                 
Total params: 33,222
Trainable params: 33,222
Non-trainable params: 0
_________________________________________________________________


In [23]:
model_cnn.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['acc',match1, match2, match3, match4])
history = model_cnn.fit(x_train, y_train, epochs=epoch, validation_data=(x_test, y_test), callbacks=[es, mc, WandbCallback()])



Instructions for updating:
Use `tf.compat.v1.graph_util.tensor_shape_from_node_def_name`
Epoch 1/50


2022-09-26 14:16:59.383728: I tensorflow/core/grappler/devices.cc:75] Number of eligible GPUs (core count >= 8, compute capability >= 0.0): 0 (Note: TensorFlow was not compiled with CUDA or ROCm support)
2022-09-26 14:16:59.383888: I tensorflow/core/grappler/clusters/single_machine.cc:358] Starting new session


Epoch 1: val_acc improved from -inf to 0.76800, saving model to best_model.h5




INFO:tensorflow:Assets written to: /home/nochesita95/wandb/run-20220926_141456-3h8h4zbi/files/model-best/assets


INFO:tensorflow:Assets written to: /home/nochesita95/wandb/run-20220926_141456-3h8h4zbi/files/model-best/assets
wandb: Adding directory to artifact (/home/nochesita95/wandb/run-20220926_141456-3h8h4zbi/files/model-best)... Done. 0.0s


Epoch 2/50
Epoch 2: val_acc did not improve from 0.76800




INFO:tensorflow:Assets written to: /home/nochesita95/wandb/run-20220926_141456-3h8h4zbi/files/model-best/assets


INFO:tensorflow:Assets written to: /home/nochesita95/wandb/run-20220926_141456-3h8h4zbi/files/model-best/assets
wandb: Adding directory to artifact (/home/nochesita95/wandb/run-20220926_141456-3h8h4zbi/files/model-best)... Done. 0.0s


Epoch 3/50
Epoch 3: val_acc did not improve from 0.76800




INFO:tensorflow:Assets written to: /home/nochesita95/wandb/run-20220926_141456-3h8h4zbi/files/model-best/assets


INFO:tensorflow:Assets written to: /home/nochesita95/wandb/run-20220926_141456-3h8h4zbi/files/model-best/assets
wandb: Adding directory to artifact (/home/nochesita95/wandb/run-20220926_141456-3h8h4zbi/files/model-best)... Done. 0.0s


Epoch 4/50
Epoch 4: val_acc did not improve from 0.76800




INFO:tensorflow:Assets written to: /home/nochesita95/wandb/run-20220926_141456-3h8h4zbi/files/model-best/assets


INFO:tensorflow:Assets written to: /home/nochesita95/wandb/run-20220926_141456-3h8h4zbi/files/model-best/assets
wandb: Adding directory to artifact (/home/nochesita95/wandb/run-20220926_141456-3h8h4zbi/files/model-best)... Done. 0.0s


Epoch 5/50
Epoch 5: val_acc did not improve from 0.76800




INFO:tensorflow:Assets written to: /home/nochesita95/wandb/run-20220926_141456-3h8h4zbi/files/model-best/assets


INFO:tensorflow:Assets written to: /home/nochesita95/wandb/run-20220926_141456-3h8h4zbi/files/model-best/assets
wandb: Adding directory to artifact (/home/nochesita95/wandb/run-20220926_141456-3h8h4zbi/files/model-best)... Done. 0.0s


Epoch 6/50
Epoch 6: val_acc did not improve from 0.76800
Epoch 7/50
Epoch 7: val_acc did not improve from 0.76800
Epoch 8/50
Epoch 8: val_acc did not improve from 0.76800
Epoch 9/50
Epoch 9: val_acc did not improve from 0.76800
Epoch 10/50
Epoch 10: val_acc did not improve from 0.76800




INFO:tensorflow:Assets written to: /home/nochesita95/wandb/run-20220926_141456-3h8h4zbi/files/model-best/assets


INFO:tensorflow:Assets written to: /home/nochesita95/wandb/run-20220926_141456-3h8h4zbi/files/model-best/assets
wandb: Adding directory to artifact (/home/nochesita95/wandb/run-20220926_141456-3h8h4zbi/files/model-best)... Done. 0.0s


Epoch 11/50
Epoch 11: val_acc did not improve from 0.76800
Epoch 12/50
Epoch 12: val_acc did not improve from 0.76800
Epoch 13/50
Epoch 13: val_acc did not improve from 0.76800
Epoch 14/50
Epoch 14: val_acc did not improve from 0.76800
Epoch 15/50
Epoch 15: val_acc did not improve from 0.76800
Epoch 15: early stopping


# train_dense2

In [33]:
# import tensorflow_addons as tfa
# metric = tfa.metrics.r_square.RSquare()
# tf.keras.metrics.RootMeanSquaredError()

model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['acc',match1, match2, match3, match4])
# model.compile(optimizer='adam', loss='mean_squared_error', metrics=['acc', match1, match2, match3, match4])
# history = model.fit(x_train, y_train, epochs=epoch, validation_data=(x_test, y_test), callbacks=[es, mc, WandbCallback()])
history = model.fit(x_train, y_train, epochs=epoch, validation_data=(x_test, y_test), callbacks=[es, mc, WandbCallback()])





Instructions for updating:
Use `tf.compat.v1.graph_util.tensor_shape_from_node_def_name`
Epoch 1/50


2022-09-26 01:20:01.420869: I tensorflow/core/grappler/devices.cc:75] Number of eligible GPUs (core count >= 8, compute capability >= 0.0): 0 (Note: TensorFlow was not compiled with CUDA or ROCm support)
2022-09-26 01:20:01.421085: I tensorflow/core/grappler/clusters/single_machine.cc:358] Starting new session


Epoch 1: val_acc improved from -inf to 0.77029, saving model to best_model.h5
INFO:tensorflow:Assets written to: /home/nochesita95/wandb/run-20220926_011548-3ace3vbc/files/model-best/assets


wandb: Adding directory to artifact (/home/nochesita95/wandb/run-20220926_011548-3ace3vbc/files/model-best)... Done. 0.0s


Epoch 2/50
Epoch 2: val_acc did not improve from 0.77029
INFO:tensorflow:Assets written to: /home/nochesita95/wandb/run-20220926_011548-3ace3vbc/files/model-best/assets


wandb: Adding directory to artifact (/home/nochesita95/wandb/run-20220926_011548-3ace3vbc/files/model-best)... Done. 0.0s


Epoch 3/50
Epoch 3: val_acc did not improve from 0.77029
INFO:tensorflow:Assets written to: /home/nochesita95/wandb/run-20220926_011548-3ace3vbc/files/model-best/assets


wandb: Adding directory to artifact (/home/nochesita95/wandb/run-20220926_011548-3ace3vbc/files/model-best)... Done. 0.0s


Epoch 4/50
Epoch 4: val_acc did not improve from 0.77029
INFO:tensorflow:Assets written to: /home/nochesita95/wandb/run-20220926_011548-3ace3vbc/files/model-best/assets


wandb: Adding directory to artifact (/home/nochesita95/wandb/run-20220926_011548-3ace3vbc/files/model-best)... Done. 0.0s


Epoch 5/50
Epoch 5: val_acc did not improve from 0.77029
INFO:tensorflow:Assets written to: /home/nochesita95/wandb/run-20220926_011548-3ace3vbc/files/model-best/assets


wandb: Adding directory to artifact (/home/nochesita95/wandb/run-20220926_011548-3ace3vbc/files/model-best)... Done. 0.0s


Epoch 6/50
Epoch 6: val_acc did not improve from 0.77029
INFO:tensorflow:Assets written to: /home/nochesita95/wandb/run-20220926_011548-3ace3vbc/files/model-best/assets


wandb: Adding directory to artifact (/home/nochesita95/wandb/run-20220926_011548-3ace3vbc/files/model-best)... Done. 0.0s


Epoch 7/50
Epoch 7: val_acc did not improve from 0.77029
Epoch 8/50
Epoch 8: val_acc did not improve from 0.77029
INFO:tensorflow:Assets written to: /home/nochesita95/wandb/run-20220926_011548-3ace3vbc/files/model-best/assets


wandb: Adding directory to artifact (/home/nochesita95/wandb/run-20220926_011548-3ace3vbc/files/model-best)... Done. 0.0s


Epoch 9/50
Epoch 9: val_acc improved from 0.77029 to 0.77033, saving model to best_model.h5
INFO:tensorflow:Assets written to: /home/nochesita95/wandb/run-20220926_011548-3ace3vbc/files/model-best/assets


wandb: Adding directory to artifact (/home/nochesita95/wandb/run-20220926_011548-3ace3vbc/files/model-best)... Done. 0.0s


Epoch 10/50
Epoch 10: val_acc did not improve from 0.77033
INFO:tensorflow:Assets written to: /home/nochesita95/wandb/run-20220926_011548-3ace3vbc/files/model-best/assets


wandb: Adding directory to artifact (/home/nochesita95/wandb/run-20220926_011548-3ace3vbc/files/model-best)... Done. 0.0s


Epoch 11/50
Epoch 11: val_acc improved from 0.77033 to 0.77056, saving model to best_model.h5
Epoch 12/50
Epoch 12: val_acc did not improve from 0.77056
Epoch 13/50
Epoch 13: val_acc improved from 0.77056 to 0.77070, saving model to best_model.h5
INFO:tensorflow:Assets written to: /home/nochesita95/wandb/run-20220926_011548-3ace3vbc/files/model-best/assets


wandb: Adding directory to artifact (/home/nochesita95/wandb/run-20220926_011548-3ace3vbc/files/model-best)... Done. 0.0s


Epoch 14/50
Epoch 14: val_acc did not improve from 0.77070
INFO:tensorflow:Assets written to: /home/nochesita95/wandb/run-20220926_011548-3ace3vbc/files/model-best/assets


wandb: Adding directory to artifact (/home/nochesita95/wandb/run-20220926_011548-3ace3vbc/files/model-best)... Done. 0.0s


Epoch 15/50
Epoch 15: val_acc did not improve from 0.77070
INFO:tensorflow:Assets written to: /home/nochesita95/wandb/run-20220926_011548-3ace3vbc/files/model-best/assets


wandb: Adding directory to artifact (/home/nochesita95/wandb/run-20220926_011548-3ace3vbc/files/model-best)... Done. 0.0s


Epoch 16/50
Epoch 16: val_acc did not improve from 0.77070
INFO:tensorflow:Assets written to: /home/nochesita95/wandb/run-20220926_011548-3ace3vbc/files/model-best/assets


wandb: Adding directory to artifact (/home/nochesita95/wandb/run-20220926_011548-3ace3vbc/files/model-best)... Done. 0.0s


Epoch 17/50
Epoch 17: val_acc improved from 0.77070 to 0.77075, saving model to best_model.h5
INFO:tensorflow:Assets written to: /home/nochesita95/wandb/run-20220926_011548-3ace3vbc/files/model-best/assets


wandb: Adding directory to artifact (/home/nochesita95/wandb/run-20220926_011548-3ace3vbc/files/model-best)... Done. 0.0s


Epoch 18/50
Epoch 18: val_acc improved from 0.77075 to 0.77107, saving model to best_model.h5
Epoch 19/50
Epoch 19: val_acc improved from 0.77107 to 0.77130, saving model to best_model.h5
Epoch 20/50
Epoch 20: val_acc did not improve from 0.77130
Epoch 21/50
Epoch 21: val_acc did not improve from 0.77130
Epoch 22/50
Epoch 22: val_acc did not improve from 0.77130
INFO:tensorflow:Assets written to: /home/nochesita95/wandb/run-20220926_011548-3ace3vbc/files/model-best/assets


wandb: Adding directory to artifact (/home/nochesita95/wandb/run-20220926_011548-3ace3vbc/files/model-best)... Done. 0.0s


Epoch 23/50
Epoch 23: val_acc did not improve from 0.77130
Epoch 24/50
Epoch 24: val_acc did not improve from 0.77130
INFO:tensorflow:Assets written to: /home/nochesita95/wandb/run-20220926_011548-3ace3vbc/files/model-best/assets


wandb: Adding directory to artifact (/home/nochesita95/wandb/run-20220926_011548-3ace3vbc/files/model-best)... Done. 0.0s


Epoch 25/50
Epoch 25: val_acc improved from 0.77130 to 0.77166, saving model to best_model.h5
Epoch 26/50
Epoch 26: val_acc did not improve from 0.77166
Epoch 27/50
Epoch 27: val_acc did not improve from 0.77166
Epoch 28/50
Epoch 28: val_acc did not improve from 0.77166
Epoch 29/50
Epoch 29: val_acc did not improve from 0.77166
INFO:tensorflow:Assets written to: /home/nochesita95/wandb/run-20220926_011548-3ace3vbc/files/model-best/assets


wandb: Adding directory to artifact (/home/nochesita95/wandb/run-20220926_011548-3ace3vbc/files/model-best)... Done. 0.0s


Epoch 30/50
Epoch 30: val_acc did not improve from 0.77166
Epoch 31/50
Epoch 31: val_acc did not improve from 0.77166
Epoch 32/50
Epoch 32: val_acc improved from 0.77166 to 0.77176, saving model to best_model.h5
INFO:tensorflow:Assets written to: /home/nochesita95/wandb/run-20220926_011548-3ace3vbc/files/model-best/assets


wandb: Adding directory to artifact (/home/nochesita95/wandb/run-20220926_011548-3ace3vbc/files/model-best)... Done. 0.0s


Epoch 33/50
Epoch 33: val_acc did not improve from 0.77176
Epoch 34/50
Epoch 34: val_acc did not improve from 0.77176
Epoch 35/50
Epoch 35: val_acc did not improve from 0.77176
Epoch 36/50
Epoch 36: val_acc did not improve from 0.77176
INFO:tensorflow:Assets written to: /home/nochesita95/wandb/run-20220926_011548-3ace3vbc/files/model-best/assets


wandb: Adding directory to artifact (/home/nochesita95/wandb/run-20220926_011548-3ace3vbc/files/model-best)... Done. 0.0s


Epoch 37/50
Epoch 37: val_acc did not improve from 0.77176
Epoch 38/50
Epoch 38: val_acc did not improve from 0.77176
Epoch 39/50
Epoch 39: val_acc did not improve from 0.77176
Epoch 40/50
Epoch 40: val_acc did not improve from 0.77176
INFO:tensorflow:Assets written to: /home/nochesita95/wandb/run-20220926_011548-3ace3vbc/files/model-best/assets


wandb: Adding directory to artifact (/home/nochesita95/wandb/run-20220926_011548-3ace3vbc/files/model-best)... Done. 0.0s


Epoch 41/50
Epoch 41: val_acc did not improve from 0.77176
INFO:tensorflow:Assets written to: /home/nochesita95/wandb/run-20220926_011548-3ace3vbc/files/model-best/assets


wandb: Adding directory to artifact (/home/nochesita95/wandb/run-20220926_011548-3ace3vbc/files/model-best)... Done. 0.0s


Epoch 42/50
Epoch 42: val_acc improved from 0.77176 to 0.77185, saving model to best_model.h5
Epoch 43/50
Epoch 43: val_acc did not improve from 0.77185
INFO:tensorflow:Assets written to: /home/nochesita95/wandb/run-20220926_011548-3ace3vbc/files/model-best/assets


wandb: Adding directory to artifact (/home/nochesita95/wandb/run-20220926_011548-3ace3vbc/files/model-best)... Done. 0.0s


Epoch 44/50
Epoch 44: val_acc did not improve from 0.77185
INFO:tensorflow:Assets written to: /home/nochesita95/wandb/run-20220926_011548-3ace3vbc/files/model-best/assets


wandb: Adding directory to artifact (/home/nochesita95/wandb/run-20220926_011548-3ace3vbc/files/model-best)... Done. 0.0s


Epoch 45/50
Epoch 45: val_acc did not improve from 0.77185
Epoch 46/50
Epoch 46: val_acc improved from 0.77185 to 0.77189, saving model to best_model.h5
Epoch 47/50
Epoch 47: val_acc did not improve from 0.77189
Epoch 48/50
Epoch 48: val_acc did not improve from 0.77189
Epoch 49/50
Epoch 49: val_acc did not improve from 0.77189
INFO:tensorflow:Assets written to: /home/nochesita95/wandb/run-20220926_011548-3ace3vbc/files/model-best/assets


wandb: Adding directory to artifact (/home/nochesita95/wandb/run-20220926_011548-3ace3vbc/files/model-best)... Done. 0.0s


Epoch 50/50
Epoch 50: val_acc did not improve from 0.77189


## 평가

In [83]:
# 평가
test_loss, match4, match1, match2, match3, test_acc = model.evaluate(x_test, y_test, verbose=2)
print("test_loss: {} ".format(test_loss))
print("test_acc: {}".format(test_acc))
print("test_match1: {}".format(match1))
print("test_match2: {}".format(match2))
print("test_match3: {}".format(match3))
print("test_match4: {}".format(match4))

682/682 - 50s - loss: 1.0713 - acc: 0.7703 - match1: 0.9863 - match2: 0.2269 - match3: 0.3534 - match4: 0.7763 - 50s/epoch - 73ms/step
test_loss: 1.0713202953338623 
test_acc: 0.776260495185852
test_match1: 0.77028888463974
test_match2: 0.9863095283508301
test_match3: 0.22686535120010376
test_match4: 0.35344868898391724


In [90]:
inf_data = train_data

# 인퍼런스용 데이터셋
def inf_mel(model, data):
    temp_result = model.predict(data)
    np.save('dim512_inf', temp_result)
    print('file has saved.')

inf_mel(model, inf_data)

file has saved.


In [91]:
inf_dim512 = np.load('dim512_inf.npy')
inf_dim512[:5]

array([[0.09690215, 0.01703895, 0.04080429, 0.77372783, 0.02106589,
        0.05046088],
       [0.10069682, 0.02590643, 0.07402409, 0.7130443 , 0.02185237,
        0.06447601],
       [0.05976965, 0.00968204, 0.09559699, 0.8094395 , 0.00977854,
        0.01573324],
       [0.06527718, 0.01209964, 0.08108936, 0.80754834, 0.01226337,
        0.0217221 ],
       [0.12432417, 0.04683331, 0.04042078, 0.5997846 , 0.04137602,
        0.14726111]], dtype=float32)

In [92]:
inf_dim512 = pd.DataFrame(inf_dim512)
inf_dim512.to_csv('din512_inf.tsv', sep='\t', encoding='utf-8', index=False)

In [94]:
temp222 = pd.read_csv('din512_inf.tsv', sep='\t')
temp222.head()

Unnamed: 0,0,1,2,3,4,5
0,0.096902,0.017039,0.040804,0.773728,0.021066,0.050461
1,0.100697,0.025906,0.074024,0.713044,0.021852,0.064476
2,0.05977,0.009682,0.095597,0.809439,0.009779,0.015733
3,0.065277,0.0121,0.081089,0.807548,0.012263,0.021722
4,0.124324,0.046833,0.040421,0.599785,0.041376,0.147261


In [40]:
test_data.head()

Unnamed: 0,cat1,cat2,cat3,cat4,cat5,cat6
0,0.083862,0.034479,0.026788,0.74891,0.027604,0.078358
1,0.042663,0.043163,0.009062,0.767415,0.051414,0.086285
2,0.183374,0.00577,0.235337,0.551223,0.013165,0.011131
3,0.002565,0.006234,0.008222,0.973222,0.006509,0.003247
4,0.016899,0.152607,0.008368,0.773301,0.023471,0.025355


In [42]:
np.argmax(test_data.iloc[0])

3

In [95]:
dim512_categories = []

for i in range(len(temp222)):
    temp_cat = np.argmax(temp222.iloc[i])
    if temp_cat == 0: dim512_categories.append('기쁨')
    elif temp_cat == 1: dim512_categories.append('긴장')
    elif temp_cat == 2: dim512_categories.append('평화')
    elif temp_cat == 3: dim512_categories.append('슬픔')
    elif temp_cat == 4: dim512_categories.append('분노')
    else: dim512_categories.append('중립')

dim512_categories[:5]

['슬픔', '슬픔', '슬픔', '슬픔', '슬픔']

In [96]:
dim512_cats = pd.DataFrame(dim512_categories)
dim512_cats.head()

Unnamed: 0,0
0,슬픔
1,슬픔
2,슬픔
3,슬픔
4,슬픔


In [97]:
dim512_cats.to_csv('dim512_inf_labels.tsv', encoding='utf-8', sep='\t', index=False)
dim512_cats_test = pd.read_csv('dim512_inf_labels.tsv', sep='\t')
dim512_cats_test.head()

Unnamed: 0,0
0,슬픔
1,슬픔
2,슬픔
3,슬픔
4,슬픔


In [None]:
# inf_data_path = 'mel_data'

# # 인퍼런스용 데이터셋
# def inf_dataloader(model, idx):
#     for i in range(idx):
#         temp = np.load(inf_data_path+f'/f_list_{i*10000}_{(i+1)*10000}.npy')
#         temp_result = model.predict(temp)
#         np.save(f'inf_result_{i}', temp_result)
#         print(f'file no.{i} has saved.')

# # inf_dataloader(model, 12)
# inf_dataloader(resnet50, 12)

In [None]:
# npy_path = 'inf_npy_resnet'

# def npy_concatenate(path, fname):
#     npy0 = np.load(npy_path+'/inf_result_0.npy')
#     npy1 = np.load(npy_path+'/inf_result_1.npy')
#     npy2 = np.load(npy_path+'/inf_result_2.npy')
#     npy3 = np.load(npy_path+'/inf_result_3.npy')
#     npy4 = np.load(npy_path+'/inf_result_4.npy')
#     npy5 = np.load(npy_path+'/inf_result_5.npy')
#     npy6 = np.load(npy_path+'/inf_result_6.npy')
#     npy7 = np.load(npy_path+'/inf_result_7.npy')
#     npy8 = np.load(npy_path+'/inf_result_8.npy')
#     npy9 = np.load(npy_path+'/inf_result_9.npy')
#     npy10 = np.load(npy_path+'/inf_result_10.npy')
#     npy11 = np.load(npy_path+'/inf_result_11.npy')

#     inf_result_npy = np.concatenate((npy0, npy1, npy2, npy3, npy4, npy5, npy6, npy7, npy8, npy9, npy10, npy11), axis=0)
    
#     print(type(inf_result_npy))
#     print(inf_result_npy.shape)
    
#     np.save(fname, inf_result_npy)

# npy_concatenate(npy_path, 'inf_result_resnet')

In [None]:
# result = np.load('inf_result_resnet.npy')
# result[:5]

In [None]:
# def is_result_good(result):
#     c = []
#     for i in range(len(result)):
#         temp = np.argmax(result[i])+1
#         if temp == 6:
#             temp = '중립'
#         elif temp == 5:
#             temp = '분노'
#         elif temp == 4:
#             temp = '슬픔'
#         elif temp == 3:
#             temp = '평화'
#         elif temp == 2:
#             temp = '긴장'
#         else:
#             temp = '기쁨'
#         c.append(temp)
#     print(c[:10])

# is_result_good(result)

## 인퍼런스 평가

In [None]:
'''
대전제: 가사랑 멜로디랑 표현방법은 다를지언정 같은 감정(S)을 표현하고자 한다.
근데 기존 가사만 의존하는 방식은 멜로디를 고려할 수가 없다. (e.g. 김건모 - 잘못된 만남)

mel을 쓰는 이유-> 가사로만 S를 근사하기보다는 mel까지 분석해서 S에 대한 근사도를 올리기 위함

cor1, cor4 -> 높게 나올수록 좋음 (가사가 표현하는 주된 감정들을 얼마나 캐치하고 있느냐)
cor2, cor3 -> 어느 정도 높아야 하긴 하는데, 너무 높으면 또 안됨 (가사하고 얼마나 똑같이 표현하고 있느냐)

2,3번을 말로 옮기면...
-> 10~15%의 곡은 멜로디와 가사가 거의 비슷한 분위기를 가지고 있음
-> 85~90%의 곡은 멜로디와 가사의 분위기가 다름 (얼마나 다른지는 1, 4를 봐야 알 수 있음)

1,4번을 말로 옮기면...
-> 88%의 곡은 멜로디와 가사가 공통으로 가지는 분위기가 1개 이상 존재함 (e.g. 기쁨+슬픔 & 기쁨/중립)
-> 87%의 곡은 멜로디와 가사가 공통으로 가지는 분위기가 2개 이상 존재함 (e.g. 슬픔+분노+중립 & 슬픔+중립+평화)

acc 수치
-> 가장 메인이 되는 감정이 일치하는 비율 : 40%
-> 그렇다면 나머지 60%는 틀린 것이냐? 그건 또 아님 (e.g. true=(0.46, 0.44, 0.2) pred=(0.44, 0.46, 0.2) -> 틀렸다 판정)

99.99999
99.99998
99.99997

0.0002

mel의 감성분석한 라벨이 있으면 1, 2, 3, 4번의 적합한 수치가 얼마나 되는지 알 수 있음
'''