<a href="https://colab.research.google.com/github/junyoung11/junyoung11/blob/main/youtubemusic_efficientnet_30.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

**drive 연동**

In [None]:
# Mount Google Drive at /content/drive; later cells read the zipped dataset
# from, and write weights and the CSV to, paths under this mount point.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


**압축풀기**

In [None]:
import zipfile

# Archive stored on the mounted Drive, and the local folder it is unpacked into.
path_to_zip_file = '/content/drive/MyDrive/array.zip'
directory_to_extract_to = '/content/array_file'

# Unpack every member of the archive; the context manager closes the file afterwards.
with zipfile.ZipFile(path_to_zip_file, mode='r') as archive:
    archive.extractall(path=directory_to_extract_to)



**efficientnet 모델**

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import os 

from sklearn.model_selection import train_test_split

# Fixed seed so the train/validation split is the same on every run.
SPLIT_SEED = 42

# One row per file in the extracted "array" folder. The listing is sorted
# because os.listdir returns entries in arbitrary order, which would change
# the split between runs even with a fixed seed.
df = pd.DataFrame()
df['file'] = sorted(os.listdir('/content/array_file/array'))
df['tag'] = df['file'].map(lambda x: x[0])  # the first character of the file name is the tag

# 80/20 split of file names against one-hot encoded tags, stratified on the
# tag so both parts keep the same class proportions.
X_train, X_val, y_train, y_val = train_test_split(
    df['file'].astype('str'),
    pd.get_dummies(df['tag']).astype('int32'),
    test_size=0.2,
    shuffle=True,
    stratify=df['tag'],
    random_state=SPLIT_SEED,
)

def data_generator(X, y, data_dir="/content/array_file/array", shape=(40, 3000, 1)):
    """Build a zero-argument generator function over (array, label) pairs.

    The returned callable is what `tf.data.Dataset.from_generator` is given
    elsewhere in this notebook; every call starts a fresh pass over the data.

    Args:
        X: Iterable of file names, each resolved relative to `data_dir`.
        y: Object exposing `to_numpy()` (e.g. a DataFrame); its rows are the
            labels and are paired with `X` positionally via `zip`.
        data_dir: Folder that holds the array files. Defaults to the folder
            the notebook extracts the dataset into.
        shape: Shape every loaded array is reshaped to. The array must contain
            exactly that many elements, otherwise `reshape` raises ValueError.

    Returns:
        A function `gen()` yielding `(array, label)` tuples, where `array` is
        float32 with shape `shape` and `label` is one row of `y.to_numpy()`.
    """
    def gen():
        labels = y.to_numpy()
        for file_id, label in zip(X, labels):
            arr = np.load(f"{data_dir}/{file_id}").astype('float32')
            yield arr.reshape(shape), label
    return gen

import tensorflow as tf

DATA_SHAPE = (40, 3000, 1)
# Width of the one-hot label vector, taken from the labels themselves
# instead of a hard-coded 8.
NUM_CLASSES = y_train.shape[1]
BATCH_SIZE = 16
AUTOTUNE = tf.data.experimental.AUTOTUNE


def make_dataset(X, y):
    """Wrap `data_generator(X, y)` in a batched, prefetched tf.data.Dataset.

    Args:
        X: File names handed to `data_generator`.
        y: One-hot labels handed to `data_generator`.

    Returns:
        A dataset of `(features, labels)` batches of up to BATCH_SIZE items;
        each feature has shape DATA_SHAPE (float32) and each label has shape
        (NUM_CLASSES,) (int32).
    """
    output_signature = (
        tf.TensorSpec(shape=DATA_SHAPE, dtype='float32'),
        # Explicit 1-tuple: the previous `(8)` was just the integer 8.
        tf.TensorSpec(shape=(NUM_CLASSES,), dtype='int32'),
    )
    dataset = tf.data.Dataset.from_generator(
        data_generator(X, y),
        output_signature=output_signature,
    )
    # NOTE(review): samples are served in the same order every epoch; consider
    # adding .shuffle(...) before .batch(...) for the training set.
    return dataset.batch(BATCH_SIZE).prefetch(buffer_size=AUTOTUNE)


ds_train = make_dataset(X_train, y_train)
ds_val = make_dataset(X_val, y_val)

# Take Sequential from the same Keras namespace as the layers; mixing the
# standalone `keras` package with `tensorflow.keras` can combine two different
# Keras implementations in one model.
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout


# EfficientNetB0 backbone trained from scratch (weights=None) on the
# single-channel input; global average pooling produces a flat feature vector.
efficientnet = tf.keras.applications.efficientnet.EfficientNetB0(
    include_top=False,
    weights=None,
    pooling='avg',
    input_shape=DATA_SHAPE)

# Classification head: dropout followed by a softmax layer with one unit per
# one-hot label column (instead of a hard-coded 8).
model_eff = Sequential([
    efficientnet,
    Dropout(0.5),
    Dense(y_train.shape[1], activation='softmax', kernel_initializer=tf.keras.initializers.HeNormal())])

model_eff.compile(optimizer = 'adam', loss = 'categorical_crossentropy', metrics = ['accuracy'])

# Stop after 3 epochs without improvement of the monitored value (Keras
# default: val_loss) and roll back to the best weights seen.
early = tf.keras.callbacks.EarlyStopping(patience=3, restore_best_weights=True)
model_eff.fit(ds_train, validation_data=ds_val, epochs=15, callbacks=[early])

model_eff.save_weights('/content/drive/MyDrive/youtube_music/youtube_30')

# To restore these weights later, rebuild model_eff as above and call
# model_eff.load_weights('/content/drive/MyDrive/youtube_music/youtube_30')
# (load_weights updates the model in place; it does not return the model).

Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15


**데이터프레임 저장**

In [None]:
import os
import pandas as pd

# DataFrame.to_csv does not create missing parent folders, so make sure the
# target folder exists before writing the file/tag table.
csv_dir = "/content/drive/MyDrive/youtube_music/youtube_30"
os.makedirs(csv_dir, exist_ok=True)
df.to_csv(f"{csv_dir}/youtube_30.csv")

**recall, precision, f1**


In [None]:
from keras import backend as K

def recall_m(y_true, y_pred):
    """Recall over the given tensors: rounded true positives / rounded actual positives.

    Both the element-wise product `y_true * y_pred` and `y_true` are clipped
    to [0, 1] and rounded before summing; K.epsilon() in the denominator
    avoids division by zero.
    """
    hits = K.round(K.clip(y_true * y_pred, 0, 1))
    actual = K.round(K.clip(y_true, 0, 1))
    return K.sum(hits) / (K.sum(actual) + K.epsilon())

def precision_m(y_true, y_pred):
    """Precision over the given tensors: rounded true positives / rounded predicted positives.

    Both the element-wise product `y_true * y_pred` and `y_pred` are clipped
    to [0, 1] and rounded before summing; K.epsilon() in the denominator
    avoids division by zero.
    """
    hits = K.round(K.clip(y_true * y_pred, 0, 1))
    predicted = K.round(K.clip(y_pred, 0, 1))
    return K.sum(hits) / (K.sum(predicted) + K.epsilon())

def f1_m(y_true, y_pred):
    """F1 score: harmonic mean of precision_m and recall_m on the same tensors.

    K.epsilon() is added to the denominator so the result is 0 rather than a
    division error when precision and recall are both 0.
    """
    p = precision_m(y_true, y_pred)
    r = recall_m(y_true, y_pred)
    harmonic_half = (p * r) / (p + r + K.epsilon())
    return 2 * harmonic_half

# compile the model
# Re-compile with the extra metrics. Their order here fixes the order of the
# values returned by evaluate(): [loss, acc, f1_m, precision_m, recall_m].
model_eff.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['acc',f1_m,precision_m, recall_m])

batch_size=16
verbose=1
# Unpack in the same order the metrics were compiled; the previous order
# (precision, recall, f1) printed every value under the wrong label.
# batch_size is not passed because ds_val is already batched.
_loss, _acc, _f1score, _precision, _recall = model_eff.evaluate(ds_val, verbose=verbose)
print('loss: {:.3f}, accuracy: {:.3f}, precision: {:.3f}, recall: {:.3f}, f1score: {:.3f}'.format(_loss, _acc, _precision, _recall, _f1score))

loss: 0.788, accuracy: 0.715, precision: 0.705, recall: 0.800, f1score: 0.634
