In [2]:
import os
import math
import keras
import numpy as np
import matplotlib.pyplot as plt
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from keras.layers import Input, Dense, Flatten, Lambda, Dropout, Activation, LSTM, GRU, \
        TimeDistributed, Convolution1D, MaxPooling1D, Convolution2D, MaxPooling2D, \
        BatchNormalization, GlobalAveragePooling1D, GlobalMaxPooling1D, concatenate, \
        ZeroPadding2D, Reshape,  GlobalAveragePooling2D, GlobalMaxPooling2D, AveragePooling2D
# from keras.layers.local import LocallyConnected1D
# from keras.layers.advanced_activations import ELU
from keras.optimizers import Adam, RMSprop
from keras.callbacks import ReduceLROnPlateau, ModelCheckpoint, EarlyStopping, CSVLogger, TensorBoard
from keras import backend as K
from keras.models import Model
from keras.models import load_model
from sklearn.model_selection import train_test_split
# Expose only the CUDA device with index 1 to this process.
# NOTE(review): on a single-GPU runtime (device index 0 only) this setting would
# presumably hide the GPU when applied before TensorFlow initialises CUDA -- confirm
# that the target host really has a second GPU.
os.environ["CUDA_VISIBLE_DEVICES"] = "1"
%matplotlib inline

# import tensorflow.compat.v1 as tf
# import keras.backend as KTF
# config = tf.ConfigProto()
# config.gpu_options.allow_growth=True   # do not reserve all GPU memory up front; allocate on demand
# sess = tf.Session(config=config)
# tf.keras.backend.set_session(sess)


In [3]:
# Mount Google Drive at /content/drive; the dataset and output paths used below live under it.
# This import is only available inside Google Colab.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [4]:
import tensorflow as tf

# Stop right away unless TensorFlow reports a GPU under the expected device name.
device_name = tf.test.gpu_device_name()
if not device_name == '/device:GPU:0':
  raise SystemError('GPU device not found')
print(f'Found GPU at: {device_name}')

Found GPU at: /device:GPU:0


In [5]:
# Query the GPU device name again (same calls as the previous cell, without the check).
import tensorflow as tf
device_name = tf.test.gpu_device_name()

In [6]:
# Show the physical GPU devices TensorFlow can see.
print(tf.config.list_physical_devices('GPU'))

[PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]


In [7]:
import tensorflow as tf

# Turn on memory growth for every GPU TensorFlow reports.
gpus = tf.config.experimental.list_physical_devices('GPU')
try:
  for gpu in gpus:
    tf.config.experimental.set_memory_growth(gpu, True)
except RuntimeError as e:
  # Memory growth has to be configured before the GPUs are initialised;
  # if it is too late, just report the error and carry on.
  print(e)

In [8]:
# Leftover note from an earlier attempt: cannot import name 'merge' from 'keras.layers'
# Display the physical GPU devices TensorFlow can see.
tf.config.experimental.list_physical_devices('GPU')

[PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]

In [9]:
# NOTE(review): tensorflow has already been imported by the cells above, so this install
# comes too late to affect the running kernel; if it is needed at all it should run
# first and pin a version -- confirm.
!pip install tensorflow



In [10]:

def base_conv_block(num_conv_filters, kernel_size):
    """Create a BatchNorm -> ReLU -> Conv2D unit.

    Args:
        num_conv_filters: Number of filters of the convolution.
        kernel_size: Kernel size forwarded to ``Convolution2D``.

    Returns:
        A callable that maps an input tensor to the output of the
        convolution (built with ``padding='same'``).
    """
    def apply(tensor):
        normalized = BatchNormalization()(tensor)
        activated = Activation('relu')(normalized)
        return Convolution2D(num_conv_filters, kernel_size, padding='same')(activated)
    return apply

In [11]:
def multi_scale_block(num_conv_filters):
    """Create a block of four parallel branches joined along the last axis.

    The branches are: a 1x1 convolution; a 1x1 followed by a 3x3 convolution;
    a 1x1 followed by a 5x5 convolution; and a 3x3 stride-1 max-pooling
    followed by a 1x1 convolution. Every convolution is a ``base_conv_block``
    with ``num_conv_filters`` filters.

    Args:
        num_conv_filters: Number of filters used by every convolution.

    Returns:
        A callable that maps an input tensor to the concatenated branch outputs.
    """
    def apply(tensor):
        # Plain 1x1 branch.
        branches = [base_conv_block(num_conv_filters, 1)(tensor)]

        # 1x1 convolution followed by a larger kernel (3x3, then 5x5).
        for kernel in (3, 5):
            reduced = base_conv_block(num_conv_filters, 1)(tensor)
            branches.append(base_conv_block(num_conv_filters, kernel)(reduced))

        # Pooling branch: stride-1 max-pooling, then a 1x1 convolution.
        pooled = MaxPooling2D(pool_size=(3, 3), strides=(1, 1), padding='same')(tensor)
        branches.append(base_conv_block(num_conv_filters, 1)(pooled))

        return concatenate(branches, axis=-1)
    return apply

In [12]:
def dense_block(num_dense_blocks, num_conv_filters):
    """Stack multi-scale blocks with dense (concatenating) connections.

    Args:
        num_dense_blocks: How many multi-scale blocks to stack.
        num_conv_filters: Filter count handed to every ``multi_scale_block``.

    Returns:
        A callable that maps an input tensor to the accumulated feature tensor:
        after each block, its output is concatenated to everything that came
        before along the last axis.
    """
    def apply(tensor):
        features = tensor
        for _step in range(num_dense_blocks):
            features = concatenate(
                [features, multi_scale_block(num_conv_filters)(features)], axis=-1)
        return features
    return apply

In [13]:
def transition_block(num_conv_filters):
    """Create a BatchNorm -> ReLU -> 1x1 Conv2D -> 2x2 average-pooling unit.

    Args:
        num_conv_filters: Number of filters of the 1x1 convolution.

    Returns:
        A callable that maps an input tensor to the pooled output
        (pool size 2x2, strides 2x2).
    """
    def apply(tensor):
        activated = Activation('relu')(BatchNormalization()(tensor))
        projected = Convolution2D(num_conv_filters, 1)(activated)
        return AveragePooling2D(pool_size=(2, 2), strides=(2, 2))(projected)
    return apply

In [14]:
def multi_scale_level_cnn(input_shape, num_dense_blocks, num_conv_filters, num_classes):
    """Assemble the classification network.

    Layout: 3x3 convolution stem (BatchNorm, ReLU, 4x1 max-pooling) ->
    dense block -> transition block -> BatchNorm/ReLU -> global average
    pooling -> softmax classifier.

    Args:
        input_shape: Shape of one input sample, passed to ``Input``.
        num_dense_blocks: Number of multi-scale blocks inside the dense block.
        num_conv_filters: Filter count used by the stem, the dense block and
            the transition block.
        num_classes: Number of units of the final softmax layer.

    Returns:
        The (uncompiled) Keras ``Model``.
    """
    inputs = Input(shape=input_shape)

    # Stem.
    stem = Convolution2D(num_conv_filters, 3, padding='same')(inputs)
    stem = Activation('relu')(BatchNormalization()(stem))
    stem = MaxPooling2D(pool_size=(4, 1))(stem)

    # Feature extractor.
    features = dense_block(num_dense_blocks, num_conv_filters)(stem)
    features = transition_block(num_conv_filters)(features)

    # Classification head.
    head = Activation('relu')(BatchNormalization()(features))
    head = GlobalAveragePooling2D()(head)
    outputs = Dense(num_classes, activation='softmax')(head)

    return Model(inputs=inputs, outputs=outputs)

In [15]:
def process_data_for_conv2D(X, resize_shape=None):
    """Add a trailing channel axis to every sample and stack them as float32.

    Args:
        X: Iterable of samples that expose ``.shape``; each sample is reshaped
            to ``(shape[0], shape[1], 1)``.
        resize_shape: Optional target shape. When truthy, every sample is
            additionally passed through ``resize(sample, output_shape=resize_shape)``.

    Returns:
        ``np.ndarray`` of dtype ``float32`` containing the stacked samples.
    """
    X_conv2D = []
    for sample in X:
        # Pass the target shape positionally: the ``newshape`` keyword of
        # np.reshape is deprecated since NumPy 2.1.
        sample = np.reshape(sample, (sample.shape[0], sample.shape[1], 1))
        if resize_shape:
            # NOTE(review): `resize` is not imported anywhere in the visible notebook
            # (presumably skimage.transform.resize) -- import it before calling this
            # function with a resize_shape, otherwise this line raises NameError.
            sample = resize(sample, output_shape=resize_shape)
        X_conv2D.append(sample)
    return np.array(X_conv2D, dtype=np.float32)

def data_iter(X, y, batch_size):
    """Endlessly yield consecutive mini-batches of ``(X, y)``.

    Samples are served in their stored order (no shuffling); after the last
    batch, which may be smaller than ``batch_size``, iteration restarts from
    the first sample.

    Args:
        X: Array indexed as ``X[rows, :]``; ``X.shape[0]`` is the sample count.
        y: Array indexed as ``y[rows, :]`` with the same number of rows as ``X``.
        batch_size: Maximum number of samples per batch; must be at least 1.

    Yields:
        Tuples ``(X_batch, y_batch)``.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1 or ``X`` holds no
            samples. Without this check the generator would loop forever
            without yielding. Raised on the first ``next()`` call.
    """
    num_samples = X.shape[0]
    if batch_size < 1:
        raise ValueError('batch_size must be >= 1, got %r' % (batch_size,))
    if num_samples == 0:
        raise ValueError('cannot iterate over an empty dataset')

    idx = list(range(num_samples))
    while True:
        for start in range(0, num_samples, batch_size):
            # Slicing clamps at the end of the list, so the last batch may be short.
            rows = idx[start:start + batch_size]
            yield X[rows, :], y[rows, :]

In [16]:
def train_val_test_split(X, y, train_size, val_size, test_size, random_state=None):
    """Split ``X``/``y`` into stratified train, validation and test subsets.

    The data is first split into a training part and a remainder; the
    remainder is then divided between validation and test.

    Args:
        X: Samples.
        y: Labels; also used as the stratification target.
        train_size: Forwarded to ``train_test_split`` as ``train_size`` for the
            first split.
        val_size: Validation weight. Only its ratio to ``test_size`` matters.
        test_size: Test weight. The remainder is split so that the test part
            gets ``test_size / (test_size + val_size)`` of it.
        random_state: Optional seed forwarded to both ``train_test_split``
            calls. The default ``None`` keeps the previous unseeded behaviour.

    Returns:
        ``(X_train, y_train, X_val, y_val, X_test, y_test)``.
    """
    X_train, X_val_test, y_train, y_val_test = train_test_split(
        X, y, train_size=train_size, stratify=y, random_state=random_state)

    # Share of the held-out remainder that goes to the test set.
    test_fraction = test_size / (test_size + val_size)
    X_val, X_test, y_val, y_test = train_test_split(
        X_val_test, y_val_test, test_size=test_fraction, stratify=y_val_test,
        random_state=random_state)
    return X_train, y_train, X_val, y_val, X_test, y_test

In [17]:
import numpy as np
from sklearn.preprocessing import OneHotEncoder

# Previous data source, kept for reference:
# X_melspec = np.load('/share/音乐分类2/GTZAN/without_split_features/melspec_feature_2048.npy')
# y = np.load('/share/音乐分类2/GTZAN/onehot_labels.npy')

# Labels: load them and one-hot encode them as a single column.
y = np.load('/content/drive/MyDrive/Y_spectro.npy')
y_one = OneHotEncoder().fit_transform(y.reshape(-1, 1))

# Features: load the spectrograms and add the trailing channel axis for Conv2D.
# X_melspec = X_melspec.transpose(0,2,1)
X_melspec = process_data_for_conv2D(np.load('/content/drive/MyDrive/X_spectro.npy'))

print(X_melspec.shape)
print(y_one.shape)

(540, 646, 128, 1)
(540, 10)


In [18]:
#check the architecture of the net
# Build the network once just to inspect its architecture.
# X_melspec is (samples, height, width, channels) here, so everything after the
# first axis is the per-sample input shape.
model = multi_scale_level_cnn(
    input_shape=X_melspec.shape[1:],
    num_dense_blocks=3,
    num_conv_filters=32,
    num_classes=10,
)
# model = get_multi_level_cnn_model_3(input_shape=(X_train.shape[1], X_train.shape[2], X_train.shape[3]), num_classes=10)
model.summary()

In [None]:
# Display the training labels of the most recent split.
# NOTE(review): y_train is only assigned by the training cells further down, so this
# cell fails on a fresh kernel when the notebook is run top to bottom.
y_train

array([[0., 0., 0., ..., 0., 0., 1.],
       [0., 0., 0., ..., 1., 0., 0.],
       [0., 1., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 1., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 1.]])

In [None]:
# Display the ModelCheckpoint callback object.
# NOTE(review): model_checkpoint is only assigned by the training cells further down,
# so this cell fails on a fresh kernel when the notebook is run top to bottom.
model_checkpoint

<keras.src.callbacks.ModelCheckpoint at 0x7a56a3a37100>

In [19]:
# Fixed hold-out split: the last NUM_TEST samples form the test set, the remaining
# samples are re-split into train/validation in every run (420/60 for 480 samples).
# NOTE(review): the slices are taken without shuffling; if the .npy files are ordered
# by class the test set will not cover every class -- confirm.
NUM_TEST = 60
NUM_VAL = 60
y_dense = y_one.toarray()  # densify the one-hot labels once instead of once per slice
X_train_val = X_melspec[:-NUM_TEST]
y_train_val = y_dense[:-NUM_TEST]
X_test_fixed = X_melspec[-NUM_TEST:]
y_test_fixed = y_dense[-NUM_TEST:]

# Despite the name, every iteration draws an independent random train/validation
# split (repeated hold-out); the splits are not disjoint k-fold partitions.
k_fold = 5
num_classes = 10

epochs = 100
batch_size = 2
lr = 0.01
file_name0 = 'GTZAN_model.keras'
model_dir = '/content/drive/MyDrive/aime/Extend/'
path = '/content/drive/MyDrive/aime/logs/'
csv_name0 = 'GTZAN_csv.csv'

# Make sure the output folders exist before the callbacks try to write into them.
os.makedirs(model_dir, exist_ok=True)
os.makedirs(path, exist_ok=True)

train_loss_record = []
train_acc_record = []
val_loss_record = []
val_acc_record = []
test_loss_record = []
test_acc_record = []

for i in range(k_fold):
    print('Start %d fold training' % (i + 1))

    # A fresh model is built in every run; reset Keras' global state first so that
    # leftovers from the previous run do not pile up in memory.
    K.clear_session()

    # Train/validation split of the non-test data; seeded with the run index so the
    # splits are reproducible yet different from run to run.
    X_train, X_val, y_train, y_val = train_test_split(
        X_train_val, y_train_val,
        test_size=NUM_VAL,
        random_state=i
    )
    # Every run is scored on the same fixed test set.
    X_test, y_test = X_test_fixed, y_test_fixed

    file_name = model_dir + str(i) + '_fold_' + file_name0
    csv_path = path + str(i) + '_fold_' + csv_name0

    # NOTE(review): LR scheduling and early stopping watch the *training* loss while
    # the checkpoint tracks validation accuracy -- confirm this is intended.
    lr_change = ReduceLROnPlateau(monitor="loss", factor=0.5, patience=3, min_lr=0.000)
    model_checkpoint = ModelCheckpoint(file_name, monitor='val_accuracy', save_best_only=True, mode='max')
    early_stopping = EarlyStopping(monitor='loss', min_delta=0.01, patience=10, mode='min')
    csv_logger = CSVLogger(csv_path)
    callbacks = [lr_change, model_checkpoint, early_stopping, csv_logger]

    opt = Adam(learning_rate=lr)
    model = multi_scale_level_cnn(input_shape=(X_melspec.shape[1], X_melspec.shape[2], X_melspec.shape[3]),
                                  num_dense_blocks=3, num_conv_filters=32, num_classes=num_classes)
    model.compile(
        loss='categorical_crossentropy',
        metrics=['accuracy'],
        optimizer=opt)
    model.fit(X_train, y_train, batch_size=batch_size, epochs=epochs,
              validation_data=(X_val, y_val), verbose=1,
              callbacks=callbacks)

    # Score the checkpoint with the best validation accuracy, not the last epoch.
    model_best = load_model(file_name)
    train_loss, train_acc = model_best.evaluate(X_train, y_train, batch_size=batch_size, verbose=0)
    val_loss, val_acc = model_best.evaluate(X_val, y_val, batch_size=batch_size, verbose=0)
    test_loss, test_acc = model_best.evaluate(X_test, y_test, batch_size=batch_size, verbose=0)

    train_loss_record.append(train_loss)
    train_acc_record.append(train_acc)
    val_loss_record.append(val_loss)
    val_acc_record.append(val_acc)
    test_loss_record.append(test_loss)
    test_acc_record.append(test_acc)
    print('\n\n%d fold train loss %.4f train acc %.4f, val loss %.4f val acc %.4f, test loss %.4f test acc %.4f\n\n' %
          (i + 1, train_loss, train_acc, val_loss, val_acc, test_loss, test_acc))

# Averages over all runs.
train_loss_avg = np.mean(train_loss_record)
train_acc_avg = np.mean(train_acc_record)
val_loss_avg = np.mean(val_loss_record)
val_acc_avg = np.mean(val_acc_record)
test_loss_avg = np.mean(test_loss_record)
test_acc_avg = np.mean(test_acc_record)
print('\n\n%d fold train loss avg %.4f train acc avg %.4f, val loss avg %.4f val acc avg %.4f, test loss avg %.4f test acc avg %.4f' %
      (k_fold, train_loss_avg, train_acc_avg, val_loss_avg, val_acc_avg, test_loss_avg, test_acc_avg))

Start 1 fold training
Epoch 1/100
[1m210/210[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m35s[0m 43ms/step - accuracy: 0.1693 - loss: 2.2996 - val_accuracy: 0.1500 - val_loss: 4.3671 - learning_rate: 0.0100
Epoch 2/100
[1m210/210[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 30ms/step - accuracy: 0.2320 - loss: 2.0696 - val_accuracy: 0.3500 - val_loss: 2.0070 - learning_rate: 0.0100
Epoch 3/100
[1m210/210[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 28ms/step - accuracy: 0.2514 - loss: 2.0262 - val_accuracy: 0.2667 - val_loss: 2.4755 - learning_rate: 0.0100
Epoch 4/100
[1m210/210[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 28ms/step - accuracy: 0.2464 - loss: 2.0063 - val_accuracy: 0.1333 - val_loss: 2.5234 - learning_rate: 0.0100
Epoch 5/100
[1m210/210[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 28ms/step - accuracy: 0.3250 - loss: 1.9228 - val_accuracy: 0.1667 - val_loss: 3.8405 - learning_rate: 0.0100
Epoch 6/100
[1m210/210[0m [32m━━━━

In [20]:
# prompt: Calculate mean and standard deviation for 0.7, 0.75, 0.667,0.667,0.7167

import numpy as np

# Scores typed in by hand rather than read from the recorded results.
data = [0.7, 0.75, 0.667, 0.667, 0.7167]

# np.std is called with its defaults, i.e. ddof=0.
mean, std_dev = np.mean(data), np.std(data)

print(f"Mean: {mean}\nStandard Deviation: {std_dev}")


Mean: 0.70014
Standard Deviation: 0.031485844438413894


### Previous possible duplicate runs

In [None]:
# Without data augmentation.
# NOTE(review): this cell writes to the same checkpoint and CSV paths as the training
# cell above, so running both overwrites the earlier results.
k_fold = 5
num_classes = 10

# Split proportions. They used to be defined only in a later cell, which made this
# cell fail with a NameError on a fresh kernel. The values mirror that later cell --
# confirm they match the run whose output is shown below.
train_size = 0.8
val_size = 0.1
test_size = 0.1

epochs = 100
batch_size = 2
lr = 0.01
file_name0 = 'GTZAN_model.keras'
model_dir = '/content/drive/MyDrive/aime/Extend/'
path = '/content/drive/MyDrive/aime/logs/'
csv_name0 = 'GTZAN_csv.csv'

# Make sure the output folders exist before the callbacks try to write into them.
os.makedirs(model_dir, exist_ok=True)
os.makedirs(path, exist_ok=True)

train_loss_record = []
train_acc_record = []
val_loss_record = []
val_acc_record = []
test_loss_record = []
test_acc_record = []

y_dense = y_one.toarray()  # densify the one-hot labels once, not once per run

for i in range(k_fold):
    print('Start %d fold training' % (i+1))

    # A fresh model is built in every run; reset Keras' global state first so that
    # leftovers from the previous run do not pile up in memory.
    K.clear_session()

    # Every run draws its own stratified train/val/test split of the full dataset.
    X_train, y_train, X_val, y_val, X_test, y_test = train_val_test_split(
        X_melspec, y_dense, train_size=train_size, val_size=val_size, test_size=test_size)

    file_name = model_dir + str(i) + '_fold_' + file_name0
#     log_path  = path+str(i)+'_fold_'+'tensorboard_log'
    csv_path = path + str(i) + '_fold_' + csv_name0
    lr_change = ReduceLROnPlateau(monitor="loss", factor=0.5, patience=3, min_lr=0.000)
    model_checkpoint = ModelCheckpoint(file_name, monitor='val_accuracy', save_best_only=True, mode='max')
    early_stopping = EarlyStopping(monitor='loss', min_delta=0.01, patience=10, mode='min')
    csv_logger = CSVLogger(csv_path)
#     tb_cb = TensorBoard(log_dir=log_path, write_images=1, histogram_freq=1)
    callbacks = [lr_change, model_checkpoint, early_stopping, csv_logger]

    opt = Adam(learning_rate=lr)
    model = multi_scale_level_cnn(input_shape=(X_melspec.shape[1], X_melspec.shape[2], X_melspec.shape[3]),
                                  num_dense_blocks=3, num_conv_filters=32, num_classes=num_classes)
    model.compile(
        loss='categorical_crossentropy',
        metrics=['accuracy'],
        optimizer=opt)
    model.fit(X_train, y_train, batch_size=batch_size, epochs=epochs,
              validation_data=(X_val, y_val), verbose=1,
              callbacks=callbacks)

    # Score the checkpoint with the best validation accuracy, not the last epoch.
    model_best = load_model(file_name)
    train_loss, train_acc = model_best.evaluate(X_train, y_train, batch_size=batch_size, verbose=0)
    val_loss, val_acc = model_best.evaluate(X_val, y_val, batch_size=batch_size, verbose=0)
    test_loss, test_acc = model_best.evaluate(X_test, y_test, batch_size=batch_size, verbose=0)

    train_loss_record.append(train_loss)
    train_acc_record.append(train_acc)
    val_loss_record.append(val_loss)
    val_acc_record.append(val_acc)
    test_loss_record.append(test_loss)
    test_acc_record.append(test_acc)
    print('\n\n%d fold train loss %.4f train acc %.4f, val loss %.4f val acc %.4f, test loss %.4f test acc %.4f\n\n' %
          (i+1, train_loss, train_acc, val_loss, val_acc, test_loss, test_acc))

# Averages over all runs.
train_loss_avg = np.mean(train_loss_record)
train_acc_avg = np.mean(train_acc_record)
val_loss_avg = np.mean(val_loss_record)
val_acc_avg = np.mean(val_acc_record)
test_loss_avg = np.mean(test_loss_record)
test_acc_avg = np.mean(test_acc_record)
print('\n\n%d fold train loss avg %.4f train acc avg %.4f, val loss avg %.4f val acc avg %.4f, test loss avg %.4f test acc avg %.4f' %
      (k_fold, train_loss_avg, train_acc_avg, val_loss_avg, val_acc_avg, test_loss_avg, test_acc_avg))

Start 1 fold training
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100

In [None]:

# Re-evaluate the saved per-run checkpoints without training.
# Relies on k_fold and file_name0 from the training cells above.
# NOTE(review): the splits drawn here are independent of the ones used during
# training, so the "train"/"val"/"test" numbers mix samples the checkpoints were
# trained on -- treat them as a sanity check only.

# Number of classes taken from the one-hot label matrix (one column per class).
num_classes = y_one.shape[1]

train_size = 0.8
val_size = 0.1
test_size = 0.1

epochs = 100
batch_size = 8
lr = 0.01
train_loss_record = []
train_acc_record = []
val_loss_record = []
val_acc_record = []
test_loss_record = []
test_acc_record = []

# The models were compiled with categorical_crossentropy, so they must be scored
# against the one-hot labels rather than the raw label vector `y`.
y_dense = y_one.toarray()

for i in range(k_fold):
    print('Start %d fold training' % (i+1))
    X_train, y_train, X_val, y_val, X_test, y_test = train_val_test_split(
        X_melspec, y_dense, train_size=train_size, val_size=val_size, test_size=test_size)

    # Load the checkpoint of *this* run; reusing the stale `file_name` left behind by
    # the training loop would score the last run's model k_fold times.
    file_name = '/content/drive/MyDrive/aime/Extend/' + str(i) + '_fold_' + file_name0
    model_best = load_model(file_name)
    train_loss, train_acc = model_best.evaluate(X_train, y_train, batch_size=batch_size, verbose=0)
    val_loss, val_acc = model_best.evaluate(X_val, y_val, batch_size=batch_size, verbose=0)
    test_loss, test_acc = model_best.evaluate(X_test, y_test, batch_size=batch_size, verbose=0)

    train_loss_record.append(train_loss)
    train_acc_record.append(train_acc)
    val_loss_record.append(val_loss)
    val_acc_record.append(val_acc)
    test_loss_record.append(test_loss)
    test_acc_record.append(test_acc)
    print('\n\n%d fold train loss %.4f train acc %.4f, val loss %.4f val acc %.4f, test loss %.4f test acc %.4f\n\n' %
          (i+1, train_loss, train_acc, val_loss, val_acc, test_loss, test_acc))

# Averages over all runs.
train_loss_avg = np.mean(train_loss_record)
train_acc_avg = np.mean(train_acc_record)
val_loss_avg = np.mean(val_loss_record)
val_acc_avg = np.mean(val_acc_record)
test_loss_avg = np.mean(test_loss_record)
test_acc_avg = np.mean(test_acc_record)
print('\n\n%d fold train loss avg %.4f train acc avg %.4f, val loss avg %.4f val acc avg %.4f, test loss avg %.4f test acc avg %.4f' %
      (k_fold, train_loss_avg, train_acc_avg, val_loss_avg, val_acc_avg, test_loss_avg, test_acc_avg))

In [None]:
# Manual training loop built on train_on_batch.
# Relies on X_train/y_train, X_val/y_val and X_test/y_test left behind by an earlier cell.
model = multi_scale_level_cnn(input_shape=(X_train.shape[1], X_train.shape[2], X_train.shape[3]),
                              num_dense_blocks=3, num_conv_filters=32, num_classes=10)
model.summary()


epochs = 100
batch_size = 8
# Use `learning_rate`, as the other cells of this notebook do; `lr` is the legacy
# argument name.
opt = Adam(learning_rate=0.0001)
# No Keras callbacks are set up here: train_on_batch does not run them.
train_data_iter = data_iter(X_train, y_train, batch_size)
model.compile(
            loss='categorical_crossentropy',
            metrics=['accuracy'],
            optimizer=opt)

# One epoch is one full pass over the training data. The step count is derived from
# batch_size; the previous hard-coded divisor of 32 stopped each epoch after roughly
# a quarter of the data with batch_size = 8.
steps_per_epoch = math.ceil(len(X_train) / batch_size)

for e in range(epochs):
    # data_iter never stops by itself, so pull exactly one pass worth of batches.
    for _ in range(steps_per_epoch):
        X_batch, y_batch = next(train_data_iter)
        model.train_on_batch(X_batch, y_batch)

    train_evaluation = model.evaluate(X_train, y_train, verbose=0)
    val_evaluation = model.evaluate(X_val, y_val, verbose=0)
    test_evaluation = model.evaluate(X_test, y_test, verbose=0)

    print('Epoch %d train_loss: %.4f train_acc: %.4f, val_loss: %.4f val_acc: %.4f, test_loss: %.4f, test_acc: %.4f' %
          (e+1, train_evaluation[0], train_evaluation[1], val_evaluation[0], val_evaluation[1], test_evaluation[0], test_evaluation[1]))