# Steps for Training
- Extract all the samples and generate spectrogram jpegs for each
- Train a CNN on the jpegs
- 

In [1]:
from utils import general_utils
import importlib
importlib.reload(general_utils)
import pandas as pd

Using TensorFlow backend.


In [2]:


train_files, train_targets, train_target_names = general_utils.load_dataset('data/nsynth-train-spectrograms')

print('info about training set')
print('number of samples: ', len(train_files))
print('categories are: ', sorted(set(train_target_names)))
#print(train_files[500:501])
#print(train_targets[500:501])
#print(train_target_names[500:501])

valid_files, valid_targets, valid_target_names = general_utils.load_dataset('data/nsynth-valid-spectrograms')

print('\ninfo about validation set')
print('number of samples: ', len(valid_files))
print('categories are: ', sorted(set(valid_target_names)))
#print(valid_files[500:501])
#print(valid_targets[500:501])
#print(valid_target_names[500:501])

# load the test data
test_files, test_targets, test_target_names = general_utils.load_dataset('data/nsynth-test-spectrograms')

print('\ninfo about test set')
print('number of samples: ', len(test_files))
print('categories are: ', sorted(set(test_target_names)))
#print(test_files[500:501])
#print(test_targets[500:501])
#print(test_target_names[500:501])

info about training set
number of samples:  283704
categories are:  ['bass', 'brass', 'flute', 'guitar', 'keyboard', 'mallet', 'organ', 'reed', 'string', 'vocal']

info about validation set
number of samples:  12678
categories are:  ['bass', 'brass', 'flute', 'guitar', 'keyboard', 'mallet', 'organ', 'reed', 'string', 'vocal']

info about test set
number of samples:  4096
categories are:  ['bass', 'brass', 'flute', 'guitar', 'keyboard', 'mallet', 'organ', 'reed', 'string', 'vocal']


In [3]:
#build dataframe that will be part of the image generator construction
#PART 1: zip the data

# merge the three arrays into an array of tupples
train_data = list(zip(train_files, train_targets, train_target_names))
valid_data = list(zip(valid_files, valid_targets, valid_target_names))
test_data = list(zip(test_files, test_targets, test_target_names))

In [4]:
# PART 2: to limit the size of the datasets, uncomment / change the code here

train_data = train_data[:100000]
valid_data = valid_data[:4300]
#test_data = test_data[:4300]

In [5]:
# PART 3: Create the dataframes

train_df = pd.DataFrame(train_data, columns = ['file_paths', 'targets', 'target_names'])
valid_df = pd.DataFrame(valid_data, columns = ['file_paths', 'targets', 'target_names'])
test_df = pd.DataFrame(test_data, columns = ['file_paths', 'targets', 'target_names'])

In [6]:
print('train shape', train_df.shape)
print('valid shape', valid_df.shape)
print('test shape', test_df.shape)

train shape (100000, 3)
valid shape (4300, 3)
test shape (4096, 3)


In [7]:
# create the data generator
from keras_preprocessing.image import ImageDataGenerator

datagen=ImageDataGenerator(rescale=1./255.,validation_split=0.25)

In [8]:
# make the training data generator
train_generator = datagen.flow_from_dataframe(
        dataframe=train_df,
        x_col='file_paths',
        y_col='target_names',
        batch_size=32,
        seed=69,
        shuffle=True,
        class_mode='categorical',
        target_size=(64,64)
    )


Found 100000 images belonging to 10 classes.


In [9]:
# make the validation data generator
valid_generator = datagen.flow_from_dataframe(
        dataframe=valid_df,
        x_col='file_paths',
        y_col='target_names',
        batch_size=32,
        seed=69,
        shuffle=True,
        class_mode='categorical',
        target_size=(64,64)
    )

Found 4300 images belonging to 10 classes.


In [10]:
# make the validation data generator
test_generator = datagen.flow_from_dataframe(
        dataframe=test_df,
        x_col='file_paths',
        y_col='target_names',
        batch_size=32,
        #seed=69,
        shuffle=True,
        class_mode='categorical',
        target_size=(64,64)
    )

Found 4096 images belonging to 10 classes.


In [11]:
# USEFULL THINGS

#to go through all of the files to make sure they're finable
#import os.path
#for f in valid_files:#valid_generator.filepaths:
#    if not os.path.exists(f):
#        print(f, 'does not exist')

# get all of the file paths in the data generator
#generator_files = set(valid_generator.filepaths)

# make sure all of the actual files are accounted for in the generator
#i = 0
#for f in valid_files:
#    if f not in generator_files:
#        print('N', f)
#        i+=1
    #else:
        #print('Y', f)
#print(i, 'files do not match')

In [12]:
# Inspired by https://medium.com/gradientcrescent/urban-sound-classification-using-convolutional-neural-networks-with-keras-theory-and-486e92785df4
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.layers import Dense, Activation, Flatten, Dropout, BatchNormalization
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import Conv2D, MaxPooling2D
from tensorflow.keras import regularizers, optimizers
from tensorflow.keras.callbacks import ModelCheckpoint
import pandas as pd
import numpy as np

def create_model(show_summary=False):
    model = Sequential()
    model.add(Conv2D(32, (3, 3), padding='same', input_shape=(64,64,3)))
    model.add(Activation('relu'))
    model.add(Conv2D(64, (3, 3)))
    model.add(Activation('relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Dropout(0.25))
    model.add(Conv2D(64, (3, 3), padding='same'))
    model.add(Activation('relu'))
    model.add(Conv2D(64, (3, 3)))
    model.add(Activation('relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Dropout(0.5))
    model.add(Conv2D(128, (3, 3), padding='same'))
    model.add(Activation('relu'))
    model.add(Conv2D(128, (3, 3)))
    model.add(Activation('relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Dropout(0.5))
    model.add(Flatten())
    model.add(Dense(512))
    model.add(Activation('relu'))
    model.add(Dropout(0.5))
    model.add(Dense(10, activation='softmax'))
    model.compile(optimizers.RMSprop(lr=0.0005, decay=1e-6),loss="categorical_crossentropy",metrics=["accuracy"])
    
    if show_summary:
        model.summary()
    
    return model

In [13]:
STEP_SIZE_TRAIN=train_generator.n//train_generator.batch_size
STEP_SIZE_VALID=valid_generator.n//valid_generator.batch_size
STEP_SIZE_TEST=test_generator.n//test_generator.batch_size
#STEP_SIZE_TEST=test_generator.n//test_generator.batch_size
print('train_generator.n//train_generator.batch_size is {}//{}'.format(train_generator.n, train_generator.batch_size))
print('valid_generator.n//valid_generator.batch_size is {}//{}'.format(valid_generator.n, valid_generator.batch_size))
print('test_generator.n//test_generator.batch_size is {}//{}'.format(test_generator.n, test_generator.batch_size))
print('STEP_SIZE_TRAIN, STEP_SIZE_VALID, STEP_SIZE_TEST is {}, {}, {}'.format(STEP_SIZE_TRAIN, STEP_SIZE_VALID, STEP_SIZE_TEST))

train_generator.n//train_generator.batch_size is 100000//32
valid_generator.n//valid_generator.batch_size is 4300//32
test_generator.n//test_generator.batch_size is 4096//32
STEP_SIZE_TRAIN, STEP_SIZE_VALID, STEP_SIZE_TEST is 3125, 134, 128


In [14]:
#checkpointer = ModelCheckpoint(filepath='saved_models/weights.best.v2.hdf5', 
#                               verbose=1, save_best_only=True)

#train_model = create_model(show_summary=True)
#train_model.fit_generator(generator=train_generator,
#                          steps_per_epoch=STEP_SIZE_TRAIN,
#                          validation_data=valid_generator,
#                          validation_steps=STEP_SIZE_VALID,
#                          epochs=10,
#                          callbacks=[checkpointer]
#)

#r_trained_orig = train_model.evaluate_generator(generator=valid_generator, steps=STEP_SIZE_VALID)

In [15]:
# first test untrained model
untrained_test_model = create_model()

score_untrained = untrained_test_model.evaluate_generator(test_generator, steps=STEP_SIZE_TEST)
print('Loss: ', score_untrained[0], ' Accuracy: ', score_untrained[1])

#for i, n in enumerate(test_generator.filenames):
#    print('file:', n, ' score: ', scores[i][0])


W0908 22:46:37.797879 140058644711232 deprecation.py:506] From /home/ec2-user/anaconda3/envs/magenta-gpu/lib/python3.6/site-packages/tensorflow/python/ops/init_ops.py:1251: calling VarianceScaling.__init__ (from tensorflow.python.ops.init_ops) with dtype is deprecated and will be removed in a future version.
Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor


Loss:  2.3010107818990946  Accuracy:  0.036621094


In [27]:
## Testing the untrained model
importlib.reload(general_utils)

results = general_utils.run_prediction(untrained_test_model, test_generator, STEP_SIZE_TEST)

print('Total test records:', len(results))
print('Number of correct:', np.count_nonzero(results))


Total test records: 4096
Number of correct: 185


In [16]:
# next test the trained model
trained_test_model = create_model()
trained_test_model.load_weights('saved_models/weights.best.v1.hdf5')

score_trained = trained_test_model.evaluate_generator(test_generator, steps=STEP_SIZE_TEST)
print('Loss: ', score_trained[0], ' Accuracy: ', score_trained[1])



Loss:  0.9580376138910651  Accuracy:  0.6879883


In [26]:
## Testing the trained model
importlib.reload(general_utils)

results = general_utils.run_prediction(trained_test_model, test_generator, STEP_SIZE_TEST)

print('Total test records:', len(results))
print('Number of correct:', np.count_nonzero(results))


Total test records: 4096
Number of correct: 566


In [21]:
### to display some details about the above results

data_labels_dict = dict((data_labels[k],k) for k in data_labels.keys())
print('all possible labels:',data_labels_dict)

##### map the predictions to the labels for later display
test_labels_pred = [[i,data_labels_dict[i]] for i in test_y_pred]
test_labels_orig = [[i,data_labels_dict[i]] for i in test_y_labels]

test_filenames = [f.split('/')[-1] for f in test_generator.filenames]
[print(v[3],'original label',v[0],'\tfilename',v[1],'\tpredicted label',v[2]) for v in zip(test_labels_orig[:10], test_filenames[:10],test_labels_pred[:10],results[:10])]



all possible labels: {0: 'bass', 1: 'brass', 2: 'flute', 3: 'guitar', 4: 'keyboard', 5: 'mallet', 6: 'organ', 7: 'reed', 8: 'string', 9: 'vocal'}
True original label [0, 'bass'] 	filename bass_synthetic_098-049-025.jpg 	predicted label [0, 'bass']
False original label [4, 'keyboard'] 	filename keyboard_acoustic_004-033-127.jpg 	predicted label [1, 'brass']
False original label [4, 'keyboard'] 	filename keyboard_acoustic_004-031-050.jpg 	predicted label [5, 'mallet']
False original label [6, 'organ'] 	filename organ_electronic_028-101-050.jpg 	predicted label [4, 'keyboard']
False original label [8, 'string'] 	filename string_acoustic_057-060-100.jpg 	predicted label [4, 'keyboard']
False original label [4, 'keyboard'] 	filename keyboard_electronic_002-053-127.jpg 	predicted label [5, 'mallet']
True original label [0, 'bass'] 	filename bass_synthetic_098-048-025.jpg 	predicted label [0, 'bass']
False original label [8, 'string'] 	filename string_acoustic_056-055-100.jpg 	predicted label

[None, None, None, None, None, None, None, None, None, None]

In [18]:
# next test the model trained by NSynth (think I'll have to use a transfer learning approach)

###NOTE: Could not figure out how to use the ckpt checkpoint files to either evaluate or even use 
### . as the basis for transfer learning. I'd like to go back and do this, but could use some advice.

trained_nsynth_test_model = create_model()
#nsynth_checkpoint = tf.train.load_checkpoint('examples/model_files/model.ckpt-200000') #, latest_filename='model.ckpt-200000.index'
#print(nsynth_checkpoint)

### results of this was:
## NotImplementedError: Streaming restore not supported from name-based checkpoints when graph building. 
##   File a feature request if this limitation bothers you. 
##   As a workaround, consider either using tf.train.Checkpoint to load 
##   name-based checkpoints or enabling eager execution.

## I did find this. See link in the comment: https://github.com/tensorflow/magenta/issues/955

path = 'examples/model_files/model.ckpt-200000'
#cp = tf.train.Checkpoint(path)


trained_nsynth_test_model.load_weights(path)


r_trained = trained_nsynth_test_model.evaluate_generator(test_generator, steps=STEP_SIZE_TEST)



W0908 19:18:24.825218 140180489463616 deprecation.py:323] From /home/ec2-user/anaconda3/envs/magenta-gpu/lib/python3.6/site-packages/tensorflow/python/training/tracking/util.py:1200: NameBasedSaverStatus.__init__ (from tensorflow.python.training.tracking.util) is deprecated and will be removed in a future version.
Instructions for updating:
Restoring a name-based tf.train.Saver checkpoint using the object-based restore API. This mode uses global names to match variables, and so is somewhat fragile. It also adds new restore ops to the graph each time it is called when graph building. Prefer re-encoding training checkpoints in the object-based format: run save() on the object-based saver (the same one this message is coming from) and use that checkpoint in the future.


NotImplementedError: Streaming restore not supported from name-based checkpoints when graph building. File a feature request if this limitation bothers you. As a workaround, consider either using tf.train.Checkpoint to load name-based checkpoints or enabling eager execution.

In [None]:
# test the results of a few
p = untrained_test_model.predict_generator(test_generator, steps=STEP_SIZE_TEST, verbose=1)

In [None]:
#and display the first 10 predictions
p_indices = np.argmax(p, axis=1)

labels = (train_generator.class_indices)

labels = dict((v,k) for k,v in labels.items())

predictions = [labels[k] for k in p_indices]

print(predictions[0:9])