<a href="https://colab.research.google.com/github/thomasbhard/mic-model-retraining/blob/master/mic_model_update.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Retraining the Model with new recordings from the app

- Make sure the JSON file with the Firebase keys is in the same directory as this notebook!

- Upload the `.h5` model you want to update and set `current_model` to its filename.

- If you run this in a Colab environment, don't forget to download all the generated files for later use.


**Have fun & be safe**

In [0]:
# setup workspace: install the package that provides the `firebase_admin` module imported further down
pip install firebase_admin

Collecting firebase_admin
[?25l  Downloading https://files.pythonhosted.org/packages/49/b1/ba41c23eb0f7895a4db5a03255bfeec0f54ee8b70374dc68ff2586cafaa5/firebase_admin-3.2.1-py2.py3-none-any.whl (82kB)
[K     |████                            | 10kB 21.7MB/s eta 0:00:01[K     |████████                        | 20kB 5.1MB/s eta 0:00:01[K     |████████████                    | 30kB 6.2MB/s eta 0:00:01[K     |███████████████▉                | 40kB 5.4MB/s eta 0:00:01[K     |███████████████████▉            | 51kB 5.7MB/s eta 0:00:01[K     |███████████████████████▉        | 61kB 6.7MB/s eta 0:00:01[K     |███████████████████████████▉    | 71kB 6.9MB/s eta 0:00:01[K     |███████████████████████████████▊| 81kB 7.2MB/s eta 0:00:01[K     |████████████████████████████████| 92kB 5.5MB/s 
Collecting google-cloud-storage>=1.18.0
[?25l  Downloading https://files.pythonhosted.org/packages/cd/6d/75c2a47af99d15aa8b4de4e66226c128e623f8c9d3e27a8588368ccc38fc/google_cloud_storage-1.25.0-

In [0]:
# filenames to load and store models
# current_model:  existing Keras .h5 model that gets loaded and updated
# new_model:      where the updated Keras model is saved
# new_model_lite: where the TensorFlow Lite conversion of new_model is written
current_model = 'm1578220494_first=rel_batTrues12.h5'
new_model = 'retrained_model.h5'
new_model_lite = 'retrained_model.tflite'  # was misspelled 'retrainded_model.tflite'

In [0]:
# setup firebase
import firebase_admin
from firebase_admin import credentials
from firebase_admin import firestore


# Key file is resolved relative to the working directory, so it has to sit
# next to this notebook.
cred = credentials.Certificate('mic-training-firebase-adminsdk-upqap-539c749b5b.json')

# initialize_app() raises ValueError if the default app already exists, which
# made this cell fail when re-run in the same kernel. Only initialize when no
# default app has been created yet.
try:
  firebase_admin.get_app()
except ValueError:
  firebase_admin.initialize_app(cred)

# Firestore client used by the data collection step.
db = firestore.client()
print('Firebase connection successful!')

Firebase connection successful!


In [0]:
# Collect data
# Stream every document of the 'recordings' collection and split it into two
# parallel lists: the raw samples and the label stored with them.
collection = db.collection(u'recordings')
recordings = collection.stream()

features, labels = [], []

for doc in recordings:
  record = doc.to_dict()
  # Read both fields before appending so the two lists always stay the same length.
  doc_samples, doc_label = record['samples'], record['label']
  features.append(doc_samples)
  labels.append(doc_label)

print(str(len(features)) + ' features collected')
print(str(len(labels)) + ' labels collected')


385 features collected
385 labels collected


In [0]:
# Process features and labels
import numpy as np
from sklearn.preprocessing import LabelEncoder
from keras.utils.np_utils import to_categorical

# Labels arrive as full instrument names; translate them to the short codes
# below and then into one-hot encoded label vectors.
instruments = ['cel', 'cla', 'flu', 'gac', 'gel', 'org', 'pia', 'sax', 'tru', 'vio']
fullnames = {
  'Cello': 'cel',
  'Clarinet': 'cla',
  'Flute': 'flu',
  'Acustic Guitar': 'gac',
  'Electric Guitar': 'gel',
  'Organ': 'org',
  'Piano': 'pia',
  'Saxophone': 'sax',
  'Trumpet': 'tru',
  'Violin': 'vio',
}

# fit() returns the encoder itself, so it can be chained onto the constructor.
label_encoder = LabelEncoder().fit(instruments)

# full name -> short code -> integer class -> one-hot vector
labels_trans = [fullnames[full_name] for full_name in labels]
labels_enc = label_encoder.transform(labels_trans)
labels_one_hot = to_categorical(labels_enc, num_classes=len(instruments))

# scale each feature vector to np array between -1.0 and 1.0
# Each recording is divided by its own peak absolute value.
features_np = []

for feature in features:
  feature_np = np.array(feature, dtype=np.float32)
  feature_max = np.max(np.abs(feature_np))
  # Skip the division for all-zero recordings. The previous check compared the
  # builtin `max` with 0, which is always true, so silent recordings were
  # divided by zero and turned into NaN.
  if feature_max != 0:
    feature_np /= feature_max
  features_np.append(feature_np)

print('Processed features and labels')

Using TensorFlow backend.


Processed features and labels




In [0]:
# create dataframe
import pandas as pd


# One row per recording; the 2048 columns are named 'Sample 0' .. 'Sample 2047'.
sample_columns = ['Sample ' + str(idx) for idx in range(2048)]
df_features = pd.DataFrame(features_np)
df_features.columns = sample_columns

# One column per instrument code holding the one-hot label.
df_labels = pd.DataFrame(labels_one_hot)
df_labels.columns = instruments

# Features and labels must have the same number of rows before joining.
assert df_features.shape[0] == df_labels.shape[0]

# Side-by-side join: sample columns first, label columns last.
df = pd.concat([df_features, df_labels], axis=1, join='inner')

print(df.head())

In [0]:
# save csv
import time

# Name the export after the current UTC time (gmtime() without an argument
# uses the current time).
utc_now = time.gmtime()
outfilebase = time.strftime("%Y%m%d-%H%M%S", utc_now)
outfilename = outfilebase + '-slice-2048.csv'

df.to_csv(outfilename)

In [0]:
# Prepare dataframe for training
# Shuffle the rows by sampling the full frame (frac=1.0).
df = df.sample(frac=1.0)

# The trailing 10 columns are the one-hot labels, everything before them are
# the samples. X gets an extra trailing axis of length 1.
label_cols = 10
X = df.iloc[:, :-label_cols].values[:, :, np.newaxis]
Y = df.iloc[:, -label_cols:].values

In [0]:
# update model
from tensorflow.keras.models import load_model

# Load the existing model, train it on all collected recordings passed as one
# batch, then store the result under the new filename.
model = load_model(current_model)
model.train_on_batch(X, Y)
model.save(new_model)

In [0]:
# convert to tflite
import tensorflow as tf

# NOTE(review): from_keras_model_file looks like the TF 1.x converter entry
# point - confirm the installed TensorFlow version still provides it.
converter = tf.lite.TFLiteConverter.from_keras_model_file(new_model)
tflite_model = converter.convert()

# Write through a context manager so the file is flushed and closed as soon
# as the cell finishes. The handle used to stay open, so the file on disk
# could be incomplete when it was downloaded.
with open(new_model_lite, 'wb') as tflite_file:
  tflite_file.write(tflite_model)