In [2]:
import pandas as pd
from sklearn.metrics import confusion_matrix

import tensorflow as tf
import tensorflow_datasets as tfds
import keras

### Start with loading the dataset

In [3]:
# Mini-batch size and the size every image is resized to when it is loaded.
batch_size = 10
img_height = 369
img_width = 496

# Root of the dataset: every sub-directory of images_original is one class
# folder, and each class folder holds the samples belonging to that class.
data_dir = './images_original'

# Options shared by both splits. validation_split and seed are identical for
# the two calls so that "training" and "validation" are two halves of the same
# 80/20 partition.
# Spectrograms contain no meaningful RGB information, so loading them as
# grayscale does not lose anything.
split_options = dict(
  validation_split=0.2,
  seed=123,
  image_size=(img_height, img_width),
  batch_size=batch_size,
  color_mode='grayscale',
)

train = tf.keras.utils.image_dataset_from_directory(
  data_dir, subset="training", **split_options)

val = tf.keras.utils.image_dataset_from_directory(
  data_dir, subset="validation", **split_options)


Found 999 files belonging to 10 classes.
Using 800 files for training.
Found 999 files belonging to 10 classes.
Using 199 files for validation.


### Defining the model architecture

In [4]:
num_classes = 10

# Small CNN trained from scratch on the grayscale spectrogram images.
# NOTE(review): every Conv2D below uses kernel_size=1 (a pointwise convolution),
# so no layer mixes neighbouring pixels. If spatial filters were intended, a
# 3x3 kernel is the usual choice -- confirm before changing, since it alters
# the experiment.
model = tf.keras.Sequential([
  # Declaring the input shape builds the model right away, so it can be
  # summarised or plotted before the first call to fit(). The single channel
  # matches color_mode='grayscale' used when loading the dataset.
  tf.keras.Input(shape=(img_height, img_width, 1)),
  # Map pixel values from [0, 255] to [0, 1].
  tf.keras.layers.Rescaling(1./255),
  tf.keras.layers.Conv2D(32, 1, activation='relu'),
  tf.keras.layers.MaxPooling2D(),
  tf.keras.layers.Conv2D(16, 1, activation='relu'),
  tf.keras.layers.MaxPooling2D(),
  tf.keras.layers.Conv2D(8, 1, activation='relu'),
  tf.keras.layers.MaxPooling2D(),
  tf.keras.layers.Flatten(),
  tf.keras.layers.Dense(128, activation='relu'),
  # No activation: the model outputs one raw logit per class.
  tf.keras.layers.Dense(num_classes)
])

In [None]:
# Visualize the architecture of the from-scratch CNN (`model`).
# plot_model needs a built model, i.e. one whose input shape is known; build it
# here with the dataset's image shape if that has not happened yet.
if not model.built:
    model.build((None, img_height, img_width, 1))
tf.keras.utils.plot_model(model, "cnn_from_scratch.png")

In [None]:
# The final Dense layer of `model` has no activation and therefore emits raw
# logits, which is why the loss is created with from_logits=True. Labels are
# integer class indices, hence the sparse variant of the loss.
model.compile(
  loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
  optimizer='adam',
  metrics=['accuracy'],
)

In [None]:
EPOCHS = 20

# Keep the History object returned by fit(): the accuracy plot further down
# reads the per-epoch metrics through the name `fitted`.
fitted = model.fit(
  train,
  validation_data=val,
  epochs=EPOCHS,
)
model.summary()

In [None]:
import matplotlib.pyplot as plt
import numpy as np

# `fitted` must be the History object returned by model.fit(...) in the
# training cell, i.e. that cell has to read `fitted = model.fit(...)`.
history = fitted

train_acc = history.history['accuracy']
val_acc = history.history['val_accuracy']
# Epochs are numbered from 1 on the x axis.
epoch_list = list(range(1, len(train_acc) + 1))

# Draw on a figure created here rather than on pyplot's implicit "current"
# figure, so the curves can never end up on a plot left over from another cell.
fig, ax = plt.subplots()
ax.plot(epoch_list, train_acc, label='Train Accuracy')
ax.plot(epoch_list, val_acc, label='Validation Accuracy')
ax.set_ylabel('Accuracy Value')
ax.set_xlabel('Epoch')
ax.set_title('Accuracy')
ax.legend(loc="lower right")
plt.show()

In [None]:
# `val` is already a batched tf.data.Dataset (batched when it was created), so
# no batch_size is passed: Keras takes the batching from the dataset itself and
# documents that batch_size must not be specified for dataset inputs.
results = model.evaluate(val)

### Transfer Learning---using VGGish

In [5]:
from vggish import VGGish

import os
import vggish_params as params

WEIGHTS_PATH = "vggish_audioset_weights_without_fc2.h5"
# The pretrained weights have to be downloaded manually; the file was too big
# to keep in the GitHub repository.
# NOTE(review): WEIGHTS_PATH is never passed to VGGish() below -- confirm how
# the vggish module locates the weights file.
EPOCHS = 20

# VGGish without its top layers, used as a frozen feature extractor: with
# trainable = False none of its weights are updated during training.
base_model = VGGish(include_top=False)

base_model.trainable = False

# New classification head on top of the frozen base: flatten the base output
# and map it to 10 class probabilities (the dataset has 10 classes).
last_layer = base_model.output
x = tf.keras.layers.Flatten()(last_layer)
x = tf.keras.layers.Dense(units = 10, activation = 'softmax')(x)

pre_trained_model = tf.keras.Model(inputs = base_model.inputs, outputs = x)

# The head already applies softmax, so the model outputs probabilities, not
# logits. The loss must therefore use from_logits=False; with from_logits=True
# softmax would effectively be applied twice and Keras warns about it.
pre_trained_model.compile(
  optimizer='adam',
  loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False),
  metrics=['accuracy'])

In [6]:
# Train the transfer-learning model and keep the returned History object for
# the accuracy plot below.
# NOTE(review): `train`/`val` yield images of size (img_height, img_width);
# confirm that the VGGish base accepts that input shape.
fitted_pre_trained = pre_trained_model.fit(
  train,
  validation_data=val,
  epochs=EPOCHS,
)

Epoch 1/20


  return dispatch_target(*args, **kwargs)


 4/80 [>.............................] - ETA: 2:59 - loss: 3.9533 - accuracy: 0.1250

KeyboardInterrupt: 

In [None]:
# History object returned by pre_trained_model.fit(...) in the training cell.
history = fitted_pre_trained

train_acc = history.history['accuracy']
val_acc = history.history['val_accuracy']
# Epochs are numbered from 1 on the x axis.
epoch_list = list(range(1, len(train_acc) + 1))

# Draw on a figure created here rather than on pyplot's implicit "current"
# figure, so these curves are never overlaid on the plot of another model.
fig, ax = plt.subplots()
ax.plot(epoch_list, train_acc, label='Train Accuracy')
ax.plot(epoch_list, val_acc, label='Validation Accuracy')
ax.set_ylabel('Accuracy Value')
ax.set_xlabel('Epoch')
ax.set_title('Accuracy')
ax.legend(loc="lower right")
plt.show()

In [None]:
# Visualize the architecture of the transfer-learning model.
tf.keras.utils.plot_model(pre_trained_model, "pre_trained.png")

In [12]:
# List the layers of the transfer-learning model (VGGish base + new head).
pre_trained_model.layers

[<keras.engine.input_layer.InputLayer at 0x2014f3140d0>,
 <keras.layers.convolutional.conv2d.Conv2D at 0x2014df30790>,
 <keras.layers.pooling.max_pooling2d.MaxPooling2D at 0x2014df2c460>,
 <keras.layers.convolutional.conv2d.Conv2D at 0x2014df2c970>,
 <keras.layers.pooling.max_pooling2d.MaxPooling2D at 0x2014f37fc40>,
 <keras.layers.convolutional.conv2d.Conv2D at 0x2014e32f880>,
 <keras.layers.convolutional.conv2d.Conv2D at 0x2014f3b2910>,
 <keras.layers.pooling.max_pooling2d.MaxPooling2D at 0x2014f3b2ca0>,
 <keras.layers.convolutional.conv2d.Conv2D at 0x2014f3ac970>,
 <keras.layers.convolutional.conv2d.Conv2D at 0x2014f37f550>,
 <keras.layers.pooling.max_pooling2d.MaxPooling2D at 0x2014ce5c9d0>,
 <keras.layers.pooling.global_average_pooling2d.GlobalAveragePooling2D at 0x2014d6e60d0>,
 <keras.layers.reshaping.flatten.Flatten at 0x2014f314d90>,
 <keras.layers.core.dense.Dense at 0x2014df2ca60>]

In [9]:
# Sanity check of the freezing: for every layer of the transfer-learning model,
# print how many weight tensors it has and how many of them are trainable.
# Only layers added on top of the frozen base should report trainable weights.
for layer in pre_trained_model.layers:
    print("weights:", len(layer.weights))
    print("trainable_weights:", len(layer.trainable_weights))
    print("non_trainable_weights:", len(layer.non_trainable_weights))

weights: 0
trainable_weights: 0
non_trainable_weights: 0
weights: 2
trainable_weights: 0
non_trainable_weights: 2
weights: 0
trainable_weights: 0
non_trainable_weights: 0
weights: 2
trainable_weights: 0
non_trainable_weights: 2
weights: 0
trainable_weights: 0
non_trainable_weights: 0
weights: 2
trainable_weights: 0
non_trainable_weights: 2
weights: 2
trainable_weights: 0
non_trainable_weights: 2
weights: 0
trainable_weights: 0
non_trainable_weights: 0
weights: 2
trainable_weights: 0
non_trainable_weights: 2
weights: 2
trainable_weights: 0
non_trainable_weights: 2
weights: 0
trainable_weights: 0
non_trainable_weights: 0
weights: 0
trainable_weights: 0
non_trainable_weights: 0
weights: 0
trainable_weights: 0
non_trainable_weights: 0
weights: 2
trainable_weights: 2
non_trainable_weights: 0
