In [23]:
import librosa 
import librosa.display
import numpy as np
import pandas as pd
from pathlib import Path
import seaborn as sns
from tensorflow import keras 
from tensorflow.keras.preprocessing import image_dataset_from_directory, image 
from tensorflow.keras import layers
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import Dense, Conv2D, MaxPooling2D, Flatten, GlobalAveragePooling2D, InputLayer,Dropout
from tensorflow.keras.applications import VGG19
from sklearn.utils.class_weight import compute_class_weight

In [10]:
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.utils.class_weight import compute_class_weight

In [3]:
# Image data lives under ./data relative to the working directory, while the
# labels CSV is read from the working directory itself.
path = Path.cwd().joinpath('data')
images_directory = path.joinpath('train_images')
answers = pd.read_csv('answers.csv')

In [7]:
# Options shared by both subsets. Keeping them in one place guarantees that the
# training and validation calls use the same seed and validation_split, so the
# two subsets come from the same 80/20 partition of the directory.
split_options = dict(labels='inferred', image_size=(97, 97), validation_split=.2, seed=10)

cb_training = image_dataset_from_directory(images_directory, subset='training', **split_options)
cb_validation = image_dataset_from_directory(images_directory, subset='validation', **split_options)

Found 30000 files belonging to 2 classes.
Using 24000 files for training.
Found 30000 files belonging to 2 classes.
Using 6000 files for validation.


# Review 
## Terrible Accuracy 
So the accuracy is terrible. Why? 
* I adjusted the input 
* I adjusted the class weights 
* Transfer Learning - maybe this didn't actually help? 

## Checking What Happened 
We need to figure out why the accuracy dropped so much. So let's go back to the original CNN model with the new spectrograms, and rerun it. 
### Experiment 1: Run model w/ original class weights, but new spectrograms 
In notebook 3, we were getting .9 accuracy, even if we were grossly overfitting. If it's the new spectrograms, then running that model with all other parameters unchanged should make the accuracy drop like a rock. 

### Experiment 2: The class Weights 
If the new spectrograms don't make the difference, the other thing we adjusted was the class weights. Run the original model with the new class weights, and see if the accuracy drops.  

### Experiment 3: Dense Layers w/ original CNN output
Compare the dense layers from the transfer learning in notebook 4 with the dense layers from notebook 3, using the original sigmoid output layer. Perhaps the notebook 4 layers are too large. Additionally, the activation function in notebook 4 is softmax instead of sigmoid. Using VGG19 + the dense layers from my original CNN and the original output layer, we can compare against VGG19 + the dense layers from notebook 4. 

### Experiment 4: The output layer
The tropical paper uses softmax, whereas my model uses sigmoid. Since we only have 2 classes, maybe sigmoid would work better? Using the better (by loss) model from experiment 3, try both. 

# Below Is the Original CNN Model
The input size is (64, 64, 3)  
We have changed to (97, 97, 3) so I will make that adjustment. Everything else is the same

In [5]:
# Original CNN from notebook 3, with the input resized to 97x97 RGB.
cnn = Sequential()
cnn.add(InputLayer(input_shape=(97, 97, 3)))

# Four conv + max-pool stages with a growing number of filters.
for n_filters in (5, 10, 20, 30):
    cnn.add(Conv2D(filters=n_filters, kernel_size=3, activation='relu', padding='same'))
    cnn.add(MaxPooling2D())

# Last conv stage is followed by global average pooling instead of max-pooling.
cnn.add(Conv2D(filters=40, kernel_size=3, activation='relu', padding='same'))
cnn.add(GlobalAveragePooling2D())

# Fully connected head.
for n_units in (20, 100, 100):
    cnn.add(Dense(n_units, activation='relu'))

# Single sigmoid unit for the binary label.
cnn.add(Dense(1, activation='sigmoid'))
cnn.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d (Conv2D)              (None, 97, 97, 5)         140       
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 48, 48, 5)         0         
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 48, 48, 10)        460       
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 24, 24, 10)        0         
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 24, 24, 20)        1820      
_________________________________________________________________
max_pooling2d_2 (MaxPooling2 (None, 12, 12, 20)        0         
_________________________________________________________________
conv2d_3 (Conv2D)            (None, 12, 12, 30)        5

# Experiment 1 - Was it the Spectrograms? 

In [8]:
# Metrics tracked for every experiment: raw confusion-matrix counts, the rates
# derived from them, and the area under the ROC and precision-recall curves.
METRICS = [
    *(
        metric_cls(name=short_name)
        for metric_cls, short_name in (
            (keras.metrics.TruePositives, 'tp'),
            (keras.metrics.FalsePositives, 'fp'),
            (keras.metrics.TrueNegatives, 'tn'),
            (keras.metrics.FalseNegatives, 'fn'),
            (keras.metrics.BinaryAccuracy, 'accuracy'),
            (keras.metrics.Precision, 'precision'),
            (keras.metrics.Recall, 'recall'),
        )
    ),
    keras.metrics.AUC(name='auc'),
    keras.metrics.AUC(name='prc', curve='PR'),  # precision-recall curve
]

# Original (notebook 3) class weights: class 1 counts twice as much as class 0.
class_weight = {0: .5, 1: 1}

cnn.compile(loss='binary_crossentropy', optimizer='adam', metrics=METRICS)
history = cnn.fit(
    cb_training,
    validation_data=cb_validation,
    epochs=10,
    class_weight=class_weight,
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


## Analysis 
Below is the historical output from running this model in Notebook 3 on the slightly different spectrograms. 
Already we see that the output is comparable - perhaps slightly lower on the starting accuracy, but in the correct vicinity for sure. Accuracy is sitting at .9; the historical value was .92. Precision and recall are pretty high too, with precision being a little bit lower - this matches the historical pattern as well. We also see that the validation set is performing worse because we're overfitting - this is expected, and matches the historical run well.  
While the spectrograms might be responsible for a small part of the accuracy drop, we're not seeing the severe drop we saw in the transfer learning. 

### Conclusion: Likely not the Adjusted Spectrogram and Size
```
Epoch 1/50
750/750 [==============================] - 38s 47ms/step - loss: 0.1075 - tp: 5201.0000 - fp: 1342.0000 - tn: 17030.0000 - fn: 427.0000 - accuracy: 0.9263 - precision: 0.7949 - recall: 0.9241 - auc: 0.9790 - prc: 0.9294 - val_loss: 0.2400 - val_tp: 1282.0000 - val_fp: 505.0000 - val_tn: 4096.0000 - val_fn: 117.0000 - val_accuracy: 0.8963 - val_precision: 0.7174 - val_recall: 0.9164 - val_auc: 0.9654 - val_prc: 0.8937
Epoch 2/50
750/750 [==============================] - 34s 46ms/step - loss: 0.1047 - tp: 5217.0000 - fp: 1287.0000 - tn: 17085.0000 - fn: 411.0000 - accuracy: 0.9293 - precision: 0.8021 - recall: 0.9270 - auc: 0.9802 - prc: 0.9331 - val_loss: 0.2090 - val_tp: 1211.0000 - val_fp: 368.0000 - val_tn: 4233.0000 - val_fn: 188.0000 - val_accuracy: 0.9073 - val_precision: 0.7669 - val_recall: 0.8656 - val_auc: 0.9651 - val_prc: 0.8896
Epoch 3/50
750/750 [==============================] - 32s 43ms/step - loss: 0.1028 - tp: 5250.0000 - fp: 1288.0000 - tn: 17084.0000 - fn: 378.0000 - accuracy: 0.9306 - precision: 0.8030 - recall: 0.9328 - auc: 0.9804 - prc: 0.9331 - val_loss: 0.2577 - val_tp: 1307.0000 - val_fp: 606.0000 - val_tn: 3995.0000 - val_fn: 92.0000 - val_accuracy: 0.8837 - val_precision: 0.6832 - val_recall: 0.9342 - val_auc: 0.9640 - val_prc: 0.8871
Epoch 4/50
750/750 [==============================] - 30s 40ms/step - loss: 0.0993 - tp: 5257.0000 - fp: 1227.0000 - tn: 17145.0000 - fn: 371.0000 - accuracy: 0.9334 - precision: 0.8108 - recall: 0.9341 - auc: 0.9819 - prc: 0.9380 - val_loss: 0.2320 - val_tp: 1264.0000 - val_fp: 467.0000 - val_tn: 4134.0000 - val_fn: 135.0000 - val_accuracy: 0.8997 - val_precision: 0.7302 - val_recall: 0.9035 - val_auc: 0.9648 - val_prc: 0.8953
Epoch 5/50
750/750 [==============================] - 36s 47ms/step - loss: 0.0970 - tp: 5276.0000 - fp: 1235.0000 - tn: 17137.0000 - fn: 352.0000 - accuracy: 0.9339 - precision: 0.8103 - recall: 0.9375 - auc: 0.9827 - prc: 0.9403 - val_loss: 0.2161 - val_tp: 1232.0000 - val_fp: 406.0000 - val_tn: 4195.0000 - val_fn: 167.0000 - val_accuracy: 0.9045 - val_precision: 0.7521 - val_recall: 0.8806 - val_auc: 0.9638 - val_prc: 0.8876
Epoch 6/50
750/750 [==============================] - 30s 40ms/step - loss: 0.0952 - tp: 5309.0000 - fp: 1263.0000 - tn: 17109.0000 - fn: 319.0000 - accuracy: 0.9341 - precision: 0.8078 - recall: 0.9433 - auc: 0.9831 - prc: 0.9399 - val_loss: 0.2231 - val_tp: 1269.0000 - val_fp: 463.0000 - val_tn: 4138.0000 - val_fn: 130.0000 - val_accuracy: 0.9012 - val_precision: 0.7327 - val_recall: 0.9071 - val_auc: 0.9652 - val_prc: 0.8932
Epoch 7/50
750/750 [==============================] - 31s 42ms/step - loss: 0.0922 - tp: 5327.0000 - fp: 1235.0000 - tn: 17137.0000 - fn: 301.0000 - accuracy: 0.9360 - precision: 0.8118 - recall: 0.9465 - auc: 0.9840 - prc: 0.9448 - val_loss: 0.2125 - val_tp: 1241.0000 - val_fp: 387.0000 - val_tn: 4214.0000 - val_fn: 158.0000 - val_accuracy: 0.9092 - val_precision: 0.7623 - val_recall: 0.8871 - val_auc: 0.9643 - val_prc: 0.8929
Epoch 8/50
750/750 [==============================] - 36s 48ms/step - loss: 0.0911 - tp: 5319.0000 - fp: 1178.0000 - tn: 17194.0000 - fn: 309.0000 - accuracy: 0.9380 - precision: 0.8187 - recall: 0.9451 - auc: 0.9845 - prc: 0.9453 - val_loss: 0.2419 - val_tp: 1274.0000 - val_fp: 486.0000 - val_tn: 4115.0000 - val_fn: 125.0000 - val_accuracy: 0.8982 - val_precision: 0.7239 - val_recall: 0.9107 - val_auc: 0.9648 - val_prc: 0.8852
Epoch 9/50
750/750 [==============================] - 32s 43ms/step - loss: 0.0866 - tp: 5317.0000 - fp: 1098.0000 - tn: 17274.0000 - fn: 311.0000 - accuracy: 0.9413 - precision: 0.8288 - recall: 0.9447 - auc: 0.9859 - prc: 0.9502 - val_loss: 0.2211 - val_tp: 1175.0000 - val_fp: 322.0000 - val_tn: 4279.0000 - val_fn: 224.0000 - val_accuracy: 0.9090 - val_precision: 0.7849 - val_recall: 0.8399 - val_auc: 0.9597 - val_prc: 0.8879
Epoch 10/50
750/750 [==============================] - 31s 42ms/step - loss: 0.0849 - tp: 5354.0000 - fp: 1074.0000 - tn: 17298.0000 - fn: 274.0000 - accuracy: 0.9438 - precision: 0.8329 - recall: 0.9513 - auc: 0.9862 - prc: 0.9498 - val_loss: 0.2267 - val_tp: 1251.0000 - val_fp: 428.0000 - val_tn: 4173.0000 - val_fn: 148.0000 - val_accuracy: 0.9040 - val_precision: 0.7451 - val_recall: 0.8942 - val_auc: 0.9634 - val_prc: 0.8893
Epoch 11/50
750/750 [==============================] - 31s 41ms/step - loss: 0.0823 - tp: 5357.0000 - fp: 1067.0000 - tn: 17305.0000 - fn: 271.0000 - accuracy: 0.9442 - precision: 0.8339 - recall: 0.9518 - auc: 0.9871 - prc: 0.9529 - val_loss: 0.2571 - val_tp: 1226.0000 - val_fp: 432.0000 - val_tn: 4169.0000 - val_fn: 173.0000 - val_accuracy: 0.8992 - val_precision: 0.7394 - val_recall: 0.8763 - val_auc: 0.9587 - val_prc: 0.8802
Epoch 12/50
750/750 [==============================] - 36s 48ms/step - loss: 0.0791 - tp: 5378.0000 - fp: 995.0000 - tn: 17377.0000 - fn: 250.0000 - accuracy: 0.9481 - precision: 0.8439 - recall: 0.9556 - auc: 0.9881 - prc: 0.9577 - val_loss: 0.2766 - val_tp: 1255.0000 - val_fp: 478.0000 - val_tn: 4123.0000 - val_fn: 144.0000 - val_accuracy: 0.8963 - val_precision: 0.7242 - val_recall: 0.8971 - val_auc: 0.9568 - val_prc: 0.8694
Epoch 13/50
750/750 [==============================] - 31s 41ms/step - loss: 0.0798 - tp: 5390.0000 - fp: 1036.0000 - tn: 17336.0000 - fn: 238.0000 - accuracy: 0.9469 - precision: 0.8388 - recall: 0.9577 - auc: 0.9877 - prc: 0.9558 - val_loss: 0.2448 - val_tp: 1179.0000 - val_fp: 343.0000 - val_tn: 4258.0000 - val_fn: 220.0000 - val_accuracy: 0.9062 - val_precision: 0.7746 - val_recall: 0.8427 - val_auc: 0.9558 - val_prc: 0.8767
Epoch 14/50
750/750 [==============================] - 30s 40ms/step - loss: 0.0742 - tp: 5418.0000 - fp: 969.0000 - tn: 17403.0000 - fn: 210.0000 - accuracy: 0.9509 - precision: 0.8483 - recall: 0.9627 - auc: 0.9890 - prc: 0.9599 - val_loss: 0.2540 - val_tp: 1223.0000 - val_fp: 385.0000 - val_tn: 4216.0000 - val_fn: 176.0000 - val_accuracy: 0.9065 - val_precision: 0.7606 - val_recall: 0.8742 - val_auc: 0.9578 - val_prc: 0.8803
```

# Experiment 2: The Class Weights 
We also adjusted the class weights in the transfer learning notebook. Since this alters the importance of different classes, we want to make sure that this is not the cause of the dropped accuracy. 


In [12]:
# 'balanced' weights from scikit-learn: n_samples / (n_classes * count(class)),
# computed over every label in the answers file.
class_labels = np.unique(answers.label)
balanced_weights = compute_class_weight(class_weight='balanced', classes=class_labels, y=answers.label)

# .tolist() yields plain Python scalars, so the dict prints the same way on
# every NumPy version and carries no NumPy types into Keras.
class_weight = dict(zip(class_labels.tolist(), balanced_weights.tolist()))
print(class_weight)

# Experiment 1 already trained `cnn` for 10 epochs. Calling cnn.fit() again would
# continue from those weights and mask any effect of the new class weights, so
# this experiment trains a freshly initialised copy of the same architecture.
cnn_balanced = keras.models.clone_model(cnn)
cnn_balanced.compile(optimizer='adam', loss='binary_crossentropy', metrics=METRICS)

history = cnn_balanced.fit(cb_training, epochs=5, validation_data=cb_validation, class_weight=class_weight)

{0: 0.652940408305402, 1: 2.1346235947061336}
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


## Analysis 
Accuracy is still high, so the class weights are not the cause of the drop. 

# Experiment 3: Smaller Layers 
I used a relatively large dropout and big dense layers, since that's what I saw in the tropical dataset. However, perhaps that is not what we want? 
Let's compare 2 models:   
VGG19 + dense layers from the original CNN  + output using sigmoid 
and  
VGG19 + dense layers from notebook 4, without the Dropout layers + output using sigmoid

In [18]:
# Experiment 3a: frozen VGG19 backbone + the dense head of the original CNN.
base_model = VGG19(include_top=False, weights='imagenet', input_shape=(97, 97, 3))
base_model.trainable = False  # propagates to every layer of the backbone

x = Flatten()(base_model.output)

# Same 20 -> 100 -> 100 dense stack as the original CNN.
for units in (20, 100, 100):
    x = Dense(units, activation='relu')(x)

predictions = Dense(1, activation='sigmoid')(x)

model = Model(inputs=base_model.input, outputs=predictions)
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=METRICS)

history = model.fit(
    cb_training,
    validation_data=cb_validation,
    epochs=5,
    class_weight=class_weight,
)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [19]:
# Experiment 3b: frozen VGG19 backbone + the two 100-unit dense layers from
# notebook 4 (no dropout), still with a sigmoid output.
base_model = VGG19(include_top=False, weights='imagenet', input_shape=(97, 97, 3))
base_model.trainable = False  # propagates to every layer of the backbone

x = Flatten()(base_model.output)
for units in (100, 100):
    x = Dense(units, activation='relu')(x)

predictions = Dense(1, activation='sigmoid')(x)

model = Model(inputs=base_model.input, outputs=predictions)
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=METRICS)

history = model.fit(
    cb_training,
    validation_data=cb_validation,
    epochs=2,
    class_weight=class_weight,
)

Epoch 1/2
Epoch 2/2


## Analysis 
The accuracy and other metrics are similar between the two model instances, so it looks like the drop is not caused by the difference in the dense layers.  
Also, the metrics are so similar that I would have trouble choosing between the models based on so few epochs. 

# Experiment 4 Output 
Try softmax as the final layer, using the same parameters as the previous experiment besides that. 


In [20]:
# Experiment 4: identical to the previous VGG19 model except that the output
# layer uses softmax. This deliberately reproduces the notebook 4 setup in
# order to isolate its effect - do not "fix" the activation in this cell.
base_model = VGG19(weights='imagenet', include_top=False, input_shape=(97,97,3))
# Freeze the pretrained backbone; only the new dense head is trained.
for layer in base_model.layers:
    layer.trainable = False

x = base_model.output
x = Flatten()(x)

x = Dense(100, activation='relu')(x)
x = Dense(100, activation='relu')(x)

# Softmax normalises across the units of the layer. With a single unit the
# result is exp(z) / exp(z) == 1.0 for every input, so the output is constant.
predictions = Dense(1, activation='softmax')(x)

model =  Model(inputs=base_model.input, outputs=predictions)
model.compile(optimizer='adam',loss='binary_crossentropy', metrics=METRICS)

history = model.fit(cb_training, epochs=2,validation_data=cb_validation, class_weight = class_weight)

Epoch 1/2
Epoch 2/2


## Analysis 
There it is! There's the drop in accuracy! 
Softmax causes a massive drop in accuracy, as well as weirdness in the validation true positive and negative counts.  
## I've made errors in Notebook 4 
Softmax is meant for multiclass outputs, and I'm working on a binary problem. That in itself wouldn't be a problem, since 2 classes is still multiple classes. My labels, however, are not one-hot encoded (they are in 1 column), so giving the dense output layer 2 units raised an error, and I, in a very silly way, gave it 1 unit instead. Softmax normalises across the output units, so with a single unit it outputs 1.0 for every input and everything is classified as the same class.  
Since we're in a binary problem, sigmoid is the right choice. 
https://medium.com/arteos-ai/the-differences-between-sigmoid-and-softmax-activation-function-12adee8cf322 

# Experiment 5 Dropout 
Since the original CNN overfit, I wanted to add a dropout layer. The tropical classification paper I've been referencing also uses dropout (rather large, .5). I set mine slightly lower - at .3. Let's give it a try. 


In [24]:
# Experiment 5: frozen VGG19 backbone + two 100-unit dense layers, each
# followed by dropout (rate .3), with a sigmoid output.
base_model = VGG19(include_top=False, weights='imagenet', input_shape=(97, 97, 3))
base_model.trainable = False  # propagates to every layer of the backbone

x = Flatten()(base_model.output)
for units in (100, 100):
    x = Dense(units, activation='relu')(x)
    x = Dropout(.3)(x)

predictions = Dense(1, activation='sigmoid')(x)

model = Model(inputs=base_model.input, outputs=predictions)
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=METRICS)

history = model.fit(
    cb_training,
    validation_data=cb_validation,
    epochs=2,
    class_weight=class_weight,
)

Epoch 1/2
Epoch 2/2


# Train Again

https://upcommons.upc.edu/bitstream/handle/2117/175744/131673.pdf?sequence=1&isAllowed=y

Two dense layers seem to be the standard in both the overview linked above and the Tropical Classification paper. 
Trying somewhat larger layers, but with dropout. 

In [None]:
# Final run: frozen VGG19 backbone + a wider (2 x 500) dense head with dropout .5.
# NOTE(review): the images reach VGG19 as loaded by image_dataset_from_directory,
# without keras.applications.vgg19.preprocess_input - confirm whether that is intended.
base_model = VGG19(weights='imagenet', include_top=False, input_shape=(97,97,3))
# Freeze the pretrained backbone; only the new dense head is trained.
for layer in base_model.layers:
    layer.trainable = False

x = base_model.output
x = Flatten()(x)

x = Dense(500, activation='relu')(x)
x = Dropout(.5)(x)
x = Dense(500, activation='relu')(x)
x = Dropout(.5)(x)

# Single sigmoid unit: the labels are a single 0/1 column.
predictions = Dense(1, activation='sigmoid')(x)

model = Model(inputs=base_model.input, outputs=predictions)
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=METRICS)

# This is a long run and earlier models overfit, so stop once the validation
# loss has not improved for 5 epochs and roll back to the best weights seen.
# An interrupted or over-trained run then still leaves a usable `model`.
early_stopping = keras.callbacks.EarlyStopping(
    monitor='val_loss',
    patience=5,
    restore_best_weights=True,
)

history = model.fit(
    cb_training,
    epochs=30,
    validation_data=cb_validation,
    class_weight=class_weight,
    callbacks=[early_stopping],
)

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30


Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
169/750 [=====>........................] - ETA: 13:32 - loss: 0.3645 - tp: 1216.0000 - fp: 986.0000 - tn: 3140.0000 - fn: 66.0000 - accuracy: 0.8055 - precision: 0.5522 - recall: 0.9485 - auc: 0.8820 - prc: 0.5743