## Imports

In [1]:
import pandas as pd
import numpy as np
import os
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers 
from tensorflow.keras.models import Sequential, Model
from  matplotlib import pyplot as plt
import matplotlib.image as mpimg
%matplotlib inline
import cv2


## Selected Data & their (sub)directories
15 species

In [62]:
# The list of chosen birds

birds = 'OKINAWA RAIL, NORTHERN PARULA, OVENBIRD, SUPERB STARLING, WALL CREAPER, RED NAPED TROGON, NORTHERN JACANA, MAGPIE GOOSE, IVORY GULL, KOOKABURRA, KILLDEAR, PARADISE TANAGER, RED BELLIED PITTA, RUDY KINGFISHER, STRAWBERRY FINCH'
birds = birds.split(', ')
birds = sorted(birds)

# birds = ['IVORY GULL', 'KILLDEAR', 'KOOKABURRA', ...]
# len(birds) = 15
len(birds)

15

In [63]:
# List of directories of all of the 
birds

DIR = 'train'
subnames = [name for name in os.listdir(DIR) if os.path.isdir(os.path.join(DIR, name))]

# subanames = [ABBOTTS BABBLER, ABBOTS BOOBY, ...]
# len(subnames) = 400

In [64]:
# List of our chosen species' subdirectories

trainDIR = 'train/'
trainsubs = [trainDIR+str(bird)+'/' for bird in birds]

testDIR = 'test/'
testsubs = [testDIR+str(bird)+'/' for bird in birds]

validDIR = 'valid/'
validsubs = [validDIR+str(bird)+'/' for bird in birds]


# trainsubs = ['train/OKINAWA RAIL/',
#                       ...
#            'train/STRAWBERRY FINCH/']

# testsubs = ['test/OKINAWA RAIL/',
#                       ...
#            'test/STRAWBERRY FINCH/']

# validsubs = ['valid/OKINAWA RAIL/',
#                       ...
#            'valid/STRAWBERRY FINCH/']


In [65]:
# Creating a list of indices for our 15 selected birds, sorted.

indices = []
for i in range(len(subnames)):
    if subnames[i] in birds:
        indices.append(i)
        
# indices = [236, 245, 248, ...]
# len(indices) = 15

## Train/Test Data

In [66]:
# pixel size
pixels = 100

In [51]:
# Creating the list of arrays for our train data. They get normalized.

trainimglist = []
trainindlist = []
for i in range(len(trainsubs)):
    jpgs = next(os.walk(trainsubs[i]))[2][:120] #list of files in the subdir
    for jpg in jpgs:
        img_array = cv2.imread(os.path.join(trainsubs[i],jpg), cv2.IMREAD_COLOR)
        new_array = cv2.resize(img_array, (pixels, pixels))/255.
        trainimglist.append(new_array)
        trainindlist.append(i)
        
        img_array = cv2.imread(os.path.join(trainsubs[i],jpg))
        
        gray = cv2.cvtColor(img_array, cv2.COLOR_BGR2GRAY)
        new_gray = cv2.resize(img_array, (pixels, pixels))/255.
        trainimglist.append(new_gray)
        trainindlist.append(i)
        
        flip = cv2.flip(img_array, 0)
        new_flip = cv2.resize(flip, (pixels, pixels))/255.
        trainimglist.append(new_flip)
        trainindlist.append(i)
        
# len(trainimglist) = 5400            # The list of img arrays
# len(trainindlist) = 5400            # The list of indices associated with each array of same index in trainimglist
len(trainimglist), len(trainindlist)

(5400, 5400)

In [52]:
# Creating the list of arrays for our test data. They get normalized, too.

testimglist = []
testindlist = []
for i in range(len(testsubs)):
    jpgs = next(os.walk(testsubs[i]))[2] #list of files in the subdir
    for jpg in jpgs:
        img_array = cv2.imread(os.path.join(testsubs[i],jpg), cv2.IMREAD_COLOR)
        new_array = cv2.resize(img_array, (pixels, pixels))/255.
        testimglist.append(new_array)
        testindlist.append(i)
# len(testimglist) = 75            # The list of img arrays
# len(testindlist) = 75            # The list of indices associated with each array of same index in imglist

In [69]:
# Creating the list of arrays for our validation data. They get normalized, too.

validimglist = []
validindlist = []
for i in range(len(validsubs)):
    jpgs = next(os.walk(validsubs[i]))[2] #list of files in the subdir
    for jpg in jpgs:
        img_array = cv2.imread(os.path.join(validsubs[i],jpg), cv2.IMREAD_COLOR)
        new_array = cv2.resize(img_array, (pixels, pixels))/255.
        validimglist.append(new_array)
        validindlist.append(i)
        

In [54]:
# Changing our train/test data to numpy arrays

X_train = np.array(trainimglist)
y_train = np.array(trainindlist)
X_test = np.array(testimglist)
y_test = np.array(testindlist)
X_valid = np.array(testimglist)
y_valid = np.array(testindlist)

X_train.shape, y_train.shape, X_test.shape, y_test.shape, X_valid.shape, y_valid.shape


((5400, 100, 100, 3),
 (5400,),
 (75, 100, 100, 3),
 (75,),
 (75, 100, 100, 3),
 (75,))

## Current best model

In [56]:
# Current best model -- Use the next cells below to make changes and print new results while keeping these ones. Update 
# Delete these when we get a better one if desired.

from tensorflow import keras
from tensorflow.keras import layers
from keras.callbacks import EarlyStopping

X_train = np.array(trainimglist)
y_train = np.array(trainindlist)
X_test = np.array(testimglist)
y_test = np.array(testindlist)
X_valid = np.array(testimglist)
y_valid = np.array(testindlist)


num_classes = 15
input_shape = (pixels,pixels,3)

X_train = np.expand_dims(X_train, -1)
X_test = np.expand_dims(X_test, -1)
X_valid = np.expand_dims(X_valid, -1)
print("X_train shape:", X_train.shape)
print(X_train.shape[0], "train samples")
print(X_test.shape[0], "test samples")

# convert class vectors to binary class matrices
y_train = keras.utils.to_categorical(y_train, num_classes)
y_test = keras.utils.to_categorical(y_test, num_classes)
y_valid = keras.utils.to_categorical(y_test, num_classes)
X_valid = np.expand_dims(X_valid, -1)

# 4 x [ Conv2D + MaxPooling2D ] layers
model = keras.Sequential(
    [
        keras.Input(shape=input_shape),
        layers.Conv2D(32, kernel_size=(3, 3), activation="relu"),
        layers.MaxPooling2D(pool_size=(2, 2)),
        
        layers.Conv2D(64, kernel_size=(3, 3), activation="relu"),
        layers.MaxPooling2D(pool_size=(2, 2)),
        
        layers.Conv2D(128, kernel_size=(3, 3), activation="relu"),
        layers.MaxPooling2D(pool_size=(2, 2)),
        
        layers.Conv2D(256, kernel_size=(3, 3), activation="relu"),
        layers.MaxPooling2D(pool_size=(2, 2)), 
        layers.Flatten(),
        layers.Dropout(0.5),
        layers.Dense(num_classes, activation="softmax"),
    ]
)
model.summary()

X_train shape: (5400, 100, 100, 3, 1)
5400 train samples
75 test samples
Model: "sequential_4"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d_16 (Conv2D)          (None, 98, 98, 32)        896       
                                                                 
 max_pooling2d_16 (MaxPoolin  (None, 49, 49, 32)       0         
 g2D)                                                            
                                                                 
 conv2d_17 (Conv2D)          (None, 47, 47, 64)        18496     
                                                                 
 max_pooling2d_17 (MaxPoolin  (None, 23, 23, 64)       0         
 g2D)                                                            
                                                                 
 conv2d_18 (Conv2D)          (None, 21, 21, 128)       73856     
                                               

In [57]:
batch_size = 100
epochs = 15
val_split = 0.05
early_stopping = [
    EarlyStopping(monitor='val_loss', patience=4), 
    EarlyStopping(monitor='accuracy', patience=4, min_delta=.02)
]

model.compile(loss="categorical_crossentropy", optimizer="adam", metrics=["accuracy"])
model.fit(X_train, y_train, batch_size=batch_size, epochs=epochs, validation_split=val_split, callbacks=None)
#Change val_split for more traindata

###############################

#  -> isntead of xtest, ytest, try X_valid, y_valid
score = model.evaluate(X_test, y_test, verbose=0)
print("Test loss:", score[0])
print("Test accuracy:", score[1])


Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15
Test loss: 0.5504311919212341
Test accuracy: 0.8799999952316284


In [71]:
model.save('ourbestmodel')

INFO:tensorflow:Assets written to: ourbestmodel\assets


In [72]:
loaded_model = tf.keras.models.load_model('ourbestmodel')
loaded_model

<keras.engine.sequential.Sequential at 0x2ac69f9de80>

In [70]:
X_valid = np.array(testimglist)
y_valid = np.array(testindlist)

y_valid = keras.utils.to_categorical(y_test, num_classes)
X_valid = np.expand_dims(X_valid, -1)

X_valid.shape, y_valid.shape


((75, 100, 100, 3, 1), (75, 15, 15))

In [73]:
score = loaded_model.evaluate(X_valid, y_valid)
score

## Here for some reason we did not succeed in laoding this model, but on the other uploaded file 
## (named 'Project_4_add-on'), we successfully validated on an older but less performant one.

###### At the end of the 'add-on' notebook, you should be able to see this:
## from tensorflow import keras
## model1 = keras.models.load_model('bestmodel')
## score = model1.evaluate(X_valid, y_valid, verbose = 0)
## score
##

## [0.4565109312534332, 0.9200000166893005]
##      LOSS              ACCURACY

ValueError: in user code:

    File "C:\Users\sampr\anaconda3\lib\site-packages\keras\engine\training.py", line 1525, in test_function  *
        return step_function(self, iterator)
    File "C:\Users\sampr\anaconda3\lib\site-packages\keras\engine\training.py", line 1514, in step_function  **
        outputs = model.distribute_strategy.run(run_step, args=(data,))
    File "C:\Users\sampr\anaconda3\lib\site-packages\keras\engine\training.py", line 1507, in run_step  **
        outputs = model.test_step(data)
    File "C:\Users\sampr\anaconda3\lib\site-packages\keras\engine\training.py", line 1473, in test_step
        self.compute_loss(x, y, y_pred, sample_weight)
    File "C:\Users\sampr\anaconda3\lib\site-packages\keras\engine\training.py", line 918, in compute_loss
        return self.compiled_loss(
    File "C:\Users\sampr\anaconda3\lib\site-packages\keras\engine\compile_utils.py", line 201, in __call__
        loss_value = loss_obj(y_t, y_p, sample_weight=sw)
    File "C:\Users\sampr\anaconda3\lib\site-packages\keras\losses.py", line 141, in __call__
        losses = call_fn(y_true, y_pred)
    File "C:\Users\sampr\anaconda3\lib\site-packages\keras\losses.py", line 245, in call  **
        return ag_fn(y_true, y_pred, **self._fn_kwargs)
    File "C:\Users\sampr\anaconda3\lib\site-packages\keras\losses.py", line 1789, in categorical_crossentropy
        return backend.categorical_crossentropy(
    File "C:\Users\sampr\anaconda3\lib\site-packages\keras\backend.py", line 5083, in categorical_crossentropy
        target.shape.assert_is_compatible_with(output.shape)

    ValueError: Shapes (None, 15, 15) and (None, 15) are incompatible
