In [None]:
import os
# Show which datasets are mounted under the Kaggle input directory.
input_root = '/kaggle/input'
os.listdir(input_root)

In [None]:
# Report how many files each directory of the dataset contains.
for dirname, _, filenames in os.walk('/kaggle/input/esc-animalsounds'):
    # len() replaces the hand-rolled counter loop.
    file_count = len(filenames)
    # Directories that hold no files directly are not reported.
    if file_count > 0:
        print("{} : {} images".format(dirname, file_count))
        print("--------------")

In [None]:
from keras.models import Sequential
from keras.layers.normalization import BatchNormalization
from keras.layers.convolutional import Conv2D
from keras.layers.convolutional import MaxPooling2D
from keras.layers.core import Activation
from keras.layers.core import Flatten
from keras.layers.core import Dropout
from keras.layers.core import Dense
from keras import backend as K

In [None]:
class SmallerVGGNet:
    """Factory for a small VGG-style convolutional network."""

    @staticmethod
    def build(width, height, depth, classes, finalAct):
        """Assemble the network and return it (the model is not compiled here).

        Args:
            width: input image width in pixels.
            height: input image height in pixels.
            depth: number of input channels.
            classes: number of units in the output layer.
            finalAct: activation applied to the output layer.

        Returns:
            The constructed ``Sequential`` model.
        """
        # Choose the input layout and the batch-norm axis from the backend's
        # image data format ("channels_first" vs. channels last).
        if K.image_data_format() == "channels_first":
            inputShape = (depth, height, width)
            chanDim = 1
        else:
            inputShape = (height, width, depth)
            chanDim = -1

        model = Sequential()

        def add_conv_unit(filters, **conv_kwargs):
            # CONV => RELU => BN with a 3x3 kernel and "same" padding.
            model.add(Conv2D(filters, (3, 3), padding="same", **conv_kwargs))
            model.add(Activation("relu"))
            model.add(BatchNormalization(axis=chanDim))

        def close_stage(pool):
            # POOL => DROPOUT(0.25) terminates every convolutional stage.
            model.add(MaxPooling2D(pool_size=pool))
            model.add(Dropout(0.25))

        # Stage 1: one 32-filter conv unit, 3x3 pooling.
        add_conv_unit(32, input_shape=inputShape)
        close_stage((3, 3))

        # Stages 2 and 3: two conv units each (64, then 128 filters),
        # 2x2 pooling.
        for filters in (64, 128):
            add_conv_unit(filters)
            add_conv_unit(filters)
            close_stage((2, 2))

        # Fully connected head: FC(1024) => RELU => BN => DROPOUT(0.5).
        model.add(Flatten())
        model.add(Dense(1024))
        model.add(Activation("relu"))
        model.add(BatchNormalization())
        model.add(Dropout(0.5))

        # Output layer: one unit per class, activation chosen by the caller.
        model.add(Dense(classes))
        model.add(Activation(finalAct))

        return model

In [None]:
import matplotlib
# import the necessary packages
from keras.preprocessing.image import ImageDataGenerator
from keras.optimizers import Adam
from keras.preprocessing.image import img_to_array
from sklearn.preprocessing import MultiLabelBinarizer
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
import numpy as np
import random
import pickle
import cv2
import os

In [None]:
# Training configuration shared by the cells below.
EPOCHS = 100     # number of training epochs
INIT_LR = 0.001  # initial learning rate handed to the optimizer
BS = 30          # batch size
# Image size as (height, width, channels).
IMAGE_DIMS = (150, 200, 3)

In [None]:
# Collect the full path of every file below the dataset root, in sorted order
# so the starting point does not depend on directory traversal order.
imagePaths = sorted(
    os.path.join(folder, name)
    for folder, _, names in os.walk(r'/kaggle/input/esc-animalsounds/')
    for name in names
)

# Shuffle with a fixed seed so the order is reproducible between runs.
random.seed(42)
random.shuffle(imagePaths)

In [None]:
# Preview one sample (the fifth path after shuffling), resized to the
# dimensions configured in IMAGE_DIMS.
sample_path = imagePaths[4]
# cv2.resize takes the target size as (width, height), hence the index swap.
image = cv2.resize(cv2.imread(sample_path), (IMAGE_DIMS[1], IMAGE_DIMS[0]))
# Convert to a single grey channel before displaying.
plt.imshow(cv2.cvtColor(image, cv2.COLOR_BGR2GRAY))
plt.show()

In [None]:
# Load every file as an image array and derive its label from the name of the
# directory that contains it.
data = []
labels = []
skipped = []

for imagePath in imagePaths:
    image = cv2.imread(imagePath)
    if image is None:
        # cv2.imread returns None for unreadable / non-image files; passing
        # that to cv2.resize would abort the whole load with an opaque error.
        skipped.append(imagePath)
        continue

    # cv2.resize takes the target size as (width, height).
    image = cv2.resize(image, (IMAGE_DIMS[1], IMAGE_DIMS[0]))
    data.append(img_to_array(image))

    # The parent directory name is the class. It is wrapped in a 1-tuple
    # because the labels are later passed to MultiLabelBinarizer.
    label = imagePath.split(os.path.sep)[-2]
    labels.append((label,))

if skipped:
    print("Skipped {} unreadable file(s), e.g. {}".format(len(skipped), skipped[0]))

labels[0:5]

In [None]:
# Sanity check: number of loaded images, then number of labels.
print(len(data), len(labels), sep="\n")

In [None]:
# Stack the images into one float array and divide by 255.0
# (the values end up in [0, 1] for 8-bit images).
data = np.array(data, dtype=np.float64)
data /= 255.0
# Labels become an array as well.
labels = np.array(labels)

In [None]:
# Fit the binarizer on the label tuples and replace `labels` with the
# transformed result.
mlb = MultiLabelBinarizer()
labels = mlb.fit_transform(labels)

# Number of classes found, followed by the classes themselves.
print(len(mlb.classes_), mlb.classes_, sep="\n")

In [None]:
# Hold out 20% of the samples for validation; the fixed random_state keeps
# the split the same on every run.
train_X, valid_X, train_y, valid_y = train_test_split(
    data,
    labels,
    test_size=0.2,
    random_state=42,
)

In [None]:
# IMAGE_DIMS is ordered (height, width, channels); the output layer gets one
# unit per class found by the binarizer.
img_height, img_width, img_channels = IMAGE_DIMS
model = SmallerVGGNet.build(
    width=img_width,
    height=img_height,
    depth=img_channels,
    classes=len(mlb.classes_),
    finalAct="sigmoid",
)

In [None]:
# Adam optimizer. The learning rate is passed as `learning_rate`; the `lr`
# spelling is a deprecated alias of the same argument.
opt = Adam(learning_rate=INIT_LR, decay=INIT_LR / EPOCHS)

# Binary cross-entropy matches the sigmoid output layer requested when the
# model was built.
model.compile(
    loss="binary_crossentropy",
    optimizer=opt,
    metrics=["accuracy"],
)

model.summary()

In [None]:
# Train on the training split and evaluate on the validation split after each
# epoch; the returned object is used later to plot the history.
fit_results = model.fit(
    x=train_X,
    y=train_y,
    batch_size=BS,
    epochs=EPOCHS,
    verbose=1,
    validation_data=(valid_X, valid_y),
)

In [None]:
# Training vs. validation curves: loss on the left panel, accuracy on the right.
history = fit_results.history
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(15,10))

panels = (
    (ax1, 'loss', 'val_loss'),
    (ax2, 'accuracy', 'val_accuracy'),
)
for axis, train_key, valid_key in panels:
    axis.plot(history[train_key], label='Training')
    axis.plot(history[valid_key], label='Validation')
    axis.legend()

ax1.set_title('Loss')
ax2.set_title('Accuracy')

In [None]:
# Save the trained network.
model.save('AnimalSounds_SmallerVGGNet.h5')

# Save the fitted label binarizer alongside it. The context manager closes
# the file even if pickling raises.
with open("AS_VGGN.pickle", "wb") as f:
    pickle.dump(mlb, f)