<a href="https://colab.research.google.com/github/kunakl07/Killer-Whale-sound-classification/blob/master/1_Whale_sound_detection_first.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [0]:
!git clone https://github.com/axiom-data-science/OrcaCNN-data

Cloning into 'OrcaCNN-data'...
remote: Enumerating objects: 68, done.[K
remote: Counting objects:   1% (1/68)   [Kremote: Counting objects:   2% (2/68)   [Kremote: Counting objects:   4% (3/68)   [Kremote: Counting objects:   5% (4/68)   [Kremote: Counting objects:   7% (5/68)   [Kremote: Counting objects:   8% (6/68)   [Kremote: Counting objects:  10% (7/68)   [Kremote: Counting objects:  11% (8/68)   [Kremote: Counting objects:  13% (9/68)   [Kremote: Counting objects:  14% (10/68)   [Kremote: Counting objects:  16% (11/68)   [Kremote: Counting objects:  17% (12/68)   [Kremote: Counting objects:  19% (13/68)   [Kremote: Counting objects:  20% (14/68)   [Kremote: Counting objects:  22% (15/68)   [Kremote: Counting objects:  23% (16/68)   [Kremote: Counting objects:  25% (17/68)   [Kremote: Counting objects:  26% (18/68)   [Kremote: Counting objects:  27% (19/68)   [Kremote: Counting objects:  29% (20/68)   [Kremote: Counting objects:  30% (21/6

In [0]:
cd /content/OrcaCNN-data

/content/OrcaCNN-data


In [0]:
rm /content/OrcaCNN-data/data/training/positive/class_labels.json

In [0]:
!pwd

/content/OrcaCNN-data


In [0]:

import librosa
import os
from sklearn.model_selection import train_test_split
from keras.utils import to_categorical
import numpy as np
from tqdm import tqdm

DATA_PATH = "/content/OrcaCNN-data/data/training/"


# Input: Folder Path
# Output: Tuple (Label, Indices of the labels, one-hot encoded labels)
def get_labels(path=DATA_PATH):
    labels = os.listdir(path)
    label_indices = np.arange(0, len(labels))
    return labels, label_indices, to_categorical(label_indices)


# Handy function to convert wav2mfcc
def wav2mfcc(file_path, max_len=11):
    wave, sr = librosa.load(file_path, mono=True, sr=None)
    wave = wave[::3]
    mfcc = librosa.feature.mfcc(wave, sr=16000)

    # If maximum length exceeds mfcc lengths then pad the remaining ones
    if (max_len > mfcc.shape[1]):
        pad_width = max_len - mfcc.shape[1]
        mfcc = np.pad(mfcc, pad_width=((0, 0), (0, pad_width)), mode='constant')

    # Else cutoff the remaining parts
    else:
        mfcc = mfcc[:, :max_len]
    
    return mfcc


def save_data_to_array(path=DATA_PATH, max_len=11):
    labels, _, _ = get_labels(path)

    for label in labels:
        # Init mfcc vectors
        mfcc_vectors = []

        wavfiles = [path + label + '/' + wavfile for wavfile in os.listdir(path + '/' + label)]
        for wavfile in tqdm(wavfiles, "Saving vectors of label - '{}'".format(label)):
            mfcc = wav2mfcc(wavfile, max_len=max_len)
            mfcc_vectors.append(mfcc)
        np.save(label + '.npy', mfcc_vectors)


def get_train_test(split_ratio=0.6, random_state=42):
    # Get available labels
    labels, indices, _ = get_labels(DATA_PATH)

    # Getting first arrays
    X = np.load(labels[0] + '.npy')
    y = np.zeros(X.shape[0])

    # Append all of the dataset into one single array, same goes for y
    for i, label in enumerate(labels[1:]):
        x = np.load(label + '.npy')
        X = np.vstack((X, x))
        y = np.append(y, np.full(x.shape[0], fill_value= (i + 1)))

    assert X.shape[0] == len(y)

    return train_test_split(X, y, test_size= (1 - split_ratio), random_state=random_state, shuffle=True)



def prepare_dataset(path=DATA_PATH):
    labels, _, _ = get_labels(path)
    data = {}
    for label in labels:
        data[label] = {}
        data[label]['path'] = [path  + label + '/' + wavfile for wavfile in os.listdir(path + '/' + label)]

        vectors = []

        for wavfile in data[label]['path']:
            wave, sr = librosa.load(wavfile, mono=True, sr=None)
            # Downsampling
            wave = wave[::3]
            mfcc = librosa.feature.mfcc(wave, sr=16000)
            vectors.append(mfcc)

        data[label]['mfcc'] = vectors

    return data


def load_dataset(path=DATA_PATH):
    data = prepare_dataset(path)

    dataset = []

    for key in data:
        for mfcc in data[key]['mfcc']:
            dataset.append((key, mfcc))

    return dataset[:100]


# print(prepare_dataset(DATA_PATH))



Using TensorFlow backend.


In [0]:



import keras
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten, Conv2D, MaxPooling2D
from keras.utils import to_categorical

# Second dimension of the feature is dim2
feature_dim_2 = 11

# Save data to array file first
save_data_to_array(max_len=feature_dim_2)

# # Loading train set and test set
X_train, X_test, y_train, y_test = get_train_test()

# # Feature dimension
feature_dim_1 = 20
channel = 1
epochs = 500
batch_size = 100
verbose = 1
num_classes = 2

# Reshaping to perform 2D convolution
X_train = X_train.reshape(X_train.shape[0], feature_dim_1, feature_dim_2, channel)
X_test = X_test.reshape(X_test.shape[0], feature_dim_1, feature_dim_2, channel)

y_train_hot = to_categorical(y_train)
y_test_hot = to_categorical(y_test)


Saving vectors of label - 'positive': 100%|██████████| 19/19 [00:00<00:00, 95.73it/s]
Saving vectors of label - 'negative': 100%|██████████| 25/25 [00:01<00:00, 15.97it/s]


In [0]:
def get_model():
    model = Sequential()
    model.add(Conv2D(32, kernel_size=(2, 2), activation='relu', input_shape=(feature_dim_1, feature_dim_2, channel)))
    model.add(Conv2D(48, kernel_size=(2, 2), activation='relu'))
    model.add(Conv2D(120, kernel_size=(2, 2), activation='relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Dropout(0.25))
    model.add(Flatten())
    model.add(Dense(128, activation='relu'))
    model.add(Dropout(0.25))
    model.add(Dense(64, activation='relu'))
    model.add(Dropout(0.4))
    model.add(Dense(num_classes, activation='softmax'))
    model.compile(loss=keras.losses.categorical_crossentropy,
                  optimizer=keras.optimizers.Adadelta(),
                  metrics=['accuracy'])
    return model

# Predicts one sample
def predict(filepath, model):
    sample = wav2mfcc(filepath)
    sample_reshaped = sample.reshape(1, feature_dim_1, feature_dim_2, channel)
    return get_labels()[0][
            np.argmax(model.predict(sample_reshaped))
    ]


In [0]:

model = get_model()
model.fit(X_train, y_train_hot, batch_size=batch_size, epochs=epochs, verbose=verbose, validation_data=(X_test, y_test_hot))
model.summary()

Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.
Instructions for updating:
Use tf.cast instead.
Train on 26 samples, validate on 18 samples
Epoch 1/1200
Epoch 2/1200
Epoch 3/1200
Epoch 4/1200
Epoch 5/1200
Epoch 6/1200
Epoch 7/1200
Epoch 8/1200
Epoch 9/1200
Epoch 10/1200
Epoch 11/1200
Epoch 12/1200
Epoch 13/1200
Epoch 14/1200
Epoch 15/1200
Epoch 16/1200
Epoch 17/1200
Epoch 18/1200
Epoch 19/1200
Epoch 20/1200
Epoch 21/1200
Epoch 22/1200
Epoch 23/1200
Epoch 24/1200
Epoch 25/1200
Epoch 26/1200
Epoch 27/1200
Epoch 28/1200
Epoch 29/1200
Epoch 30/1200
Epoch 31/1200
Epoch 32/1200
Epoch 33/1200
Epoch 34/1200
Epoch 35/1200
Epoch 36/1200
Epoch 37/1200
Epoch 38/1200
Epoch 39/1200
Epoch 40/1200
Epoch 41/1200
Epoch 42/1200
Epoch 43/1200
Epoch 44/1200
Epoch 45/1200
Epoch 46/1200
Epoch 47/1200
Epoch 48/1200
Epoch 49/1200
Epoch 50/1200
Epoch 51/1200
Epoch 52/1200
Ep

In [0]:
!pip install pydub


Collecting pydub
  Downloading https://files.pythonhosted.org/packages/79/db/eaf620b73a1eec3c8c6f8f5b0b236a50f9da88ad57802154b7ba7664d0b8/pydub-0.23.1-py2.py3-none-any.whl
Installing collected packages: pydub
Successfully installed pydub-0.23.1


In [0]:
  from pydub import AudioSegment

AUDIO_FILE="/content/OrcaCNN-data/data/long_samples/long_sample_01.wav"
sound = AudioSegment.from_file(AUDIO_FILE)
i=0
k=0
p=0
m=len(sound)
halfway_point = 4000
for i in range(i,m,4000):

        first_half = sound[p:halfway_point]
     #   sh=sound[p+4000:halfway_point+4000]


        k=k+4000
        first_half.export("/content/OrcaCNN-data/data/long_sample{0}.wav".format(k),format="wav")
    #    sh.export("/content/Pests-data/popo{0}.wav".format(k),format="wav")
        halfway_point=halfway_point+4000
        p=p+4000

In [0]:
l=4000
i=4000
u=0
for i in range(i,m,4000):
  u=u+1
  print("For sample{0} the output is".format(u))
  print(predict('/content/OrcaCNN-data/data/long_sample{0}.wav'.format(l), model=model))
  l=l+4000

For sample1 the output is
positive
For sample2 the output is
positive
For sample3 the output is
negative
For sample4 the output is
positive
For sample5 the output is
positive
For sample6 the output is
negative
For sample7 the output is
negative


In [0]:
'''
import os
import wave

import pylab
def graph_spectrogram(wav_file):
    sound_info, frame_rate = get_wav_info(wav_file)
    pylab.figure(num=None, figsize=(19, 12))
    pylab.subplot(111)
    pylab.title('spectrogram of %r' % wav_file)
    pylab.specgram(sound_info, Fs=frame_rate)
    pylab.savefig('spectrogram.png')
def get_wav_info(wav_file):
    wav = wave.open(wav_file, 'r')
    frames = wav.readframes(-1)
    sound_info = pylab.fromstring(frames, 'int16')
    frame_rate = wav.getframerate()
    wav.close()
    return sound_info, frame_rate
graph_spectrogram('/content/OrcaCNN-data/data/long_samples/long_sample_01.wav')'''

In [0]:
!pip install -U numpy==1.11.0.


In [0]:
!pip install numpy==1.11.0