**IMPORTING LIBRARIES**

In [None]:
!pip install imageai 
from imageai.Detection import ObjectDetection
import numpy as np 
from PIL import Image
import cv2
import os
import glob
import keras
from keras.preprocessing import image 
from keras.preprocessing.image import ImageDataGenerator
import matplotlib.pyplot as plt

**SETTING UP DATASETS AND RESIZING**

In [None]:
# Root folders of the train / validation / test splits of the bird image dataset.
train_directory = '../input/100-bird-species/birds/train'
val_directory = '../input/100-bird-species/birds/valid'
test_directory = '../input/100-bird-species/birds/test'

# One generator per split; each only rescales pixel values by 1/255.
train_datagen = ImageDataGenerator(rescale=1/255)
val_datagen = ImageDataGenerator(rescale=1/255)
test_datagen = ImageDataGenerator(rescale=1/255)

# Options shared by all three directory iterators: 224x224 RGB images,
# batches of 256, integer ("sparse") class labels.
flow_options = dict(
    target_size=(224, 224),
    color_mode='rgb',
    batch_size=256,
    class_mode='sparse',
)

train_generator = train_datagen.flow_from_directory(train_directory, **flow_options)
val_generator = val_datagen.flow_from_directory(val_directory, **flow_options)
test_generator = test_datagen.flow_from_directory(test_directory, **flow_options)


**IMPORTING OUR CLASSIFICATION MODEL**

In [None]:
# Load the saved Keras image-classification model from its HDF5 file; predict() calls it.
# NOTE(review): the path mentions "275species" while class names are read from
# train_generator.class_indices -- confirm both refer to the same set/order of classes.
model = keras.models.load_model("../input/275speciesmodel/Bird_Classification_Model.h5")

**PREDICTION FUNCTION**

In [None]:
import glob
def predict(dir):
    """Classify one image file and show it next to a reference image of the predicted class.

    Parameters
    ----------
    dir : str
        Path of the image file to classify (a file path, despite the name).

    Returns
    -------
    The class name (a key of ``train_generator.class_indices``) when the top
    probability is above 50 %, otherwise ``None``. ``None`` is also returned
    when the predicted index has no entry in ``class_indices``.
    """
    img = image.load_img(dir, target_size=(224, 224))
    # Same preprocessing as the generators: scale to [0, 1] and add a batch axis.
    img1 = np.expand_dims(image.img_to_array(img) / 255, 0)

    # Run the network once and reuse the probabilities for both the class
    # index and the confidence (the model call is the expensive part).
    probabilities = model.predict(img1)[0]
    pred = int(np.argmax(probabilities))
    confidence = round(np.max(probabilities) * 100, 2)
    if not confidence > 50:
        return None

    # Invert the name -> index mapping so the class name is a direct lookup.
    index_to_name = {index: name for name, index in train_generator.class_indices.items()}
    key = index_to_name.get(pred)
    if key is None:
        return None

    print("The predicted image of the bird is: "+str(key)+" with a probability of "+str(confidence)+"%")

    f, axarr = plt.subplots(1, 2)
    axarr[0].imshow(img)
    axarr[0].title.set_text("Bird in Video")

    # The class folder may contain no .jpg file; leave the second panel empty
    # in that case instead of failing with an IndexError.
    reference_paths = glob.glob("../input/100-bird-species/birds/train/"+str(key)+"/*.jpg")
    if reference_paths:
        actual_img = image.load_img(reference_paths[0], target_size=(224, 224))
        axarr[1].imshow(actual_img)
        axarr[1].title.set_text("Actual image")

    # Hide ticks and frames on BOTH panels; pyplot-level calls would only
    # affect the most recently created axes.
    for ax in axarr:
        ax.axis("off")

    # Show the figure before returning so figures do not accumulate when this
    # function is called for many images in one cell.
    plt.show()
    return key

**READING VIDEO**

In [None]:
# Open the recorded video file; `cam` is read frame by frame in the extraction cell below.
# NOTE(review): the result is not checked with cam.isOpened(); a wrong path would only
# show up later as a frame count of 0.
cam = cv2.VideoCapture("../input/bird-video/vlc-record-2021-08-17-18h58m51s-duck.mp4-.mp4")

In [None]:
# Create the ImageAI object detector and select RetinaNet as its model type.
detector = ObjectDetection()
detector.setModelTypeAsRetinaNet()

In [None]:
# Weights file handed to the detector (ResNet-50 / COCO, judging by the file name).
model_path = '../input/resnet/resnet50_coco_best_v2.1.0.h5'

In [None]:
# Point the detector at the weights file and load the model.
detector.setModelPath(model_path)
detector.loadModel()

In [None]:
# Object selection with only `bird` enabled; passed to detectObjectsFromImage
# in the detection loop.
custom_objects = detector.CustomObjects(bird=True)

In [None]:
# Set the relative position inside the video file to 0 before reading properties.
cam.set(cv2.CAP_PROP_POS_AVI_RATIO, 0)

# Query frame count, frame size and frame rate, truncating each to an int.
frameCount, frameWidth, frameHeight, videoFPS = (
    int(cam.get(prop))
    for prop in (
        cv2.CAP_PROP_FRAME_COUNT,
        cv2.CAP_PROP_FRAME_WIDTH,
        cv2.CAP_PROP_FRAME_HEIGHT,
        cv2.CAP_PROP_FPS,
    )
)

# One "name: value" line per property.
print(
    f"frameCount: {frameCount}",
    f"frameWidth: {frameWidth}",
    f"frameHeight: {frameHeight}",
    f"videoFPS: {videoFPS}",
    sep="\n",
)

**FRAME EXTRACTION AND OBJECT DETECTION**

In [None]:
# Sample one frame per second of video (every `videoFPS`-th frame), save it as
# ./frame<i>.jpg and run bird detection on it. Frames are streamed from the
# capture instead of being buffered, so memory use no longer grows with the
# length of the video.

# A reported FPS of 0 would make the sampling step 0; fall back to every frame.
frame_step = max(videoFPS, 1)

i = 0  # index of the sampled frame, used in the output file names
for cur in range(frameCount):
    ret, frame = cam.read()
    if not ret:
        # The decoder returned no frame before the reported frame count was
        # reached; stop instead of processing an empty frame.
        break
    if cur % frame_step != 0:
        continue

    input_path = './frame' + str(i) + '.jpg'
    output_path = 'result' + str(i) + '.png'
    cv2.imwrite(input_path, frame)

    # extract_detected_objects=True additionally asks ImageAI to save each
    # detected object as its own image; those crops are classified later.
    detections, extracted_objects = detector.detectObjectsFromImage(input_image=input_path, output_image_path=output_path, custom_objects=custom_objects, extract_detected_objects=True, minimum_percentage_probability=25)

    i += 1

# The capture is not needed after this cell; release the underlying file handle.
cam.release()

In [None]:
# Delete every file matching *.png in the working directory (the annotated
# detection images written by the detection loop).
for stale_png in glob.glob("*.png"):
    os.remove(stale_png)

In [None]:
# Delete every file matching *.jpg in the working directory (the sampled
# video frames written by the detection loop).
for stale_jpg in glob.glob("*.jpg"):
    os.remove(stale_jpg)

In [None]:
# Remove the notebook source copy from the working directory if it is there.
# Guarded so the cell can be re-run, or run where the file does not exist,
# without raising FileNotFoundError.
if os.path.exists('__notebook_source__.ipynb'):
    os.remove('__notebook_source__.ipynb')

In [None]:
# Show what is left in the working directory after the clean-up cells.
print(os.listdir('.'))

**CLASSIFYING BIRDS PRESENT IN VIDEO**

In [None]:

# import counter class from collections module
from collections import Counter

# Classify every file found inside the sub-directories of the working
# directory (the cropped detections) and collect the predicted class names.
# predict() returns None for low-confidence crops.
birdkeys = []
for entry in sorted(os.listdir('.')):
    # Only descend into real, non-hidden directories: calling os.listdir on a
    # plain file raises NotADirectoryError.
    if entry.startswith('.') or not os.path.isdir(entry):
        continue
    for crop_name in sorted(os.listdir('./' + entry)):
        birdkeys.append(predict(entry + '/' + crop_name))
print(birdkeys)

# Drop the "no confident prediction" markers before counting, so None can
# neither win the vote nor occupy one of the top-3 slots.
confident_keys = [key for key in birdkeys if key is not None]
top_species = Counter(confident_keys).most_common(3)

if top_species:
    video_prediction = top_species[0][0]
else:
    # Nothing was classified confidently; keep the name defined so later
    # cells can still compare against it.
    video_prediction = None
    print("No bird in the video was classified with sufficient confidence.")

In [None]:
import numpy as np
import pandas as pd
import wave
from scipy.io import wavfile
import os
import librosa
from librosa.feature import melspectrogram
import warnings
from sklearn import metrics
from sklearn.metrics import confusion_matrix
from sklearn.utils import shuffle
from sklearn.utils import class_weight
from PIL import Image
from uuid import uuid4
import sklearn
from sklearn.model_selection import train_test_split
from tqdm import tqdm
import keras
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras import layers
from tensorflow.keras import Input
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, Flatten, Dropout, Activation
from tensorflow.keras.layers import BatchNormalization, GlobalAveragePooling2D
from tensorflow.keras.callbacks import ModelCheckpoint, ReduceLROnPlateau, EarlyStopping
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.layers import Dense, Flatten, Dropout, Activation, LSTM, SimpleRNN, Conv1D, Input, BatchNormalization, GlobalAveragePooling2D
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications import EfficientNetB0


import matplotlib.pyplot as plt
import seaborn as sns
sns.set()

!pip install moviepy
from moviepy.editor import *

In [None]:
# Load the saved Keras audio model from its HDF5 file; predict_on_melspectrogram() calls it.
model_audio = keras.models.load_model("../input/audio-model1/best_model.h5")

In [None]:
def predict_on_melspectrogram(song_sample, sample_length):
    """Predict a class index for an audio clip from its mel spectrogram.

    Parameters
    ----------
    song_sample : array-like
        Audio samples of the clip.
    sample_length : int
        Minimum number of samples required to attempt a prediction.

    Returns
    -------
    The index of the highest-scoring output of ``model_audio``, or the string
    ``"nocall"`` when the clip has fewer than ``sample_length`` samples.
    """
    N_mels = 216

    # Guard clause: clips that are too short are not classified.
    if len(song_sample) < sample_length:
        return "nocall"

    # Pass the audio by keyword: librosa >= 0.10 makes `y` keyword-only, and
    # the keyword form is accepted by older versions as well.
    mel = melspectrogram(y=song_sample, n_mels=N_mels)
    db = librosa.power_to_db(mel)
    # Rescale to [0, 1] (minmax_scale works column-wise by default), then to 0..255 bytes.
    normalised_db = sklearn.preprocessing.minmax_scale(db)
    db_array = (np.asarray(normalised_db)*255).astype(np.uint8)

    # Stack three copies of the spectrogram, transpose so the copies become
    # the last axis, and wrap the result in a batch of one.
    prediction = model_audio.predict(np.array([np.array([db_array, db_array, db_array]).T]))
    predicted_bird = np.argmax(prediction)
    return predicted_bird

In [None]:
def predict_submission(filename):
    """Load an audio file and classify its first five seconds.

    Returns whatever ``predict_on_melspectrogram`` returns for the leading
    ``5 * sample_rate`` samples of the file.
    """
    samples, rate = librosa.load(filename)
    window = 5 * rate
    # Keep only the leading window; shorter files yield a shorter clip.
    clip = np.array(samples[:window])
    return predict_on_melspectrogram(clip, window)

In [None]:
# Extract the audio track of the video into an MP3 file for the audio model.
mp4_file = "../input/bird-video/vlc-record-2021-08-17-18h58m51s-duck.mp4-.mp4"
mp3_file = "./audio_clip_conv.mp3"

videoclip = VideoFileClip(mp4_file)
audioclip = None
try:
    audioclip = videoclip.audio
    if audioclip is None:
        # Fail with a clear message instead of an AttributeError on None.
        raise ValueError("The video has no audio track to extract: " + mp4_file)
    audioclip.write_audiofile(mp3_file)
finally:
    # Always close both clips, even if writing the audio file fails.
    if audioclip is not None:
        audioclip.close()
    videoclip.close()

In [None]:
# Label for each audio-model class index; indexed with the integer returned by predict_submission().
# NOTE(review): some entries are short codes (e.g. 'amerob') and some carry a trailing space;
# presumably they mirror the audio model's training labels -- confirm before comparing them
# with image-dataset folder names.
birb=['AMERICAN CROW', 'AMERICAN REDSTART', 'amerob', 'astfly', 'BARN SWALLOW ', "BEWICK'S WREN", 'bkhgro', 'BLUE JAY', 'buggna', 'cangoo', 'CAROLINA WREN', 'CHIPPING SPARROW', 'COMMON RAVEN', 'comred', 'COMMON TERN', 'COMMON YELLOWTHROAT', 'DARK EYED JUNCO', 'EASTERN MEADOWLARK', 'EASTERN TOWHEE', 'eucdov', 'gnwtea', 'greegr', 'grhowl', 'herthr', 'HORNED LARK', 'HOUSE FINCH', 'HOUSE SPARROW', 'HOUSE WREN', "LINCOLN'S SPARROW", 'MALLARD DUCK', 'MARSH WREN', 'NORTHERN CARDINAL', 'NORTHERN FLICKER', 'NORTHERN MOCKINGBIRD', 'ORANGE-CROWNED WARBLER', 'OVENBIRD', 'redcro', 'RED-WINGED BLACKBIRD', 'SAVANNAH SPARROW ', 'SONG SPARROW', 'spotow', 'swathr', 'TIT MOUSE', 'warvir', 'WESTERN MEADOWLARK', 'WESTERN WOOD PEWEE', 'WHITE-BREASTED NUTHATCH', 'WHITE-CROWNED SPARROW', 'WHITE-THROATED SPARROW']

In [None]:
# Classify the extracted audio track and print the predicted class index and label.
audio_file_path = "./audio_clip_conv.mp3"
#audio_file_path = "../input/bird-video/549833.mp3"
result = predict_submission(audio_file_path)
print(result)
if isinstance(result, str):
    # predict_submission returns the string "nocall" for clips that are too
    # short; it cannot be used as an index into `birb`.
    print("The audio clip is too short; no audio prediction was made.")
else:
    print(birb[result])

In [None]:

# Show a reference image from the image dataset for the bird predicted from the audio.
if isinstance(result, str):
    # "nocall": there is no predicted label to look up.
    print("No audio prediction is available, so there is no reference image to show.")
else:
    predicted_name = str(birb[result])
    reference_paths = glob.glob("../input/100-bird-species/birds/train/"+predicted_name+"/*.jpg")
    if not reference_paths:
        # Not every audio label has a matching folder in the image dataset.
        print("No reference image found for: " + predicted_name)
    else:
        actual_img = image.load_img(reference_paths[0], target_size=(224,224))
        plt.imshow(actual_img)
        plt.title("Image of Predicted Bird - "+predicted_name)
        # Hides ticks, frame and grid in one call.
        plt.axis("off")
        plt.show()

In [None]:
# Compare the species predicted from the video frames with the one predicted from the audio.
if isinstance(result, str):
    # "nocall": the audio model made no prediction, so there is nothing to compare.
    print("The audio model made no prediction. Prediction from the video model:", video_prediction)
elif video_prediction == birb[result]:
    print("The predictions made by the audio and video model are the same. The bird seen in the video is:",video_prediction)
else:
    print("The audio and video models disagree. Video model:", video_prediction, "| Audio model:", birb[result])