In [None]:
import os
import warnings
warnings.filterwarnings(action='ignore')

import pandas as pd
import librosa
import numpy as np

from sklearn.utils import shuffle
from PIL import Image
from tqdm import tqdm
import matplotlib.pyplot as plt

import tensorflow as tf
# Global vars
RANDOM_SEED = 1337  # NOTE(review): not referenced in any of the visible cells — confirm it is still needed
SAMPLE_RATE = 32000  # presumably audio sample rate in Hz — TODO confirm
SIGNAL_LENGTH = 5 # seconds
SPEC_SHAPE = (48, 128) # height x width
FMIN = 500  # presumably lower frequency bound in Hz for the spectrograms — TODO confirm
FMAX = 12500  # presumably upper frequency bound in Hz for the spectrograms — TODO confirm

In [None]:
# Load metadata file
train = pd.read_csv('../input/birdclef-2021/train_metadata.csv',)

# Count the recordings available for every species.
# `sort=False` keeps the groups in order of first appearance (the same order as
# `primary_label.unique()`), and because the counts come straight from the
# groupby index each one stays attached to its own label. Zipping `unique()`
# with a default (sorted) groupby result would pair counts with the wrong species.
birds_count = train.groupby('primary_label', sort=False).size().to_dict()

# No minimum-count or rating threshold is applied here: every species is kept.
most_represented_birds = list(birds_count)

TRAIN = train.query('primary_label in @most_represented_birds')
LABELS = sorted(TRAIN.primary_label.unique())

# Let's see how many species and samples we have left
print('NUMBER OF SPECIES IN TRAIN DATA:', len(LABELS))
print('NUMBER OF SAMPLES IN TRAIN DATA:', len(TRAIN))
print('LABELS:', most_represented_birds)

In [None]:
# Gather the path of every spectrogram image, walking one species folder at a time.
# Paths are assembled by plain string concatenation with "/" separators.
spec_root = "../input/saving-melspecs/melspectrogram_dataset/"
TRAIN_SPECS = []
for folder in os.listdir(spec_root):
    folder_path = spec_root + folder
    TRAIN_SPECS.extend(folder_path + "/" + file_name for file_name in os.listdir(folder_path))

In [None]:
# Preview the first 12 spectrograms on a 3 x 4 grid, titled with their file names.
plt.figure(figsize=(15, 7))
for i in range(12):
    spec_path = TRAIN_SPECS[i]
    plt.subplot(3, 4, i + 1)
    # The paths are built with "/" separators, so take the file name with
    # os.path.basename instead of splitting on os.sep (which is "\\" on Windows).
    plt.title(os.path.basename(spec_path))
    # The context manager closes the image file once it has been drawn.
    with Image.open(spec_path) as spec:
        plt.imshow(spec, origin='lower')

In [None]:
# Convert every spectrogram image into a normalized float32 array plus a one-hot
# label, and write them to ../working/ in chunks of CHUNK_SIZE accepted samples
# ("<count>_train_specs.npy" / "<count>_train_label.npy").
# Whatever is left after the last full chunk stays in `train_specs` / `train_labels`.
CHUNK_SIZE = 1000

# One dictionary lookup per sample instead of a linear LABELS.index() scan
label_index = {label: idx for idx, label in enumerate(LABELS)}

# Samples are collected in plain lists and stacked once per chunk; stacking on
# every iteration would re-copy the whole accumulated array each time.
spec_buffer, label_buffer = [], []
i = 0  # number of accepted samples so far
for path in tqdm(TRAIN_SPECS):

    # Open image and convert to numpy array (the context manager closes the file)
    with Image.open(path) as img:
        spec = np.array(img, dtype='float32')

    # Normalize between 0.0 and 1.0
    # and exclude samples with nan (e.g. a constant image divides 0 by 0)
    spec -= spec.min()
    spec /= spec.max()
    if not spec.max() == 1.0 or not spec.min() == 0.0:
        continue

    # Add channel axis to 2D array; the batch axis is added by np.stack below
    spec_buffer.append(np.expand_dims(spec, -1))

    # One-hot label; the species is the name of the folder that holds the image.
    # os.path is used because the paths are built with "/" rather than os.sep.
    target = np.zeros((len(LABELS)), dtype='float32')
    bird = os.path.basename(os.path.dirname(path))
    target[label_index[bird]] = 1.0
    label_buffer.append(target)

    i += 1
    if i % CHUNK_SIZE == 0:
        np.save("../working/" + str(i) + "_train_specs.npy", np.stack(spec_buffer))
        np.save("../working/" + str(i) + "_train_label.npy", np.stack(label_buffer))
        spec_buffer, label_buffer = [], []

# Expose the remainder as arrays: always 4-D specs and 2-D labels, even when it
# holds a single sample or nothing at all.
# NOTE(review): the empty fallback assumes the images have SPEC_SHAPE — confirm.
if spec_buffer:
    train_specs = np.stack(spec_buffer)
    train_labels = np.stack(label_buffer)
else:
    train_specs = np.empty((0,) + SPEC_SHAPE + (1,), dtype='float32')
    train_labels = np.empty((0, len(LABELS)), dtype='float32')

            

In [None]:
# Persist whatever was accumulated after the last full chunk.
# NOTE(review): the chunk-listing cell further down skips any file whose prefix is
# not an integer, so "Last_train_specs.npy" is not part of the merged array; the
# label file here is also named "..._labels.npy" while the numbered chunks use
# "..._label.npy".
np.save(file="../working/Last_train_specs.npy", arr=train_specs)
np.save(file="../working/Last_train_labels.npy", arr=train_labels)

In [None]:
# Shape of the labels still held in memory (what was accumulated since the last chunk was saved)
train_labels.shape

In [None]:
# Shape of the spectrograms still held in memory (what was accumulated since the last chunk was saved)
train_specs.shape

## Converting all these individual NPY files into one

In [None]:
import numpy as np
import os

In [None]:
def list_chunk_ids(directory="./", suffix="_train_specs.npy"):
    """Return the sorted integer prefixes of the chunk files found in `directory`.

    A chunk file is named "<integer><suffix>", e.g. "1000_train_specs.npy".
    Files that carry the suffix but whose prefix is not an integer
    (e.g. "Last_train_specs.npy") are printed and left out of the result.

    Args:
        directory: folder to scan.
        suffix: file-name ending that identifies a chunk file.

    Returns:
        List of chunk ids in ascending order.
    """
    chunk_ids = []
    for file_name in os.listdir(directory):
        if not file_name.endswith(suffix):
            continue
        try:
            chunk_ids.append(int(file_name[:-len(suffix)]))
        except ValueError:
            # Only a non-numeric prefix is expected here; any other error should surface
            print(file_name)
    chunk_ids.sort()
    return chunk_ids


listFiles = list_chunk_ids("./")
print(listFiles)

In [None]:
import os.path
from os import path

# Print the id of every chunk for which the spectrogram file or the label file is missing
for chunk_id in listFiles:
    prefix = "./" + str(chunk_id)
    has_specs = path.exists(prefix + "_train_specs.npy")
    has_label = path.exists(prefix + "_train_label.npy")
    if not (has_specs and has_label):
        print(chunk_id)

In [None]:
# Number of numbered chunk ids that were found
len(listFiles)

In [None]:
# Merge the per-chunk spectrogram files into four group arrays. The groups are
# positions 0-49, 50-99, 100-149 and 150-end of the sorted `listFiles`.
# Each group is loaded into a list and concatenated once; concatenating after
# every load would re-copy the accumulated array on each iteration.
group_bounds = ((0, 50), (50, 100), (100, 150), (150, len(listFiles)))

spec_groups = []
for start, stop in group_bounds:
    group_ids = listFiles[start:stop]
    if not group_ids:
        raise ValueError(
            "No chunk files for positions %d-%d of listFiles (only %d chunks found)"
            % (start, stop, len(listFiles))
        )
    group_arrays = []
    for chunk_id in group_ids:
        print(chunk_id)
        group_arrays.append(np.load("./" + str(chunk_id) + "_train_specs.npy"))
    spec_groups.append(np.concatenate(group_arrays, axis=0))
    # Release the individual chunks right away; only the merged group is needed
    del group_arrays

(train_specs_0To50k,
 train_specs_50To100k,
 train_specs_100To150k,
 train_specs_150To172k) = spec_groups
# Drop the extra references so later cells can free each group by rebinding its name
del spec_groups

In [None]:
# Disabled cell: everything below sits inside a string literal, so none of it is executed.
'''
#Save 50 iteration x 4 files
np.save("./train_specs_0To50k.npy",train_specs_0To50k)
np.save("./train_specs_50To100k.npy",train_specs_50To100k)
np.save("./train_specs_100To150k.npy",train_specs_100To150k)
np.save("./train_specs_150To172k.npy",train_specs_150To172k)
train_specs_0To50k = []
train_specs_50To100k = []
train_specs_100To150k = []
train_specs_150To172k = []
'''

In [None]:
# Merge the four spectrogram groups pairwise. Each pair of inputs is rebound to an
# empty list right after it has been merged, before the next pair is concatenated.
train_specs_0To100k = np.concatenate([train_specs_0To50k, train_specs_50To100k])
train_specs_0To50k, train_specs_50To100k = [], []

train_specs_100To170k = np.concatenate([train_specs_100To150k, train_specs_150To172k])
train_specs_100To150k, train_specs_150To172k = [], []

print(train_specs_0To100k.shape, train_specs_100To170k.shape)

In [None]:
# Disabled cell: everything below sits inside a string literal, so none of it is executed.
'''
#Save 100K iteration x 2 files
np.save("./train_specs_0To100k.npy",train_specs_0To100k)
np.save("./train_specs_100To170k.npy",train_specs_100To170k)
print(train_specs_0To100k.shape,train_specs_100To170k.shape)
train_specs_0To100k = []
train_specs_100To170k = []

'''

In [None]:
# Join the two halves into the full spectrogram array and report its shape,
# then rebind the halves to empty lists so their memory can be reclaimed.
train_specs_0To170k = np.concatenate([train_specs_0To100k, train_specs_100To170k])
print(train_specs_0To170k.shape)
train_specs_0To100k, train_specs_100To170k = [], []

In [None]:
# Write the merged spectrogram array to disk, then drop the in-memory copy
np.save(file="./train_specs_0To170k.npy", arr=train_specs_0To170k)
train_specs_0To170k = []

In [None]:
# Report the on-disk size of the merged spectrogram file
file_size = os.stat('./train_specs_0To170k.npy').st_size
print("File Size is :", file_size, "bytes")

In [None]:
# Merge the per-chunk label files into four group arrays. The groups are
# positions 0-49, 50-99, 100-149 and 150-end of the sorted `listFiles`,
# i.e. the same grouping that is used for the spectrogram chunks.
# Each group is loaded into a list and concatenated once; concatenating after
# every load would re-copy the accumulated array on each iteration.
group_bounds = ((0, 50), (50, 100), (100, 150), (150, len(listFiles)))

label_groups = []
for start, stop in group_bounds:
    group_ids = listFiles[start:stop]
    if not group_ids:
        raise ValueError(
            "No chunk files for positions %d-%d of listFiles (only %d chunks found)"
            % (start, stop, len(listFiles))
        )
    group_arrays = []
    for chunk_id in group_ids:
        print(chunk_id)
        group_arrays.append(np.load("./" + str(chunk_id) + "_train_label.npy"))
    label_groups.append(np.concatenate(group_arrays, axis=0))
    # Release the individual chunks right away; only the merged group is needed
    del group_arrays

(train_label_0To50k,
 train_label_50To100k,
 train_label_100To150k,
 train_label_150To172k) = label_groups
# Drop the extra references so later cells can free each group by rebinding its name
del label_groups

In [None]:
# Show the shape of each of the four label groups, one per line
print(
    *(group.shape for group in (train_label_0To50k,
                                train_label_50To100k,
                                train_label_100To150k,
                                train_label_150To172k)),
    sep="\n",
)

In [None]:
# Disabled cell: everything below sits inside a string literal, so none of it is executed.
'''
#Save 50 iteration x 4 files
np.save("./train_label_0To50k.npy",train_label_0To50k)
np.save("./train_label_50To100k.npy",train_label_50To100k)
np.save("./train_label_100To150k.npy",train_label_100To150k)
np.save("./train_label_150To172k.npy",train_label_150To172k)
train_label_0To50k = []
train_label_50To100k = []
train_label_100To150k = []
train_label_150To172k = []
'''

In [None]:
# Merge the four label groups pairwise. Each pair of inputs is rebound to an
# empty list right after it has been merged, before the next pair is concatenated.
train_label_0To100k = np.concatenate([train_label_0To50k, train_label_50To100k])
train_label_0To50k, train_label_50To100k = [], []

train_label_100To170k = np.concatenate([train_label_100To150k, train_label_150To172k])
train_label_100To150k, train_label_150To172k = [], []

print(len(train_label_0To100k), len(train_label_100To170k))

In [None]:
# Disabled cell: everything below sits inside a string literal, so none of it is executed.
'''#Save 100K iteration x 2 files
np.save("./train_label_0To100k.npy",train_label_0To100k)
np.save("./train_label_100To170k.npy",train_label_100To170k)
print(train_label_0To100k.shape,train_label_100To170k.shape)
train_label_0To100k = []
train_label_100To170k = []'''

In [None]:
# Join the two halves into the full label array, then rebind the halves to
# empty lists so their memory can be reclaimed.
train_label_0To170k = np.concatenate([train_label_0To100k, train_label_100To170k])
train_label_0To100k, train_label_100To170k = [], []

In [None]:
# Write the merged label array to disk and display its shape
np.save(file="./train_label_0To170k.npy", arr=train_label_0To170k)
train_label_0To170k.shape

In [None]:
# Report the on-disk size of the merged label file
file_size = os.stat('./train_label_0To170k.npy').st_size
print("File Size is :", file_size, "bytes")

Delete All Individual NPY files

In [None]:
# Deleting the per-chunk files cannot be undone, so refuse to do it unless both
# merged arrays have actually been written to disk.
merged_outputs = ("./train_specs_0To170k.npy", "./train_label_0To170k.npy")
missing_outputs = [name for name in merged_outputs if not os.path.exists(name)]
if missing_outputs:
    raise FileNotFoundError(
        "Refusing to delete chunk files; merged output missing: " + ", ".join(missing_outputs)
    )

for i in listFiles:
    pathForSpecs = "./" + str(i)+"_train_specs.npy"
    pathForLabel = "./" + str(i)+"_train_label.npy"

    for chunk_file in (pathForSpecs, pathForLabel):
        # Tolerate re-running the cell: a chunk that is already gone is not an error
        if os.path.exists(chunk_file):
            os.remove(chunk_file)

## Test for Concatenate

In [None]:
# Disabled cell: everything below sits inside a string literal, so none of it is executed.
'''fisrtArraySpecs = np.load("./1000_train_specs.npy")
secondArraySpecs = np.load("./2000_train_specs.npy")

print(fisrtArraySpecs[:4].shape,secondArraySpecs[:4].shape)
arr = np.concatenate((fisrtArraySpecs[:4], secondArraySpecs[:4]), axis=0)
print(arr.shape)'''

In [None]:
# Disabled cell: everything below sits inside a string literal, so none of it is executed.
'''from PIL import Image as im
#data = im.fromarray(array)

plt.figure(figsize=(15, 7))
for i in range(8):
    newArray = (arr[i] * 255).astype(np.uint8)
    newArray = np.squeeze(newArray, axis=2)
    spec = Image.fromarray(newArray)
    plt.subplot(2, 4, i + 1)
    #plt.title(TRAIN_SPECS[i].split(os.sep)[-1])
    plt.imshow(spec, origin='lower')'''

In [None]:
# Disabled cell: everything below sits inside a string literal, so none of it is executed.
'''from PIL import Image as im
#data = im.fromarray(array)

plt.figure(figsize=(15, 7))
for i in range(4):
    newArray = (fisrtArraySpecs[i] * 255).astype(np.uint8)
    newArray = np.squeeze(newArray, axis=2)
    spec = Image.fromarray(newArray)
    plt.subplot(2, 4, i + 1)
    #plt.title(TRAIN_SPECS[i].split(os.sep)[-1])
    plt.imshow(spec, origin='lower')'''

In [None]:
# Disabled cell: everything below sits inside a string literal, so none of it is executed.
'''from PIL import Image as im
#data = im.fromarray(array)

plt.figure(figsize=(15, 7))
for i in range(4):
    newArray = (secondArraySpecs[i] * 255).astype(np.uint8)
    newArray = np.squeeze(newArray, axis=2)
    spec = Image.fromarray(newArray)
    plt.subplot(2, 4, i + 1)
    #plt.title(TRAIN_SPECS[i].split(os.sep)[-1])
    plt.imshow(spec, origin='lower')'''