In [None]:
#-------------------------------------------------------------------------------------------------------------------------------------------------------#
"""@author Shaela Khan, Updated 1st April,2022,  Updated : 7th Aug, Sunday
 Prelude1.ipynb  -> In testing phase
# Bird Recognition from birdsongs using Deep learning -> We are working on building a model using Deep learning techniques to identify bird species from recordings of their songs.
# DataSource : - https://www.kaggle.com/datasets/rtatman/british-birdsong-dataset  (Aggregated from the original Xeno-Canto Dataset.)
# Provided dataset has a directory with - songs
#                                       - .csv file with labels
#                                       -
#  Dataset contains audio recordings (birdsong files), from which we create a train and a test dataset for training purposes.
# We then create a CNN model, possibly building on pre-trained models, that can tell apart the different classes defined - this is a supervised learning
# problem.
# Input: train,test
# Output : The file should have
#          : Classify image from testing provided.
##
"""
#-------------------------------------------------------------------------------------------------------------------------------------------------------#

In [None]:
from mpl_toolkits.mplot3d import Axes3D
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt # plotting
import numpy as np # linear algebra
import os # accessing directory structure
import pandas as pd #data processing, CSV file I/O (e.g. pd.read_csv)

import soundfile as sf # audio data processing.
import librosa
import scipy.signal as signal
import IPython.display
import IPython.display as ipd    # play audio , display the widget to play audio etc..
from IPython.display import display, Audio
from ipywidgets import interactive
%matplotlib inline





# Greeting banner; it is printed only once every import above has succeeded.
print("Hello ! And welcome to the Road runner project !")

In [None]:
# List the entries of the dataset folder (the path is relative to the notebook's working directory).
print(os.listdir('./small Xeno-Canto'))

# Distribution graphs (histogram/bar graph) of column data
def plotPerColumnDistribution(df, nGraphShown, nGraphPerRow):
    """Plot a grid of per-column distribution charts for ``df``.

    Only columns with more than 1 and fewer than 50 distinct values are
    eligible. A column whose first value is not numeric is drawn as a bar
    chart of its value counts; otherwise a histogram is drawn.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame whose columns are inspected.
    nGraphShown : int
        Maximum number of charts to draw.
    nGraphPerRow : int
        Number of charts placed on each row of the grid.

    Returns
    -------
    None
        The figure is shown with ``plt.show()``. When nothing can be plotted
        a short message is printed and no figure is created.
    """
    nunique = df.nunique()
    # For displaying purposes, pick columns that have between 1 and 50 unique values
    df = df[[col for col in df if 1 < nunique[col] < 50]]
    nCol = df.shape[1]
    nShown = min(nCol, nGraphShown)
    if nShown <= 0:
        # Without this guard an empty selection would request a zero-height figure.
        print(f'No distribution plots shown: nothing to plot ({nCol} eligible columns, nGraphShown={nGraphShown})')
        return
    columnNames = list(df)
    # Ceiling division with `//`: plt.subplot and figsize need an integer row count,
    # and the grid is sized for the charts actually drawn, not for every eligible column.
    nGraphRow = (nShown + nGraphPerRow - 1) // nGraphPerRow
    plt.figure(num = None, figsize = (6 * nGraphPerRow, 8 * nGraphRow), dpi = 80, facecolor = 'w', edgecolor = 'k')
    for i in range(nShown):
        plt.subplot(nGraphRow, nGraphPerRow, i + 1)
        columnDf = df.iloc[:, i]
        # The type of the first value decides between categorical and numeric rendering.
        if not np.issubdtype(type(columnDf.iloc[0]), np.number):
            valueCounts = columnDf.value_counts()
            valueCounts.plot.bar()
        else:
            columnDf.hist()
        plt.ylabel('counts')
        plt.xticks(rotation = 90)
        plt.title(f'{columnNames[i]} (column {i})')
    plt.tight_layout(pad = 1.0, w_pad = 1.0, h_pad = 1.0)
    plt.show()


# Correlation matrix
def plotCorrelationMatrix(df, graphWidth):
    """Show the correlation matrix of ``df`` as a colour-coded image.

    Columns containing NaN, non-numeric columns and constant columns are
    discarded first. If fewer than two columns remain, a message is printed
    and nothing is drawn.

    Parameters
    ----------
    df : pandas.DataFrame
        Source frame. An optional ``dataframeName`` attribute is used in the
        plot title; ``'DataFrame'`` is used when it is absent.
    graphWidth : int or float
        Width and height of the (square) figure.

    Returns
    -------
    None
    """
    # Read the label first (the filtered copies below do not carry the ad-hoc attribute),
    # and tolerate frames on which it was never set.
    filename = getattr(df, 'dataframeName', 'DataFrame')
    # `axis` must be passed by keyword: recent pandas rejects the positional form.
    df = df.dropna(axis='columns') # drop columns with NaN
    # Correlation is only defined for numeric data; recent pandas raises on string columns.
    df = df.select_dtypes(include=['number', 'bool'])
    df = df[[col for col in df if df[col].nunique() > 1]] # keep columns where there are more than 1 unique values
    if df.shape[1] < 2:
        print(f'No correlation plots shown: The number of non-NaN or constant columns ({df.shape[1]}) is less than 2')
        return
    corr = df.corr()
    plt.figure(num=None, figsize=(graphWidth, graphWidth), dpi=80, facecolor='w', edgecolor='k')
    corrMat = plt.matshow(corr, fignum = 1)
    plt.xticks(range(len(corr.columns)), corr.columns, rotation=90)
    plt.yticks(range(len(corr.columns)), corr.columns)
    plt.gca().xaxis.tick_bottom()
    plt.colorbar(corrMat)
    plt.title(f'Correlation Matrix for {filename}', fontsize=15)
    plt.show()


    # Scatter and density plots
def plotScatterMatrix(df, plotSize, textSize):
    """Show a scatter matrix (KDE on the diagonal) of the numeric columns of ``df``.

    Non-numeric columns, columns containing NaN and constant columns are
    removed, and at most the first 10 remaining columns are plotted. Each
    panel above the diagonal is annotated with the pairwise correlation
    coefficient.

    Parameters
    ----------
    df : pandas.DataFrame
        Source frame.
    plotSize : int or float
        Width and height of the (square) figure.
    textSize : int or float
        Font size of the correlation annotations.

    Returns
    -------
    None
        A message is printed and nothing is drawn when no column survives
        the filtering.
    """
    df = df.select_dtypes(include =[np.number]) # keep only numerical columns
    # Remove columns that would make the kernel density estimate degenerate.
    # `axis` must be passed by keyword: recent pandas rejects the positional form.
    df = df.dropna(axis='columns')
    df = df[[col for col in df if df[col].nunique() > 1]] # keep columns where there are more than 1 unique values
    # Cap the number of columns to keep the kernel density plots tractable.
    columnNames = list(df)[:10]
    if not columnNames:
        print('No scatter plots shown: there are no numeric, non-NaN, non-constant columns')
        return
    df = df[columnNames]
    ax = pd.plotting.scatter_matrix(df, alpha=0.75, figsize=[plotSize, plotSize], diagonal='kde')
    corrs = df.corr().values

    # Use numpy directly; `plt.np` only works while pyplot happens to expose numpy.
    for i, j in zip(*np.triu_indices_from(ax, k = 1)):
        ax[i, j].annotate('Corr. coef = %.3f' % corrs[i, j], (0.8, 0.2), xycoords='axes fraction', ha='center', va='center', size=textSize)
    plt.suptitle('Scatter and Density Plot')
    plt.show()

# Reading in data.
nRowsRead = 1000 # set to None to read the whole file
df1 = pd.read_csv('./small Xeno-Canto/birdsong_metadata.csv', delimiter=',', nrows = nRowsRead)
# Label picked up by plotCorrelationMatrix for its plot title.
df1.dataframeName = 'birdsong_metadata.csv'
nRow, nCol = df1.shape
print(f'There are {nRow} rows and {nCol} columns')
print(df1.head())
# DataFrame.info() writes its summary itself and returns None,
# so wrapping it in print() only appended a stray "None" line.
df1.info(verbose=True)

In [None]:
# A bare expression is rendered only when it is the last statement of a cell,
# so the preview has to be displayed explicitly before the plots are drawn.
display(df1.head(5))
# Up to 10 distribution charts, 5 per row.
plotPerColumnDistribution(df1, 10, 5)

In [None]:
# Correlation matrix of df1; the second argument is the width and height of the square figure.
plotCorrelationMatrix(df1, 8)
# Scatter/KDE matrix of df1's numeric columns: figure size 9, annotation text size 10.
plotScatterMatrix(df1, 9, 10)

In [None]:
# Now, let's read in a sample birdsong file.

# sf.read is unpacked into the decoded samples and the sample rate of the file.
data, samplerate = sf.read("./small Xeno-Canto/songs/songs/xc154285.flac")
# SciPy spectrogram, unpacked into frequency bins, segment times and the spectrogram values.
# NOTE(review): assumes `data` is mono (1-D) — confirm how stereo recordings should be handled.
freq, time, Sxx = signal.spectrogram(data, samplerate, scaling='spectrum')
# The raw values are colour-mapped as they are (no dB conversion is applied here).
plt.pcolormesh(time, freq, Sxx, shading='auto',cmap='Purples')

# add axis labels
plt.ylabel('Frequency [Hz]')
plt.xlabel('Time [sec]')

In [None]:
# The previous function didn't do much for the visual spectrum.
# Hence we use a different function on here. Better visualization for bioacoustics.
#Pxx, freqs, bins, im = plt.specgram(data, Fs=samplerate,cmap='coolwarm_r') # 'CMRmap_r' , 'gnuplot2_r', 'cool' was okay
#ax = plt.axes() # testing #ax.set_facecolor('cyan')

# add axis labels
#plt.ylabel('Frequency [Hz]')
#plt.xlabel('Time [sec]')
#plt.show()

In [None]:
# Matplotlib's own specgram is used here: it gives a better visualization for bioacoustics.
def _show_specgram(samples, rate):
    """Draw a labelled 'viridis' spectrogram of ``samples`` and return what plt.specgram returns."""
    outputs = plt.specgram(samples, Fs=rate, cmap='viridis') # other colour maps tried: 'CMRmap_r' , 'gnuplot2_r'
    # add axis labels
    plt.ylabel('Frequency [Hz]')
    plt.xlabel('Time [sec]')
    plt.show()
    return outputs


# first recording (read in an earlier cell)
Pxx, freqs, bins, im = _show_specgram(data, samplerate)

# different bird song visualization
data2, samplerate = sf.read("./small Xeno-Canto/songs/songs/xc27080.flac")
Pxx, freqs, bins, im = _show_specgram(data2, samplerate)

In [None]:
# Starting pre-processing: load one recording with librosa.

audio_data = "./dataset/northern cardinal/XC11502.wav"
# librosa.load is unpacked into the audio time series `x` and its sampling rate `sr`.
x , sr = librosa.load(audio_data)
print(type(x), type(sr))  # e.g. <class 'numpy.ndarray'> <class 'int'>
print(x.shape)            # e.g. (94316,) at sr = 22050

# By default librosa.load returns a mono time series as a numpy array, resampled to about 22 kHz.
# Passing sr=44100 resamples to 44.1 kHz instead.
# NOTE(review): going by the unpacking above, `sound` is bound to the whole (samples, rate) pair,
# not to a bare array; it is not used again in the cells visible here.
sound = librosa.load(audio_data, sr=44100)
# or to disable resampling.
# librosa.load('./small Xeno-Canto/songs/songs/xc29230.flac', sr=None)
# The sample rate is the number of samples of audio carried per second, measured in Hz or kHz.
# Playing audio: IPython.display.Audio can play the audio inside the Jupyter notebook.

# NOTE(review): only a message is printed here; no audio widget is created in this cell.
print("Play my audio damn it !!! ")
# NOTE(review): these imports sit mid-notebook; Audio is already imported in the import cell.
import IPython
from IPython.display import Audio
import librosa.display
from scipy.io.wavfile import write

#cwd = './small Xeno-Canto/songs/xc27080.flac'
#Audio(cwd)
#Audio("./voices/Home By The Sea.mp3")

In [None]:
#Visualizing Audio:
#We can plot the audio array using librosa.display.waveshow:
%matplotlib inline
import librosa.display
plt.figure(figsize=(14, 5))
# Waveform of `x`; `sr` is passed along so the time axis can be expressed in seconds.
librosa.display.waveshow(x, sr=sr)

In [None]:
"""import pygame
pygame.init()
pygame.mixer.pre_init(44100, 16, 2, 4096) # Frequency, channel size, channels, buffersize
pygame.mixer.init()

song = pygame.mixer.Sound('./small Xeno-Canto/songs/songs/xc154285.flac')
song.play()
"""
"""
    Playing with the pygame library works - however - it doesn't allow use of a player with controls.

"""

In [None]:
# Short-time Fourier transform of `x`.
X = librosa.stft(x)
# Magnitude of the STFT converted to decibels for display.
Xdb = librosa.amplitude_to_db(abs(X))
plt.figure(figsize=(14, 5))
# Spectrogram with time on the x-axis and a linear frequency axis in Hz.
librosa.display.specshow(Xdb, sr=sr, x_axis='time', y_axis='hz')
plt.colorbar()

In [None]:
# Based on the spectrogram above, most of the action takes place at the bottom of the
# frequency range, hence the y-axis is drawn on a log scale here.
# Reuses `Xdb` and `sr` from the cells above.
librosa.display.specshow(Xdb, sr=sr, x_axis='time', y_axis='log')
plt.colorbar()

In [None]:
# Optional demo - this part is not required or necessary at this point.
# It synthesises a pure tone and plays it. Dedicated `tone_*` names are used so that the
# loaded recording held in `x` / `sr` is NOT overwritten: the feature-extraction cells
# further down keep analysing the bird song instead of this sine wave.
tone_sr = 22050 # sample rate
tone_duration = 5.0    # seconds
tone_t = np.linspace(0, tone_duration, int(tone_duration*tone_sr), endpoint=False) # time variable
tone = 0.5*np.sin(2*np.pi*220*tone_t)# pure sine wave at 220 Hz
#Playing the audio
# display() is needed: a bare Audio(...) expression is only rendered as the last statement of a cell.
display(ipd.Audio(tone, rate=tone_sr)) # load a NumPy array
#Saving the audio
#librosa.output.write_wav('tone_220.wav', tone, tone_sr)  , Doesn't work in the new version of librosa
# import soundfile as sf
#sf.write('tone2.wav',samplerate=tone_sr,data=tone)
# Audio('tone2.wav')
# End of the optional demo.


In [None]:
import sklearn
from sklearn.preprocessing import MinMaxScaler
# NOTE(review): the spectral-centroid experiment below is wrapped in a string literal, so none of
# it is executed. Points to check before re-enabling it:
#   - `spectral_centroids.shape(775,)` looks like pasted output fused onto the expression;
#     as written it would try to call the shape attribute.
#   - `normalize` returns the result of `scaler.fit(...)`; presumably scaled values were intended
#     (verify the MinMaxScaler API, including whether `fit` accepts `axis`).
#   - `spectral_centroid(x, sr=sr)` passes the signal positionally; confirm against the installed
#     librosa version.
"""
scaler = MinMaxScaler()
spectral_centroids = librosa.feature.spectral_centroid(x, sr=sr)[0]
spectral_centroids.shape(775,)
# Computing the time variable for visualization
plt.figure(figsize=(12, 4))
frames = range(len(spectral_centroids))
t = librosa.frames_to_time(frames)


# Normalising the spectral centroid for visualisation

def normalize(x, axis=0):
    return scaler.fit(x, axis=axis)
#Plotting the Spectral Centroid along the waveform
librosa.display.waveshow(x, sr=sr, alpha=0.4)
plt.plot(t, normalize(spectral_centroids), color='b')
 """


In [None]:
# Chromagram of the loaded recording.
# The hop length is named once and shared by the analysis and the display: specshow derives
# the time axis from it, so a mismatch (previously 10 vs. the analysis default of 512)
# mislabels the x-axis.
CHROMA_HOP_LENGTH = 512
# The signal is passed as keyword `y`: librosa >= 0.10 no longer accepts it positionally.
chromagram = librosa.feature.chroma_stft(y=x, sr=sr, hop_length=CHROMA_HOP_LENGTH)
plt.figure(figsize=(15, 5))
librosa.display.specshow(chromagram, sr=sr, hop_length=CHROMA_HOP_LENGTH, x_axis='time', y_axis='chroma', cmap='coolwarm')

In [None]:
# MFCCs of the loaded recording.
# Signal and sample rate are passed by keyword: librosa >= 0.10 rejects positional audio arguments.
mfccs = librosa.feature.mfcc(y=x, sr=sr, n_mfcc=40)
print(mfccs.shape)  # first dimension equals n_mfcc (40 here)
# (A stale pasted output line, `(20, 97)`, used to sit here as a no-op expression; removed.)
#Displaying  the MFCCs:
plt.figure(figsize=(15, 7))
librosa.display.specshow(mfccs, sr=sr, x_axis='time')

# Mel spectrogram (top) and MFCCs (bottom) on a shared time axis.
S = librosa.feature.melspectrogram(y=x, sr=sr, n_mels=128,fmax=8000)
fig, ax = plt.subplots(nrows=2, sharex=True)
img = librosa.display.specshow(librosa.power_to_db(S, ref=np.max), x_axis='time', y_axis='mel', fmax=8000,ax= ax[0])
fig.colorbar(img, ax=[ax[0]])
ax[0].set(title='Mel spectrogram')
ax[0].label_outer()
img = librosa.display.specshow(mfccs, x_axis='time', ax=ax[1])
fig.colorbar(img, ax=[ax[1]])
ax[1].set(title='MFCC')
# Just a theory - but these images can also be used as input to cnn models , yes? Because different feature tactics.
# Just a theory - but these images can also be used as input to cnn models , yes? Because different feature tactics.

In [None]:
# Render an axis-free spectrogram image of one recording (candidate CNN input).
cmap = plt.get_cmap('inferno')
plt.figure(figsize=(8,8))


songname = './dataset/northern cardinal/XC11502.wav'  # Because you need the whole path.
y, sr = librosa.load(songname, mono=True, duration=5)
# X / Xdb are still computed so the kernel state matches what this cell produced before;
# they are not what gets plotted below.
X = librosa.stft(y)
Xdb = librosa.amplitude_to_db(abs(X))
# plt.specgram computes the spectrogram itself and expects the 1-D time-domain signal.
# Feeding it the 2-D dB matrix `Xdb` (as before) does not plot that matrix, so the raw signal `y` is passed.
plt.specgram(y, NFFT=2048, Fs=2, Fc=0, noverlap=128, cmap=cmap, sides='default', mode='default', scale='dB')
plt.axis('off')
        #base = os.path.splitext(filename)[0]
        # os.rename(my_file, base + '.bin')
        #plt.savefig('img_data-Voices/'+base+'.png')
plt.show()
#plt.clf()

