In [None]:
import numpy as np 
import pandas as pd

import librosa
import librosa.display
import IPython.display as ipd
import matplotlib.pyplot as plt
import scipy as sp

import os

### Looking at Audio Data

In [None]:
# Decode the first training clip; librosa.load hands back (samples, sampling rate)
first_clip_path = '../input/rfcx-species-audio-detection/train/00204008d.flac'
file_1, sr = librosa.load(first_clip_path)

# Inspect the container types, then the number of samples and the rate
print(type(file_1), type(sr))
print(file_1.shape, sr)

- `librosa.load` returns the audio time series as a NumPy array
- `sr` is the sampling rate (22.05 kHz)

In [None]:
# Load four more training clips. librosa.load returns (samples, sampling rate);
# only the samples (element 0) are kept for these clips.
extra_clip_paths = (
    '../input/rfcx-species-audio-detection/train/003b04435.flac',
    '../input/rfcx-species-audio-detection/train/003bec244.flac',
    '../input/rfcx-species-audio-detection/train/005f1f9a5.flac',
    '../input/rfcx-species-audio-detection/train/006ab765f.flac',
)
file_2, file_3, file_4, file_5 = (
    librosa.load(clip_path)[0] for clip_path in extra_clip_paths
)

In [None]:
# Audio widget for file_1, using sampling rate sr (the cell's last expression is displayed)
ipd.Audio(file_1, rate=sr)

In [None]:
# Audio widget for file_2, using sampling rate sr (the cell's last expression is displayed)
ipd.Audio(file_2, rate=sr)

In [None]:
# Audio widget for file_3, using sampling rate sr (the cell's last expression is displayed)
ipd.Audio(file_3, rate=sr)

In [None]:
# Audio widget for file_4, using sampling rate sr (the cell's last expression is displayed)
ipd.Audio(file_4, rate=sr)

In [None]:
# Audio widget for file_5, using sampling rate sr (the cell's last expression is displayed)
ipd.Audio(file_5, rate=sr)

In [None]:
# Time covered by a single sample: the reciprocal of the sampling rate
sample_duration = 1 / sr
print(f'sample_duration: {sample_duration:.6f} seconds')

In [None]:
# Clip length in seconds = duration of one sample x number of samples
n_samples = len(file_1)
duration = sample_duration * n_samples
print("audio file length: {} seconds".format(duration))

visualizing the waveforms

In [None]:
# Plot the waveform of each of the five clips, one subplot per clip.

# librosa 0.10 removed `waveplot` in favour of `waveshow`; use whichever this
# installation provides so the cell runs on both old and new versions.
draw_waveform = getattr(librosa.display, 'waveshow', None) or librosa.display.waveplot

waveforms = {
    'file_1': file_1,
    'file_2': file_2,
    'file_3': file_3,
    'file_4': file_4,
    'file_5': file_5,
}

plt.figure(figsize=(13, 20))

for row, (clip_name, clip) in enumerate(waveforms.items(), start=1):
    plt.subplot(len(waveforms), 1, row)
    # Pass the sampling rate explicitly instead of relying on the library default
    draw_waveform(clip, sr=sr, alpha=0.5)
    plt.title(clip_name)
    plt.ylim((-1, 1))

plt.subplots_adjust(wspace=None, hspace=0.6)

### Getting the amplitude envelope feature

In [None]:
def amplitude_envelope(signal, frame_size, hop_length):
    """Return the amplitude envelope of ``signal``: the largest sample in each frame.

    Frames start every ``hop_length`` samples and span ``frame_size`` samples.
    Frames that run past the end of the signal are truncated, not padded.
    The maximum is taken over the signed sample values (no absolute value).

    Parameters
    ----------
    signal : sequence or np.ndarray
        One-dimensional sequence of samples.
    frame_size : int
        Number of samples in each frame.
    hop_length : int
        Number of samples between the starts of consecutive frames.

    Returns
    -------
    np.ndarray
        One value per frame, i.e. ``ceil(len(signal) / hop_length)`` values;
        empty when ``signal`` is empty.
    """
    # np.max reduces each frame in compiled code rather than comparing samples
    # one by one in Python. The list is called `envelope` so that it does not
    # shadow the function's own name.
    envelope = [
        np.max(signal[start:start + frame_size])
        for start in range(0, len(signal), hop_length)
    ]
    return np.array(envelope)

In [None]:
# Frame geometry (in samples) used by the frame-based features in the cells below
FRAME_SIZE, HOP_LENGTH = 1024, 512

# Amplitude envelope of each of the five clips
ae_file_1, ae_file_2, ae_file_3, ae_file_4, ae_file_5 = (
    amplitude_envelope(clip, FRAME_SIZE, HOP_LENGTH)
    for clip in (file_1, file_2, file_3, file_4, file_5)
)

# Number of frames in the first envelope
len(ae_file_1)

visualizing the amplitude envelope for the five audio files

In [None]:
# Time stamp (in seconds) of every amplitude-envelope frame of file_1.
# NOTE(review): the plotting cell that follows recomputes `frames` and `t`
# with the same two statements, so this cell looks redundant.
frames = range(len(ae_file_1))
t = librosa.frames_to_time(frames, hop_length=HOP_LENGTH)

In [None]:
# Overlay each clip's amplitude envelope (red) on its waveform, one subplot per clip.

# librosa 0.10 removed `waveplot` in favour of `waveshow`; use whichever this
# installation provides so the cell runs on both old and new versions.
draw_waveform = getattr(librosa.display, 'waveshow', None) or librosa.display.waveplot

# Frame times of file_1's envelope, kept under the names `frames` and `t`
frames = range(0, len(ae_file_1))
t = librosa.frames_to_time(frames, sr=sr, hop_length=HOP_LENGTH)

envelopes = {
    'file_1': (file_1, ae_file_1),
    'file_2': (file_2, ae_file_2),
    'file_3': (file_3, ae_file_3),
    'file_4': (file_4, ae_file_4),
    'file_5': (file_5, ae_file_5),
}

plt.figure(figsize=(13, 20))

for row, (clip_name, (clip, envelope)) in enumerate(envelopes.items(), start=1):
    # Each envelope gets a time axis of its own length; sharing file_1's axis
    # would make plt.plot fail as soon as two clips differ in length.
    envelope_times = librosa.frames_to_time(
        range(len(envelope)), sr=sr, hop_length=HOP_LENGTH
    )
    plt.subplot(len(envelopes), 1, row)
    draw_waveform(clip, sr=sr, alpha=0.5)
    plt.plot(envelope_times, envelope, color='r')
    plt.title(clip_name)
    plt.ylim((-0.5, 0.5))

plt.subplots_adjust(wspace=None, hspace=0.6)

### RMSE - root-mean-square energy

In [None]:
# Frame-wise root-mean-square energy of each clip. rms() returns a 2-D array,
# and [0] selects its first row as a 1-D series.
# The signal is passed as `y=`: librosa >= 0.10 made the arguments of
# feature.rms keyword-only, so a positional signal raises a TypeError there.
rmse_file_1, rmse_file_2, rmse_file_3, rmse_file_4, rmse_file_5 = (
    librosa.feature.rms(y=clip, frame_length=FRAME_SIZE, hop_length=HOP_LENGTH)[0]
    for clip in (file_1, file_2, file_3, file_4, file_5)
)

# The shape is expected to match the amplitude envelope, since both use the
# same FRAME_SIZE and HOP_LENGTH.
# NOTE(review): librosa centres/pads frames by default, so the frame count may
# differ by one from the envelope for some signal lengths; confirm.
rmse_file_1.shape

visualizing the rmse for all the five audio files

In [None]:
# Overlay each clip's RMS energy (red) on its waveform, one subplot per clip.

# librosa 0.10 removed `waveplot` in favour of `waveshow`; use whichever this
# installation provides so the cell runs on both old and new versions.
draw_waveform = getattr(librosa.display, 'waveshow', None) or librosa.display.waveplot

# Frame times of file_1's RMS series. `t` is kept under this name because the
# zero-crossing-rate plot further down reads it.
frames = range(0, len(rmse_file_1))
t = librosa.frames_to_time(frames, sr=sr, hop_length=HOP_LENGTH)

rms_series = {
    'file_1': (file_1, rmse_file_1),
    'file_2': (file_2, rmse_file_2),
    'file_3': (file_3, rmse_file_3),
    'file_4': (file_4, rmse_file_4),
    'file_5': (file_5, rmse_file_5),
}

plt.figure(figsize=(13, 20))

for row, (clip_name, (clip, rms_values)) in enumerate(rms_series.items(), start=1):
    # Each series gets a time axis of its own length; sharing file_1's axis
    # would make plt.plot fail as soon as two clips differ in length.
    rms_times = librosa.frames_to_time(
        range(len(rms_values)), sr=sr, hop_length=HOP_LENGTH
    )
    plt.subplot(len(rms_series), 1, row)
    draw_waveform(clip, sr=sr, alpha=0.5)
    plt.plot(rms_times, rms_values, color='r')
    plt.title(clip_name)
    plt.ylim((-0.5, 0.5))

plt.subplots_adjust(wspace=None, hspace=0.6)

getting the zero_crossing_rate for the five files

In [None]:
# Frame-wise zero-crossing rate of each clip. zero_crossing_rate() returns a
# 2-D array, and [0] selects its first row as a 1-D series.
zcr_file_1, zcr_file_2, zcr_file_3, zcr_file_4, zcr_file_5 = (
    librosa.feature.zero_crossing_rate(
        clip, frame_length=FRAME_SIZE, hop_length=HOP_LENGTH
    )[0]
    for clip in (file_1, file_2, file_3, file_4, file_5)
)

In [None]:
# Compare the zero-crossing rate of the five clips on a single set of axes.
zcr_series = [
    ('file_1', zcr_file_1, 'r'),
    ('file_2', zcr_file_2, 'y'),
    ('file_3', zcr_file_3, 'b'),
    ('file_4', zcr_file_4, 'g'),
    ('file_5', zcr_file_5, 'purple'),
]

plt.figure(figsize=(20, 8))

for clip_name, zcr_values, line_color in zcr_series:
    # Build the time axis from the series itself instead of reusing the `t`
    # left behind by an earlier plotting cell.
    zcr_times = librosa.frames_to_time(
        range(len(zcr_values)), sr=sr, hop_length=HOP_LENGTH
    )
    plt.plot(zcr_times, zcr_values, color=line_color, label=clip_name)

plt.ylim((0, 1))
# Labels and a legend so the five lines can be told apart
plt.xlabel('Time in seconds')
plt.ylabel('Zero-crossing rate')
plt.legend()
plt.show()

### Fourier Transform

In [None]:
# Discrete Fourier transform of the whole first clip and its magnitude spectrum
ft = sp.fft.fft(file_1)
magnitude = np.absolute(ft)

# Bin k of an N-point DFT sits at k * sr / N, so the frequency axis must stop
# one bin short of sr. With the endpoint included, the spacing becomes
# sr / (N - 1) and every bin is labelled slightly too high.
frequency = np.linspace(0, sr, len(magnitude), endpoint=False)

In [None]:
# Magnitude spectrum restricted to the first 5000 FFT bins
n_bins_shown = 5000
plt.figure(figsize=[20, 8])
plt.plot(frequency[:n_bins_shown], magnitude[:n_bins_shown])
plt.xlabel('Frequency in HZ')
plt.ylabel('Magnitude')

In [None]:
# Duration in seconds of 50 consecutive samples; sample_duration is defined
# earlier in the notebook. The factor was 100, which contradicted both the
# variable name and the 50-sample window plotted in the next cell.
d_50_samples = 50 * sample_duration
d_50_samples

In [None]:
# Time stamp (in seconds) of every sample of file_1; the sinusoid cell below reads `t`
samples = range(len(file_1))
t = librosa.samples_to_time(samples, sr=sr)

# Zoom in on 50 consecutive samples of the waveform. The axes are labelled
# time/amplitude, so the time-domain signal is plotted here; the cell used to
# plot FFT frequency/magnitude bins under those labels.
zoom = slice(65000, 65050)

plt.figure(figsize=[20, 8])
plt.plot(t[zoom], file_1[zoom])
plt.xlabel("time in seconds")
plt.ylabel("amplitude")

creating a sinusoid

In [None]:
# Two sinusoids at the same frequency (f) but with different phase offsets.
# The phase is expressed in cycles, because it is scaled by 2*pi together with
# f * t, so an offset of 0 or 1 puts the two waves exactly on top of each other.
f = 523
phase = 0
phase2 = 0.3

two_pi = 2*np.pi
sin = 0.5 * np.sin(two_pi * (f * t - phase))
sin2 = 0.5 * np.sin(two_pi * (f * t - phase2))

# Show a 400-sample window so individual periods are visible
window = slice(65000, 65400)
plt.figure(figsize=[20, 8])
plt.plot(t[window], sin[window], color='g')
plt.plot(t[window], sin2[window], color='r')

**TODO:** I have been trying to overlay the sinusoid on the file_1 waveform, but haven't had much success so far. Need to figure this out.