In [2]:
# Feature extraction for sound analysis of birdsong
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import librosa

from scipy.io import wavfile
import scipy.signal as signal
import IPython.display as ipd

In [None]:
# Load the recording, build its time axis and band-pass filter it.
sound_file = 'Data/2023_03_31_7_03_30.wav'
fs, audio = wavfile.read(sound_file)
# Time stamp (in seconds) of every sample: sample k occurs at k / fs.
# (linspace with the default endpoint=True would stretch the spacing to
# len/(len-1)/fs instead of exactly 1/fs.)
t_audio = np.arange(len(audio)) / fs

# Bandpass filter the audio signal (300 Hz - 20 kHz)
nyq = 0.5 * fs
low = 300 / nyq
# butter() requires normalised edges strictly inside (0, 1); cap the upper
# edge just below Nyquist so recordings sampled at <= 40 kHz do not raise.
high = min(20000, 0.99 * nyq) / nyq
order = 5
# Transfer-function coefficients, kept available for any cell that uses b/a.
b, a = signal.butter(order, [low, high], btype='band')

# Actually apply the filter. Second-order sections are used for numerical
# stability and forward-backward filtering keeps the result time-aligned with
# the raw signal. axis=0 filters along time for mono and multi-channel files.
# Note: audio_clip is a floating-point array; `audio` itself is left untouched.
sos = signal.butter(order, [low, high], btype='band', output='sos')
audio_clip = signal.sosfiltfilt(sos, audio, axis=0)

# Last expression of the cell: renders an inline player for the original file.
ipd.Audio(sound_file)

In [4]:
import SAP_features as SAP

# Compute the SAP features of the recording over a sliding window.
# NOTE(review): window size/step are presumably expressed in samples -- confirm
# against the SAP_features implementation.
window_size, window_step = 1323, 165

sap = SAP.SAP_features(audio, fs, window_size, window_step)
ent, sd = sap.entropy, sap.spectral_derivative

# Keep only the spectral-derivative columns whose entropy value is at least 0.4
above_threshold = ent >= 0.4
sounds = sd[:, above_threshold]

In [None]:
import umap

# Configure the 2-D UMAP projection, then embed every retained column of
# `sounds` (one row of sounds.T per point).
reducer = umap.UMAP(
    n_neighbors=50,
    min_dist=0.5,
    n_components=2,
    metric='canberra',
    n_jobs=-1,
)
clusterable_embedding = reducer.fit_transform(sounds.T)

# Overview plot: every embedded point drawn as a small, faint black dot.
x_coords = clusterable_embedding[:, 0]
y_coords = clusterable_embedding[:, 1]
plt.figure(figsize=(10,10))
plt.scatter(x_coords, y_coords, s=1, alpha=0.1, color='black')

In [None]:
# Render one PNG per video frame of the UMAP embedding; the points whose
# analysis windows fall around the frame's time are highlighted in blue.
# Variables used from earlier cells: t, clusterable_embedding, ent
# NOTE(review): `t` is not defined in any visible cell. It is indexed in
# parallel with `ent` below, so it is presumably the per-window time axis of
# the SAP features -- confirm and define it explicitly in an earlier cell.
import os

# Make the graphic 30 fps
fps = 30
video_t = np.arange(0, t[-1], 1/fps)
# Render at most the first 60 s, but never more frames than video_t holds
# (otherwise video_t[i] raises IndexError on recordings shorter than 60 s).
num_frames = min(fps*60, len(video_t))

# Bookkeeping for a trailing-highlight effect; filled per frame, not yet consumed.
idx_buffer = []
buffer_counter = []
buffer_thresh = 10

# The embedding was fitted only on windows with ent >= 0.4, so it has fewer
# rows than t/ent. Translate a window index into its embedding row:
# embedding_row[k] is the row of window k (meaningful only where keep[k]).
keep = ent >= 0.4
if len(t) != len(keep):
    raise ValueError("t and ent must have the same length")
if clusterable_embedding.shape[0] != int(np.sum(keep)):
    raise ValueError("clusterable_embedding must have one row per window with ent >= 0.4")
embedding_row = np.cumsum(keep) - 1

frame_dir = 'Data/umap_video/'
os.makedirs(frame_dir, exist_ok=True)

# The background cloud and the axes setup never change, so draw them once and
# only move the highlighted points from frame to frame.
fig, ax = plt.subplots()
ax.scatter(clusterable_embedding[:, 0],
           clusterable_embedding[:, 1],
           color='black',
           s=1,
           alpha=0.1)
highlight = ax.scatter([], [], s=5, color='blue')
# Fixed view limits; these were chosen for one particular embedding run.
ax.set_ylim([0, 20])
ax.set_xlim([-7.5, 15])
ax.axis('off')

for i in range(num_frames):
    # Windows within one frame period of the current video time that also
    # passed the entropy threshold (and therefore exist in the embedding).
    playing = (np.abs(t - video_t[i]) < 1/fps) & keep
    idx = embedding_row[np.flatnonzero(playing)]

    idx_buffer.append(idx)
    buffer_counter.append(np.zeros(len(idx)))

    # An empty selection yields a (0, 2) array, which simply hides the highlight.
    highlight.set_offsets(clusterable_embedding[idx, :2])
    fig.savefig(frame_dir + str(i) + '.png')

# Free the figure so it is not kept alive (or re-displayed) by the notebook.
plt.close(fig)


In [22]:
# Take the images from the umap_video folder and make a video
import cv2

# Stream the rendered frames straight into the video file instead of holding
# every image in memory first (the frame list can reach gigabytes).
# Uses num_frames and fps from the frame-rendering cell so that the playback
# rate matches the rate the frames were rendered at.
out = None
size = None
try:
    for i in range(num_frames):
        filename = 'Data/umap_video/' + str(i) + '.png'
        img = cv2.imread(filename)
        # cv2.imread signals a missing/unreadable file by returning None.
        if img is None:
            raise FileNotFoundError('Could not read frame image: ' + filename)
        height, width, layers = img.shape

        if out is None:
            # Open the writer lazily, once the frame size is known.
            size = (width, height)
            out = cv2.VideoWriter('Data/umap_video/umap_video.avi', cv2.VideoWriter_fourcc(*'DIVX'), fps, size)
            if not out.isOpened():
                raise IOError('Could not open Data/umap_video/umap_video.avi for writing')
        elif (width, height) != size:
            # The writer needs every frame to match the size it was opened with.
            raise ValueError('Frame ' + filename + ' has size ' + str((width, height)) + ', expected ' + str(size))

        # Save video
        out.write(img)
finally:
    # Always finalise the file, even if a frame failed part-way through.
    if out is not None:
        out.release()