# Assign distance score interactive

Prompts for user input on call pairs that cannot clearly be assigned to same-call or different-call.

In [1]:
import pandas as pd
import os
import numpy as np
import pickle
import json
from scipy import stats
import matplotlib.pyplot as plt

# sound file generation
import soundfile as sf
from scipy.io import wavfile
import scipy.signal as sps

from tkinter import Tk, Frame, Label, Button, Entry
from pygame import mixer
from PIL import ImageTk, Image
from glob import glob
from pandas.core.common import flatten
import tkinter as tk
from IPython.display import Audio
from scipy.spatial import distance
import librosa
import librosa.display

pygame 2.0.1 (SDL 2.0.14, Python 3.7.10)
Hello from the pygame community. https://www.pygame.org/contribute.html


In [2]:
# for later: default locations, everything lives in the current working directory
HOME = os.getcwd()+"/"
OUTDIR = os.getcwd()+"/"
CANDIDATES_LABELFILE = HOME+"candidates_labelfile.pkl"
F_NF_FILE = HOME+'f_nf.csv'

# Create wav and spec_img folders below HOME/resolver_quiz.
# os.makedirs(..., exist_ok=True) also creates the parent folder and does not
# fail when a folder already exists, so this cell can be re-run safely.
# Building the paths from HOME avoids changing the working directory.
dirs2create = ['wavs', 'spec_imgs']
for dirpath in dirs2create:
    os.makedirs(os.path.join(HOME, 'resolver_quiz', dirpath), exist_ok=True)

WAV_FOLDER = HOME+"resolver_quiz/wavs/"
IMG_FOLDER = HOME+"resolver_quiz/spec_imgs/"


# for now: fixed, machine-specific locations that replace the defaults above
HOME = "/Users/marathomas/Documents/MPI_work/code/"
OUTDIR = "/Volumes/MaraMeerkat/"

# Labelfile of candidate calls;
# this should have been generated with 01_identify_focal_conflicts
CANDIDATES_LABELFILE = OUTDIR + "candidates_labelfile.pkl"
# File that contains distance scores between call pairs;
# generated with 02_assign_distances
F_NF_FILE = OUTDIR + 'f_nf.csv'

# Folders holding the wav files and spectrogram images used by the quiz
WAV_FOLDER = OUTDIR + "resolver_quiz/wavs/"
IMG_FOLDER = OUTDIR + "resolver_quiz/spec_imgs/"




# Cutoff score for same-different. Deliberately very conservative in order to
# avoid false positives: the goal is to correct potential errors, not to
# introduce new ones, so a call is only labelled nonfocal when it is pretty
# SURE to be the same as another one.
CUTOFF = 0.25
# Upper bound - user input is only required for calls scoring between
# CUTOFF and RELAX_CUTOFF.
RELAX_CUTOFF = 0.35

In [3]:
def pad_spectro(spec,maxlen):
    """Pad a spectrogram with zeros on the right up to a given number of columns.

    Parameters
    ----------
    spec : 2D np array
        Spectrogram; columns (axis 1) are padded.
    maxlen : int
        Number of columns the result should have at least.

    Returns
    -------
    2D np array
        New array with zero columns appended so that it has `maxlen` columns.
        A spectrogram that already has `maxlen` or more columns is returned
        with its content unchanged (no truncation).
    """
    # Clamp at zero: a negative width would make np.zeros raise a ValueError
    # for spectrograms that are already longer than maxlen.
    padding = max(maxlen - spec.shape[1], 0)
    z = np.zeros((spec.shape[0], padding))
    return np.concatenate((spec, z), axis=1)

In [4]:
# labelfile of all potentially conflicting calls
labelfile = pd.read_pickle(CANDIDATES_LABELFILE)

# tab-separated table of call pairs; its first column is used as the index
f_nf = pd.read_csv(F_NF_FILE, index_col=0, sep="\t")

labelfile.shape

(3193, 35)

In [5]:
# Pairs whose distance score lies strictly between CUTOFF and RELAX_CUTOFF
# are ambiguous: select them so they can be assigned manually.
above_cutoff = f_nf['dist_score'] > CUTOFF
below_relaxed = f_nf['dist_score'] < RELAX_CUTOFF
unclear_df = f_nf.loc[above_cutoff & below_relaxed, :]

# Keep only the labelfile rows of calls that occur in at least one of these pairs
files_we_need = list(set(unclear_df.call_a.values).union(unclear_df.call_b.values))
labelfile = labelfile.loc[labelfile.callID_new.isin(files_we_need), :]

unclear_df

Unnamed: 0,call_a,call_b,dist_score,intense_a,intense_b
14,20170806_VHMF001_01_11_08_395_0_00_059_sn,20170806_VHMM003_01_11_23_234_0_00_058_sn,0.276226,-34.027026,-38.562342
21,20170806_VHMF001_01_36_14_840_0_00_154_unk,20170806_VHMM003_01_36_30_295_0_00_140_soc,0.338455,-40.511288,-34.945054
27,20170806_VHMF001_01_51_44_070_0_00_068_sn,20170806_VHMM006_01_52_14_795_0_00_082_sn,0.341427,-35.694063,-43.931466
39,20170806_VHMM002_01_51_57_620_0_00_201_unk,20170806_VHMM006_01_52_22_609_0_00_215_unk_*,0.261807,-45.727865,-36.506488
75,20170823_VHMM002_00_49_11_887_0_00_038_sn,20170823_VHMM007_00_49_08_707_0_00_031_sn,0.315335,-38.166317,-14.703301
...,...,...,...,...,...
1806,20190719_VHMF015_01_22_59_568_0_00_124_cc,20190719_VHMM023_01_25_41_897_0_00_150_cc,0.348921,-44.689925,-20.258578
1830,20190719_VHMM007_02_00_24_780_0_00_161_al,20190719_VHMM016_02_00_58_554_0_00_129_al_*,0.272317,-18.133032,-26.727692
1834,20190719_VHMM008_01_57_14_161_0_00_217_fu_cc+s...,20190719_VHMM016_01_56_46_852_0_00_210_fu_cc+agg,0.341091,-33.644835,-13.797516
1835,20190719_VHMM014_01_18_40_546_0_00_136_fu_cc+agg,20190719_VHMM016_01_18_30_400_0_00_123_fu_cc+a...,0.279708,-21.608040,-29.735975


# Generate images and wav files for dist score quiz

### Generate wavs

In [None]:
SR = 8000  # sample rate (Hz) of every wav file that is written


def write_wav(uid, data, sr):
    """Write audio samples to '<uid>.wav' in the current working directory.

    The file is stored as 16-bit PCM at SR Hz. Audio with a different
    sample rate is resampled to SR before it is written.

    uid  : identifier used as the file name (converted with str())
    data : audio samples
    sr   : sample rate of `data` in Hz
    """
    target = str(uid)+'.wav'
    if sr == SR:
        samples, rate = data, sr
    else:
        # Number of samples that covers the same duration at SR Hz
        n_resampled = int(round(len(data) * float(SR) / sr))
        samples, rate = sps.resample(data, n_resampled), SR
    sf.write(target, samples, rate, subtype='PCM_16')

# write_wav stores its file relative to the working directory,
# so switch to the wav folder before exporting one file per labelfile row.
os.chdir(WAV_FOLDER)


def _wav_from_row(row):
    return write_wav(row['callID_new'], row['raw_audio'], row['samplerate_hz'])


x = labelfile.apply(_wav_from_row, axis=1)

### Generate images

In [7]:
def pad_spectro(spec,maxlen):
    """Pad a spectrogram with zeros on the right up to `maxlen` columns.

    NOTE: this redefines pad_spectro from an earlier cell of this notebook.

    spec   : 2D np array, padded along axis 1
    maxlen : int, number of columns the result should have at least

    Returns a new 2D np array. A spectrogram that already has `maxlen` or
    more columns keeps its content unchanged (no truncation).
    """
    # Clamp at zero: a negative width would make np.zeros raise a ValueError
    # for spectrograms that are already longer than maxlen.
    padding = max(maxlen - spec.shape[1], 0)
    z = np.zeros((spec.shape[0], padding))
    return np.concatenate((spec, z), axis=1)

In [None]:
import pylab

n_ticks=3            # the time axis is divided into this many tick intervals
FFT_WIN = 0.03       # FFT window length (presumably in seconds - TODO confirm)
FFT_HOP = FFT_WIN/8  # hop between frames, same unit as FFT_WIN
FMAX = 4000          # fmax passed to specshow
MAX_FRAMES = 100 # all specs smaller than that will be padded
                 # all specs longer than that will not be affected

def write_img(uid, spec, sr):
    """Render a spectrogram to '<uid>.jpg' in the current working directory.

    Spectrograms with fewer than MAX_FRAMES columns are zero-padded to
    MAX_FRAMES first. The image is drawn with a mel y-axis and the 'inferno'
    colormap; x tick labels are frame positions converted via FFT_HOP.

    uid  : identifier used as the file name (converted with str())
    spec : 2D np array, time frames along axis 1
    sr   : sample rate in Hz
    """
    # Hop length in samples. It was previously int(0.03*sr), i.e. the window
    # length rather than the hop; it is now derived from FFT_HOP so it agrees
    # with the tick computation below.
    # NOTE(review): confirm FFT_HOP matches the hop used to compute the spectrograms.
    HOP_LEN = int(FFT_HOP*sr)

    outname = str(uid)+".jpg"

    if spec.shape[1] < MAX_FRAMES:
        spec = pad_spectro(spec, MAX_FRAMES)
    n_frames = spec.shape[1]

    fig = plt.figure()
    try:
        librosa.display.specshow(spec, sr=sr, hop_length=HOP_LEN, fmax=FMAX, y_axis='mel', cmap='inferno')

        # Tick positions are frame indices, tick labels are the matching times
        duration = FFT_HOP*n_frames
        step_size = round(duration/n_ticks,2)
        myticks = np.arange(0,duration, step_size)
        plt.xticks([round(x/FFT_HOP,0) for x in myticks], [str(round(x,2)) for x in myticks])

        fig.savefig(outname, bbox_inches=None, pad_inches=0)
    finally:
        # Always release the figure, also when plotting or saving fails;
        # one figure is created per call.
        plt.close(fig)

# write_img saves relative to the working directory,
# so switch to the image folder before rendering one image per labelfile row.
os.chdir(IMG_FOLDER)


def _img_from_row(row):
    return write_img(row['callID_new'], row['denoised_spectrograms'], row['samplerate_hz'])


x = labelfile.apply(_img_from_row, axis=1)

# Quiz

In [8]:
def _call_id_from_path(path):
    """Return the file name of `path` without directory and without extension.

    os.path.splitext only strips the final extension, so an ID that itself
    contains a '.' is kept intact (split('.')[0] would cut it at the first dot
    and the key would no longer match callID_new).
    """
    return os.path.splitext(os.path.basename(path))[0]


# Make audio dict: call ID -> path of its wav file
audios = sorted(glob(WAV_FOLDER+'*.wav'))
ids = [_call_id_from_path(x) for x in audios]
audio_dict = dict(zip(ids, audios))

# Make imgs dict: call ID -> path of its spectrogram image
imgs = sorted(glob(IMG_FOLDER+'*.jpg'))
ids = [_call_id_from_path(x) for x in imgs]
img_dict = dict(zip(ids, imgs))

# Make spectrogram dict: call ID -> denoised spectrogram
spec_dict = dict(zip(labelfile.callID_new, labelfile.denoised_spectrograms))

In [9]:
# One [call_a, call_b] list per ambiguous pair, in the row order of unclear_df
all_pairs = [[first, second] for first, second in zip(unclear_df.call_a, unclear_df.call_b)]

# Distance scores in the same order as all_pairs
all_dists = list(unclear_df.dist_score)

In [None]:
# The answers file is written relative to the working directory
os.chdir(HOME)

SR = 8000
answers, dists = [], []

def check(letter, view):
    """Store the answer for the pair currently shown and move to the next one.

    letter : the chosen answer (the quiz buttons pass 'same' or 'different')
    view   : frame of the pair that was just answered; handed to unpackView

    After every answer the complete list of answers so far is rewritten to
    'dist_score_improvement.txt', one 'call_a;call_b;answer' line per pair.
    """
    answers.append(letter)
    # zip stops at the shorter sequence, i.e. at the pairs answered so far
    lines = [';'.join((pair[0], pair[1], a)) for pair, a in zip(all_pairs, answers)]
    with open('dist_score_improvement.txt', 'w') as f:
        for item in lines:
            f.write("%s\n" % item)
    unpackView(view)
    

def getView(window):
    """Build the frame that presents the call pair at position `index`.

    From top to bottom the frame holds: the pair's distance score, the
    question, one button per answer, and for each of the two calls its ID,
    a play button and its spectrogram image. The frame is returned without
    being packed into `window`.
    """
    global index
    frame = Frame(window)

    def _stack(widget):
        # Every widget is stacked below the previous one
        widget.pack(side="top")
        return widget

    _stack(Label(frame, text=str(round(all_dists[index],4))))
    _stack(Label(frame, text="Same or different call?"))

    # choice=choice binds the current value; a plain closure would see the last one
    for choice in ['same', 'different']:
        _stack(Button(frame, text=choice, command=lambda choice=choice: check(choice, frame)))

    _stack(Label(frame, text=' '))

    # First call of the pair
    _stack(Label(frame, text=all_pairs[index][0]))
    _stack(Button(frame, text="Play sound",command=play_music))
    _stack(Label(frame, image=show_image()))

    # Second ("nb") call of the pair
    _stack(Label(frame, text=all_pairs[index][1]))
    _stack(Button(frame, text="Play nb sound",command=play_nb_music))
    _stack(Label(frame, image=show_nb_image()))

    return frame

    
def unpackView(view):
    """Hide the frame of the answered pair, then continue with askQuestion."""
    view.pack_forget()
    askQuestion()

def askQuestion():
    """Advance the quiz: show the next pair, or a closing message after the last one."""
    global window, index, button
    all_answered = (index + 1 == number_of_questions)
    if all_answered:
        closing = Label(window, text="Thank you. You can close the window.")
        closing.pack()
        return
    # Hide the Start button, then display the view of the next pair
    button.pack_forget()
    index += 1
    next_view = getView(window)
    next_view.pack()

# Quiz state: askQuestion increments index before showing a pair,
# so -1 means that no pair has been shown yet.
number_of_questions = len(all_pairs)
index = -1
right = 0

# Configure the pygame mixer before initialising it
mixer.pre_init(SR, -16, 1, 262144)
mixer.init()

def _play_pair_member(position):
    """Load and play the wav file of one call (0 or 1) of the pair at `index`."""
    call_id = all_pairs[index][position]
    mixer.music.load(audio_dict[call_id])
    mixer.music.play()


def play_music():
    """Play the first call of the pair currently shown."""
    _play_pair_member(0)


def play_nb_music():
    """Play the second ("nb") call of the pair currently shown."""
    _play_pair_member(1)

def _load_pair_image(position):
    """Return a 360x240 Tk image of the spectrogram of one call (0 or 1) of the pair at `index`."""
    call_id = all_pairs[index][position]
    # The with-block closes the image file once the resized copy exists
    with Image.open(img_dict[call_id]) as source:
        # Image.LANCZOS is the same filter as Image.ANTIALIAS; the ANTIALIAS
        # alias was removed in Pillow 10 and would raise an AttributeError there.
        resized = source.resize((360 , 240), Image.LANCZOS)
    return ImageTk.PhotoImage(resized)


def show_image():
    """Return the spectrogram image of the first call of the current pair."""
    # The module-level reference keeps the image object alive while it is displayed
    global img
    img = _load_pair_image(0)
    return img


def show_nb_image():
    """Return the spectrogram image of the second ("nb") call of the current pair."""
    # The module-level reference keeps the image object alive while it is displayed
    global nb_img
    nb_img = _load_pair_image(1)
    return nb_img

    
# Main window of the quiz
window = Tk()
window.geometry("800x650")
window.title('Meerkat Sound Classification')

# The Start button shows the first pair via askQuestion
button = Button(window, text="Start", command=askQuestion)
button.pack()

# Run the Tk event loop
window.mainloop()

# Read in answers and update f_nf table

In [40]:
# The answers file has no header row: one 'call_a;call_b;answer' line per pair
answers_path = HOME+'dist_score_improvement.txt'
manual_assignment = pd.read_csv(answers_path, header=None, sep=";")
manual_assignment.columns = ['call_a', 'call_b', 'manual_assignment']
manual_assignment

Unnamed: 0,call_a,call_b,manual_assignment
0,20170806_VHMF001_01_11_08_395_0_00_059_sn,20170806_VHMM003_01_11_23_234_0_00_058_sn,different
1,20170806_VHMF001_01_36_14_840_0_00_154_unk,20170806_VHMM003_01_36_30_295_0_00_140_soc,same
2,20170823_VHMM002_00_49_11_887_0_00_038_sn,20170823_VHMM007_00_49_08_707_0_00_031_sn,same
3,20170823_VHMM002_01_09_08_832_0_00_046_sn,20170823_VHMM006_01_09_17_840_0_00_057_sn,same
4,20170823_VHMM002_01_10_43_259_0_00_095_cc,20170823_VHMF001_01_10_59_741_0_00_097_cc,different
5,20170823_VHMF001_01_09_54_850_0_00_128_cc,20170823_VHMM007_01_09_35_388_0_00_143_cc,different
6,20170824_VHMM002_01_06_03_434_0_00_128_cc,20170824_2_VHMM003_00_58_28_222_0_00_165_cc,same
7,20170825_VHMM002_01_01_57_014_0_00_122_cc_14,20170825_VHMF001_01_02_17_605_0_00_122_cc_05,same
8,20170825_VHMM007_01_16_01_518_0_00_063_sn,20170825_3_VHMM003_00_03_32_659_0_00_052_sn,different
9,20170825_VHMM007_01_51_31_464_0_00_164_cc_675,20170825_VHMM006_01_51_50_495_0_00_162_cc_292,different


In [41]:
# Overwrite the distance score of every manually judged pair:
# "same" becomes 0, any other answer becomes 1.
for call_a, call_b, assignment in manual_assignment.itertuples(index=False, name=None):
    score = 1 if assignment != "same" else 0
    is_this_pair = (f_nf['call_a']==call_a) & (f_nf['call_b']==call_b)
    f_nf.loc[is_this_pair, 'dist_score'] = score

In [None]:
# Save the updated scores: this overwrites the existing f_nf file
f_nf.to_csv(path_or_buf=F_NF_FILE, sep="\t")