# audio segmentation

**ToDo's**
 - [X] implement scalable algorithm
 - [X] write load function for csv files
 - [X] write quality function for relative overlap
 - [ ] test some data

In [1]:
import os
import csv
from datetime import datetime

import librosa
import librosa.display

import numpy as np
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec

import libfmp.b
import libfmp.c3

from supportClasses import Timer
from supportClasses import Logfile

In [2]:
def loadCSV(path):
    """Load segment annotations from a semicolon-separated CSV file.

    Every non-blank row is read as ``key;start;end``. ``start`` and ``end``
    are parsed with ``int``; columns after the third are ignored. Blank
    lines (for example a trailing newline at the end of the file) are skipped.

    Parameters
    ----------
    path : str
        Path of the CSV file to read.

    Returns
    -------
    dict
        Maps each key (first column, kept as a string) to ``[start, end]``.
        If a key appears more than once, the last row wins.

    Raises
    ------
    IndexError
        If a non-blank row has fewer than three columns.
    ValueError
        If the second or third column cannot be parsed as an integer.
    """
    dictionary = {}

    # newline="" lets the csv module do its own line-ending handling,
    # as recommended by the csv documentation.
    with open(path, "r", newline="") as file:
        reader = csv.reader(file, delimiter=";")
        for row in reader:
            # csv.reader yields [] for an empty line; indexing it would raise
            # IndexError, so rows without any content are skipped.
            if not any(field.strip() for field in row):
                continue
            dictionary[row[0]] = [int(row[1]), int(row[2])]

    return dictionary

In [3]:
def relativeOverlap(a_start, a_end, b_start, b_end):
    """Compute the relative overlap (intersection over union) of two intervals.

    Parameters
    ----------
    a_start, a_end : number
        Start and end of interval A (expected ``a_start <= a_end``).
    b_start, b_end : number
        Start and end of interval B (expected ``b_start <= b_end``).

    Returns
    -------
    tuple
        ``(ratio, intersection)`` where ``intersection`` is the length of the
        shared part of A and B (0 if they are disjoint) and ``ratio`` is
        ``intersection / union``. When the union has zero length (both
        intervals are single points) the ratio is reported as ``0.0`` instead
        of raising ``ZeroDivisionError``.
    """
    if b_start > a_end or a_start > b_end:
        # The intervals are disjoint.
        intersection = 0.00
    else:
        intersection = min(a_end, b_end) - max(a_start, b_start)

    # Length of A plus length of B counts the shared part twice.
    union = (a_end - a_start) + (b_end - b_start) - intersection

    # Two zero-length intervals give a zero-length union; there is nothing to
    # divide by, so treat the overlap quality as 0.
    if union == 0:
        return 0.0, intersection

    return intersection / union, intersection

In [4]:
# Run configuration: timers, output files, input locations and feature parameters.
# Timer and Logfile come from the project-local supportClasses module; their
# exact behaviour is not visible here (presumably a stopwatch and a line-based
# file writer -- confirm against supportClasses).
# In the processing cell t0 is used for the whole run, t1 for one Y file and
# t2 for a single step (load / chroma). timeStamp is appended to both output
# file names so each run gets its own files.
t0 = Timer(); t1 = Timer(); t2 = Timer(); timeStamp = datetime.now().strftime("%Y%m%d_%H%M%S")

# output files
# NOTE(review): all paths below are absolute, machine-specific Windows paths;
# the notebook will not run elsewhere without editing them.
log = Logfile(r"C:\Users\Christian Stuber\Desktop\assets\audioSegmentation_log_" + timeStamp + ".csv")
results = Logfile(r"C:\Users\Christian Stuber\Desktop\assets\audioSegmentation_results_" + timeStamp + ".csv")
# Header row of the results file; the columns match the rows written per
# segment in the processing cell. Meaning of the second argument (False) is
# defined by Logfile.writeLine -- TODO confirm.
results.writeLine("Y;y;y_start;y_end;y'_start;y'_end;overlap;relOverlap", False)

# reference recording X: audio file and its segment annotations (key;start;end)
X_filePath = r"C:\Users\Christian Stuber\Desktop\assets\data\WAM-79__Track2_Channel1.wav"
X_segmPath = r"C:\Users\Christian Stuber\Desktop\assets\data\WAM-79__Track2_Channel1.csv"

# unknown recordings Y: every .wav file in this directory is processed
Y_dirPath = r"C:\Users\Christian Stuber\Desktop\assets\data"
Y_durCut = 4440 # passed as `duration` to librosa.load: load cutoff because of memory overflow

# feature parameters
sampleRate = 48000 # sample rate requested from librosa.load and used for the chroma features
splitSecond = 10 # feature frames per second (hop length = sampleRate / splitSecond samples)

normalisation = 2 # passed as `norm` to chroma_stft (presumably the L2 norm per frame -- see librosa docs)
hopLength = int(sampleRate / splitSecond) # samples between successive frames
frameLength = int(sampleRate / splitSecond * 2) # FFT window size: two hops, so neighbouring frames overlap by half
window = "hann"

In [5]:
# Locate every annotated segment of the reference recording X inside each
# recording Y of Y_dirPath via subsequence DTW on chroma features, and compare
# the located interval with the annotation stored next to Y (same base name,
# .csv extension). One result row per (Y, segment) is written to `results`.
fileType = ".wav"
X_segments = loadCSV(X_segmPath)

t0.start()

# The reference segments do not depend on Y, so their audio is loaded and
# their chroma features are computed once here instead of once per Y file.
X_chromas = {}
for key, x in X_segments.items():
    x_data, sr = librosa.load(path=X_filePath, sr=sampleRate, offset=x[0], duration=x[1]-x[0])
    X_chromas[key] = librosa.feature.chroma_stft(y= x_data, sr= sampleRate, norm= normalisation, hop_length= hopLength, n_fft= frameLength, window= window, tuning= None)

#iterate over audio files
for Y_name in os.listdir(Y_dirPath):
    if Y_name.lower().endswith(fileType):
        t1.start()
        print("start:    " + Y_name)

        t2.start()
        Y_data, Sr = librosa.load(path= os.path.join(Y_dirPath, Y_name), sr= sampleRate, duration= Y_durCut)
        print("load:     %.3fs" % (t2.getDuration()))

        t2.start()
        Y_chroma = librosa.feature.chroma_stft(y= Y_data, sr= sampleRate, norm= normalisation, hop_length= hopLength, n_fft= frameLength, window= window, tuning= None)
        print("chroma:   %.3fs" % (t2.getDuration()))

        # Build the annotation path from the base name. The extension check
        # above is case-insensitive, so a case-sensitive str.replace would
        # leave e.g. "FOO.WAV" unchanged and hand the audio file to loadCSV.
        testData = loadCSV(os.path.join(Y_dirPath, os.path.splitext(Y_name)[0] + ".csv"))

        print("segments: y\t¦y_st\t¦y_end\t¦y'_st\t¦y'_end\t¦ovLap\t¦relOvlap")
        for key, x in X_segments.items():
            x_chroma = X_chromas[key]

            #dynamic time warping (subsequence mode: x may match anywhere inside Y)
            costMatrix, warpingPath = librosa.sequence.dtw(x_chroma, Y_chroma, metric='euclidean',
                                step_sizes_sigma=np.array([[1, 1], [0, 1], [1, 0]]),
                                weights_add=np.array([0, 0, 0]),
                                weights_mul=np.array([1, 1, 1]),
                                subseq=True)

            #results
            # librosa returns the warping path from end to start, so the last
            # entry is the beginning of the match. Column 1 is the frame index
            # in Y; dividing by the frames per second converts it to seconds.
            y_start = int(round(warpingPath[-1][1] / splitSecond))
            y_end = int(round(warpingPath[0][1] / splitSecond))
            # annotated position of the same segment in Y
            ys_start = testData[key][0]
            ys_end = testData[key][1]
            quality, intersection = relativeOverlap(y_start, y_end, ys_start, ys_end)

            #save data
            results.writeLine("%s;%s;%i;%i;%i;%i;%i;%f" % (Y_name, key, y_start, y_end, ys_start, ys_end, intersection, quality), False)
            print("          %s\t¦%i\t¦%i\t¦%i\t¦%i\t¦%i\t¦%.2f" % (key, y_start, y_end, ys_start, ys_end, intersection, quality))

        print("total:    " + str(int(round(t1.getDuration()))) + "s\n")

print("FINISHED: total duration = " + str(int(round(t0.getDuration()))) + "s")

start:    WAM-76__Track1_Channel2.wav
load:     6.792s
chroma:   24.999s
segments: y	¦y_st	¦y_end	¦y'_st	¦y'_end	¦ovLap	¦relOvlap
          1.01	¦2	¦383	¦0	¦387	¦381	¦0.98
          1.02	¦395	¦758	¦395	¦815	¦363	¦0.86
          1.03	¦814	¦820	¦823	¦838	¦0	¦0.00
          1.04	¦848	¦923	¦839	¦998	¦75	¦0.47
          1.05	¦997	¦1037	¦1007	¦1274	¦30	¦0.11
          1.06	¦2644	¦2694	¦1275	¦1511	¦0	¦0.00
          1.07	¦1510	¦1520	¦1520	¦1539	¦0	¦0.00
          1.08	¦1557	¦1610	¦1540	¦1610	¦53	¦0.76
          1.09	¦1613	¦1820	¦1611	¦1820	¦207	¦0.99
          1.10	¦999	¦1002	¦1829	¦1846	¦0	¦0.00
          1.11	¦1850	¦2192	¦1847	¦2196	¦342	¦0.98
          1.12	¦2769	¦2775	¦2205	¦2250	¦0	¦0.00
          1.13	¦2261	¦2348	¦2251	¦2353	¦87	¦0.85
          1.14	¦814	¦826	¦2354	¦2562	¦0	¦0.00
          1.15	¦2665	¦2766	¦2563	¦2768	¦101	¦0.49
          1.16	¦2784	¦2872	¦2778	¦2878	¦88	¦0.88
          1.17	¦1060	¦1199	¦2879	¦3384	¦0	¦0.00
          1.18	¦3829	¦3871	¦3386	¦3575	¦0	¦0.00
          1.19a