<a href="https://colab.research.google.com/github/themidwestcanapps/pyAudioAnalysis/blob/master/CSVdataforsequencing.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [0]:
!pip install sklearn 
!pip install pyAudioAnalysis
!pip install hmmlearn
!pip install eyed3


In [0]:
!git clone -l -s git://github.com/tyiannak/pyAudioAnalysis.git cloned-repo
%cd cloned-repo
!ls

In [0]:
!pip install -e  .

In [0]:
import pyAudioAnalysis


In [0]:
from __future__ import print_function
import numpy as np 
import sklearn.cluster
import scipy 
import os 
from scipy.spatial import distance 
import matplotlib.pyplot as plt 
import csv
import os.path
import sklearn 
import sklearn.cluster 
import hmmlearn.hmm 
import pickle as cPickle
import glob 

def smoothMovingAvg(inputSignal, windowLen=11):
    """Smooth a 1-D signal with a moving-average (box) filter.

    Both ends of the signal are extended with point-reflected copies of the
    edge samples before filtering, and the padding is sliced off again, so
    the result has the same length as the input.

    Args:
        inputSignal: 1-D numpy array to smooth.
        windowLen: length of the averaging window in samples (cast to int).
            Values below 3 disable smoothing.

    Returns:
        A numpy array with ``inputSignal.size`` elements, or ``inputSignal``
        itself (not a copy) when ``windowLen`` is below 3.

    Raises:
        ValueError: if the input is not 1-D or is shorter than the window.
    """
    windowLen = int(windowLen)
    if inputSignal.ndim != 1:
        raise ValueError("smoothMovingAvg only accepts 1-dimensional arrays")
    if inputSignal.size < windowLen:
        raise ValueError("Input Vector needs to be bigger than window size")
    if windowLen < 3:
        return inputSignal

    # Point-reflect the edges around the first / last sample so the box
    # filter has data to average over at the boundaries.
    head = 2 * inputSignal[0] - inputSignal[windowLen - 1::-1]
    tail = 2 * inputSignal[-1] - inputSignal[-1:-windowLen:-1]
    padded = np.r_[head, inputSignal, tail]

    kernel = np.ones(windowLen, 'd')
    smoothed = np.convolve(kernel / kernel.sum(), padded, mode='same')
    # Drop the windowLen leading and windowLen - 1 trailing padding samples.
    return smoothed[windowLen:-windowLen + 1]

def selfSimilarityMatrix(featureVectors):
    """Return the cosine self-similarity matrix of a set of feature vectors.

    Each feature dimension is standardised across all vectors before the
    pairwise cosine similarity is computed. The normalisation is done
    inline because this file never binds the ``aT`` helper module the
    previous implementation called.
    NOTE(review): the 1e-14 offsets are meant to mirror pyAudioAnalysis'
    ``normalizeFeatures`` -- confirm against the installed version.

    Args:
        featureVectors: 2-D numpy array with one feature vector per column,
            i.e. shape (n_dims, n_vectors).

    Returns:
        (n_vectors, n_vectors) numpy array ``S`` where ``S[i, j]`` is
        1 - cosine distance between normalised vectors ``i`` and ``j``.
    """
    # One row per vector so pdist compares vectors, not dimensions.
    vectors = np.asarray(featureVectors, dtype=float).T
    mean = vectors.mean(axis=0) + 1e-14
    std = vectors.std(axis=0) + 1e-14  # offset avoids division by zero
    normalised = (vectors - mean) / std
    S = 1.0 - distance.squareform(distance.pdist(normalised, 'cosine'))
    return S

def flags2segs(flags, window):
    """Convert a sequence of per-window class flags into segments.

    A segment boundary is recorded at index ``i`` whenever ``flags[i]``
    differs from the value of the current run, and also at the last index.
    A run that starts at the last index therefore produces no segment of
    its own.

    Args:
        flags: sequence (list or 1-D array) of class flags, one per window.
        window: window step; boundary index * window gives the segment limit.

    Returns:
        (segs, classes) where ``segs`` is a (n_segments, 2) numpy array of
        [start, end] limits and ``classes`` is the list of the flag value of
        each segment. Empty or single-element input yields
        ``(zeros((0, 2)), [])``.
    """
    n_flags = len(flags)
    if n_flags == 0:
        return np.zeros((0, 2)), []

    last_index = n_flags - 1
    seg_ends = []
    classes = []
    cur_val = flags[0]
    for idx in range(1, n_flags):
        if flags[idx] != cur_val or idx == last_index:
            seg_ends.append(idx * window)
            classes.append(cur_val)
            cur_val = flags[idx]

    segs = np.zeros((len(seg_ends), 2))
    if seg_ends:
        segs[:, 1] = seg_ends
        # Every segment starts where the previous one ended; the first
        # starts at 0.
        segs[1:, 0] = seg_ends[:-1]
    return (segs, classes)

def segs2flags(seg_start, seg_end, seg_label, win_size):
    """Convert labelled segments into a sequence of fixed-size window flags.

    Window centres start at ``win_size / 2`` and advance by ``win_size``
    until they reach the end of the last segment. Each window gets the index
    (into the returned ``class_names``) of the label of the first segment
    whose ``(start, end]`` interval contains the window centre.

    Args:
        seg_start: sequence of segment start times.
        seg_end: sequence of segment end times.
        seg_label: sequence of segment labels.
        win_size: window size; must be positive.

    Returns:
        (flags, class_names): ``flags`` is a numpy array of class indices
        and ``class_names`` lists the distinct labels in order of first
        appearance, so the mapping is reproducible between runs.

    Raises:
        ValueError: if ``win_size`` is not positive (the scan would never
            terminate).
    """
    if win_size <= 0:
        raise ValueError("win_size must be positive")

    class_names = []
    for label in seg_label:
        if label not in class_names:
            class_names.append(label)

    if len(seg_end) == 0:
        return np.array([], dtype=int), class_names

    flags = []
    curPos = win_size / 2.0
    while curPos < seg_end[-1]:
        for i in range(len(seg_start)):
            if seg_start[i] < curPos <= seg_end[i]:
                break
        # If no segment contains curPos the loop falls through and the
        # window is attributed to the last segment.
        flags.append(class_names.index(seg_label[i]))
        curPos += win_size
    return np.array(flags), class_names

def computePreRec(cm, class_names):
    """Compute per-class recall, precision and F1 from a confusion matrix.

    For class ``i`` precision divides ``cm[i, i]`` by the sum of column
    ``i`` and recall divides it by the sum of row ``i``.

    Args:
        cm: square numpy confusion matrix.
        class_names: sequence with one name per row of ``cm``.

    Returns:
        ``(recall, precision, f1)`` -- three lists with one value per class,
        or ``None`` (after printing an error) when the number of class
        names does not match the matrix size.
    """
    n_classes = cm.shape[0]
    if len(class_names) != n_classes:
        print("Error in computePreRec! Confusion matrix and class_names "
              "list must be of the same size!")
        return
    precision = []
    recall = []
    f1 = []
    for i in range(n_classes):
        hits = cm[i, i]
        pre = hits / np.sum(cm[:, i])
        rec = hits / np.sum(cm[i, :])
        precision.append(pre)
        recall.append(rec)
        f1.append(2 * pre * rec / (pre + rec))
    return recall, precision, f1

def readSegmentGT(gt_file):
    """Read a comma-separated segment ground-truth file.

    Rows with exactly three fields are parsed as ``start,end,label``; rows
    with any other number of fields are ignored.

    Args:
        gt_file: path of the CSV file to read.

    Returns:
        ``(seg_start, seg_end, seg_label)``: two numpy float arrays and a
        list of label strings, in file order.
    """
    seg_start = []
    seg_end = []
    seg_label = []
    # The context manager guarantees the handle is closed even if a row
    # fails to parse.
    with open(gt_file, 'rt') as f:
        for row in csv.reader(f, delimiter=','):
            if len(row) == 3:
                seg_start.append(float(row[0]))
                seg_end.append(float(row[1]))
                seg_label.append(row[2])
    return np.array(seg_start), np.array(seg_end), seg_label


def plotSegmentationResults(flags_ind, flags_ind_gt, class_names, mt_step, ONLY_EVALUATE=False):
    """Evaluate a segmentation against ground truth and optionally plot it.

    Args:
        flags_ind: numpy array of predicted class indices, one per window.
        flags_ind_gt: numpy array of ground-truth class indices (may be
            empty, in which case no accuracy is computed).
        class_names: list of class names indexed by the flag values.
        mt_step: window step used to convert flag positions to time.
        ONLY_EVALUATE: when True, skip the statistics printout and plots.

    Returns:
        Fraction of windows (over the common length of both flag arrays)
        whose predicted and ground-truth indices match, or -1 when there is
        nothing to compare.
    """
    flags = [class_names[int(f)] for f in flags_ind]
    (segs, classes) = flags2segs(flags, mt_step)
    min_len = min(flags_ind.shape[0], flags_ind_gt.shape[0])
    if min_len > 0:
        accuracy = np.sum(flags_ind[0:min_len] ==
                          flags_ind_gt[0:min_len]) / float(min_len)
    else:
        accuracy = -1

    if not ONLY_EVALUATE:
        duration = segs[-1, -1]
        s_percentages = np.zeros((len(class_names), 1))
        percentages = np.zeros((len(class_names), 1))
        av_durations = np.zeros((len(class_names), 1))

        # Total time spent in each class.
        for iSeg in range(segs.shape[0]):
            s_percentages[class_names.index(classes[iSeg])] += \
                (segs[iSeg, 1] - segs[iSeg, 0])

        for i in range(s_percentages.shape[0]):
            percentages[i] = 100.0 * s_percentages[i] / duration
            # Number of segments of this class.
            S = sum(1 for c in classes if c == class_names[i])
            if S > 0:
                av_durations[i] = s_percentages[i] / S
            else:
                av_durations[i] = 0.0

        for i in range(percentages.shape[0]):
            print(class_names[i], percentages[i], av_durations[i])

        font = {'size': 10}
        plt.rc('font', **font)

        fig = plt.figure()
        ax1 = fig.add_subplot(211)
        ax1.set_yticks(np.array(range(len(class_names))))
        ax1.axis((0, duration, -1, len(class_names)))
        ax1.set_yticklabels(class_names)
        ax1.plot(np.array(range(len(flags_ind))) * mt_step +
                 mt_step / 2.0, flags_ind)
        if flags_ind_gt.shape[0] > 0:
            # Small vertical offset keeps the ground truth visible where it
            # coincides with the prediction.
            ax1.plot(np.array(range(len(flags_ind_gt))) * mt_step +
                     mt_step / 2.0, flags_ind_gt + 0.05, '--r')
        plt.xlabel('time(seconds)')
        if accuracy >= 0:
            plt.title('Accuracy = {0:.1f}%'.format(100.0 * accuracy))

        ax2 = fig.add_subplot(223)
        plt.title("Classes percentage durations")
        ax2.axis((0, len(class_names) + 1, 0, 100))
        ax2.set_xticks(np.array(range(len(class_names) + 1)))
        ax2.set_xticklabels([" "] + class_names)
        # bar() needs 1-D heights; the statistics are stored as (n, 1).
        ax2.bar(np.array(range(len(class_names))) + 0.5,
                percentages.ravel())

        ax3 = fig.add_subplot(224)
        plt.title("Segment average durration per class")
        ax3.axis((0, len(class_names) + 1, 0, av_durations.max()))
        ax3.set_xticks(np.array(range(len(class_names) + 1)))
        ax3.set_xticklabels([" "] + class_names)
        ax3.bar(np.array(range(len(class_names))) + 0.5,
                av_durations.ravel())
        plt.show()
    return accuracy

def evaluateSpeakerDiarization(flags, flags_gt):
    """Compute average cluster purity and speaker purity.

    Both inputs are truncated to their common length. A contingency table
    counts, for every (cluster, speaker) pair, the windows carrying that
    pair of labels; purities are the per-row / per-column maxima divided by
    the row / column totals, then averaged with those totals as weights.

    Args:
        flags: 1-D numpy array of estimated cluster labels.
        flags_gt: 1-D numpy array of ground-truth speaker labels.

    Returns:
        ``(purity_cluster_m, purity_speaker_m)`` as floats.
    """
    min_len = min(flags.shape[0], flags_gt.shape[0])
    flags = flags[0:min_len]
    flags_gt = flags_gt[0:min_len]

    # return_inverse gives, for each window, the row / column of its label.
    u_flags, row_idx = np.unique(flags, return_inverse=True)
    u_flags_gt, col_idx = np.unique(flags_gt, return_inverse=True)

    # compute contingency table:
    c_matrix = np.zeros((u_flags.shape[0], u_flags_gt.shape[0]))
    # add.at accumulates repeated (row, col) pairs, unlike fancy-index +=.
    np.add.at(c_matrix, (row_idx.ravel(), col_idx.ravel()), 1.0)

    Nc, Ns = c_matrix.shape
    N_s = np.sum(c_matrix, axis=0)
    N_c = np.sum(c_matrix, axis=1)
    N = np.sum(c_matrix)

    purity_clust = np.zeros((Nc, ))
    purity_speak = np.zeros((Ns, ))
    # compute cluster purity:
    for i in range(Nc):
        purity_clust[i] = np.max(c_matrix[i, :]) / N_c[i]

    # compute speaker purity:
    for j in range(Ns):
        purity_speak[j] = np.max(c_matrix[:, j]) / N_s[j]

    purity_cluster_m = np.sum(purity_clust * N_c) / N
    purity_speaker_m = np.sum(purity_speak * N_s) / N

    return purity_cluster_m, purity_speaker_m

def trainHMM_computeStatistics(features, labels):
    """Estimate HMM statistics from labelled feature vectors.

    Every statistic is indexed by the position of a label inside the sorted
    array of distinct labels, so label values need not be 0..n-1.

    Args:
        features: numpy array of shape (n_feats, n_vectors), one feature
            vector per column.
        labels: 1-D numpy array with one label per feature vector. If it is
            longer than the number of feature vectors a warning is printed
            and it is truncated.

    Returns:
        ``(start_prob, transmat, means, cov)`` where
        ``start_prob`` (n_comps,) is the relative frequency of each label,
        ``transmat`` (n_comps, n_comps) holds row-normalised counts of
        consecutive label pairs,
        ``means`` (n_comps, n_feats) is the per-label feature mean, and
        ``cov`` (n_comps, n_feats) is the per-label feature standard
        deviation (``np.std``), despite its name.
        A label with no outgoing transition yields a NaN row in
        ``transmat`` (0 / 0).
    """
    u_labels = np.unique(labels)
    n_comps = len(u_labels)

    n_feats = features.shape[0]

    if features.shape[1] < labels.shape[0]:
        print("trainHMM warning: number of short-term feature vectors "
              "must be greater or equal to the labels length")
        labels = labels[0:features.shape[1]]

    # Position of every label inside u_labels (u_labels is sorted and
    # contains every label, so searchsorted is an exact lookup).
    states = np.searchsorted(u_labels, labels)

    # compute prior probabilities:
    start_prob = np.bincount(states, minlength=n_comps).astype(float)
    # normalize prior probabilities:
    start_prob = start_prob / start_prob.sum()

    # compute transition matrix (add.at accumulates repeated pairs):
    transmat = np.zeros((n_comps, n_comps))
    np.add.at(transmat, (states[:-1], states[1:]), 1)
    # normalize rows of transition matrix:
    for i in range(n_comps):
        transmat[i, :] /= transmat[i, :].sum()

    means = np.zeros((n_comps, n_feats))
    cov = np.zeros((n_comps, n_feats))
    for i in range(n_comps):
        # Index columns rather than boolean-masking: features may have more
        # columns than there are labels.
        columns = features[:, np.nonzero(states == i)[0]]
        means[i, :] = columns.mean(axis=1)
        cov[i, :] = np.std(columns, axis=1)
    return start_prob, transmat, means, cov

def trainHMM_fromFile(wav_file, gt_file, hmm_model_name, mt_win, mt_step):
    """Train a Gaussian HMM from one audio file and its segment annotation.

    The ground-truth segments are turned into per-window flags, mid-term
    features are extracted from the audio, the HMM parameters are set from
    the statistics of the labelled features, and the model is pickled
    together with the class names and window parameters.

    Args:
        wav_file: path of the audio file.
        gt_file: path of the CSV segment annotation file.
        hmm_model_name: path the pickled model is written to.
        mt_win: mid-term window size (multiplied by the sampling rate).
        mt_step: mid-term window step (multiplied by the sampling rate).

    Returns:
        ``(hmm, class_names)``: the model and the list of class names.
    """
    # Imported here because this file never binds these pyAudioAnalysis
    # submodules at module level.
    from pyAudioAnalysis import audioBasicIO
    from pyAudioAnalysis import audioFeatureExtraction as aF

    [seg_start, seg_end, seg_labs] = readSegmentGT(gt_file)
    flags, class_names = segs2flags(seg_start, seg_end, seg_labs, mt_step)
    [fs, x] = audioBasicIO.readAudioFile(wav_file)
    [F, _, _] = aF.mtFeatureExtraction(x, fs, mt_win * fs, mt_step * fs,
                                       round(fs * 0.050), round(fs * 0.050))
    start_prob, transmat, means, cov = trainHMM_computeStatistics(F, flags)

    # One state per class, diagonal covariance.
    hmm = hmmlearn.hmm.GaussianHMM(start_prob.shape[0], "diag")
    hmm.startprob_ = start_prob
    hmm.transmat_ = transmat
    hmm.means_ = means
    hmm.covars_ = cov

    # Objects are dumped sequentially and must be loaded in the same order.
    with open(hmm_model_name, "wb") as fo:
        for obj in (hmm, class_names, mt_win, mt_step):
            cPickle.dump(obj, fo, protocol=cPickle.HIGHEST_PROTOCOL)

    return hmm, class_names

def trainHMM_fromDir(dirPath, hmm_model_name, mt_win, mt_step):
    """Train a Gaussian HMM from every annotated WAV file in a directory.

    Each ``*.wav`` file that has a sibling ``.segments`` annotation file
    contributes its mid-term features and per-window class flags; files
    without an annotation are skipped. Class indices are remapped to a list
    of class names shared across files before the statistics are computed.
    The model is pickled together with the class names and window
    parameters.

    Args:
        dirPath: directory that is searched (non-recursively) for WAV files.
        hmm_model_name: path the pickled model is written to.
        mt_win: mid-term window size (multiplied by the sampling rate).
        mt_step: mid-term window step (multiplied by the sampling rate).

    Returns:
        ``(hmm, classes_all)``: the model and the list of class names.

    Raises:
        ValueError: if no WAV file with a matching annotation was found.
    """
    # Imported here because this file never binds these pyAudioAnalysis
    # submodules at module level.
    from pyAudioAnalysis import audioBasicIO
    from pyAudioAnalysis import audioFeatureExtraction as aF

    flag_chunks = []
    feature_chunks = []
    classes_all = []
    for wav_file in glob.glob(dirPath + os.sep + '*.wav'):
        # for each WAV file; swap only the extension, never a '.wav' that
        # happens to appear earlier in the path
        gt_file = os.path.splitext(wav_file)[0] + '.segments'
        if not os.path.isfile(gt_file):
            continue
        [seg_start, seg_end, seg_labs] = readSegmentGT(gt_file)
        flags, class_names = segs2flags(seg_start, seg_end, seg_labs, mt_step)
        for c in class_names:
            # update class names:
            if c not in classes_all:
                classes_all.append(c)
        [fs, x] = audioBasicIO.readAudioFile(wav_file)
        [F, _, _] = aF.mtFeatureExtraction(x, fs, mt_win * fs,
                                           mt_step * fs, round(fs * 0.050),
                                           round(fs * 0.050))

        # Features and flags can differ in length; keep the common prefix.
        min_sm = min(F.shape[1], len(flags))
        F = F[:, 0:min_sm]
        flags = flags[0:min_sm]

        # Translate per-file class indices into indices of classes_all.
        flag_chunks.append(np.array(
            [classes_all.index(class_names[fl]) for fl in flags]))
        feature_chunks.append(F)

    if not feature_chunks:
        raise ValueError("no annotated .wav files found in " + str(dirPath))

    # Accumulating in lists means a skipped first file cannot leave the
    # feature matrix undefined.
    flags_all = np.concatenate(flag_chunks)
    f_all = np.concatenate(feature_chunks, axis=1)

    # compute HMM statistics
    start_prob, transmat, means, cov = trainHMM_computeStatistics(f_all,
                                                                  flags_all)
    hmm = hmmlearn.hmm.GaussianHMM(start_prob.shape[0], "diag")
    hmm.startprob_ = start_prob
    hmm.transmat_ = transmat
    hmm.means_ = means
    hmm.covars_ = cov

    # save HMM model; objects must be loaded back in the same order
    with open(hmm_model_name, "wb") as fo:
        for obj in (hmm, classes_all, mt_win, mt_step):
            cPickle.dump(obj, fo, protocol=cPickle.HIGHEST_PROTOCOL)

    return hmm, classes_all

def hmmSegmentation(wav_file_name, hmm_model_name, plot_res=False,
                    gt_file_name=""):
    """Segment an audio file with a previously pickled HMM.

    Args:
        wav_file_name: path of the audio file to segment.
        hmm_model_name: path of a model file holding, in order, the pickled
            HMM, class-name list, mid-term window and mid-term step.
        plot_res: when True, statistics are printed and plotted.
        gt_file_name: optional path of a CSV ground-truth segment file; when
            it exists the result is evaluated against it.

    Returns:
        ``None`` if the model file cannot be opened or read;
        ``(flags_ind, class_names_gt, acc, cm)`` when an accuracy could be
        computed against ground truth; otherwise
        ``(flags_ind, classes_all, -1, -1)``.
    """
    # Imported here because this file never binds these pyAudioAnalysis
    # submodules at module level.
    from pyAudioAnalysis import audioBasicIO
    from pyAudioAnalysis import audioFeatureExtraction as aF

    [fs, x] = audioBasicIO.readAudioFile(wav_file_name)

    # SECURITY NOTE: unpickling runs arbitrary code embedded in the file;
    # only load model files from a trusted source.
    try:
        with open(hmm_model_name, "rb") as fo:
            hmm = cPickle.load(fo)
            classes_all = cPickle.load(fo)
            mt_win = cPickle.load(fo)
            mt_step = cPickle.load(fo)
    except IOError:
        print("didn't find file")
        return

    [Features, _, _] = aF.mtFeatureExtraction(x, fs, mt_win * fs, mt_step * fs,
                                              round(fs * 0.050),
                                              round(fs * 0.050))
    flags_ind = hmm.predict(Features.T)
    if os.path.isfile(gt_file_name):
        [seg_start, seg_end, seg_labs] = readSegmentGT(gt_file_name)
        flags_gt, class_names_gt = segs2flags(seg_start, seg_end, seg_labs,
                                              mt_step)
        flagsGTNew = []
        for fl in flags_gt:
            # "align" labels with GT; -1 marks a class the model never saw
            name = class_names_gt[fl]
            if name in classes_all:
                flagsGTNew.append(classes_all.index(name))
            else:
                flagsGTNew.append(-1)
        cm = np.zeros((len(classes_all), len(classes_all)))
        flags_ind_gt = np.array(flagsGTNew)
        for i in range(min(flags_ind.shape[0], flags_ind_gt.shape[0])):
            # Skip unknown GT classes: indexing with -1 would silently
            # count them in the last row of the confusion matrix.
            if flags_ind_gt[i] >= 0:
                cm[int(flags_ind_gt[i]), int(flags_ind[i])] += 1
    else:
        flags_ind_gt = np.array([])
    acc = plotSegmentationResults(flags_ind, flags_ind_gt, classes_all,
                                  mt_step, not plot_res)
    if acc >= 0:
        print("Overall Accuracy: {0:.2f}".format(acc))
        return (flags_ind, class_names_gt, acc, cm)
    else:
        return (flags_ind, classes_all, -1, -1)

# def mtFileClassification(input_file, model_name, model_type,
#                          plot_results=False, gt_file=""):
    #492
    
            
    



            






            
        




In [0]:
!pip install simplejson

In [0]:
!pip install pydub

In [0]:
import aifc
import ntpath
import shutil
import numpy as np
from pydub import AudioSegment

In [0]:
def readAudioFile(path):
    '''
    Read an audio file and return its sampling rate and samples.

    The format is chosen from the file extension (case-insensitive):
    .aif/.aiff files are read with aifc, .mp3/.wav/.au/.ogg files are
    decoded with pydub's AudioSegment.

    ARGUMENTS:
        path: path of the audio file

    RETURNS:
        (Fs, x): the sampling rate and a numpy array of samples. For pydub
        formats x has one column per channel and is flattened to 1-D when
        there is a single channel.
        (-1, -1) on failure: unknown extension, decoding error, I/O error,
        or a sample width other than 2 or 4 bytes.
    '''
    extension = os.path.splitext(path)[1].lower()

    try:
        if extension in ('.aif', '.aiff'):
            s = aifc.open(path, 'r')
            try:
                nframes = s.getnframes()
                strsig = s.readframes(nframes)
                Fs = s.getframerate()
            finally:
                # Release the file handle even if reading fails.
                s.close()
            # Frames are interpreted as 16-bit samples and byte-swapped.
            x = np.frombuffer(strsig, np.short).byteswap()
        elif extension in ('.mp3', '.wav', '.au', '.ogg'):
            try:
                audiofile = AudioSegment.from_file(path)
            except Exception:
                # Decoding is best-effort, but KeyboardInterrupt and
                # SystemExit must still propagate.
                print("Error: file not found or other I/O error. "
                      "(DECODING FAILED)")
                return (-1, -1)

            if audiofile.sample_width == 2:
                data = np.frombuffer(audiofile._data, np.int16)
            elif audiofile.sample_width == 4:
                data = np.frombuffer(audiofile._data, np.int32)
            else:
                return (-1, -1)
            Fs = audiofile.frame_rate
            # Samples are interleaved by channel; de-interleave them into
            # one column per channel.
            n_channels = audiofile.channels
            x = np.array([data[chn::n_channels]
                          for chn in range(n_channels)]).T
        else:
            print("Error in readAudioFile(): Unknown file type!")
            return (-1, -1)
    except IOError:
        print("Error: file not found or other I/O error.")
        return (-1, -1)

    if x.ndim == 2 and x.shape[1] == 1:
        x = x.flatten()

    return (Fs, x)


In [0]:
def new_x_AudioFile(path):
    '''
    Return only the samples of an audio file, discarding the sampling rate.

    The file is read by readAudioFile, so the supported formats and the
    printed error messages are the same.

    ARGUMENTS:
        path: path of the audio file

    RETURNS:
        The numpy array of samples, or the tuple (-1, -1) when the file
        could not be read.
    '''
    Fs, x = readAudioFile(path)
    if isinstance(x, int):
        # readAudioFile signals failure with (-1, -1); hand the same
        # sentinel back to callers.
        return (-1, -1)
    return x

In [0]:
def stereo2mono(x):
    '''
    Collapse a signal held in a numpy array to a single (MONO) channel.

    A 1-D array is returned unchanged, a single-column 2-D array is
    flattened, and a two-column 2-D array is reduced to the average of its
    columns. Returns -1 for an int argument or a 2-D array with any other
    number of columns, and None for any other number of dimensions.
    '''
    if isinstance(x, int):
        return -1

    n_dims = x.ndim
    if n_dims == 1:
        return x
    if n_dims != 2:
        return None

    n_channels = x.shape[1]
    if n_channels == 1:
        return x.flatten()
    if n_channels == 2:
        # Halve each channel before adding.
        return ( (x[:,1] / 2) + (x[:,0] / 2) )
    return -1

In [0]:
# print('as_mono:',stereo2mono(x_lln))

# print('as_stereo:', new_x_AudioFile('/content/Hearthings_vaw_trim.wav'))

In [0]:
# Fetch a single <1MB file using the raw GitHub URL.
!curl --remote-name \
     -H 'Accept: application/vnd.github.v3.raw' \
     --location 'https://raw.githubusercontent.com/themidwestcanapps/https-themidwestcanapps.github.io/master/Audio2019-10-21(04_37_08).wav'

In [0]:
readAudioFile('Audio2019-10-21(04_37_08).wav')

In [0]:

# import wave
# w = wave.open('Audio2019-10-21(04_37_08).wav', 'r')
# for i in range(1):
#     frame = w.readframes(1)



In [0]:
import wave

# Scan the recording one frame at a time and report every frame whose bytes
# are all zero.
w = wave.open('Audio2019-10-21(04_37_08).wav', 'r')
try:
    frame_rate = w.getframerate()
    for i in range(w.getnframes()):
        ### read 1 frame and the position will updated ###
        frame = w.readframes(1)

        # bytearray() yields integers on both Python 2 and 3, so any() is
        # True as soon as one byte of the frame is non-zero.
        all_zero = not any(bytearray(frame))

        if all_zero:
            # perform your cut here
            print('silence found at frame %s' % w.tell())
            print('silence found at second %s' % (w.tell() / frame_rate))
finally:
    # `w` and `frame` stay defined for later cells; only the handle is
    # released.
    w.close()

In [0]:
print(len(frame))

In [0]:
frame