# Speech Prominence Detection
Speech Prominence Detection is the process of identifying the most important or prominent parts of speech in an audio signal. Prominence refers to the degree of emphasis or attention that a particular word or phrase receives in spoken language, which is often conveyed through variations in pitch, loudness, and timing. Speech Prominence Detection is an essential task in speech processing and natural language understanding, with a wide range of applications including speech recognition, sentiment analysis, and language translation. The goal of this project is to develop a machine learning model that can accurately identify the prominent parts of speech in a given audio signal. This project will involve feature extraction, model training, and evaluation, with the aim of achieving high accuracy and generalizability on a diverse range of speech datasets. The results of this project could have significant implications for improving speech recognition and understanding systems in real-world applications.

In [2]:
import numpy as np
import wave
import struct
import math
from scipy.signal import butter, lfilter

In [3]:
# Auxiliary functions for feature computation
def spectral_selection(x, n):
    """Keep, for every column of ``x``, its ``n`` largest entries.

    Each column of ``x`` is treated independently (the callers pass one
    column per frame and one row per sub-band).  The ``n`` largest values of
    the column are kept and returned in their original row order.

    Parameters
    ----------
    x : 2-D array, shape (rows, cols)
        Matrix to select from.
    n : int
        Number of entries to keep per column.  Values of ``n`` that are
        greater than or equal to the number of rows keep every row.

    Returns
    -------
    numpy.ndarray, shape (min(n, rows), cols), dtype float
        Always two-dimensional, including for a single-column input.

    Notes
    -----
    Ties are resolved with a stable sort, so among equal values the entries
    from the later rows are the ones kept.
    """
    x = np.asarray(x, dtype=float)
    row = x.shape[0]

    # Ascending, stable argsort down each column: the last ``n`` positions of
    # ``order`` are the row indices of the ``n`` largest values.
    order = np.argsort(x, axis=0, kind="stable")
    top_rows = order[max(row - n, 0):, :]

    # Sorting the kept row indices restores the original (sub-band) order.
    top_rows = np.sort(top_rows, axis=0)
    return np.take_along_axis(x, top_rows, axis=0)


def temp_vec_corr(x2, t_sigma):
    """Gaussian-weighted pairwise correlation across the columns of ``x2``.

    Every column of ``x2`` is scaled by a normal pdf centred on the middle
    column (mean ``(cols + 1) / 2``, standard deviation ``t_sigma``).  The
    products of all distinct column pairs are then summed per row, averaged
    over the number of pairs and square-rooted.

    Returns a 1-D array with one value per row, except when ``x2`` has a
    single column: then the windowed copy of ``x2`` (shape ``(rows, 1)``) is
    returned as is.  ``x2`` itself is never modified.
    """
    from scipy.stats import norm

    n_rows, n_cols = x2.shape[0], x2.shape[1]
    window = norm.pdf(np.arange(1, n_cols + 1, 1), (n_cols + 1) / 2, t_sigma)

    # Write the windowed values into a fresh array so the caller's data stays untouched.
    weighted = np.zeros((n_rows, n_cols))
    weighted[:, :] = np.multiply(x2, window)

    if n_cols == 1:
        return weighted

    pair_sum = 0
    for first in range(n_cols - 1):
        for second in range(first + 1, n_cols):
            pair_sum = pair_sum + np.multiply(weighted[:, first], weighted[:, second])

    n_pairs = (n_cols - 1) * n_cols / 2
    return np.sqrt(np.divide(pair_sum, n_pairs))

def temporal_corr(x, win, t_sigma):
    """Sliding-window temporal correlation of every row of ``x``.

    ``x`` is zero-padded with ``hwin = int((win - 1) / 2)`` columns on each
    side.  For every original column a window of ``2 * hwin + 1`` columns
    centred on it is passed to ``temp_vec_corr`` and the result becomes the
    corresponding output column.

    Parameters
    ----------
    x : 2-D array, shape (rows, cols)
    win : int
        Window length in columns (an even value is reduced to the next lower
        odd length by the integer half-window).
    t_sigma : float
        Standard deviation of the Gaussian window used by ``temp_vec_corr``.

    Returns
    -------
    numpy.ndarray, shape (rows, cols), dtype float
        Always two-dimensional, including for a single-column input.
    """
    hwin = int((win - 1) / 2)           # half window size
    n_rows, n_cols = int(x.shape[0]), x.shape[1]

    # hwin zero columns on both sides so every frame has a full window.
    padded = np.pad(x, ((0, 0), (hwin, hwin)), mode="constant")

    # Preallocate instead of growing the result one hstack at a time.
    y = np.zeros((n_rows, n_cols))
    for col in range(n_cols):
        window = padded[:, col:col + 2 * hwin + 1]
        # temp_vec_corr returns shape (rows,) or, for a 1-column window,
        # (rows, 1); ravel makes both fit one output column.
        y[:, col] = np.ravel(temp_vec_corr(window, t_sigma))
    return y

def spectral_corr(x):
    """Pairwise correlation across the rows of ``x``, one value per column.

    For every column, the products of all distinct row pairs are summed,
    divided by the number of pairs and square-rooted.  The result has shape
    ``(1, cols)``.  A single-row input is returned unchanged.
    """
    n_rows, n_cols = x.shape[0], x.shape[1]
    if n_rows == 1:
        return x

    pair_sum = np.zeros((1, n_cols))
    for upper in range(n_rows - 1):
        for lower in range(upper + 1, n_rows):
            pair_sum = pair_sum + np.multiply(x[upper, :], x[lower, :])

    n_pairs = n_rows * (n_rows - 1) / 2
    return np.sqrt(np.divide(pair_sum, n_pairs))

def statFunctions_Syl(t):
    """Summary statistics of a contour stored as a ``(1, N)`` array.

    If the contour contains negative values it is first shifted so that its
    minimum is zero.  The result is an ``(8, 1)`` column holding, in order:
    median, mean, geometric mean of the absolute values, range, standard
    deviation, and then the standard deviation, skewness and kurtosis of the
    sample positions ``1..N`` weighted by the min-shifted, sum-normalised
    contour.
    """
    from scipy.stats.mstats import gmean

    if np.min(t) < 0:
        t = np.subtract(t, np.min(t[0]))

    contour = t[0]
    summary = np.array([[
        np.median(contour),
        np.mean(contour),
        gmean(np.absolute(contour)),
        np.max(contour) - np.min(contour),
        np.std(contour),
    ]]).T

    # Turn the contour into weights that sum to one.
    weights = np.subtract(t, np.min(t[0]))
    weights = np.divide(weights, np.sum(weights[0]))
    profile = weights[0]

    positions = np.array([np.arange(1, len(profile) + 1)])
    centre = np.sum(np.multiply(positions, weights)[0])
    offsets = np.subtract(positions, centre)

    spread = np.sqrt(np.sum(np.multiply(np.power(offsets, 2), profile)))
    skewness = np.sum(np.divide(np.multiply(np.power(offsets, 3), profile), np.power(spread, 3)))
    kurtosis = np.sum(np.divide(np.multiply(np.power(offsets, 4), profile), np.power(spread, 4)))

    moments = np.array([[spread, skewness, kurtosis]]).T
    return np.vstack((summary, moments))

def statFunctions_Vwl(t):
    """Summary statistics of a contour stored as a ``(1, N)`` array.

    If the contour contains negative values it is first shifted so that its
    minimum is zero.  The result is a ``(7, 1)`` column holding, in order:
    median, mean, range, standard deviation, and then the standard
    deviation, skewness and kurtosis of the sample positions ``1..N``
    weighted by the min-shifted, sum-normalised contour.
    """
    if np.min(t) < 0:
        t = np.subtract(t, np.min(t[0]))

    contour = t[0]
    summary = np.array([[
        np.median(contour),
        np.mean(contour),
        np.max(contour) - np.min(contour),
        np.std(contour),
    ]]).T

    # Turn the contour into weights that sum to one.
    weights = np.subtract(t, np.min(t[0]))
    weights = np.divide(weights, np.sum(weights[0]))
    profile = weights[0]

    positions = np.array([np.arange(1, len(profile) + 1)])
    centre = np.sum(np.multiply(positions, weights)[0])
    offsets = np.subtract(positions, centre)

    spread = np.sqrt(np.sum(np.multiply(np.power(offsets, 2), profile)))
    skewness = np.sum(np.divide(np.multiply(np.power(offsets, 3), profile), np.power(spread, 3)))
    kurtosis = np.sum(np.divide(np.multiply(np.power(offsets, 4), profile), np.power(spread, 4)))

    moments = np.array([[spread, skewness, kurtosis]]).T
    return np.vstack((summary, moments))

def smooth(t_cor, swin, sigma):
    """Smooth a ``(1, N)`` contour with a Gaussian kernel of ``swin`` taps.

    The kernel is a normal pdf sampled at ``1..swin`` with mean
    ``(swin + 1) / 2`` and standard deviation ``sigma``.  The full
    convolution is trimmed at both ends and returned as a 1-D array (``N``
    samples for an odd ``swin``).
    """
    from scipy.stats import norm

    kernel = norm.pdf(np.arange(1, swin + 1), (swin + 1) / 2, sigma)
    contour = np.array([t_cor])[0, 0, :]
    full = np.convolve(contour, kernel)

    # Drop the leading and trailing samples that the full convolution adds.
    first = (swin + 1) // 2 - 1
    stop = len(full) - (swin - 1) // 2
    return full[np.arange(first, stop, 1)]

def get_labels(lab_list,fa,fileName):
    """Append a binary label row to ``fa`` and build a matching file-name list.

    An entry of ``lab_list`` is labelled 1 when the first item of
    ``entry[0].tolist()`` is the string ``'P'`` and 0 otherwise.  The labels
    are stacked under ``fa`` as one extra row.

    Returns ``(fb, filenm)``: the stacked array and a list that repeats
    ``fileName`` once per label.
    """
    labels = [1 if str((entry[0].tolist())[0]) == str('P') else 0 for entry in lab_list]
    filenm = [fileName] * len(labels)
    fb = np.vstack((fa, labels))
    return fb, filenm

def get_labels_seq2seq(lab_list):
    """Return one binary label per entry of ``lab_list``.

    An entry is labelled 1 when the first item of ``entry[0].tolist()`` is
    the string ``'P'`` and 0 otherwise.
    """
    return [1 if str((entry[0].tolist())[0]) == str('P') else 0 for entry in lab_list]
    
def vocoder_func(wavPath):
    """Compute 19-band sub-band energies for a mono, 16-bit PCM WAV file.

    The signal is peak-normalised, cut into 20 ms frames with a 10 ms hop,
    and every frame is passed through 19 second-order Butterworth band-pass
    filters.  Each band output is rectified and low-pass filtered at 50 Hz;
    the last sample of that envelope is the band energy ``E`` (values below 1
    are replaced by 0.5) and is stored as ``exp(2 * log10(E))``.

    Parameters
    ----------
    wavPath : str
        Path of the WAV file.  The sampling rate must be above 8 kHz because
        the highest band edge is 4000 Hz.

    Returns
    -------
    Fs : int
        Sampling rate of the file.
    subBandEnergies : numpy.ndarray, shape (n_frames - 1, 19)
        One row per frame.  The first row is a constant ``exp(0.5)`` row and
        the last two computed frames are dropped.
    xx : numpy.ndarray, shape (n_samples + 1,)
        ``Fs`` followed by the peak-normalised samples.

    Raises
    ------
    struct.error
        If the file content is not mono 16-bit PCM of the advertised length.
    """

    # FILTER DEFINITIONS

    def butter_bandpass(lowcut, highcut, fs, order):
        """Butterworth band-pass coefficients; cut-offs are given in Hz."""
        nyq = 0.5 * fs
        return butter(order, [float(lowcut) / nyq, float(highcut) / nyq], btype='band')

    def butter_lowpass(lowcut, fs, order):
        """Butterworth low-pass coefficients; the cut-off is given in Hz."""
        nyq = 0.5 * fs
        return butter(order, float(lowcut) / nyq, btype='lowpass')

    # FUNCTION TO READ A .wav FILE MATLAB STYLE

    def readWav(wavPath):
        """Return (samples as an (n, 1) column scaled to peak 1, fs, n)."""
        # The context manager closes the file even if reading fails.
        with wave.open(wavPath) as waveFile:
            fs = waveFile.getframerate()
            length = waveFile.getnframes()
            raw = waveFile.readframes(length)
        # One bulk unpack; a size mismatch (e.g. stereo or 8-bit data) raises
        # struct.error, as the per-frame unpack did.
        samples = struct.unpack("<{}h".format(length), raw)
        data = np.array(samples, dtype=float).reshape(-1, 1)
        data = data / np.max(np.abs(data))
        return data, fs, length

    # BUFFER FUNCTION AS DEFINED IN MATLAB

    def buffer(x, n, p=0, opt=None):
        """Split column vector ``x`` into frames of ``n`` samples that overlap by ``p``."""
        if p >= n:
            raise ValueError('p ({}) must be less than n ({}).'.format(p, n))
        cols = int(np.ceil(len(x) / float(n - p))) + 1
        if opt == 'nodelay':
            cols += 1
        elif opt is not None:
            raise SystemError('Only `None` (default initial condition) and '
                              '`nodelay` (skip initial condition) have been '
                              'implemented')
        b = np.zeros((n, cols))
        j = 0
        for i in range(cols):
            if i == 0 and opt == 'nodelay':
                b[0:n, i] = x[0:n]
                continue
            elif i != 0 and p != 0:
                # Start of this frame = overlap taken from the previous frame.
                b[:p, i] = b[-p:, i - 1]
            else:
                b[:p, i] = 0
            k = j + n - p
            n_end = p + len(x[j:k])
            b[p:n_end, i] = x[j:k, 0]
            j = k
        return b

    # Centre frequencies and bandwidths (Hz) of the 19 bands.
    fltcF = np.array([240,360,480,600,720,840,1000,1150,1300,1450,1600,1800,2000,2200,2400,2700,3000,3300,3750])
    fltBW = np.array([120,120,120,120,120,120,150,150,150,150,150,200,200,200,200,300,300,300,500])

    # Row 0: lower band edges, row 1: upper band edges.
    fltFc = np.array([np.subtract(fltcF, np.divide(fltBW, 2)), np.add(fltcF, np.divide(fltBW, 2))])
    fltLpFc = 50
    nBands = fltFc.shape[1]

    sig, Fs, length = readWav(wavPath)

    # Sampling rate followed by the normalised samples, returned to the caller.
    xx = np.append(Fs, sig)

    nWndw = int(round(Fs * 0.02))        # frame length: 20 ms
    nOverlap = int(round(Fs * 0.01))     # overlap: 10 ms

    sig = 0.99 * sig / np.max(np.abs(sig))   # Normalizing the signal

    # Windowing first and filtering next
    sigFrames = buffer(sig * 32768, nWndw, nOverlap)        # each column is one analysis frame
    subBandEnergies = np.zeros([nBands, sigFrames.shape[1]])

    # The filter coefficients depend only on Fs and the band edges, so design
    # them once instead of once per frame and band.
    bandFilters = [butter_bandpass(fltFc[0][i], fltFc[1][i], Fs, 2) for i in range(nBands)]
    lpB, lpA = butter_lowpass(float(fltLpFc), Fs, 2)

    for j in range(sigFrames.shape[1]):
        currFrame = sigFrames[:, j]
        for i, (bpB, bpA) in enumerate(bandFilters):
            fltFrame = lfilter(bpB, bpA, currFrame)
            rectFrame = np.abs(fltFrame[0:nWndw])
            lpFltFrame = lfilter(lpB, lpA, rectFrame)
            currEnergy = lpFltFrame[nWndw - 1]
            if currEnergy < 1:
                # Floor small envelopes so the logarithm below stays defined.
                currEnergy = 0.5
            subBandEnergies[i, j] = math.exp(2 * math.log(currEnergy) / math.log(10))

    # Prepend a constant exp(0.5) frame, drop the last two frames and put frames on rows.
    subBandEnergies = np.concatenate((np.exp(0.5 * np.ones((nBands, 1))), subBandEnergies[:, 0:-2]), axis=1).T

    return Fs, subBandEnergies, xx

# Summary of vocoder_func: the signal is cut into 20 ms frames with a 10 ms hop (50% overlap); each column of sigFrames is one frame.
# The sub-band energies are computed frame by frame.
# Each frame is band-pass filtered through the 19 bands, rectified and low-pass filtered at 50 Hz; the last sample of that envelope is the band energy E (values below 1 are replaced by 0.5), stored as exp(2*log10(E)).
# Finally a column of exp(0.5) is prepended, the last two frame columns are dropped, and the matrix is transposed so that each row is a frame.

In [4]:
# Centre frequencies and bandwidths (Hz) of the 19 vocoder bands.
fltcF = np.array([240, 360, 480, 600, 720, 840, 1000, 1150, 1300, 1450,
                  1600, 1800, 2000, 2200, 2400, 2700, 3000, 3300, 3750])
fltBW = np.array([120, 120, 120, 120, 120, 120, 150, 150, 150, 150,
                  150, 200, 200, 200, 200, 300, 300, 300, 500])

# Row 0 holds the lower band edges, row 1 the upper band edges.
fltFc = np.array([fltcF - fltBW / 2, fltcF + fltBW / 2])
fltLpFc = 50

print(fltFc)



[[ 180.  300.  420.  540.  660.  780.  925. 1075. 1225. 1375. 1525. 1700.
  1900. 2100. 2300. 2550. 2850. 3150. 3500.]
 [ 300.  420.  540.  660.  780.  900. 1075. 1225. 1375. 1525. 1675. 1900.
  2100. 2300. 2500. 2850. 3150. 3450. 4000.]]


In [5]:
# imports
import numpy as np
import pandas as pd
import re
import os
import collections
import scipy
from scipy.signal import medfilt
from IPython.display import clear_output

import warnings
warnings.filterwarnings('ignore')

In [6]:
# Root of the data tree; every other location is derived from it.
data_dir = "../data/"

# German (GER) train and test folders.
ger_train_dir = os.path.join(data_dir, "GER/train/")
ger_test_dir = os.path.join(data_dir, "GER/test/")
# ita_test_dir = os.path.join(data_dir, "ITA/test/")
# ita_train_dir = os.path.join(data_dir, "ITA/train/")

# Syllable dictionary file name, phone-label folder and stress-label folder.
dict_name = "nativeEnglishDict_gt100_manoj.syl"
phn_dir = os.path.join(data_dir, "fisher-2000_FA_GT_ESTphnTrans_estStress/lab/txt/phn/")
stressLabelspath = "".join((data_dir, "FA_htkCorrectedLabWithFullAudio", "/lab/mat/sylStress/"))

In [7]:
# Report every expected input directory that is missing.
for required_dir, missing_msg in (
    (data_dir, "Data directory does not exist"),
    (ger_train_dir, "German Train directory does not exist"),
    # (ita_train_dir, "Italian Train directory does not exist"),
    (phn_dir, "Phoneme directory does not exist"),
):
    if not os.path.exists(required_dir):
        print(missing_msg)

In [8]:
# File names found in the German test and train folders (test is listed first).
ger_test_files, ger_train_files = (
    os.listdir(folder) for folder in (ger_test_dir, ger_train_dir)
)
# ita_test_files = os.listdir(ita_test_dir)
# ita_train_files = os.listdir(ita_train_dir)

In [9]:
# Feature-computation parameters.
# twin / t_sigma: presumably the window length and Gaussian sigma handed to
# temporal_corr; swin / s_sigma: presumably the ones handed to smooth.
twin, t_sigma = 5, 1.4
swin, s_sigma = 7, 1.5
# NOTE(review): mwin and max_threshold are not used in the visible code; confirm their role.
mwin = 13
max_threshold = 25

# 1-based sub-band indices for the vowel and syllable contours, and how many
# of those bands spectral_selection keeps per frame.
vwlSB_num = 4
vowelSB = [1, 2, 4, 5, 6, 7, 8, 13, 14, 15, 16, 17]
sylSB_num = 5
sylSB = [1, 2, 3, 4, 5, 6, 13, 14, 15, 16, 17, 18]

# Accumulators, each starting as its own empty list.
(startWordFrame_all,
 spurtStartFrame_all,
 spurtEndFrame_all,
 vowelStartFrame_all,
 vowelEndFrame_all,
 eng_full_all,
 spurtStress_all) = ([] for _ in range(7))

In [10]:
def get_data_array(phn_file):
    """Load a phone-label file with ``start end label`` on every line.

    Parameters
    ----------
    phn_file : str
        Path of a whitespace-separated text file with two numeric columns
        followed by a label of at most 16 bytes.

    Returns
    -------
    numpy.ndarray or None
        One row ``(start, end, label)`` per line of the file; NumPy stores
        the mixed rows as strings.  A file with a single line yields one row.
        ``None`` is returned, after printing a message, when the file cannot
        be opened or parsed.
    """
    try:
        # The context manager closes the file on every path.
        with open(phn_file, 'r') as fid:
            data_array = np.loadtxt(fid, dtype={'names': ('a', 'b', 'c'), 'formats': ('f4', 'f4', 'S16')})
    except OSError:
        print('File does not exist')
        return None
    except (ValueError, IndexError) as err:
        # Malformed content is reported as such instead of as a missing file.
        print('Could not parse {}: {}'.format(phn_file, err))
        return None

    # loadtxt returns a 0-d array for a one-line file; make it iterable.
    records = np.atleast_1d(data_array)
    rows = [(rec[0], rec[1], rec[2].decode()) for rec in records]
    return np.array(rows)

In [11]:
def get_phone_data(data_array):
    """Split label rows into lower-cased phones and their time pairs, dropping ``'sil'``.

    ``data_array`` is a sequence of ``(start, end, label)`` rows.  Returns
    ``(phones, phnTimes)`` where ``phones`` has shape ``(1, M)`` and
    ``phnTimes`` has shape ``(M, 2)``; rows whose lower-cased label is
    ``'sil'`` are removed from both.
    """
    start_col = np.array([[row[0] for row in data_array]]).T
    end_col = np.array([[row[1] for row in data_array]]).T
    phnTimes = np.hstack((start_col, end_col))

    phones = np.array([[row[2] for row in data_array]])
    # Lower-cased in place because the syllable dictionary is in lowercase.
    for pos in range(len(phones[0])):
        phones[0][pos] = phones[0][pos].lower()

    silence_rows = np.argwhere(phones[0] == 'sil')
    phones = phones[phones != 'sil'].reshape(1, -1)
    phnTimes = np.delete(phnTimes, silence_rows, axis=0)

    return phones, phnTimes

In [12]:
# Getting vowel data
def get_vowel_data(data_array):
    """Collect the vowel rows of ``data_array``.

    A row ``(start, end, label)`` is kept when its lower-cased label is one
    of the vowels listed below.  Returns ``(vowel, vowel_start_time,
    vowel_end_time)``, each an array of shape ``(1, M)``.
    """
    # VOWEL LIST
    vowels = ('aa', 'ae', 'ah', 'ao', 'aw', 'ay', 'eh', 'er', 'ey', 'ih', 'iy', 'ow', 'oy', 'uh', 'uw')

    vowel_rows = [row for row in data_array if row[2].lower() in vowels]

    vowel = np.array([[row[2].lower() for row in vowel_rows]])
    vowel_start_time = np.array([[row[0] for row in vowel_rows]])
    vowel_end_time = np.array([[row[1] for row in vowel_rows]])
    return vowel, vowel_start_time, vowel_end_time

In [13]:
def get_words(file_name):
    """Return the lower-cased words transcribed for ``file_name``.

    Every line of ``ISLEtrans.txt`` (under ``data_dir``) that contains
    ``file_name`` contributes the words found after its first space.
    """
    with open(data_dir + "ISLEtrans.txt", 'r') as trans_file:
        transcript = trans_file.read()

    words = []
    for entry in transcript.split('\n'):
        if file_name not in entry:
            continue
        # Text before the first space is discarded; the rest is the word string.
        _, spoken = entry.split(' ', 1)
        words += [token.lower() for token in re.findall(r'\b\w+\b', spoken)]
    return words

In [14]:
def get_word_syls(words):
    """Look up the dictionary pronunciations of every word.

    The dictionary file (``data_dir + dict_name``) is read line by line: the
    first whitespace-separated token is the key and the stripped text after
    the first ``'='`` is one pronunciation.  Returns one list per word, in
    order; words missing from the dictionary get an empty list.
    """
    pronunciations = collections.defaultdict(list)
    with open(data_dir + dict_name, 'r') as dict_file:
        for entry in dict_file:
            headword = entry.split()[0]
            variant = entry.split('=')[1].strip()
            pronunciations[headword].append(variant)

    # The membership test avoids inserting missing words into the defaultdict.
    return [pronunciations[word] if word in pronunciations else [] for word in words]

In [15]:
def get_path_indices(words, word_syls, phones):
    """Find one pronunciation per word whose phones match the observed phones.

    Words are processed left to right.  A "path" is a list with one
    pronunciation index per word handled so far.  For every surviving path
    and every candidate pronunciation of the current word, the pronunciations
    along the path plus the candidate are joined into one string, the
    ``' . '`` separators are replaced by spaces, and the resulting phone
    tokens are compared with the start of ``phones[0]`` (with all of
    ``phones[0]`` for the last word).  Candidates that match extend the path.

    Parameters:
        words: sequence of words; only its length is used here.
        word_syls: per word, a list of candidate pronunciation strings.
        phones: 2-D array whose row 0 holds the observed phone labels.

    Returns:
        ``(path, currTestWordSyls)`` where ``path`` is the first surviving
        path and ``currTestWordSyls`` is the last candidate string that was
        built, or ``(None, None)`` when no path survives.

    NOTE(review): with an empty ``words`` the loop never runs, so the final
    return reads ``currTestWordSyls`` before it is assigned.
    """
    newSuccessInds_all = []
    newSuccessInds_all2 = []

    # The integer 0 is a sentinel meaning "no previous words yet".
    prevSuccessInds_all = []
    prevSuccessInds_all.append(0)

    # Example utterance: I said white not bait
    for iterWord in range(0, len(words)):
        currWordSyls = word_syls[iterWord]   # candidate pronunciations of the current word
        countSuccess = 1                     # incremented below but never read

        for iterPrev in range(0, len(prevSuccessInds_all)):
            # Rebuild the phone string of the previous words along this path.
            prevWordSyls = ""
            if prevSuccessInds_all[iterPrev] == 0:
                currPrevSylInds = []
            else:
                currPrevSylInds = prevSuccessInds_all[iterPrev]
                for iterPrevSyls in range(0, len(currPrevSylInds)):
                    temp = word_syls[iterPrevSyls]
                    prevWordSyls = prevWordSyls + \
                        temp[currPrevSylInds[iterPrevSyls]]+" "

            # iterating through the candidate pronunciations of the current word
            for iterCurr in range(0, len(currWordSyls)):
                currTestWordSyls = prevWordSyls + currWordSyls[iterCurr]
                # Drop the syllable separators so only space-separated phones remain.
                temp2 = currTestWordSyls.replace(' . ', ' ')


                # Positions of the spaces; with no space, the string end is used instead.
                inds = [m.start() for m in re.finditer(' ', temp2)]
                if len(inds) == 0:
                    inds = [len(temp2)]

                count = 1      # incremented below but never read
                temp = []      # phone tokens of the candidate string

                # Collect the token that ends at each recorded position.
                for iterTemp in range(len(inds)):
                    if iterTemp == 0:
                        temp1 = temp2[0:inds[iterTemp]]
                        # print(temp2 + "\t\t| " + temp1)
                    else:
                        temp1 = temp2[inds[iterTemp-1]+1:inds[iterTemp]]
                    if not ((np.unique(temp1) == ' ').any() or (len(temp1) == 0)):
                        temp.append(temp1)
                        count += 1

                # After the loop iterTemp is the last index: pick up the token
                # that follows the last recorded position.
                if iterTemp == len(inds) - 1 and len(inds) < len(currTestWordSyls):
                    temp1 = temp2[inds[iterTemp]+1:len(temp2)]
                    if not ((len(temp1) == 0) or (np.unique(temp1) == ' ').any()):
                        temp.append(temp1)
                        count = count+1

                # The last word must account for every phone; earlier words
                # only need to match a prefix of the same length.
                if iterWord + 1 == len(words):
                    currPhones = phones[0, 0:len(phones[0])]
                else:
                    currPhones = phones[0][0:len(temp)]


                # flag stays 1 only if both sequences have the same length and
                # agree element by element.
                flag = 1
                for iterFlag in range(0, len(currPhones), 1):
                    if len(currPhones) != len(temp):
                        flag = 0
                    else:
                        if currPhones[iterFlag] != temp[iterFlag]:
                            flag = 0
                if flag == 1:
                    # Extend the current path with this candidate and store it.
                    if not currPrevSylInds == []:
                        for i in range(0, len(currPrevSylInds)):
                            #                            print('line 122::::::yes')
                            newSuccessInds_all.append(currPrevSylInds[i])
                    newSuccessInds_all.append(iterCurr)
                    newSuccessInds_all2.append(newSuccessInds_all)
                    newSuccessInds_all = []
                    countSuccess = countSuccess+1

        # Only the paths that matched survive into the next word.
        prevSuccessInds_all = newSuccessInds_all2
        newSuccessInds_all2 = []
    if len(prevSuccessInds_all) == 0:
        return None, None
    return prevSuccessInds_all[0], currTestWordSyls

In [16]:
def get_syls_count(path_indices, currTestWordSyls, words, word_syls):
    """Split the chosen pronunciation of every word into syllables.

    Parameters
    ----------
    path_indices : sequence of int
        For word ``k``, the index into ``word_syls[k]`` of the chosen
        pronunciation.
    currTestWordSyls : str
        Only its length is used, in the legacy guard described below.
    words : sequence
        Unused; kept so existing callers keep working.
    word_syls : sequence of list of str
        Candidate pronunciations per word, syllables separated by ``' . '``.

    Returns
    -------
    syls_word : numpy.ndarray, shape (1, n_words)
        Number of syllables found in each word.
    spurtSyl : list of str
        All syllables, in order.
    spurtWordTimes : numpy.ndarray, shape (n_words, 2)
        Zero-filled placeholder, returned for interface compatibility.
    """
    n_words = len(path_indices)
    syls_word = np.zeros((1, n_words))
    spurtWordTimes = np.zeros((n_words, 2))
    spurtSyl = []

    for word_idx, pron_idx in enumerate(path_indices):
        pronunciation = word_syls[word_idx][pron_idx].replace(' . ', '.')
        pieces = pronunciation.split('.')
        n_periods = len(pieces) - 1

        # NOTE(review): legacy guard kept for compatibility - the text after
        # the last period is only counted while the number of periods is
        # smaller than len(currTestWordSyls).
        if n_periods and n_periods >= len(currTestWordSyls):
            pieces = pieces[:-1]

        # Empty pieces and lone spaces are not syllables.
        syllables = [piece for piece in pieces if piece and piece != ' ']
        spurtSyl.extend(syllables)
        syls_word[0][word_idx] = len(syllables)

    return syls_word, spurtSyl, spurtWordTimes

In [17]:
def get_spurts(spurtSyl, currTestWordSyls, phnTimes):
    """Give every syllable the start time of its first phone and the end time of its last.

    ``spurtSyl`` is a list of syllables written as space-separated phones and
    ``phnTimes`` is a 2-D array with one ``(start, end)`` row per phone, in
    the same order.  Returns an array of shape ``(len(spurtSyl), 2)``.
    ``currTestWordSyls`` only contributes its length, which decides whether
    the phone after the last space of a syllable is counted.
    """
    spurtSylTimes = np.zeros((len(spurtSyl), 2))
    next_phone = 0                         # row of phnTimes where the current syllable starts

    for syl_idx, syllable in enumerate(spurtSyl):
        pieces = syllable.split(' ')
        n_spaces = len(pieces) - 1

        # The piece after the last space is only counted while the number of
        # spaces (at least 1) is smaller than len(currTestWordSyls).
        tail_counts = max(n_spaces, 1) < len(currTestWordSyls)
        if n_spaces and not tail_counts:
            pieces = pieces[:-1]

        n_phones = sum(1 for piece in pieces if piece not in ('', ' '))

        spurtSylTimes[syl_idx, 0] = phnTimes[next_phone, 0]
        next_phone = next_phone + n_phones
        spurtSylTimes[syl_idx, 1] = phnTimes[next_phone - 1, 1]
    return spurtSylTimes

In [18]:
def get_spurt_word_times(path_indices, syls_word, spurtSylTimes):
    """Derive word start and end times from syllable times.

    Parameters
    ----------
    path_indices : sequence
        Only its length is used; it sets the number of output rows.
    syls_word : 2-D array-like, shape (1, n_words)
        Number of syllables in each word.
    spurtSylTimes : 2-D array-like, shape (n_syllables, 2)
        ``(start, end)`` of every syllable, in order.

    Returns
    -------
    numpy.ndarray, shape (len(path_indices), 2)
        Start of each word's first syllable and end of its last syllable.
        Empty input yields an empty ``(0, 2)`` array.
    """
    spurtWordTimes = np.zeros((len(path_indices), 2))
    first_syl = 0                                    # index of the current word's first syllable

    for word_idx, n_syls in enumerate(syls_word[0]):
        spurtWordTimes[word_idx][0] = spurtSylTimes[first_syl][0]
        first_syl += int(n_syls)                     # now points at the next word's first syllable
        spurtWordTimes[word_idx][1] = spurtSylTimes[first_syl - 1][1]
    return spurtWordTimes

In [19]:
def process_word_boundaries(spurtWordTimes, words, spurtSylTimes):
    """Convert word start and end times into frame indices relative to the spurt start.

    Times are taken relative to ``spurtSylTimes[0][0]`` and multiplied by
    100 before rounding.

    Parameters
    ----------
    spurtWordTimes : 2-D array-like, shape (n_words, 2)
        ``(start, end)`` time of every word.
    words : sequence
        The words; only used to check that there is one time pair per word.
    spurtSylTimes : 2-D array-like
        Syllable times; element ``[0][0]`` is the reference start time.

    Returns
    -------
    startWordFrame : numpy.ndarray, shape (n_words + 1,)
        Rounded start frame of every word, followed by the last end frame.
    endWordFrame : numpy.ndarray, shape (n_words,)
        Rounded end frame of every word, plus one.

    Raises
    ------
    ValueError
        If the number of time pairs differs from the number of words.
    """
    # Lengths are compared by value; an identity test on ints is unreliable.
    if len(spurtWordTimes) != len(words):
        print("error")
        raise ValueError(
            "got {} word time pairs for {} words".format(len(spurtWordTimes), len(words)))

    # Work on the float times directly; no detour through a byte-string array.
    wordTimes = np.asarray(spurtWordTimes, dtype=float)
    spurtStart = float(spurtSylTimes[0][0])

    startWordFrame = np.round((wordTimes[:, 0] - spurtStart) * 100)
    endWordFrame = np.round((wordTimes[:, 1] - spurtStart) * 100 + 1)
    startWordFrame = np.append(startWordFrame, endWordFrame[-1])

    return startWordFrame, endWordFrame

In [20]:
def get_sylTCSSBC(sylSB, eng_full, sylSB_num, twin, t_sigma, swin, s_sigma, spurtStartTime, vowelStartTime, startWordFrame):
    """Compute the syllable and vowel TCSSBC contours plus per-word statistics.

    For each of two sub-band sets (``sylSB`` and the module-level ``vowelSB``)
    the selected rows of ``eng_full`` are passed through ``temporal_corr``,
    ``spectral_corr`` and ``smooth``; the resulting contour is clipped from a
    start index and divided by its maximum. Both contours are then median
    filtered word by word, in place.

    Parameters
    ----------
    sylSB : sequence of int
        Sub-band indices for the syllable contour.
    eng_full : 2-D array
        Sub-band energies, indexed as ``eng_full[band, :]``.
    sylSB_num : int
        If ``sylSB`` has more entries than this, ``spectral_selection`` is used
        to reduce the selection.
    twin, t_sigma : passed unchanged to ``temporal_corr``.
    swin, s_sigma : passed unchanged to ``smooth``.
    spurtStartTime : sequence of float
        Only ``spurtStartTime[0]`` is read (multiplied by 100 to get an index).
    vowelStartTime : 2-D sequence of float
        Only ``vowelStartTime[0][0]`` is read (multiplied by 100 to get an index).
    startWordFrame : array-like
        Word boundary frames; consecutive entries delimit one word.

    Returns
    -------
    tuple
        ``(sylTCSSBC, vwlTCSSBC, word_Sylsum, word_duration)`` -- in that order.
        The first two have shape ``(1, n_frames)``; the last two have shape
        ``(1, len(startWordFrame) - 1)``.

    Notes
    -----
    ``vowelSB`` and ``vwlSB_num`` are not parameters; they are read from the
    enclosing (module) scope. ``medfilt`` is presumably ``scipy.signal.medfilt``
    -- TODO confirm it is imported.
    """
    # Flatten the word boundaries so they can be indexed as a 1-D array.
    startWordFrame = np.squeeze(startWordFrame)

    # --- syllable contour -------------------------------------------------
    if len(sylSB) > sylSB_num:
        eng = spectral_selection(
            eng_full[np.subtract(sylSB, 1), :], sylSB_num)
    else:
        # NOTE(review): this branch indexes with sylSB directly, whereas the
        # branch above uses sylSB - 1 -- confirm which indexing base is intended.
        eng = eng_full[sylSB, :]

    t_cor = temporal_corr(eng, twin, t_sigma)
    s_cor = spectral_corr(t_cor)
    sylTCSSBC = smooth(s_cor, swin, s_sigma)
    sylTCSSBC = np.array([sylTCSSBC])                       # wrap as a single-row 2-D array

    start_idx = np.round(spurtStartTime[0]*100).astype(int)             # index of the first syllable start

    # Clip from the first syllable start; the [start_idx:-1] slice also drops the last sample.
    sylTCSSBC = np.array([sylTCSSBC[0][start_idx:-1]])

    sylTCSSBC = np.divide(sylTCSSBC, max(sylTCSSBC[0]))                 # scale so the maximum is 1

    # --- vowel contour (same pipeline, vowel sub-bands) --------------------
    if len(vowelSB) > vwlSB_num:
        eng = spectral_selection(eng_full[np.subtract(vowelSB, 1), :], vwlSB_num)
    else:
        eng = eng_full[vowelSB, :]                                       # NOTE(review): no -1 offset here either

    t_cor = temporal_corr(eng, twin, t_sigma)
    s_cor = spectral_corr(t_cor)
    vwlTCSSBC = smooth(s_cor, swin, s_sigma)

    vwlTCSSBC = np.array([vwlTCSSBC])

    # Clip from the first vowel start.
    # NOTE(review): this offset can differ from the syllable contour's offset,
    # yet both contours are indexed with the same word frames below -- confirm.
    start_idx = np.round(vowelStartTime[0][0]*100).astype(int)
    vwlTCSSBC = np.array([vwlTCSSBC[0][start_idx:-1]])

    vwlTCSSBC = np.divide(vwlTCSSBC, max(vwlTCSSBC[0]))                 # scale so the maximum is 1

    # --- per-word statistics ----------------------------------------------
    word_duration = np.zeros((1, len(startWordFrame) - 1))
    word_Sylsum = np.zeros((1, len(startWordFrame) - 1))
    word_Vwlsum = np.zeros((1, len(startWordFrame) - 1))    # filled below but never returned

    for j in range(0, len(startWordFrame) - 1):
        # Word j spans frames [temp_start, temp_end].
        temp_start = startWordFrame[j].astype(int)
        temp_end = startWordFrame[j + 1].astype(int) - 1

        if (temp_end >= sylTCSSBC.shape[1]):
            # Word runs past the contour: clamp to the last valid index.
            temp_end1 = sylTCSSBC.shape[1]-1
            # 3-point median filter over [temp_start, temp_end1) -- np.arange excludes the end.
            sylTCSSBC[0, np.arange(temp_start, temp_end1)] = medfilt(sylTCSSBC[0, np.arange(temp_start, temp_end1)], 3)
            # Overwrite both end points with their inner neighbours.
            sylTCSSBC[0, temp_start] = sylTCSSBC[0, temp_start+1]
            sylTCSSBC[0, temp_end1] = sylTCSSBC[0, temp_end1 - 1]
            tempArr = sylTCSSBC[0, np.arange(temp_start, temp_end1)]
            word_Sylsum[0, j] = tempArr.sum(axis=0)                                        # sum of the syllable contour over the word
        else:
            sylTCSSBC[0, np.arange(temp_start, temp_end)] = medfilt(
                sylTCSSBC[0, np.arange(temp_start, temp_end)], 3)
            sylTCSSBC[0, temp_start] = sylTCSSBC[0, temp_start+1]
            sylTCSSBC[0, temp_end] = sylTCSSBC[0, temp_end - 1]
            tempArr = sylTCSSBC[0, np.arange(temp_start, temp_end)]
            word_Sylsum[0, j] = tempArr.sum(axis=0)

        # Clamp against the vowel contour; temp_end itself is modified here,
        # so word_duration below uses the clamped value.
        if (temp_end >= vwlTCSSBC.shape[1]):
            temp_end = vwlTCSSBC.shape[1]-1

        vwlTCSSBC[0, np.arange(temp_start, temp_end)] = medfilt(
            vwlTCSSBC[0, np.arange(temp_start, temp_end)], 3)
        vwlTCSSBC[0, temp_start] = vwlTCSSBC[0, temp_start+1]
        vwlTCSSBC[0, temp_end] = vwlTCSSBC[0, temp_end - 1]

        word_duration[0, j] = temp_end - temp_start + 1                      # word length in frames (inclusive)

        tempArr = vwlTCSSBC[0, np.arange(temp_start, temp_end)]
        word_Vwlsum[0, j] = tempArr.sum(axis=0)                              # sum of the vowel contour over the word

    # Replace NaNs with 0 in both contours.
    sylTCSSBC[np.isnan(sylTCSSBC)] = 0   # Feature vector 1
    vwlTCSSBC[np.isnan(vwlTCSSBC)] = 0   # Feature vector 2
    return sylTCSSBC, vwlTCSSBC, word_Sylsum, word_duration

# NOTE: keep word_duration in the return value above; feature_contour needs it to normalise segments by word length.

In [21]:
# def chunk_feature_contour(sylTCSSBC, spurtStartFrame, spurtEndFrame):
#     # Chunking the feature contour
#     sylTCSSBC_chunk = []
#     for i in range(0, len(spurtStartFrame)):
#         sylTCSSBC_chunk.append(sylTCSSBC[int(spurtStartFrame[i]):int(spurtEndFrame[i])])
#     return np.array(sylTCSSBC_chunk)

In [22]:
import librosa

In [23]:
def feature_contour(i, wav_file, test_data):
    """Build the per-syllable feature matrix for one utterance.

    The function reads the phone alignment (``phn_dir``) and the stress labels
    (``stressLabelspath``) that belong to ``wav_file``, runs the vocoder on the
    audio, computes the syllable/vowel TCSSBC contours and turns every
    syllable into one feature column.

    Module-level names read here: ``phn_dir``, ``stressLabelspath``,
    ``ger_test_dir``, ``ger_train_dir``, ``sylSB``, ``sylSB_num``, ``twin``,
    ``t_sigma``, ``swin``, ``s_sigma``.

    Parameters
    ----------
    i : int
        Index of the file in the caller's loop. Not used by the function body;
        kept for interface compatibility.
    wav_file : str
        Audio file name; the last four characters (the extension) are
        stripped to obtain the base name of the companion files.
    test_data : bool
        ``True`` reads the audio from ``ger_test_dir``, otherwise from
        ``ger_train_dir``.

    Returns
    -------
    tuple
        ``(feats, True)`` on success, where ``feats`` has one column per
        syllable: the rows returned by ``get_labels`` (features, presumably
        with the labels as last row -- confirm) followed by one row holding the
        1-based word index of each syllable. ``(None, False)`` when a
        companion file is missing, no pronunciation path was found, a vowel
        segment is empty, or the number of columns does not match the number
        of labels / word indices.
    """
    file_name = wav_file[:-4]
    phn_file = phn_dir + file_name + ".txt"
    mat_file = stressLabelspath + file_name + ".mat"

    if not os.path.exists(phn_file):
        print("phn file doesn't exist")
        return None, False

    if not os.path.exists(mat_file):
        print("mat file doesn't exist")
        return None, False

    # ---- alignment, vowels, words and syllabification ----------------------
    data_array = get_data_array(phn_file)
    print("data array", data_array)
    phones, phn_times = get_phone_data(data_array)
    print("phones", phones)
    print("phn_times", phn_times)
    vowel, vowel_start_time, vowel_end_time = get_vowel_data(data_array)
    print("vowel", vowel)
    print("vowel_start_time", vowel_start_time)
    print("vowel_end_time", vowel_end_time)
    words = get_words(file_name)
    print("words", words)
    word_syls = get_word_syls(words)
    print("word_syls", word_syls)
    path_indices, currTestWordSyls = get_path_indices(words, word_syls, phones)
    print("path_indices", path_indices)
    print("currTestWordSyls", currTestWordSyls)

    # Identity test: `== None` is element-wise (and ambiguous) on arrays.
    if path_indices is None:
        return None, False

    syls_word, spurtSyl, spurtWordTimes = get_syls_count(path_indices, currTestWordSyls, words, word_syls)
    print("syls_word", syls_word)
    print("spurtSyl", spurtSyl)
    print("spurtWordTimes", spurtWordTimes)

    spurtSylTimes = get_spurts(spurtSyl, currTestWordSyls, phn_times)
    print("spurtSylTimes", spurtSylTimes)

    syls_word = syls_word.astype('i')
    print("syls_word", syls_word)

    spurtWordTimes = get_spurt_word_times(path_indices, syls_word, spurtSylTimes)
    print("spurtWordTimes", spurtWordTimes)

    # ---- sub-band energies --------------------------------------------------
    # vocoder_func also returns the waveform (xx); it is not used here.
    file_dir = ger_test_dir if test_data else ger_train_dir
    Fs, eng_full, xx = vocoder_func(file_dir + wav_file)
    eng_full = eng_full.conj().transpose()

    # ---- frame indices, all relative to the first syllable start ------------
    startWordFrame, _ = process_word_boundaries(spurtWordTimes, words, spurtSylTimes)

    spurtStartTime = spurtSylTimes[:, 0]
    spurtEndTime = spurtSylTimes[:, 1]
    spurtStartFrame = np.round((spurtStartTime - spurtStartTime[0]) * 100)
    spurtEndFrame = np.round((spurtEndTime - spurtStartTime[0]) * 100)

    vowel_start_time = vowel_start_time.astype(float)
    vowel_end_time = vowel_end_time.astype(float)
    vowelStartFrame = np.round(vowel_start_time*100 - spurtStartTime[0]*100)
    vowelEndFrame = np.round(vowel_end_time*100 - spurtStartTime[0]*100)

    # get_sylTCSSBC returns (..., word_Sylsum, word_duration) in that order;
    # the names must be bound in the same order or the two normalisations
    # below silently swap roles.
    sylTCSSBC, vwlTCSSBC, word_Sylsum, word_duration = get_sylTCSSBC(
        sylSB, eng_full, sylSB_num, twin, t_sigma, swin, s_sigma,
        spurtStartTime, vowel_start_time, startWordFrame)

    tempOut = np.array([[]])
    wordIndication = []

    # ---- one feature column per syllable ------------------------------------
    for j in range(0, len(spurtSyl), 1):
        # Word owning this syllable: last word boundary at or before its start.
        inds = (startWordFrame <= spurtStartFrame[j]).nonzero()
        word_ind = inds[0][-1]
        wordIndication.append(word_ind)

        # Syllable segment of the contour, scaled by (segment length / word length).
        currFtr1SylSeg = sylTCSSBC[0, np.arange(spurtStartFrame[j], spurtEndFrame[j]-1, 1).astype(int)]
        currFtr1SylSeg = np.array([currFtr1SylSeg])
        temp = np.multiply(currFtr1SylSeg, len(currFtr1SylSeg[0]) / word_duration[0, word_ind])

        # Resample to 30 points: the rate ratio 30/len maps len samples onto 30.
        # The sample rates are passed by keyword because current librosa
        # releases no longer accept them positionally.
        F_new = Fs*float(30) / len(temp[0])
        arrResampled = np.array([librosa.resample(temp[0], orig_sr=Fs, target_sr=F_new, res_type='sinc_best')])

        currSylFtrs = statFunctions_Syl(arrResampled)
        # Share of the word's contour area that falls into this syllable.
        arr1 = np.array([np.array([np.sum(currFtr1SylSeg) / word_Sylsum[0, word_ind]])]).T
        currSylFtrs = np.vstack((currSylFtrs, arr1))

        # Stop when there are more syllables than vowels.
        if (j >= vowelEndFrame.shape[1]):
            break
        # Clamp the vowel end to the contour length (modifies vowelEndFrame in place).
        if (vowelEndFrame[0, j] >= vwlTCSSBC.shape[1]):
            vowelEndFrame[0, j] = vwlTCSSBC.shape[1]-1

        # Vowel segment, scaled the same way as the syllable segment.
        currFtr1VowelSeg = vwlTCSSBC[0, np.arange(vowelStartFrame[0, j], vowelEndFrame[0, j]-1, 1).astype(int)]
        currFtr1VowelSeg = np.array([currFtr1VowelSeg])
        temp = np.multiply(currFtr1VowelSeg, len(currFtr1VowelSeg[0]) / word_duration[0, word_ind])
        if (len(temp[0]) == 0):
            break           # empty vowel segment; the file is rejected after the loop

        # Resample to 20 points.
        arrResampled = np.array([librosa.resample(temp[0], orig_sr=F_new, target_sr=F_new*float(20) / len(temp[0]), res_type='sinc_best')])
        currVowelFtrs = statFunctions_Vwl(arrResampled)
        # NOTE(review): the vowel area is divided by the word's *syllable*
        # contour sum; get_sylTCSSBC does not return a vowel sum -- confirm intent.
        arr1 = np.array([np.array([np.sum(currFtr1VowelSeg) / word_Sylsum[0, word_ind]])]).T
        currVowelFtrs = np.vstack((currVowelFtrs, arr1))

        # Column layout: syllable stats, vowel stats, vowel length, syllable length.
        column = np.vstack((currSylFtrs, currVowelFtrs, len(currFtr1VowelSeg[0]), len(currFtr1SylSeg[0])))
        if j == 0:
            tempOut = column
        else:
            tempOut = np.hstack((tempOut, column))

    # `temp` still holds the last segment processed; empty means the loop
    # bailed out on an empty vowel segment.
    if (len(temp[0]) == 0):
        return None, False

    ftrs = tempOut

    # Within every multi-syllable word, turn the two length rows (last two
    # rows) into fractions of the word total.
    wordLabls = np.unique(wordIndication)
    for iterWrd in range(0, len(wordLabls)):
        inds = [k for k, x in enumerate(wordIndication) if x == wordLabls[iterWrd]]
        if len(inds) > 1:
            ftrs[-1, inds] = ftrs[-1, inds] / sum(ftrs[-1, inds])
            ftrs[-2, inds] = ftrs[-2, inds] / sum(ftrs[-2, inds])
    fa = ftrs

    print(fa.shape)
    print("fa")
    print(fa)

    # ---- labels ------------------------------------------------------------
    mat = scipy.io.loadmat(mat_file)
    lab = mat['spurtStress']
    lab_list = lab.tolist()

    # Value comparison; `is not` on ints is an identity test and unreliable.
    if fa.shape[1] != len(lab_list):
        return None, False

    fb, filenm = get_labels(lab_list, fa, file_name)
    feats = fb          # features, last row: labels (per the original author's note)

    # 1-based word index for every syllable: a syllable belongs to the
    # current word while it ends no later than the word does.
    w = []
    for w_l in range(len(words)):
        w_ed = spurtWordTimes[w_l][1]
        for s_l in range(len(w), len(spurtSyl)):
            sy_ed = spurtSylTimes[s_l][1]
            if (sy_ed <= w_ed):
                w.append(w_l+1)
            else:
                break

    # Any length mismatch (not only "too many") would make vstack fail.
    if len(w) != np.shape(feats)[1]:
        return None, False

    feats = np.vstack((feats, w))   # features, labels, word indices
    return feats, True
     


    

In [24]:
# Smoke test: run the feature extraction on a single training file (index 4).
ger_train_files_subset = ger_train_files[4:5]
all_contours = []
all_labels = []   # not filled in this cell

for i, file in enumerate(ger_train_files_subset):

    # feature_contour returns (matrix, True) on success and (None, False) otherwise.
    contours, valid = feature_contour(i, file, False)
    if valid:
        # extend() iterates over the first axis of the returned matrix,
        # so each row is added as a separate list element.
        all_contours.extend(contours)

data array [['0.0' '0.61' 'sil']
 ['0.61' '0.79' 'ay']
 ['0.79' '0.95' 's']
 ['0.95' '1.0' 'eh']
 ['1.0' '1.05' 'd']
 ['1.05' '1.28' 'f']
 ['1.28' '1.43' 'ay']
 ['1.43' '1.55' 't']
 ['1.55' '1.59' 'sil']
 ['1.59' '1.65' 'n']
 ['1.65' '1.68' 'aa']
 ['1.68' '1.74' 't']
 ['1.74' '1.93' 's']
 ['1.93' '2.01' 'eh']
 ['2.01' '2.07' 'n']
 ['2.07' '2.21' 't']
 ['2.21' '2.39' 'er']
 ['2.39' '3.18' 'sil']]
phones [['ay' 's' 'eh' 'd' 'f' 'ay' 't' 'n' 'aa' 't' 's' 'eh' 'n' 't' 'er']]
phn_times [['0.61' '0.79']
 ['0.79' '0.95']
 ['0.95' '1.0']
 ['1.0' '1.05']
 ['1.05' '1.28']
 ['1.28' '1.43']
 ['1.43' '1.55']
 ['1.59' '1.65']
 ['1.65' '1.68']
 ['1.68' '1.74']
 ['1.74' '1.93']
 ['1.93' '2.01']
 ['2.01' '2.07']
 ['2.07' '2.21']
 ['2.21' '2.39']]
vowel [['ay' 'eh' 'ay' 'aa' 'eh' 'er']]
vowel_start_time [['0.61' '0.95' '1.28' '1.65' '1.93' '2.21']]
vowel_end_time [['0.79' '1.0' '1.43' '1.68' '2.01' '2.39']]
words ['i', 'said', 'fight', 'not', 'centre']
word_syls [['aa', 'ae', 'ah', 'ay', 'ay . ah', 'ay 

In [25]:
# NOTE(review): `pancho` is not defined, so this line raises NameError
# (see the recorded output). Presumably a deliberate stop so that
# "Run All" halts here -- confirm, or remove the cell.
print(pancho)

NameError: name 'pancho' is not defined

In [None]:
# train data: run the feature extraction over every training file
all_contours = []
# all_labels = []
for i, file in enumerate(ger_train_files):
    contours, success = feature_contour(i, file, False)

    # Files that feature_contour rejects (success is False) are skipped.
    if success:
        all_contours.extend(contours)   # adds each row of the matrix as a separate element
        # all_labels.extend(labels)   

    # Clear the cell output each iteration (clear_output is presumably
    # IPython.display.clear_output -- confirm the import) and show progress.
    clear_output(wait=True)
    print("Processed: {}/{}".format(i+1, len(ger_train_files)), end="\r")
    print("Progress: {:.2f}%".format((i+1)/len(ger_train_files)*100), end="\r")

data array [['0.0' '0.25' 'sil']
 ['0.25' '0.42' 'ay']
 ['0.42' '0.52' 's']
 ['0.52' '0.56' 'eh']
 ['0.56' '0.59' 'd']
 ['0.59' '0.68' 'hh']
 ['0.68' '0.85' 'w']
 ['0.85' '0.98' 'ay']
 ['0.98' '1.13' 't']
 ['1.13' '1.16' 'sil']
 ['1.16' '1.26' 'n']
 ['1.26' '1.3' 'aa']
 ['1.3' '1.41' 't']
 ['1.41' '1.48' 'sil']
 ['1.48' '1.55' 'b']
 ['1.55' '1.7' 'ey']
 ['1.7' '1.84' 't']
 ['1.84' '2.54' 'sil']]
phones [['ay' 's' 'eh' 'd' 'hh' 'w' 'ay' 't' 'n' 'aa' 't' 'b' 'ey' 't']]
phn_times [['0.25' '0.42']
 ['0.42' '0.52']
 ['0.52' '0.56']
 ['0.56' '0.59']
 ['0.59' '0.68']
 ['0.68' '0.85']
 ['0.85' '0.98']
 ['0.98' '1.13']
 ['1.16' '1.26']
 ['1.26' '1.3']
 ['1.3' '1.41']
 ['1.48' '1.55']
 ['1.55' '1.7']
 ['1.7' '1.84']]
vowel [['ay' 'eh' 'ay' 'aa' 'ey']]
vowel_start_time [['0.25' '0.52' '0.85' '1.26' '1.55']]
vowel_end_time [['0.42' '0.56' '0.98' '1.3' '1.7']]
words ['i', 'said', 'white', 'not', 'bait']
word_syls [['aa', 'ae', 'ah', 'ay', 'ay . ah', 'ay hh', 'eh', 'ey', 'hh ae', 'hh ah', 'hh ay', '

KeyboardInterrupt: 

In [None]:
# save train data as pickle: one DataFrame column ('contour') holding every collected row
# NOTE(review): `pd` must already be imported (pandas) when this cell runs -- confirm.
df = pd.DataFrame({'contour': all_contours})
df.to_pickle('../saved/ger_train.pkl')

In [None]:
# test data: same extraction as for the training set, reading audio from the test directory
test_chunks = []
# test_labels = []

for i, file in enumerate(ger_test_files):
    # Third argument True selects the test directory inside feature_contour.
    chunks, success = feature_contour(i, file, True)
    
    # Files that feature_contour rejects (success is False) are skipped.
    if success:
        test_chunks.extend(chunks)   # adds each row of the matrix as a separate element
        # test_labels.extend(labels)

    # Clear the cell output each iteration and show progress.
    clear_output(wait=True)
    print("Progress: {:.2f}%".format((i+1)/len(ger_test_files)*100), end="\r")
    print("Processed: {}/{}".format(i+1, len(ger_test_files)), end="\r")

Progress: 100.00%Processed: 1768/1768

In [None]:
# save test data as pickle: one DataFrame column ('contour') holding every collected row
df = pd.DataFrame({'contour': test_chunks})
df.to_pickle('../saved/ger_test.pkl')

In [None]:
# change the pkl file to csv
# NOTE(review): this reads 'data.pkl' from the working directory, not the
# '../saved/ger_*.pkl' files written by the cells above -- confirm the path.
df = pd.read_pickle('data.pkl')
df.to_csv('data.csv')

In [None]:
def dataloader(path,featType):
    """Load features, labels and word indices from a MATLAB ``.mat`` file.

    The file must contain the variables ``AF`` and ``CF_info`` (and ``CF``
    when ``featType`` is not 1). The last two rows of ``AF`` are split off as
    ``y`` and ``w`` (presumably stress labels and word indices -- confirm);
    the remaining rows are the acoustic features.

    Parameters
    ----------
    path : str
        Path of the ``.mat`` file.
    featType : int
        ``1`` returns the acoustic features only; any other value appends the
        rows of ``CF`` below them.

    Returns
    -------
    tuple
        ``(x.T, y.T, w.T, CF_info)`` -- features transposed so that the
        original columns become rows, followed by the two split-off rows and
        the ``CF_info`` variable as stored in the file.
    """
    mat_contents = scipy.io.loadmat(path)
    print(mat_contents.keys())

    acoustic = mat_contents['AF']
    features = acoustic[0:-2]
    y = acoustic[-2]
    w = acoustic[-1]

    # Anything other than featType == 1 also stacks the CF rows underneath.
    if featType != 1:
        features = np.concatenate((features, mat_contents['CF']), axis=0)

    return features.T, y.T, w.T, mat_contents['CF_info']


In [None]:
# Read the CSV written earlier back into a DataFrame.
df = pd.read_csv('data.csv')
# print(df.head())

# Convert the DataFrame into a NumPy array (one row per CSV record).
data = df.to_numpy()
# print(data.shape)


In [None]:
# NOTE(review): this cell belongs to a different project (image-based "soc level"
# regression). It relies on joblib, cv2, `extraction` and `y` defined elsewhere.
loaded_model = joblib.load('model.pkl')

# ---- single-image sanity check ----------------------------------------------
path = '/content/7.jpg'
name = 7
img = cv2.imread(path)  # cv2.imread returns the image in BGR channel order
img = cv2.resize(img, (100, 100))
img_hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
features = extraction(img_hsv)

features = np.array(features)
features = features.reshape(1, -1)

soc_level = loaded_model.predict(features)
print("predicted soc level", soc_level)
print("actual value", [y[name-1]])

# ---- predict every image in the data directory ------------------------------
DATADIR = "/content/drive/MyDrive/kit_2_final/kit_2"

X = [0]*(len(os.listdir(DATADIR)))   # placeholder; never filled below
rows = 100
cols = 100
prediction = []
for img in os.listdir(DATADIR):
    path = os.path.join(DATADIR, img)
    # File names are expected to be "<integer>.<ext>".
    name = img.split(".")[0]
    name = int(name)
    img_array = cv2.imread(path)
    image = cv2.resize(img_array, (rows, cols))
    img_hsv = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
    features = extraction(img_hsv)
    features = np.array(features)
    features = features.reshape(1, -1)
    soc_level = loaded_model.predict(features)
    prediction.append(soc_level)

X = np.array(X)
y = np.array(y)

# Drop the singleton axes left by predict().
prediction = np.squeeze(prediction)

# Average of all predictions below 0.5.
# - np.atleast_1d keeps this working when only one image was predicted
#   (squeeze then yields a 0-d array, which has no len()).
# - The result is no longer stored in a variable called `list`, which
#   shadowed the builtin for the rest of the notebook session.
low_predictions = [value for value in np.atleast_1d(prediction) if value < 0.5]
if low_predictions:
    average = sum(low_predictions) / len(low_predictions)
    print(average)
else:
    # Previously a ZeroDivisionError.
    average = float("nan")
    print("no predictions below 0.5")

In [None]:
# Fetch a soil image from S3, display and crop it, then load the device spreadsheet.
# https://soilimages.s3.amazonaws.com/b7a28b21-96a9-44f0-be51-47cc98c95265.jpg

import urllib.request
from io import BytesIO

import pandas as pd
import requests
from PIL import Image

url = 'https://soilimages.s3.amazonaws.com/b7a28b21-96a9-44f0-be51-47cc98c95265.jpg'

# In-memory copy of the image. The timeout keeps the cell from hanging on a
# stalled connection; raise_for_status stops an HTTP error page from being
# handed to PIL as if it were image data.
response = requests.get(url, timeout=30)
response.raise_for_status()
img = Image.open(BytesIO(response.content))
img = np.array(img)

# On-disk copy of the same image.
urllib.request.urlretrieve(url, 'image.jpg')

# display the image 
plt.imshow(img)
plt.show()

# crop the image: top-left 100 x 100 pixels
crop_img = img[0:100, 0:100]
plt.imshow(crop_img)
plt.show()

# Load the spreadsheet into a DataFrame. The file is an Excel workbook, so it
# must be read with read_excel; the earlier read_csv call on the same .xlsx
# path failed before this line could run and has been dropped.
df = pd.read_excel('/content/DEVICE DATA3.xlsx')

# extract the entries one by one and assign each to url
# NOTE(review): `new_df` is not defined in this cell -- presumably the third
# column of `df`; confirm where it is created.
for i in range(len(new_df)):
    url = new_df[i]
    print(url)

for i in range(len(new_df)):
    print(new_df[i])



In [None]:
# Fit a random-forest regressor and report MAE / R^2 on the held-out split.
# X_train, y_train, X_test, y_test and the sklearn names used here must be
# defined by earlier cells.
rf_model = RandomForestRegressor(n_estimators=100, random_state=42)
rf_model.fit(X_train, y_train)

y_pred = rf_model.predict(X_test)

mae = mean_absolute_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print("Mean Absolute Error:", mae)
print("R-squared Score:", r2)


# plot an roc curve
# NOTE(review): the curve below is NOT computed from the model. y_pred and
# y_test are overwritten with random 0/1 values, so the regression results
# above are lost and any later cell sees the random lists -- confirm this
# placeholder is intended.

from sklearn.metrics import roc_curve, auc
import matplotlib.pyplot as plt
import random

# generate random binary "predictions" and "labels" (100 each)

y_pred = [random.randint(0, 1) for _ in range(100)]
y_test = [random.randint(0, 1) for _ in range(100)]

# calculate the fpr and tpr for all thresholds of the classification
fpr, tpr, threshold = roc_curve(y_test, y_pred)
roc_auc = auc(fpr, tpr)

# plot the roc curve; the dashed red diagonal is the chance line
plt.title('Receiver Operating Characteristic')
plt.plot(fpr, tpr, 'b', label = 'AUC = %0.2f' %roc_auc)
plt.legend(loc = 'lower right')
plt.plot([0, 1], [0, 1], 'r--')
plt.xlim([-0.1, 1.0])
plt.ylim([-0.1, 1.0])
plt.ylabel('True Positive Rate')
plt.xlabel('False Positive Rate')
plt.show()



In [None]:
from sklearn.metrics import roc_curve, auc
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt

# Absolute error of every prediction. Converting to arrays first keeps the
# subtraction valid when y_test / y_pred are plain Python lists.
differences = np.abs(np.asarray(y_test, dtype=float) - np.asarray(y_pred, dtype=float))

# Turn the regression result into a binary target:
# 1 = prediction within 0.16 of the true value, 0 = outside that tolerance.
y_true_binary = (differences <= 0.16).astype(int)

# Compute ROC curve and ROC area.
# NOTE(review): roc_curve treats larger scores as "more positive", but here a
# *smaller* difference means the positive class, and the label is derived
# from the score itself -- the resulting curve is degenerate. Confirm what
# this plot is meant to show.
fpr, tpr, _ = roc_curve(y_true_binary, differences)
roc_auc = auc(fpr, tpr)

# Plot ROC curve; the dashed navy diagonal is the chance line.
plt.figure()
plt.plot(fpr, tpr, color='darkorange', lw=2, label=f'ROC curve (area = {roc_auc:.2f})')
plt.plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--')
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.05])
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('Receiver Operating Characteristic (ROC) Curve')
plt.legend(loc='lower right')
plt.show()

# Start with an all-ones vector, one entry per sample.
y_ = np.ones(len(y_true_binary))

In [None]:
# Set y_ to 0 wherever y_true_binary is below 1 (i.e. 0); all other entries keep their current value.
y_[y_true_binary < 1] = 0
