In [1]:
import parselmouth

import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import os
import math

In [2]:
def fast_scandir(dirname):
    subfolders= [f.path for f in os.scandir(dirname) if f.is_dir()]
    for dirname in list(subfolders):
        subfolders.extend(fast_scandir(dirname))
    return subfolders

In [3]:
directory1='mini_speech_commands'
directory2=fast_scandir(directory1)
directory2

['mini_speech_commands/right',
 'mini_speech_commands/go',
 'mini_speech_commands/no',
 'mini_speech_commands/left',
 'mini_speech_commands/stop',
 'mini_speech_commands/up',
 'mini_speech_commands/down',
 'mini_speech_commands/yes']

In [4]:
dataset_table = []

In [None]:
dataset_table = []
#### iterate files in the folder
for d2 in directory2:
    label = str(d2).replace(directory1+'/','')
    #print(label)
    for filename in os.listdir(d2):
        f = os.path.join(d2, filename)
        # checking if it is a file we need
        if os.path.isfile(f) and not f.startswith(directory1+'/.') and not(filename.startswith('README')):

            snd = parselmouth.Sound(f)
            intensity=snd.to_intensity()

            pitch=snd.to_pitch()
            pitch_values=pitch.selected_array['frequency']
            pitch_values[pitch_values != 0]

            harmonicity=snd.to_harmonicity()

            f0min=75
            f0max=300
            pointProcess = parselmouth.praat.call(snd, "To PointProcess (periodic, cc)", f0min, f0max)
            formants = parselmouth.praat.call(snd, "To Formant (burg)", 0.0025, 5, 5000, 0.025, 50)

            numPoints = parselmouth.praat.call(pointProcess, "Get number of points")
            f1_list = []
            f2_list = []
            f3_list = []
            for point in range(0, numPoints):
                point += 1
                t = parselmouth.praat.call(pointProcess, "Get time from index", point)
                f1 = parselmouth.praat.call(formants, "Get value at time", 1, t, 'Hertz', 'Linear')
                f2 = parselmouth.praat.call(formants, "Get value at time", 2, t, 'Hertz', 'Linear')
                f3 = parselmouth.praat.call(formants, "Get value at time", 3, t, 'Hertz', 'Linear')


                f1_list.append(f1)
                f2_list.append(f2)
                f3_list.append(f3)
            dataset_row = []
            dataset_row.append(label)
            dataset_row.append(filename)

            dataset_row.append(np.max(intensity.values.T))
            dataset_row.append(np.mean(intensity.values.T))

            dataset_row.append(np.max(pitch_values))
            dataset_row.append(np.mean(pitch_values))

            dataset_row.append(np.mean(harmonicity))
            dataset_row.append(np.mean(f1_list))
            dataset_row.append(np.mean(f2_list))
            dataset_row.append(np.mean(f3_list))


            dataset_table.append(dataset_row)

In [6]:
import pandas as pd

In [7]:
dataset_table = pd.DataFrame(dataset_table,\
                             columns=['label','file_name','intensity_max','intensity_mean',\
                                      'pitch_max','pitch_mean', 'harmonicity', 'f1_mean','f2_mean','f3_mean'])
dataset_table.head(5)

Unnamed: 0,label,file_name,intensity_max,intensity_mean,pitch_max,pitch_mean,harmonicity,f1_mean,f2_mean,f3_mean
0,right,988e2f9a_nohash_0.wav,61.097081,38.47516,422.678445,89.390172,-123.131611,555.058584,1601.669536,2320.522792
1,right,6272b231_nohash_1.wav,66.845281,37.597035,558.135921,49.267791,-164.296653,512.683734,1535.837242,2064.67239
2,right,2f813234_nohash_1.wav,77.935995,62.019151,125.728632,42.167443,-146.656255,,,
3,right,97f4c236_nohash_2.wav,82.57174,57.877327,221.551033,70.21409,-127.560476,520.68617,1916.816935,2625.872414
4,right,fac74f6a_nohash_1.wav,67.975174,56.604372,460.772269,30.653279,-115.485625,417.02752,1643.379016,2151.579999


In [8]:
### backup csv
dataset_table.to_csv('pandas_df.csv',index=False)

In [9]:
dataset_table.describe()

Unnamed: 0,intensity_max,intensity_mean,pitch_max,pitch_mean,harmonicity,f1_mean,f2_mean,f3_mean
count,8000.0,8000.0,8000.0,8000.0,8000.0,6914.0,6914.0,6914.0
mean,75.145934,46.746846,244.781201,49.831409,-132.943196,605.498968,1451.860642,2523.257928
std,9.632973,15.420563,152.214791,34.893855,31.020852,139.995463,275.56878,279.340591
min,9.219821,-146.094185,0.0,0.0,-200.0,253.075061,633.8526,1160.132936
25%,70.450399,40.32715,133.69201,26.624615,-154.072683,503.300138,1256.8844,2361.152775
50%,76.733861,48.428962,193.492985,41.833717,-137.661063,591.584936,1418.844652,2537.63723
75%,81.951915,55.52254,277.230699,63.805314,-119.102024,689.238785,1607.416839,2712.095422
max,93.193258,85.027586,599.977696,349.51191,17.967082,1421.196373,2597.192278,3492.007209
