In [10]:
import librosa
import pandas as pd
import numpy as np
import os

In [36]:
# Minimum length, in frames, of a run of unvoiced frames for it to count as a breath phrase.
bp_thresh = 50
# Minimum voiced fraction (voiced frames / window frames) a singing phrase must reach to be kept;
# 0 keeps every singing phrase.
phrase_voice_thresh = 0
# Spacing between consecutive pitch frames (presumably seconds - confirm against the pitch CSVs).
time_step = 0.01
# Root directory that is searched recursively for '*-pitch.csv' files.
data_folder = '../Data/Pakad/'

In [37]:
def get_phrases(pitch_df, unvoiced_frame_val=-3000, min_bp_frames=None, frame_step=None):
    '''
    Splits a pitch contour into alternating breath phrases (BP) and singing phrases (SP)

    A breath phrase is a run of consecutive unvoiced frames that lasts at least
    min_bp_frames * frame_step. A singing phrase is everything between the end of one
    breath phrase and the start of the next one. Anything before the first breath phrase
    or after the start of the last breath phrase is not emitted; the last breath phrase
    only serves as the end boundary of the final singing phrase.

    Parameters
        pitch_df (pd.DataFrame): pitch contour with 'time' and 'pitch' columns
        unvoiced_frame_val (float): pitch value that marks a frame as unvoiced
        min_bp_frames (int): minimum breath phrase length in frames; defaults to the notebook-level bp_thresh
        frame_step (float): time between consecutive frames; defaults to the notebook-level time_step

    Returns
        phrases_df (pd.DataFrame): one row per phrase with columns 'start time', 'end time',
            'duration' and 'type' ('BP' or 'SP'), in chronological order

    '''
    # fall back to the notebook configuration only when the caller did not supply a value
    if min_bp_frames is None:
        min_bp_frames = bp_thresh
    if frame_step is None:
        frame_step = time_step

    columns = ['start time', 'end time', 'duration', 'type']
    if len(pitch_df) == 0:
        # nothing to segment; reading the last timestamp below would raise IndexError
        return pd.DataFrame(columns=columns)

    # keep the first row of every run of identical pitch values; prepending NaN makes
    # the very first frame always start a run
    run_starts = (np.diff(pitch_df['pitch'].values, prepend=np.nan) != 0).nonzero()[0]
    group_pitches = pitch_df.iloc[run_starts][['time', 'pitch']].copy()
    # the last run is closed 0.1 after the final timestamp; the timestamp is read by
    # column name so an extra leading column in the CSV cannot be mistaken for the time
    # NOTE(review): the 0.1 padding does not depend on frame_step - confirm this is intended
    last_time = pitch_df['time'].iloc[-1]
    group_pitches['duration'] = np.diff(group_pitches['time'], append=(last_time + 0.1))
    group_pitches['end time'] = group_pitches['time'] + group_pitches['duration']

    # breath phrases: unvoiced runs that are long enough
    is_breath = (group_pitches['pitch'] == unvoiced_frame_val) & (group_pitches['duration'] >= min_bp_frames * frame_step)
    bps = group_pitches.loc[is_breath].reset_index(drop=True)

    bp_starts = bps['time'].tolist()
    bp_ends = bps['end time'].tolist()
    bp_durations = bps['duration'].tolist()

    # every breath phrase except the last is followed by the singing phrase that runs
    # up to the start of the next breath phrase
    records = []
    for k in range(len(bps) - 1):
        records.append((bp_starts[k], bp_ends[k], bp_durations[k], 'BP'))
        records.append((bp_ends[k], bp_starts[k + 1], bp_starts[k + 1] - bp_ends[k], 'SP'))
    return pd.DataFrame(records, columns=columns)

def get_voiced_frame_len(pitch_df, start_time, end_time, unvoiced_frame_val=-3000):
    '''
    Counts the voiced frames that fall inside a closed time interval

    Parameters
        pitch_df (pd.DataFrame): pitch contour with 'time' and 'pitch' columns
        start_time (float): lower bound of the interval (inclusive)
        end_time (float): upper bound of the interval (inclusive)
        unvoiced_frame_val (float): pitch value that marks a frame as unvoiced

    Returns
        voiced_frames (int): number of rows with start_time <= time <= end_time whose pitch differs from unvoiced_frame_val

    '''
    in_window = pitch_df['time'].between(start_time, end_time)
    is_voiced = pitch_df['pitch'].ne(unvoiced_frame_val)
    return int((in_window & is_voiced).sum())

In [38]:
# Column-wise accumulator for the per-file summary: every processed pitch file adds one entry to each list.
sp_len = {column: [] for column in ('Filename', 'Singer', 'Raag', '# SP', 'Avg. SP Len')}

In [39]:
# Summarise the singing phrases (SP) of every '*-pitch.csv' file found under data_folder.
# Each processed file appends one entry to every list in sp_len.
window_frames = 2000  # length, in frames, of the window over which the voiced fraction of an SP is measured
window_len = window_frames * time_step  # the same window expressed in time units

for root, _, fileNames in os.walk(data_folder):
    for fileName in fileNames:
        if not fileName.endswith('-pitch.csv'):
            continue
        print(fileName)
        pitch_df = pd.read_csv(os.path.join(root, fileName))
        phrases_df = get_phrases(pitch_df)
        sp_vals = []  # lengths of the singing phrases that pass the voicing threshold
        for _, row in phrases_df.loc[phrases_df['type'] == 'SP'].iterrows():
            start_time = row['start time']
            if row['duration'] >= window_len:
                # the phrase alone fills the window, so only its first window_frames frames are read
                end_time = (window_frames - 1) * time_step + start_time
            else:
                # extend the window to the end of the last phrase that finishes inside it
                window_end = np.around((start_time + window_len) / time_step) * time_step
                subset = phrases_df.loc[(phrases_df['start time'] >= start_time) & (phrases_df['end time'] < window_end)]
                # a phrase whose duration is a rounding error short of the window can fail the
                # strict '<' test and leave subset empty; fall back to the phrase's own end
                end_time = subset.iloc[-1, 1] if len(subset) else row['end time']
            voiced_fraction = np.around(get_voiced_frame_len(pitch_df, start_time, end_time) / window_frames, 2)
            if voiced_fraction >= phrase_voice_thresh:
                sp_vals.append(row['end time'] - start_time)
        sp_len['Filename'].append(os.path.join(root, fileName))
        # file names follow '<Singer>_<piece>_<Raag>-pitch.csv'
        sp_len['Singer'].append(fileName.split('_', 1)[0])
        sp_len['Raag'].append(fileName.rsplit('-', 1)[0].rsplit('_', 1)[1])
        sp_len['# SP'].append(len(sp_vals))
        # np.mean([]) would also give NaN, but with a RuntimeWarning; make the empty case explicit
        sp_len['Avg. SP Len'].append(np.mean(sp_vals) if sp_vals else np.nan)

AG_P4_Bahar-pitch.csv
SCh_P6a_Kedar-pitch.csv
SCh_P6b_Kedar-pitch.csv
AG_P2_Jaun-pitch.csv
AG_P9_Bilas-pitch.csv
CC_P8_Bag-pitch.csv
CC_P3_MM-pitch.csv
SCh_P9a_Bahar-pitch.csv
SCh_P2b_Jaun-pitch.csv
SCh_P2a_Jaun-pitch.csv
AG_P1_MM-pitch.csv
CC_P7_Marwa-pitch.csv
SCh_P3b_MM-pitch.csv
CC_P1b_Bilas-pitch.csv
CC_P9_Bahar-pitch.csv
AG_P6_Nand-pitch.csv
SCh_P4a_Nand-pitch.csv
CC_P2_Jaun-pitch.csv
SCh_P4b_Nand-pitch.csv
SCh_P1a_Bilas-pitch.csv
AG_P3_Kedar-pitch.csv
CC_P4_Nand-pitch.csv
SCh_P8b_Bag-pitch.csv
SCh_P7b_Marwa-pitch.csv
SCh_P5a_Shree-pitch.csv
CC_P6_Kedar-pitch.csv
AG_P7_Bag-pitch.csv
AG_P8_Marwa-pitch.csv
AG_P5_Shree-pitch.csv
SCh_P1b_Bilas-pitch.csv
CC_P1a_Bilas-pitch.csv
SCh_P5b_Shree-pitch.csv
CC_P5_Shree-pitch.csv
SCh_P7a_Marwa-pitch.csv
SCh_P8a_Bag-pitch.csv
SCh_P9b_Bahar-pitch.csv
SCh_P3a_MM-pitch.csv


In [40]:
# Turn the column-wise accumulator into a table (one row per pitch file).
# Note: this rebinds sp_len from a dict of lists to a DataFrame.
sp_len = pd.DataFrame(data=sp_len)

In [41]:
# Per-singer averages of the phrase count and mean phrase length. The numeric columns are
# selected explicitly because 'Filename' and 'Raag' hold strings, and pandas >= 2.0 raises a
# TypeError when asked to average them instead of silently dropping them.
sp_len.groupby('Singer')[['# SP', 'Avg. SP Len']].mean()

Unnamed: 0_level_0,# SP,Avg. SP Len
Singer,Unnamed: 1_level_1,Unnamed: 2_level_1
AG,2.111111,4.323148
CC,10.0,5.582192
SCh,1.833333,8.897083
