In [1]:
def rttm_to_speaker_activity(df, outfile=None):
    """ Given a DataFrame holding the rows of an RTTM file, generate a
        dataframe structured to support a visualization of type
        'Speaker Activity' and optionally export it to a csv located
        at {outfile}.

        The df's column names must include
            'task',
            'inputFile',
            'one',
            'start',
            'duration',
            'NA_1',
            'NA_2',
            'class',
            'NA_3', and
            'NA_4'
        (a KeyError is raised by pandas if one of the dropped columns
        is missing).

        df = Pandas DataFrame containing a standard .rttm file
        outfile = destination for exported CSV (path, filename, extension);
                  when None (the default) nothing is written

        Returns a new DataFrame (the input df is not modified) that keeps
        the original row index and has the columns
            'START'     - renamed from 'start'
            'DUR'       - renamed from 'duration'
            'LABEL'     - 'CHILD' or 'ADULT' (every other class is dropped)
            'LABEL_NUM' - 1 for 'CHILD', -1 for 'ADULT'
            'DUR_TRANS' - LABEL_NUM * DUR
            'COUNT'     - constant 1
    """

    # Drop the columns we don't care about from a base RTTM and rename the
    # rest for our viz's purposes. drop() already returns a new frame, so the
    # caller's df is left untouched without needing an explicit deep copy.
    vizframe = (
        df.drop(columns=[
            'task',
            'inputFile',
            'one',
            'NA_1',
            'NA_2',
            'NA_3',
            'NA_4'])
        .rename(columns={
            'start': 'START',
            'duration': 'DUR',
            'class': 'LABEL'
        })
    )

    # Remap the model classes for this viz's purposes; labels not listed
    # here (e.g. 'SPEECH') pass through unchanged and are filtered out below.
    vizframe['LABEL'] = vizframe['LABEL'].replace({
        'KCHI': 'CHILD',
        'CHI': 'CHILD',
        'FEM': 'ADULT',
        'MAL': 'ADULT',
    })

    # Filter the dataframe to just the 'clean' (non-'SPEECH') classes.
    # .copy() makes the filtered result an independent frame so the column
    # assignments below do not raise SettingWithCopyWarning.
    vizframe = vizframe[vizframe['LABEL'].isin(['CHILD', 'ADULT'])].copy()

    # Signed label: child speech counts positive, adult speech negative.
    # Every remaining label is one of the two keys, so no value is unmapped.
    vizframe['LABEL_NUM'] = vizframe['LABEL'].map({'CHILD': 1, 'ADULT': -1})
    vizframe['DUR_TRANS'] = vizframe['LABEL_NUM'] * vizframe['DUR']
    vizframe['COUNT'] = 1

    # Export only when a destination was supplied.
    if outfile is not None:
        vizframe.to_csv(outfile)

    return vizframe

In [5]:
import pandas as pd
import copy

In [3]:
def df_from_rttm(rttm):
    """ Read a space-delimited RTTM file into a Pandas DataFrame.

        rttm = the RTTM source, passed straight through to pd.read_csv

        The file is read without a header row; its ten fields are named,
        in order: 'task', 'inputFile', 'one', 'start', 'duration',
        'NA_1', 'NA_2', 'class', 'NA_3' and 'NA_4'.
    """
    # Column labels for the ten RTTM fields, in file order.
    rttm_columns = [
        'task', 'inputFile', 'one', 'start', 'duration',
        'NA_1', 'NA_2', 'class', 'NA_3', 'NA_4',
    ]
    return pd.read_csv(rttm, sep=' ', names=rttm_columns)

In [6]:
# Parse the RTTM file into a DataFrame, then build the speaker-activity frame
# and also export it to 'stand-and-deliver.csv'. The frame returned by the
# last call is what the cell displays.
# NOTE(review): the input path is absolute and machine-specific - confirm it
# exists (or point it at your own copy) before re-running this cell.
df = df_from_rttm('/home/leemoore/voice_type_classifier/output_voice_type_classifier/stand-and-deliver/all.rttm')
rttm_to_speaker_activity(df, outfile='stand-and-deliver.csv')

Unnamed: 0,START,DUR,LABEL,LABEL_NUM,DUR_TRANS,COUNT
1,1.011,3.743,ADULT,-1,-3.743,1
2,1.406,0.175,ADULT,-1,-0.175,1
3,4.011,2.0,ADULT,-1,-2.0,1
4,4.86,3.652,ADULT,-1,-3.652,1
5,9.097,1.396,ADULT,-1,-1.396,1
6,10.645,0.226,ADULT,-1,-0.226,1
9,13.51,1.001,CHILD,1,1.001,1
10,14.492,1.561,ADULT,-1,-1.561,1
11,18.012,6.507,ADULT,-1,-6.507,1
15,25.491,0.461,ADULT,-1,-0.461,1


In [7]:
# Print the raw contents of stand-and-deliver.csv via the shell.
!cat stand-and-deliver.csv

,START,DUR,LABEL,LABEL_NUM,DUR_TRANS,COUNT
1,1.011,3.7430000000000003,ADULT,-1,-3.7430000000000003,1
2,1.406,0.175,ADULT,-1,-0.175,1
3,4.011,2.0,ADULT,-1,-2.0,1
4,4.86,3.6519999999999997,ADULT,-1,-3.6519999999999997,1
5,9.097000000000001,1.396,ADULT,-1,-1.396,1
6,10.645,0.226,ADULT,-1,-0.226,1
9,13.51,1.001,CHILD,1,1.001,1
10,14.492,1.561,ADULT,-1,-1.561,1
11,18.012,6.507000000000001,ADULT,-1,-6.507000000000001,1
15,25.491,0.461,ADULT,-1,-0.461,1
16,27.493000000000002,0.19699999999999998,ADULT,-1,-0.19699999999999998,1
17,29.585,0.26,CHILD,1,0.26,1
18,32.633,2.378,ADULT,-1,-2.378,1
19,32.992,0.39,ADULT,-1,-0.39,1
20,33.775,18.714000000000002,ADULT,-1,-18.714000000000002,1
22,53.762,0.9059999999999999,ADULT,-1,-0.9059999999999999,1
24,54.992,2.44,ADULT,-1,-2.44,1
25,56.992,5.781000000000001,ADULT,-1,-5.781000000000001,1
26,63.511,3.4810000000000003,ADULT,-1,-3.4810000000000003,1
27,66.012,12.979000000000001,ADULT,-1,-12.979000000000001,1
28,67.152,0.3389999999999999