In [1]:
import copy
import pandas as pd
import numpy as np



In [None]:
def rttm_to_utterance_indexed_speaker_activity(df, outfile=None):
    """ Given an RTTM input file, generate a dataframe structured
        to support a visualization of type 'Speaker Activity' and optionally
        export to a csv located at {outfile}

        df = Pandas DataFrame containing a standard .rttm file
        outfile = destination for exported CSV (path, filename, extension)
    """

    # Check whether an outfile has been defined
    if outfile is not None:
        export = True

    # Drop the columns we don't care about from a base RTTM
    vizframe = copy.deepcopy(df) \
        .drop(
        columns=[
            'task',
            'inputFile',
            'one',
            'NA_1',
            'NA_2',
            'NA_3',
            'NA_4'])

    # Rename columns for our viz's purposes
    vizframe = vizframe.rename(columns={
        'start': 'START',
        'duration': 'DUR',
        'class': 'LABEL'
    })

    # Remap the model classes for this viz's purposes
    vizframe['LABEL'] = vizframe['LABEL'].map({
        'KCHI': 'CHILD',
        'CHI': 'CHILD',
        'FEM': 'ADULT',
        'MAL': 'ADULT'
    })

    # Filter the dataframe to just the 'clean' (non-'SPEECH') classes
    vizframe = vizframe[vizframe['LABEL'].isin(['CHILD', 'ADULT'])]
    vizframe['LABEL_NUM'] = vizframe['LABEL'] \
        .apply(lambda x: 1 if x == 'CHILD'
               else (-1 if x == 'ADULT' else NaN))
    vizframe['DUR_TRANS'] = vizframe['LABEL_NUM'] * vizframe['DUR']
    vizframe['COUNT'] = 1

    if export:
        vizframe.to_csv(outfile)

    return vizframe

In [2]:
def df_from_rttm(rttm):
    """ Given an RTTM file, parses it into a Pandas DataFrame.
    """
    df = pd.read_csv(rttm,
                     sep=' ',
                     names=['task','inputFile','one','start','duration',
                     'NA_1','NA_2','class','NA_3', 'NA_4'])
    return df

In [3]:
def get_latest_timestamp_needed(input_df):
    """ Given an RTTM-derived dataframe,
        extract the last timestamp we'll need
        as a scalar
    """
    last_row = input_df[input_df['start']==input_df['start'].max()][['start', 'duration']]
    last_row.reset_index(drop=True, inplace=True)
    latest_timestamp = last_row.at[0,'start'] + last_row.at[0,'duration']
    return round(latest_timestamp, 1)

In [4]:
ny_7759 = df_from_rttm('./TestData/eNY-7759.rttm')
ny_7759

Unnamed: 0,task,inputFile,one,start,duration,NA_1,NA_2,class,NA_3,NA_4
0,SPEAKER,eNY-7759,1,6.038,5.191,,,SPEECH,,
1,SPEAKER,eNY-7759,1,6.198,0.813,,,KCHI,,
2,SPEAKER,eNY-7759,1,7.773,0.145,,,KCHI,,
3,SPEAKER,eNY-7759,1,9.560,0.380,,,MAL,,
4,SPEAKER,eNY-7759,1,10.694,0.240,,,KCHI,,
...,...,...,...,...,...,...,...,...,...,...
143,SPEAKER,eNY-7759,1,235.567,3.714,,,FEM,,
144,SPEAKER,eNY-7759,1,238.321,0.120,,,MAL,,
145,SPEAKER,eNY-7759,1,238.641,0.125,,,MAL,,
146,SPEAKER,eNY-7759,1,238.888,0.351,,,KCHI,,


In [None]:
# We need times to be in milliseconds
# ny_7759[['start', 'duration']] = ny_7759[['start', 'duration']]*1000

In [5]:
# This is to make our lives easier later
ny_7759_bak = copy.deepcopy(ny_7759)
# ny_7759 = copy.deepcopy(ny_7759_bak)

In [34]:
def build_second_range(start, duration, value='', valname='value', verbose=False):
    """ Given a start time, and end time, and a value,
        create a dataframe with a timedelta index containing
        that value for the range between the endpoints
    """
    
    # Turn the endpoints into a second-denominated target
    low_end = pd.to_timedelta(round(start, 1), unit='milliseconds')
    span = pd.to_timedelta(round(duration, 1), unit='milliseconds')
    
    if verbose:
        print(f"Low end: {low_end}\nDuration: {span}\n")
    
    # Create a range between them
    rng = pd.timedelta_range(low_end, low_end+span, freq='100L')

    # Turn that series into a DataFrame and rename the index for clarity
    df = pd.Series(value, index=rng).to_frame(name=valname)
    df.index.name='seconds'
    if verbose:
        print(df.head(3))
    return df

In [44]:
build_second_range(0, 3000, np.nan,'base', verbose=True)    

Low end: 0 days 00:00:00
Duration: 0 days 00:00:03

                 base
seconds              
00:00:00          NaN
00:00:00.100000   NaN
00:00:00.200000   NaN


Unnamed: 0_level_0,base
seconds,Unnamed: 1_level_1
00:00:00,
00:00:00.100000,
00:00:00.200000,
00:00:00.300000,
00:00:00.400000,
00:00:00.500000,
00:00:00.600000,
00:00:00.700000,
00:00:00.800000,
00:00:00.900000,


In [27]:
# We only care about a subset of the RTTM's columns. We'll ignore the others.
# Also, for now, we only want to worry about the 'SPEECH' class.
ny_7759_short = ny_7759[['start', 'duration', 'class']][ny_7759['class']=='SPEECH'][0:10]
ny_7759_short.reset_index(drop=True, inplace=True)
ny_7759_short = ny_7759_short.round(0)

In [28]:
ny_7759_short

Unnamed: 0,start,duration,class
0,6.0,5.0,SPEECH
1,14.0,3.0,SPEECH
2,18.0,0.0,SPEECH
3,19.0,0.0,SPEECH
4,21.0,4.0,SPEECH
5,25.0,39.0,SPEECH
6,64.0,6.0,SPEECH
7,70.0,15.0,SPEECH
8,85.0,2.0,SPEECH
9,87.0,25.0,SPEECH


In [9]:
def build_second_indexed_df(input_df, label_list=[], verbose=True):
    """ Given an RTTM-generated DataFrame, generate a pivoted DF
        containing all of the labels of interest, one-hot encoded
    """
    
    max_seconds_needed = get_latest_timestamp_needed(input_df)
    if verbose:
        print(f'max_seconds_needed is of type {type(max_seconds_needed)} and equal to {max_seconds_needed}')
    outer_df = build_second_range(
                0,
                max_seconds_needed,
                np.nan,
                'base',
                verbose=True)
    
    if verbose:
        print(f'The outer_df frame will contain {len(outer_df)} records.')
    
    # Loop through labels, subsetting the original DF so
    # we can merge it back into the main outer DF
    for label in label_list:
        print(f'Processing label: {label}\n')

        if label not in input_df['class'].unique():
            print(f'Label {label} not found in this dataset')
            label_base_df = pd.DataFrame(columns=[label])
            continue
        
        # Gotta avoid errors from accidentally manipulating original DFs
        temp_df = copy.deepcopy(input_df)

        # Generate a temp_df that contains only records for the label of interest
        temp_df = input_df[['start','duration','class']][input_df['class']==label]

        if verbose:
            print(f'The temp_df subset for label {label} contains {len(temp_df)} rows')
            print(temp_df.head(5))
        
        # The subsetted DF retains the original index unless you reset it
        temp_df.reset_index(drop=True, inplace=True)

        if verbose:
            print(f'The temp_df frame is as follows:\n{temp_df}')

        # Creating the base DF for this label - ranges from 0 to the earliest record
        label_base_df = build_second_range(0, temp_df['start'].min(),
                                     np.nan, label, verbose=verbose)
        if verbose:
            # The head() will always be the same, so we need to look at the tail() to verify
            print(f'\n>>> The last few rows of the label_base_df for label {label} are:\n{label_base_df.tail()}')
            print(f"\n>>> Base DF size for label {label}: {len(temp_df)}\n")

        for i in range(1, len(temp_df)):
            label_base_df = label_base_df.append(
                build_second_range(
                    temp_df.loc[i]['start'],
                    temp_df.loc[i]['duration'],
                    str(label), str(label),
                    verbose=verbose
                ))

            if verbose:
                print(f'\n>>> Base DF size after {i} rounds: {len(label_base_df)}')
                print(f'\n>>> The head:\n{label_base_df.head(10)}\n>>> The tail:\n{label_base_df.tail(10)}')
                print(f'\n>>> A few of its contents:\n{label_base_df[~label_base_df[label].isna()].head(5)}')

        # Creating a placeholder for the update call
        outer_df[label] = np.nan
        
        # When attempting the update method:
        print(f'Attempting update with DF from label {label}')
        outer_df.update(label_base_df, overwrite=True)
        
        if verbose:
            try:
                print(outer_df[~outer_df[label].isna()].head())
            except:
                print(outer_df)
            
        # When attempting the merge method:
#         outer_df = pd.merge(left=outer_df, left_index=True,
#                             right=label_base_df, right_index=True,
#                             how='inner', suffixes=('_base',''),
#                             indicator=True, validate='1:1')
#         if verbose:
#             print(outer_df[~outer_df[f'{label}_y'].isna()].head(5))

    return outer_df

In [54]:
tester = df_from_rttm('./TestData/eNY-7759.rttm').round(1)
tester[['start', 'duration']] = tester[['start', 'duration']]*1000
labels = ['MAL', 'CHI', 'KCHI', 'FEM', 'SPEECH']
result = build_second_indexed_df(tester, label_list=labels, verbose=True)

max_seconds_needed is of type <class 'numpy.float64'> and equal to 239100.0
Low end: 0 days 00:00:00
Duration: 0 days 00:03:59.100000

                 base
seconds              
00:00:00          NaN
00:00:00.100000   NaN
00:00:00.200000   NaN
The outer_df frame will contain 2392 records.
Processing label: MAL

The temp_df subset for label MAL contains 26 rows
      start  duration class
3    9600.0     400.0   MAL
5   12600.0    1800.0   MAL
8   18400.0    1600.0   MAL
15  25400.0     100.0   MAL
16  25800.0     200.0   MAL
The temp_df frame is as follows:
       start  duration class
0     9600.0     400.0   MAL
1    12600.0    1800.0   MAL
2    18400.0    1600.0   MAL
3    25400.0     100.0   MAL
4    25800.0     200.0   MAL
5    56000.0    2700.0   MAL
6    59500.0    1600.0   MAL
7    72400.0     400.0   MAL
8    75000.0    2200.0   MAL
9    79300.0     600.0   MAL
10  101000.0    1000.0   MAL
11  102100.0     100.0   MAL
12  106800.0     200.0   MAL
13  121000.0     800.0   MAL


                 MAL
seconds             
00:01:46.800000  MAL
00:01:46.900000  MAL
00:01:47         MAL

>>> Base DF size after 12 rounds: 234

>>> The head:
                 MAL
seconds             
00:00:00         NaN
00:00:00.100000  NaN
00:00:00.200000  NaN
00:00:00.300000  NaN
00:00:00.400000  NaN
00:00:00.500000  NaN
00:00:00.600000  NaN
00:00:00.700000  NaN
00:00:00.800000  NaN
00:00:00.900000  NaN
>>> The tail:
                 MAL
seconds             
00:01:41.600000  MAL
00:01:41.700000  MAL
00:01:41.800000  MAL
00:01:41.900000  MAL
00:01:42         MAL
00:01:42.100000  MAL
00:01:42.200000  MAL
00:01:46.800000  MAL
00:01:46.900000  MAL
00:01:47         MAL

>>> A few of its contents:
                 MAL
seconds             
00:00:12.600000  MAL
00:00:12.700000  MAL
00:00:12.800000  MAL
00:00:12.900000  MAL
00:00:13         MAL
Low end: 0 days 00:02:01
Duration: 0 days 00:00:00.800000

                 MAL
seconds             
00:02:01         MAL
00:02:01.100000  MAL
00:02

00:03:30.900000  MAL

>>> A few of its contents:
                 MAL
seconds             
00:00:12.600000  MAL
00:00:12.700000  MAL
00:00:12.800000  MAL
00:00:12.900000  MAL
00:00:13         MAL
Low end: 0 days 00:03:31
Duration: 0 days 00:00:01.300000

                 MAL
seconds             
00:03:31         MAL
00:03:31.100000  MAL
00:03:31.200000  MAL

>>> Base DF size after 22 rounds: 312

>>> The head:
                 MAL
seconds             
00:00:00         NaN
00:00:00.100000  NaN
00:00:00.200000  NaN
00:00:00.300000  NaN
00:00:00.400000  NaN
00:00:00.500000  NaN
00:00:00.600000  NaN
00:00:00.700000  NaN
00:00:00.800000  NaN
00:00:00.900000  NaN
>>> The tail:
                 MAL
seconds             
00:03:31.400000  MAL
00:03:31.500000  MAL
00:03:31.600000  MAL
00:03:31.700000  MAL
00:03:31.800000  MAL
00:03:31.900000  MAL
00:03:32         MAL
00:03:32.100000  MAL
00:03:32.200000  MAL
00:03:32.300000  MAL

>>> A few of its contents:
                 MAL
seconds            

00:00:41         CHI
Low end: 0 days 00:00:54
Duration: 0 days 00:00:00.400000

                 CHI
seconds             
00:00:54         CHI
00:00:54.100000  CHI
00:00:54.200000  CHI

>>> Base DF size after 8 rounds: 378

>>> The head:
                 CHI
seconds             
00:00:00         NaN
00:00:00.100000  NaN
00:00:00.200000  NaN
00:00:00.300000  NaN
00:00:00.400000  NaN
00:00:00.500000  NaN
00:00:00.600000  NaN
00:00:00.700000  NaN
00:00:00.800000  NaN
00:00:00.900000  NaN
>>> The tail:
                 CHI
seconds             
00:00:51.100000  CHI
00:00:51.200000  CHI
00:00:51.300000  CHI
00:00:51.400000  CHI
00:00:51.500000  CHI
00:00:54         CHI
00:00:54.100000  CHI
00:00:54.200000  CHI
00:00:54.300000  CHI
00:00:54.400000  CHI

>>> A few of its contents:
                 CHI
seconds             
00:00:37.100000  CHI
00:00:37.200000  CHI
00:00:37.300000  CHI
00:00:37.400000  CHI
00:00:41         CHI
Low end: 0 days 00:01:20
Duration: 0 days 00:00:00.100000

          

00:03:48.800000  CHI

>>> A few of its contents:
                 CHI
seconds             
00:00:37.100000  CHI
00:00:37.200000  CHI
00:00:37.300000  CHI
00:00:37.400000  CHI
00:00:41         CHI
Low end: 0 days 00:03:54.300000
Duration: 0 days 00:00:01

                 CHI
seconds             
00:03:54.300000  CHI
00:03:54.400000  CHI
00:03:54.500000  CHI

>>> Base DF size after 22 rounds: 546

>>> The head:
                 CHI
seconds             
00:00:00         NaN
00:00:00.100000  NaN
00:00:00.200000  NaN
00:00:00.300000  NaN
00:00:00.400000  NaN
00:00:00.500000  NaN
00:00:00.600000  NaN
00:00:00.700000  NaN
00:00:00.800000  NaN
00:00:00.900000  NaN
>>> The tail:
                 CHI
seconds             
00:03:54.400000  CHI
00:03:54.500000  CHI
00:03:54.600000  CHI
00:03:54.700000  CHI
00:03:54.800000  CHI
00:03:54.900000  CHI
00:03:55         CHI
00:03:55.100000  CHI
00:03:55.200000  CHI
00:03:55.300000  CHI

>>> A few of its contents:
                 CHI
seconds            

00:01:29         KCHI

>>> A few of its contents:
                 KCHI
seconds              
00:00:07.800000  KCHI
00:00:07.900000  KCHI
00:00:10.700000  KCHI
00:00:10.800000  KCHI
00:00:10.900000  KCHI
Low end: 0 days 00:01:53
Duration: 0 days 00:00:00.500000

                 KCHI
seconds              
00:01:53         KCHI
00:01:53.100000  KCHI
00:01:53.200000  KCHI

>>> Base DF size after 11 rounds: 161

>>> The head:
                KCHI
seconds             
00:00:00         NaN
00:00:00.100000  NaN
00:00:00.200000  NaN
00:00:00.300000  NaN
00:00:00.400000  NaN
00:00:00.500000  NaN
00:00:00.600000  NaN
00:00:00.700000  NaN
00:00:00.800000  NaN
00:00:00.900000  NaN
>>> The tail:
                 KCHI
seconds              
00:01:28.700000  KCHI
00:01:28.800000  KCHI
00:01:28.900000  KCHI
00:01:29         KCHI
00:01:53         KCHI
00:01:53.100000  KCHI
00:01:53.200000  KCHI
00:01:53.300000  KCHI
00:01:53.400000  KCHI
00:01:53.500000  KCHI

>>> A few of its contents:
               

Low end: 0 days 00:00:27.500000
Duration: 0 days 00:00:04.400000

                 FEM
seconds             
00:00:27.500000  FEM
00:00:27.600000  FEM
00:00:27.700000  FEM

>>> Base DF size after 1 rounds: 266

>>> The head:
                 FEM
seconds             
00:00:00         NaN
00:00:00.100000  NaN
00:00:00.200000  NaN
00:00:00.300000  NaN
00:00:00.400000  NaN
00:00:00.500000  NaN
00:00:00.600000  NaN
00:00:00.700000  NaN
00:00:00.800000  NaN
00:00:00.900000  NaN
>>> The tail:
                 FEM
seconds             
00:00:31         FEM
00:00:31.100000  FEM
00:00:31.200000  FEM
00:00:31.300000  FEM
00:00:31.400000  FEM
00:00:31.500000  FEM
00:00:31.600000  FEM
00:00:31.700000  FEM
00:00:31.800000  FEM
00:00:31.900000  FEM

>>> A few of its contents:
                 FEM
seconds             
00:00:27.500000  FEM
00:00:27.600000  FEM
00:00:27.700000  FEM
00:00:27.800000  FEM
00:00:27.900000  FEM
Low end: 0 days 00:00:32
Duration: 0 days 00:00:02.600000

                 FEM
sec

                 FEM
seconds             
00:01:01         FEM
00:01:01.100000  FEM

>>> Base DF size after 10 rounds: 503

>>> The head:
                 FEM
seconds             
00:00:00         NaN
00:00:00.100000  NaN
00:00:00.200000  NaN
00:00:00.300000  NaN
00:00:00.400000  NaN
00:00:00.500000  NaN
00:00:00.600000  NaN
00:00:00.700000  NaN
00:00:00.800000  NaN
00:00:00.900000  NaN
>>> The tail:
                 FEM
seconds             
00:00:58.400000  FEM
00:00:58.500000  FEM
00:00:58.600000  FEM
00:00:58.700000  FEM
00:01:00.500000  FEM
00:01:00.600000  FEM
00:01:00.700000  FEM
00:01:00.800000  FEM
00:01:01         FEM
00:01:01.100000  FEM

>>> A few of its contents:
                 FEM
seconds             
00:00:27.500000  FEM
00:00:27.600000  FEM
00:00:27.700000  FEM
00:00:27.800000  FEM
00:00:27.900000  FEM
Low end: 0 days 00:01:01.300000
Duration: 0 days 00:00:02.500000

                 FEM
seconds             
00:01:01.300000  FEM
00:01:01.400000  FEM
00:01:01.500000  FE

00:01:24.500000  FEM

>>> A few of its contents:
                 FEM
seconds             
00:00:27.500000  FEM
00:00:27.600000  FEM
00:00:27.700000  FEM
00:00:27.800000  FEM
00:00:27.900000  FEM
Low end: 0 days 00:01:25.500000
Duration: 0 days 00:00:01.800000

                 FEM
seconds             
00:01:25.500000  FEM
00:01:25.600000  FEM
00:01:25.700000  FEM

>>> Base DF size after 20 rounds: 723

>>> The head:
                 FEM
seconds             
00:00:00         NaN
00:00:00.100000  NaN
00:00:00.200000  NaN
00:00:00.300000  NaN
00:00:00.400000  NaN
00:00:00.500000  NaN
00:00:00.600000  NaN
00:00:00.700000  NaN
00:00:00.800000  NaN
00:00:00.900000  NaN
>>> The tail:
                 FEM
seconds             
00:01:26.400000  FEM
00:01:26.500000  FEM
00:01:26.600000  FEM
00:01:26.700000  FEM
00:01:26.800000  FEM
00:01:26.900000  FEM
00:01:27         FEM
00:01:27.100000  FEM
00:01:27.200000  FEM
00:01:27.300000  FEM

>>> A few of its contents:
                 FEM
seconds     

00:02:14         FEM

>>> A few of its contents:
                 FEM
seconds             
00:00:27.500000  FEM
00:00:27.600000  FEM
00:00:27.700000  FEM
00:00:27.800000  FEM
00:00:27.900000  FEM
Low end: 0 days 00:02:15
Duration: 0 days 00:00:03.400000

                 FEM
seconds             
00:02:15         FEM
00:02:15.100000  FEM
00:02:15.200000  FEM

>>> Base DF size after 30 rounds: 1129

>>> The head:
                 FEM
seconds             
00:00:00         NaN
00:00:00.100000  NaN
00:00:00.200000  NaN
00:00:00.300000  NaN
00:00:00.400000  NaN
00:00:00.500000  NaN
00:00:00.600000  NaN
00:00:00.700000  NaN
00:00:00.800000  NaN
00:00:00.900000  NaN
>>> The tail:
                 FEM
seconds             
00:02:17.500000  FEM
00:02:17.600000  FEM
00:02:17.700000  FEM
00:02:17.800000  FEM
00:02:17.900000  FEM
00:02:18         FEM
00:02:18.100000  FEM
00:02:18.200000  FEM
00:02:18.300000  FEM
00:02:18.400000  FEM

>>> A few of its contents:
                 FEM
seconds           

00:03:08.300000  FEM

>>> Base DF size after 39 rounds: 1516

>>> The head:
                 FEM
seconds             
00:00:00         NaN
00:00:00.100000  NaN
00:00:00.200000  NaN
00:00:00.300000  NaN
00:00:00.400000  NaN
00:00:00.500000  NaN
00:00:00.600000  NaN
00:00:00.700000  NaN
00:00:00.800000  NaN
00:00:00.900000  NaN
>>> The tail:
                 FEM
seconds             
00:03:10.200000  FEM
00:03:10.300000  FEM
00:03:10.400000  FEM
00:03:10.500000  FEM
00:03:10.600000  FEM
00:03:10.700000  FEM
00:03:10.800000  FEM
00:03:10.900000  FEM
00:03:11         FEM
00:03:11.100000  FEM

>>> A few of its contents:
                 FEM
seconds             
00:00:27.500000  FEM
00:00:27.600000  FEM
00:00:27.700000  FEM
00:00:27.800000  FEM
00:00:27.900000  FEM
Low end: 0 days 00:03:11.300000
Duration: 0 days 00:00:09.500000

                 FEM
seconds             
00:03:11.300000  FEM
00:03:11.400000  FEM
00:03:11.500000  FEM

>>> Base DF size after 40 rounds: 1612

>>> The head:
     

00:00:14.400000  SPEECH
Low end: 0 days 00:00:18.500000
Duration: 0 days 00:00:00.200000

                 SPEECH
seconds                
00:00:18.500000  SPEECH
00:00:18.600000  SPEECH
00:00:18.700000  SPEECH

>>> Base DF size after 2 rounds: 96

>>> The head:
                SPEECH
seconds               
00:00:00           NaN
00:00:00.100000    NaN
00:00:00.200000    NaN
00:00:00.300000    NaN
00:00:00.400000    NaN
00:00:00.500000    NaN
00:00:00.600000    NaN
00:00:00.700000    NaN
00:00:00.800000    NaN
00:00:00.900000    NaN
>>> The tail:
                 SPEECH
seconds                
00:00:16.500000  SPEECH
00:00:16.600000  SPEECH
00:00:16.700000  SPEECH
00:00:16.800000  SPEECH
00:00:16.900000  SPEECH
00:00:17         SPEECH
00:00:17.100000  SPEECH
00:00:18.500000  SPEECH
00:00:18.600000  SPEECH
00:00:18.700000  SPEECH

>>> A few of its contents:
                 SPEECH
seconds                
00:00:14         SPEECH
00:00:14.100000  SPEECH
00:00:14.200000  SPEECH
00:00:14.300


                 SPEECH
seconds                
00:02:32.200000  SPEECH
00:02:32.300000  SPEECH
00:02:32.400000  SPEECH

>>> Base DF size after 17 rounds: 1378

>>> The head:
                SPEECH
seconds               
00:00:00           NaN
00:00:00.100000    NaN
00:00:00.200000    NaN
00:00:00.300000    NaN
00:00:00.400000    NaN
00:00:00.500000    NaN
00:00:00.600000    NaN
00:00:00.700000    NaN
00:00:00.800000    NaN
00:00:00.900000    NaN
>>> The tail:
                 SPEECH
seconds                
00:02:32.900000  SPEECH
00:02:33         SPEECH
00:02:33.100000  SPEECH
00:02:33.200000  SPEECH
00:02:33.300000  SPEECH
00:02:33.400000  SPEECH
00:02:33.500000  SPEECH
00:02:33.600000  SPEECH
00:02:33.700000  SPEECH
00:02:33.800000  SPEECH

>>> A few of its contents:
                 SPEECH
seconds                
00:00:14         SPEECH
00:00:14.100000  SPEECH
00:00:14.200000  SPEECH
00:00:14.300000  SPEECH
00:00:14.400000  SPEECH
Low end: 0 days 00:02:34.800000
Duration: 0 days 0

In [55]:
result.loc['00:03:30.000':'00:03:40.000']

Unnamed: 0_level_0,base,MAL,CHI,KCHI,FEM,SPEECH
seconds,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
00:03:30,,MAL,,,FEM,SPEECH
00:03:30.100000,,MAL,,,FEM,SPEECH
00:03:30.200000,,MAL,,,FEM,SPEECH
00:03:30.300000,,MAL,,,FEM,SPEECH
00:03:30.400000,,MAL,,,FEM,SPEECH
...,...,...,...,...,...,...
00:03:40.500000,,,CHI,,FEM,SPEECH
00:03:40.600000,,,CHI,,FEM,SPEECH
00:03:40.700000,,,CHI,,FEM,SPEECH
00:03:40.800000,,,CHI,,FEM,SPEECH


In [None]:
for col in result.columns:
    print(f""" This DataFrameresult[col].describe()

In [52]:
result.shape

(2392, 6)

In [56]:
result.to_csv('./TestData/eNY-7759.csv')