In [2]:
import copy
import pandas as pd
import numpy as np



In [3]:
file = './TestData/eNY-7759.rttm'

### RTTM to Dataframe

In [6]:
# Step 1: Load the .rttm into a dataframe
def DfFromRttm(rttm):
    """ Given an RTTM file, parses it into a Pandas DataFrame.
    """
    df = pd.read_csv(rttm,
                     sep=' ',
                     names=['task','inputFile','one','start','duration',
                     'NA_1','NA_2','class','NA_3', 'NA_4'])    
    
    # Drop the columns we don't care about from a base RTTM
    vizframe = copy.deepcopy(df) \
        .drop(
        columns=[
            'task',
            'inputFile',
            'one',
            'NA_1',
            'NA_2',
            'NA_3',
            'NA_4'])
    
    return vizframe

### Input Data for Audiozation

In [16]:
def RttmToUtteranceIndexedSpeakerActivity(df, outfile=None):
    """ Given an RTTM input file, generate a dataframe structured
        to support a visualization of type 'Speaker Activity' and optionally
        export to a csv located at {outfile}

        df = Pandas DataFrame containing a standard .rttm file
        outfile = destination for exported CSV (path, filename, extension)
    """

    # Check whether an outfile has been defined
    if outfile is not None:
        export = True
    else: 
        export = False

    # Drop the columns we don't care about from a base RTTM
    vizframe = copy.deepcopy(df) 

    # Rename columns for our viz's purposes
    vizframe = vizframe.rename(columns={
        'start': 'START',
        'duration': 'DUR',
        'class': 'LABEL'
    })

    # Remap the model classes for this viz's purposes
    vizframe['LABEL'] = vizframe['LABEL'].map({
        'KCHI': 'CHILD',
        'CHI': 'CHILD',
        'FEM': 'ADULT',
        'MAL': 'ADULT'
    })

    # Filter the dataframe to just the 'clean' (non-'SPEECH') classes
    vizframe = vizframe[vizframe['LABEL'].isin(['CHILD', 'ADULT'])]
    vizframe['LABEL_NUM'] = vizframe['LABEL'] \
        .apply(lambda x: 1 if x == 'CHILD'
               else (-1 if x == 'ADULT' else NaN))
    vizframe['DUR_TRANS'] = vizframe['LABEL_NUM'] * vizframe['DUR']
    vizframe['COUNT'] = 1

    if export:
        vizframe.to_csv(outfile)

    return vizframe

In [17]:
RttmToUtteranceIndexedSpeakerActivity(DfFromRttm(file), outfile=None)

Unnamed: 0,START,DUR,LABEL,LABEL_NUM,DUR_TRANS,COUNT
1,6.198,0.813,CHILD,1,0.813,1
2,7.773,0.145,CHILD,1,0.145,1
3,9.560,0.380,ADULT,-1,-0.380,1
4,10.694,0.240,CHILD,1,0.240,1
5,12.565,1.787,ADULT,-1,-1.787,1
...,...,...,...,...,...,...
143,235.567,3.714,ADULT,-1,-3.714,1
144,238.321,0.120,ADULT,-1,-0.120,1
145,238.641,0.125,ADULT,-1,-0.125,1
146,238.888,0.351,CHILD,1,0.351,1


### Time-Indexed Dataframe

In [18]:
## Identify the latest timestamp we need.
def GetLatestTimestampNeeded(input_df, verbose=True):
    """ Given an RTTM-derived dataframe,
        extract the last timestamp we'll need
        as a scalar. It will be the maximum value of
        the `start` + `duration` columns.
    """
    input_df['end_time'] = input_df['start'] + input_df['duration']
    end_row = input_df['end_time'].idxmax()
    latest_timestamp = input_df.at[end_row,'start'] + input_df.at[end_row,'duration']
    if verbose:
        print(f'''
        >> This DF has data that runs until {latest_timestamp}.
        >> That value was found at row {end_row} and is the sum of
           {input_df.at[end_row, 'start']} and {input_df.at[end_row, 'duration']}
        ''')
    return latest_timestamp

In [19]:
## Create 100 millisecond index.
def Make100MillisecondIntegerIndexedDf(
        start,
        duration,
        colname='value',
        colvalue='',
        decimals=2,
        verbose=False):
    """ Create an integer-indexed dataframe covering a 
            {duration} from
            {start} having a single column with the name
            {colname} that contains a default value of
            {colvalue} for that named column.
        By default it runs quietly rather than {verbose}.
    """

    if verbose:
        print(f' >>> From [{start}] for [{duration}] seconds' \
              f' until [{start+duration}] the col [{colname}]' \
              f' will contain the value [{colvalue}]')
    
    # Create a range between the start and stop
    rng = pd.RangeIndex(start = int(round(start, decimals)*10),
                        stop  = int(round(start+duration, decimals)*10),
                        step  = 1)
    
    # Turn that series into a DataFrame and rename the index for clarity
    df = pd.Series(colvalue, index=rng).to_frame(name=colname)
    df.index.name=f'100millisecond_ints'
    if verbose:
        print(df.head(3), df.tail(3))
    return df

In [20]:
## Subset by label (for testing only).
def SubsetDfByLabel(df, column_list, key_col, value):
    """ Given a dataframe {df}, return the subset
        of the dataframe defined by {column_list}
        containing {value} in the {key_col} column
    """
    return df[df[key_col] == value][[x for x in column_list]]

In [21]:
# This is to make our lives easier later
ny_7759 = DfFromRttm(file)
ny_7759_bak = copy.deepcopy(ny_7759)

In [22]:
def BuildOneHotEncodedDf(
    input_df,
    label_list=[],
    verbose=True
    ):
    """ Given an RTTM-generated DataFrame, generate a DF containing
        all of the labels of interest in one-hot encoded format
        against an integer-indexed DF representing milliseconds from
        the beginning of the recording
    """
    
    max_seconds_needed = GetLatestTimestampNeeded(input_df)
    if verbose:
        print(f'max_seconds_needed is equal to {max_seconds_needed}')

    outer_df = Make100MillisecondIntegerIndexedDf(
                start      = 0,
                duration   = max_seconds_needed,
                colname    = 'base_col',
                colvalue   = np.nan,
                decimals   = 2,
                verbose    = True)

    if verbose:
        print(f'The outer_df frame will contain {outer_df.shape[0]} records.')
    
    # Loop through labels, subsetting the original DF so
    # we can merge it back into the main outer DF
    for label in label_list:
        print(f'>>> Processing label: {label}\n')

        if label not in input_df['class'].unique():
            print(f'Label {label} not found in this dataset')
            label_base_df = pd.DataFrame(columns=[label])
            continue
        
        # Gotta avoid errors from accidentally manipulating original DFs
        temp_df = copy.deepcopy(input_df)

        # Replace the deep-copied DF with a subset of itself
        # that contains only records for the label of interest
        temp_df = SubsetDfByLabel(
            input_df,
            column_list = ['start', 'duration', 'class'],
            key_col     = 'class',
            value       = label
        )

        if verbose:
            print(f'The temp_df subset for label {label}' \
                  f' contains {temp_df.shape[0]} rows')
            print(temp_df.head(5),
                  temp_df.tail(5))
        
        # The subsetted DF retains the original index unless you reset it
        temp_df.reset_index(drop=True,
                            inplace=True)
        
        if verbose:
            print(f'The temp_df frame is as follows:\n{temp_df}')

        # Creating the base DF for this label - ranges from 0 to the earliest record
        label_base_df = Make100MillisecondIntegerIndexedDf(
                start      = 0,
                duration   = temp_df['start'].min(),
                colname    = label,
                colvalue   = np.nan,
                decimals   = 2,
                verbose    = verbose)
        
        if verbose:
            # The head() will always be the same, so we need to look at the tail() to verify
            print('\n>>> The last few rows of the label_base_df for label' \
                  f' {label} are:\n{label_base_df.tail()}' \
                  f'\n>>> Base DF size for label {label}: {len(temp_df)}\n')

        for i in temp_df.index:
            if verbose:
                print('*' * 10 + f'{label}: i = {i}' + '*' * 10)
            s, d, cn = temp_df.loc[i, ['start', 'duration', 'class']]
            label_base_df = label_base_df.append(
                Make100MillisecondIntegerIndexedDf(
                    start    = s,
                    duration = d,
                    colname  = cn,
                    colvalue = 1,
                    decimals = 2,
                    verbose  = verbose
                ))
            
            if verbose:
                print(f'\n>>> Base DF size after {i}' \
                      ' rounds: {len(label_base_df)}')
                print(f'\n>>> The head:\n{label_base_df.head(10)}'\
                      f'\n>>> The tail:\n{label_base_df.tail(10)}')
                print('\n>>> A few of its contents:'\
                      f'\n{label_base_df[~label_base_df[label].isna()].head(5)}')

        # Creating a placeholder for the update call
        outer_df[label] = np.nan
        
        # When attempting the update method:
        print(f'Attempting update with DF from label {label}')
        outer_df.update(
            other     = label_base_df,
            overwrite = True)
        
        if verbose:
            try:
                print(outer_df[~outer_df[label].isna()].head())
            except:
                print(outer_df)
            
    return outer_df

In [23]:
tester = DfFromRttm(file)
labels = ['MAL', 'CHI', 'KCHI', 'FEM', 'SPEECH']
tester[['start', 'duration']].loc[2]

start       7.773
duration    0.145
Name: 2, dtype: float64

In [24]:
result = BuildOneHotEncodedDf(tester, label_list=labels, verbose=True)


        >> This DF has data that runs until 239.67000000000002.
        >> That value was found at row 142 and is the sum of
           235.49200000000002 and 4.178
        
max_seconds_needed is equal to 239.67000000000002
 >>> From [0] for [239.67000000000002] seconds until [239.67000000000002] the col [base_col] will contain the value [nan]
                     base_col
100millisecond_ints          
0                         NaN
1                         NaN
2                         NaN                      base_col
100millisecond_ints          
2393                      NaN
2394                      NaN
2395                      NaN
The outer_df frame will contain 2396 records.
>>> Processing label: MAL

The temp_df subset for label MAL contains 26 rows
     start  duration class
3    9.560     0.380   MAL
5   12.565     1.787   MAL
8   18.416     1.573   MAL
15  25.397     0.118   MAL
16  25.822     0.250   MAL        start  duration class
131  209.993     0.857   MAL
132  210.9

 >>> From [102.12200000000001] for [0.12] seconds until [102.24200000000002] the col [MAL] will contain the value [1]
                     MAL
100millisecond_ints     
1021                   1                      MAL
100millisecond_ints     
1021                   1

>>> Base DF size after 11 rounds: {len(label_base_df)}

>>> The head:
                     MAL
100millisecond_ints     
0                    NaN
1                    NaN
2                    NaN
3                    NaN
4                    NaN
5                    NaN
6                    NaN
7                    NaN
8                    NaN
9                    NaN
>>> The tail:
                     MAL
100millisecond_ints     
1010                 1.0
1011                 1.0
1012                 1.0
1013                 1.0
1014                 1.0
1015                 1.0
1016                 1.0
1017                 1.0
1018                 1.0
1021                 1.0

>>> A few of its contents:
                   

2251                   1                      MAL
100millisecond_ints     
2251                   1

>>> Base DF size after 23 rounds: {len(label_base_df)}

>>> The head:
                     MAL
100millisecond_ints     
0                    NaN
1                    NaN
2                    NaN
3                    NaN
4                    NaN
5                    NaN
6                    NaN
7                    NaN
8                    NaN
9                    NaN
>>> The tail:
                     MAL
100millisecond_ints     
2113                 1.0
2114                 1.0
2115                 1.0
2116                 1.0
2117                 1.0
2118                 1.0
2119                 1.0
2120                 1.0
2121                 1.0
2251                 1.0

>>> A few of its contents:
                     MAL
100millisecond_ints     
95                   1.0
96                   1.0
97                   1.0
98                   1.0
125                  1.0
**********MA

542                  1.0

>>> A few of its contents:
                     CHI
100millisecond_ints     
330                  1.0
331                  1.0
332                  1.0
371                  1.0
372                  1.0
**********CHI: i = 9**********
 >>> From [80.048] for [0.11] seconds until [80.158] the col [CHI] will contain the value [1]
                     CHI
100millisecond_ints     
800                    1                      CHI
100millisecond_ints     
800                    1

>>> Base DF size after 9 rounds: {len(label_base_df)}

>>> The head:
                     CHI
100millisecond_ints     
0                    NaN
1                    NaN
2                    NaN
3                    NaN
4                    NaN
5                    NaN
6                    NaN
7                    NaN
8                    NaN
9                    NaN
>>> The tail:
                     CHI
100millisecond_ints     
481                  1.0
482                  1.0
511          

                     CHI
100millisecond_ints     
2342                   1
2343                   1
2344                   1                      CHI
100millisecond_ints     
2349                   1
2350                   1
2351                   1

>>> Base DF size after 22 rounds: {len(label_base_df)}

>>> The head:
                     CHI
100millisecond_ints     
0                    NaN
1                    NaN
2                    NaN
3                    NaN
4                    NaN
5                    NaN
6                    NaN
7                    NaN
8                    NaN
9                    NaN
>>> The tail:
                     CHI
100millisecond_ints     
2342                 1.0
2343                 1.0
2344                 1.0
2345                 1.0
2346                 1.0
2347                 1.0
2348                 1.0
2349                 1.0
2350                 1.0
2351                 1.0

>>> A few of its contents:
                     CHI
100milliseco

401                     1                      KCHI
100millisecond_ints      
400                     1
401                     1

>>> Base DF size after 7 rounds: {len(label_base_df)}

>>> The head:
                     KCHI
100millisecond_ints      
0                     NaN
1                     NaN
2                     NaN
3                     NaN
4                     NaN
5                     NaN
6                     NaN
7                     NaN
8                     NaN
9                     NaN
>>> The tail:
                     KCHI
100millisecond_ints      
382                   1.0
383                   1.0
384                   1.0
385                   1.0
386                   1.0
387                   1.0
388                   1.0
389                   1.0
400                   1.0
401                   1.0

>>> A few of its contents:
                     KCHI
100millisecond_ints      
62                    1.0
63                    1.0
64                    1.0
65  

1677                    1                      KCHI
100millisecond_ints      
1683                    1
1684                    1
1685                    1

>>> Base DF size after 19 rounds: {len(label_base_df)}

>>> The head:
                     KCHI
100millisecond_ints      
0                     NaN
1                     NaN
2                     NaN
3                     NaN
4                     NaN
5                     NaN
6                     NaN
7                     NaN
8                     NaN
9                     NaN
>>> The tail:
                     KCHI
100millisecond_ints      
1676                  1.0
1677                  1.0
1678                  1.0
1679                  1.0
1680                  1.0
1681                  1.0
1682                  1.0
1683                  1.0
1684                  1.0
1685                  1.0

>>> A few of its contents:
                     KCHI
100millisecond_ints      
62                    1.0
63                    1.0
64 

                     FEM
100millisecond_ints     
390                    1
391                    1
392                    1                      FEM
100millisecond_ints     
427                    1
428                    1
429                    1

>>> Base DF size after 5 rounds: {len(label_base_df)}

>>> The head:
                     FEM
100millisecond_ints     
0                    NaN
1                    NaN
2                    NaN
3                    NaN
4                    NaN
5                    NaN
6                    NaN
7                    NaN
8                    NaN
9                    NaN
>>> The tail:
                     FEM
100millisecond_ints     
420                  1.0
421                  1.0
422                  1.0
423                  1.0
424                  1.0
425                  1.0
426                  1.0
427                  1.0
428                  1.0
429                  1.0

>>> A few of its contents:
                     FEM
100millisecon


>>> Base DF size after 15 rounds: {len(label_base_df)}

>>> The head:
                     FEM
100millisecond_ints     
0                    NaN
1                    NaN
2                    NaN
3                    NaN
4                    NaN
5                    NaN
6                    NaN
7                    NaN
8                    NaN
9                    NaN
>>> The tail:
                     FEM
100millisecond_ints     
751                  1.0
752                  1.0
753                  1.0
754                  1.0
755                  1.0
756                  1.0
757                  1.0
758                  1.0
759                  1.0
760                  1.0

>>> A few of its contents:
                     FEM
100millisecond_ints     
219                  1.0
220                  1.0
221                  1.0
222                  1.0
223                  1.0
**********FEM: i = 16**********
 >>> From [76.51100000000001] for [4.258] seconds until [80.769] the col [FEM] w


>>> A few of its contents:
                     FEM
100millisecond_ints     
219                  1.0
220                  1.0
221                  1.0
222                  1.0
223                  1.0
**********FEM: i = 26**********
 >>> From [127.51100000000001] for [1.348] seconds until [128.859] the col [FEM] will contain the value [1]
                     FEM
100millisecond_ints     
1275                   1
1276                   1
1277                   1                      FEM
100millisecond_ints     
1285                   1
1286                   1
1287                   1

>>> Base DF size after 26 rounds: {len(label_base_df)}

>>> The head:
                     FEM
100millisecond_ints     
0                    NaN
1                    NaN
2                    NaN
3                    NaN
4                    NaN
5                    NaN
6                    NaN
7                    NaN
8                    NaN
9                    NaN
>>> The tail:
                     F

1740                   1

>>> Base DF size after 36 rounds: {len(label_base_df)}

>>> The head:
                     FEM
100millisecond_ints     
0                    NaN
1                    NaN
2                    NaN
3                    NaN
4                    NaN
5                    NaN
6                    NaN
7                    NaN
8                    NaN
9                    NaN
>>> The tail:
                     FEM
100millisecond_ints     
1731                 1.0
1732                 1.0
1733                 1.0
1734                 1.0
1735                 1.0
1736                 1.0
1737                 1.0
1738                 1.0
1739                 1.0
1740                 1.0

>>> A few of its contents:
                     FEM
100millisecond_ints     
219                  1.0
220                  1.0
221                  1.0
222                  1.0
223                  1.0
**********FEM: i = 37**********
 >>> From [175.265] for [7.271] seconds until [182.5359

142                       1                      SPEECH
100millisecond_ints        
168                       1
169                       1
170                       1

>>> Base DF size after 1 rounds: {len(label_base_df)}

>>> The head:
                     SPEECH
100millisecond_ints        
0                       NaN
1                       NaN
2                       NaN
3                       NaN
4                       NaN
5                       NaN
6                       NaN
7                       NaN
8                       NaN
9                       NaN
>>> The tail:
                     SPEECH
100millisecond_ints        
161                     1.0
162                     1.0
163                     1.0
164                     1.0
165                     1.0
166                     1.0
167                     1.0
168                     1.0
169                     1.0
170                     1.0

>>> A few of its contents:
                     SPEECH
100millisecond_ints 

1394                    1.0

>>> A few of its contents:
                     SPEECH
100millisecond_ints        
60                      1.0
61                      1.0
62                      1.0
63                      1.0
64                      1.0
**********SPEECH: i = 16**********
 >>> From [140.94799999999998] for [11.043] seconds until [151.99099999999999] the col [SPEECH] will contain the value [1]
                     SPEECH
100millisecond_ints        
1409                      1
1410                      1
1411                      1                      SPEECH
100millisecond_ints        
1516                      1
1517                      1
1518                      1

>>> Base DF size after 16 rounds: {len(label_base_df)}

>>> The head:
                     SPEECH
100millisecond_ints        
0                       NaN
1                       NaN
2                       NaN
3                       NaN
4                       NaN
5                       NaN
6              

In [25]:
result.head(20)

Unnamed: 0_level_0,base_col,MAL,CHI,KCHI,FEM,SPEECH
100millisecond_ints,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
0,,,,,,
1,,,,,,
2,,,,,,
3,,,,,,
4,,,,,,
5,,,,,,
6,,,,,,
7,,,,,,
8,,,,,,
9,,,,,,


In [26]:
result.tail(20)

Unnamed: 0_level_0,base_col,MAL,CHI,KCHI,FEM,SPEECH
100millisecond_ints,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2376,,,,,1.0,1.0
2377,,,,,1.0,1.0
2378,,,,,1.0,1.0
2379,,,,,1.0,1.0
2380,,,,,1.0,1.0
2381,,,,,1.0,1.0
2382,,,,,1.0,1.0
2383,,1.0,,,1.0,1.0
2384,,,,,1.0,1.0
2385,,,,,1.0,1.0


In [27]:
result.loc[60:70,]

Unnamed: 0_level_0,base_col,MAL,CHI,KCHI,FEM,SPEECH
100millisecond_ints,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
60,,,,,,1.0
61,,,,,,1.0
62,,,,1.0,,1.0
63,,,,1.0,,1.0
64,,,,1.0,,1.0
65,,,,1.0,,1.0
66,,,,1.0,,1.0
67,,,,1.0,,1.0
68,,,,1.0,,1.0
69,,,,1.0,,1.0


In [28]:
result.describe()

Unnamed: 0,base_col,MAL,CHI,KCHI,FEM,SPEECH
count,0.0,199.0,200.0,175.0,1690.0,2173.0
mean,,1.0,1.0,1.0,1.0,1.0
std,,0.0,0.0,0.0,0.0,0.0
min,,1.0,1.0,1.0,1.0,1.0
25%,,1.0,1.0,1.0,1.0,1.0
50%,,1.0,1.0,1.0,1.0,1.0
75%,,1.0,1.0,1.0,1.0,1.0
max,,1.0,1.0,1.0,1.0,1.0


In [29]:
result.shape

(2396, 6)

### Data Cleanup

In [30]:
# Drop base_col and SPEECH columns
result = result.drop(['base_col', 'SPEECH'], axis=1).fillna(0)

In [31]:
# Combine MAL/FEM and KCHI/CHI
result['ADULT'] = result.apply(lambda x: 0 if (x['MAL']==0 and x['FEM']==0) else 1, axis=1)
result['CHILD'] = result.apply(lambda x: 0 if (x['KCHI']==0 and x['CHI']==0) else 1, axis=1)

In [32]:
# Drop MAL/FEM and KCHI/CHI columns
result = result.drop(['MAL', 'CHI', 'KCHI', 'FEM'], axis=1)

In [33]:
# Identify overlaps and pauses.
result['OVERLAP'] = np.where(result[['ADULT', 'CHILD']].sum(axis = 1) > 1, 1, 0)
result['PAUSE'] = np.where(result[['ADULT', 'CHILD']].sum(axis = 1) == 0, 1, 0)

In [34]:
# Identify turns, where 1 indicates the start of a turn, 
# and -1 indicates the start of a pause.
result['TURN_A'] = result.ADULT.diff()
result['TURN_C'] = result.CHILD.diff()

In [23]:
result.tail(10)

Unnamed: 0_level_0,ADULT,CHILD,OVERLAP,PAUSE,TURN_A,TURN_C
100millisecond_ints,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2386,1,0,0,0,0.0,0.0
2387,1,0,0,0,0.0,0.0
2388,1,1,1,0,0.0,1.0
2389,1,1,1,0,0.0,0.0
2390,1,1,1,0,0.0,0.0
2391,1,1,1,0,0.0,0.0
2392,0,0,0,1,-1.0,-1.0
2393,0,0,0,1,0.0,0.0
2394,0,0,0,1,0.0,0.0
2395,0,0,0,1,0.0,0.0


In [35]:
result.to_csv('./TestData/eNY-7759.csv')

### Summary

In [36]:
# Calculate the total time (sec).
tp_time = round(len(result.index)*.1,1)
tp_time

239.6

### Talk Time

In [37]:
# Calculate total talktime (sec).
tp_tt = round(((result['ADULT'].sum() + result['CHILD'].sum())*.1),1)
print(tp_tt)

# Calculate teacher talktime (sec and %).
tp_ttt = result['ADULT'].sum()*.1
tp_ttt_pc = int(round((tp_ttt/tp_tt)*100, 0))
print(tp_ttt, tp_ttt_pc)

# Calculate student talktime (sec and %).
tp_stt = result['CHILD'].sum()*.1
tp_stt_pc = int(round((tp_stt/tp_tt)*100, 0))
print(tp_stt, tp_stt_pc)

print('During your lesson you spoke ' 
      + str(tp_ttt_pc) + '% of the time and the Students spoke ' 
      + str(tp_stt_pc) + '% of the time.')

# Calculate the Talk Time metric ("For every second of student talk time...".
tp_tt_metric = int(round(tp_ttt/tp_stt,0))
print('For every second of Student talk time, there were about ' 
      + str(tp_tt_metric) + ' seconds of Teacher talk time.')

215.2
180.3 84
34.9 16
During your lesson you spoke 84% of the time and the Students spoke 16% of the time.
For every second of Student talk time, there were about 5 seconds of Teacher talk time.


### Turns

In [38]:
#Calculate number of turns.
tp_turn_t = result['TURN_A'].value_counts()[1]
tp_turn_c = result['TURN_C'].value_counts()[1]
tp_turn = tp_turn_t + tp_turn_c
print(tp_turn, tp_turn_t, tp_turn_c)

93 52 41


### Overlaps

In [39]:
# Calculate overlap time (sec).
tp_ot = round(result['OVERLAP'].sum()*.1, 1)
tp_ot

19.7

### Switches

In [None]:
# Calculate switches.

### Pauses

In [40]:
# Calculate pause time (sec).
tp_pt = round(result['PAUSE'].sum()*.1, 1)
tp_pt

44.1

### Speaker Wheel

In [41]:
# Calculate Speaker Wheel input values (percent).
sw_adult = int(round((tp_ttt-tp_ot)/tp_time*100))
sw_overlap = int(round(tp_ot/tp_time*100))
sw_child = int(round((tp_stt-tp_ot)/tp_time*100))
sw_pause = int(round(tp_pt/tp_time*100))
sw_df = {"TEACHER": sw_adult, "OVERLAPS": sw_overlap, "STUDENT": sw_child, "PAUSES": sw_pause}
sw_df

{'TEACHER': 67, 'OVERLAPS': 8, 'STUDENT': 6, 'PAUSES': 18}