In [324]:
import os
import unittest
from speaker_id.transcribe_result import TranscribeResult
import bokeh
from bokeh.io import output_notebook
import pandas as pd

from bokeh.models import ColumnDataSource, HoverTool, ranges
from bokeh.plotting import figure, show
from bokeh.transform import factor_cmap
# Have Bokeh's show() render plots inline in the notebook output cells.
output_notebook()


# Summary

This notebook shows a visualization of the results from the Amazon Transcribe service. The initial idea was to build a dashboard that analyzes recorded meetings to see how long each speaker talks.  

https://aws.amazon.com/transcribe/
The spoken-word audio was taken from an open-source audio service. Transcribe correctly transcribed the file and identified the different speakers. 

The documentation states that it is possible to stream the audio data via HTTP/2, but the documentation for the Python library does not show how to do it. 

Compared to AWS, the documentation of the corresponding Google service is much more detailed and has examples in many different languages (Ruby, Java, Python, JavaScript, ...).
Google supports 120 languages, whereas AWS offers only Spanish and English. 

# Load the AWS Transcribe results into Python

In [325]:
# Location of the Amazon Transcribe JSON output. The default is the original
# developer-machine path; set the TRANSCRIBE_RESULT_PATH environment variable
# to point at another file without editing the notebook.
RESULT_PATH = os.environ.get(
    'TRANSCRIBE_RESULT_PATH',
    '/Users/renzo/workspace/speaker_id/speaker_id/tests/../transcribe_results/test_2.json',
)

result = TranscribeResult()
result.file_path = RESULT_PATH
# The next cell reads result.raw_dict, so load_result() presumably fills it
# from file_path -- confirm against TranscribeResult.
result.load_result()

# Create a pandas DataFrame for further processing

In [326]:
# One row per speaker segment reported by the transcription result.
segments = result.raw_dict['results']['speaker_labels']['segments']
df = pd.DataFrame(data=segments)

# The timestamps arrive as strings; convert them so they can be subtracted.
time_columns = ["end_time", "start_time"]
df[time_columns] = df[time_columns].apply(pd.to_numeric)

# Length of each segment, in the same unit as the timestamps.
df["duration"] = df['end_time'] - df['start_time']

# Preview the first five rows.
df.head()

Unnamed: 0,end_time,items,speaker_label,start_time,duration
0,3.15,"[{'start_time': '0.54', 'speaker_label': 'spk_...",spk_1,0.54,2.61
1,15.85,"[{'start_time': '4.3', 'speaker_label': 'spk_1...",spk_1,4.3,11.55
2,20.05,"[{'start_time': '17.44', 'speaker_label': 'spk...",spk_1,17.44,2.61
3,26.95,"[{'start_time': '21.34', 'speaker_label': 'spk...",spk_1,21.34,5.61
4,29.05,"[{'start_time': '28.31', 'speaker_label': 'spk...",spk_1,28.31,0.74


# Plot a speaker timeline



In [327]:

# Not used by the glyph below (it reads the DataFrame columns directly); kept
# so that any later cell referring to `source` keeps working.
source = ColumnDataSource(df)

# Derive the y-axis categories from the data instead of hard-coding six
# speakers, so transcripts with more or fewer speakers plot correctly.
categories = sorted(df['speaker_label'].unique())

p = figure(y_range=categories, plot_width=800, plot_height=300, title="Speaker Graph")
# One horizontal bar per segment, spanning start_time..end_time on the
# row of the speaker it is attributed to.
p.hbar(
    y=df['speaker_label'].values,
    left=df['start_time'].values,
    right=df['end_time'].values,
    height=0.4,
)
# Amazon Transcribe reports segment times in seconds.
p.xaxis.axis_label = "Time (s)"
p.yaxis.axis_label = "Speaker"
show(p)

# List each speaker's total speech time

This can be achieved with a simple group-by statement.

In [328]:
# Sum only the numeric columns. The `items` column holds lists, and current
# pandas no longer drops such non-numeric columns from groupby().sum(), so
# select the columns explicitly to get the same table on every version.
numeric_columns = ['end_time', 'start_time', 'duration']
sums = df.groupby('speaker_label')[numeric_columns].sum()
# Only `duration` is meaningful as a total (speaking time per speaker); the
# summed timestamps are kept to match the original table.
sums

Unnamed: 0_level_0,end_time,start_time,duration
speaker_label,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
spk_0,4328.121,4233.205,94.916
spk_1,2575.202,2506.372,68.83
spk_2,29818.231,29330.758,487.473
spk_3,1321.726,1287.966,33.76
spk_4,21001.536,20845.856,155.68
spk_5,7840.746,7710.864,129.882


In [329]:
# Take the categorical axis from the grouped index itself so every bar has a
# matching factor, rather than depending on a speaker list defined in another cell.
speakers = list(sums.index)

p = figure(y_range=speakers, plot_width=800, plot_height=300, title="Speaker Total time Graph")
# One bar per speaker, from 0 to that speaker's summed segment duration.
p.hbar(y=speakers, right=sums['duration'].values, height=0.5, left=0,
        color="navy")
# Durations are differences of Amazon Transcribe timestamps, which are in seconds.
p.xaxis.axis_label = "Total speaking time (s)"
p.yaxis.axis_label = "Speaker"

show(p)