### Importing libraries

In [73]:
import json
import pandas as pd
import numpy as np

### Loading and merging data from two keys of the JSON file

In [74]:
# Read the speech-to-text response from disk into a plain Python object.
# The encoding is pinned to UTF-8 (the standard encoding for JSON) so the file is
# decoded the same way on every platform instead of using the locale default.
with open('speechToText.json', encoding='utf-8') as json_file:
    data = json.load(json_file)

In [75]:
# Build one row per entry of the 'speaker_labels' key; the preview below shows the
# columns from / to / speaker / confidence / final.
df1 = pd.DataFrame.from_records(data=data["speaker_labels"])
df1.head(n=5)

Unnamed: 0,from,to,speaker,confidence,final
0,0.63,0.75,0,0.58,False
1,0.75,1.08,0,0.58,False
2,1.08,1.19,0,0.58,False
3,1.19,1.44,0,0.58,False
4,1.44,1.68,0,0.58,False


In [76]:
# One row per alternative, across every entry of data['results'].
stamps = pd.DataFrame.from_records(
    [a for r in data['results']
       for a in r['alternatives']])

# Gather the [word, from, to] triples of every result, not just the first one.
# Previously only row 0 of `stamps` was used, so words belonging to later results
# were lost and their speaker labels were then dropped by the inner merge.
# Only the first alternative of each result is read so that a word is never counted
# twice -- presumably the first alternative is the preferred transcript; confirm
# against the service documentation.
z = [
    stamp
    for result in data['results']
    for stamp in (result.get('alternatives') or [{}])[0].get('timestamps', [])
]

In [77]:
# Turn the [word, from, to] triples into a frame whose timestamp columns carry the
# same names as in df1.
word_columns = ['word', 'from', 'to']
df2 = pd.DataFrame(data=z, columns=word_columns)
df2.head(n=5)

Unnamed: 0,word,from,to
0,so,0.63,0.75
1,thank,0.75,1.08
2,you,1.08,1.19
3,very,1.19,1.44
4,much,1.44,1.68


In [78]:
# Attach each word to its speaker label. 'from' and 'to' are the only columns the
# two frames share, so they are the join keys; the join type is the default inner join.
df = df1.merge(df2, how='inner', on=['from', 'to'])
df.head(n=5)

Unnamed: 0,from,to,speaker,confidence,final,word
0,0.63,0.75,0,0.58,False,so
1,0.75,1.08,0,0.58,False,thank
2,1.08,1.19,0,0.58,False,you
3,1.19,1.44,0,0.58,False,very
4,1.44,1.68,0,0.58,False,much


Next, we sort the data by the `from` and `to` timestamps (it turns out the data was already sorted).
Then we accumulate, in order, all the words spoken by one speaker in a single turn. A turn is a run of consecutive rows that share the same value in the `speaker` column (either 0 or 1), which identifies who spoke each word.
Finally, we join the words of each such run of consecutive rows to form sentences.

In [79]:
# Order the words chronologically: by start time first, then by end time.
df = df.sort_values(by=['from', 'to'])
df.head(n=5)

Unnamed: 0,from,to,speaker,confidence,final,word
0,0.63,0.75,0,0.58,False,so
1,0.75,1.08,0,0.58,False,thank
2,1.08,1.19,0,0.58,False,you
3,1.19,1.44,0,0.58,False,very
4,1.44,1.68,0,0.58,False,much


In [80]:
# Flag every row whose speaker differs from the row before it (the first row always
# counts as a change), then take a running total of the flags so that each run of
# consecutive same-speaker rows gets its own id, starting at 1.
speaker_changed = df['speaker'].shift() != df['speaker']
df['curr_speaker'] = speaker_changed.cumsum()
df['curr_speaker'].value_counts()

6     48
10    32
1     14
9     12
4     12
2     10
8      6
3      5
7      1
5      1
Name: curr_speaker, dtype: int64

In [81]:
# Preview the first rows to check the new 'curr_speaker' column.
df.head(n=5)

Unnamed: 0,from,to,speaker,confidence,final,word,curr_speaker
0,0.63,0.75,0,0.58,False,so,1
1,0.75,1.08,0,0.58,False,thank,1
2,1.08,1.19,0,0.58,False,you,1
3,1.19,1.44,0,0.58,False,very,1
4,1.44,1.68,0,0.58,False,much,1


In [82]:
# Collapse each run of consecutive same-speaker rows (one 'curr_speaker' id per run)
# into a single row: the run's words joined by spaces, plus the run's speaker label.
transcripts = (
    df.groupby('curr_speaker')
      .agg({
          'word': ' '.join,
          # Every row of a run has the same speaker, so 'min' just picks that label.
          # The string alias is used instead of the builtin `min`, which recent pandas
          # versions warn about when it is passed to agg.
          'speaker': 'min',
      })
      .rename(columns={'word': 'sentence'})
      # `drop` must be a real boolean; the string 'True' used before only worked
      # because any non-empty string is truthy.
      .reset_index(drop=True)
)


### Required Transcript Segregation

In [83]:
# Print one line per speaker turn, in order, pairing each speaker label with its sentence.
for speaker_id, sentence in zip(transcripts['speaker'], transcripts['sentence']):
    print("Speaker ", speaker_id, ": ", sentence)
        

Speaker  0 :  so thank you very much for coming David it's good to have you here
Speaker  1 :  good as my pleasure Michael glad to be with you
Speaker  0 :  how real is artificial intelligence
Speaker  1 :  the question of how real is artificial intelligence is a complex one
Speaker  0 :  on
Speaker  1 :  I would say %HESITATION if if we define artificial intelligence is the ability of a machine on its own to understand large volumes of data to reason that data with a purpose to it to predict the future and then tell you continue to learn and get better
Speaker  0 :  that
Speaker  1 :  is happening today in certain fields
Speaker  0 :  how far in the continuum is IBM Watson in operability artificial intelligence
Speaker  1 :  yes so so first of all once once it's actually intelligent it will no longer be artificial so we're moving to the point that these systems increasingly understand enormous volumes of data


In [84]:
# Put the speaker label first and the sentence second for display.
transcripts = transcripts[['speaker', 'sentence']]
transcripts

Unnamed: 0,speaker,sentence
0,0,so thank you very much for coming David it's g...
1,1,good as my pleasure Michael glad to be with you
2,0,how real is artificial intelligence
3,1,the question of how real is artificial intelli...
4,0,on
5,1,I would say %HESITATION if if we define artifi...
6,0,that
7,1,is happening today in certain fields
8,0,how far in the continuum is IBM Watson in oper...
9,1,yes so so first of all once once it's actually...
