In [27]:
#import libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import plotly.plotly as py
import plotly.graph_objs as go
import plotly.offline as offline
import random
from __future__ import absolute_import, division, print_function
import re, string

In [28]:
# read in files
dialogue = pd.read_csv('dialogue_typeDF.csv')
insults = pd.read_csv('insultsDF.csv')
response_pause = pd.read_csv('responsePause.csv')
sentence = pd.read_csv('sentenceDF.csv')
sentiment = pd.read_csv('sentimentDF.csv')
laughter = pd.read_csv('laughter.csv')
yelling = pd.read_csv('yelling.csv')

# Percent of Time Talked by Teacher vs. Student

In [29]:
sentence['time_talked'] = sentence['end_time'] - sentence['start_time']
pauses_df = response_pause[['start_time','end_time']]
pauses_df['response_duration'] = pauses_df['end_time'] - pauses_df['start_time']



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy



In [30]:
teacher_df = sentence[sentence.speaker_label == 'spk_0']
students_df = sentence[sentence.speaker_label != 'spk_0']

In [31]:
total_talk_and_pause = sentence['time_talked'].sum() + pauses_df['response_duration'].sum() 

In [32]:
teacher_percent = round((teacher_df['time_talked'].sum()/total_talk_and_pause)*100, 2)
students_percent = round((students_df['time_talked'].sum()/total_talk_and_pause)*100, 2)
paused_percent = round((pauses_df['response_duration'].sum()/total_talk_and_pause)*100, 2)

In [33]:
trace_teacher = go.Bar(
    x = [0],
    y = [teacher_percent],
    name = 'Teacher',
    text = 'Teacher',
    marker = dict(color='rgb(0, 59, 174)'),
)

trace_students = go.Bar(
    x = [0],
    y = [students_percent],
    name = 'Students',
    text = 'Students',
    marker = dict(color='rgb(229, 83, 0)'),
)

trace_pauses = go.Bar(
    x = [0],
    y = [paused_percent],
    name = 'Paused',
    text = 'Paused'
)

layout_1 = go.Layout(
    barmode = 'stack',
    title = 'Percent of Time Talked by Teacher vs. Student vs. Time Spent Paused',
    xaxis = dict(
        range = [0,1],
        showline = False,
        showgrid = False,
        showticklabels = False
    ),
    yaxis = dict(
        range = [0,100],
        showgrid = False,
        showline = True,
        showticklabels = True,
        ),
    annotations=[
        dict(
            x = .6,
            y = teacher_percent-10,
            xref = 'x',
            yref = 'y',
            text = "The teacher spoke {0}% of the time.".format(teacher_percent),
            showarrow = False,
        ),
        dict(
            x = .6,
            y = teacher_percent+10,
            xref = 'x',
            yref = 'y',
            text = "The students spoke {0}% of the time.".format(students_percent),
            showarrow = False,
        ),
        dict(
            x = .6,
            y = 100-paused_percent/2,
            xref = 'x',
            yref = 'y',
            text = "The lesson was paused {0}% of the time.".format(paused_percent),
            showarrow = False,           
        ),
    ]
)

data_1 = [trace_teacher, trace_students, trace_pauses] 

fig = go.Figure(data = data_1, layout = layout_1)
offline.init_notebook_mode(connected=True)
offline.iplot(fig)

In [34]:
teacher_start_end = list(teacher_df['start_time']) + list(teacher_df['end_time'])
students_start_end = list(students_df['start_time']) + list(students_df['end_time'])

In [35]:
teacher_x_data = []
for i in range(len(teacher_df['start_time'])):
    start = list(teacher_df['start_time'])[i]/60
    end = list(teacher_df['end_time'])[i]/60
    start_end = start, end
    teacher_x_data.append(start_end)

In [36]:
students_x_data = []
for i in range(len(students_df['start_time'])):
    start = list(students_df['start_time'])[i]/60
    end = list(students_df['end_time'])[i]/60
    start_end = start, end
    students_x_data.append(start_end)

In [37]:
pauses_x_data = []
for i in range(len(pauses_df['start_time'])):
    start = list(pauses_df['start_time'])[i]/60
    end = list(pauses_df['end_time'])[i]/60
    start_end = start, end
    pauses_x_data.append(start_end)

In [38]:
# iterate over the start and end time data to create a plot that links the start and end time for each
# instance in which someone speaks. For example, spk_1 from 7.141 - 7.521 are connected by a line and 
# the next time spk_1 speaks is from 19.341 - 21.361, and these points are connected by a line, but
# there is a gap in between these two speaking instances.


# create any empty list to append with graph data
traces = []

# spk_0 data
# iterate over the list of start and end times to create separate traces for every speaking instance
for i in range(len(teacher_x_data)):
# use only the first instance to create a legend, or there will be a legend entry for every speaking instance
    if i == 0:
        traces.append(go.Scatter(
            x = teacher_x_data[i],
            y = [1/3,1/3],
            mode = 'lines + markers',
            line = dict(color = 'rgb(0, 59, 174)'),
            marker = dict(color = 'rgb(0, 59, 174)'),
            name = 'Teacher'
        ))
# hide the legend for the rest of the speaking instances so that there is only one legend entry for each speaker,
# rather than each speaking instance
    else:
        traces.append(go.Scatter(
            x = teacher_x_data[i],
            y = [1/3,1/3],
            mode = 'lines + markers',
            line = dict(color = 'rgb(0, 59, 174)'),
            marker = dict(color = 'rgb(0, 59, 174)'),
            name = 'Teacher',
            showlegend = False
        ))

# spk_1 data
for i in range(len(students_x_data)):
# use only the first instance to create a legend, or there will be a legend entry for every speaking instance
    if i == 0:
        traces.append(go.Scatter(
            x = students_x_data[i],
            y = [2/3,2/3],
            mode = 'lines + markers',
            line = dict(color='rgb(229, 83, 0)'),
            marker = dict(color='rgb(229, 83, 0)'),
            name='Students'
        ))
# hide the legend for the rest of the speaking instances so that there is only one legend entry for each speaker,
# rather than each speaking instance
    else:
        traces.append(go.Scatter(
            x = students_x_data[i],
            y = [2/3,2/3],
            mode = 'lines + markers',
            line = dict(color = 'rgb(229, 83, 0)'),
            marker = dict(color = 'rgb(229, 83, 0)'),
            name = 'Students',
            showlegend = False
        ))

# pause data
for i in range(len(pauses_x_data)):
# use only the first pause to create a legend, or there will be a legend entry for every pause
    if i == 0:
        traces.append(go.Scatter(
            x = pauses_x_data[i],
            y = [0.5,0.5],
            mode = 'lines + markers',
            line = dict(color='rgb(282, 0, 0)'),
            marker = dict(color='rgb(282, 0, 0)', symbol = 'triangle-down'),
            name='pause',
        ))
# hide the legend for the rest of the pauses so that there is only one legend entry for all pauses,
# rather than each pause
    else:
        traces.append(go.Scatter(
            x = pauses_x_data[i],
            y = [0.5,0.5],
            mode = 'lines + markers',
            line = dict(color = 'rgb(282, 0, 0)'),
            marker = dict(color = 'rgb(282, 0, 0)', symbol = 'triangle-down'),
            name = 'pause',
            showlegend = False
        ))    
        
data = traces
layout = go.Layout(
    title = 'Teacher vs. Student',
    xaxis = dict(
        zeroline = True,
        showline = True,
        title = 'Time (minutes)'
        ),
    yaxis = dict(
        range = [0,1],
        showgrid = False,
        zeroline = False,
        showline = False,
        ticks = '',
        showticklabels = False
        ),
)

fig = go.Figure(data = data, layout = layout)
offline.init_notebook_mode(connected=True)
offline.iplot(fig)

# Number of Questions

In [39]:
num_q = pd.merge(dialogue, sentence, on = 'Unnamed: 0', how = 'outer', suffixes = ('_dialogue', '_sentence'))

In [40]:
num_q = num_q[['sentence_type','start_time','end_time']]

In [41]:
whQuestion = num_q[num_q.sentence_type == 'whQuestion']
ynQuestion = num_q[num_q.sentence_type == 'ynQuestion']
num_q_only_qs = whQuestion.append(ynQuestion)

In [42]:
num_q_pt = pd.pivot_table(num_q, index = 'sentence_type', values='start_time', aggfunc='count')

In [43]:
total_qs = num_q_pt['whQuestion'] + num_q_pt['ynQuestion']

In [44]:
# questions data
trace_questions = go.Scatter(
    x = list(num_q_only_qs['start_time']/60),
    y = [0.5 for i in list(num_q['start_time'])],
    mode = 'markers',
    name = 'Questions'
)

data = [trace_questions]
layout=go.Layout(
    title = 'Questions',
    xaxis = dict(
        zeroline = True,
        showline = True,
        title = 'Time (minutes)'
        ),
    yaxis = dict(
        range = [0,1],
        showgrid = False,
        zeroline = False,
        showline = False,
        ticks = '',
        showticklabels = False
        ),
    annotations = [
        dict(
            x = float(list(num_q['start_time']/60)[-1])/2,
            y = 0.75,
            xref = 'x',
            yref = 'y',
            text = "There were {} questions asked during the lesson.".format(total_qs),
            showarrow = False
        ),
    ]
)

fig = go.Figure(data = data, layout = layout)
offline.init_notebook_mode(connected = True)
offline.iplot(fig)

# Number of Occurrences

In [45]:
# insulting language above 0.5

insult_lang_df = pd.merge(insults, sentence, how = 'outer', on = 'Unnamed: 0', suffixes = ('_insults','_sentence'))
insult_lang_df = insult_lang_df[insult_lang_df.insult_rating > 0.5]
insult_lang_df = insult_lang_df[['insult_rating','start_time','end_time']]

In [46]:
laugh_df = laughter[['start_time']]

In [47]:
yell_df = yelling[['start_time','yelling']]
yell_df = yell_df[yell_df.yelling == True]

In [48]:
# respectful Language Data
trace_insult_lang = go.Scatter(
    x = list(insult_lang_df['start_time']/60),
    y = [0.25 for i in list(insult_lang_df['start_time'])],
    mode = 'markers',
#     marker = dict(color='rgb(0, 59, 174)'
#     ),
    name='Insulting Language'
)

# laughing Data
trace_laugh = go.Scatter(
    x = list(laugh_df['start_time']/60),
    y = [0.5 for i in list(laugh_df['start_time'])],
    mode = 'markers',
#     marker = dict(color='rgb(0, 59, 174)'
#     ),
    name='Laughing'
)

# yelling Data
trace_yell = go.Scatter(
    x = list(yell_df['start_time']/60),
    y = [0.75 for i in list(yell_df['start_time'])],
    mode = 'markers',
#     marker = dict(color='rgb(0, 59, 174)'
#     ),
    name='Yelling'
)

data = [trace_insult_lang, trace_laugh, trace_yell]
layout=go.Layout(
    title = 'Timeline Of Occurrences',
    xaxis = dict(
        zeroline = True,
        showline = True,
        title = 'Time (minutes)'
        ),
    yaxis = dict(
        range=[0,1],
        showgrid=False,
        zeroline=False,
        showline=False,
        ticks='',
        showticklabels=False
        ),
)

fig = go.Figure(data=data, layout=layout)
offline.init_notebook_mode(connected=True)
offline.iplot(fig)