In [1]:
import pandas as pd
import re
import os
from collections import defaultdict
import networkx as nx
from networkx.algorithms.centrality import betweenness_centrality, degree_centrality, eigenvector_centrality, closeness_centrality
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import math

# Load the collaboration dataset; the path is relative to the working
# directory the notebook is run from.
df = pd.read_csv('data/dataset_collaboration.csv')

In [2]:
# Preview the first rows of the collaboration dataset.
df.head()

Unnamed: 0,id,project,meeting_number,speaker_number,speech_frequency,total_words,duration,normalized_speech_frequency,speaker_id,next_speaker_id,...,weighted_network_density,gini_coefficient,interaction_equality_index,degree_centrality,indegree_centrality,outdegree_centrality,betweenness_centrality,closeness_centrality,eigenvector_centrality,pagerank
0,3_0_SPEAKER_00,3,1,0,22,8731,98,0.22449,0,0,...,10.666667,0.402344,0.279554,2,1,1,0.0,0.027778,0.013484,0.04398
1,3_0_SPEAKER_00,3,1,0,22,8731,98,0.22449,0,1,...,10.666667,0.402344,0.279554,2,1,1,0.0,0.027778,0.013484,0.04398
2,3_0_SPEAKER_00,3,1,0,22,8731,98,0.22449,0,2,...,10.666667,0.402344,0.279554,2,1,1,0.0,0.027778,0.013484,0.04398
3,3_0_SPEAKER_00,3,1,0,22,8731,98,0.22449,0,3,...,10.666667,0.402344,0.279554,2,1,1,0.0,0.027778,0.013484,0.04398
4,3_0_SPEAKER_01,3,1,1,645,8731,98,6.581633,1,0,...,10.666667,0.402344,0.279554,87,43,44,0.666667,0.028302,0.632985,0.335446


In [3]:
# Load the two score tables that are attached to `df` further down:
# `overall` is looked up by speaker_id, `personal` by
# (speaker_id, next_speaker_id).
overall = pd.read_csv('data/overall_score.csv')
personal = pd.read_csv('data/personal_score.csv')

In [4]:
# Preview the overall-score table.
overall.head()

Unnamed: 0,speaker_id,1,2,3,4,5,6,7,8,9,10,11
0,0,3,3,2,4,5,5,4,6,8,8,8
1,1,2,3,3,5,6,6,5,7,8,8,8
2,2,3,3,3,2,5,3,5,7,6,7,9
3,3,3,4,2,3,5,2,3,6,7,7,9
4,4,2,2,2,3,5,4,4,6,7,8,9


In [5]:
# Preview the first ten rows of the personal-score table.
personal.head(10)

Unnamed: 0,speaker_id,next_speaker_id,1,2,3,4,5,6,7,8,9,10,11
0,0,0,4,3,3,4,5,3,3,5,7,6,7
1,0,1,3,4,4,3,5,4,5,6,7,6,6
2,0,2,3,3,4,3,5,4,5,6,6,5,7
3,0,3,6,6,7,7,7,7,8,8,9,8,8
4,0,4,3,4,4,4,5,5,3,6,7,6,7
5,1,0,3,4,3,4,5,5,5,6,7,7,8
6,1,1,2,4,3,3,5,5,5,6,7,6,7
7,1,2,2,3,3,3,4,5,6,6,6,6,7
8,1,3,6,6,6,7,7,8,8,8,8,7,8
9,1,4,2,3,4,4,5,6,4,6,7,7,8


In [6]:
# Inspect the rows of speaker 0 in meeting 1 of project 4.
df.loc[
  (df['project'] == 4)
  & (df['meeting_number'] == 1)
  & (df['speaker_number'] == 0)
]

Unnamed: 0,id,project,meeting_number,speaker_number,speech_frequency,total_words,duration,normalized_speech_frequency,speaker_id,next_speaker_id,...,weighted_network_density,gini_coefficient,interaction_equality_index,degree_centrality,indegree_centrality,outdegree_centrality,betweenness_centrality,closeness_centrality,eigenvector_centrality,pagerank
80,4_0_SPEAKER_00,4,1,0,1294,12023,119,10.87395,0,0,...,11.65,0.341631,0.330335,116,58,58,0.25,0.125,0.573573,0.23775
81,4_0_SPEAKER_00,4,1,0,1294,12023,119,10.87395,0,1,...,11.65,0.341631,0.330335,116,58,58,0.25,0.125,0.573573,0.23775
82,4_0_SPEAKER_00,4,1,0,1294,12023,119,10.87395,0,2,...,11.65,0.341631,0.330335,116,58,58,0.25,0.125,0.573573,0.23775
83,4_0_SPEAKER_00,4,1,0,1294,12023,119,10.87395,0,3,...,11.65,0.341631,0.330335,116,58,58,0.25,0.125,0.573573,0.23775
84,4_0_SPEAKER_00,4,1,0,1294,12023,119,10.87395,0,4,...,11.65,0.341631,0.330335,116,58,58,0.25,0.125,0.573573,0.23775


Overall Score

In [9]:
def add_overall_scores(df, overall, project=4):
  """Attach the matching overall score to every row of ``df``.

  For each row whose ``project`` equals ``project``, the score is read from
  ``overall``: the row is the first one whose ``speaker_id`` equals the row's
  ``speaker_number`` and the column is the one labelled
  ``str(meeting_number)``. Rows of other projects, and rows without a
  matching speaker or meeting column, get a missing value.

  Args:
    df: Frame with ``project``, ``meeting_number`` and ``speaker_number``
      columns. It is modified in place: an ``overall_score`` column is added
      (or overwritten).
    overall: Score table with a ``speaker_id`` column and one column per
      meeting, labelled with the meeting number as a string (as produced by
      ``pd.read_csv``).
    project: Project whose rows receive a score. Defaults to 4.

  Returns:
    The same ``df`` object that was passed in.
  """
  # Index every score once by (speaker_id, meeting column label) so that each
  # row of ``df`` costs one dictionary lookup instead of a scan of ``overall``.
  # ``setdefault`` keeps the first row when a speaker_id appears twice.
  speaker_ids = overall['speaker_id'].tolist()
  score_by_key = {}
  for column in overall.columns:
    if column == 'speaker_id':
      continue
    for speaker_id, score in zip(speaker_ids, overall[column].tolist()):
      score_by_key.setdefault((speaker_id, column), score)

  # Read the key columns directly instead of going through ``apply(axis=1)``:
  # a row-wise Series has a single dtype, so in a frame with float columns and
  # no object column the integer meeting number would be upcast to a float
  # and stringified as '1.0', which never matches a column label.
  in_project = (df['project'] == project).tolist()
  meetings = df['meeting_number'].tolist()
  speakers = df['speaker_number'].tolist()

  df['overall_score'] = [
    score_by_key.get((speaker, str(meeting))) if selected else None
    for selected, meeting, speaker in zip(in_project, meetings, speakers)
  ]
  return df

# add_overall_scores writes the new column into `df` itself and returns that
# same object, so `df_add_overall` is another name for `df`, not a copy.
df_add_overall = add_overall_scores(df, overall)

# Show the first 20 rows of project 4.
df_add_overall.loc[df_add_overall['project'] == 4].head(20)

Unnamed: 0,id,project,meeting_number,speaker_number,speech_frequency,total_words,duration,normalized_speech_frequency,speaker_id,next_speaker_id,...,gini_coefficient,interaction_equality_index,degree_centrality,indegree_centrality,outdegree_centrality,betweenness_centrality,closeness_centrality,eigenvector_centrality,pagerank,overall_score
80,4_0_SPEAKER_00,4,1,0,1294,12023,119,10.87395,0,0,...,0.341631,0.330335,116,58,58,0.25,0.125,0.573573,0.23775,3.0
81,4_0_SPEAKER_00,4,1,0,1294,12023,119,10.87395,0,1,...,0.341631,0.330335,116,58,58,0.25,0.125,0.573573,0.23775,3.0
82,4_0_SPEAKER_00,4,1,0,1294,12023,119,10.87395,0,2,...,0.341631,0.330335,116,58,58,0.25,0.125,0.573573,0.23775,3.0
83,4_0_SPEAKER_00,4,1,0,1294,12023,119,10.87395,0,3,...,0.341631,0.330335,116,58,58,0.25,0.125,0.573573,0.23775,3.0
84,4_0_SPEAKER_00,4,1,0,1294,12023,119,10.87395,0,4,...,0.341631,0.330335,116,58,58,0.25,0.125,0.573573,0.23775,3.0
85,4_0_SPEAKER_01,4,1,1,711,12023,119,5.97479,1,0,...,0.341631,0.330335,42,21,21,0.083333,0.153846,0.214472,0.104646,2.0
86,4_0_SPEAKER_01,4,1,1,711,12023,119,5.97479,1,1,...,0.341631,0.330335,42,21,21,0.083333,0.153846,0.214472,0.104646,2.0
87,4_0_SPEAKER_01,4,1,1,711,12023,119,5.97479,1,2,...,0.341631,0.330335,42,21,21,0.083333,0.153846,0.214472,0.104646,2.0
88,4_0_SPEAKER_01,4,1,1,711,12023,119,5.97479,1,3,...,0.341631,0.330335,42,21,21,0.083333,0.153846,0.214472,0.104646,2.0
89,4_0_SPEAKER_01,4,1,1,711,12023,119,5.97479,1,4,...,0.341631,0.330335,42,21,21,0.083333,0.153846,0.214472,0.104646,2.0


Personal Score

In [10]:
def add_personal_scores(df, personal, project=4):
  """Attach the matching personal score to every row of ``df``.

  For each row whose ``project`` equals ``project``, the score is read from
  ``personal``: the row is the first one whose ``speaker_id`` equals the
  row's ``speaker_number`` and whose ``next_speaker_id`` equals the row's
  ``next_speaker_id``; the column is the one labelled
  ``str(meeting_number)``. Rows of other projects, and rows without a
  matching speaker pair or meeting column, get a missing value.

  Args:
    df: Frame with ``project``, ``meeting_number``, ``speaker_number`` and
      ``next_speaker_id`` columns. It is modified in place: a
      ``personal_score`` column is added (or overwritten).
    personal: Score table with ``speaker_id`` and ``next_speaker_id``
      columns and one column per meeting, labelled with the meeting number
      as a string (as produced by ``pd.read_csv``).
    project: Project whose rows receive a score. Defaults to 4.

  Returns:
    The same ``df`` object that was passed in.
  """
  # Index every score once by (speaker_id, next_speaker_id, meeting label) so
  # that each row of ``df`` costs one dictionary lookup instead of a scan of
  # ``personal``. ``setdefault`` keeps the first row for a duplicated pair.
  pair_columns = ('speaker_id', 'next_speaker_id')
  speaker_ids = personal['speaker_id'].tolist()
  next_ids = personal['next_speaker_id'].tolist()
  score_by_key = {}
  for column in personal.columns:
    if column in pair_columns:
      continue
    column_scores = personal[column].tolist()
    for speaker_id, next_id, score in zip(speaker_ids, next_ids, column_scores):
      score_by_key.setdefault((speaker_id, next_id, column), score)

  # Read the key columns directly instead of going through ``apply(axis=1)``:
  # a row-wise Series has a single dtype, so in a frame with float columns and
  # no object column the integer meeting number would be upcast to a float
  # and stringified as '1.0', which never matches a column label.
  in_project = (df['project'] == project).tolist()
  meetings = df['meeting_number'].tolist()
  speakers = df['speaker_number'].tolist()
  next_speakers = df['next_speaker_id'].tolist()

  df['personal_score'] = [
    score_by_key.get((speaker, next_speaker, str(meeting))) if selected else None
    for selected, meeting, speaker, next_speaker
    in zip(in_project, meetings, speakers, next_speakers)
  ]
  return df

# add_personal_scores writes the new column into `df` itself and returns that
# same object, so `df_add_personal` is another name for `df` and also carries
# the `overall_score` column added to `df` earlier.
df_add_personal = add_personal_scores(df, personal)

# Show the first 20 rows of project 4.
df_add_personal.loc[df_add_personal['project'] == 4].head(20)

Unnamed: 0,id,project,meeting_number,speaker_number,speech_frequency,total_words,duration,normalized_speech_frequency,speaker_id,next_speaker_id,...,interaction_equality_index,degree_centrality,indegree_centrality,outdegree_centrality,betweenness_centrality,closeness_centrality,eigenvector_centrality,pagerank,overall_score,personal_score
80,4_0_SPEAKER_00,4,1,0,1294,12023,119,10.87395,0,0,...,0.330335,116,58,58,0.25,0.125,0.573573,0.23775,3.0,4.0
81,4_0_SPEAKER_00,4,1,0,1294,12023,119,10.87395,0,1,...,0.330335,116,58,58,0.25,0.125,0.573573,0.23775,3.0,3.0
82,4_0_SPEAKER_00,4,1,0,1294,12023,119,10.87395,0,2,...,0.330335,116,58,58,0.25,0.125,0.573573,0.23775,3.0,3.0
83,4_0_SPEAKER_00,4,1,0,1294,12023,119,10.87395,0,3,...,0.330335,116,58,58,0.25,0.125,0.573573,0.23775,3.0,6.0
84,4_0_SPEAKER_00,4,1,0,1294,12023,119,10.87395,0,4,...,0.330335,116,58,58,0.25,0.125,0.573573,0.23775,3.0,3.0
85,4_0_SPEAKER_01,4,1,1,711,12023,119,5.97479,1,0,...,0.330335,42,21,21,0.083333,0.153846,0.214472,0.104646,2.0,3.0
86,4_0_SPEAKER_01,4,1,1,711,12023,119,5.97479,1,1,...,0.330335,42,21,21,0.083333,0.153846,0.214472,0.104646,2.0,2.0
87,4_0_SPEAKER_01,4,1,1,711,12023,119,5.97479,1,2,...,0.330335,42,21,21,0.083333,0.153846,0.214472,0.104646,2.0,2.0
88,4_0_SPEAKER_01,4,1,1,711,12023,119,5.97479,1,3,...,0.330335,42,21,21,0.083333,0.153846,0.214472,0.104646,2.0,6.0
89,4_0_SPEAKER_01,4,1,1,711,12023,119,5.97479,1,4,...,0.330335,42,21,21,0.083333,0.153846,0.214472,0.104646,2.0,2.0
