# Import Necessary Libraries

In [36]:
import pandas as pd
import glob

# Load Collaboration and Survey Datasets

In [37]:
# Collaboration table: the left-hand side of every merge performed below.
dataset_collaboration = pd.read_csv(filepath_or_buffer='data/dataset_collaboration.csv')
# Overall survey table: melted below on 'meeting_number', so it is expected to be
# wide, with the remaining columns carrying a speaker number in their names.
dataset_overall = pd.read_csv(filepath_or_buffer='data/dataset_survey_overall.csv')

# Process Overall Survey Data

In [38]:
# Reshape the overall survey from wide to long: every non-'meeting_number' column
# becomes one row holding that column's name ('speaker') and value ('collaboration_score').
dataset_overall_melted = pd.melt(
    dataset_overall,
    id_vars=['meeting_number'],
    var_name='speaker',
    value_name='collaboration_score',
)
# Pull the first run of digits out of the former column name to get the join key.
# The pattern is a raw string because '\d' in a plain literal is an invalid escape
# sequence (a warning on recent Python versions); expand=False returns a Series
# rather than a one-column DataFrame.
dataset_overall_melted['speaker_number'] = (
    dataset_overall_melted['speaker'].str.extract(r'(\d+)', expand=False).astype(int)
)
# Reassign instead of dropping in place so re-running the cell stays predictable.
dataset_overall_melted = dataset_overall_melted.drop(columns='speaker')

# Merge Collaboration Data with Overall Survey Scores

In [39]:
# Left join keeps every collaboration row; rows without a matching survey entry
# get NaN in 'collaboration_score'.
merged_table = pd.merge(
    dataset_collaboration,
    dataset_overall_melted,
    how='left',
    on=['meeting_number', 'speaker_number'],
)
# Project 3 rows are forced to the -1 placeholder; every other row keeps its survey score.
# Vectorized equivalent of the former row-wise apply: keep the score where
# project != 3, otherwise substitute -1.
merged_table['overall_collaboration_score'] = merged_table['collaboration_score'].where(
    merged_table['project'] != 3, -1
)
# The raw survey column is no longer needed once the derived score exists.
merged_table = merged_table.drop(columns=['collaboration_score'])

# Initialize Individual Collaboration Score to -1 for All Rows (Only Project 4 Rows Are Updated Later)

In [40]:
# Start every row at the -1 placeholder; the per-file loop further down only
# overwrites rows where project == 4 and a survey score was found.
merged_table = merged_table.assign(individual_collaboration_score=-1)

# List All Individual Survey Files

In [41]:
# Gather every path that starts with the individual-survey prefix, then sort the
# list so the files are processed in a stable, lexicographic order.
individual_files = glob.glob('data/dataset_survey_individual_*')
individual_files.sort()

# Process Each Individual Survey File and Update Collaboration Data

In [42]:
# Fold each individual-survey file into merged_table, one file per iteration.
for file in individual_files:
    dataset_individual = pd.read_csv(file)

    # Reshape wide -> long: one row per (scorer, meeting_number, rated column),
    # with the rated column's name in 'speaker' and its value in 'score'.
    dataset_individual_melted = pd.melt(
        dataset_individual,
        id_vars=['scorer', 'meeting_number'],
        var_name='speaker',
        value_name='score',
    )
    # The first run of digits in the former column name becomes next_speaker_id.
    # Raw string: '\d' in a plain literal is an invalid escape sequence (a warning
    # on recent Python versions); expand=False yields a Series, not a DataFrame.
    dataset_individual_melted['next_speaker_id'] = (
        dataset_individual_melted['speaker'].str.extract(r'(\d+)', expand=False).astype(int)
    )
    dataset_individual_melted = dataset_individual_melted.drop(columns='speaker')

    # Left join so no collaboration row is lost. 'scorer' is deliberately matched
    # against BOTH speaker_id and speaker_number, so a row only receives a score
    # when those two columns agree with the scorer.
    temp_merged_table = pd.merge(
        merged_table,
        dataset_individual_melted,
        how='left',
        left_on=['meeting_number', 'speaker_id', 'next_speaker_id', 'speaker_number'],
        right_on=['meeting_number', 'scorer', 'next_speaker_id', 'scorer']
    )

    # Take the freshly joined score only for project 4 rows that actually matched;
    # everywhere else keep the value accumulated so far (initially the -1 placeholder).
    # Vectorized replacement for the former row-wise apply. The result is float
    # whenever 'score' contains NaN for unmatched rows.
    has_project4_score = (temp_merged_table['project'] == 4) & temp_merged_table['score'].notna()
    temp_merged_table['individual_collaboration_score'] = temp_merged_table['score'].where(
        has_project4_score, temp_merged_table['individual_collaboration_score']
    )
    # Remove the join helpers so the next iteration's merge starts from a clean schema.
    temp_merged_table = temp_merged_table.drop(columns=['score', 'scorer'])

    # Carry the updated table into the next iteration.
    merged_table = temp_merged_table

# Reorder Columns and Save the Final Result

In [43]:
# Column order of the exported table. Selecting with this list also discards any
# column not named here and raises KeyError if one of these is missing.
final_columns = [
    'id', 'project', 'meeting_number', 'speaker_number',
    'speech_frequency', 'total_words', 'duration', 'normalized_speech_frequency',
    'speaker_id', 'next_speaker_id', 'count',
    'network_density', 'weighted_network_density', 'gini_coefficient', 'interaction_equality_index',
    'degree_centrality', 'indegree_centrality', 'outdegree_centrality',
    'betweenness_centrality', 'closeness_centrality', 'eigenvector_centrality', 'pagerank',
    'overall_collaboration_score', 'individual_collaboration_score',
]

merged_table = merged_table.loc[:, final_columns]
# index=False keeps the positional index out of the CSV.
merged_table.to_csv(path_or_buf='data/dataset_collaboration_with_survey_scores.csv', index=False)

# Display the Result

In [44]:
# Preview the first five rows of the exported table as a quick sanity check.
merged_table.head(n=5)

Unnamed: 0,id,project,meeting_number,speaker_number,speech_frequency,total_words,duration,normalized_speech_frequency,speaker_id,next_speaker_id,...,interaction_equality_index,degree_centrality,indegree_centrality,outdegree_centrality,betweenness_centrality,closeness_centrality,eigenvector_centrality,pagerank,overall_collaboration_score,individual_collaboration_score
0,3_0_SPEAKER_00,3,1,0,22,8731,98,0.22449,0,0,...,0.279554,2,1,1,0.0,0.027778,0.013484,0.04398,-1.0,-1.0
1,3_0_SPEAKER_00,3,1,0,22,8731,98,0.22449,0,1,...,0.279554,2,1,1,0.0,0.027778,0.013484,0.04398,-1.0,-1.0
2,3_0_SPEAKER_00,3,1,0,22,8731,98,0.22449,0,2,...,0.279554,2,1,1,0.0,0.027778,0.013484,0.04398,-1.0,-1.0
3,3_0_SPEAKER_00,3,1,0,22,8731,98,0.22449,0,3,...,0.279554,2,1,1,0.0,0.027778,0.013484,0.04398,-1.0,-1.0
4,3_0_SPEAKER_01,3,1,1,645,8731,98,6.581633,1,0,...,0.279554,87,43,44,0.666667,0.028302,0.632985,0.335446,-1.0,-1.0
