In [None]:
import pandas as pd
import re
import json
from collections import defaultdict
import matplotlib.pyplot as plt
import seaborn as sns
import networkx as nx

In [None]:
# Path to the transcription text file that the next cell opens and reads.
# NOTE(review): this is an absolute, machine-specific Windows path, so the notebook
# only runs as-is on a machine with this exact directory — consider making the
# base directory configurable.
file_path = 'D:\\Programmers Dev Course_Data Analyst Track\\cowork_analysis\\transcription.txt'

In [None]:
# Load the whole transcript into memory as a single UTF-8 decoded string
with open(file_path, encoding='utf-8') as transcript:
    data = transcript.read()

In [None]:
# Split the transcript into speaker turns.
# The pattern has one capture group, so split() returns the text before the
# first label followed by alternating (label, text) pieces.
speaker_pattern = re.compile(r'Speaker (SPEAKER_\d+):')
pieces = speaker_pattern.split(data)
turns = pieces[1:]  # drop whatever precedes the first speaker label
# Even positions hold the captured speaker IDs, odd positions the spoken text
speakers, texts = turns[::2], turns[1::2]

In [None]:
# Assemble the turns into a two-column table: one row per turn, in transcript order
turn_columns = {'Speaker': speakers, 'Text': texts}
df = pd.DataFrame.from_dict(turn_columns)

In [None]:
# Interaction Matrix
# Count, for every pair of consecutive turns, how often `prev_speaker` is
# immediately followed by `curr_speaker`: interaction_matrix[prev][curr] = count.
interaction_matrix = defaultdict(lambda: defaultdict(int))
# Pull the column out once instead of doing a row lookup per iteration
speaker_sequence = df['Speaker'].tolist()
for prev_speaker, curr_speaker in zip(speaker_sequence, speaker_sequence[1:]):
    interaction_matrix[prev_speaker][curr_speaker] += 1

# pandas turns the outer dict keys into columns and the inner keys into the index,
# so rows are the speaker who takes the turn and columns are the previous speaker.
# Reindexing on the full, sorted speaker list keeps the matrix square and aligned
# (the diagonal is self-transitions) even when a speaker only opens or only closes
# the conversation; missing pairs become 0 and counts stay integers, not floats.
all_speakers = sorted(set(speaker_sequence))
interaction_df = (
    pd.DataFrame(interaction_matrix)
    .reindex(index=all_speakers, columns=all_speakers)
    .fillna(0)
    .astype(int)
)

In [None]:
# Convert the interaction matrix to a nested dict for the JSON output.
# orient='index' keys the outer dict by row label and each inner dict by column
# label. interaction_df is built from a {previous: {current: count}} mapping, which
# pandas lays out with previous speakers as columns and current speakers as rows,
# so the result reads {current_speaker: {previous_speaker: count}}.
interaction_json = interaction_df.to_dict(orient='index')

In [None]:
# Frequency Analysis
# Number of turns per speaker, most frequent first, as a two-column table
frequency = (
    df['Speaker']
    .value_counts()
    .rename_axis('Speaker')
    .reset_index(name='Frequency')
)

In [None]:
# Convert the frequency table to a list of dicts for the JSON output.
# orient='records' emits one dict per row, keyed by the column names
# ('Speaker' and 'Frequency').
frequency_json = frequency.to_dict(orient='records')

In [None]:
# Build a directed graph of speaker hand-offs: an edge A -> B means B spoke
# right after A, and its 'weight' is how many times that happened.
G = nx.DiGraph()
turn_speakers = list(df['Speaker'])
for prev_speaker, curr_speaker in zip(turn_speakers, turn_speakers[1:]):
    if not G.has_edge(prev_speaker, curr_speaker):
        # First time this hand-off is seen
        G.add_edge(prev_speaker, curr_speaker, weight=1)
        continue
    G[prev_speaker][curr_speaker]['weight'] += 1

In [None]:
# Visualization (Optional)
# Plot the interaction counts as an annotated heatmap.
# Rows of interaction_df (y axis) are the speaker taking the turn; columns
# (x axis) are the speaker who spoke immediately before.
plt.figure(figsize=(10, 8))
# fmt='.0f' prints whole-number counts; seaborn's default '.2g' would render
# counts of 100 or more in scientific notation (e.g. 1.5e+02).
sns.heatmap(interaction_df, annot=True, fmt='.0f', cmap='Blues')
plt.title('Speaker Interaction Heatmap')
plt.xlabel('Speaker')
plt.ylabel('Follows Speaker')
plt.show()

In [None]:
# Bar chart of how many turns each speaker took
fig, ax = plt.subplots(figsize=(10, 6))
sns.barplot(data=frequency, x='Speaker', y='Frequency', ax=ax)
ax.set_title('Speaker Frequency')
ax.set_xlabel('Speaker')
ax.set_ylabel('Number of Turns')
plt.show()

In [None]:
# Plot the Interaction Network Graph
# spring_layout starts from random positions; a fixed seed makes the figure
# reproducible across runs.
pos = nx.spring_layout(G, seed=42)
plt.figure(figsize=(12, 10))
nx.draw(G, pos, with_labels=True, node_size=3000, node_color="skyblue", font_size=20, font_color="black", font_weight="bold", width=2, edge_color="gray")
edge_labels = nx.get_edge_attributes(G, 'weight')
# label_pos moves each label off the edge midpoint; at the default 0.5 the labels
# of reciprocal edges (A -> B and B -> A) are drawn on top of each other.
nx.draw_networkx_edge_labels(G, pos, edge_labels=edge_labels, font_color='red', label_pos=0.3)
plt.title('Speaker Interaction Network')
plt.show()

In [None]:
# Destination of the combined results
output_file_path = 'D:\\Programmers Dev Course_Data Analyst Track\\cowork_analysis\\analysis_output.json'

# Bundle both analyses under their output keys
output = {'interaction_matrix': interaction_json, 'frequency_analysis': frequency_json}

# Write the bundle as pretty-printed UTF-8 JSON, keeping non-ASCII characters unescaped
with open(output_file_path, mode='w', encoding='utf-8') as json_out:
    json.dump(output, json_out, indent=4, ensure_ascii=False)

# Leave the dict as the cell's last expression so the notebook displays it
output