In [1]:
from pyvis.network import Network

import plotly.express as px
import pandas as pd

In [2]:
# Load the labels
def load_labels(filename: str) -> dict:
    
    dict_storage = {}
    
    with open(filename) as file:
        for index, line in enumerate(file):
            dict_storage[index] = line.strip().split(' ')
    
    return dict_storage

dialog_act_dict = load_labels('dailydialog/dialogues_act.txt')
emotion_dict = load_labels('dailydialog/dialogues_emotion.txt')

In [3]:
# Validation

for index, act_labels in dialog_act_dict.items():
    
    emotion_labels = emotion_dict[index]
    
    if len(emotion_labels) != len(act_labels):
        print(index)

In [4]:
# Mappers

act_mapper = {
    '1': 'inform', '2': 'question', 
    '3': 'directive', '4': 'commissive'
}

emotion_mapper = {
    '0': 'neutral', '1': 'anger', '2': 'disgust', 
    '3': 'fear', '4': 'happiness', '5': 'sadness', 
    '6': 'surprise'
}

def translate_labels(act_labels: list) -> list:
    return [act_mapper[act] for act in act_labels]

def translate_emotion_labels(emotion_labels: list) -> list:
    return [emotion_mapper[emotion] for emotion in emotion_labels]

# DailyDialog Dataset Analysis

This analysis is divided into the following parts:

- Dialog Act and Emotion labels
- Utterance-related
- Topic-related

# Dialog Act and Emotion labels

The following sections looks at the visualizations, in relation to dialog act and emotion labels

The visualizations are:


In [12]:
# Knowledge interaction graph for dialog act
knowledge_graph_dict = {
    'START': {},
    'inform': {},
    'question': {},
    'directive': {},
    'commissive': {},
    'END': {}
}

def perform_mapping(act_labels: list, knowledge_graph: dict) -> dict:
    
    for index, label in enumerate(act_labels):
        
        # First label is always 'START'
        if index == 0:
            knowledge_graph['START'][label] = \
                knowledge_graph['START'].get(label, 0) + 1
        
        # Last label is always 'END' and break
        if index == len(act_labels) - 1:
            knowledge_graph[label]['END'] = \
                knowledge_graph[label].get('END', 0) + 1
            break
        
        # Fetch the next label
        # Find the connection
        next_label = act_labels[index + 1]
        knowledge_graph[label][next_label] = \
            knowledge_graph[label].get(next_label, 0) + 1
    
    return knowledge_graph

for index, act_labels in dialog_act_dict.items():

    # Translate labels
    act_labels = translate_labels(act_labels)
    knowledge_graph_dict = perform_mapping(act_labels, knowledge_graph_dict)

print(knowledge_graph_dict)

{'START': {'directive': 3113, 'question': 6469, 'inform': 3531, 'commissive': 5}, 'inform': {'inform': 17843, 'END': 8308, 'directive': 5598, 'question': 14238, 'commissive': 545}, 'question': {'inform': 21817, 'question': 3277, 'directive': 3894, 'END': 405, 'commissive': 35}, 'directive': {'commissive': 9015, 'inform': 983, 'question': 3765, 'END': 1424, 'directive': 2108}, 'commissive': {'END': 2981, 'directive': 2582, 'inform': 2358, 'question': 1679, 'commissive': 125}, 'END': {}}


In [41]:
net = Network(
    notebook=True, 
    directed=True,
    cdn_resources="remote",
    height="750"
    width='100%'
)

colors = ['#3da831', '#9a31a8', '#3155a8', '#eb4034', '#2f4f4f', '#b4a725']

# Outer key: Starting node (the 'From')
# Inner key: Ending node (the 'To')
# Weight: the value of inner key

def add_nodes(knowledge_graph_dict, net, colors):
    
    nodes = knowledge_graph_dict.keys()
    
    for index, node in enumerate(nodes):
        net.add_node(node, label=node, color = colors[index])
    
    return net

def add_connections(from_node, connection_dict, network):
    
    for to_node, weights in connection_dict.items():
        
        weights_label = f'{from_node} to {to_node} - {weights} counts'
        
        network.add_edge(
            from_node, 
            to_node, 
            value=weights, 
            title=weights_label
        )
    
    return network

net = add_nodes(knowledge_graph_dict, net, colors)

for starting_node, connections in knowledge_graph_dict.items():
    
    print(starting_node)
    print(connections)
    print('\n')
    
    net = add_connections(starting_node, connections, net)

    
net.repulsion(node_distance=700, spring_length=1000)

net.show('connections.html')

START
{'directive': 3113, 'question': 6469, 'inform': 3531, 'commissive': 5}


inform
{'inform': 17843, 'END': 8308, 'directive': 5598, 'question': 14238, 'commissive': 545}


question
{'inform': 21817, 'question': 3277, 'directive': 3894, 'END': 405, 'commissive': 35}


directive
{'commissive': 9015, 'inform': 983, 'question': 3765, 'END': 1424, 'directive': 2108}


commissive
{'END': 2981, 'directive': 2582, 'inform': 2358, 'question': 1679, 'commissive': 125}


END
{}




In [48]:
# How many START vs how many END comparisons (Bar graph - broken)

starter_sentences = {
    'START': knowledge_graph_dict['START'],
    'END': {}
}

for act, inner_dict in knowledge_graph_dict.items():
    
    if 'END' in inner_dict:
        starter_sentences['END'][act] = inner_dict['END']

# Convert to dataframe
dataframe = pd.DataFrame(starter_sentences)
dataframe.reset_index(inplace=True)
dataframe = dataframe.rename(columns={'index':'dialog_act'})

print(dataframe)

fig = px.bar(
    dataframe, 
    x="dialog_act", 
    y=['START','END'],
    color_discrete_sequence=px.colors.qualitative.D3,
    text_auto=True
)

fig.show()

   dialog_act  START   END
0   directive   3113  1424
1    question   6469   405
2      inform   3531  8308
3  commissive      5  2981


In [25]:
# If current utterance is neutral and the next utterance is non-neutral
# Find the dialog act of the current utterance and the next utterance

# The 'Key' is the neutral emotion act, 
# while the inner 'Keys' are the non-neutral emotions act

# WHY: We want to know what actually triggers the emotion inertia
# Visual: Heatmap (Non-neutral (Next) Dialog Acts vs Neutral (Current) Dialog Acts)

emotion_trigger_dict = {
    'inform': {},
    'question': {},
    'directive': {},
    'commissive': {}
}

def find_the_triggers(act_labels, emotion_labels, trigger_dict):
    
    for index, emotion in enumerate(emotion_labels):
        
        # Check if this is the end
        if index == len(emotion_labels) - 1:
            break
            
        # Check if next emotion is non-neutral
        next_emotion = emotion_labels[index + 1]
        
        # Condition: current emotion is neutral, next emotion is not
        if emotion == 'neutral' and next_emotion != 'neutral':
            
            current_dialog_act = act_labels[index]
            next_dialog_act = act_labels[index + 1]
            
            trigger_dict[current_dialog_act][next_dialog_act] = \
                trigger_dict[current_dialog_act].get(next_dialog_act, 0) + 1
            
    return trigger_dict

for index, act_labels in dialog_act_dict.items():
    
    # Translate the labels
    act_labels = translate_labels(act_labels)
    
    # Translate the emotions
    emotion_labels = translate_emotion_labels(emotion_dict[index])
    
    emotion_trigger_dict = find_the_triggers(act_labels, emotion_labels, emotion_trigger_dict)
    
print(emotion_trigger_dict)

{'inform': {'inform': 2653, 'question': 1074, 'directive': 473, 'commissive': 138}, 'question': {'inform': 1622, 'question': 205, 'directive': 147, 'commissive': 4}, 'directive': {'inform': 118, 'commissive': 1498, 'question': 220, 'directive': 104}, 'commissive': {'inform': 455, 'question': 100, 'directive': 161, 'commissive': 24}}


In [26]:
dialog_act_name = []
heatmap_array = []

for act, inner_dict in emotion_trigger_dict.items():
    
    dialog_act_name.append(act)
    heatmap_array.append([value for act, value in inner_dict.items()])

fig = px.imshow(
    heatmap_array,
    labels=dict(
        x='Next Utterance (Non-neutral) dialog act',
        y='Current Utterance (Neutral) dialog act',  
        color='Pairing'
    ),
    x=dialog_act_name,
    y=dialog_act_name,
    text_auto=True, aspect="auto"
)

fig.show()

In [27]:
# If current utterance is non-neutral and the next utterance is neutral
# Find the dialog act of the current utterance and the next utterance

# The 'Key' is the non-neutral emotion act, 
# while the inner 'Keys' are the neutral

# WHY: We want to know what actually stops the emotion inertia
# Visual: Heatmap (Neutral (Next) Dialog Acts vs Non-neutral (Current) Dialog Acts)

potong_stim_dict = {
    'inform': {},
    'question': {},
    'directive': {},
    'commissive': {}
}

def find_the_breakers(act_labels, emotion_labels, trigger_dict):
    
    for index, current_emotion in enumerate(emotion_labels):
        
        # Check if this is the end
        if index == len(emotion_labels) - 1:
            break
            
        # Check if next emotion is neutral
        next_emotion = emotion_labels[index + 1]
        
        # Condition: current emotion is non-neutral, next emotion is neutral
        if current_emotion != 'neutral' and next_emotion == 'neutral':
            
            current_dialog_act = act_labels[index]
            next_dialog_act = act_labels[index + 1]
            
            trigger_dict[current_dialog_act][next_dialog_act] = \
                trigger_dict[current_dialog_act].get(next_dialog_act, 0) + 1
            
    return trigger_dict

for index, act_labels in dialog_act_dict.items():
    
    # Translate the labels
    act_labels = translate_labels(act_labels)
    
    # Translate the emotions
    emotion_labels = translate_emotion_labels(emotion_dict[index])
    
    potong_stim_dict = find_the_breakers(act_labels, emotion_labels, emotion_trigger_dict)

print(potong_stim_dict)

{'inform': {'inform': 4217, 'question': 2255, 'directive': 1055, 'commissive': 151}, 'question': {'inform': 2979, 'question': 409, 'directive': 307, 'commissive': 4}, 'directive': {'inform': 138, 'commissive': 1833, 'question': 361, 'directive': 189}, 'commissive': {'inform': 713, 'question': 301, 'directive': 447, 'commissive': 28}}


In [28]:
dialog_act_name = []
heatmap_array = []

for act, inner_dict in potong_stim_dict.items():
    
    dialog_act_name.append(act)
    heatmap_array.append([value for act, value in inner_dict.items()])

fig = px.imshow(
    heatmap_array,
    labels=dict(
        x='Next Utterance (Neutral) dialog act',
        y='Current Utterance (Non-neutral) dialog act',  
        color='Pairing'
    ),
    x=dialog_act_name,
    y=dialog_act_name,
    text_auto=True, aspect="auto"
)

fig.show()