In [1]:
import os

# Step up to the parent directory exactly once per kernel session.
# A bare os.chdir('..') is relative to the current working directory, which this
# cell itself changes, so re-running the cell would climb one more level each
# time. Remembering the starting directory makes the cell idempotent: the first
# run is equivalent to os.chdir('..'), and later runs land in the same place.
if "NOTEBOOK_START_DIR" not in globals():
    NOTEBOOK_START_DIR = os.getcwd()
os.chdir(os.path.join(NOTEBOOK_START_DIR, os.pardir))

In [3]:
import convokit



In [4]:
# Evaluate the module object so its repr shows which convokit installation was imported.
convokit

<module 'convokit' from '/Users/calebchiam/Documents/GitHub/Cornell-Conversational-Analysis-Toolkit/convokit/__init__.py'>

In [6]:
from convokit import Corpus, Utterance, User

In [7]:
"""
Basic Conversation tree (left to right within subtree => earliest to latest)
           0
    1      2      3
  4 5 6   7 8     9
10                11
"""
# One row per utterance: (id, reply_to, root, timestamp).
# Note that "2" is listed before "1", so the insertion order of the first-level
# replies differs from their timestamp order. The final row is a separate
# single-utterance conversation rooted at "other".
utterance_specs = [
    # root of the tree
    ("0", None, "0", 0),
    # replies to "0"
    ("2", "0", "0", 2),
    ("1", "0", "0", 1),
    ("3", "0", "0", 3),
    # replies to "1"
    ("4", "1", "0", 4),
    ("5", "1", "0", 5),
    ("6", "1", "0", 6),
    # replies to "2"
    ("7", "2", "0", 4),
    ("8", "2", "0", 5),
    # reply to "3"
    ("9", "3", "0", 4),
    # reply to "4"
    ("10", "4", "0", 5),
    # reply to "9"
    ("11", "9", "0", 10),
    # second conversation
    ("other", None, "other", 99),
]

# Every utterance gets its own User object named "alice".
corpus = Corpus(utterances=[
    Utterance(id=utt_id, reply_to=parent_id, root=root_id, user=User(name="alice"), timestamp=ts)
    for utt_id, parent_id, root_id, ts in utterance_specs
])

Adding some simple metadata:

In [8]:
# Conversation-level metadata: attach a key to the conversation with id "0".
conversation_zero = corpus.get_conversation("0")
conversation_zero.meta['hey'] = 'jude'

# Corpus-level metadata: attach a key to the corpus itself.
corpus.meta['foo'] = 'bar'

## Tree Traversals

In [11]:
# Work with the conversation whose id is "0" for the rest of the traversal examples.
convo = corpus.get_conversation("0")

# Utterance ids in breadth-first order.
bfs_traversal = [
    utterance.id
    for utterance in convo.traverse("bfs", as_utterance=True)
]
bfs_traversal

['0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '10', '11']

`traverse()` returns an iterator of Utterances (`as_utterance=True`) or an iterator of UtteranceNodes (`as_utterance=False`).

In [16]:
# traverse() already returns an iterator, and it is consumed only once here,
# so loop over it directly instead of first copying every utterance into a list.
for utt in convo.traverse("bfs", as_utterance=True):
    print(utt)

Utterance('id': '0', 'root': 0, 'reply-to': None, 'user': User('id': alice, 'meta': {}), 'timestamp': 0, 'text': None, 'meta': {})
Utterance('id': '1', 'root': 0, 'reply-to': 0, 'user': User('id': alice, 'meta': {}), 'timestamp': 1, 'text': None, 'meta': {})
Utterance('id': '2', 'root': 0, 'reply-to': 0, 'user': User('id': alice, 'meta': {}), 'timestamp': 2, 'text': None, 'meta': {})
Utterance('id': '3', 'root': 0, 'reply-to': 0, 'user': User('id': alice, 'meta': {}), 'timestamp': 3, 'text': None, 'meta': {})
Utterance('id': '4', 'root': 0, 'reply-to': 1, 'user': User('id': alice, 'meta': {}), 'timestamp': 4, 'text': None, 'meta': {})
Utterance('id': '5', 'root': 0, 'reply-to': 1, 'user': User('id': alice, 'meta': {}), 'timestamp': 5, 'text': None, 'meta': {})
Utterance('id': '6', 'root': 0, 'reply-to': 1, 'user': User('id': alice, 'meta': {}), 'timestamp': 6, 'text': None, 'meta': {})
Utterance('id': '7', 'root': 0, 'reply-to': 2, 'user': User('id': alice, 'meta': {}), 'timestamp': 4,

In [13]:
# With as_utterance=False the traversal yields UtteranceNode wrappers instead of Utterances;
# list() materializes the iterator so the nodes are displayed.
list(convo.traverse("bfs", as_utterance=False))

[<convokit.model.utteranceNodeWrapper.UtteranceNode at 0x13974fac8>,
 <convokit.model.utteranceNodeWrapper.UtteranceNode at 0x13974f978>,
 <convokit.model.utteranceNodeWrapper.UtteranceNode at 0x13974f588>,
 <convokit.model.utteranceNodeWrapper.UtteranceNode at 0x13974f9b0>,
 <convokit.model.utteranceNodeWrapper.UtteranceNode at 0x13974fc18>,
 <convokit.model.utteranceNodeWrapper.UtteranceNode at 0x13974fba8>,
 <convokit.model.utteranceNodeWrapper.UtteranceNode at 0x13974fc50>,
 <convokit.model.utteranceNodeWrapper.UtteranceNode at 0x13974fc88>,
 <convokit.model.utteranceNodeWrapper.UtteranceNode at 0x13974f4e0>,
 <convokit.model.utteranceNodeWrapper.UtteranceNode at 0x13974fcf8>,
 <convokit.model.utteranceNodeWrapper.UtteranceNode at 0x13974f9e8>,
 <convokit.model.utteranceNodeWrapper.UtteranceNode at 0x13974fda0>]

In [18]:
# Utterance ids in depth-first order.
dfs_traversal = [
    utterance.id
    for utterance in convo.traverse("dfs", as_utterance=True)
]
dfs_traversal

['0', '1', '4', '10', '5', '6', '2', '7', '8', '3', '9', '11']

In [19]:
# Utterance ids in post-order: every reply is listed before the utterance it replies to.
postorder_traversal = [
    utterance.id
    for utterance in convo.traverse("postorder", as_utterance=True)
]
postorder_traversal

['10', '4', '5', '6', '1', '7', '8', '2', '11', '9', '3', '0']

In [20]:
# Utterance ids in pre-order: each utterance is listed before its replies.
preorder_traversal = [
    utterance.id
    for utterance in convo.traverse("preorder", as_utterance=True)
]
preorder_traversal

['0', '1', '4', '10', '5', '6', '2', '7', '8', '3', '9', '11']

## Root to leaf paths

In [23]:
# Collect the root-to-leaf paths of the conversation; each path can be iterated
# to reach the utterances along it.
paths = convo.get_root_to_leaf_paths()

In [24]:
# Number of root-to-leaf paths in the conversation
len(paths)

6

In [25]:
# Show each root-to-leaf path as the list of utterance ids along it.
for root_to_leaf in paths:
    ids_on_path = [utterance.id for utterance in root_to_leaf]
    print(ids_on_path)

['0', '1', '6']
['0', '3', '9', '11']
['0', '2', '7']
['0', '1', '4', '10']
['0', '2', '8']
['0', '1', '5']


## Subtree extraction

In [27]:
# Fetch the subtree rooted at utterance "1"; the returned node is traversed
# via bfs_traversal(), dfs_traversal(), pre_order() and post_order().
subtree_node = convo.get_subtree("1")

In [32]:
# Utterance ids of the subtree in breadth-first order; each node exposes its utterance as `.utt`.
[subtree_member.utt.id for subtree_member in subtree_node.bfs_traversal()]

['1', '4', '5', '6', '10']

In [33]:
# Utterance ids of the subtree in depth-first order; each node exposes its utterance as `.utt`.
[subtree_member.utt.id for subtree_member in subtree_node.dfs_traversal()]

['1', '4', '10', '5', '6']

In [31]:
# Utterance ids of the subtree in pre-order; each node exposes its utterance as `.utt`.
[subtree_member.utt.id for subtree_member in subtree_node.pre_order()]

['1', '4', '10', '5', '6']

In [34]:
# Utterance ids of the subtree in post-order; each node exposes its utterance as `.utt`.
[subtree_member.utt.id for subtree_member in subtree_node.post_order()]

['10', '4', '5', '6', '1']

## Reindexing Conversations in a Corpus

In [37]:
# Print the number of users, utterances and conversations in the original corpus.
corpus.print_summary_stats()

Number of Users: 1
Number of Utterances: 13
Number of Conversations: 2


In [35]:
# Re-root the conversations at utterances "1", "2" and "3"; the result is bound to a
# separate name so it can be compared against `corpus`.
reindexed_corpus = corpus.reindex_conversations(new_convo_roots=["1", "2", "3"])

In [36]:
# Print the number of users, utterances and conversations in the reindexed corpus.
reindexed_corpus.print_summary_stats()

Number of Users: 1
Number of Utterances: 11
Number of Conversations: 3


In [38]:
# Print the reply structure of the reindexed conversation "1": one line per utterance,
# showing the user's name, indented according to reply depth.
reindexed_corpus.get_conversation("1").print_conversation_structure()

alice
    alice
        alice
    alice
    alice


In [40]:
# Utterance ids of the reindexed conversation "1" in breadth-first order.
[
    utterance.id
    for utterance in reindexed_corpus.get_conversation("1").traverse("bfs")
]

['1', '4', '5', '6', '10']