In [7]:
import os
os.chdir('../..')

In [8]:
import convokit
from convokit import Corpus, download, Utterance, Speaker, User

In [9]:
utterances = [Utterance(id=str(i), speaker=User(id='speaker'+str(i))) for i in range(10)]

In [11]:
corpus.random_user().name



'speaker2'

In [4]:
corpus = Corpus(utterances=utterances)

In [5]:
for user in corpus.iter_users():
    print(user)

Speaker('id': speaker0, 'meta': {})
Speaker('id': speaker1, 'meta': {})
Speaker('id': speaker2, 'meta': {})
Speaker('id': speaker3, 'meta': {})
Speaker('id': speaker4, 'meta': {})
Speaker('id': speaker5, 'meta': {})
Speaker('id': speaker6, 'meta': {})
Speaker('id': speaker7, 'meta': {})
Speaker('id': speaker8, 'meta': {})
Speaker('id': speaker9, 'meta': {})


  deprecation("iter_users()", "iter_speakers()")


In [6]:
print(corpus.meta_index)

{'utterances-index': {}, 'speakers-index': {}, 'conversations-index': {}, 'overall-index': {}, 'version': 0}


In [7]:
corpus.print_summary_stats()

Number of Speakers: 10
Number of Utterances: 10
Number of Conversations: 1


## Addition / Updating behavior

### Normal behavior

In [8]:
for utt in corpus.iter_utterances():
    utt.meta['good_meta'] = 1

In [9]:
print(corpus.meta_index)

{'utterances-index': {'good_meta': "<class 'int'>"}, 'speakers-index': {}, 'conversations-index': {}, 'overall-index': {}, 'version': 0}


### Selective add

In [10]:
for utt in corpus.iter_utterances(): # annotate first utt
    utt.meta['okay_meta'] = 1
    break

In [11]:
print(corpus.meta_index)

{'utterances-index': {'good_meta': "<class 'int'>", 'okay_meta': "<class 'int'>"}, 'speakers-index': {}, 'conversations-index': {}, 'overall-index': {}, 'version': 0}


### ConvoKit 2.0 would have failed on this selective add

In [12]:
idx = 1 ## 
for utt in corpus.iter_utterances(): # annotate second utt
    if idx == 2:
        utt.meta['okay_meta2'] = 1
    idx += 1

In [13]:
print(corpus.meta_index)

{'utterances-index': {'good_meta': "<class 'int'>", 'okay_meta': "<class 'int'>", 'okay_meta2': "<class 'int'>"}, 'speakers-index': {}, 'conversations-index': {}, 'overall-index': {}, 'version': 0}


### ConvoKit 2.0 / 2.3 fails on this inconsistent metadata type

In [14]:
for idx, utt in enumerate(corpus.iter_utterances()): # annotate alternating utts
    if idx % 2:
        utt.meta['bad_meta'] = 1
    else:
        utt.meta['bad_meta'] = None


In [15]:
print(corpus.meta_index)

{'utterances-index': {'good_meta': "<class 'int'>", 'okay_meta': "<class 'int'>", 'okay_meta2': "<class 'int'>", 'bad_meta': "<class 'NoneType'>"}, 'speakers-index': {}, 'conversations-index': {}, 'overall-index': {}, 'version': 0}


Can't enforce types without an utterance-by-utterance metadata type check. (How does Pandas do it? Cython)

Can't add new types (e.g. Union) either without an utterance-by-utterance type check.

Things currently work because of luck or because meta_index working is not crucial except for binary data loading.

## Deletion

In [16]:
for utt in corpus.iter_utterances():
    utt.meta['to_be_deleted'] = 1

In [17]:
print(corpus.meta_index)

{'utterances-index': {'good_meta': "<class 'int'>", 'okay_meta': "<class 'int'>", 'okay_meta2': "<class 'int'>", 'bad_meta': "<class 'NoneType'>", 'to_be_deleted': "<class 'int'>"}, 'speakers-index': {}, 'conversations-index': {}, 'overall-index': {}, 'version': 0}


### 2.3 Deleting metadata from one obj deletes from all objs (no such behavior in 2.0)

In [18]:
del corpus.random_utterance().meta['to_be_deleted']

In [19]:
for utt in corpus.iter_utterances():
    print(utt.meta.get('to_be_deleted', None))

None
None
None
None
None
None
None
None
None
None


In [20]:
for user in corpus.random_conversation().iter_users():
    print(user)

Speaker('id': speaker6, 'meta': {})
Speaker('id': speaker8, 'meta': {})
Speaker('id': speaker5, 'meta': {})
Speaker('id': speaker9, 'meta': {})
Speaker('id': speaker4, 'meta': {})
Speaker('id': speaker2, 'meta': {})
Speaker('id': speaker0, 'meta': {})
Speaker('id': speaker1, 'meta': {})
Speaker('id': speaker7, 'meta': {})
Speaker('id': speaker3, 'meta': {})


  deprecation("iter_users()", "iter_speakers()")


In [21]:
print(corpus.meta_index)

{'utterances-index': {'good_meta': "<class 'int'>", 'okay_meta': "<class 'int'>", 'okay_meta2': "<class 'int'>", 'bad_meta': "<class 'NoneType'>"}, 'speakers-index': {}, 'conversations-index': {}, 'overall-index': {}, 'version': 0}
