In [1]:
import os
# Move the kernel's working directory four levels up from wherever it started.
# NOTE(review): presumably this lands on the repository root so that later
# cells resolve the local convokit package and relative paths -- confirm.
# NOTE(review): the path is relative, so re-running this cell without
# restarting the kernel climbs another four levels.
os.chdir('../../../..')

In [2]:
import convokit



In [3]:
from convokit import Forecaster, Corpus, download

In [4]:
# Per-utterance token budget. The Forecaster's text_func keeps only the first
# MAX_LENGTH - 1 cached tokens of each utterance; the value equals the
# 'max_length' entry echoed in the CRAFT model's initialization log.
# NOTE(review): the "- 1" presumably reserves a slot for an end-of-sequence
# token -- confirm against the CRAFT implementation.
MAX_LENGTH = 80

In [5]:
# Build the CRAFT model on CPU. Any option not listed here falls back to the
# library default echoed in the "Initializing CRAFT model with options" log.
craft_options = {
    'validation_size': 0.2,
    'train_epochs': 5,
    # Tie the model's sequence limit to the notebook-level constant that
    # text_func uses for truncation, instead of relying on the library default
    # happening to be 80 as well. The value is unchanged; this only keeps the
    # two settings from drifting apart if MAX_LENGTH is edited later.
    'max_length': MAX_LENGTH,
}
craft_model = convokit.CRAFTModel(device_type="cpu", options=craft_options)

Initializing CRAFT model with options:
{'validation_size': 0.2, 'train_epochs': 5, 'hidden_size': 500, 'encoder_n_layers': 2, 'context_encoder_n_layers': 2, 'decoder_n_layers': 2, 'dropout': 0.1, 'batch_size': 64, 'clip': 50.0, 'learning_rate': 1e-05, 'print_every': 10, 'max_length': 80, 'trained_model_output_filepath': 'finetuned_model.tar'}
Loading saved parameters...
Building encoders, decoder, and classifier...
Models built and ready to go!


In [6]:
def _truncated_tokens(utt):
    """Return the utterance's cached tokens, capped at MAX_LENGTH - 1 entries."""
    token_limit = MAX_LENGTH - 1
    return utt.meta["tokens"][:token_limit]


def _is_not_section_header(utt):
    """Select utterances whose "is_section_header" metadata is falsy."""
    return not utt.meta["is_section_header"]


def _personal_attack_label(utt):
    """Integer label taken from the utterance's personal-attack metadata flag."""
    return int(utt.meta['comment_has_personal_attack'])


def _in_train_split(convo):
    """Select conversations whose "split" metadata equals "train"."""
    return convo.meta["split"] == "train"


# Wrap the CRAFT model in a Forecaster. The helper callables above are only
# invoked later (during fit/transform), so the "tokens" metadata they read may
# be attached to the utterances after this cell runs.
forecaster = Forecaster(
    forecaster_model=craft_model,
    forecast_mode='past',
    convo_structure="linear",
    text_func=_truncated_tokens,
    utt_selector_func=_is_not_section_header,
    label_func=_personal_attack_label,
    convo_selector_func=_in_train_split,
    forecast_feat_name="prediction",
    forecast_prob_feat_name="pred_score",
    use_last_only=True,
    skip_broken_convos=False,
)

In [7]:
# Resolve the dataset location first (download() reports when a local copy
# already exists), then load the corpus from that location.
corpus_location = download("conversations-gone-awry-corpus")
corpus = Corpus(filename=corpus_location)

Dataset already exists at /Users/calebchiam/.convokit/downloads/conversations-gone-awry-corpus


In [8]:
from convokit import craft_tokenize

In [9]:
# Cache a CRAFT tokenization of every utterance under meta["tokens"], using
# the model's own vocabulary; the Forecaster's text_func reads this key.
for utterance in corpus.iter_utterances():
    utterance_tokens = craft_tokenize(craft_model.voc, utterance.text)
    utterance.add_meta("tokens", utterance_tokens)

In [10]:
# Train the CRAFT-backed forecaster on the corpus. The forecaster was built
# with a convo_selector_func matching conversations whose meta "split" is
# "train", and with label_func reading 'comment_has_personal_attack'.
# The model options request 5 training epochs and a validation_size of 0.2;
# the log below reports validation accuracy and saves the model when it
# improves.
# NOTE(review): the options list 'finetuned_model.tar' as
# trained_model_output_filepath -- presumably the checkpoint is written there,
# relative to the current working directory; confirm.
forecaster.fit(corpus)

Building optimizers...
Starting Training!
Will train for 155 iterations
Initializing ...
Training...
Iteration: 10; Percent complete: 6.5%; Average loss: 0.3469
Iteration: 20; Percent complete: 12.9%; Average loss: 0.3774
Iteration: 30; Percent complete: 19.4%; Average loss: 0.3882
Validating!
Iteration: 1; Percent complete: 12.5%
Iteration: 2; Percent complete: 25.0%
Iteration: 3; Percent complete: 37.5%
Iteration: 4; Percent complete: 50.0%
Iteration: 5; Percent complete: 62.5%
Iteration: 6; Percent complete: 75.0%
Iteration: 7; Percent complete: 87.5%
Iteration: 8; Percent complete: 100.0%
Validation set accuracy: 85.26%
Validation accuracy better than current best; saving model...
Iteration: 40; Percent complete: 25.8%; Average loss: 0.3509
Iteration: 50; Percent complete: 32.3%; Average loss: 0.3376
Iteration: 60; Percent complete: 38.7%; Average loss: 0.3410
Validating!
Iteration: 1; Percent complete: 12.5%
Iteration: 2; Percent complete: 25.0%
Iteration: 3; Percent complete: 37.

In [11]:
# Explicitly select the "past" forecast mode before running transform().
# NOTE(review): the Forecaster constructor call already passes
# forecast_mode='past', so this assignment looks redundant unless fit()
# changes the mode -- confirm, and drop the cell if it is a leftover.
forecaster.forecast_mode = "past"

In [12]:
# Run the fitted forecaster over the corpus. The constructor names
# "prediction" and "pred_score" as the forecast / forecast-probability
# feature names.
# NOTE(review): results are presumably stored under those metadata keys on the
# selected utterances -- confirm.
# transform() returns a Corpus, so as the cell's last expression its repr is
# what gets displayed after the progress log.
forecaster.transform(corpus)

Iteration: 1; Percent complete: 2.5%
Iteration: 2; Percent complete: 5.0%
Iteration: 3; Percent complete: 7.5%
Iteration: 4; Percent complete: 10.0%
Iteration: 5; Percent complete: 12.5%
Iteration: 6; Percent complete: 15.0%
Iteration: 7; Percent complete: 17.5%
Iteration: 8; Percent complete: 20.0%
Iteration: 9; Percent complete: 22.5%
Iteration: 10; Percent complete: 25.0%
Iteration: 11; Percent complete: 27.5%
Iteration: 12; Percent complete: 30.0%
Iteration: 13; Percent complete: 32.5%
Iteration: 14; Percent complete: 35.0%
Iteration: 15; Percent complete: 37.5%
Iteration: 16; Percent complete: 40.0%
Iteration: 17; Percent complete: 42.5%
Iteration: 18; Percent complete: 45.0%
Iteration: 19; Percent complete: 47.5%
Iteration: 20; Percent complete: 50.0%
Iteration: 21; Percent complete: 52.5%
Iteration: 22; Percent complete: 55.0%
Iteration: 23; Percent complete: 57.5%
Iteration: 24; Percent complete: 60.0%
Iteration: 25; Percent complete: 62.5%
Iteration: 26; Percent complete: 65.0

<convokit.model.corpus.Corpus at 0x13c3592b0>