In [1]:
import convokit

In [2]:
from convokit import Forecaster, Corpus, download

In [3]:
# Token-length cap used by this notebook: the Forecaster's text_func truncates
# each utterance's token list to MAX_LENGTH - 1 entries. The value equals the
# 'max_length' entry shown in the CRAFT model's options printout.
MAX_LENGTH = 80

In [5]:
from convokit.forecaster.CRAFTModel import CRAFTModel

In [6]:
# Build the CRAFT forecaster model on CPU. Options not listed here are filled in
# by the library, as the "Initializing CRAFT model with options" printout shows.
craft_model = CRAFTModel(
    device_type="cpu",
    options={
        "validation_size": 0.2,
        "train_epochs": 5,
        # Pass the length limit explicitly so the model and the token
        # truncation in the Forecaster's text_func are driven by the same
        # MAX_LENGTH constant instead of a coincidentally equal default.
        "max_length": MAX_LENGTH,
    },
)

Initializing CRAFT model with options:
{'validation_size': 0.2, 'train_epochs': 5, 'hidden_size': 500, 'encoder_n_layers': 2, 'context_encoder_n_layers': 2, 'decoder_n_layers': 2, 'dropout': 0.1, 'batch_size': 64, 'clip': 50.0, 'learning_rate': 1e-05, 'print_every': 10, 'max_length': 80, 'trained_model_output_filepath': 'finetuned_model.tar'}
Loading saved parameters...
Building encoders, decoder, and classifier...
Models built and ready to go!


In [7]:
# Wrap the CRAFT model in a ConvoKit Forecaster. All arguments are passed by
# keyword, so their order here is purely for readability.
forecaster = Forecaster(
    forecaster_model=craft_model,
    convo_structure="linear",
    forecast_mode="past",
    use_last_only=True,
    skip_broken_convos=False,
    # Model input: the utterance's "tokens" metadata, cut to MAX_LENGTH - 1 items.
    text_func=lambda utterance: utterance.meta["tokens"][: MAX_LENGTH - 1],
    # Label: the utterance's personal-attack flag coerced to an int.
    label_func=lambda utterance: int(utterance.meta["comment_has_personal_attack"]),
    # Names under which the forecast and its score are to be recorded.
    forecast_attribute_name="prediction",
    forecast_prob_attribute_name="pred_score",
)

In [8]:
# Resolve the dataset location via ConvoKit's download helper (the cell output
# reports when a local copy already exists), then load it as a Corpus.
corpus_path = download("conversations-gone-awry-corpus")
corpus = Corpus(filename=corpus_path)

Dataset already exists at /Users/calebchiam/.convokit/downloads/conversations-gone-awry-corpus


In [10]:
from convokit.forecaster.CRAFT import craft_tokenize

In [11]:
# Attach a "tokens" metadata entry to every utterance, produced by the CRAFT
# tokenizer with the model's vocabulary. The Forecaster's text_func reads this
# entry, so this cell must run before fit/transform.
vocabulary = craft_model.voc
for utt in corpus.iter_utterances():
    utt_tokens = craft_tokenize(vocabulary, utt.text)
    utt.add_meta("tokens", utt_tokens)

In [15]:
# Fine-tune on the conversations whose "split" metadata equals "train".
# Utterances whose "is_section_header" metadata is truthy are handed to
# ignore_utterances (presumably excluding them from training - confirm against
# the Forecaster documentation).
forecaster.fit(
    corpus,
    selector=lambda conversation: conversation.meta["split"] == "train",
    ignore_utterances=lambda utterance: utterance.meta["is_section_header"],
)

Building optimizers...
Starting Training!
Will train for 155 iterations
Initializing ...
Training...
Iteration: 10; Percent complete: 6.5%; Average loss: 0.3124
Iteration: 20; Percent complete: 12.9%; Average loss: 0.2967
Iteration: 30; Percent complete: 19.4%; Average loss: 0.3131
Validating!
Iteration: 1; Percent complete: 12.5%
Iteration: 2; Percent complete: 25.0%
Iteration: 3; Percent complete: 37.5%
Iteration: 4; Percent complete: 50.0%
Iteration: 5; Percent complete: 62.5%
Iteration: 6; Percent complete: 75.0%
Iteration: 7; Percent complete: 87.5%
Iteration: 8; Percent complete: 100.0%
Validation set accuracy: 86.65%
Validation accuracy better than current best; saving model...
Iteration: 40; Percent complete: 25.8%; Average loss: 0.2850
Iteration: 50; Percent complete: 32.3%; Average loss: 0.2896
Iteration: 60; Percent complete: 38.7%; Average loss: 0.3033
Validating!
Iteration: 1; Percent complete: 12.5%
Iteration: 2; Percent complete: 25.0%
Iteration: 3; Percent complete: 37.

In [16]:
# Set the forecast mode to "past" before transforming. The Forecaster was
# already constructed with forecast_mode 'past', so this repeats that setting.
# NOTE(review): possibly a leftover from experimenting with other modes (the
# execution counts are non-sequential) - confirm whether this cell is needed.
forecaster.forecast_mode = "past"

In [17]:
# Run the fitted forecaster over the corpus; no selector is passed in this call.
# As the cell's last expression, the returned Corpus object is echoed as output.
# NOTE(review): results are presumably stored under the "prediction" and
# "pred_score" names configured on the Forecaster - confirm.
forecaster.transform(corpus)

Iteration: 1; Percent complete: 1.5%
Iteration: 2; Percent complete: 3.0%
Iteration: 3; Percent complete: 4.5%
Iteration: 4; Percent complete: 6.1%
Iteration: 5; Percent complete: 7.6%
Iteration: 6; Percent complete: 9.1%
Iteration: 7; Percent complete: 10.6%
Iteration: 8; Percent complete: 12.1%
Iteration: 9; Percent complete: 13.6%
Iteration: 10; Percent complete: 15.2%
Iteration: 11; Percent complete: 16.7%
Iteration: 12; Percent complete: 18.2%
Iteration: 13; Percent complete: 19.7%
Iteration: 14; Percent complete: 21.2%
Iteration: 15; Percent complete: 22.7%
Iteration: 16; Percent complete: 24.2%
Iteration: 17; Percent complete: 25.8%
Iteration: 18; Percent complete: 27.3%
Iteration: 19; Percent complete: 28.8%
Iteration: 20; Percent complete: 30.3%
Iteration: 21; Percent complete: 31.8%
Iteration: 22; Percent complete: 33.3%
Iteration: 23; Percent complete: 34.8%
Iteration: 24; Percent complete: 36.4%
Iteration: 25; Percent complete: 37.9%
Iteration: 26; Percent complete: 39.4%
I

<convokit.model.corpus.Corpus at 0x12184e110>