In [1]:
import gc
import json
from pathlib import Path

import pandas as pd

from ulmfit_attention import scenarios
from hyperspace_explorer import configurables

In [2]:
# Both directories are looked up next to the current working directory,
# i.e. as children of its (resolved) parent directory.
tasks_dir = Path.cwd().resolve().parent.joinpath('tasks')

# configs normally stored in the database
configs_dir = Path.cwd().resolve().parent.joinpath('configs')

In [3]:
def load_task_conf(name):
    """Load the task configuration stored as ``<name>.json`` in ``tasks_dir``.

    Args:
        name: File name of the task config, without the ``.json`` extension.

    Returns:
        The parsed JSON content of the file.

    Raises:
        FileNotFoundError: If ``tasks_dir / f'{name}.json'`` does not exist.
        json.JSONDecodeError: If the file does not contain valid JSON.
    """
    # Use a context manager so the file handle is closed deterministically
    # instead of lingering until garbage collection. JSON is read as UTF-8 so
    # the result does not depend on the platform's default encoding.
    with (tasks_dir / f'{name}.json').open(encoding='utf-8') as conf_file:
        return json.load(conf_file)

def load_config(name):
    """Load the model/training configuration stored as ``<name>.json`` in ``configs_dir``.

    Args:
        name: File name of the config, without the ``.json`` extension.

    Returns:
        The parsed JSON content of the file.

    Raises:
        FileNotFoundError: If ``configs_dir / f'{name}.json'`` does not exist.
        json.JSONDecodeError: If the file does not contain valid JSON.
    """
    # Use a context manager so the file handle is closed deterministically
    # instead of lingering until garbage collection. JSON is read as UTF-8 so
    # the result does not depend on the platform's default encoding.
    with (configs_dir / f'{name}.json').open(encoding='utf-8') as conf_file:
        return json.load(conf_file)

### Training a model on a 1k sample of the IMDB training dataset

`scenario.single_run` returns both the result and the trained learner object.

In [4]:
# Task definition: provides the 'Scenario' settings and the 'seed' used below.
task_conf = load_task_conf('imdb_1k_sample_single')
# Model/training config, read from the `configs` directory.
config = load_config('282_sample_dataset_head_training_chosen')

In [5]:
# Build the scenario described by the task definition.
scenario = scenarios.Scenario.from_config(task_conf['Scenario'])
# The run uses the seed prescribed by the task, not one stored with the config.
config.update(seed=task_conf['seed'])

In [6]:
# Display the config (including the seed just copied from the task) for reference.
config

{'Classifier': {'Aggregation': {'agg_bn': False,
   'agg_dropouts': [0.2, 0],
   'agg_layers': [50, 10],
   'att_bn': False,
   'att_dropouts': [0, 0],
   'att_hid_layers': [50],
   'className': 'BranchingAttentionAggregation'},
  'className': 'AggregatingClassifier',
  'drop_mult': 1.25,
  'label_smoothing_eps': 0.1,
  'lin_ftrs': []},
 'TrainingSchedule': {'className': 'HeadOnlySchedule',
  'cycles': 20,
  'lr': 0.02,
  'wd': 0.1},
 'seed': 0}

In [7]:
# setting final AGG = 1 to get the best explainability:
# the last entry of 'agg_layers' is changed from 10 (see the config shown above) to 1.
update = {'Classifier': {'Aggregation': {'agg_layers': [50, 1]}}}
# NOTE(review): presumably update_config merges nested keys and leaves the other
# settings untouched -- confirm against hyperspace_explorer.
config = configurables.update_config(config, update)

In [8]:
# Run the scenario once with the modified config; the learner is kept because
# the inference example further down calls `learner.predict`.
result, learner = scenario.single_run(config)

epoch,train_loss,valid_loss,accuracy,time
0,0.68601,#na#,00:08,
1,0.622516,#na#,00:08,
2,0.588206,#na#,00:07,
3,0.577458,#na#,00:07,
4,0.57765,#na#,00:07,
5,0.584532,#na#,00:07,
6,0.577925,#na#,00:08,
7,0.567214,#na#,00:08,
8,0.574821,#na#,00:07,
9,0.576082,#na#,00:08,


In [9]:
# Show the value returned by `single_run` as `result`.
print(result)

0.9307199716567993


### Inference on an example, getting attention data

In [18]:
# Example review to run through the trained learner.
text = 'It was awful. The best thing about this movie was that it finally ended.'

In [19]:
# return_x=True additionally returns the processed input `x`;
# its `.text` is used below to recover the tokens.
x, y, pred, raw_pred = learner.predict(text, return_x=True)

In [20]:
# Show the prediction outputs returned by `learner.predict` (everything except `x`).
y, pred, raw_pred

(Category neg, tensor(0), tensor([ 0.8064, -0.5708]))

In [21]:
# NOTE(review): `last_weights` presumably holds the attention weights stored by the
# model during the `predict` call above; `[0]` selects the first entry (presumably
# the only item in the batch) -- confirm against the attention module.
weights = pd.Series(learner.model[1].attn.last_weights[0])

In [22]:
# Index of the feature to inspect.
which_feature = 0
# Takes entry 0 along the first axis, everything along the second axis, and the
# chosen feature along the third.
# NOTE(review): assumes `last_features` is laid out as (batch, token, feature) -- TODO confirm.
features = learner.model[1].attn.last_features[0, :, which_feature]

In [23]:
# Recover the individual tokens by splitting the processed text on single spaces.
tokens = x.text.split(' ')

In [24]:
# One row per token position, with the attention weight and the selected feature
# value alongside. Every column is passed as a Series, so pandas aligns them on
# the index and pads with NaN if the columns differ in length.
df = pd.DataFrame(dict(
    token=pd.Series(tokens),
    weights=pd.Series(weights),
    features=pd.Series(features),
))

In [25]:
# Render in-cell bars for the two numeric columns; with a two-element `color`
# list the first colour is used for negative values and the second for positive ones.
df.style.bar(subset=['weights', 'features'], align='mid', color=['#d65f5f', '#5fba7d'])

Unnamed: 0,token,weights,features
0,xxbos,0.007826,-1.005893
1,xxmaj,0.000594,-0.398653
2,it,0.001138,-1.398149
3,was,0.01954,-0.085323
4,awful,0.421856,-2.26706
5,.,0.12016,-2.088816
6,xxmaj,0.025916,-2.894467
7,the,0.01067,-2.37248
8,best,0.006905,-1.396706
9,thing,0.006357,-2.713917


In [None]:
# Drop the reference to the trained learner and force a garbage-collection pass
# before the next section starts further runs.
# `gc` is imported in the notebook's import cell at the top.
del learner
gc.collect()

## Reproduce the best run from "IMDB sample - head-only training"

After the run, additional information is available in `scenario.info` and `scenario._metrics`.

Normally all this data would end up in the database.

A few other configs from the article are available in the `configs` directory.

In [None]:
# Task definition for this section (a different task file than in the first part).
task_conf = load_task_conf('imdb_1k_20_folds')
# The config is re-read from disk, so the 'agg_layers' override applied to the
# in-memory config earlier in the notebook is not part of it.
config = load_config('282_sample_dataset_head_training_chosen')

In [None]:
# Build the scenario described by the task definition.
scenario = scenarios.Scenario.from_config(task_conf['Scenario'])
# The run uses the seed prescribed by the task, not one stored with the config.
config.update(seed=task_conf['seed'])

In [None]:
# Run the scenario; only the result is kept -- the second returned value
# (the learner in the earlier single run) is not needed here.
result, _ = scenario.single_run(config)