In [1]:
import re
import pandas as pd
import sys
import os
import json
from tqdm import tqdm

from allennlp.common import JsonDict

sys.path.insert(0, '..')

import utils.semantic_role_labeling as sem_rl

pd.set_option('display.max_colwidth', None)
pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)
pd.options.mode.chained_assignment = None

def _run_predictor(batch_data):
    if len(batch_data) == 1:
        result = predictor.predict_json(batch_data[0])
        results = [result]
    else:
        results = predictor.predict_batch_json(batch_data)
    return results


# Read ROCStories into pandas DataFrame
roc_stories_path_csv = '../../generated/coreference_resolution/ROCStories_with_resolved_coreferences.csv'
roc_stories_df = pd.read_csv(roc_stories_path_csv, sep='\t', header=0)

roc_stories_df = roc_stories_df.iloc[:1000]

In [2]:
predictor = sem_rl.get_predictor()
batch_size = 128

for n in range(1, 6):
    batch_data = []
    batch_result = []

    for index, row in tqdm(roc_stories_df.iterrows()):
        line = str(row[f'resolved{n}'])
        if not line.isspace():
            line = {"sentence": line.strip()}
            line = json.dumps(line)
            json_data = predictor.load_line(line)
            batch_data.append(json_data)
            if len(batch_data) == batch_size:
                res = _run_predictor(batch_data)
                for b in res:
                    batch_result.append(b)
                batch_data = []

    if batch_data:
        res = _run_predictor(batch_data)
        for b in res:
            batch_result.append(b)

    roc_stories_df[f'srl_r{n}'] = batch_result

1000it [01:12, 13.78it/s]
1000it [01:49,  9.10it/s]
1000it [01:56,  8.60it/s]
1000it [01:51,  8.94it/s]
1000it [02:09,  7.75it/s]


In [3]:
len(batch_result)

1000

In [4]:
roc_stories_df.loc[0, 'srl_r1']

{'verbs': [{'verb': 'noticed',
   'description': '[ARG0: David] [notice.01: noticed] [ARG1: David had put on a lot of weight recently] .',
   'tags': ['B-ARG0',
    'B-V',
    'B-ARG1',
    'I-ARG1',
    'I-ARG1',
    'I-ARG1',
    'I-ARG1',
    'I-ARG1',
    'I-ARG1',
    'I-ARG1',
    'I-ARG1',
    'O'],
   'frame': 'notice.01',
   'frame_scores': 0.9999998807907104,
   'lemma': 'notice'},
  {'verb': 'had',
   'description': 'David noticed David [have.01: had] put on a lot of weight recently .',
   'tags': ['O', 'O', 'O', 'B-V', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O'],
   'frame': 'have.01',
   'frame_scores': 1.0,
   'lemma': 'have'},
  {'verb': 'put',
   'description': 'David noticed [ARG0: David] had [put.08: put] on [ARG1: a lot of weight] [ARGM-TMP: recently] .',
   'tags': ['O',
    'O',
    'B-ARG0',
    'O',
    'B-V',
    'O',
    'B-ARG1',
    'I-ARG1',
    'I-ARG1',
    'I-ARG1',
    'B-ARGM-TMP',
    'O'],
   'frame': 'put.08',
   'frame_scores': 0.9999649524688721,
   'l