In [76]:
# `display` and `HTML` are part of the public `IPython.display` API; importing
# them from `IPython.core.display` is deprecated.
from IPython.display import display, HTML

# Inject the highlight stylesheet and script into the notebook front-end
# (presumably required by the markup that `visualization.render` emits --
# TODO confirm). Each file is read inside a `with` block so its handle is
# closed deterministically instead of being left to the garbage collector.
for asset_path in ('visualization/highlight.css', 'visualization/highlight.js'):
    with open(asset_path) as asset_file:
        display(HTML(asset_file.read()))

In [77]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [85]:
import os

import pandas as pd
import spacy

from stanfordcorenlp import StanfordCoreNLP
from nltk.parse.corenlp import CoreNLPParser
from allennlp.predictors.predictor import Predictor

import visualization

In [79]:
# Instantiate stanford corenlp server
# The CoreNLP install directory can be overridden through the
# STANFORD_CORENLP_PATH environment variable; the default keeps the location
# this notebook was originally written against.
STANFORD_CORENLP_PATH = os.environ.get(
    'STANFORD_CORENLP_PATH',
    '/home/sandeep/Downloads/stanford-corenlp-full-2018-10-05/')
PORT = 9090
try:
    # Try to launch a local server from the install directory.
    server = StanfordCoreNLP(STANFORD_CORENLP_PATH, port=PORT, quiet=True)
except OSError as e:
    # Launching failed. The usual cause is that the port is already taken by a
    # previously started server, so fall back to connecting to it. The original
    # error is printed because an OSError may also signal a different problem
    # (e.g. a wrong install path -- NOTE(review): confirm against the library).
    print('Could not start a CoreNLP server ({}). The port is probably occupied '
          'by an instance of the server that is already running; '
          'connecting to it instead.'.format(e))
    server = StanfordCoreNLP('http://localhost', port=PORT, quiet=True)

# Locations / names of the three coreference back-ends used below.
STANFORD_SERVER_URL = server.url
ALLENNLP_COREF_MODEL_PATH = 'https://s3-us-west-2.amazonaws.com/allennlp/models/coref-model-2018.02.05.tar.gz'
HUGGINGFACE_COREF_MODEL = 'en_coref_lg'

The port is occupied, probably an instance of the is already running.


In [80]:
# Read the tab-separated gap-development file into a DataFrame.
train = pd.read_csv('data/gap-development.tsv', sep='\t')
# Normalise the headers: lower-case them and turn hyphens into underscores so
# the columns can be reached as attributes (e.g. `row.text`).
train.columns = [name.lower().replace('-', '_') for name in train.columns]
# Preview the frame with capped row count and cell width to keep the output small.
with pd.option_context('display.max_rows', 10, 'display.max_colwidth', 15):
    display(train)

Unnamed: 0,id,text,pronoun,pronoun_offset,a,a_offset,a_coref,b,b_offset,b_coref,url
0,development-1,Zoe Telford...,her,274,Cheryl Cassidy,191,True,Pauline,207,False,http://en.w...
1,development-2,He grew up ...,His,284,MacKenzie,228,True,Bernard Leach,251,False,http://en.w...
2,development-3,He had been...,his,265,Angeloz,173,False,De la Sota,246,True,http://en.w...
3,development-4,The current...,his,321,Hell,174,False,Henry Rosen...,336,True,http://en.w...
4,development-5,Her Santa F...,She,437,Kitty Oppen...,219,False,Rivera,294,True,http://en.w...
...,...,...,...,...,...,...,...,...,...,...,...
1995,development...,Faye's thir...,her,433,Nicole,255,False,Faye,328,True,http://en.w...
1996,development...,The plot of...,her,246,Doris Chu,111,False,Mei,215,True,http://en.w...
1997,development...,Grant playe...,she,348,Maria,259,True,Imelda Stau...,266,False,http://en.w...
1998,development...,The fashion...,She,284,Helen,145,True,Suzanne Bar...,208,False,http://en.w...


In [82]:
# Create one handle per coreference system compared in this notebook.
# If models are not preloaded, this can take a while for the first call
# NLTK CoreNLP client bound to the URL taken from `server.url`.
stanford_model = CoreNLPParser(url=STANFORD_SERVER_URL)
# AllenNLP predictor built from the model archive location (an https URL here).
allennlp_model = Predictor.from_path(ALLENNLP_COREF_MODEL_PATH)
# spaCy pipeline loaded by name ('en_coref_lg'); presumably the
# coreference-enabled model -- TODO confirm.
huggingface_model = spacy.load(HUGGINGFACE_COREF_MODEL)

  "num_layers={}".format(dropout, num_layers))


In [83]:
# Take the passage of the row labelled 0 with a single label-based lookup.
text = train.loc[0, 'text']
# Run the AllenNLP predictor on that passage.
data = allennlp_model.predict(text)
# Render the prediction in AllenNLP mode, targeting the notebook (jupyter=True).
visualization.render(data, jupyter=True, allen=True)

In [84]:
# Number of leading samples to annotate. Every sample is pushed through all
# three coreference systems, so keep this small for interactive use; set it to
# None to process the whole frame. (Replaces the former unconditional `break`
# at the end of the loop body, which limited the run to the first sample.)
N_SAMPLES = 1

samples = train if N_SAMPLES is None else train.head(N_SAMPLES)

rows = []
for idx, row in samples.iterrows():
    # One entry per table row: (model label, object handed to
    # `visualization.render`, name of the render flag selecting its format).
    annotations = [
        ('Stanford',
         stanford_model.api_call(row.text, properties={'annotators': 'coref'}),
         'stanford'),
        ('AllenNlp', allennlp_model.predict(row.text), 'allen'),
        ('Huggingface', huggingface_model(row.text), 'huggingface'),
        # Special rendering for labelled pronouns, either gold or predicted
        # labels in 'a_coref', 'b_coref'
        ('GPR', row, 'proref'),
    ]
    for model_name, data, render_flag in annotations:
        # jupyter=False: the rendered result is stored in the table rather
        # than displayed immediately.
        html = visualization.render(data, jupyter=False, **{render_flag: True})
        rows.append({'sample_idx': idx,
                     'model': model_name,
                     'annotation': html})

# Index by (sample, model) and sort, giving one 'annotation' cell per pair.
# `set_index(...).sort_index()` yields the same layout as the former
# `groupby(...).agg(lambda x: x)` without relying on an identity "aggregation".
df = pd.DataFrame(rows).set_index(['sample_idx', 'model']).sort_index()
s = df.style.set_properties(**{'text-align': 'left'})
# NOTE(review): `Styler.render` is deprecated in newer pandas in favour of
# `Styler.to_html`; switch when the pinned pandas version is upgraded.
display(HTML(s.render(justify='left')))

Unnamed: 0_level_0,Unnamed: 1_level_0,annotation
sample_idx,model,Unnamed: 2_level_1
0,AllenNlp,"Zoe Telford -- played 1 the police officer girlfriend of 0 Simon , Maggie . Dumped by 0 Simon in the final episode of series 1 , after 1 he slept with Jenny , and is not seen again . 2 Phoebe Thomas played 3 Cheryl Cassidy , Pauline 's friend and also a year 11 pupil in 0 Simon 's class . Dumped 2 her boyfriend following 0 Simon 's advice after 0 he would n't have sex with 2 her but later realised this was due to 0 him catching crabs off 3 2 her friend Pauline ."
0,GPR,"Zoe Telford -- played the police officer girlfriend of Simon, Maggie. Dumped by Simon in the final episode of series 1, after he slept with Jenny, and is not seen again. Phoebe Thomas played 0 Cheryl Cassidy, 1 Pauline's friend and also a year 11 pupil in Simon's class. Dumped 0 her boyfriend following Simon's advice after he wouldn't have sex with her but later realised this was due to him catching crabs off her friend Pauline."
0,Huggingface,"Zoe Telford -- played the police officer girlfriend of 0 Simon , Maggie . Dumped by 0 Simon in the final episode of series 1 , after 0 he slept with Jenny , and is not seen again . 1 Phoebe Thomas played Cheryl Cassidy , Pauline 's friend and also a year 11 pupil in 0 Simon 's class . Dumped 1 her boyfriend following 0 Simon 's advice after 0 he would n't have sex with 1 her but later realised this was due to 0 him catching crabs off 1 her friend Pauline ."
0,Stanford,"Zoe Telford -- played the police officer girlfriend of 0 Simon , Maggie . Dumped by 0 Simon in the final episode of series 1 , after 0 he slept with Jenny , and is not seen again . 2 Phoebe Thomas played 1 Cheryl Cassidy , 3 Pauline 's friend and also a year 11 pupil in 0 Simon 's class . Dumped 2 her boyfriend following 0 Simon 's advice after 0 he would n't have sex with 2 her but later realised 1 this was due to 0 him catching crabs off 3 2 her friend Pauline ."
