# Coreference resolution using CoreNLP

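CoreNLP models are available for Arabic, Chinese, English, French, German and Spanish.
Stanford CoreNLP provides a pipeline of annotation tasks (annotators).
In this tutorial, we enable only the annotators needed for coreference resolution, i.e. the preprocessing steps followed by `coref`.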

In [1]:
%%pom
# Maven dependencies for this notebook: the CoreNLP library plus two model
# artifacts, all pinned to the same version (4.2.2).
dependencies:
    # Library code, given in the short groupId:artifactId:version form.
    - edu.stanford.nlp:stanford-corenlp:4.2.2
    # Same coordinates with the "models" classifier.
    # Presumably the default (English) models used by the pipeline below - TODO confirm.
    - groupId: edu.stanford.nlp
      artifactId: stanford-corenlp
      version: 4.2.2
      classifier: models
    # Same coordinates with the "models-arabic" classifier.
    # NOTE(review): the text analysed in this notebook is English; confirm whether
    # the Arabic models are actually needed here.
    - groupId: edu.stanford.nlp
      artifactId: stanford-corenlp
      version: 4.2.2
      classifier: models-arabic

# <dependencies>
# <dependency>
#     <groupId>edu.stanford.nlp</groupId>
#     <artifactId>stanford-corenlp</artifactId>
#     <version>4.2.2</version>
# </dependency>
# <dependency>
#     <groupId>edu.stanford.nlp</groupId>
#     <artifactId>stanford-corenlp</artifactId>
#     <version>4.2.2</version>
#     <classifier>models</classifier>
# </dependency>
# </dependencies>

https://stanfordnlp.github.io/CoreNLP/coref.html

https://stanfordnlp.github.io/CoreNLP/api.html


In [2]:
%%java

import java.util.Properties;

import edu.stanford.nlp.coref.CorefCoreAnnotations;
import edu.stanford.nlp.coref.data.CorefChain;
import edu.stanford.nlp.coref.data.Mention;
import edu.stanford.nlp.ling.CoreAnnotations;
import edu.stanford.nlp.pipeline.Annotation;
import edu.stanford.nlp.pipeline.StanfordCoreNLP;
import edu.stanford.nlp.util.CoreMap;

// Sample input, wrapped in the Annotation object that the pipeline fills in.
String text = "Barack Obama was born in Hawaii.  He is the president. Obama was elected in 2008.";
Annotation document = new Annotation(text);

// Pipeline configuration: the annotators to run, ending with coref,
// and the neural coreference algorithm.
Properties props = new Properties();
props.setProperty("annotators", "tokenize,ssplit,pos,lemma,ner,parse,coref");
props.setProperty("coref.algorithm", "neural");
StanfordCoreNLP pipeline = new StanfordCoreNLP(props);

// Annotate the document; later cells read the results back from `document`.
pipeline.annotate(document);

// Trailing expression: the notebook displays the annotated document.
document

Barack Obama was born in Hawaii.  He is the president. Obama was elected in 2008.

In [3]:
%%java
// Mention detection: for each sentence of the annotated document, print a
// "---" separator, a "mentions" header, then one tab-indented line per mention.
document.get(CoreAnnotations.SentencesAnnotation.class).forEach(sentence -> {
    System.out.println("---");
    System.out.println("mentions");
    sentence.get(CorefCoreAnnotations.CorefMentionsAnnotation.class)
            .forEach(mention -> System.out.println("\t" + mention));
});

---
mentions
	Barack Obama
	Hawaii
---
mentions
	He
	the president
---
mentions
	Obama
	2008


In [4]:
%%java

// Remembers the last chain printed so that a later cell can inspect it;
// stays null when the document has no coreference chains.
CorefChain lastCC = null;

System.out.println("---");
System.out.println("coref chains");

// Print every coreference chain of the document on its own tab-indented line.
for (CorefChain chain : document.get(CorefCoreAnnotations.CorefChainAnnotation.class).values()) {
    System.out.print("\t");
    System.out.println(chain);
    lastCC = chain;
}

---
coref chains
	CHAIN4-["Barack Obama" in sentence 1, "He" in sentence 2, "Obama" in sentence 3]


In [5]:
%%java
//Mention Map
import java.util.Map;
import java.util.Set;
import edu.stanford.nlp.util.IntPair;

// Print the mentions of the last coreference chain found above, grouped by
// the key of the chain's mention map. Nothing is printed when no chain was found.
// NOTE(review): according to the CorefChain API documentation the IntPair key
// holds (sentence number, head-token index) rather than a "from/to" span.
// The labels are kept unchanged so the printed output stays the same;
// consider relabelling them - TODO confirm.
if (lastCC != null) {
    Map<IntPair, Set<CorefChain.CorefMention>> mentionMap = lastCC.getMentionMap();
    // Iterate over the entries directly instead of keySet() followed by get(key):
    // same order, but no additional lookup per key.
    for (Map.Entry<IntPair, Set<CorefChain.CorefMention>> entry : mentionMap.entrySet()) {
        IntPair ip = entry.getKey();
        System.out.println("from " + ip.getSource() + " to " + ip.getTarget());
        // One line per mention: surface text, mention type, number and gender.
        for (CorefChain.CorefMention mention : entry.getValue()) {
            System.out.print("\tmention span=" + mention.mentionSpan);
            System.out.print(" type=" + mention.mentionType);
            System.out.print(" number=" + mention.number);
            System.out.println(" gender=" + mention.gender);
        }
    }
}


from 1 to 2
	mention span=Barack Obama type=PROPER number=SINGULAR gender=MALE
from 2 to 1
	mention span=He type=PRONOMINAL number=SINGULAR gender=MALE
from 3 to 1
	mention span=Obama type=PROPER number=SINGULAR gender=MALE
