In [None]:
# In Colab cell ─ install Java and the PyOpenIE wrapper
# !apt-get update -qq && apt-get install -qq -y openjdk-11-jdk
# !pip install pyopenie


In [None]:
# Use the %pip magic so the package is installed into the environment of the
# running kernel rather than whatever `pip` the shell happens to resolve.
%pip install stanford-openie


In [None]:
%%bash
# Fetch and unpack CoreNLP 4.5.3 so its jars end up in stanford-corenlp-4.5.3/.
# Run as one bash script in strict mode so a failed download stops the cell
# instead of letting unzip run against a missing or partial archive.
set -euo pipefail

# Clean up any old residue (the zip and any previously extracted directory)
rm -rf stanford-corenlp-4.5.3*

# Download & unzip so that jars live in stanford-corenlp-4.5.3/
wget -q https://nlp.stanford.edu/software/stanford-corenlp-4.5.3.zip
unzip -q stanford-corenlp-4.5.3.zip


In [None]:
# Launch the CoreNLP HTTP server as a background process listening on port 9001.
# This cell only starts the server: it does not stop an existing instance or
# re-extract the archive, so do that first if an old server may still be running.
#   -Xmx4G                  JVM maximum heap size
#   -cp ".../*"             classpath = every jar in stanford-corenlp-4.5.3/
#   -timeout 150000         per-request limit (presumably milliseconds; confirm in the CoreNLP server docs)
#   -preload / -annotators  the same pipeline is given to both options
# stdout and stderr are redirected to corenlp.log; the trailing `&` backgrounds the process.
!nohup java -Xmx4G \
  -cp "stanford-corenlp-4.5.3/*" \
  edu.stanford.nlp.pipeline.StanfordCoreNLPServer \
    -port 9001 \
    -timeout 150000 \
    -threads 4 \
    -preload tokenize,ssplit,pos,lemma,depparse,natlog,openie \
    -annotators tokenize,ssplit,pos,lemma,depparse,natlog,openie \
  > corenlp.log 2>&1 &

In [None]:

# Input corpus: the short version of the Software Engineering Code of Ethics and
# Professional Practice, one list entry per heading, paragraph, or numbered principle.
paragraphs = [
    "Software Engineering Code of Ethics and Professional Practice (Short Version)",
    "PREAMBLE",
    "The short version of the code summarizes aspirations at a high level of the abstraction; the clauses that are included in the full version give examples and details of how these aspirations change the way we act as software engineering professionals. Without the aspirations, the details can become legalistic and tedious; without the details, the aspirations can become high sounding but empty; together, the aspirations and the details form a cohesive code.",
    "Software engineers shall commit themselves to making the analysis, specification, design, development, testing and maintenance of software a beneficial and respected profession. In accordance with their commitment to the health, safety and welfare of the public, software engineers shall adhere to the following Eight Principles:",
    "1. PUBLIC – Software engineers shall act consistently with the public interest.",
    "2. CLIENT AND EMPLOYER – Software engineers shall act in a manner that is in the best interests of their client and employer consistent with the public interest.",
    "3. PRODUCT – Software engineers shall ensure that their products and related modifications meet the highest professional standards possible.",
    "4. JUDGMENT – Software engineers shall maintain integrity and independence in their professional judgment.",
    "5. MANAGEMENT – Software engineering managers and leaders shall subscribe to and promote an ethical approach to the management of software development and maintenance.",
    "6. PROFESSION – Software engineers shall advance the integrity and reputation of the profession consistent with the public interest.",
    "7. COLLEAGUES – Software engineers shall be fair to and supportive of their colleagues.",
    "8. SELF – Software engineers shall participate in lifelong learning regarding the practice of their profession and shall promote an ethical approach to the practice of the profession."
]

In [None]:
import requests
import json


# NOTE(review): this rebinds `paragraphs` to a single sentence fragment, replacing
# the full list defined in the previous cell. Any later cell that reads
# `paragraphs` sees whichever assignment ran most recently.
paragraphs = [
    "The short version of the code summarizes aspirations at a high level of the abstraction;"
]

# The CoreNLP server must already be running (it is launched on port 9001 earlier
# in this notebook) with openie among its annotators.
def openie_triples(text: str, url: str = "http://localhost:9001") -> list:
    """Extract OpenIE triples from ``text`` using a running CoreNLP server.

    Args:
        text: Raw text to annotate. It is sent UTF-8 encoded as the request body.
        url: Base URL of the CoreNLP server. Defaults to the port the server
            is started on in this notebook.

    Returns:
        A list of ``(subject, relation, object)`` string tuples, in the order
        the server reports them. Empty or whitespace-only input yields ``[]``
        without contacting the server.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.RequestException: On connection failures or client timeout.
    """
    # Nothing to annotate: skip the round trip instead of posting an empty body.
    if not text or not text.strip():
        return []

    props = {
        "annotators": "tokenize,ssplit,pos,lemma,depparse,natlog,openie",
        "outputFormat": "json",
        "timeout": 150000,
    }
    response = requests.post(
        url,
        params={"properties": json.dumps(props)},
        data=text.encode("utf-8"),
        # requests' timeout is in seconds. It is kept above the 150000 sent to the
        # server (presumably milliseconds; TODO confirm) so the server gives up first.
        timeout=180,
    )
    response.raise_for_status()
    payload = response.json()

    # Sentences without an "openie" entry simply contribute no triples.
    return [
        (t["subject"], t["relation"], t["object"])
        for sentence in payload.get("sentences", [])
        for t in sentence.get("openie", [])
    ]

# Send every paragraph in one request (separated by blank lines) so the
# per-request overhead is only paid once.
batch_text = "\n\n".join(paragraphs)
all_triples = openie_triples(batch_text)

# Each entry is already a (subject, relation, object) tuple, so print it as-is.
for triple in all_triples:
    print(triple)


('version', 'summarizes aspirations at', 'level')
('version', 'summarizes aspirations at', 'high level')
('short version', 'summarizes aspirations at', 'high level')
('version', 'summarizes aspirations at', 'level of abstraction')
('short version', 'summarizes aspirations at', 'level')
('short version', 'summarizes aspirations at', 'high level of abstraction')
('version', 'summarizes aspirations at', 'high level of abstraction')
('short version', 'summarizes aspirations at', 'level of abstraction')
('short version', 'summarizes', 'aspirations')
('version', 'summarizes', 'aspirations')


In [None]:
# The server is launched with -port 9001, so that is the port to inspect here.
# Look for the startup "listening" line (case-insensitive). If it is absent,
# show the end of the log instead, which is where a startup failure would appear.
!grep -i "listening" corenlp.log -A2 || tail -n 10 corenlp.log

# Confirm that something is actually bound to the server's port
!netstat -tln | grep ':9001'



  java.base/sun.nio.ch.ServerSocketChannelImpl.bind(ServerSocketChannelImpl.java:227)
  java.base/sun.nio.ch.ServerSocketAdaptor.bind(ServerSocketAdaptor.java:80)
  jdk.httpserver/sun.net.httpserver.ServerImpl.<init>(ServerImpl.java:142)
  jdk.httpserver/sun.net.httpserver.HttpServerImpl.<init>(HttpServerImpl.java:50)
  jdk.httpserver/sun.net.httpserver.DefaultHttpServerProvider.createHttpServer(DefaultHttpServerProvider.java:35)
  jdk.httpserver/com.sun.net.httpserver.HttpServer.create(HttpServer.java:137)
  edu.stanford.nlp.pipeline.StanfordCoreNLPServer.run(StanfordCoreNLPServer.java:1540)
  edu.stanford.nlp.pipeline.StanfordCoreNLPServer.launchServer(StanfordCoreNLPServer.java:1637)
  edu.stanford.nlp.pipeline.StanfordCoreNLPServer.main(StanfordCoreNLPServer.java:1644)
[Thread-0] INFO CoreNLP - CoreNLP Server is shutting down.
tcp        0      0 172.28.0.12:9000        0.0.0.0:*               LISTEN     


In [None]:
%%bash
# Stop any running CoreNLP server and confirm that its port has been released.
# PORT must match the -port value used when launching the server (9001).
PORT=9001

# Kill any Java process that looks like CoreNLPServer.
# pkill exits non-zero when nothing matched; that is fine here.
pkill -f edu.stanford.nlp.pipeline.StanfordCoreNLPServer || true

# The JVM needs a moment to exit, so poll briefly before declaring failure.
for _ in 1 2 3 4 5; do
  lsof -i:"$PORT" -t >/dev/null || break
  sleep 1
done

# Double-check the port is free
if lsof -i:"$PORT" -t >/dev/null; then
  echo "Port $PORT still in use by:"
  lsof -i:"$PORT"
  exit 1
else
  echo "Port $PORT is now free"
fi


In [None]:
 # Show the first 50 lines of corenlp.log, the file the server's output is redirected to.
 !head -n 50 corenlp.log


[main] INFO CoreNLP - --- StanfordCoreNLPServer#main() called ---
[main] INFO CoreNLP - Server default properties:
			(Note: unspecified annotator properties are English defaults)
			annotators = tokenize,ssplit,pos,lemma,depparse,natlog,openie
			inputFormat = text
			outputFormat = json
			prettyPrint = false
			threads = 4
[main] INFO CoreNLP - Threads: 4
[main] INFO edu.stanford.nlp.pipeline.StanfordCoreNLP - Adding annotator tokenize
[main] INFO edu.stanford.nlp.pipeline.StanfordCoreNLP - Adding annotator pos
[main] INFO edu.stanford.nlp.tagger.maxent.MaxentTagger - Loading POS tagger from edu/stanford/nlp/models/pos-tagger/english-left3words-distsim.tagger ... done [1.2 sec].
[main] INFO edu.stanford.nlp.pipeline.StanfordCoreNLP - Adding annotator lemma
[main] INFO edu.stanford.nlp.pipeline.StanfordCoreNLP - Adding annotator depparse
[main] INFO edu.stanford.nlp.parser.nndep.DependencyParser - Loading depparse model: edu/stanford/nlp/models/parser/nndep/english_UD.gz ... Time ela

In [None]:
import requests
import json

# The CoreNLP server must already be running (it is launched on port 9001 earlier
# in this notebook) with openie among its annotators.
def openie_triples(text: str, url: str = "http://localhost:9001") -> list:
    """Extract OpenIE triples from ``text`` using a running CoreNLP server.

    Args:
        text: Raw text to annotate. It is sent UTF-8 encoded as the request body.
        url: Base URL of the CoreNLP server. Defaults to the port the server
            is started on in this notebook.

    Returns:
        A list of ``(subject, relation, object)`` string tuples, in the order
        the server reports them. Empty or whitespace-only input yields ``[]``
        without contacting the server.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.RequestException: On connection failures or client timeout.
    """
    # Nothing to annotate: skip the round trip instead of posting an empty body.
    if not text or not text.strip():
        return []

    props = {
        "annotators": "tokenize,ssplit,pos,lemma,depparse,natlog,openie",
        "outputFormat": "json",
        "timeout": 150000,
    }
    response = requests.post(
        url,
        params={"properties": json.dumps(props)},
        data=text.encode("utf-8"),
        # requests' timeout is in seconds. It is kept above the 150000 sent to the
        # server (presumably milliseconds; TODO confirm) so the server gives up first.
        timeout=180,
    )
    response.raise_for_status()
    payload = response.json()

    # Sentences without an "openie" entry simply contribute no triples.
    return [
        (t["subject"], t["relation"], t["object"])
        for sentence in payload.get("sentences", [])
        for t in sentence.get("openie", [])
    ]

# Send every paragraph in one request (separated by blank lines) so the
# per-request overhead is only paid once.
batch_text = "\n\n".join(paragraphs)
all_triples = openie_triples(batch_text)

# Each entry is already a (subject, relation, object) tuple, so print it as-is.
for triple in all_triples:
    print(triple)


('version', 'summarizes', 'aspirations')
('we', 'act as', 'software engineering professionals')
('version', 'summarizes aspirations at', 'level of abstraction')
('version', 'summarizes aspirations at', 'high level of abstraction')
('short version', 'summarizes aspirations at', 'high level of abstraction')
('short version', 'summarizes aspirations at', 'level of abstraction')
('short version', 'summarizes', 'aspirations')
('short version', 'summarizes aspirations at', 'level')
('short version', 'summarizes aspirations at', 'high level')
('version', 'summarizes aspirations at', 'level')
('version', 'summarizes aspirations at', 'high level')
('details', 'form', 'code')
('details', 'together form', 'code')
('details', 'form', 'cohesive code')
('details', 'can become Without', 'aspirations')
('details', 'together form', 'cohesive code')
('aspirations', 'form', 'code')
('aspirations', 'together form', 'code')
('details', 'can become', 'legalistic')
('aspirations', 'together form', 'cohesive 