## English Parser from Stanford Core-NLP

In [7]:
from nltk.parse.corenlp import CoreNLPServer
import os
import time
import stanza

In [2]:
coreNLP_dir = "/Users/eun-youngchristinapark/Documents/stanford-corenlp-4.2.2" # Change this to your coreNLP directory

server = CoreNLPServer(
   os.path.join(coreNLP_dir, "stanford-corenlp-4.2.2.jar"),
   os.path.join(coreNLP_dir, "stanford-corenlp-4.2.2-models.jar")    
)
server.start()

In [3]:
from nltk.parse.corenlp import CoreNLPParser
parser = CoreNLPParser()
parse = next(parser.raw_parse("I put the book in the box on the table."))
print(parse)

(ROOT
  (S
    (NP (PRP I))
    (VP
      (VBD put)
      (NP (DT the) (NN book))
      (PP (IN in) (NP (DT the) (NN box)))
      (PP (IN on) (NP (DT the) (NN table))))
    (. .)))


In [4]:
server.stop()

In [5]:
? CoreNLPParser

[0;31mInit signature:[0m  [0mCoreNLPParser[0m[0;34m([0m[0murl[0m[0;34m=[0m[0;34m'http://localhost:9000'[0m[0;34m,[0m [0mencoding[0m[0;34m=[0m[0;34m'utf8'[0m[0;34m,[0m [0mtagtype[0m[0;34m=[0m[0;32mNone[0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
[0;31mDocstring:[0m     
>>> parser = CoreNLPParser(url='http://localhost:9000')

>>> next(
...     parser.raw_parse('The quick brown fox jumps over the lazy dog.')
... ).pretty_print()  # doctest: +NORMALIZE_WHITESPACE
                     ROOT
                      |
                      S
       _______________|__________________________
      |                         VP               |
      |                _________|___             |
      |               |             PP           |
      |               |     ________|___         |
      NP              |    |            NP       |
  ____|__________     |    |     _______|____    |
 DT   JJ    JJ   NN  VBZ   IN   DT      JJ   NN  .
 |    |     |    |    |    

## Spanish Dependency Parser 1

In [8]:
stanza.download('es') # download Spanish model
nlp = stanza.Pipeline('es') # initialize Spanish neural pipeline
doc = nlp("Has vendido un nuevo producto que es muy popular dentro de tu escuela.")

Downloading https://raw.githubusercontent.com/stanfordnlp/stanza-resources/master/resources_1.2.0.json: 128kB [00:00, 23.4MB/s]                    
2021-05-28 16:01:10 INFO: Downloading default packages for language: es (Spanish)...
2021-05-28 16:01:11 INFO: File exists: /Users/eun-youngchristinapark/stanza_resources/es/default.zip.
2021-05-28 16:01:15 INFO: Finished downloading models and saved to /Users/eun-youngchristinapark/stanza_resources.
2021-05-28 16:01:15 INFO: Loading these models for language: es (Spanish):
| Processor | Package |
-----------------------
| tokenize  | ancora  |
| mwt       | ancora  |
| pos       | ancora  |
| lemma     | ancora  |
| depparse  | ancora  |
| ner       | conll02 |

2021-05-28 16:01:15 INFO: Use device: cpu
2021-05-28 16:01:15 INFO: Loading: tokenize
2021-05-28 16:01:15 INFO: Loading: mwt
2021-05-28 16:01:15 INFO: Loading: pos
2021-05-28 16:01:15 INFO: Loading: lemma
2021-05-28 16:01:15 INFO: Loading: depparse
2021-05-28 16:01:15 INFO: Loading

In [9]:
for sentence in doc.sentences:
    #print(sentence.ents)
    print(sentence.dependencies)

[({
  "id": 2,
  "text": "vendido",
  "lemma": "vender",
  "upos": "VERB",
  "xpos": "VERB",
  "feats": "Gender=Masc|Number=Sing|Tense=Past|VerbForm=Part",
  "head": 0,
  "deprel": "root",
  "misc": "start_char=4|end_char=11"
}, 'aux', {
  "id": 1,
  "text": "Has",
  "lemma": "haber",
  "upos": "AUX",
  "xpos": "AUX",
  "feats": "Mood=Ind|Number=Sing|Person=2|Tense=Pres|VerbForm=Fin",
  "head": 2,
  "deprel": "aux",
  "misc": "start_char=0|end_char=3"
}), ({
  "id": 0,
  "text": "ROOT"
}, 'root', {
  "id": 2,
  "text": "vendido",
  "lemma": "vender",
  "upos": "VERB",
  "xpos": "VERB",
  "feats": "Gender=Masc|Number=Sing|Tense=Past|VerbForm=Part",
  "head": 0,
  "deprel": "root",
  "misc": "start_char=4|end_char=11"
}), ({
  "id": 5,
  "text": "producto",
  "lemma": "producto",
  "upos": "NOUN",
  "xpos": "NOUN",
  "feats": "Gender=Masc|Number=Sing",
  "head": 2,
  "deprel": "obj",
  "misc": "start_char=21|end_char=29"
}, 'det', {
  "id": 3,
  "text": "un",
  "lemma": "uno",
  "upos": 

## Spanish Dependency Parser 2

In [10]:
stanza.install_corenlp(dir="/Users/eun-youngchristinapark/Documents/stanza_corenlp")



In [11]:
stanza.download_corenlp_models(model='spanish', version='4.1.0', dir="/Users/eun-youngchristinapark/Documents/stanza_corenlp")

2021-05-28 16:01:31 INFO: Downloading spanish models (version 4.1.0) into directory /Users/eun-youngchristinapark/Documents/stanza_corenlp...
Downloading http://nlp.stanford.edu/software/stanford-corenlp-4.1.0-models-spanish.jar: 100%|██████████| 275M/275M [00:48<00:00, 5.66MB/s] 


In [12]:
from stanza.server import CoreNLPClient

### English Parser works with Stanza. 

In [13]:
text = "I am going to school."
with CoreNLPClient(
        classpath = '/Users/eun-youngchristinapark/Documents/stanza_corenlp/*',
        annotators=['parse'],
        timeout=30000,
        memory='5G') as client:
    ann = client.annotate(text)

2021-05-28 16:02:29 INFO: Writing properties to tmp file: corenlp_server-3f2cb5d8377a4d64.props
2021-05-28 16:02:29 INFO: Starting server with command: java -Xmx5G -cp /Users/eun-youngchristinapark/Documents/stanza_corenlp/* edu.stanford.nlp.pipeline.StanfordCoreNLPServer -port 9000 -timeout 30000 -threads 5 -maxCharLength 100000 -quiet False -serverProperties corenlp_server-3f2cb5d8377a4d64.props -annotators parse -preload -outputFormat serialized


In [14]:
sentence = ann.sentence[0]
constituency_parse = sentence.parseTree
print(constituency_parse)

child {
  child {
    child {
      child {
        value: "I"
      }
      value: "PRP"
      score: -1.9594900608062744
    }
    value: "NP"
    score: -2.8210573196411133
  }
  child {
    child {
      child {
        value: "am"
      }
      value: "VBP"
      score: -2.893768548965454
    }
    child {
      child {
        child {
          value: "going"
        }
        value: "VBG"
        score: -3.2131869792938232
      }
      child {
        child {
          child {
            value: "to"
          }
          value: "IN"
          score: -1.8374395370483398
        }
        child {
          child {
            child {
              value: "school"
            }
            value: "NN"
            score: -6.9619140625
          }
          value: "NP"
          score: -9.055895805358887
        }
        value: "PP"
        score: -11.415947914123535
      }
      value: "VP"
      score: -16.762569427490234
    }
    value: "VP"
    score: -23.903934478759766
  }

### Spanish parser does not work with Stanza. 

In [15]:
spanish_text = "Voy a la escuela."

#CUSTOM_PROPS = {"parse.model": "edu/stanford/nlp/models/srparser/spanishSR.beam.ser.gz"}

#with CoreNLPClient(properties=CUSTOM_PROPS, output_format="json") as client:
    
with CoreNLPClient(
        properties = 'es',
        classpath = '/Users/eun-youngchristinapark/Documents/stanza_corenlp/*',
        annotators=['parse'],
        timeout=30000,
        memory='5G') as client:
    spanish_ann = client.annotate(spanish_text)
    #ann = client.annotate(text)

2021-05-28 16:02:47 INFO: Using CoreNLP default properties for: spanish.  Make sure to have spanish models jar (available for download here: https://stanfordnlp.github.io/CoreNLP/) in CLASSPATH
2021-05-28 16:02:47 INFO: Starting server with command: java -Xmx5G -cp /Users/eun-youngchristinapark/Documents/stanza_corenlp/* edu.stanford.nlp.pipeline.StanfordCoreNLPServer -port 9000 -timeout 30000 -threads 5 -maxCharLength 100000 -quiet False -serverProperties spanish -annotators parse -preload -outputFormat serialized


AnnotationException: edu.stanford.nlp.io.RuntimeIOException: java.io.InvalidClassException: edu.stanford.nlp.parser.shiftreduce.Weight; local class incompatible: stream classdesc serialVersionUID = 1, local class serialVersionUID = 3

### Spanish Dependency Parser works with Stanza. 

In [19]:
spanish_text = "María usa un hermoso vestido azul."

#CUSTOM_PROPS = {"parse.model": "edu/stanford/nlp/models/srparser/spanishSR.beam.ser.gz"}

#with CoreNLPClient(properties=CUSTOM_PROPS, output_format="json") as client:
    
with CoreNLPClient(
        properties = 'es',
        classpath = '/Users/eun-youngchristinapark/Documents/stanza_corenlp/*',
        annotators=['depparse'],
        timeout=30000,
        memory='5G') as client:
    spanish_ann = client.annotate(spanish_text)
    #ann = client.annotate(text)

2021-05-28 16:06:17 INFO: Using CoreNLP default properties for: spanish.  Make sure to have spanish models jar (available for download here: https://stanfordnlp.github.io/CoreNLP/) in CLASSPATH
2021-05-28 16:06:17 INFO: Starting server with command: java -Xmx5G -cp /Users/eun-youngchristinapark/Documents/stanza_corenlp/* edu.stanford.nlp.pipeline.StanfordCoreNLPServer -port 9000 -timeout 30000 -threads 5 -maxCharLength 100000 -quiet False -serverProperties spanish -annotators depparse -preload -outputFormat serialized


In [20]:
sentence = spanish_ann.sentence[0]
#print(sentence)
print(sentence.basicDependencies)

node {
  sentenceIndex: 0
  index: 1
}
node {
  sentenceIndex: 0
  index: 2
}
node {
  sentenceIndex: 0
  index: 3
}
node {
  sentenceIndex: 0
  index: 4
}
node {
  sentenceIndex: 0
  index: 5
}
node {
  sentenceIndex: 0
  index: 6
}
node {
  sentenceIndex: 0
  index: 7
}
edge {
  source: 2
  target: 1
  dep: "nsubj"
  isExtra: false
  sourceCopy: 0
  targetCopy: 0
  language: Spanish
}
edge {
  source: 2
  target: 5
  dep: "obj"
  isExtra: false
  sourceCopy: 0
  targetCopy: 0
  language: Spanish
}
edge {
  source: 2
  target: 7
  dep: "punct"
  isExtra: false
  sourceCopy: 0
  targetCopy: 0
  language: Spanish
}
edge {
  source: 5
  target: 3
  dep: "det"
  isExtra: false
  sourceCopy: 0
  targetCopy: 0
  language: Spanish
}
edge {
  source: 5
  target: 4
  dep: "amod"
  isExtra: false
  sourceCopy: 0
  targetCopy: 0
  language: Spanish
}
edge {
  source: 5
  target: 6
  dep: "amod"
  isExtra: false
  sourceCopy: 0
  targetCopy: 0
  language: Spanish
}
root: 2

