Code adapted from the CoreNLP tutorials: https://stanfordnlp.github.io/CoreNLP/tutorials.html  
Vectorizing part-of-speech (POS) tags

In [3]:
from stanfordcorenlp import StanfordCoreNLP
import json

In [4]:
class StanfordNLP:
    """Small convenience wrapper around a ``StanfordCoreNLP`` client.

    The StanfordCoreNLP server must already be running before this class is
    instantiated. Every method except ``annotate`` forwards its ``sentence``
    argument to the client method of the matching purpose and returns that
    result untouched.
    """

    def __init__(self, host='http://localhost', port=9000):
        """Create the client for ``host``/``port`` and the default annotate properties."""
        self.nlp = StanfordCoreNLP(host, port=port, timeout=30000)
        # Properties passed along with every annotate() request.
        self.props = {
            'annotators': 'tokenize,ssplit,pos,lemma,ner',
            'pipelineLanguage': 'en',
            'outputFormat': 'json',
        }

    def word_tokenize(self, sentence):
        """Return the client's tokenization of ``sentence``."""
        tokens = self.nlp.word_tokenize(sentence)
        return tokens

    def pos(self, sentence):
        """Return the client's part-of-speech tagging of ``sentence``."""
        tagged = self.nlp.pos_tag(sentence)
        return tagged

    def ner(self, sentence):
        """Return the client's named-entity tagging of ``sentence``."""
        entities = self.nlp.ner(sentence)
        return entities

    def parse(self, sentence):
        """Return the client's parse of ``sentence``."""
        tree = self.nlp.parse(sentence)
        return tree

    def dependency_parse(self, sentence):
        """Return the client's dependency parse of ``sentence``."""
        dependencies = self.nlp.dependency_parse(sentence)
        return dependencies

    def annotate(self, sentence):
        """Annotate ``sentence`` with ``self.props`` and decode the JSON reply."""
        raw_reply = self.nlp.annotate(sentence, properties=self.props)
        return json.loads(raw_reply)

In [5]:
# Needs a StanfordCoreNLP server reachable at StanfordNLP's default host/port.
sNLP = StanfordNLP()
text = 'John met Susan in the mall. She told him that she is traveling to Europe next week.'

# Bind the decoded result to `annotation`, NOT `json`: rebinding `json` would
# shadow the imported module that StanfordNLP.annotate() uses (json.loads), so
# any later annotate() call -- including re-running this cell -- would fail.
annotation = sNLP.annotate(text)
pos = sNLP.pos(text)
tokens = sNLP.word_tokenize(text)
ner = sNLP.ner(text)
parse = sNLP.parse(text)
dep_parse = sNLP.dependency_parse(text)


# print(annotation)
print(pos)
print(tokens)
# print(ner)
# print(parse)
# print(dep_parse)

[('John', 'NNP'), ('met', 'VBD'), ('Susan', 'NNP'), ('in', 'IN'), ('the', 'DT'), ('mall', 'NN'), ('.', '.'), ('She', 'PRP'), ('told', 'VBD'), ('him', 'PRP'), ('that', 'IN'), ('she', 'PRP'), ('is', 'VBZ'), ('traveling', 'VBG'), ('to', 'TO'), ('Europe', 'NNP'), ('next', 'IN'), ('week', 'NN'), ('.', '.')]
['John', 'met', 'Susan', 'in', 'the', 'mall', '.', 'She', 'told', 'him', 'that', 'she', 'is', 'traveling', 'to', 'Europe', 'next', 'week', '.']


The POS tag list used below is taken from this Stack Overflow answer: https://stackoverflow.com/questions/1833252/java-stanford-nlp-part-of-speech-labels

In [6]:
# Tags counted in each POS vector. The order of this list fixes the meaning of
# every vector position, so do not reorder it.
list_pos = ['CC', 'CD', 'DT', 'EX', 'FW', 'IN', 'JJ', 'JJR', 'JJS', 'LS', 'MD', 'NN', 'NNS', 'NNP', 'NNPS', 'PDT', 'POS', 'PRP', 'PRP$', 'RB', 'RBR', 'RBS', 'RP', 'SYM', 'TO', 'UH', 'VB', 'VBD', 'VBG', 'VBN', 'VBP', 'VBZ', 'WDT', 'WP', 'WP$', 'WRB']

# Tokens that close a sentence. The original code only recognised '.'.
SENTENCE_TERMINATORS = ('.', '?', '!')


def split_tagged_sentences(tagged_tokens, terminators=SENTENCE_TERMINATORS):
    """Split a flat list of (word, tag) pairs into per-sentence lists.

    A token whose word is in ``terminators`` closes the current sentence and
    is itself dropped. Tokens after the last terminator form a final sentence.
    Returns a list of lists of (word, tag) pairs; an empty input gives ``[]``.
    """
    sentences = [[]]
    for word, tag in tagged_tokens:
        if word in terminators:
            sentences.append([])
        else:
            sentences[-1].append((word, tag))
    # The slot opened by the final terminator (or by empty input) holds nothing.
    if not sentences[-1]:
        sentences.pop()
    return sentences


def count_pos_tags(tagged_sentence, tagset):
    """Count how often each tag of ``tagset`` occurs in ``tagged_sentence``.

    Returns a dict with one key per tag in ``tagset`` (zero when absent).
    Tags outside ``tagset`` (e.g. punctuation tags such as ',') are ignored
    instead of raising KeyError.
    """
    counts = dict.fromkeys(tagset, 0)
    for _word, tag in tagged_sentence:
        if tag in counts:
            counts[tag] += 1
    return counts


tagged_sentences = split_tagged_sentences(pos)

# Only the first two sentences are vectorized; a missing sentence yields zeros.
dict_1 = count_pos_tags(tagged_sentences[0] if len(tagged_sentences) > 0 else [], list_pos)
dict_2 = count_pos_tags(tagged_sentences[1] if len(tagged_sentences) > 1 else [], list_pos)

pos_vector1 = [dict_1[tag] for tag in list_pos]  # POS vector for sentence 1
pos_vector2 = [dict_2[tag] for tag in list_pos]  # POS vector for sentence 2

In [7]:
# Show both sentence vectors, one per line.
print(pos_vector1, pos_vector2, sep='\n')

[0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0]
[0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0]
