In [5]:
%pip install stanza
import stanza
import copy

# Load the English pipeline.
# NOTE: per the log output, Pipeline() re-checks resources.json for model updates
# on construction; the log says this can be turned off with download_method=None
# or download_method=DownloadMethod.REUSE_RESOURCES.
stanza.download('en')  # Download the default English model package
nlp = stanza.Pipeline('en')  # Initialize the English pipeline (shared by the cells below)

Note: you may need to restart the kernel to use updated packages.


Downloading https://raw.githubusercontent.com/stanfordnlp/stanza-resources/main/resources_1.5.0.json: 216kB [00:00, 15.4MB/s]                    
2023-08-31 01:45:49 INFO: Downloading default packages for language: en (English) ...
2023-08-31 01:45:50 INFO: File exists: /Users/sally/stanza_resources/en/default.zip
2023-08-31 01:45:54 INFO: Finished downloading models and saved to /Users/sally/stanza_resources.
2023-08-31 01:45:54 INFO: Checking for updates to resources.json in case models have been updated.  Note: this behavior can be turned off with download_method=None or download_method=DownloadMethod.REUSE_RESOURCES
Downloading https://raw.githubusercontent.com/stanfordnlp/stanza-resources/main/resources_1.5.0.json: 216kB [00:00, 28.1MB/s]                    
2023-08-31 01:45:56 INFO: Loading these models for language: en (English):
| Processor    | Package   |
----------------------------
| tokenize     | combined  |
| pos          | combined  |
| lemma        | combined  |
| cons

In [6]:
# Parse a short example and convert its first sentence to a list of per-token
# dicts (keys such as 'id', 'text', 'upos', 'head', 'deprel' -- see the output below).
doc = nlp("Bill seems honest")
sentence = doc.sentences[0].to_dict()
sentence

[{'id': 1,
  'text': 'Bill',
  'lemma': 'Bill',
  'upos': 'PROPN',
  'xpos': 'NNP',
  'feats': 'Number=Sing',
  'head': 2,
  'deprel': 'nsubj',
  'start_char': 0,
  'end_char': 4,
  'ner': 'S-PERSON',
  'multi_ner': ('S-PERSON',)},
 {'id': 2,
  'text': 'seems',
  'lemma': 'seem',
  'upos': 'VERB',
  'xpos': 'VBZ',
  'feats': 'Mood=Ind|Number=Sing|Person=3|Tense=Pres|VerbForm=Fin',
  'head': 0,
  'deprel': 'root',
  'start_char': 5,
  'end_char': 10,
  'ner': 'O',
  'multi_ner': ('O',)},
 {'id': 3,
  'text': 'honest',
  'lemma': 'honest',
  'upos': 'ADJ',
  'xpos': 'JJ',
  'feats': 'Degree=Pos',
  'head': 2,
  'deprel': 'xcomp',
  'start_char': 11,
  'end_char': 17,
  'ner': 'O',
  'multi_ner': ('O',)}]

In [7]:
# Work on a deep copy: get_all_children() adds keys to the token dicts in place,
# and the pristine `sentence` should stay as parsed.
sent = copy.deepcopy(sentence)

In [8]:
def makeCoarse(x):
    """Return the coarse form of a relation label.

    Everything from the first ":" onward is dropped, e.g. "nsubj:pass" -> "nsubj".
    A label without a colon is returned unchanged.
    """
    coarse, _colon, _subtype = x.partition(":")
    return coarse

In [9]:
def get_all_children(sentence):
    """Coarsify every dependency relation and record each token's dependents.

    ``sentence`` is modified in place:

    * every token dict gets ``"coarse_dep"``: its ``"deprel"`` with anything
      after the first colon removed (via ``makeCoarse``);
    * every token that has at least one non-punctuation dependent gets
      ``"children"``: the ids of those dependents, in sentence order.

    The function is safe to call more than once on the same list (for example
    when a notebook cell is re-run): ``"children"`` is rebuilt from scratch
    each time rather than appended to.

    Args:
        sentence: list of token dicts with ``"id"``, ``"head"`` and
            ``"deprel"`` keys. ``head - 1`` is used as the list index of the
            parent, so heads are assumed to be 1-based positions in this list.

    Returns:
        None.
    """
    # Discard any result of a previous run so dependents are not duplicated.
    for line in sentence:
        line.pop("children", None)

    for line in sentence:
        # make the dependency relation label coarse (ignore stuff after colon)
        line["coarse_dep"] = makeCoarse(line["deprel"])

        # the root has no parent to register with
        if line["coarse_dep"] == "root":
            continue

        # punctuation is not tracked as a dependent
        if line["coarse_dep"].startswith("punct"):
            continue

        headIndex = line["head"] - 1
        if headIndex < 0:
            # head == 0 means "no parent"; an index of -1 would silently
            # attach this token to the last token of the sentence.
            continue
        sentence[headIndex].setdefault("children", []).append(line["id"])

In [10]:
get_all_children(sent)
sent

[{'id': 1,
  'text': 'Bill',
  'lemma': 'Bill',
  'upos': 'PROPN',
  'xpos': 'NNP',
  'feats': 'Number=Sing',
  'head': 2,
  'deprel': 'nsubj',
  'start_char': 0,
  'end_char': 4,
  'ner': 'S-PERSON',
  'multi_ner': ('S-PERSON',),
  'coarse_dep': 'nsubj'},
 {'id': 2,
  'text': 'seems',
  'lemma': 'seem',
  'upos': 'VERB',
  'xpos': 'VBZ',
  'feats': 'Mood=Ind|Number=Sing|Person=3|Tense=Pres|VerbForm=Fin',
  'head': 0,
  'deprel': 'root',
  'start_char': 5,
  'end_char': 10,
  'ner': 'O',
  'multi_ner': ('O',),
  'children': [1, 3],
  'coarse_dep': 'root'},
 {'id': 3,
  'text': 'honest',
  'lemma': 'honest',
  'upos': 'ADJ',
  'xpos': 'JJ',
  'feats': 'Degree=Pos',
  'head': 2,
  'deprel': 'xcomp',
  'start_char': 11,
  'end_char': 17,
  'ner': 'O',
  'multi_ner': ('O',),
  'coarse_dep': 'xcomp'}]

In [11]:
def swap_order(verb_idx, obj_idx, sentence, result):
    """Helper for processing a verb/object pair: exchange their slots in ``result``.

    After the call, the verb's position in ``result`` holds the object's id and
    the object's position holds the verb's id. ``result`` is modified in place;
    nothing is returned.

    Args:
        verb_idx: 0-based index of the verb token in ``sentence``.
        obj_idx: 0-based index of the object token in ``sentence``.
        sentence: list of token dicts, each with an ``"id"`` key.
        result: list of token ids being reordered.
    """
    object_id = sentence[obj_idx]["id"]
    verb_id = sentence[verb_idx]["id"]
    result[verb_idx] = object_id
    result[obj_idx] = verb_id

In [12]:
def swap(sentence, root):
    """Depth-first walk of the dependency tree that swaps each verb with its object.

    Starting from ``root``, every reachable token is visited once (its id is
    printed as it is processed). Whenever a token whose ``upos`` is ``'VERB'``
    has a child whose ``coarse_dep`` is ``'obj'``, ``swap_order`` exchanges the
    two in the returned ordering.

    TODO: edge cases: 1. multiple obj
                      2. went to school happily -> to school went happily

    Args:
        sentence: list of token dicts carrying ``'upos'``, ``'coarse_dep'`` and,
            where applicable, ``'children'`` (lists of 1-based token ids).
        root: 1-based id of the token to start from.

    Returns:
        A list of token ids, initially ``1..len(sentence)``, with verb/object
        positions exchanged.
    """
    order = list(range(1, len(sentence) + 1))
    seen = set()
    pending = [root]

    while pending:
        current = pending.pop()
        if current in seen:
            continue
        seen.add(current)
        print(current)  # print out index of the node being processed

        parent = sentence[current - 1]
        dependents = parent.get("children", None)
        if not dependents:
            continue

        for dep_id in dependents:
            is_verb_object = (
                parent['upos'] == 'VERB'
                and sentence[dep_id - 1]['coarse_dep'] == 'obj'
            )
            if is_verb_object:
                swap_order(current - 1, dep_id - 1, sentence, order)
            if dep_id not in seen:
                pending.append(dep_id)
    return order

In [13]:
swap(sent, 2)

2
3
1


[1, 2, 3]