## TP4: Dependency Parsing

In [1]:
import spacy
from spacy.tokens import Doc
from spacy.training.example import Example

In [2]:
# 1: Data
training_data = [
    ("There are few securities .", {"heads": [2, 1, 3, 3, 1], "deps": ["expl", "ROOT","Quantity", "dobj","punct"]}),
    ("They have many stocks .", {"heads": [1, 2, 4, 2, 4], "deps": ["nsubj", "ROOT","Quantity", "dobj","punct"]}),
    ("Giv me somme apples .", {"heads": [1, 2, 4, 2, 4], "deps": ["nsubj", "ROOT", "Quantity", "dobj","punct"]}),
    ("they read all this .", {"heads": [2, 1, 3, 3, 1], "deps": ["nsubj", "ROOT", "Quantity", "dobj","punct"]}),
    ("Open the half box .", {"heads": [1, 2, 4, 2, 4], "deps": ["nsubj", "ROOT", "Quantity", "dobj","punct"]}),
    ("they have numerous caracters .", {"heads": [1, 2, 4, 2, 4], "deps": ["nsubj", "ROOT", "Quantity", "dobj","punct"]}),
    ("I bay enough vegetables .", {"heads": [1, 2, 4, 2, 4], "deps": ["nsubj", "ROOT", "Quantity", "dobj","punct"]}),
    ("I ate whole of them .", {"heads": [1, 2, 4, 2, 4, 3], "deps": ["nsubj", "ROOT", "Quantity", "dobj","punct", "dobj"]})
    ]

In [3]:
# Load a pre-trained English model
nlp = spacy.load("en_core_web_sm")

In [4]:
# 2:
# Add a custom label for quantity adjectives to the existing parser
dep = nlp.get_pipe("parser")
dep.add_label("Quantity")

1

In [5]:
# 3:
# Train the parser on the training dataset
n_iter = 50
for j in range(n_iter):
    for text, annotations in training_data:
        words = text.split()
        heads = annotations["heads"]
        deps = annotations["deps"]

        # Ensure that the lengths match
        if len(words) == len(heads) == len(deps):
            doc = Doc(nlp.vocab, words=words)
            for i in range(len(words)):
                token = doc[i]
                token.dep_ = deps[i]  # maj
                if heads[i] != 0:
                    token.head = doc[heads[i]]

            example = Example.from_dict(doc, annotations)
            nlp.update([example], drop=0.5, losses={})
        else:
            print(f"Error in data: Length mismatch in sentence '{text}'.")


In [6]:
# Save the trained parser
nlp.to_disk("quantity_parser")
print("Fin de creation de parser...")

Fin de creation de parser...


In [8]:
import os
print(os.getcwd())


/content


In [12]:
quantity_parser = spacy.load("/content/quantity_parser")

# Test data
test_data = [
    "They have many problems .",
    "Open the half box .",
    "I like all of you ."
]

In [13]:

# Process the test data using the loaded parser
for text in test_data:
    doc = quantity_parser(text)

    # Extract and print dependency information
    print("Text:", text)
    print("Dependencies:", [(token.text, token.dep_) for token in doc])
    print()

Text: They have many problems .
Dependencies: [('They', 'nsubj'), ('have', 'dep'), ('many', 'Quantity'), ('problems', 'dobj'), ('.', 'ROOT')]

Text: Open the half box .
Dependencies: [('Open', 'nsubj'), ('the', 'dep'), ('half', 'Quantity'), ('box', 'dobj'), ('.', 'ROOT')]

Text: I like all of you .
Dependencies: [('I', 'ROOT'), ('like', 'ROOT'), ('all', 'Quantity'), ('of', 'dobj'), ('you', 'ROOT'), ('.', 'dep')]



In [15]:
from spacy import displacy

# Process the test data using the loaded parser
for text in test_data:
    doc = quantity_parser(text)

    # Extract and print dependency information
    print("Text:", text)
    print("Dependencies:", [(token.text, token.dep_) for token in doc])

    # Visualize the dependency parse tree with black background and white text
    options = {'distance': 90, 'bg': '#000000', 'color': '#ffffff'}
    displacy.render(doc, style="dep", jupyter=True, options=options)
    print()


Text: They have many problems .
Dependencies: [('They', 'nsubj'), ('have', 'dep'), ('many', 'Quantity'), ('problems', 'dobj'), ('.', 'ROOT')]



Text: Open the half box .
Dependencies: [('Open', 'nsubj'), ('the', 'dep'), ('half', 'Quantity'), ('box', 'dobj'), ('.', 'ROOT')]



Text: I like all of you .
Dependencies: [('I', 'ROOT'), ('like', 'ROOT'), ('all', 'Quantity'), ('of', 'dobj'), ('you', 'ROOT'), ('.', 'dep')]



