# Collecting VerbNet Terms

This notebook parses all of the VerbNet XML class definitions, extracting every PREDicate that appears in the FRAME SEMANTICS together with every ARG type-value tuple. This will allow DNA to understand/account for all the semantics that can be expressed.
    
An example XML structure is:
```
<VNCLASS xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" ID="dedicate-79" ...> 
    <MEMBERS>
        <MEMBER name="dedicate" wn="dedicate%2:32:00" grouping="dedicate.01"/>
        <MEMBER name="devote" wn="devote%2:32:00" grouping="devote.01"/>
        <MEMBER name="commit" wn="commit%2:32:01 commit%2:40:00" grouping="commit.02"/>
    </MEMBERS>
    <THEMROLES>
        ...
    </THEMROLES>
    <FRAMES>
        <FRAME>
            <DESCRIPTION descriptionNumber="8.1" primary="NP V NP S_ING" secondary="NP-P-ING-SC; to-PP" .../>
            <EXAMPLES>
                <EXAMPLE>I dedicated myself to the cause.</EXAMPLE>
            </EXAMPLES>
            <SYNTAX>
                <NP value="Agent">
                    <SYNRESTRS/>
                </NP>
                <VERB/>
                <NP value="Theme">
                    <SYNRESTRS/>
                </NP>
                <PREP value="to">
                    <SYNRESTRS/>
                </PREP>
                <NP value="Goal">
                    <SYNRESTRS/>
                </NP>
            </SYNTAX>
            <SEMANTICS>
                <PRED value="dedicate">
                    <ARGS>
                        <ARG type="Event" value="during(E)"/>
                        <ARG type="ThemRole" value="Agent"/>
                        <ARG type="ThemRole" value="Theme"/>
                        <ARG type="ThemRole" value="Goal"/>
                    </ARGS>
                </PRED>
            </SEMANTICS>
        </FRAME>
        <FRAME>
            <DESCRIPTION descriptionNumber="0.2" primary="NP V NP PP.goal" secondary="NP-PP; to-PP" .../>
            <EXAMPLES>
                <EXAMPLE>I dedicated myself to the cause.</EXAMPLE>
            </EXAMPLES>
            <SYNTAX>
                <NP value="Agent">
                    <SYNRESTRS/>
                </NP>
                <VERB/>
                <NP value="Theme">
                    <SYNRESTRS/>
                </NP>
                <PREP value="to">
                    <SELRESTRS/>
                </PREP>
                <NP value="Goal">
                    <SYNRESTRS>
                        <SYNRESTR Value="-" type="sentential"/>
                    </SYNRESTRS>
                </NP>
            </SYNTAX>
            <SEMANTICS>
                <PRED value="dedicate">
                    <ARGS>
                        <ARG type="Event" value="during(E)"/>
                        <ARG type="ThemRole" value="Agent"/>
                        <ARG type="ThemRole" value="Theme"/>
                        <ARG type="ThemRole" value="Goal"/>
                    </ARGS>
                </PRED>
            </SEMANTICS>
        </FRAME>
    </FRAMES>
    <SUBCLASSES/>
</VNCLASS>
```

Parsing the example above captures the following details:
* The possible PREDicates in the FRAME SEMANTICS => 'dedicate'
* The ARG type-value tuples =>
  * 'Event', 'during(E)'
  * 'ThemRole', 'Agent'
  * 'ThemRole', 'Theme'
  * 'ThemRole', 'Goal'
  

In [1]:
# Imports
from pathlib import Path
import xml.etree.ElementTree as ET

In [2]:
# Constants
# Root directory of the local VerbNet 3.3 download. This is a machine-specific
# absolute path; the processing cell below searches it recursively for .xml files.
verbnet_dir = '/Users/andreaw/Documents/VerbNet3.3'

In [4]:
# Module-level accumulators filled in by get_pred_details / get_arg_details.
# Being sets, they keep each entry only once no matter how many files contain it.
# Distinct PRED "value" attributes seen so far
preds = set()
# Distinct (ARG "type", ARG "value") tuples seen so far
args = set()

def get_arg_details(etree):
    """Record every ARG (type, value) pair found in a VerbNet class element.

    The pairs are added to the module-level ``args`` set. The element's own
    frames are handled first, then each nested VNSUBCLASS is processed the
    same way.

    :param etree: A VNCLASS or VNSUBCLASS element
    :return: None (``args`` is updated in place)
    """
    args.update(
        (arg_elem.attrib["type"], arg_elem.attrib["value"])
        for arg_elem in etree.iterfind('./FRAMES/FRAME/SEMANTICS/PRED/ARGS/ARG')
    )

    # Subclasses share the class layout, so descend into each of them in turn
    for nested_class in etree.iterfind('./SUBCLASSES/VNSUBCLASS'):
        get_arg_details(nested_class)

def get_pred_details(etree):
    """Record the value of every semantic PRED found in a VerbNet class element.

    The values are added to the module-level ``preds`` set. The element's own
    frames are handled first, then each nested VNSUBCLASS is processed the
    same way.

    :param etree: A VNCLASS or VNSUBCLASS element
    :return: None (``preds`` is updated in place)
    """
    preds.update(
        pred_elem.attrib["value"]
        for pred_elem in etree.iterfind('./FRAMES/FRAME/SEMANTICS/PRED')
    )

    # Subclasses share the class layout, so descend into each of them in turn
    for nested_class in etree.iterfind('./SUBCLASSES/VNSUBCLASS'):
        get_pred_details(nested_class)
        

# Process each of the VerbNet files
# (the '**/*.xml' pattern also matches files in nested sub-directories)
file_list = Path(verbnet_dir).glob('**/*.xml')
for file_path in file_list:
    # Create the tree. The parser is handed the file itself rather than text
    # that was already decoded, so the encoding declared in the XML is honoured
    # instead of the platform's default text encoding.
    vn_class = ET.parse(file_path).getroot()
    # Process from the top down, recursively
    get_pred_details(vn_class)
    get_arg_details(vn_class)

# Sets are unordered, so sort them to get a stable, readable listing
print(sorted(preds))
print()
print(sorted(args))

['Adv', 'about', 'act', 'adjust', 'admit', 'adopt', 'agree', 'alive', 'allow', 'apart', 'appear', 'apply_heat', 'apply_material', 'approve', 'assess', 'attached', 'attempt', 'attract', 'authority_relationship', 'avoid', 'base', 'begin', 'believe', 'benefit', 'body_motion', 'body_process', 'body_reflex', 'calculate', 'capacity', 'cause', 'change_value', 'characterize', 'charge', 'conclude', 'confined', 'conflict', 'confront', 'consider', 'conspire', 'contact', 'contain', 'continue', 'convert', 'cooked', 'cooperate', 'cope', 'correlate', 'cost', 'covered', 'created_image', 'declare', 'dedicate', 'defend', 'degradation_material_integrity', 'delay', 'depend', 'describe', 'designated', 'desire', 'destroyed', 'different', 'direction', 'disappear', 'discomfort', 'discover', 'do', 'earn', 'emit', 'emotional_state', 'end', 'enforce', 'ensure', 'equals', 'exceed', 'exert_force', 'exist', 'experience', 'express', 'filled_with', 'financial_interest_in', 'financial_relationship', 'flinch', 'free', 

In [5]:
# Process again for VerbNet 3.4
# NOTE: preds and args are not cleared here, so anything they already hold from
# an earlier run is kept and the lists printed below are cumulative.
verbnet_dir = '/Users/andreaw/Documents/VerbNet3.4'

# Process each of the VerbNet files
# (the '**/*.xml' pattern also matches files in nested sub-directories)
file_list = Path(verbnet_dir).glob('**/*.xml')
for file_path in file_list:
    # Create the tree. The parser is handed the file itself rather than text
    # that was already decoded, so the encoding declared in the XML is honoured
    # instead of the platform's default text encoding.
    vn_class = ET.parse(file_path).getroot()
    # Process from the top down, recursively
    get_pred_details(vn_class)
    get_arg_details(vn_class)

# Sets are unordered, so sort them to get a stable, readable listing
print(sorted(preds))
print()
print(sorted(args))

['Adv', 'about', 'act', 'adjust', 'adjusted', 'admit', 'adopt', 'agree', 'alive', 'allow', 'apart', 'appear', 'apply_heat', 'apply_material', 'approve', 'assess', 'attached', 'attempt', 'attract', 'authority_relationship', 'avoid', 'base', 'be', 'becomes', 'begin', 'believe', 'benefit', 'body_motion', 'body_process', 'body_reflex', 'body_sensation', 'calculate', 'capacity', 'cause', 'change_value', 'characterize', 'charge', 'co-temporal', 'conclude', 'confined', 'conflict', 'confront', 'consider', 'conspire', 'contact', 'contain', 'continue', 'convert', 'cooked', 'cooperate', 'cope', 'correlate', 'correlated', 'cost', 'covered', 'create_image', 'created_image', 'declare', 'dedicate', 'defend', 'degradation_material_integrity', 'delay', 'depend', 'describe', 'designated', 'desire', 'destroyed', 'develop', 'different', 'direction', 'disappear', 'discomfort', 'discover', 'do', 'duration', 'earn', 'elliptical_motion', 'emit', 'emotional_state', 'end', 'enforce', 'ensure', 'equals', 'exceed