In [1]:
import json
from typing import List, Dict, Optional

class Word:
    def __init__(self, text: str, lemma: str, pos: str, index: int):
        self.text = text
        self.lemma = lemma
        self.pos = pos
        self.index = index  # 1-based index as in CoreNLP
        self.head = None  # Will be set later
        self.dep = None  # Dependency relation
        self.children = []  # List of Word objects
    
    def __repr__(self):
        return f"Word(text='{self.text}', lemma='{self.lemma}', pos='{self.pos}')"

class Sentence:
    def __init__(self, words: List[Word]):
        self.words = words
    
    def __repr__(self):
        return f"Sentence({self.words})"
    
    def get_root(self) -> Optional[Word]:
        for word in self.words:
            if word.dep == 'ROOT':
                return word
        
        # Fallback: try to find a word with no head
        for word in self.words:
            if word.head is None:
                return word
        
        return None


def load_parse_results(json_file: str) -> List[Sentence]:
    with open(json_file, 'r', encoding='utf-8') as f:
        data = json.load(f)
    
    sentences = []
    for sentence_data in data['sentences']:
        words_data = sentence_data['tokens']
        deps_data = sentence_data['basicDependencies']
        
        # Create Word objects
        words = [Word(w['word'], w['lemma'], w['pos'], w['index']) for w in words_data]
        
        # Build dependency relations
        for dep in deps_data:
            if dep['dep'] == 'ROOT':
                continue
            
            governor_idx = dep['governor']
            dependent_idx = dep['dependent']
            
            governor = words[governor_idx - 1]
            dependent = words[dependent_idx - 1]
            
            dependent.head = governor
            dependent.dep = dep['dep']
            governor.children.append(dependent)
        
        sentences.append(Sentence(words))
    
    return sentences


json_file = "ai.en.txt.json"
    


task 40

In [2]:
# Problem 40: Read parse results and show first sentence
sentences = load_parse_results(json_file)
first_sentence = sentences[0]
print(first_sentence)



Sentence([Word(text='In', lemma='in', pos='IN'), Word(text='computer', lemma='computer', pos='NN'), Word(text='science', lemma='science', pos='NN'), Word(text=',', lemma=',', pos=','), Word(text='artificial', lemma='artificial', pos='JJ'), Word(text='intelligence', lemma='intelligence', pos='NN'), Word(text='-LRB-', lemma='-lrb-', pos='-LRB-'), Word(text='AI', lemma='ai', pos='NN'), Word(text='-RRB-', lemma='-rrb-', pos='-RRB-'), Word(text=',', lemma=',', pos=','), Word(text='sometimes', lemma='sometimes', pos='RB'), Word(text='called', lemma='call', pos='VBN'), Word(text='machine', lemma='machine', pos='NN'), Word(text='intelligence', lemma='intelligence', pos='NN'), Word(text=',', lemma=',', pos=','), Word(text='is', lemma='be', pos='VBZ'), Word(text='intelligence', lemma='intelligence', pos='NN'), Word(text='demonstrated', lemma='demonstrate', pos='VBN'), Word(text='by', lemma='by', pos='IN'), Word(text='machines', lemma='machine', pos='NNS'), Word(text=',', lemma=',', pos=','), Wor

task 41

In [3]:
# Problem 41: Show governor-dependent pairs for first sentence
sentences = load_parse_results(json_file)
first_sentence = sentences[0]

print("Governor-Dependent pairs:")
for word in first_sentence.words:
    if word.head:
        print(f"{word.head.text} ({word.head.pos}) <-{word.dep}-- {word.text} ({word.pos})")


Governor-Dependent pairs:
science (NN) <-case-- In (IN)
science (NN) <-compound-- computer (NN)
called (VBN) <-nmod-- science (NN)
called (VBN) <-punct-- , (,)
intelligence (NN) <-amod-- artificial (JJ)
called (VBN) <-nsubj-- intelligence (NN)
AI (NN) <-punct-- -LRB- (-LRB-)
intelligence (NN) <-appos-- AI (NN)
AI (NN) <-punct-- -RRB- (-RRB-)
intelligence (NN) <-punct-- , (,)
called (VBN) <-advmod-- sometimes (RB)
intelligence (NN) <-compound-- machine (NN)
called (VBN) <-xcomp-- intelligence (NN)
called (VBN) <-punct-- , (,)
called (VBN) <-advcl-- is (VBZ)
is (VBZ) <-nsubj-- intelligence (NN)
intelligence (NN) <-acl-- demonstrated (VBN)
machines (NNS) <-case-- by (IN)
demonstrated (VBN) <-nmod-- machines (NNS)
intelligence (NN) <-punct-- , (,)
contrast (NN) <-case-- in (IN)
intelligence (NN) <-nmod-- contrast (NN)
intelligence (NN) <-case-- to (TO)
intelligence (NN) <-det-- the (DT)
intelligence (NN) <-amod-- natural (JJ)
contrast (NN) <-nmod-- intelligence (NN)
intelligence (NN) <-acl

task 42

In [4]:
# Problem 42: Show root words
sentences = load_parse_results(json_file)
    
print("Root words:")
for i, sentence in enumerate(sentences, 1):
    root = sentence.get_root()
    print(f"Sentence {i}: {root.text} ({root.pos})")


Root words:
Sentence 1: called (VBN)
Sentence 2: define (VBP)
Sentence 3: used (VBN)
Sentence 4: removed (VBN)
Sentence 5: says (VBZ)
Sentence 6: excluded (VBN)
Sentence 7: classified (VBN)
Sentence 8: founded (VBN)
Sentence 9: divided (VBN)
Sentence 10: based (VBN)
Sentence 11: based (VBN)
Sentence 12: include (VBP)
Sentence 13: goals (NNS)
Sentence 14: include (VBP)
Sentence 15: used (VBN)
Sentence 16: draws (VBZ)
Sentence 17: founded (VBN)
Sentence 18: raises (VBZ)
Sentence 19: explored (VBN)
Sentence 20: consider (VBP)
Sentence 21: believe (VBP)
Sentence 22: experienced (VBN)
Sentence 23: appeared (VBD)
Sentence 24: raised (VBD)
Sentence 25: study (NN)
Sentence 26: led (VBD)
Sentence 27: known (VBN)
Sentence 28: led (VBD)
Sentence 29: Turing (VBG)
Sentence 30: work (NN)
Sentence 31: born (VBN)
Sentence 32: became (VBD)
Sentence 33: produced (VBD)
Sentence 34: established (VBN)
Sentence 35: optimistic (JJ)
Sentence 36: agreed (VBD)
Sentence 37: failed (VBD)
Sentence 38: slowed (VBD)

task 43

In [5]:

# Problem 43: Show verb governors and noun dependents
sentences = load_parse_results(json_file)

print("Verb governors with noun dependents:")
for sentence in sentences:
    for word in sentence.words:
        if word.pos.startswith('VB'):  # Verb
            for child in word.children:
                if child.pos.startswith('NN'):  # Noun
                    print(f"{word.text} ({word.pos}) <-{child.dep}-- {child.text} ({child.pos})")


Verb governors with noun dependents:
called (VBN) <-nmod-- science (NN)
called (VBN) <-nsubj-- intelligence (NN)
called (VBN) <-xcomp-- intelligence (NN)
is (VBZ) <-nsubj-- intelligence (NN)
demonstrated (VBN) <-nmod-- machines (NNS)
displayed (VBN) <-nmod-- humans (NNS)
define (VBP) <-nsubj-- textbooks (NNS)
define (VBP) <-dobj-- field (NN)
define (VBP) <-nmod-- study (NN)
perceives (VBZ) <-dobj-- environment (NN)
takes (VBZ) <-dobj-- actions (NNS)
maximize (VBP) <-dobj-- chance (NN)
achieving (VBG) <-dobj-- goals (NNS)
used (VBN) <-nsubjpass-- intelligence (NN)
describe (VB) <-dobj-- machines (NNS)
mimic (VBP) <-dobj-- functions (NNS)
associate (VBP) <-nsubj-- humans (NNS)
associate (VBP) <-nmod-- mind (NN)
solving (VBG) <-nsubj-- problem (NN)
become (VBP) <-nsubj-- machines (NNS)
require (VB) <-dobj-- intelligence (NN)
removed (VBN) <-nsubjpass-- tasks (NNS)
removed (VBN) <-nmod-- definition (NN)
known (VBN) <-nmod-- effect (NN)
says (VBZ) <-nsubj-- quip (NN)
done (VBN) <-nsubjpass-

task 44

In [6]:
# Problem 44: Visualize dependency tree (simplified version)
sentence_idx = 0
def _print_tree(word: Word, level: int):
    print("  " * level + f"{word.text} ({word.pos})")
    for child in sorted(word.children, key=lambda x: x.index):
        _print_tree(child, level + 1)
sentences = load_parse_results(json_file)
sentence = sentences[sentence_idx]

# Simple text visualization
print("Dependency tree (text representation):")
root = sentence.get_root()
_print_tree(root, 0)


Dependency tree (text representation):
called (VBN)
  science (NN)
    In (IN)
    computer (NN)
  , (,)
  intelligence (NN)
    artificial (JJ)
    AI (NN)
      -LRB- (-LRB-)
      -RRB- (-RRB-)
    , (,)
  sometimes (RB)
  intelligence (NN)
    machine (NN)
  , (,)
  is (VBZ)
    intelligence (NN)
      demonstrated (VBN)
        machines (NNS)
          by (IN)
      , (,)
      contrast (NN)
        in (IN)
        intelligence (NN)
          to (TO)
          the (DT)
          natural (JJ)
          displayed (VBN)
            humans (NNS)
              by (IN)
              and (CC)
              animals (NNS)
  . (.)


task 45

In [7]:


# Problem 45: Extract SVO triples
sentences = load_parse_results(json_file)

print("SVO triples:")
for sentence in sentences:
    for word in sentence.words:
        if word.pos == 'VBD':  # Verb, past tense
            nsubj = None
            dobj = None
            
            for child in word.children:
                if child.dep == 'nsubj':
                    nsubj = child
                elif child.dep == 'dobj':
                    dobj = child
            
            if nsubj and dobj:
                print(f"({nsubj.text}, {word.text}, {dobj.text})")


SVO triples:
(characters, raised, many)
(this, led, researchers)
(They, produced, programs)
(governments, cut, research)
(project, inspired, U.S)
(development, enabled, development)
(match, defeated, champions)
(computers, enabled, advances)
(AlphaGo, won, games)
(AlphaGo, won, match)
(who, held, ranking)
(This, marked, completion)
(they, had, AI)
(China, accelerated, funding)
(that, undiscovered, swans)
(they, advocated, violence)
(researchers, developed, algorithms)
(that, imitated, reasoning)
(they, experienced, explosion)
(DeepMind, developed, intelligence)
(number, explored, connection)
(Some, built, machines)
(that, used, networks)
(one, developed, style)
(Simon, studied, skills)
(work, laid, foundations)
(team, used, results)
(people, used, algorithms)
(Schank, described, approaches)
(revolution, led, form)
(Researchers, rejected, AI)
(researchers, adopted, tools)
(language, permitted, level)
(each, cast, vote)
(Rosenblatt, invented, perceptron)
(Aizenberg, introduced, it)
(publ

task 46

In [8]:
# Problem 46: Expanded SVO triples with phrases
sentences = load_parse_results(json_file)
def _get_phrase(word: Word) -> str:
    """Get the full phrase for a word by including its compound/modifiers"""
    phrase = [word.text]
    
    # Include compound nouns (e.g., "Frank Rosenblatt")
    for child in word.children:
        if child.dep in ['compound', 'amod']:
            phrase.insert(0, child.text)
    
    return ' '.join(phrase)

print("Expanded SVO triples:")
for sentence in sentences:
    for word in sentence.words:
        if word.pos == 'VBD':  # Verb, past tense
            nsubj = None
            dobj = None
            
            for child in word.children:
                if child.dep == 'nsubj':
                    nsubj = _get_phrase(child)
                elif child.dep == 'dobj':
                    dobj = _get_phrase(child)
            
            if nsubj and dobj:
                print(f"({nsubj}, {word.text}, {dobj})")




Expanded SVO triples:
(characters, raised, many)
(this, led, researchers)
(They, produced, programs)
(U.S. governments, cut, exploratory research)
(computer generation fifth project, inspired, U.S)
(development, enabled, development)
(exhibition show quiz Jeopardy! match, defeated, Jeopardy! greatest champions)
(Faster computers, enabled, advances)
(AlphaGo, won, games)
(AlphaGo, won, three-game match)
(who, held, No. world ranking)
(This, marked, completion)
(they, had, incorporated AI)
(China, accelerated, government funding)
(that, undiscovered, black swans)
(they, advocated, violence)
(researchers, developed, algorithms)
(that, imitated, step-by-step reasoning)
(they, experienced, combinatorial explosion)
(DeepMind, developed, artificial generalized intelligence)
(number, explored, connection)
(Some, built, machines)
(that, used, electronic networks)
(one, developed, own style)
(Herbert Economist Simon, studied, problem-solving human skills)
(work, laid, foundations)
(research team

task 47

In [9]:
# Problem 47: Triples from passive sentences
sentences = load_parse_results(json_file)

print("Triples from passive sentences:")
for sentence in sentences:
    for word in sentence.words:
        if word.pos == 'VBN':  # Verb, past participle (often passive)
            nsubjpass = None
            prep_objs = {}
            
            for child in word.children:
                if child.dep == 'nsubjpass':
                    nsubjpass = _get_phrase(child)
                elif child.dep.startswith('prep'):
                    # Look for pobj of the preposition
                    for grandchild in child.children:
                        if grandchild.dep == 'pobj':
                            prep_objs[child.text] = _get_phrase(grandchild)
            
            if nsubjpass and prep_objs:
                for prep, obj in prep_objs.items():
                    predicate = f"{word.text}-{prep}"
                    print(f"({nsubjpass}, {predicate}, {obj})")


Triples from passive sentences:


task 48

In [10]:
# Problem 48: Paths from root to nouns
sentences = load_parse_results(json_file)
def _get_path_to_root(word: Word) -> List[str]:
    path = [word.text]
    current = word
    
    while current.head and current.dep != 'ROOT':
        current = current.head
        path.append(current.text)
    
    return path[::-1]  # Reverse to show root to noun

print("Paths from root to nouns:")
for sentence in sentences:
    root = sentence.get_root()
    for word in sentence.words:
        if word.pos.startswith('NN'):  # Noun
            path = _get_path_to_root(word)
            print(" -> ".join(path))



Paths from root to nouns:
called -> science -> computer
called -> science
called -> intelligence
called -> intelligence -> AI
called -> intelligence -> machine
called -> intelligence
called -> is -> intelligence
called -> is -> intelligence -> demonstrated -> machines
called -> is -> intelligence -> contrast
called -> is -> intelligence -> contrast -> intelligence
called -> is -> intelligence -> contrast -> intelligence -> displayed -> humans
called -> is -> intelligence -> contrast -> intelligence -> displayed -> humans -> animals
define -> textbooks -> AI
define -> textbooks
define -> field
define -> study
define -> study -> agents
define -> study -> agents -> device
define -> study -> agents -> device -> perceives -> environment
define -> study -> agents -> device -> perceives -> takes -> actions
define -> study -> agents -> device -> perceives -> takes -> actions -> maximize -> chance
define -> study -> agents -> device -> perceives -> takes -> actions -> maximize -> chance -> achi

task 49

In [11]:
# Problem 49: Shortest path between two nouns
sentences = load_parse_results(json_file)

def _get_shortest_path(word1: Word, word2: Word) -> str:
    path1 = _get_path_to_root(word1)
    path2 = _get_path_to_root(word2)
    
    # Find lowest common ancestor
    lca = None
    for i, (w1, w2) in enumerate(zip(path1, path2)):
        if w1 != w2:
            lca = path1[i-1] if i > 0 else None
            break
    else:
        lca = path1[min(len(path1), len(path2)) - 1]
    
    if not lca:
        return f"X <- {word1.text} -> Y"
    
    # Get path from word1 to LCA
    part1 = []
    current = word1
    while current.text != lca:
        part1.append(current.text)
        current = current.head
    
    # Get path from LCA to word2
    part2 = []
    current = word2
    while current.text != lca:
        part2.append(current.text)
        current = current.head
    
    # Build the path string
    path_str = "X"
    for word in part1[:-1]:
        path_str += f" <- {word}"
    if part1:
        path_str += f" <- {part1[-1]}"
    
    if part1 and part2:
        path_str += " -> "
    elif not part1 and part2:
        path_str += " -> "
    
    if part2:
        path_str += " -> ".join(reversed(part2))
    path_str += " -> Y"
    
    return path_str.replace("X <-", "X").replace("-> Y", "Y")


print("Shortest paths between noun pairs:")
for sentence in sentences:
    nouns = [w for w in sentence.words if w.pos.startswith('NN')]
    
    for i in range(len(nouns)):
        for j in range(i + 1, len(nouns)):
            path = _get_shortest_path(nouns[i], nouns[j])
            print(path)


Shortest paths between noun pairs:
X computer Y
X computer <- science -> intelligence Y
X computer <- science -> intelligence -> AI Y
X computer <- science -> intelligence -> machine Y
X computer <- science -> intelligence Y
X computer <- science -> is -> intelligence Y
X computer <- science -> is -> intelligence -> demonstrated -> machines Y
X computer <- science -> is -> intelligence -> contrast Y
X computer <- science -> is -> intelligence -> contrast -> intelligence Y
X computer <- science -> is -> intelligence -> contrast -> intelligence -> displayed -> humans Y
X computer <- science -> is -> intelligence -> contrast -> intelligence -> displayed -> humans -> animals Y
X science -> intelligence Y
X science -> intelligence -> AI Y
X science -> intelligence -> machine Y
X science -> intelligence Y
X science -> is -> intelligence Y
X science -> is -> intelligence -> demonstrated -> machines Y
X science -> is -> intelligence -> contrast Y
X science -> is -> intelligence -> contrast -> 