# Mining Wikipedia for Semantic Relations

I define some functions to extract some semantic relations from Wikipedia text.  Specifically, they are relations that could be useful for crafting "automatic allusions"---i.e., given a `target word` such as `"multitudinous"`, return a snippet of text containing an old proper noun, such as `"as Ashurbanipal's army"`. 

## Mining ⛏️

### Helper Functions

In [1]:
import spacy
nlp = spacy.load("en_core_web_lg")

In [2]:
def test_contiguous(numbers):
    """
    just make sure that ints are contiguous
    useful for making sure a span of text isn't missing any words
    """
    for en,n in enumerate(numbers[:-1]):
        if n+1!=numbers[en+1]:
            return False
    return True

print(test_contiguous([123,124,125,126]))
print(test_contiguous([123,20]))

True
False


In [3]:
def get_chunk(word,tempspacy):
    """
    get all ancestors of a word
    exclude bits that are connected by appos dependency
    """
    descendants = [i for i in tempspacy if (word.is_ancestor(i) or i==word)]
    to_ban = [i for i in descendants if (i.dep_ in ["appos"] and i!=word)]
    descendants_valid = [i for i in descendants if i not in to_ban]
    descendants_valid = [i for i in descendants_valid if not any([banned.is_ancestor(i) for banned in to_ban])]
    return descendants_valid

s = "Moatt of Carthage, a wise king, is very tired."
sp = nlp(s)
print(sp[0])
get_chunk(sp[0],sp)

Moatt


[Moatt, of, Carthage, ,]

In [4]:
def rclip(fragment,bad_ending_pos=["PUNCT","SPACE"]):#ok_ending_pos=["NOUN","PROPN"]):
    """
    remove everything from the right of a spacy sequence if doesn't end correctly
    """
    while len(fragment)>0 and (fragment[-1].pos_ in bad_ending_pos and fragment[-1].text!=")"):
            fragment.pop()
    return fragment


In [5]:
rclip(get_chunk(sp[0],sp))

[Moatt, of, Carthage]

### Functions for Mining Relations from Text Parsed with `spaCy`

In [27]:
def noun_of_nounphrase(tempspacy):
    """
    """
    to_return = []
    for t in tempspacy:
        if t.head.text.lower()=="of" and t.pos_ in ["NOUN","PROPN"]:
            fragment = rclip([i for i in tempspacy if (t.head.is_ancestor(i))])
            fragment_pos = [i.pos_ for i in fragment]
            fragment_indices = [i.i for i in fragment]
            if test_contiguous(fragment_indices):
                if "PROPN" in fragment_pos:
                    to_return.append({
                        "type":"of",
                        "target":str(t.head.head).lower(),
                        #"target_original":str(t.head.head),
                        "other_words":"|".join([child.text.lower() for child in t.head.head.children if child.dep_ in ['amod',"conj","advmod"]]),
                        "allusion":"of "+str(tempspacy[fragment_indices[0]:fragment_indices[-1]+1]),
                        "allusion2":None,
                        "target_pos":t.head.head.pos_,

                    })
    return to_return

noun_of_nounphrase(nlp("Beyond the old cat's stinky legs and ugly face.  The semi-ugly library of the cat of Alexandria.  And there was a Library of Alexandria. The cat's beautiful felt. A ball of yarn. The Shining Scales of Agoatus."))

[{'type': 'of',
  'target': 'library',
  'other_words': 'semi|-|ugly',
  'allusion': 'of the cat of Alexandria',
  'allusion2': None,
  'target_pos': 'NOUN'},
 {'type': 'of',
  'target': 'cat',
  'other_words': '',
  'allusion': 'of Alexandria',
  'allusion2': None,
  'target_pos': 'NOUN'},
 {'type': 'of',
  'target': 'library',
  'other_words': '',
  'allusion': 'of Alexandria',
  'allusion2': None,
  'target_pos': 'PROPN'},
 {'type': 'of',
  'target': 'scales',
  'other_words': 'shining',
  'allusion': 'of Agoatus',
  'allusion2': None,
  'target_pos': 'PROPN'}]

In [28]:
def verb2np(tempspacy):
    to_return = []
    for t in tempspacy:
        if t.dep_=="nsubj":
            subtree = list(t.head.subtree)
            span = tempspacy[subtree[0].i:subtree[-1].i+1]
            if span[0].dep_=="mark":
                span = tempspacy[subtree[1].i:subtree[-1].i+1]
            fragment = [i for i in tempspacy if (t.is_ancestor(i) or i==t)]
            fragment_pos = [i.pos_ for i in fragment]
            fragment_indices = [i.i for i in fragment]
            if test_contiguous(fragment_indices):
                if "PROPN" in fragment_pos:
                    if t.head.lemma_ not in ["be"]: # boring
                        to_return.append({"type":"v2np",
                                          "target":t.head.lemma_,
                                          "other_words":"|".join([child.lemma_ for child in t.head.children if child.dep_ in ["dobj","conj","advmod","prt"]]),
                                          "allusion":str(span),
                                          "allusion2":str(tempspacy[fragment_indices[0]:fragment_indices[-1]+1]),
                                          "target_pos":t.head.pos_,
                                         })
    return to_return

verb2np(nlp("I have heard that Ashurbanurpal hurt my friend. I have heard a rumor that Ashurbanipal's army first advanced south and secured the city of Der, and there were people there"))

[{'type': 'v2np',
  'target': 'hurt',
  'other_words': 'friend',
  'allusion': 'Ashurbanurpal hurt my friend',
  'allusion2': 'Ashurbanurpal',
  'target_pos': 'VERB'},
 {'type': 'v2np',
  'target': 'advance',
  'other_words': 'first|south|secure',
  'allusion': "Ashurbanipal's army first advanced south and secured the city of Der",
  'allusion2': "Ashurbanipal's army",
  'target_pos': 'VERB'}]

In [44]:
from collections import defaultdict

def adj_to_noun_phrase(tempspacy):
    to_return = []
    for t in tempspacy:
        if t.dep_=="amod":
            if "neg" not in [c.dep_ for c in t.children]:
                if (t.head.pos_=="NOUN"):
                    #fragment = rclip([i for i in tempspacy if (t.head.is_ancestor(i) or i==t.head)])
                    fragment = rclip(get_chunk(t.head,tempspacy))
                    fragment_pos = [i.pos_ for i in fragment]
                    fragment_indices = [i.i for i in fragment]
                    if test_contiguous(fragment_indices):
                        if "PROPN" in fragment_pos:
                            to_return.append({
                                "target":str(t).lower(),
                                "allusion":str(tempspacy[fragment_indices[0]:fragment_indices[-1]+1]),
                                "type":"adj2np",
                                "allusion2":None,
                                "other_words":None,
                                "target_pos":"ADJ",
                            })
    return to_return
        
adj_to_noun_phrase(nlp("I saw the rebellious, stinky friends of Azerooit and his merciless ilk, a bad team.  And I saw the shining scales of Aotis."))

[{'target': 'rebellious',
  'allusion': 'the rebellious, stinky friends of Azerooit and his merciless ilk',
  'type': 'adj2np',
  'allusion2': None,
  'other_words': None,
  'target_pos': 'ADJ'},
 {'target': 'stinky',
  'allusion': 'the rebellious, stinky friends of Azerooit and his merciless ilk',
  'type': 'adj2np',
  'allusion2': None,
  'other_words': None,
  'target_pos': 'ADJ'},
 {'target': 'shining',
  'allusion': 'the shining scales of Aotis',
  'type': 'adj2np',
  'allusion2': None,
  'other_words': None,
  'target_pos': 'ADJ'}]

In [127]:
def attr_or_acomp_relation(tempspacy):
    to_return = []
    for t in tempspacy: 
        if (t.dep_=="nsubj" and t.head.pos_=="AUX"):  ## looking for nouns that are in nsubj relationship to aux verb
            if "neg" not in [i.dep_ for i in t.head.children]: ## exclude negatives
                attrs = [ch for ch in [tok for tok in tempspacy if tok.head==t.head] if ch.dep_ in ["attr","acomp"]]
                if attrs!=[]: ## if there is at least 1 attr
                    attr = attrs[0] ## just deal with the 0th one
                    attr_or_acomp = attr.dep_
                    ## get the noun ph
                    #np_fragment = [i for i in tempspacy if (t.is_ancestor(i) or i==t)]
                    np_fragment = rclip(get_chunk(t,tempspacy))
                    np_fragment_pos = [i.pos_ for i in np_fragment]
                    np_fragment_indices = [i.i for i in np_fragment]
                    noun_phrase = str(tempspacy[np_fragment_indices[0]:np_fragment_indices[-1]+1])
                    if "PROPN" in np_fragment_pos:     
                        ## get attr phrase
                        #attr_fragment = [i for i in tempspacy if (attr.is_ancestor(i) or i==attr)]
                        attr_fragment = rclip((get_chunk(attr,tempspacy)))
                        attr_fragment_indices = [i.i for i in attr_fragment]
                        attr_phrase = str(tempspacy[attr_fragment_indices[0]:attr_fragment_indices[-1]+1])
                        ## get attr phrase simple
                        valid_deps = ["det","amod"]
                        simple_fragment = [i for i in tempspacy if ((i.head==attr and i.dep_ in valid_deps) or i==attr)]
                        simple_fragment_indices = [i.i for i in simple_fragment]
                        simple_phrase = str(tempspacy[simple_fragment_indices[0]:simple_fragment_indices[-1]+1])
                        if test_contiguous(np_fragment_indices) and test_contiguous(attr_fragment_indices):
                            for n in [w for w in simple_fragment if w.pos_ in ["NOUN","ADJ"]]:
                                to_return.append({
                                    "type":attr_or_acomp,
                                    "allusion":noun_phrase,
                                    "target":n.lemma_,
                                    "allusion2":simple_phrase,
                                    "other_words":"|".join([child.text.lower() for child in n.children if child.dep_ in ['amod']]),
                                    "target_pos":n.pos_
                                })
    return to_return

In [128]:
attr_or_acomp_relation(nlp("John is uick to speak. A mind is a terrible thing to puree. The feet of the Big King were a simple nomadic people who moved through the earth and ate things, and the Dull Forest was a stupid place where old people went. Mausoolus was a wise king of bitter sadness, a good person.  John was a big lawyer.  Mausoolus of Xeria was going to the store."))

[{'type': 'acomp',
  'allusion': 'John',
  'target': 'uick',
  'allusion2': 'uick',
  'other_words': '',
  'target_pos': 'ADJ'},
 {'type': 'attr',
  'allusion': 'The feet of the Big King',
  'target': 'simple',
  'allusion2': 'a simple nomadic people',
  'other_words': '',
  'target_pos': 'ADJ'},
 {'type': 'attr',
  'allusion': 'The feet of the Big King',
  'target': 'nomadic',
  'allusion2': 'a simple nomadic people',
  'other_words': '',
  'target_pos': 'ADJ'},
 {'type': 'attr',
  'allusion': 'The feet of the Big King',
  'target': 'people',
  'allusion2': 'a simple nomadic people',
  'other_words': 'simple|nomadic',
  'target_pos': 'NOUN'},
 {'type': 'attr',
  'allusion': 'the Dull Forest',
  'target': 'stupid',
  'allusion2': 'a stupid place',
  'other_words': '',
  'target_pos': 'ADJ'},
 {'type': 'attr',
  'allusion': 'the Dull Forest',
  'target': 'place',
  'allusion2': 'a stupid place',
  'other_words': 'stupid',
  'target_pos': 'NOUN'},
 {'type': 'attr',
  'allusion': 'Mausool

In [129]:
def appos_relation(tempspacy):
    to_return = []
    for t in tempspacy:
        if t.dep_=="appos":
            appos_fragment = get_chunk(t,tempspacy) ##[i for i in tempspacy if (t.is_ancestor(i) or i==t)]
            appos_fragment_indices = [i.i for i in appos_fragment]
            appos_phrase = str(tempspacy[appos_fragment_indices[0]:appos_fragment_indices[-1]+1])
            ## get the noun ph
            noun_or_proper_noun = t.head
            valid_deps = ['compound']
            np_fragment = rclip(get_chunk(noun_or_proper_noun,tempspacy))#
            np_fragment_pos = [i.pos_ for i in np_fragment]
            np_fragment_indices = [i.i for i in np_fragment]
            noun_phrase = str(tempspacy[np_fragment_indices[0]:np_fragment_indices[-1]+1])
            if "PROPN" in np_fragment_pos:    
                if test_contiguous(np_fragment_indices) and test_contiguous(appos_fragment_indices):
                    for n in [w for w in appos_fragment if w.pos_ in ["NOUN","ADJ"]]:
                        to_return.append({
                            "type":"appos",
                            "allusion":noun_phrase,
                            "allusion2":appos_phrase,
                            "target":n.lemma_,
                            "other_words":"|".join([w.lemma_ for w in appos_fragment if w.pos_=="ADJ"]),
                            "target_pos":n.pos_,
                        })                   
    return to_return

appos_relation(nlp(" King Mausaillus of Carthage, a wise and fat king, is my friend. Mausoolus was going to the store. The old King Mausoolaic of Wales, a wise and smelly king, is my friend."))#ppos_relation(nlp("King Mausaillus of Carthage, a wise person, is my friend. Mausoolus was going to the store. The old King Mausoolaic of Wales, a wise king, is my friend."))

[{'type': 'appos',
  'allusion': ' King Mausaillus of Carthage',
  'allusion2': 'a wise and fat king',
  'target': 'wise',
  'other_words': 'wise|fat',
  'target_pos': 'ADJ'},
 {'type': 'appos',
  'allusion': ' King Mausaillus of Carthage',
  'allusion2': 'a wise and fat king',
  'target': 'fat',
  'other_words': 'wise|fat',
  'target_pos': 'ADJ'},
 {'type': 'appos',
  'allusion': ' King Mausaillus of Carthage',
  'allusion2': 'a wise and fat king',
  'target': 'king',
  'other_words': 'wise|fat',
  'target_pos': 'NOUN'},
 {'type': 'appos',
  'allusion': 'The old King Mausoolaic of Wales',
  'allusion2': 'a wise and smelly king',
  'target': 'wise',
  'other_words': 'wise|smelly',
  'target_pos': 'ADJ'},
 {'type': 'appos',
  'allusion': 'The old King Mausoolaic of Wales',
  'allusion2': 'a wise and smelly king',
  'target': 'smelly',
  'other_words': 'wise|smelly',
  'target_pos': 'ADJ'},
 {'type': 'appos',
  'allusion': 'The old King Mausoolaic of Wales',
  'allusion2': 'a wise and sm

In [130]:
mining_funcs = [
    noun_of_nounphrase,
    verb2np,
    adj_to_noun_phrase,
    attr_or_acomp_relation,
    appos_relation,
]

## Wikipedia Article Tester

I want to focus on things from Wikipedia that are old.  I will use some gnarly regexes to test whether some section of a Wikipedia text contains references to dates in the 19th century or prior and none from later.

In [131]:
import re

In [132]:
old_stuff_regex = r'\b(?:in|from|c\.|year) ?(?:late|early)? ?(?:\d{3}|1[0-8]\d\d)\b|\d+ b.?c.?|\b(?:[1-9]|1[0-9])(?:st|nd|th) century|\b(?!tw)[a-z]+(?:th|nd|st) century|\b\d{3,4}\b ?- ?\b(?:\d{3}|1[0-8]\d\d)\b|\((?:\d{3}|1[0-8]\d\d)\b\)|(?:Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|spring|summer|fall|winter).+ (?:of  |, )?(?:\d{3}|1[0-8]\d\d)\b'

In [133]:
new_stuff_regex = r'\b(?:in|from|c\.|year) ?(?:late|early)? ?(?:19|2\d)\d\d\b|\d+ b.?c.?|\b2[0-9](?:st|nd|th) century|\btw[a-z-]+(?:th|nd|st) century|\b\d{3,4}\b ?- ?\b(?:\d{3}|1[0-8]\d\d)\b|\((?:19|2\d)\d\d\b\)|(?:Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|spring|summer|fall|winter).+ (?:of  |, )?(?:19|2\d)\d\d\b'

In [134]:
test_string = """
(c. 334 – c. 2602 BC)
18th century
20th century
2nd century
first century
ninth century
nineteeth century
twentieth century
from 1923
from 1520
(119)
(1953)
(1834)
1995
the year 1895
in late 234
in late 1943
in early 1734

June, 1924
July of 2345
June, 1892
June, 234l234

spring of 1233

c. 234
c.234 
c. 1234
1523-1934
234
1523453245
"""

In [135]:
re.findall(old_stuff_regex,test_string)

['c. 334',
 '18th century',
 '2nd century',
 'first century',
 'ninth century',
 'nineteeth century',
 'from 1520',
 '(119)',
 '(1834)',
 'year 1895',
 'in late 234',
 'in early 1734',
 'June, 1892',
 'spring of 1233',
 'c. 234',
 'c.234',
 'c. 1234']

In [136]:
re.findall(new_stuff_regex,test_string)

['c. 2602',
 '20th century',
 'twentieth century',
 'from 1923',
 '(1953)',
 'in late 1943',
 'June, 1924',
 'July of 2345']

In [137]:
def text_time_of_text(text):
    olds = re.findall(old_stuff_regex,text,flags=re.I)
    news = re.findall(new_stuff_regex,text,flags=re.I)
#     print(olds)
#     print(news)
    if len(olds)>0:
        if len(news)==0:
            return True
    return False

In [138]:
text_time_of_text("in the 2nd CENTuRY ")

True

In [139]:
text_time_of_text("in 1923 and the 2nd century")

False

***

In [179]:
import re

def clean_wiki_text(text):
    text = re.sub(r"\'\'",'',text) ## links
    text = re.sub(r"poly[ \d]+","",text) ## ?
    text = re.sub(r"=+[^=]+=+\n"," ",text) ## section headings
    text = re.sub(r"\|alt=.+\n"," ",text) ## alt text
    text = text.rstrip(r"\n")
    return text

clean_wiki_text('====Union military strategy====\nLincoln took executive control of the war and shaped the Union military strategy.   |alt=Large group of people\n  sdfl;kasdf\n')

' Lincoln took executive control of the war and shaped the Union military strategy.      sdfl;kasdf\n'

In [186]:
from gensim import utils

import json

In [196]:
!rm wiki.db ## just re-initialize each time this notebook runs

In [197]:
import sqlite3
connection = sqlite3.connect("wiki.db")
cursor = connection.cursor()

In [198]:
cursor.execute("CREATE TABLE allusions (target TEXT, type TEXT, allusion TEXT, allusion2 TEXT, other_words TEXT, target_pos TEXT, entry TEXT, id NUMBER)")

<sqlite3.Cursor at 0x7f99d49e16c0>

In [201]:
def push_to_db(data):
    sql = "INSERT INTO allusions(target,type,allusion,allusion2,other_words,target_pos,entry,id) VALUES(?,?,?,?,?,?,?,?)"  
    for d in data:
        #print(d.keys())
        task = (d['target'],d['type'],d['allusion'],d['allusion2'],d['other_words'],d['target_pos'],d['entry'],d['id'])
        ## replace empty strings with None
        task = tuple(t if t!="" else None for t in task)
        print(task)
        cursor.execute(sql,task)
        connection.commit()
#         for k in d:
#             print(k,d[k])
            #task = (k[])
            #cur.execute(sql, task)

In [202]:
c=0
max_n = 10

idnum = 0

with utils.open('enwiki-latest.json.gz', 'rb') as f:
    for line in f:
        article = json.loads(line)
        for section_title, section_text in zip(article['section_titles'], article['section_texts']):
            text = section_text.strip()
            if text_time_of_text(text) and len(text)>300:              
                #print("Section title: %s" % section_title)
                text = clean_wiki_text(text)
                ok_sents = [s for s in text.split("\n\n") if len(s)>400]
                #print(ok_sents)
                for oks in ok_sents:
                    spacied = nlp(oks)
                    for func in mining_funcs:
                        #try:
                        results = func(spacied)
                        [r.update({"entry":article['title']}) for r in results] ## just update all the dicts
                        for r in results:
                            r.update({"id":idnum})
                            idnum+=1
                        push_to_db(results)
#                         except:
#                             pass
        c+=1
        if c>max_n:
            break

('revolutionaries', 'of', 'of the 19th century such as William Godwin (1756–1836) and Wilhelm Weitling (1808–1871)', None, 'many', 'NOUN', 'Anarchism', 0)
('label', 'v2np', 'Various factions within the French Revolution labelled their opponents as anarchists, although few such accused shared many views with later anarchists.', 'Various factions within the French Revolution', 'opponent', 'VERB', 'Anarchism', 1)
('contribute', 'v2np', 'Many revolutionaries of the 19th century such as William Godwin (1756–1836) and Wilhelm Weitling (1808–1871) would contribute to the anarchist doctrines of the next generation but did not use anarchist or anarchism in describing themselves or their beliefs.', 'Many revolutionaries of the 19th century such as William Godwin (1756–1836) and Wilhelm Weitling (1808–1871)', 'use', 'VERB', 'Anarchism', 2)
('ancient', 'adj2np', 'the Ancient Greek anarkhia', None, None, 'ADJ', 'Anarchism', 3)
('various', 'adj2np', 'Various factions within the French Revolution', N

('name', 'of', 'of a female gladiator fighting an "Amazon', None, None, 'NOUN', 'Achilles', 64)
('attest', 'v2np', 'Linear B tablets attest to the personal name Achilleus in the forms a-ki-re-u and a-ki-re-we, the latter being the dative of the former.', 'Linear B tablets', 're|-', 'VERB', 'Achilles', 65)
('female', 'adj2np', 'a female gladiator fighting an "Amazon', None, None, 'ADJ', 'Achilles', 66)
('form', 'v2np', 'Achilles\' role as the hero of grief or distress forms an ironic juxtaposition with the conventional view of him as the hero of   ("glory", usually in war).', "Achilles' role as the hero of grief or distress", 'juxtaposition', 'VERB', 'Achilles', 67)
('people', 'appos', 'a proto-form *Akhí-lāu̯os', 'he who has the people distressed" or "he whose people have distress"', 'distressed', 'NOUN', 'Achilles', 68)
('distressed', 'appos', 'a proto-form *Akhí-lāu̯os', 'he who has the people distressed" or "he whose people have distress"', 'distressed', 'ADJ', 'Achilles', 69)
('peo

('agree', 'v2np', 'Socrates and Hippias agree that Odysseus, who concocted a number of lies throughout the Odyssey and other stories in the Trojan War Cycle, was false intentionally.', 'Socrates and Hippias', None, 'VERB', 'Achilles', 155)
('tell', 'v2np', 'Achilles, like Odysseus, told numerous falsehoods.', 'Achilles', 'falsehood', 'VERB', 'Achilles', 156)
('believe', 'v2np', 'Hippias believes that Achilles was a generally honest man, while Socrates believes that Achilles lied for his own benefit.', 'Hippias', None, 'VERB', 'Achilles', 157)
('believe', 'v2np', 'Socrates believes that Achilles lied for his own benefit', 'Socrates', None, 'VERB', 'Achilles', 158)
('lie', 'v2np', 'Achilles lied for his own benefit', 'Achilles', None, 'VERB', 'Achilles', 159)
('abandon', 'v2np', 'Socrates eventually abandons Homeric arguments and makes sports analogies to drive home the point: someone who does wrong on purpose is a better person than someone who does wrong unintentionally.', 'Socrates', 

('daughter', 'of', 'of Robert Smith Todd, a wealthy lawyer and businessman in Lexington, Kentucky', None, None, 'NOUN', 'Abraham Lincoln', 257)
('mansion', 'of', "of Mary's sister", None, None, 'NOUN', 'Abraham Lincoln', 258)
('meet', 'v2np', 'In 1839, Lincoln met Mary Todd in Springfield, Illinois, and the following year they became engaged.', 'Lincoln', 'Todd|become', 'VERB', 'Abraham Lincoln', 259)
('keep', 'v2np', 'Mary kept house with the help of a hired servant and a relative.', 'Mary', 'house', 'VERB', 'Abraham Lincoln', 260)
('wealthy', 'adj2np', 'a wealthy lawyer and businessman in Lexington', None, None, 'ADJ', 'Abraham Lincoln', 261)
('wealthy', 'appos', 'Robert Smith Todd', 'a wealthy lawyer and businessman in Lexington,', 'wealthy', 'ADJ', 'Abraham Lincoln', 262)
('lawyer', 'appos', 'Robert Smith Todd', 'a wealthy lawyer and businessman in Lexington,', 'wealthy', 'NOUN', 'Abraham Lincoln', 263)
('businessman', 'appos', 'Robert Smith Todd', 'a wealthy lawyer and businessman

('failure', 'of', 'of the Compromise of 1850', None, None, 'NOUN', 'Abraham Lincoln', 352)
('highlight', 'v2np', 'In his 1852 eulogy for Clay, Lincoln highlighted the latter\'s support for gradual emancipation and opposition to "both extremes" on the slavery issue.', 'Lincoln', 'support', 'VERB', 'Abraham Lincoln', 353)
('become', 'v2np', 'the slavery debate in the Nebraska and Kansas territories became particularly acrimonious', 'the slavery debate in the Nebraska and Kansas territories', None, 'VERB', 'Abraham Lincoln', 354)
('propose', 'v2np', 'the slavery debate in the Nebraska and Kansas territories became particularly acrimonious, Illinois Senator Stephen A. Douglas proposed popular sovereignty as a compromise', 'Illinois Senator Stephen A. Douglas', 'sovereignty', 'VERB', 'Abraham Lincoln', 355)
('pass', 'v2np', "Douglas's Kansas–Nebraska Act narrowly passed Congress in May 1854.", "Douglas's Kansas–Nebraska Act", 'narrowly|Congress', 'VERB', 'Abraham Lincoln', 356)
('speech', '

('label', 'of', 'of "The Rail Candidate', None, None, 'NOUN', 'Abraham Lincoln', 451)
('imaging', 'of', 'of Lincoln', None, 'effective', 'NOUN', 'Abraham Lincoln', 452)
('organize', 'v2np', "Lincoln's followers organized a campaign team led by David Davis, Norman Judd, Leonard Swett, and Jesse DuBois, and Lincoln received his first endorsement.", "Lincoln's followers", 'team|receive', 'VERB', 'Abraham Lincoln', 453)
('receive', 'v2np', 'Lincoln received his first endorsement.', 'Lincoln', 'endorsement', 'VERB', 'Abraham Lincoln', 454)
('adopt', 'v2np', 'Exploiting his embellished frontier legend (clearing land and splitting fence rails), Lincoln\'s supporters adopted the label of "The Rail Candidate".', "Lincoln's supporters", 'label', 'VERB', 'Abraham Lincoln', 455)
('describe', 'v2np', 'In 1860, Lincoln described himself: "I am in height, six feet, four inches, nearly; lean in flesh, weighing, on an average, one hundred and eighty pounds; dark complexion, with coarse black hair, and 

('section', 'of', 'of the Constitution', None, None, 'NOUN', 'Abraham Lincoln', 530)
('send', 'v2np', 'States sent Union regiments south', 'States', 'regiment|south', 'VERB', 'Abraham Lincoln', 531)
('attack', 'v2np', 'States sent Union regiments south, on April 19, Baltimore mobs in control of the rail links attacked Union troops who were changing trains.', 'Baltimore mobs in control of the rail links', 'troop', 'VERB', 'Abraham Lincoln', 532)
('respond', 'v2np', 'the Army responded by arresting local Maryland officials.', 'the Army', None, 'VERB', 'Abraham Lincoln', 533)
('suspend', 'v2np', 'Lincoln suspended the writ of habeas corpus where needed for the security of troops trying to reach Washington.', 'Lincoln', 'writ', 'VERB', 'Abraham Lincoln', 534)
('petition', 'v2np', 'John Merryman, one Maryland official hindering the U.S. troop movements, petitioned Supreme Court Chief Justice Roger B. Taney to issue a writ of habeas corpus.', 'John Merryman, one Maryland official hindering t

('satisfy', 'v2np', "Pope satisfied Lincoln's desire to advance on Richmond from the north, thus protecting Washington from counterattack.", 'Pope', 'desire', 'VERB', 'Abraham Lincoln', 598)
('command', 'of', 'of all forces around Washington', None, None, 'NOUN', 'Abraham Lincoln', 599)
('battle', 'of', 'of Antietam', None, None, 'PROPN', 'Abraham Lincoln', 600)
('restore', 'v2np', "Despite his dissatisfaction with McClellan's failure to reinforce Pope, Lincoln restored him to command of all forces around Washington.", 'Lincoln', 'he', 'VERB', 'Abraham Lincoln', 601)
('cross', 'v2np', "Two days after McClellan's return to command, General Robert E. Lee's forces crossed the Potomac River into Maryland, leading to the Battle of Antietam.", "General Robert E. Lee's forces", 'River', 'VERB', 'Abraham Lincoln', 602)
('army', 'of', 'of the Ohio', None, None, 'PROPN', 'Abraham Lincoln', 603)
('resist', 'v2np', "McClellan then resisted the president's demand that he pursue Lee's withdrawing ar

('proclamation', 'of', 'of December 8, 1863', None, None, 'PROPN', 'Abraham Lincoln', 698)
('precede', 'v2np', " \nReconstruction preceded the war's end, as Lincoln and his associates considered the reintegration of the nation, and the fates of Confederate leaders and freed slaves.", ' \nReconstruction', 'end', 'VERB', 'Abraham Lincoln', 699)
('consider', 'v2np', 'Lincoln and his associates considered the reintegration of the nation, and the fates of Confederate leaders and freed slaves', 'Lincoln and his associates', 'reintegration|free', 'VERB', 'Abraham Lincoln', 700)
('reply', 'v2np', 'When a general asked Lincoln how the defeated Confederates were to be treated, Lincoln replied, "Let \'em up easy."', 'Lincoln', None, 'VERB', 'Abraham Lincoln', 701)
('lead', 'v2np', "Lincoln led the moderates in Reconstruction policy and was opposed by the Radicals, under Rep. Thaddeus Stevens, Sen. Charles Sumner and Sen. Benjamin Wade, who otherwise remained Lincoln's allies.", 'Lincoln', 'modera

('secretary', 'of', 'of State', None, None, 'PROPN', 'Abraham Lincoln', 807)
('handling', 'of', 'of the Trent Affair', None, None, 'NOUN', 'Abraham Lincoln', 808)
('name', 'v2np', ' \nLincoln named his main political rival William H. Seward as Secretary of State, and left most diplomatic issues in his portfolio.', ' \nLincoln', 'rival|leave', 'VERB', 'Abraham Lincoln', 809)
('select', 'v2np', 'However Lincoln did select some of the top diplomats as part of his patronage policy.  ', 'Lincoln', 'however|some', 'VERB', 'Abraham Lincoln', 810)
('declare', 'v2np', 'Washington would declare war on them if they supported Richmond', 'Washington', 'war', 'VERB', 'Abraham Lincoln', 811)
('main', 'adj2np', "Seward's main role", None, None, 'ADJ', 'Abraham Lincoln', 812)
('learned', 'of', "of the Lincolns' intent to attend a play with General Grant", None, 'when', 'VERB', 'Abraham Lincoln', 813)
('evening', 'of', 'of April 14', None, None, 'NOUN', 'Abraham Lincoln', 814)
('battle', 'of', 'of Appom

('suggest', 'v2np', 'Aristotle suggested that the reason for anything coming about can be attributed to four different types of simultaneously active factors.', 'Aristotle', None, 'VERB', 'Aristotle', 885)
('growth', 'of', 'of the Nile delta', None, None, 'NOUN', 'Aristotle', 886)
('time', 'of', 'of Homer', None, None, 'NOUN', 'Aristotle', 887)
('note', 'v2np', ' \n Aristotle noted that the ground level of the Aeolian islands changed before a volcanic eruption.\n', ' \n Aristotle', None, 'VERB', 'Aristotle', 888)
('note', 'v2np', 'The geologist Charles Lyell noted that Aristotle described such change, including "lakes that had dried up" and "deserts that had become watered by rivers", giving as examples the growth of the Nile delta since the time of Homer, and "the upheaving of one of the Aeolian islands, previous to a volcanic eruption.', 'The geologist Charles Lyell', None, 'VERB', 'Aristotle', 889)
('describe', 'v2np', 'Aristotle described such change, including "lakes that had drie

('believe', 'v2np', 'Aristotle believed the chain of thought, which ends in recollection of certain impressions, was connected systematically in relationships such as similarity, contrast, and contiguity, described in his laws of association.', 'Aristotle', 'chain', 'VERB', 'Aristotle', 939)
('believe', 'v2np', 'Aristotle believed that past experiences are hidden within the mind.', 'Aristotle', None, 'VERB', 'Aristotle', 940)
('describe', 'v2np', 'Aristotle describes sleep in On Sleep and Wakefulness.', 'Aristotle', 'sleep', 'VERB', 'Aristotle', 941)
('absence', 'of', 'of perception to the faculty of imagination, phantasia', None, None, 'NOUN', 'Aristotle', 942)
('explain', 'v2np', 'Aristotle explains that when a person stares at a moving stimulus such as the waves in a body of water, and then looks away, the next thing they look at appears to have a wavelike motion.', 'Aristotle', None, 'VERB', 'Aristotle', 943)
('compare', 'v2np', 'Aristotle compares a sleeping person to a person who

In [203]:
def query_db(target):
    cursor.execute("SELECT * FROM allusions WHERE target=?", (target,))
    rows = cursor.fetchall()
    return rows

query_db("remain")

[('remain',
  'v2np',
  'opposition to the Kansas–Nebraska Act remained strong throughout the North.',
  'opposition to the Kansas–Nebraska Act',
  None,
  'VERB',
  'Abraham Lincoln',
  379),
 ('remain',
  'v2np',
  'Secession sentiment was strong in Missouri and Maryland, but did not prevail; Kentucky remained neutral.',
  'Kentucky',
  None,
  'VERB',
  'Abraham Lincoln',
  527),
 ('remain',
  'v2np',
  "Aristotle's writings on motion remained influential until the Early Modern period.",
  "Aristotle's writings on motion",
  None,
  'VERB',
  'Aristotle',
  871)]