# ⛏️ Mining KJV for Biblical Adornments

To Pray Without Ceasing uses various linguistic fragments gathered from the KJV Bible to adorn rather homely and simple prayers that I have written.   

***

Import and pos-tag Bible. 

In [10]:
from nltk import pos_tag
from nltk.corpus import gutenberg
biblewords = gutenberg.words('bible-kjv.txt')
bible_tagged = [(w.lower(),tag) for w,tag in pos_tag(biblewords)]

In [11]:
from nltk.corpus import brown
brownwords = brown.words()
brown_tagged = pos_tag(brownwords)

In [12]:
from nltk import FreqDist as fd 
brownfd = fd(brown_tagged)
biblefd = fd(bible_tagged)

biblefd_nopos = fd([w.lower() for w in biblewords])

Pos-tagging archaic text is far from reliable.  I try to deal with this by taking a "majority rules" approach to the token's pos tag:

In [13]:
def make_sure_predominant_token_tag_combo(token,tag,threshold=.5):
    try:
        if biblefd[(token.lower(),tag)]/biblefd_nopos[token.lower()]>threshold:
            #if brownfd[(token,tag)]/max(1,brownfd_nopos[token])>.5:
            return True
        else:
            return False
    except:
        return False
    
make_sure_predominant_token_tag_combo("noah","NNP")

True

Also prepare raw text.

In [14]:
import re
from nltk.corpus import gutenberg
rawbible = gutenberg.raw('bible-kjv.txt')
rawbible = re.sub(r'\d+:\d+','',rawbible)
rawbible = re.sub(r'(?:\n)+',' ',rawbible)
rawbible = re.sub(r'\bthy\b','your',rawbible, flags=re.IGNORECASE)
rawbible = re.sub(r'\bhath\b','has',rawbible, flags=re.IGNORECASE)
rawbible = re.sub(r'\bthine\b','your',rawbible, flags=re.IGNORECASE)
rawbible = re.sub(r'\bthou\b','you',rawbible, flags=re.IGNORECASE)
rawbible[:1000]

'[The King James Bible] The Old Testament of the King James Bible The First Book of Moses:  Called Genesis  In the beginning God created the heaven and the earth.  And the earth was without form, and void; and darkness was upon the face of the deep. And the Spirit of God moved upon the face of the waters.  And God said, Let there be light: and there was light.  And God saw the light, that it was good: and God divided the light from the darkness.  And God called the light Day, and the darkness he called Night. And the evening and the morning were the first day.  And God said, Let there be a firmament in the midst of the waters, and let it divide the waters from the waters.  And God made the firmament, and divided the waters which were under the firmament from the waters which were above the firmament: and it was so.  And God called the firmament Heaven. And the evening and the morning were the second day.  And God said, Let the waters under the heaven be gathered together unto one place

### SpaCy for Mining

#### Extract `(JJ,NN)` from KJV

Here I extract all `(ADJECTIVE, NOUN)` pairs such as `("PURE","GOLD")`.

I don't conflate singular and plural nouns, since certain adjectives may only (or better) fit one or the other.

In [15]:
import spacy
nlp = spacy.load('en')

In [16]:
def extract_adj2nouns(tempspacy):
    """
    For a sentence like "I ate the small frog." returns [(small, frog)].
    """
    nouns = ["NN","NNS"]
    adj_noun_tuples = []
    
    for token in tempspacy:  ## for every token in the document
        
        ### get ones like "the blue dog"
        try: 
            if token.dep_=="amod":  ## try to see if it is an `amod`, an adjective
                if token.pos_=="ADJ":
                    if token.head.tag_ in nouns:  ## try to see if the head is a noun
                        adj_noun_tuples.append((token.text.lower(),(token.head.text.lower(),token.head.tag_))) ## add the modifying word and the lemma 
        except:
            pass
        
        ### get ones like "The dog is blue."
        try:
            if ((token.tag_ in nouns) and (token.dep_=="nsubj")): ## find a noun subject
                verb = token.head  ## make sure it's head is a verb
                if verb.pos_ == "AUX":  ## ...
                    adj = [a for a in list(verb.children) if (a.tag_=="JJ" and a.dep_=="acomp")][0]
                    adj_noun_tuples.append((adj.text.lower(),(token.text.lower(),token.tag_)))
                    
        except:
            pass
    return adj_noun_tuples
                                       
extract_adj2nouns(nlp("Old frogs are soft as pets.  The young dog was blue. The smiling dog, who was white, walked itself through the house.  The dog was white."))

[('old', ('frogs', 'NNS')),
 ('soft', ('frogs', 'NNS')),
 ('young', ('dog', 'NN')),
 ('blue', ('dog', 'NN')),
 ('white', ('dog', 'NN'))]

In [17]:
from nltk import tokenize
spacybiblesents = [nlp(s) for s in tokenize.sent_tokenize(rawbible)]

In [18]:
spacybiblesents[30]

And to every beast of the earth, and to every fowl of the air, and to every thing that creepeth upon the earth, wherein there is life, I have given every green herb for meat: and it was so.

In [19]:
adj_noun_pairs = []
for s in spacybiblesents:
    adj_noun_pairs+=extract_adj2nouns(s)

In [20]:
adj_noun_pairs

[('light', ('day', 'NN')),
 ('first', ('day', 'NN')),
 ('second', ('day', 'NN')),
 ('dry', ('land', 'NN')),
 ('third', ('day', 'NN')),
 ('great', ('lights', 'NNS')),
 ('greater', ('light', 'NN')),
 ('lesser', ('light', 'NN')),
 ('fourth', ('day', 'NN')),
 ('open', ('firmament', 'NN')),
 ('great', ('whales', 'NNS')),
 ('fifth', ('day', 'NN')),
 ('own', ('image', 'NN')),
 ('green', ('herb', 'NN')),
 ('sixth', ('day', 'NN')),
 ('seventh', ('day', 'NN')),
 ('seventh', ('day', 'NN')),
 ('seventh', ('day', 'NN')),
 ('whole', ('face', 'NN')),
 ('living', ('soul', 'NN')),
 ('whole', ('land', 'NN')),
 ('good', ('gold', 'NN')),
 ('second', ('river', 'NN')),
 ('whole', ('land', 'NN')),
 ('third', ('river', 'NN')),
 ('fourth', ('river', 'NN')),
 ('alone', ('man', 'NN')),
 ('deep', ('sleep', 'NN')),
 ('subtil', ('serpent', 'NN')),
 ('good', ('tree', 'NN')),
 ('flaming', ('sword', 'NN')),
 ('young', ('man', 'NN')),
 ('own', ('likeness', 'NN')),
 ('mighty', ('men', 'NNS')),
 ('great', ('wickedness', 

In [21]:
print([a for a,c in fd([a for a,n in adj_noun_pairs]).most_common(30)])

['great', 'own', 'good', 'other', 'many', 'holy', 'high', 'young', 'right', 'first', 'whole', 'mighty', 'little', 'same', 'evil', 'chief', 'full', 'third', 'strong', 'old', 'certain', 'new', 'wise', 'second', 'fine', 'mine', 'much', 'seventh', 'dead', 'wicked']


In [22]:
print([n for n,c in fd([n for a,n in adj_noun_pairs]).most_common(100)])

[('man', 'NN'), ('men', 'NNS'), ('day', 'NN'), ('things', 'NNS'), ('hand', 'NN'), ('year', 'NN'), ('place', 'NN'), ('thing', 'NN'), ('places', 'NNS'), ('time', 'NN'), ('people', 'NNS'), ('month', 'NN'), ('land', 'NN'), ('heart', 'NN'), ('side', 'NN'), ('house', 'NN'), ('part', 'NN'), ('priest', 'NN'), ('days', 'NNS'), ('city', 'NN'), ('gods', 'NNS'), ('way', 'NN'), ('works', 'NNS'), ('priests', 'NNS'), ('ones', 'NNS'), ('linen', 'NN'), ('voice', 'NN'), ('bread', 'NN'), ('son', 'NN'), ('woman', 'NN'), ('words', 'NNS'), ('gold', 'NN'), ('children', 'NNS'), ('name', 'NN'), ('spirit', 'NN'), ('eyes', 'NNS'), ('work', 'NN'), ('body', 'NN'), ('waters', 'NNS'), ('image', 'NN'), ('savour', 'NN'), ('stones', 'NNS'), ('life', 'NN'), ('soul', 'NN'), ('multitude', 'NN'), ('court', 'NN'), ('earth', 'NN'), ('hands', 'NNS'), ('witness', 'NN'), ('years', 'NNS'), ('wine', 'NN'), ('hour', 'NN'), ('child', 'NN'), ('cities', 'NNS'), ('king', 'NN'), ('bullock', 'NN'), ('flour', 'NN'), ('tree', 'NN'), ('nat

In [23]:
theos_words = ["christ","spirit","god","mary","christ","spirit","lord"]

In [24]:
forbidden_adjs = ['certain','much','same','other','own',"mine","your",'his','her']
forbidden_nouns = ['thing','part','place','part','side','one','things','one','ones',"thereon"] + theos_words

In [25]:
from collections import defaultdict

noun2adj = defaultdict(list)
adj2noun = defaultdict(list)

c=0
for adj,noun in adj_noun_pairs:
    if adj not in forbidden_adjs and noun[0] not in forbidden_nouns:
        if make_sure_predominant_token_tag_combo(adj,"JJ")==True:
            if make_sure_predominant_token_tag_combo(*noun)==True:
                #print(noun)
                c+=1
                noun2adj[noun].append(adj)
                adj2noun[adj].append(noun)
c

7578

In [26]:
for k,v in adj2noun.items():
    adj2noun[k] = list(set(v))

In [27]:
### keys become "dog***NN", need to split before using
bible_noun2adj_jsonable = {"***".join(key):list(set(value)) for key,value in noun2adj.items()}

import json
with open('bible_noun2adj.json','w') as f: ## keys are like dog***NN
    json.dump(bible_noun2adj_jsonable,f)

#### Extract `(subject,verb,object)` triples.

In [28]:
def extract_svo(tempspacy):
    """
    returns (S,V,O), (S,V,None), or (NONE,V,O) where they exist
    """
    tuples = []
    for token in tempspacy:
        s = None
        v = None
        o = None
        if token.pos_ == "VERB":
            v = token
            children = token.children
            for c in children:
                if c.dep_=="dobj" and c.tag_ in ["NN","NNS","NNP","NNPS"]: ## must be noun
                    o = c
                if c.dep_=="nsubj" and c.tag_ in ["NN","NNS","NNP","NNPS"]: ## must be noun
                    s = c
        if s!=None and o!=None:     
            svo_ids =  [s.i,v.i,o.i]
            for p in [s,v,o]:
                svo_ids += list((x.i for x in p.subtree ))#if x.dep_ in ["det","prt","amod",'poss','aux','relcl']))

            svo_ids = list(set(svo_ids))
            tokens = [(t.text,t.tag_) for t in tempspacy if t.i in svo_ids]
            tokens = [(token.lower(),tag) if not tag.startswith("NNP") else (token,tag) for token,tag in tokens]
            tuples.append((((s.text.lower(),s.tag_),(v.text.lower(),v.lemma_,v.tag_),(o.text.lower(),s.tag_),tuple(tokens))))
    return tuples

extract_svo(nlp("The enemy persecutes my soul. On that day, the King is eating up the weary cat beneath a house, and thy right sin found out a lie out, and the cat slept in the barn, and running wore me out."))

[(('enemy', 'NN'),
  ('persecutes', 'persecute', 'VBZ'),
  ('soul', 'NN'),
  (('the', 'DT'),
   ('enemy', 'NN'),
   ('persecutes', 'VBZ'),
   ('my', 'PRP$'),
   ('soul', 'NN'),
   ('.', '.'))),
 (('king', 'NNP'),
  ('eating', 'eat', 'VBG'),
  ('cat', 'NNP'),
  (('on', 'IN'),
   ('that', 'DT'),
   ('day', 'NN'),
   (',', ','),
   ('the', 'DT'),
   ('King', 'NNP'),
   ('is', 'VBZ'),
   ('eating', 'VBG'),
   ('up', 'RP'),
   ('the', 'DT'),
   ('weary', 'JJ'),
   ('cat', 'NN'),
   ('beneath', 'IN'),
   ('a', 'DT'),
   ('house', 'NN'),
   (',', ','),
   ('and', 'CC'),
   ('thy', 'PRP$'),
   ('right', 'JJ'),
   ('sin', 'NN'),
   ('found', 'VBD'),
   ('out', 'RP'),
   ('a', 'DT'),
   ('lie', 'NN'),
   ('out', 'RB'),
   (',', ','),
   ('and', 'CC'),
   ('the', 'DT'),
   ('cat', 'NN'),
   ('slept', 'VBD'),
   ('in', 'IN'),
   ('the', 'DT'),
   ('barn', 'NN'),
   (',', ','),
   ('and', 'CC'),
   ('running', 'VBG'),
   ('wore', 'VBD'),
   ('me', 'PRP'),
   ('out', 'RP'),
   ('.', '.'))),
 (('sin'

In [29]:
extract_svo(nlp("Noah found grace in the eyes of the Lord"))

[(('noah', 'NNP'),
  ('found', 'find', 'VBD'),
  ('grace', 'NNP'),
  (('Noah', 'NNP'),
   ('found', 'VBD'),
   ('grace', 'NN'),
   ('in', 'IN'),
   ('the', 'DT'),
   ('eyes', 'NNS'),
   ('of', 'IN'),
   ('the', 'DT'),
   ('Lord', 'NNP')))]

In [30]:
[(i,i.dep_) for i in nlp("thy right sin will find you out")]

[(thy, 'nmod'),
 (right, 'amod'),
 (sin, 'nsubj'),
 (will, 'aux'),
 (find, 'ROOT'),
 (you, 'dobj'),
 (out, 'prt')]

In [31]:
import itertools
bible_svos = list(itertools.chain(*[extract_svo(s) for s in spacybiblesents]))

In [33]:
bible_svos[:30]

[(('god', 'NNP'),
  ('created', 'create', 'VBD'),
  ('heaven', 'NNP'),
  (('in', 'IN'),
   ('the', 'DT'),
   ('beginning', 'VBG'),
   ('God', 'NNP'),
   ('created', 'VBD'),
   ('the', 'DT'),
   ('heaven', 'NNP'),
   ('and', 'CC'),
   ('the', 'DT'),
   ('earth', 'NN'),
   ('.', '.'))),
 (('god', 'NNP'),
  ('saw', 'see', 'VBD'),
  ('light', 'NNP'),
  (('and', 'CC'),
   ('God', 'NNP'),
   ('saw', 'VBD'),
   ('the', 'DT'),
   ('light', 'NN'),
   (',', ','),
   ('that', 'IN'),
   ('it', 'PRP'),
   ('was', 'VBD'),
   ('good', 'JJ'),
   (':', ':'),
   ('and', 'CC'),
   ('God', 'NNP'),
   ('divided', 'VBD'),
   ('the', 'DT'),
   ('light', 'NN'),
   ('from', 'IN'),
   ('the', 'DT'),
   ('darkness', 'NN'),
   ('.', '.'))),
 (('god', 'NNP'),
  ('divided', 'divide', 'VBD'),
  ('light', 'NNP'),
  (('God', 'NNP'),
   ('divided', 'VBD'),
   ('the', 'DT'),
   ('light', 'NN'),
   ('from', 'IN'),
   ('the', 'DT'),
   ('darkness', 'NN'),
   ('.', '.'))),
 (('god', 'NNP'),
  ('made', 'make', 'VBD'),
  ('f

How many did I extract?

In [34]:
len(bible_svos)

4464

In [35]:
#[(s,v,o,sent) for s,v,o,sent in bible_svos if s!=None and (make_sure_predominant_token_tag_combo(*s)==False)]

[(('ye', 'NNP'),
  ('eat', 'eat', 'VB'),
  ('flesh', 'NNP'),
  (('but', 'CC'),
   ('flesh', 'NN'),
   ('with', 'IN'),
   ('the', 'DT'),
   ('life', 'NN'),
   ('thereof', 'RB'),
   (',', ','),
   ('which', 'WDT'),
   ('is', 'VBZ'),
   ('the', 'DT'),
   ('blood', 'NN'),
   ('thereof', 'RB'),
   (',', ','),
   ('shall', 'MD'),
   ('ye', 'NNP'),
   ('not', 'RB'),
   ('eat', 'VB'),
   ('.', '.'))),
 (('lot', 'NNP'),
  ('lifted', 'lift', 'VBD'),
  ('eyes', 'NNP'),
  (('and', 'CC'),
   ('Lot', 'NNP'),
   ('lifted', 'VBD'),
   ('up', 'RP'),
   ('his', 'PRP$'),
   ('eyes', 'NNS'),
   (',', ','),
   ('and', 'CC'),
   ('beheld', 'VBD'),
   ('all', 'PDT'),
   ('the', 'DT'),
   ('plain', 'NN'),
   ('of', 'IN'),
   ('Jordan', 'NNP'),
   (',', ','),
   ('that', 'IN'),
   ('it', 'PRP'),
   ('was', 'VBD'),
   ('well', 'RB'),
   ('watered', 'VBN'),
   ('every', 'DT'),
   ('where', 'WRB'),
   (',', ','),
   ('before', 'IN'),
   ('the', 'DT'),
   ('LORD', 'NNP'),
   ('destroyed', 'VBD'),
   ('Sodom', 'NNP

In [45]:
theos_words

['christ', 'spirit', 'god', 'mary', 'christ', 'spirit', 'lord']

In [48]:
def contains_theos_words(tokens):
    #print(tokens)
    if any([token.lower() in theos_words for token in tokens]):
        return True
    else:
        return False

Filter out ones that are incomplete (no subject, no object) or that seem otherwise undesirable.

In [50]:
#[(s,v,o,sent) for s,v,o,sent in ok_svos if contains_theos_words([token for token,tag in sent])]

In [51]:
ok_svos = bible_svos
ok_svos = [(s,v,o,sent) for s,v,o,sent in ok_svos if s!=None and make_sure_predominant_token_tag_combo(*s,threshold=.2)]
ok_svos = [(s,v,o,sent) for s,v,o,sent in ok_svos if o!=None and o[1] and make_sure_predominant_token_tag_combo(*o,threshold=.2)]
ok_svos = [(s,v,o,sent) for s,v,o,sent in ok_svos if make_sure_predominant_token_tag_combo(v[0],v[2],threshold=.2)]
ok_svos = [(s,v,o,sent) for s,v,o,sent in ok_svos if sent[0][1][:2] not in ["MD","VB","WT"]] ## get rid of those starting with modals etc.
ok_svos = [(s,v,o,sent) for s,v,o,sent in ok_svos if not contains_theos_words([token for token,tag in sent])]
ok_svos = list(set(ok_svos))
len(ok_svos)

775

In [52]:
lemma2svo_phrase = defaultdict(list)

for s,v,o,p in list(set(ok_svos)):
    token,lemma,pos = v
    lemma2svo_phrase[lemma].append(p)

In [53]:
lemma2svo_phrase['forget']

[(('because', 'IN'),
  ('mine', 'NN'),
  ('enemies', 'NNS'),
  ('have', 'VBP'),
  ('forgotten', 'VBN'),
  ('your', 'PRP$'),
  ('words', 'NNS')),
 (('for', 'IN'),
  ('Israel', 'NNP'),
  ('has', 'VBZ'),
  ('forgotten', 'VBN'),
  ('his', 'PRP$'),
  ('maker', 'NN'),
  (',', ','),
  ('and', 'CC'),
  ('buildeth', 'JJ'),
  ('temples', 'NNS'),
  (';', ':'),
  ('and', 'CC'),
  ('Judah', 'NNP'),
  ('has', 'VBZ'),
  ('multiplied', 'VBN'),
  ('fenced', 'JJ'),
  ('cities', 'NNS'),
  (':', ':'),
  ('but', 'CC'),
  ('i', 'PRP'),
  ('will', 'MD'),
  ('send', 'VB'),
  ('a', 'DT'),
  ('fire', 'NN'),
  ('upon', 'IN'),
  ('his', 'PRP$'),
  ('cities', 'NNS'),
  (',', ','),
  ('and', 'CC'),
  ('it', 'PRP'),
  ('shall', 'MD'),
  ('devour', 'VB'),
  ('the', 'DT'),
  ('palaces', 'NNS'),
  ('thereof', 'RB'),
  ('.', '.'))]

In [54]:
with open('bible_lemma2svophrases.json','w') as f:
    json.dump(lemma2svo_phrase,f)

### Extracting Phrases/Clauses

This is a bit confusing, I think.

I want to get prepositional phrases or relative clauses that are children of a target word, a noun or a verb.

In [66]:
def extract_noun_or_verb_prepphrase_or_relclause(tempspacy,targetpos="NOUN",targetextra="prep"):
    """
    """
    pronouns = ['PRP','PRP$']
    tuples = []
    
    for token in tempspacy:
        n = None
        pp = None
        if token.pos_ == targetpos:
            n = token
            children = token.children
            for c in children:
                if c.dep_==targetextra:
                    pp_pos = [i.tag_ for i in list(c.subtree)]
                    if any(pronoun in pp_pos for pronoun in pronouns)==False:
                        pp = [(i.text,i.tag_) for i in list(c.subtree)]
                        pp = [(token.lower(),tag) if not tag.startswith("NNP") else (token,tag) for token,tag in pp]
        if n!=None and pp!=None:
            tuples.append(((n.text,n.tag_),pp))
    return tuples

extract_noun_or_verb_prepphrase_or_relclause(nlp("the apple OF an eye which is here, and I went to the store"))

[(('apple', 'NN'),
  [('of', 'IN'),
   ('an', 'DT'),
   ('eye', 'NN'),
   ('which', 'WDT'),
   ('is', 'VBZ'),
   ('here', 'RB')])]

#### Extract `(VB, prep_phrase)` 

In [67]:
verb_to_prep = list(itertools.chain(*[extract_noun_or_verb_prepphrase_or_relclause(s,targetpos="VERB") for s in spacybiblesents]))

In [68]:
verb_to_prep

[(('created', 'VBD'), [('in', 'IN'), ('the', 'DT'), ('beginning', 'VBG')]),
 (('moved', 'VBD'),
  [('upon', 'IN'),
   ('the', 'DT'),
   ('face', 'NN'),
   ('of', 'IN'),
   ('the', 'DT'),
   ('waters', 'NNS')]),
 (('divided', 'VBD'), [('from', 'IN'), ('the', 'DT'), ('darkness', 'NN')]),
 (('divide', 'VB'), [('from', 'IN'), ('the', 'DT'), ('waters', 'NNS')]),
 (('gathered', 'VBN'), [('unto', 'IN'), ('one', 'CD'), ('place', 'NN')]),
 (('divide', 'VB'), [('from', 'IN'), ('the', 'DT'), ('night', 'NN')]),
 (('give', 'VB'), [('upon', 'IN'), ('the', 'DT'), ('earth', 'NN')]),
 (('set', 'VBD'),
  [('in', 'IN'),
   ('the', 'DT'),
   ('firmament', 'NN'),
   ('of', 'IN'),
   ('the', 'DT'),
   ('heaven', 'NNP')]),
 (('give', 'VB'),
  [('upon', 'IN'), ('the', 'DT'), ('earth', 'NN'), (',', ','), (' ', '_SP')]),
 (('rule', 'VB'),
  [('over', 'IN'),
   ('the', 'DT'),
   ('day', 'NN'),
   ('and', 'CC'),
   ('over', 'IN'),
   ('the', 'DT'),
   ('night', 'NN')]),
 (('divide', 'VB'), [('from', 'IN'), ('the'

In [69]:
ok_verb_to_prep = []

for verb,pp in verb_to_prep:
    if make_sure_predominant_token_tag_combo(*verb):
       # print(pp[-1][1])
        if pp[-1][1].startswith("N"): ##noun
            ok_verb_to_prep.append((verb,tuple(pp)))

In [70]:
len(ok_verb_to_prep)

13645

In [75]:
ok_verb_to_prep = [(verb,tagged_tokens) for verb,tagged_tokens in ok_verb_to_prep if not contains_theos_words([token for token,tag in tagged_tokens])]

In [76]:
ok_verb_to_prep = list(set(ok_verb_to_prep))

In [77]:
len(ok_verb_to_prep)

10267

In [78]:
ok_verb_to_prep[0]

(('go', 'VB'), (('to', 'IN'), ('the', 'DT'), ('high', 'JJ'), ('place', 'NN')))

In [79]:
with open('bible_verb_and_prepphrase.json','w') as f:
    json.dump(ok_verb_to_prep,f)

#### Extract `(NN, rel_clause)`  and `(NN, prep_phrase)`

In [87]:
extract_noun_or_verb_prepphrase_or_relclause(nlp("The man who is sitting there is my friend."),targetextra="relcl")

[(('man', 'NN'),
  [('who', 'WP'), ('is', 'VBZ'), ('sitting', 'VBG'), ('there', 'EX')])]

In [88]:
noun_to_prep = list(itertools.chain(*[extract_noun_or_verb_prepphrase_or_relclause(s) for s in spacybiblesents]))

In [89]:
noun_to_recl = list(itertools.chain(*[extract_noun_or_verb_prepphrase_or_relclause(s,targetextra="relcl") for s in spacybiblesents]))

In [90]:
noun_to_prep_recl = noun_to_prep + noun_to_recl

In [91]:
ok_noun_to_prep_recl = []

for noun,phrase in noun_to_prep_recl:
    if make_sure_predominant_token_tag_combo(*noun):
        if phrase[-1][1].startswith("N"): ##noun
            ok_noun_to_prep_recl.append((noun,tuple(phrase)))

In [92]:
ok_noun_to_prep_recl = [(noun,tagged_tokens) for noun,tagged_tokens in ok_noun_to_prep_recl if not contains_theos_words([token for token,tag in tagged_tokens])]

In [93]:
ok_noun_to_prep_recl = list(set(ok_noun_to_prep_recl))

In [94]:
len(ok_noun_to_prep_recl)

14497

In [95]:
with open('bible_nouns_and_prep_or_reclphrase.json','w') as f:
    json.dump(ok_noun_to_prep_recl,f)

***
