In [None]:
!pip install -q hashstash[rec] stanza

In [None]:
import stanza
stanza.Pipeline()

In [None]:
NLP = None
def get_nlp():
    global NLP
    if NLP is None:
        NLP = stanza.Pipeline(lang='en', processors='tokenize,mwt,pos,lemma,ner,depparse,constituency', verbose=0)
    return NLP

In [None]:
# !unzip -n osp_slices_1000.zip
# !mkdir -p /root/.cache/hashstash
# !rm -rf /root/.cache/hashstash/osp_slices_1000
# !mv osp_slices_1000 /root/.cache/hashstash

In [None]:
from hashstash import HashStash


input_stash = HashStash('osp_slices_1000')
input_stash

Config,Param,Value
Path,Root Dir,/root/.cache/hashstash/osp_slices_1000
,Filename,data.db
Engine,Engine,lmdb
,Serializer,hashstash
,Compress,lz4
,B64,True
Stats,Len,57620


In [None]:
import string
def remove_left_right_punct(text):
    """
    Removes leading and trailing punctuation from a string.
    """
    return text.strip(string.punctuation)


In [None]:
output_stash = HashStash('osp_slices_1000_nlp')
output_stash


In [None]:
text_id,slice_d = next(input_stash.items())
slice_id, slice_txt = list(slice_d.items())[0]
# slice_id, slice_txt

In [None]:
def gen_nlp_doc(txt, key, force=False, stash=output_stash):
    if force or key not in stash:
        nlp = get_nlp()
        doc = nlp(txt)
        stash[key] = doc.to_serialized()
        return doc

In [None]:
# gen_nlp_doc(slice_txt, f'{text_id}__{int(slice_id):02d}')

In [None]:
from tqdm import tqdm

In [None]:
inps = [(f'{text_id}__{int(slice_id):02d}', _) for text_id, slice_d in tqdm(input_stash.items(),total=len(input_stash)) for slice_id, _ in slice_d.items()]
inps[0]

100%|██████████| 57620/57620 [00:16<00:00, 3412.85it/s]


('phil/10.2307/45238188__01', 'My argument has three steps.\nFirst, I argue that proponents of conciliatory policies have good reason to affirm a view that I call "instrumentalism," a view that commends treating our doxastic inclinations like instrumental readouts.\nSecond, I show that instrumentalism supplies a basis for demanding conciliatory requirements in superficial disagreements but not in fundamental disagreements.\nThird, I argue that the frequently invoked "independence" principle, which arguably would require significant conciliation in fundamental disputes, is unmotivated in light of the explanatory power of instrumentalism.\nThe most plausible conciliatory view, then, is a weak conciliationism that features instrumentalism rather than independence as the central principle, and that therefore gives us a principled basis for thinking that fundamental disagreements should occasion less doxastic revision than shallow disagreements.\nIn the course of developing this explanation

In [722]:
import random
random.shuffle(inps)

In [723]:
for key, txt in tqdm(inps):
    gen_nlp_doc(txt, key)

  0%|          | 269/186708 [07:38<88:17:57,  1.70s/it]


KeyboardInterrupt: 

In [None]:
# !zip -r -q osp_slices_1000_nlp.zip /root/.cache/hashstash/osp_slices_1000_nlp

In [None]:
key,docstr = next(output_stash.items())
doc = stanza.Document.from_serialized(docstr)
word = doc.sentences[1].words[5]
key

'phil/10.2307/20140633__02'

In [None]:
def get_word_context(doc, sent_i, word_i, context_len=2):
    sent = doc.sentences[sent_i]

    prev_context=''
    next_context=''

    words_forward=sent.words[word_i+1:]
    words_backward=reversed(sent.words[:word_i])
    for w in words_forward:
        if len(next_context) < context_len:
            next_context+=w.text+' '
        else:
            break
    for w in words_backward:
        if len(prev_context) < context_len:
            prev_context=w.text+' '+prev_context+' '
        else:
            break

    word = sent.words[word_i]
    out = f'{prev_context.strip()} {word.text.upper()} {next_context.strip()}'
    out = out.replace('\n',' ').replace(' ,',',').replace(' .', '.').replace(' !','!').replace(' ?','?')
    out = out.replace('( ','(').replace('[ ','[').strip().replace(' )',')').replace(' ]',']')
    return remove_left_right_punct(out).strip()

In [None]:
# get_word_context(doc, 2, 12)

In [None]:
# def get_token_eg(doc,word):
#     return

In [740]:


def get_pos_counts(doc, feat2word2count=None, feat2word2eg=None):
    pos_counts = Counter()
    deprel_counts = Counter()
    for sent_i,sent in enumerate(doc.sentences):
        for word_i,word in enumerate(sent.words):
            pos_counts[word.pos]+=1
            deprel_counts[word.deprel]+=1

            eg_word = word.text.lower()
            if feat2word2count is not None:
                feat2word2count[word.deprel][eg_word]+=1
                feat2word2count[word.pos][eg_word]+=1

            if feat2word2eg is not None:
                eg_context = get_word_context(doc, sent_i, word_i, context_len=context_len).strip()
                feat2word2eg[word.deprel][eg_word] = eg_context
                feat2word2eg[word.pos][eg_word] = eg_context

    sum_pos_counts = sum(pos_counts.values())
    # print(sum_pos_counts)
    pos_counts_rel = {k:int(round(v/sum_pos_counts*1000)) for k,v in pos_counts.items()}

    sum_deprel_counts = sum(deprel_counts.values())
    deprel_counts_rel = {k:int(round(v/sum_deprel_counts*1000)) for k,v in deprel_counts.items()}

    return {**pos_counts_rel,**deprel_counts_rel}



In [741]:


from collections import Counter, defaultdict
feat2word2count = defaultdict(Counter)
disc2feat2word2count = {
    'phil': defaultdict(Counter),
    'lit': defaultdict(Counter)
}

disc2feat2word2eg={
    'phil': defaultdict(dict),
    'lit': defaultdict(dict)
}

context_len = 15


id2counts = {}

for id,docstr in tqdm(output_stash.items(),total=len(output_stash)):
    if id not in id2counts:
        doc = stanza.Document.from_serialized(docstr)
        disc = id.split('/')[0]
        feat2word2count = disc2feat2word2count[disc]
        feat2word2eg = disc2feat2word2eg[disc]
        id2counts[id] = get_pos_counts(doc, feat2word2count=feat2word2count, feat2word2eg=feat2word2eg)
    if len(id2counts) > 1000:
        break

 36%|███▌      | 1000/2773 [01:07<01:59, 14.85it/s]


In [742]:
# get_pos_counts(doc)

In [745]:
FEAT_N = 100
FEAT_MIN_COUNT = 0

def get_egs(word2count, n = FEAT_N, min_count=FEAT_MIN_COUNT, word2eg={}):
    total = word2count.total()
    o = []
    for w,c in word2count.most_common():
        if n and len(o) >= n:
            break

        c = int(round(c/total*1000))
        if c >= min_count:
            # o.append(f'{w} ({c})')
            eg = word2eg.get(w)
            if eg:
                o.append(f'{w.upper()} ({c}) ["{eg}"]')
            else:
                o.append(f'{w} ({c})')
            n-=1
        else:
            break
        # o.append(w)
    return '; '.join(o)

def get_egs_feat(feat, disc='phil', n=FEAT_N, min_count=FEAT_MIN_COUNT, incl_egs=False):
    feat2word2count = disc2feat2word2count[disc]
    feat2word2eg = disc2feat2word2eg[disc]
    word2count = feat2word2count[feat]
    word2eg = feat2word2eg[feat]
    return get_egs(word2count, n=n, min_count=min_count, word2eg=word2eg if incl_egs else {})


In [746]:
# get_egs_feat('nsubj', disc='phil')

In [747]:
# get_egs_feat('amod', disc='lit',incl_egs=True,n=20)

In [749]:
import pandas as pd

pd.options.display.max_colwidth=None

df = pd.DataFrame(id2counts).T.rename_axis('id')
df['_target'] = [i.split('/')[0] for i in df.index]
# df.columns = [f'{x} ({get_egs(feat2word2count[x])})' for x in df]
df

Unnamed: 0_level_0,PRON,AUX,VERB,CCONJ,ADP,PUNCT,NOUN,ADV,PROPN,PART,...,vocative,advcl:relcl,nmod:unmarked,iobj,csubj:outer,dislocated,csubj:pass,orphan,goeswith,_target
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
phil/10.2307/20140633__02,88.0,66.0,99.0,34.0,115.0,98.0,189.0,47.0,25.0,26.0,...,,,,,,,,,,phil
phil/10.2307/43921200__10,7.0,6.0,16.0,42.0,78.0,234.0,204.0,6.0,258.0,6.0,...,,,,,,,,,,phil
phil/10.2307/26706722__04,38.0,76.0,78.0,21.0,110.0,121.0,215.0,64.0,18.0,19.0,...,,,,,,,,,,phil
phil/10.2307/2180906__06,46.0,51.0,88.0,30.0,134.0,137.0,198.0,44.0,44.0,19.0,...,,,,,,,,,,phil
lit/469190__01,62.0,59.0,82.0,38.0,111.0,132.0,210.0,35.0,8.0,24.0,...,,,,,,,,,,lit
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
phil/10.2307/42971411__08,45.0,68.0,93.0,34.0,95.0,137.0,239.0,34.0,9.0,27.0,...,,2.0,1.0,,,,,,,phil
lit/510130__02,58.0,59.0,95.0,28.0,130.0,133.0,173.0,47.0,49.0,28.0,...,,2.0,,2.0,,,,,,lit
lit/3713731__02,72.0,63.0,103.0,33.0,104.0,126.0,135.0,27.0,100.0,29.0,...,1.0,,,,,,,,,lit
lit/460335__01,49.0,46.0,70.0,43.0,126.0,174.0,198.0,34.0,35.0,23.0,...,,,,1.0,,,,,,lit


In [750]:
df_means = df.groupby('_target').mean()
feat2grp2mean = df_means.to_dict()

ld = []
feat1 = None
for feat in feat2grp2mean:

    feat_d = {'feat':feat}
    grp2mean = feat2grp2mean[feat]
    grp1 = None
    for grp,score in grp2mean.items():
        feat_d[grp+'_egs'] = get_egs_feat(feat, disc=grp, min_count=0, n=25, incl_egs=False)
        feat_d[grp+'_egs2'] = get_egs_feat(feat, disc=grp, n=25, incl_egs=True)

        feat_d[grp]=score

    grps = list(grp2mean.keys())
    if len(grps)>1:
        grp1=grps[0]
        for grp2 in grps[1:]:
            feat_d[f'{grp2}-{grp1}'] = feat_d[grp2] - feat_d[grp1]

    # feat1 = feat
    ld.append(feat_d)
odf = pd.DataFrame(ld).dropna().sort_values('phil-lit',ascending=False).set_index('feat')
for c in odf:
    if not '_egs' in c:
        odf[c] = odf[c].apply(round).apply(int).astype(int)
odf = odf[['phil','lit','phil-lit','phil_egs','lit_egs','phil_egs2','lit_egs2']]

In [753]:
# @title Most philosophy
odf.sort_values('phil-lit', ascending=False).head(10)

Unnamed: 0_level_0,phil,lit,phil-lit,phil_egs,lit_egs,phil_egs2,lit_egs2
feat,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
AUX,65,47,18,is (323); be (139); are (104); can (71); would (36); will (34); may (30); does (30); have (27); must (21); has (20); was (19); being (18),is (291); be (99); are (80); was (75); have (50); can (40); has (37); been (33); would (33); may (30); were (29); will (26); had (24),"IS (323) [""It IS of course being""]; BE (139) [""it may still BE true that different""]; ARE (104) [""A few facts ARE nevertheless known""]; CAN (71) [""We CAN distinguish them""]; WOULD (36) [""ghetto problems WOULD be solved if ghetto""]; WILL (34) [""a septic tank WILL not dry up, collect""]; MAY (30) [""For it MAY still be true that""]; DOES (30) [""pronominalization DOES not cause any special""]; HAVE (27) [""I HAVE located only two""]; MUST (21) [""Instead, we MUST allow branching""]; HAS (20) [""every critic HAS reviewed at least""]; WAS (19) [""if the message WAS completely destroyed""]; BEING (18) [""is of course BEING assumed here that""]","IS (291) [""final effect IS coalescence""]; BE (99) [""He might also BE taxed with setting""]; ARE (80) [""and the Fool ARE the sublimest instance""]; WAS (75) [""He WAS perhaps rashly""]; HAVE (50) [""the longer they HAVE been kept asunder""]; CAN (40) [""Espronceda CAN scarcely have had""]; HAS (37) [""Shakespeare or Milton HAS achieved "" a deeper""]; BEEN (33) [""longer they have BEEN kept asunder""]; WOULD (33) [""characters, WOULD be false and sentimental""]; MAY (30) [""incongruities MAY lend even to tragic""]; WERE (29) [""Verdugo, which WERE duly published""]; WILL (26) [""professional satirist WILL naturally insinuate""]; HAD (24) [""that Hazlitt HAD analyzed in more""]"
NOUN,196,180,16,theory (8); case (7); sense (7); way (7); fact (6); truth (6); knowledge (5); view (5); world (5); question (5); argument (5); belief (4); time (4),life (6); time (6); world (5); way (5); man (4); work (4); sense (4); play (4); language (4); self (4); form (4); fact (4); history (3),"THEORY (8) [""known as the THEORY of finite partially""]; CASE (7) [""not always the CASE in real games""]; SENSE (7) [""latter in the SENSE that the move connected""]; WAY (7) [""obtainable in this WAY, with the obvious""]; FACT (6) [""It is in FACT easy to find any""]; TRUTH (6) [""there is one TRUTH, which is supposed""]; KNOWLEDGE (5) [""have detailed KNOWLEDGE of anything specific""]; VIEW (5) [""theoretical point of VIEW""]; WORLD (5) [""considering a WORLD co and a sentence""]; QUESTION (5) [""It is a QUESTION of the locus of""]; ARGUMENT (5) [""This ARGUMENT does not consider""]; BELIEF (4) [""is that its BELIEF in tigers provide""]; TIME (4) [""in point of TIME, first comes to""]","LIFE (6) [""observation on human LIFE, without elevation""]; TIME (6) [""at the same TIME bring out "" with""]; WORLD (5) [""result is a WORLD of "" seeming anal""]; WAY (5) [""kind, in one WAY or another, limits""]; MAN (4) [""substitution of a MAN for a doomed friend""]; WORK (4) [""In this WORK "" the scenes""]; SENSE (4) [""achieved "" a deeper SENSE than others of""]; PLAY (4) [""As the PLAY opens Dick Dudgeon""]; LANGUAGE (4) [""Virgil in any LANGUAGE""]; SELF (4) [""to save the SELF - sacrificing friend""]; FORM (4) [""in abstract FORM) the climax of""]; FACT (4) [""In FACT, the evident differences""]; HISTORY (3) [""with the later HISTORY of some of the""]"
mark,46,32,14,to (332); that (264); if (80); as (68); of (39); in (28); for (25); because (22); by (22); whether (19); since (14); while (10); though (10),to (381); that (204); as (95); if (47); of (39); in (30); by (23); for (23); because (21); while (16); though (15); than (11); although (10),"TO (332) [""writer they hat TO pronounce themselves""]; THAT (264) [""still be true THAT different writers""]; IF (80) [""However, IF the requirement""]; AS (68) [""degrees, as long AS for each separate""]; OF (39) [""interesting possibility OF characterizing""]; IN (28) [""specialization consists IN maintaining a contextual""]; FOR (25) [""FOR it may still be""]; BECAUSE (22) [""preserve meaning BECAUSE they serve to introduce""]; BY (22) [""however, BY considering the""]; WHETHER (19) [""to ascertain WHETHER or not this fact""]; SINCE (14) [""SINCE Shannon 's theory""]; WHILE (10) [""contextual view WHILE focusing on specifics""]; THOUGH (10) [""classificational thinking, THOUGH it is based on""]","TO (381) [""their union seem TO become""]; THAT (204) [""recognizes the power THAT witty incongruities""]; AS (95) [""either case, AS Hazlitt realizes""]; IF (47) [""feelings which, IF ascribed realistically""]; OF (39) [""the power OF calling up images""]; IN (30) [""is important IN creating the final""]; BY (23) [""poetry falls short BY offering a limited""]; FOR (23) [""passionate ; FOR, to communicate""]; BECAUSE (21) [""poetry of wit BECAUSE it is objective""]; WHILE (16) [""Wherein, WHILE Kiddy unawares""]; THOUGH (15) [""applause, and, THOUGH he could see nothing""]; THAN (11) [""gained much more THAN the army lost""]; ALTHOUGH (10) [""analysis of Lear, ALTHOUGH again the final""]"
SCONJ,30,20,11,that (407); if (122); as (105); of (56); in (38); because (33); for (33); by (32); whether (29); since (21); while (16); though (15); without (10),that (340); as (160); if (78); of (63); in (44); by (36); because (34); for (31); while (26); though (25); although (16); since (16); whether (15),"THAT (407) [""still be true THAT different writers""]; IF (122) [""However, IF the requirement""]; AS (105) [""degrees, as long AS for each separate""]; OF (56) [""interesting possibility OF characterizing""]; IN (38) [""specialization consists IN maintaining a contextual""]; BECAUSE (33) [""preserve meaning BECAUSE they serve to introduce""]; FOR (33) [""FOR it may still be""]; BY (32) [""however, BY considering the""]; WHETHER (29) [""to ascertain WHETHER or not this fact""]; SINCE (21) [""SINCE Shannon 's theory""]; WHILE (16) [""contextual view WHILE focusing on specifics""]; THOUGH (15) [""classificational thinking, THOUGH it is based on""]; WITHOUT (10) [""make a move WITHOUT knowing the outcome""]","THAT (340) [""recognizes the power THAT witty incongruities""]; AS (160) [""either case, AS Hazlitt realizes""]; IF (78) [""feelings which, IF ascribed realistically""]; OF (63) [""the power OF calling up images""]; IN (44) [""is important IN creating the final""]; BY (36) [""poetry falls short BY offering a limited""]; BECAUSE (34) [""poetry of wit BECAUSE it is objective""]; FOR (31) [""was prosecuted FOR mildly advocating""]; WHILE (26) [""Wherein, WHILE Kiddy unawares""]; THOUGH (25) [""applause, and, THOUGH he could see nothing""]; ALTHOUGH (16) [""analysis of Lear, ALTHOUGH again the final""]; SINCE (16) [""of greatness, SINCE it re""]; WHETHER (15) [""WHETHER or not readers""]"
cop,27,18,9,is (590); are (161); be (157); being (27); was (26); were (15); been (10); am (7); 's (4); s (1); 'm (0); r (0); 're (0),is (541); are (124); was (117); be (105); were (38); been (29); being (20); am (11); 's (9); 're (2); 'd (1); 'm (1); tis (0),"IS (590) [""It IS in fact easy to""]; ARE (161) [""semantical games ARE games with perfect""]; BE (157) [""it may still BE true that different""]; BEING (27) [""structure as BEING more than a simple""]; WAS (26) [""settle now what WAS yesterday in no""]; WERE (15) [""For, if this WERE all, it would""]; BEEN (10) [""least Frege have BEEN well aware that""]; AM (7) [""definite * I AM grateful to Alan""]; 'S (4) [""other hand, it 'S plausible that""]; S (1) [""It S logic of understanding""]; 'M (0) [""you say, ' I 'M here, ' at place""]; R (0) [""be somethin, R, however indistinct""]; 'RE (0) [""of world we 'RE in we are epistemically""]","IS (541) [""final effect IS coalescence""]; ARE (124) [""and the Fool ARE the sublimest instance""]; WAS (117) [""He WAS perhaps rashly""]; BE (105) [""characters, would BE false and sentimental""]; WERE (38) [""inferior novels WERE precisely those""]; BEEN (29) [""expulsion must have BEEN painful to his""]; BEING (20) [""BEING what he is, he""]; AM (11) [""I AM not interested""]; 'S (9) [""No : that 'S a very pretty reason""]; 'RE (2) [""was alone You 'RE either or neither""]; 'D (1) [""Fielding 's, call 'D The Modern Husband""]; 'M (1) [""say ; but I 'M not so modest as""]; TIS (0) [""lords, ' TIS the first time""]"
nsubj,67,58,9,we (92); it (71); i (63); that (47); he (36); they (35); which (31); this (26); one (23); who (12); what (9); you (8); she (6),he (84); it (59); i (54); we (43); that (40); they (34); which (31); who (26); she (20); this (15); one (13); you (10); what (7),"WE (92) [""WE can distinguish""]; IT (71) [""What IT is can be seen""]; I (63) [""I have located only""]; THAT (47) [""expression types THAT have no typereference""]; HE (36) [""the strategies HE has available""]; THEY (35) [""particular writer THEY hat to pronounce""]; WHICH (31) [""quantifier sentences WHICH do not have any""]; THIS (26) [""THIS is not always the""]; ONE (23) [""ONE does not evaluate""]; WHO (12) [""intellectuals WHO advocate that contraceptives""]; WHAT (9) [""from knowing WHAT has happened at""]; YOU (8) [""Suppose YOU want to know where""]; SHE (6) [""expressions, then SHE can plausibly insist""]","HE (84) [""but even so HE clearly recognizes""]; IT (59) [""IT is their "" circumstances""]; I (54) [""sublimest instance I know of passion""]; WE (43) [""WE can not expect""]; THAT (40) [""circumstances "" THAT are dissimilar""]; THEY (34) [""THEY startle, and take""]; WHICH (31) [""and feelings WHICH, if ascribed realistically""]; WHO (26) [""To the poet WHO would find materials""]; SHE (20) [""SHE married Narciso""]; THIS (15) [""Hazlitt that THIS is "" neither wit""]; ONE (13) [""ONE is inclined to""]; YOU (10) [""he was alone YOU 're either or neither""]; WHAT (7) [""WHAT Hazlitt means by""]"
advmod,55,47,8,not (161); only (35); so (33); then (30); when (26); more (25); also (23); even (22); however (19); thus (18); just (17); how (16); now (15),not (119); more (35); only (33); so (32); when (29); also (27); even (23); then (18); most (17); however (16); here (13); now (13); too (13),"NOT (161) [""each writer a NOT unimaginable state""]; ONLY (35) [""have located ONLY two other substantial""]; SO (33) [""our game rules SO far formulated""]; THEN (30) [""THEN he has to make""]; WHEN (26) [""later on, WHEN Merleau - Ponty""]; MORE (25) [""quantifiers and, MORE generally, partially""]; ALSO (23) [""unfeasible but ALSO unnecessary""]; EVEN (22) [""who can not EVEN relate to specialists""]; HOWEVER (19) [""intuitively, HOWEVER, by considering""]; THUS (18) [""marry old men, THUS reducing the birth""]; JUST (17) [""Instead of JUST one "" world """"]; HOW (16) [""clear about just HOW deficient the internal""]; NOW (15) [""strategies are NOW defined by functions""]","NOT (119) [""effect but is NOT part of that effect""]; MORE (35) [""likeness is made MORE dazzling by their""]; ONLY (33) [""Polarities not ONLY attract but may""]; SO (32) [""poems ; but even SO he clearly recognizes""]; WHEN (29) [""rashly enthusiastic WHEN, in speaking of""]; ALSO (27) [""He might ALSO be taxed with setting""]; EVEN (23) [""incongruities may lend EVEN to tragic poetry""]; THEN (18) [""surrender, and were THEN murdered""]; MOST (17) [""unfolding the MOST tremendous sufferings""]; HOWEVER (16) [""It is wit, HOWEVER, that Hazlitt""]; HERE (13) [""not in question HERE, all this talk""]; NOW (13) [""that what we NOW think to be liis""]; TOO (13) [""Characters, TOO, may restrict""]"
VERB,96,88,8,have (21); is (15); has (12); say (11); given (9); do (8); make (8); see (8); are (8); think (7); know (7); seems (7); take (5),have (9); is (9); see (8); made (8); make (7); has (7); seems (7); had (6); found (5); say (5); know (4); do (4); find (4),"HAVE (21) [""which do not HAVE any firstorder""]; IS (15) [""that there IS a culture in which""]; HAS (12) [""Then he HAS to make a move""]; SAY (11) [""majority rule system SAY that it is better""]; GIVEN (9) [""referent of a GIVEN token""]; DO (8) [""themselves, DO the work traditionally""]; MAKE (8) [""Then he has to MAKE a move without""]; SEE (8) [""it 's hard to SEE how Katz 's observations""]; ARE (8) [""There ARE too many intellectuals""]; THINK (7) [""but I can not THINK of any off - hand""]; KNOW (7) [""always comes « to KNOW, and never forgets""]; SEEMS (7) [""indeterminacy, SEEMS to open a door""]; TAKE (5) [""a game might TAKE us successively""]","HAVE (9) [""other hand, we HAVE no difficulty in""]; IS (9) [""and there IS said to have written""]; SEE (8) [""though he could SEE nothing to praise""]; MADE (8) [""Their likeness is MADE more dazzling by""]; MAKE (7) [""temptation to MAKE use of this jest""]; HAS (7) [""this country HAS another copy""]; SEEMS (7) [""literary success SEEMS to have encouraged""]; HAD (6) [""lobster even HAD to be cooked""]; FOUND (5) [""He FOUND shelter in the""]; SAY (5) [""which is to SAY, the forces that""]; KNOW (4) [""sublimest instance I KNOW of passion and""]; DO (4) [""he was to DO so in Paris""]; FIND (4) [""poet who would FIND materials sig""]"
ADJ,84,78,6,other (24); such (23); true (15); same (13); different (13); moral (11); possible (10); first (10); certain (10); particular (9); more (8); general (8); many (8),other (20); own (15); first (13); such (12); new (11); same (10); more (10); many (8); literary (8); human (8); great (7); different (6); english (6),"OTHER (24) [""located only two OTHER substantial studies""]; SUCH (23) [""Examples of SUCH ethnocentricism""]; TRUE (15) [""may still be TRUE that different""]; SAME (13) [""something with the SAME form as the following""]; DIFFERENT (13) [""efforts to entirely DIFFERENT degrees, as long""]; MORAL (11) [""type of rational MORAL theory (which""]; POSSIBLE (10) [""attention to the POSSIBLE underdeterminacy""]; FIRST (10) [""them go beyond FIRST - order logic""]; CERTAIN (10) [""has happened at CERTAIN earlier moves""]; PARTICULAR (9) [""effort of that PARTICULAR writer they hat""]; MORE (8) [""be applied in MORE than one order""]; GENERAL (8) [""suggested by our GENERAL game - theoretical""]; MANY (8) [""There are too MANY intellectuals who""]","OTHER (20) [""distance from each OTHER""]; OWN (15) [""shapings of his OWN fancy "" ; and his""]; FIRST (13) [""excluded from the FIRST rank of greatness""]; SUCH (12) [""seeke to garnish SUCH Gorgonlike shapes""]; NEW (11) [""recognize the NEW French monarchy""]; SAME (10) [""prisoner in the SAME instant""]; MORE (10) [""had analyzed in MORE detail the "" exquisiteness""]; MANY (8) [""deeply about MANY things immediately""]; LITERARY (8) [""His LITERARY success seems to""]; HUMAN (8) [""observation on HUMAN life, without""]; GREAT (7) [""bring out "" with GREAT felicity""]; DIFFERENT (6) [""fundamentally DIFFERENT in the two accounts""]; ENGLISH (6) [""this use of ' ENGLISH Seneca ' and the""]"
aux,24,18,6,can (190); would (96); will (89); may (81); does (73); have (72); must (56); has (52); should (48); might (45); do (43); could (38); is (23),have (130); can (103); has (98); would (84); may (77); will (65); had (62); does (47); must (45); could (39); might (35); should (33); is (29),"CAN (190) [""We CAN distinguish them""]; WOULD (96) [""ghetto problems WOULD be solved if ghetto""]; WILL (89) [""a septic tank WILL not dry up, collect""]; MAY (81) [""For it MAY still be true that""]; DOES (73) [""pronominalization DOES not cause any special""]; HAVE (72) [""I HAVE located only two""]; MUST (56) [""Instead, we MUST allow branching""]; HAS (52) [""every critic HAS reviewed at least""]; SHOULD (48) [""then a Fregean SHOULD analyze proper""]; MIGHT (45) [""round of a game MIGHT take us successively""]; DO (43) [""sentences which DO not have any firstorder""]; COULD (38) [""a philosopher COULD recognize as meaning""]; IS (23) [""beyond what IS to be gleaned from""]","HAVE (130) [""the longer they HAVE been kept asunder""]; CAN (103) [""Espronceda CAN scarcely have had""]; HAS (98) [""Shakespeare or Milton HAS achieved "" a deeper""]; WOULD (84) [""characters, WOULD be false and sentimental""]; MAY (77) [""incongruities MAY lend even to tragic""]; WILL (65) [""professional satirist WILL naturally insinuate""]; HAD (62) [""that Hazlitt HAD analyzed in more""]; DOES (47) [""more intimate DOES their union seem""]; MUST (45) [""His expulsion MUST have been painful""]; COULD (39) [""though he COULD see nothing to""]; MIGHT (35) [""He MIGHT also be taxed with""]; SHOULD (33) [""translation ; what SHOULD one do with them""]; IS (29) [""be sure nobody IS demanding a text""]"


In [754]:
# @title Most lit
odf.sort_values('phil-lit', ascending=True).head(25)

Unnamed: 0_level_0,phil,lit,phil-lit,phil_egs,lit_egs,phil_egs2,lit_egs2
feat,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
PROPN,21,53,-33,s (13); kant (12); god (10); f (10); aristotle (9); c (8); t (8); a (8); john (7); hume (6); g (6); frege (6); husserl (6),de (10); shakespeare (5); mr. (5); god (4); la (4); england (4); john (3); james (3); king (3); english (3); london (3); m. (3); spenser (3),"S (13) [""semantic question (S) is what Dretske""]; KANT (12) [""KANT 's dictum affirming""]; GOD (10) [""the laws of GOD, and so forth""]; F (10) [""functional question (F""]; ARISTOTLE (9) [""which reinforces ARISTOTLE 's remark that""]; C (8) [""the final C major chord a part""]; T (8) [""D, P, T and R""]; A (8) [""the fact that A has the concept""]; JOHN (7) [""wrinkling of JOHN 's brow is a part""]; HUME (6) [""In this sense HUME is right when he""]; G (6) [""deduced, only G — del sentences""]; FREGE (6) [""Fregean mindful of FREGE 's reflections""]; HUSSERL (6) [""influenced by HUSSERL 's Logical Investigations""]","DE (10) [""Yenes, Calle DE Segovia, Nim""]; SHAKESPEARE (5) [""SHAKESPEARE 's images are still""]; MR. (5) [""Fielding 's play MR. Modern twice suggests""]; GOD (4) [""the theme of GOD 's relation to""]; LA (4) [""En LA Imprenta de Yenes""]; ENGLAND (4) [""been sweeping ENGLAND""]; JOHN (3) [""epigraph from St. JOHN of the Cross""]; JAMES (3) [""sometime neighbor JAMES H. Sledd that our""]; KING (3) [""Lying Valet. KING Henry VIII - Ditto""]; ENGLISH (3) [""arguing that the ENGLISH are more free than""]; LONDON (3) [""that envoys from LONDON visited Teresa""]; M. (3) [""p. ; John M. Headley, e Problem""]; SPENSER (3) [""observation and SPENSER 's fable""]"
X,7,38,-31,de (47); en (31); van (28); een (21); het (20); s (17); et (16); in (13); dat (13); die (13); zijn (9); te (9); met (8),de (35); et (32); la (21); le (16); a (13); que (11); les (10); en (10); des (9); il (9); qui (8); ne (7); un (6),"DE (47) [""la psychologie DE la forme, "" serves""]; EN (31) [""gist et dort, EN ce sollier Qu""]; VAN (28) [""het systeem VAN Kant nauwkeurig""]; EEN (21) [""samenvatting van EEN zeker complex van""]; HET (20) [""vergelijken met HET zoo wonderlijk""]; S (17) [""S or T or Y S, T and Y are compatible""]; ET (16) [""Finally, Dawid ET al""]; IN (13) [""categories at IN de ente, q. Specifically""]; DAT (13) [""en gevolgen, DAT zeer eigenaardige""]; DIE (13) [""Einführung in DIE symbolische Logik""]; ZIJN (9) [""het van belang ZIJN, het systeem van""]; TE (9) [""Kant nauwkeurig TE vergelijken met""]; MET (8) [""te vergelijken MET het zoo wonderlijk""]","DE (35) [""Abrasados en sed DE venganza, Odio""]; ET (32) [""plurimum quoque jure ET merito tribuit""]; LA (21) [""guerreros, De LA patria sosten y""]; LE (16) [""Dans LE troisisme il traite""]; A (13) [""a questi et A quelli""]; QUE (11) [""n'est proprement QUE la pratique de""]; LES (10) [""tels qu'il LES voit), relying""]; EN (10) [""Abrasados EN sed de venganza""]; DES (9) [""laquelle il don ne DES details tres instructifs""]; IL (9) [""sur laquelle IL don ne des details""]; QUI (8) [""negotium, quippe QUI praefecto oratori""]; NE (7) [""laquelle il don NE des details tres""]; UN (6) [""quasi nate ad UN parto (vol""]"
flat,6,27,-21,de (31); van (24); en (18); een (15); het (14); in (10); dat (8); die (8); zijn (8); j. (7); e (7); te (7); met (6),de (41); la (19); le (12); a (11); et (10); que (8); les (8); des (7); en (7); il (6); qui (6); ne (5); e (5),"DE (31) [""la psychologie DE la forme, "" serves""]; VAN (24) [""As Tim VAN Gelder and Robert""]; EN (18) [""gist et dort, EN ce sollier Qu""]; EEN (15) [""samenvatting van EEN zeker complex van""]; HET (14) [""vergelijken met HET zoo wonderlijk""]; IN (10) [""intellectum est IN intelligente immaterialiter""]; DAT (8) [""en gevolgen, DAT zeer eigenaardige""]; DIE (8) [""Hegel "" erfasst DIE Arbeit als das""]; ZIJN (8) [""het van belang ZIJN, het systeem van""]; J. (7) [""OF PROHIBITION J. ELLIOT ROSS""]; E (7) [""of T evidence E is produced in""]; TE (7) [""Kant nauwkeurig TE vergelijken met""]; MET (6) [""te vergelijken MET het zoo wonderlijk""]","DE (41) [""Yenes, Calle DE Segovia, Nim""]; LA (19) [""En LA Imprenta de Yenes""]; LE (12) [""Dans LE troisisme il traite""]; A (11) [""a questi et A quelli""]; ET (10) [""l'invention des choses ET de leur disposition""]; QUE (8) [""n'est proprement QUE la pratique de""]; LES (8) [""tels qu'il LES voit), relying""]; DES (7) [""laquelle il don ne DES details tres instructifs""]; EN (7) [""Abrasados EN sed de venganza""]; IL (6) [""sur laquelle IL don ne des details""]; QUI (6) [""negotium, quippe QUI praefecto oratori""]; NE (5) [""laquelle il don NE des details tres""]; E (5) [""M. Headley, E Problem of Counsel""]"
case,107,118,-12,of (338); in (161); to (87); for (59); with (41); by (40); as (38); on (35); 's (32); from (27); at (21); about (16); between (14),of (323); in (159); to (82); 's (68); for (44); with (43); by (40); as (39); on (31); from (30); at (23); into (12); between (11),"OF (338) [""earliest effort OF that particular""]; IN (161) [""branching quantifiers IN perfectly grammatical""]; TO (87) [""recent efforts TO entirely different""]; FOR (59) [""as long as FOR each separate writer""]; WITH (41) [""strategies which deals WITH the applications""]; BY (40) [""least one book BY each writer a not""]; AS (38) [""them, is known AS the theory of finite""]; ON (35) [""Skolem functions ON all earlier universally""]; 'S (32) [""Moreover, Lakoff 'S constraints are""]; FROM (27) [""can be seen FROM the idea that in""]; AT (21) [""has reviewed AT least one book""]; ABOUT (16) [""nevertheless known ABOUT f.p.o""]; BETWEEN (14) [""omniting "" and "" BETWEEN conjuncts other""]","OF (323) [""sufferings, and OF burlesque on passion""]; IN (159) [""concerned with IN his analysis of""]; TO (82) [""may lend even TO tragic poetry""]; 'S (68) [""In Dr. Johnson 'S words, these im""]; FOR (44) [""difficult standard, FOR a comparison without""]; WITH (43) [""passion playing WITH it, aiding and""]; BY (40) [""more dazzling BY their novelty""]; AS (39) [""of the poem AS a whole, and admires""]; ON (31) [""of burlesque ON passion playing""]; FROM (30) [""greatest distance FROM each other""]; AT (23) [""although "" placed AT the greatest distance""]; INTO (12) [""flung himself INTO the political fray""]; BETWEEN (11) [""evident differences BETWEEN the morn and the""]"
nmod:poss,13,24,-11,its (163); our (158); his (151); their (135); my (69); her (28); one (22); whose (19); your (10); agent (6); kant (5); man (5); subject (4),his (286); their (96); its (93); her (69); our (43); my (41); your (15); whose (14); one (8); man (4); shakespeare (4); pound (4); author (3),"ITS (163) [""behavior, besides ITS operating as a""]; OUR (158) [""assuming that OUR semantical games""]; HIS (151) [""the country of HIS new residence has""]; THEIR (135) [""writers like THEIR most recent efforts""]; MY (69) [""that part of MY strategies which""]; HER (28) [""standards (it is HER similarity relation""]; ONE (22) [""A particle in ONE 's brain is in""]; WHOSE (19) [""chair speculators WHOSE work does not correspond""]; YOUR (10) [""and write to YOUR friends who might""]; AGENT (6) [""stating that the AGENT 's behavior resulted""]; KANT (5) [""KANT 's dictum affirming""]; MAN (5) [""and advocates MAN 's exploitation""]; SUBJECT (4) [""or weak the SUBJECT 's epistemic position""]","HIS (286) [""concerned with in HIS analysis of Lear""]; THEIR (96) [""It is THEIR "" circumstances""]; ITS (93) [""and relieving ITS in""]; HER (69) [""educated) by HER father 's friend""]; OUR (43) [""judgments as OUR watches, none""]; MY (41) [""MY purpose is to point""]; YOUR (15) [""able to see in YOUR decision a reflection""]; WHOSE (14) [""an audience, WHOSE very inmost spirits""]; ONE (8) [""emotion, for ONE 's feelings necessarily""]; MAN (4) [""the basis of MAN 's virtue within""]; SHAKESPEARE (4) [""SHAKESPEARE 's images are still""]; POUND (4) [""; or, in POUND 's more elaborate""]; AUTHOR (3) [""against an AUTHOR 's complete unconsciousness""]"
PUNCT,118,127,-9,", (404); . (289); "" (71); - (48); ( (39); ) (38); ' (33); : (20); ? (19); ; (18); — (7); [ (2); ] (2)",", (435); . (255); "" (93); - (47); ' (31); : (27); ; (26); ( (26); ) (21); ? (15); — (7); ! (4); [ (4)",", (404) [""different degrees, as long as for""]; . (289) [""themselves on""]; "" (71) [""morpheme "" wh-""]; - (48) [""beyond first - order logic, however""]; ( (39) [""applications of (G.E""]; ) (38) [""applications of (G.E""]; ' (33) [""it ' dialectic""]; : (20) [""ethnocentricism : whatever is good""]; ? (19) [""like expressions""]; ; (18) [""make a molecule ; molecules combine""]; — (7) [""audacious enterprise — given the fact""]; [ (2) [""study science [as] not the one""]; ] (2) [""science [as] not the one and""]",", (435) [""playing with it, aiding and relieving""]; . (255) [""tragic poetry""]; "" (93) [""circumstances "" that are dissimilar""]; - (47) [""within the mock - heroic framework""]; ' (31) [""Pope 's lines : ' Tis with our judgments""]; : (27) [""Pope 's lines : ' Tis with our""]; ; (26) [""disgusting kind "" ; but the characters""]; ( (26) [""de Muerte nor (strangely enough""]; ) (21) [""strangely enough) in El Verdugo""]; ? (15) [""relieving its in""]; — (7) [""the same reason — is excluded from""]; ! (4) [""sosten y esperanza""]; [ (4) [""others of what [is] grand in the""]"
punct,119,127,-9,", (404); . (289); "" (71); - (48); ( (39); ) (38); ' (33); : (20); ? (19); ; (18); — (7); [ (2); ] (2)",", (435); . (254); "" (93); - (47); ' (30); : (27); ; (26); ( (26); ) (21); ? (15); — (7); ! (4); [ (4)",", (404) [""different degrees, as long as for""]; . (289) [""themselves on""]; "" (71) [""morpheme "" wh-""]; - (48) [""beyond first - order logic, however""]; ( (39) [""applications of (G.E""]; ) (38) [""applications of (G.E""]; ' (33) [""it ' dialectic""]; : (20) [""ethnocentricism : whatever is good""]; ? (19) [""like expressions""]; ; (18) [""make a molecule ; molecules combine""]; — (7) [""audacious enterprise — given the fact""]; [ (2) [""study science [as] not the one""]; ] (2) [""science [as] not the one and""]",", (435) [""playing with it, aiding and relieving""]; . (254) [""tragic poetry""]; "" (93) [""circumstances "" that are dissimilar""]; - (47) [""within the mock - heroic framework""]; ' (30) [""Pope 's lines : ' Tis with our judgments""]; : (27) [""Pope 's lines : ' Tis with our""]; ; (26) [""disgusting kind "" ; but the characters""]; ( (26) [""de Muerte nor (strangely enough""]; ) (21) [""strangely enough) in El Verdugo""]; ? (15) [""relieving its in""]; — (7) [""the same reason — is excluded from""]; ! (4) [""sosten y esperanza""]; [ (4) [""others of what [is] grand in the""]"
conj,31,39,-8,have (5); not (4); etc (4); is (4); false (4); b (3); one (3); has (3); relations (3); what (3); on (3); less (3); others (3),one (4); make (3); is (2); made (2); have (2); man (2); cultural (2); had (2); death (2); history (2); that (2); world (2); more (2),"HAVE (5) [""they certainly HAVE no special name""]; NOT (4) [""having Being and NOT - Being as its""]; ETC (4) [""way of doing, ETC""]; IS (4) [""not, there IS a hardware problem""]; FALSE (4) [""else be true or FALSE""]; B (3) [""Positions A and B in light of Donagan""]; ONE (3) [""an epistemic ONE""]; HAS (3) [""place, and HAS a lasting sense""]; RELATIONS (3) [""properties and RELATIONS are particulars""]; WHAT (3) [""state as well, WHAT is the content""]; ON (3) [""objects, and so ON""]; LESS (3) [""as more or LESS distant, as occluding""]; OTHERS (3) [""Warrender, and OTHERS have read and written""]","ONE (4) [""natural "" ONE uncorrupted by""]; MAKE (3) [""live and to MAKE a living among""]; IS (2) [""and there IS said to have written""]; MADE (2) [""found and the MADE have been intertwined""]; HAVE (2) [""But he would HAVE to rework the previous""]; MAN (2) [""or middle MAN""]; CULTURAL (2) [""the ethnic, CULTURAL, historical milieu""]; HAD (2) [""scarcely have HAD any great share""]; DEATH (2) [""shiftless doings and DEATH of her husband""]; HISTORY (2) [""sociology, literary HISTORY, or any other""]; THAT (2) [""minister and THAT there""]; WORLD (2) [""and the fallen WORLD""]; MORE (2) [""gathered more and MORE impetus after the""]"
ADP,108,115,-7,of (344); in (163); to (94); for (60); as (42); with (42); by (40); on (37); from (28); at (24); about (17); between (14); than (12),of (341); in (167); to (89); for (48); with (46); as (43); by (41); on (34); from (32); at (26); into (12); between (12); than (12),"OF (344) [""earliest effort OF that particular""]; IN (163) [""branching quantifiers IN perfectly grammatical""]; TO (94) [""recent efforts TO entirely different""]; FOR (60) [""as long as FOR each separate writer""]; AS (42) [""them, is known AS the theory of finite""]; WITH (42) [""strategies which deals WITH the applications""]; BY (40) [""least one book BY each writer a not""]; ON (37) [""pronounce themselves ON""]; FROM (28) [""can be seen FROM the idea that in""]; AT (24) [""has reviewed AT least one book""]; ABOUT (17) [""nevertheless known ABOUT f.p.o""]; BETWEEN (14) [""omniting "" and "" BETWEEN conjuncts other""]; THAN (12) [""satisfying manner THAN Lakoff 's account""]","OF (341) [""sufferings, and OF burlesque on passion""]; IN (167) [""relieving its IN""]; TO (89) [""may lend even TO tragic poetry""]; FOR (48) [""difficult standard, FOR a comparison without""]; WITH (46) [""passion playing WITH it, aiding and""]; AS (43) [""of the poem AS a whole, and admires""]; BY (41) [""more dazzling BY their novelty""]; ON (34) [""of burlesque ON passion playing""]; FROM (32) [""greatest distance FROM each other""]; AT (26) [""although "" placed AT the greatest distance""]; INTO (12) [""flung himself INTO the political fray""]; BETWEEN (12) [""evident differences BETWEEN the morn and the""]; THAN (12) [""checking "" THAN in "" encouraging""]"
nmod,52,56,-4,theory (9); what (8); world (7); knowledge (7); view (6); it (6); them (6); experience (5); truth (5); science (5); nature (5); objects (5); life (5),life (9); world (6); history (6); language (5); play (5); what (4); literature (4); them (4); time (4); it (4); man (4); art (4); nature (3),"THEORY (9) [""support to the THEORY of complete determination""]; WHAT (8) [""Examples of WHAT might happen""]; WORLD (7) [""else in the WORLD""]; KNOWLEDGE (7) [""of scientific KNOWLEDGE is, of course""]; VIEW (6) [""theoretical point of VIEW""]; IT (6) [""the details of IT, the knowledge""]; THEM (6) [""that most of THEM go beyond first""]; EXPERIENCE (5) [""comprehensive EXPERIENCE, our thinking""]; TRUTH (5) [""covariance of TRUTH and belief, the""]; SCIENCE (5) [""de-categorization of SCIENCE and trans - specialization""]; NATURE (5) [""concerning the NATURE and function of""]; OBJECTS (5) [""relations between OBJECTS and concepts""]; LIFE (5) [""the rest of LIFE""]","LIFE (9) [""observation on human LIFE, without elevation""]; WORLD (6) [""conditions of the WORLD""]; HISTORY (6) [""whose ideas on HISTORY and poetry are""]; LANGUAGE (5) [""constructed LANGUAGE of these poets""]; PLAY (5) [""practically every PLAY""]; WHAT (4) [""than others of WHAT [is] grand in""]; LITERATURE (4) [""his theory of LITERATURE Hazlitt is principally""]; THEM (4) [""illustration for THEM""]; TIME (4) [""the nick of TIME to save him, so""]; IT (4) [""modification of IT by an increased""]; MAN (4) [""substitution of a MAN for a doomed friend""]; ART (4) [""to a work of ART""]; NATURE (3) [""the objects of NATURE, or affecting""]"
