In [1]:
import sys; sys.path.append('..')
from osp import *
pd.options.display.max_colwidth = None
pd.options.display.max_rows = 20

In [2]:
df_meta = get_corpus_metadata()
ids_phil = df_meta.query('discipline == "Philosophy"').index.tolist()
ids_lit = df_meta.query('discipline == "Literature"').index.tolist()
len(ids_phil), len(ids_lit)

(32277, 25343)

In [3]:
import numpy as np

groups = [
    ('Philosophy', ids_phil),
    ('Literature', ids_lit),
]

def get_mdw_pos(ids1, ids2, name1="Group 1", name2="Group 2", feat_n=FEAT_N, feat_min_count=FEAT_MIN_COUNT, incl_deprel=True, incl_pos=True, feat_n_egs=FEAT_N//2, rename_cols=False):
    df_pos_grp1 = get_pos_counts(ids1, incl_deprel=incl_deprel, incl_pos=incl_pos)
    df_pos_grp2 = get_pos_counts(ids2, incl_deprel=incl_deprel, incl_pos=incl_pos)

    words_grp1 = get_pos_word_counts(ids1)
    words_grp2 = get_pos_word_counts(ids2)

    egs_grp1 = get_pos_word_egs(ids1)
    egs_grp2 = get_pos_word_egs(ids2)


    df_pos = pd.concat([df_pos_grp1.assign(_target=name1), df_pos_grp2.assign(_target=name2)])
    fisher_results = fisher_test_pos(df_pos, target_col='_target', g1=name1, g2=name2)

    df_means = df_pos.groupby('_target').mean()
    feat2grp2mean = df_means.to_dict()
    df_sums = df_pos.groupby('_target').sum()

    ld = []
    feat1 = None

    colname1 = f'1 ({name1})'
    colname2 = f'2 ({name2})'

    for feat in feat2grp2mean:
        feat_d = {'feat':feat}
        grp2mean = feat2grp2mean[feat]
        
        feat_d[f'fpk1'] = grp2mean[name1]
        feat_d[f'fpk2'] = grp2mean[name2]
        feat_d[f'top1'] = get_egs(words_grp1[feat], n=feat_n, min_count=feat_min_count)
        feat_d[f'top2'] = get_egs(words_grp2[feat], n=feat_n, min_count=feat_min_count)

        feat_d[f'egs1'] = get_egs(words_grp1[feat], n=feat_n_egs, min_count=feat_min_count, word2eg=egs_grp1[feat])
        feat_d[f'egs2'] = get_egs(words_grp2[feat], n=feat_n_egs, min_count=feat_min_count, word2eg=egs_grp2[feat])
        # feat_d['total'] = feat_d[g1] + feat_d[g2]
        
        # feat1 = feat
        ld.append(feat_d)
    odf = pd.DataFrame(ld).dropna().set_index('feat')
    odf['fpk1-fpk2'] = odf[f'fpk1'] - odf[f'fpk2']
    odf['fpk1/fpk2'] = odf[f'fpk1'] / odf[f'fpk2']
    odf = fisher_results.join(odf).sort_values('p_value', ascending=True)
    odf['odds_ratio_log'] = np.log10(odf['odds_ratio'])
    odf['odds_ratio_log_abs'] = np.abs(odf['odds_ratio_log'])
    odf['feat_desc'] = [FEAT2DESC.get(feat,'?') for feat in odf.index]
    odf = odf.reset_index()

    def desc_result(row):
        if row.odds_ratio > 1:
            return f'{row.feat_desc}s are {row.odds_ratio:.1f}x more common in {name1} than {name2}.'
        else:
            return f'{row.feat_desc}s are {1/row.odds_ratio if row.odds_ratio != 0 else 0:.1f}x more common in {name2} than {name1}.'
    
    odf = odf.sort_values('odds_ratio_log_abs', ascending=False)
    odf['mdw_rank'] = [i+1 for i in range(len(odf))]

    odf = odf.sort_values('odds_ratio', ascending=False)
    odf['mdw1_rank'] = [i+1 for i in range(len(odf))]
    odf = odf.sort_values('odds_ratio', ascending=True)
    odf['mdw2_rank'] = [i+1 for i in range(len(odf))]

    odf['result_desc'] = odf.apply(desc_result, axis=1)
    odf = odf[[
        'feat', 
        'feat_desc', 
        'result_desc',
        
        'fpk1','fpk2',
        'odds_ratio',
        # 'fpk1-fpk2',
        # 'fpk1/fpk2',
        'sum1','sum2',
        'sig','top1','top2','egs1','egs2',
        'mdw_rank',
        'mdw1_rank',
        'mdw2_rank',
        ]]
    # odf = odf.drop(columns=['fpk1','fpk2','fpk1-fpk2'])
    # odf = odf[[c for c in odf.columns if c not in end_cols] + end_cols]
    odf = odf.rename(columns={
        'sum1': f'{name1} (#)',
        'sum2': f'{name2} (#)',
        'pct1': f'{name1} (%)',
        'pct2': f'{name2} (%)',
        'fpk1': f'{name1} (#/k)',
        'fpk2': f'{name2} (#/k)',
        'odds_ratio': f'{name1} / {name2} (OR)',
        'sig': 'Significance',
        'top1': f'{name1} (top {feat_n})',
        'top2': f'{name2} (top {feat_n})',
        'egs1': f'{name1} (examples)',
        'egs2': f'{name2} (examples)',
        'fpk1-fpk2': f'{name1} - {name2} (#/k)',
    }) if rename_cols else odf
    return odf.set_index(['feat','feat_desc','result_desc']).sort_values('mdw_rank').dropna()

In [14]:


odf = get_mdw_pos(ids_phil, ids_lit, 'Philosophy', 'Literature', feat_n=25, feat_min_count=1, incl_deprel=True, feat_n_egs=3,rename_cols=False).round(2).query('sig=="***"')

In [15]:
odf.sort_values('mdw1_rank').head(10)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,fpk1,fpk2,odds_ratio,sum1,sum2,sig,top1,top2,egs1,egs2,mdw_rank,mdw1_rank,mdw2_rank
feat,feat_desc,result_desc,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
LS,List item marker,List item markers are 4.5x more common in Philosophy than Literature.,0.41,0.09,4.46,2368,267,***,ii a i iii v vi b k iv vii xxxi viii xliv xlvi xxxv xxi xxv xiv d xxvi xi xxxix bn ^ £),ii a vi v iii i viii iv vii xi xli xii xxxvi xxviii xxii xxxviii xxxvii xxxi xviii k lviii xliii lxvi,"""behind criterion (II"" ""raising here are (A) how the identity"" ""satisfaction of (I) is not only an""","""II fut tout puissant"" ""Items : (A) The issue of"" ""VI""",1,1,98
$,?,?s are 3.9x more common in Philosophy than Literature.,0.02,0.01,3.93,125,16,***,£ $ \( £- }( <£ t(,£ $ £–,"""sublanguage L of $ £ with a notion of"" ""sublanguage L of $ £ with a notion"" ""A) then if \(B / A) is broken""","""home with a £ stove and a collection"" ""assumes a base * $ en@ get, grasp"" ""five guineas (£– £ in current buying""",2,2,97
NFP,Superfluous punctuation,Superfluous punctuations are 3.7x more common in Philosophy than Literature.,0.36,0.1,3.68,2077,284,***,* { | ~ -> - · > ^ -* \ ** /? /. >. := _ -< => ̄ = *) *( ?> >(,* - | { > ~ z ^ [. ;- :: *** /. >. _ '( .t. ~. _. -- -> :) :; ; =',"""G < / >(*), we call physical"" ""Axiom P { ext (S) S G"" ""to equal P(| E), in parallel""","""X *"" ""OF MORE - AND - LESS : LETTER"" ""la muerte, | la eternajuventud""",4,3,96
FW,Foreign word,Foreign words are 3.4x more common in Philosophy than Literature.,0.93,0.27,3.4,5396,798,***,"i.e. e.g. etc i.e etc. e.g cf eds. pp. c. viz. cf. ed. , esp. i. nt soc.","etc e.g. etc. i.e. pp. i.e , ed. e.g c. so cf. v. cf esp. eds. i. ca. ibid. eng. nt viz. &c. str. mar","""modern physics, I.E., classical mechanics"" ""E.G. I expect that I"" ""the present, ETC""","""CINEMATIC MUSICAL ETC"" ""the sequenced (E.G., comic strip"" ""the merchants, ETC., it is impossible""",5,4,95
SYM,Symbol,Symbols are 2.7x more common in Philosophy than Literature.,0.37,0.14,2.71,2174,403,***,= / + { ̄ \ -> \( => // ^ /- ~ /( ^( |= x +( *) := \- /a -+ /\ +/,/ = + – .is _. ^ .e *s o/,"""predicates, £ = { E, F,. Individual"" ""of sentences < / >v that are verifiable"" ""p (E) + p(F), lj""","""for my part, / See reasons and"" ""protocomes (= admiral or provost"" ""human], [+ concrete],""",9,6,93
EX,Existential there,Existential theres are 1.9x more common in Philosophy than Literature.,2.94,1.52,1.94,17156,4441,***,there,there,"""foreseeable bound, THERE is no reason why""","""observe that THERE is not one but""",14,7,92
expl,Expletive,Expletives are 1.9x more common in Philosophy than Literature.,5.75,3.05,1.89,33481,8928,***,there it neither,it there neither,"""in time t, THERE will be some set"" ""IT is not satisfactory"" ""But NEITHER vindicates realism""","""IT is precisely for"" ""THERE are, of course"" ""NEITHER is it sufficient""",15,8,91
nsubj:outer,?,?s are 1.8x more common in Philosophy than Literature.,2.32,1.27,1.83,13504,3710,***,what it that point this problem reason question way idea aim one all answer claim purpose fact thing view difference goal conclusion which task argument,it what that point this one which purpose way reason question problem thing all fact aim effect result function answer we whatever task he difference,"""WHAT this amounts to"" ""IT is for the patient"" ""THAT toward the end""","""IT is, too, so closely"" ""observed : Now, WHAT specifically defines"" ""sublimation affected; THAT is, whether sublimation""",17,9,90
ccomp,Clausal complement,Clausal complements are 1.8x more common in Philosophy than Literature.,9.75,5.37,1.82,56780,15720,***,is have has are true be know what do possible make one ought exist false exists case made makes had wrong justified necessary take right,is have had has made are was come what one read become found used make be take do know seems see find came written seen,"""asserts that there IS a particular set"" ""indispensable we HAVE no choice but to"" ""our procedure HAS to cope with an""","""observe that there IS not one but two"" ""Kleist would HAVE her appear as the"" ""iconography, HAD a language often""",18,10,89
csubj,Clausal subject,Clausal subjects are 1.8x more common in Philosophy than Literature.,2.47,1.4,1.76,14405,4107,***,say see have think having make suppose note do doing believe is take know find knowing give consider show assume imagine hold seeing understand use,say see find have make note read think had assume writing imagine understand do consider know recognize suppose give made take suggest distinguish explain believe,"""thing simply to SAY that philosophers"" ""important to SEE that in the fist"" ""expedient to HAVE any relations which""","""appropriate to SAY that they retreat"" ""justifiable to SEE them as the present"" ""remarkable that we FIND embedded in this""",19,11,88


In [16]:
odf.sort_values('mdw2_rank').head(5)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,fpk1,fpk2,odds_ratio,sum1,sum2,sig,top1,top2,egs1,egs2,mdw_rank,mdw1_rank,mdw2_rank
feat,feat_desc,result_desc,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
NNPS,"Proper noun, plural","Proper noun, plurals are 3.9x more common in Literature than Philosophy.",0.58,2.24,0.26,3387,6553,***,states netherlands greeks principles ages americans nations essays studies foundations sciences morals humeans stoics jews lectures meditations laws kantians descartes problems christians books dogmas logos,states jews ages americans works studies letters indians tales christians germans poems greeks essays notes men women arts europeans scots critics poets lives years romans,"""STATES are defined as"" ""Printed in the NETHERLANDS"" ""The GREEKS were good at it""","""the United STATES of the latter nineteenth"" ""possibilities that the JEWS and their conceptually"" ""of the Middle AGES) and transforms""",3,98,1
list,?,?s are 2.8x more common in Literature than Philosophy.,0.17,0.48,0.36,1009,1402,***,vol. ed. pp. press. eds. trans op cambridge ma university ed no. nj philosophy q. chap abstract oxford journal logic p part paris eds vol,die pp. trans berlin ed ed. der op vol. das p. u. bd. goethe university univ. deutsche pp univ vol junker studien hans. cir h.,"""of Descartes, VOL. Trans"" ""N. Beckman (ED. Amherst, MA"" ""Analysis, PP""","""allerwinzigste Punkt (DIE Ausgewanderten"" ""Harris, Cult, PP. Lang"" ""Movement, TRANS""",8,96,3
VBD,"Verb, past tense","Verb, past tenses are 2.7x more common in Literature than Philosophy.",5.84,15.46,0.38,34021,45311,***,was were had did thought said saw made came took became found knew used gave believed put meant argued seemed called began led held wrote,was had were did made came wrote became said took saw found thought knew began went seemed gave used felt read called wanted brought sought,"""arbitrary subset WAS such that the axiom"" ""subsets that WERE not previously"" ""broader than HAD previously been""","""the room there WAS little furniture"" ""These women then HAD to redefine themselves"" ""These gains WERE crucial in part""",10,95,4
NNP,"Proper noun, singular","Proper noun, singulars are 2.4x more common in Literature than Philosophy.",18.43,44.55,0.41,107358,130521,***,god kant s a aristotle c t f descartes hume plato hegel sect heidegger husserl q john frege socrates university mr. professor quine russell e,god shakespeare english milton mr. new john england wordsworth king james coleridge sir chaucer christ la hamlet london spenser mrs. shelley lady johnson henry lord,"""beings such as GOD, who is not bound"" ""should have set KANT s mind thinking"" ""Axiom P { ext (S) S G is a partition""","""their duty to GOD, to their mother"" ""Montaigne, SHAKESPEARE, Napoleon, and"" ""called Spoken ENGLISH ), Drama, and""",11,94,5
vocative,?,?s are 2.3x more common in Literature than Philosophy.,0.15,0.34,0.44,863,992,***,i cf t q a ergo facie b vol al contra g l chap x hypothesi e fig de f kant man peter w o,i cf sir lord op lady me god friend man madam t mr. ed richard thou cf. james anna faith rohrscheid we pp you vol,"""enemy... [I] f both divisions"" ""CF. In conjunction"" ""group case : [T] he proposed procedure""","""I. Of Generic Worlds"" ""CF. At this point"" ""you, my dear SIR""",12,93,6


In [7]:
ids1 = df_meta.query('discipline == "Philosophy" & 1920<=year<1970').index.tolist()
ids2 = df_meta.query('discipline == "Philosophy" & 2025>=year>=1970').index.tolist()
odf = get_mdw_pos(ids1, ids2, 'Early Philosophy', 'Late Philosophy', feat_n=25, feat_min_count=1, incl_deprel=True, feat_n_egs=3,rename_cols=False).round(2).sort_values('mdw1_rank')
odf = odf[odf.sig=="***"]
odf.sort_values('mdw1_rank').head(10)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,fpk1,fpk2,odds_ratio,sum1,sum2,sig,top1,top2,egs1,egs2,mdw_rank,mdw1_rank,mdw2_rank
feat,feat_desc,result_desc,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
csubj:pass,?,?s are 1.6x more common in Early Philosophy than Late Philosophy.,0.19,0.11,1.65,257,483,***,have has made given determined intended prove determine regarded become having needed translatable developed excluded left refer essential conditioned taken understood said true reduced involves,have give having has doing used stand exist explained entails achieve lies lead provides make become meaning differs require meant directing represented obeying intended considered,"""vectorial variable HAVE the same range"" ""in Chapter II HAS to be combined"" ""religion was MADE by Schlenimacher""","""should already HAVE all future possibilities"" ""inference should GIVE some new information"" ""his account, HAVING sense impressions""",12,1,98
'',?,?s are 1.6x more common in Early Philosophy than Late Philosophy.,8.3,5.05,1.65,11277,21251,***,""" ' » '. ''",""" ' » « ''","""of subject as formulated by"" ""of existence - the phrase is"" ""sunflowers are yellow » are intelligible""","""word pure when referring"" ""extension of set at t is sufficiently"" ""Connectives A, V, ». Parentheses""",13,2,97
``,?,?s are 1.6x more common in Early Philosophy than Late Philosophy.,8.01,5.0,1.6,10877,21027,***,""" ' « ''",""" ' « » ``","""the category of subject as formulated"" ""the previous vulgar measure"" ""sentences like « all sunflowers""","""understand the word pure when referring"" ""extension of set at t is sufficiently"" ""doo - be doo « now , then""",14,3,96
VBD,"Verb, past tense","Verb, past tenses are 1.6x more common in Early Philosophy than Late Philosophy.",7.92,5.04,1.57,10759,21199,***,was were had did thought said made saw became found came seemed knew gave called wrote began took meant led went used held believed felt,was were had did said thought saw made took came argued believed used put knew gave meant found became called led wanted noted began caused,"""the sciences WAS pervasive of his"" ""reality as if they WERE pre-Kantian"" ""Dewey decided he HAD been wrong in trying""","""arbitrary subset WAS such that the axiom"" ""subsets that WERE not previously"" ""broader than HAD previously been""",15,4,95
conj,Conjunct,Conjuncts are 1.3x more common in Early Philosophy than Late Philosophy.,36.74,27.85,1.32,49890,117110,***,have is one etc what less has not more false etc. other object time that philosophy action nature sense make good relations true knowledge others,have etc b not one is what on has false others other properties theory relations al more true knowledge less time do etc. make belief,"""and one should HAVE"" ""and that there IS a universal moral"" ""and not any ONE of them apart from""","""w, it will HAVE to grow between"" ""phenomenon and data, ETC"" ""of A and of B comes into view""",22,6,93
CC,Coordinating conjunction,Coordinating conjunctions are 1.3x more common in Early Philosophy than Late Philosophy.,36.39,27.77,1.32,49419,116770,***,and or but both nor either yet neither plus,and or but both either nor yet neither & plus n,"""experience, AND these categories"" ""yet developed OR done justice to"" ""should be, BUT it is in a more""","""process of adding AND revising axioms"" ""to be unending OR, at least, to"" ""philosophic problems, BUT these are not peripheral""",24,7,92
cc,Coordinating conjunction,Coordinating conjunctions are 1.3x more common in Early Philosophy than Late Philosophy.,35.67,27.44,1.31,48441,115382,***,and or but nor yet rather as / plus,and or but rather / nor yet as & + plus n,"""an unvarying AND formal pattern"" ""vindication, OR who lay claim to"" ""BUT in his paper on""","""adopt lemma a AND remain a platonist"" ""arbitrary subset OR combination in"" ""BUT if our understanding""",25,8,91
RBS,"Adverb, superlative","Adverb, superlatives are 1.3x more common in Early Philosophy than Late Philosophy.",0.72,0.55,1.3,980,2326,***,most best least longest lest,most best least foremost,"""The MOST constructive part"" ""believed, can be BEST realized in a free"" ""we are at LEAST assured of this""","""claim that the MOST substantial advances"" ""of course, BEST captured by Shaw"" ""that it is at LEAST conceivable for""",26,9,90
WP$,Possessive wh-pronoun,Possessive wh-pronouns are 1.2x more common in Early Philosophy than Late Philosophy.,0.33,0.27,1.24,449,1123,***,whose,whose,"""so to speak, WHOSE extrinsic origin""","""the property) WHOSE elements are interpreted""",32,11,88
nmod:unmarked,?,?s are 1.2x more common in Early Philosophy than Late Philosophy.,0.63,0.52,1.21,860,2206,***,itself himself themselves pp. p no. herself another years today doubt times myself matter pp ourselves a.d ed. b week o'clock ix march b.c hemself,itself themselves himself p herself pp. c x today way g time myself f another b doubt a y tomorrow years little e k u,"""with the thing ITSELF"" ""analyses of Husserl HIMSELF (Logical Investigations"" ""conflicting dictates THEMSELVES seems requisite""","""which the element ITSELF can enter"" ""just the objects THEMSELVES, rather than anything"" ""might be, Plato HIMSELF does not forsake""",37,12,87


In [8]:
odf.sort_values('mdw2_rank').head(10)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,fpk1,fpk2,odds_ratio,sum1,sum2,sig,top1,top2,egs1,egs2,mdw_rank,mdw1_rank,mdw2_rank
feat,feat_desc,result_desc,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
$,?,?s are 9.8x more common in Late Philosophy than Early Philosophy.,0.0,0.03,0.1,4,121,***,$,£ $ \( £- }( <£ t(,"""suppose that $ is a num - valued""","""sublanguage L of $ £ with a notion of"" ""sublanguage L of $ £ with a notion"" ""A) then if \(B / A) is broken""",4,95,4
SYM,Symbol,Symbols are 8.2x more common in Late Philosophy than Early Philosophy.,0.06,0.5,0.12,82,2087,***,= / + -a ~ // +( => ..* /-,= / + { ̄ \ -> \( => // ^ /- /( ~ ^( |= x *) := \- /a -+ /\ +( +/,"""and let K = A stranger wins"" ""resources and / or values distributed"" ""of C is : x + yw""","""predicates, £ = { E, F,. Individual"" ""of sentences < / >v that are verifiable"" ""p (E) + p(F), lj""",5,94,5
NFP,Superfluous punctuation,Superfluous punctuations are 2.9x more common in Late Philosophy than Early Philosophy.,0.15,0.43,0.34,200,1800,***,* - *) ~ -* ?> =' *( '( > *. ^ ;o -( v ;) =y '] { :d --* vs. t /? '..,* { | ~ -> · > ^ -* \ ** /? /. - >. := _ -< => ̄ = }. *( >( '(,"""a ground for * Mr. Schiller believes"" ""In the first of"" ""ourselves merely with *) Kleene, op""","""G < / >(*), we call physical"" ""Axiom P { ext (S) S G"" ""to equal P(| E), in parallel""",7,92,7
-RRB-,Right parenthesis,Right parenthesiss are 2.3x more common in Late Philosophy than Early Philosophy.,2.52,5.74,0.44,3425,24126,***,) ],) ] >,"""venture to call it) : for an inquiry"" ""rectilinear] triangle, I who""","""given!, V(t) must be an Rx for"" ""the human mind] answers to such"" ""G £ U Sf I > ext (A) G ¿""",8,91,8
-LRB-,Left parenthesis,Left parenthesiss are 2.2x more common in Late Philosophy than Early Philosophy.,2.68,6.01,0.45,3641,25277,***,( [ < -( a(,( [ < -( { a( t(,"""definition (Nature and Mind"" ""nature of a [rectilinear] triangle"" ""formal scale < f) for a by num""","""powerset axiom : (Vu) (x) (Vy"" ""that something [independent of"" ""of sentences < / >v that are verifiable""",9,90,9
compound,Compound word modifier,Compound word modifiers are 2.1x more common in Late Philosophy than Early Philosophy.,9.02,19.18,0.47,12246,80644,***,self sense material value world subject time living truth class space group con animal life law starting color century language state term art contingent prima,self truth order belief decision time level state set world sense language knowledge probability theory quantum con material university object type model subject color identity,"""experience of SELF - communion to"" ""reception of SENSE - data, and it"" ""intelligible the whole MATERIAL cosmos""","""prudence and SELF - interest (the"" ""every aGl, TRUTH assignment vff"" ""about first - ORDER standards""",10,89,10
list,?,?s are 1.7x more common in Late Philosophy than Early Philosophy.,0.12,0.2,0.59,157,824,***,pp. op vol. journal chap paris no. pp vol university de notions philosophy ed series review trans. bk. logic iv. conduct mass ed. criticism brit.,vol. ed. press. eds. trans cambridge ma ed university pp. op nj q. abstract oxford p philosophy part department no. logic eds d t f,"""ch. Cp. L.U., PP. Cp. L.U., pp"" ""Hallowell, OP"" ""Recueils, VOL. SOVEREIGNTY""","""of Descartes, VOL. Trans"" ""N. Beckman (ED. Amherst, MA"" ""Cascadilla PRESS. Matushansky, O""",11,88,11
dep,Unclassified dependent,Unclassified dependents are 1.6x more common in Late Philosophy than Early Philosophy.,0.27,0.43,0.64,372,1803,***,ii iii c i pp pp. v b p. vii h.d iv op we x cf. d xi e vi ibid. g sec i. xii,c ii i x a v iii b p ibid e op f pp. h cf + iv k j n t y g φ,"""in Meditation II to have discovered"" ""Logic, Part III, on The Nature"" ""employees; (C) the percentage""","""d) iff a < C or (a = c and"" ""end of section II, however, I do"" ""choosing criteria (I) - (iv""",16,87,12
advcl:relcl,?,?s are 1.6x more common in Late Philosophy than Early Philosophy.,0.56,0.87,0.64,759,3667,***,have used has made related make is say looks know seems do think what true possible does are came seem work makes feels seen arise,have think is come has works know possible are used makes true take came work ought holds comes say understood use case thinks means do,"""is that they HAVE meaning"" ""any kind has USED elements which"" ""defined that HAS the following characters""","""decision to act can HAVE side - effects"" ""about how I THINK we should conceive"" ""is true iff C IS""",17,86,13
discourse,Discourse element,Discourse elements are 1.5x more common in Late Philosophy than Early Philosophy.,0.79,1.23,0.65,1076,5157,***,a no b ii say so i like well yes iii c please v right viz d vol e viz. pp. oh vi alas p,so a ii i say b no iii like well yes p v viz o k c iv h y n vi w e oh,"""modification is : (A) it enables us"" ""VOLUME L, NO"" ""anything, and (B) intuitively""","""SO, we may ask"" ""raising here are (A) how the identity"" ""In (II), we also treat""",18,85,14


In [9]:
ids1 = df_meta.query('discipline == "Literature" & 1920<=year<1970').index.tolist()
ids2 = df_meta.query('discipline == "Literature" & 2025>=year>=1970').index.tolist()
odf = get_mdw_pos(ids1, ids2, 'Early Literature', 'Late Literature', feat_n=25, feat_min_count=1, incl_deprel=True, feat_n_egs=3,rename_cols=False).round(2)
# odf[odf.sig=="***"]

In [10]:
odf.sort_values('mdw1_rank').head(10)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,fpk1,fpk2,odds_ratio,sum1,sum2,sig,top1,top2,egs1,egs2,mdw_rank,mdw1_rank,mdw2_rank
feat,feat_desc,result_desc,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
LS,List item marker,List item markers are 2.6x more common in Early Literature than Late Literature.,0.15,0.06,2.65,125,111,***,ii a vi i iii v viii xli xi vii xxxvi xxviii xxii xxxviii xxxvii xxxi xviii lviii xliii lxvi,ii a vi v iii i iv vii viii xi k,"""Cressida, III, II"" ""Items : (A) The issue of"" ""VI""","""II"" ""consciousness, (A) the other is"" ""VI""",5,1,98
csubj:pass,?,?s are 2.3x more common in Early Literature than Late Literature.,0.12,0.05,2.3,101,103,***,made preceded extension writing come interjected found allusion type contain prepared assigned provide indebted revivify indicates soldier listened comes carved led used superseded spelled derived,have used drawn considered influenced one ahistorical natural refraining harvesting opening challenged ornaments had visit done acting determined pricks employed taking includes emerged printed gathers,"""play had already MADE use of the Standards"" ""poems are PRECEDED by two letters"" ""system was an EXTENSION of the worship""","""does not always HAVE a singular verb"" ""it has been USED in English verse"" ""and consistently DRAWN, and their hierarchical""",6,2,97
vocative,?,?s are 2.2x more common in Early Literature than Late Literature.,0.52,0.24,2.17,435,470,***,cf i sir op lady lord god madam rohrscheid you james vol lu j. dryden me cf. ed catherine rev. al patkul blake arnold thou,i sir lord cf me t friend mr. anna lydia god man richard daughter radlova thou ed scott aufidius trans susan hector joanna mutandis booth,"""CF. He is so unquestioning"" ""See also I"" ""you, my dear SIR""","""I. Of Generic Worlds"" ""to him, SIR, I ve lived three"" ""might say, LORD""",7,3,96
list,?,?s are 1.9x more common in Early Literature than Late Literature.,0.64,0.33,1.94,535,648,***,die berlin der op das u. pp. bd. goethe vol. deutsche junker studien p. hans. ed. h. e. diinnhaupt goethes univ. geschichte g. r. vol,trans ed ed. pp. university vol. p. pp univ univ. print reply cambridge rev. print. studies rights college york oxford vol princeton die blank history,"""TODT, H., DIE deutsche Begegnung"" ""Engelke, Heinr. BERLIN"" ""SCHMIDT, K., DER Wandel des Naturgefuhls""","""Movement, TRANS"" ""Bruce Fink, ED"" ""ed. Burchell, ED""",10,4,95
UH,Interjection,Interjections are 1.9x more common in Early Literature than Late Literature.,1.25,0.67,1.87,1036,1299,***,no like oh o well yes nay ah um please alas ha iii ay viz ich ii iv amen xli ch vol ib op cf,no yes like well oh say o alas please nay vol ah iv um yeah ex ich ii yea ing fol er aer tis hee,"""NO, I ask how things"" ""seem dream - LIKE"" ""belief, yet, OH""","""See Cohen, NO"" ""H.C. : YES, but it means"" ""sofa or the LIKE, as the event""",11,5,94
NFP,Superfluous punctuation,Superfluous punctuations are 1.8x more common in Early Literature than Late Literature.,0.14,0.08,1.79,115,151,***,- * | > ~ ;- z { ~. :) :: ; *s </ ^ *- ~~,* | - { [. > ^ *** /. >. ~ _ :: '( .t. _. -- -> :; x~?? :i.:: :::: ;. ?- z,"""A bird cursin l"" ""descriptions of * American Notes"" ""so many years | As Day tells houres""","""X *"" ""la muerte, | la eternajuventud"" ""OF MORE - AND - LESS : LETTER""",12,6,93
discourse,Discourse element,Discourse elements are 1.8x more common in Early Literature than Late Literature.,1.2,0.67,1.79,995,1308,***,no like so oh ii o well yes a nay ah um please iii sir alas ha viz ay ta ich i ch iv b,no so yes well like oh say o ii a please alas nay vol um i right p. aer ah pp. iii iv yeah n,"""NO, I ask how things"" ""No, LIKE a bank for love"" ""SO potent is her beauty""","""Pictorial Imagery, NO"" ""SO the political can"" ""H.C. : YES, but it means""",13,7,92
FW,Foreign word,Foreign words are 1.7x more common in Early Literature than Late Literature.,0.37,0.22,1.68,303,423,***,"etc e.g. etc. i.e. pp. i.e e.g c. so cf. v. ed. cf , i. eng. ibid. str. mar p. then bd. g. ca. soc.","etc e.g. i.e. etc. pp. i.e , ed. e.g esp. eds. c. ca. cf nt i. so &c. ibid. v. asap henceforth viz. bk. wer","""this state, ETC"" ""under ESEA (E.G., the tying of"" ""the merchants, ETC., it is impossible""","""CINEMATIC MUSICAL ETC"" ""the sequenced (E.G., comic strip"" ""the world (I.E., with the vita""",14,8,91
orphan,?,?s are 1.7x more common in Early Literature than Late Literature.,0.06,0.03,1.66,48,68,**,time end general hu past wynnere night noon era drama indirectly popery others mar also god seven respectful city capacity acting corpus course existence flea,particular all then instance england ourselves generations siren onward always institution universal modernist sequence margins feedback fiat doing other objects especially times things persuadable sassoon,"""for the second TIME signs in blood"" ""everyone as an END"" ""fishermen in GENERAL""","""violence and in PARTICULAR on assassination"" ""and above ALL out of reach of"" ""comments since THEN""",15,9,90
VBD,"Verb, past tense","Verb, past tenses are 1.5x more common in Early Literature than Late Literature.",19.59,13.44,1.46,16259,26212,***,was had were did made wrote came found became said thought took knew saw gave went used seemed began felt read 'd called appeared brought,was had were did made came wrote became said took saw began thought found knew seemed went felt wanted used called read gave sought brought,"""It WAS not my intention"" ""After I HAD distinguished these"" ""These types WERE as common as the""","""the room there WAS little furniture"" ""These women then HAD to redefine themselves"" ""These gains WERE crucial in part""",17,10,89


In [11]:
odf.sort_values('mdw2_rank').head(10)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,fpk1,fpk2,odds_ratio,sum1,sum2,sig,top1,top2,egs1,egs2,mdw_rank,mdw1_rank,mdw2_rank
feat,feat_desc,result_desc,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
AFX,?,?s are 7.2x more common in Late Literature than Early Literature.,0.0,0.01,0.14,1,17,*,wel,mid non reli,"""table, of those WEL known knights""","""promulgated in the MID -fifteenth century"" ""defences of the NON -foot - based alternating"" ""nor age nor RELI gion mattered when""",3,96,3
ADD,?,?s are 5.1x more common in Late Literature than Early Literature.,0.0,0.01,0.2,1,12,,a.'.,"http://muse.jhu.edu/), .maloryproject.com www.jstor.org. www.atanarjuat.com/production_diary/apak_interview.html. g.b.h. http://gallica.bnf.fr. http://gallica. http://www.louvre.fr/llv/oeuvres/detail_notice. donaldm. scytheless %t_streeter@uvmvax.uvm.edu http://www.randomhouse.com/catalog/display.pperl—isbn= books.google.com","""A.""","""Project Muse (HTTP://MUSE.JHU.EDU/), demonstrate"" ""http://www.MALORYPROJECT.COM"" ""available at WWW.JSTOR.ORG""",4,95,4
GW,?,?s are 2.1x more common in Late Literature than Early Literature.,0.0,0.0,0.47,1,5,,geschichtsdrama herbert,michael jenny futuri raymond,"""PETERSEN, J., GESCHICHTSDRAMA u. Nationaler Mythos"" ""EASTER SERMONS By HERBERT H. UMBACH""","""MICHAEL PERRAUDIN"" ""JENNY MEZCIEMS"" ""FUTURI SPES VIRTUTEM ALIT""",8,94,5
csubj:outer,?,?s are 2.0x more common in Late Literature than Early Literature.,0.02,0.05,0.5,19,90,**,raising die accuse answer exist accept revert call gloss describe seem dramatize divide fits emphasize define deny point clear,write make live see speak talk take seek making matters read relive do try attempt troubles memorize modernize conquer frustrating direct moving deserves identify know,"""protests that RAISING him by love and"" ""Flavian should DIE among the rare"" ""To ACCUSE Moore of writing""","""and that to WRITE is to bare one"" ""make art and to MAKE art is to take"" ""To LIVE without a metaphysical""",9,93,6
-RRB-,Right parenthesis,Right parenthesiss are 1.5x more common in Late Literature than Early Literature.,2.36,3.59,0.66,1961,7002,***,) ] >,) ] > [.,"""regularly do), Mrs. Ward printed"" ""with [them], they were upon"" ""keep [him] > it close until""","""in the story) from outer or discourse"" ""the village] takes pride"" ""Telemachus, > the first chapter""",16,92,7
-LRB-,Left parenthesis,Left parenthesiss are 1.4x more common in Late Literature than Early Literature.,2.95,4.06,0.73,2447,7912,***,( [ < { ...,( [ <,"""press numbers (as Dodsley s publications"" ""he lived with [them], they were"" ""tsjok, tsjuk < Germi""","""of the content (time as represented"" ""in which it [the village] takes"" ""identical with the < Atlantic mountains""",20,91,8
advcl:relcl,?,?s are 1.3x more common in Late Literature than Early Literature.,0.41,0.55,0.74,340,1074,***,came become had made said has felt have found feels influenced makes achieved becomes is can love set leads used come are introduced holds led,came makes made say work read became is becomes go transformed have seems had come appear become live works saw see end know begins took,"""whenever Sterne CAME to York, & when"" ""Anselmo have BECOME goatherd and shepherd"" ""me who never HAD a Child in Shakespeare""","""that s how it CAME out"" ""paradoxically MAKES the mind intelligible"" ""wise, who MADE""",23,90,9
compound,Compound word modifier,Compound word modifiers are 1.3x more common in Late Literature than Early Literature.,11.93,15.8,0.75,9898,30812,***,self century love prose half language stage london school living twenty a subject folk paradise manuscript country world line college title renaissance character verse light,self century world class material art family language life living time reading post subject e love state gender war renaissance film university animal half writing,"""are not always SELF - consistent individuals"" ""an eighteenth CENTURY hand on the inner"" ""The LOVE - sick King is""","""did not present SELF - portraits on"" ""nineteenth - CENTURY Europe"" ""of the second WORLD War""",24,89,10
WP$,Possessive wh-pronoun,Possessive wh-pronouns are 1.3x more common in Late Literature than Early Literature.,0.39,0.5,0.78,325,984,***,whose,whose whos,"""poorer in talent WHOSE tone is purer and""","""any language, WHOSE life experience"" ""that a boyfriend WHOS disposed to have""",28,88,11
cc:preconj,?,?s are 1.2x more common in Late Literature than Early Literature.,0.95,1.17,0.81,785,2277,***,both either neither whether that,both either neither whether that,"""In this volume BOTH the Britannia and"" ""peculiarities EITHER (hjoei for hjoed"" ""But if NEITHER the show nor the""","""suggested by BOTH poem and novel"" ""I can begin EITHER with the toes or"" ""fulfillment, then, NEITHER in reducing the""",32,87,12


In [12]:
odf.mean(numeric_only=True)

fpk1             20.853125
fpk2             20.853437
odds_ratio        1.089792
sum1          17307.906250
sum2          40664.666667
mdw_rank         50.500000
mdw1_rank        48.500000
mdw2_rank        50.500000
dtype: float64