In [1]:
import sys; sys.path.append('..')
from osp import *

In [16]:
# Load the corpus metadata once, then collect the index labels of the rows
# belonging to each of the two disciplines compared below.
df_meta = get_corpus_metadata()
ids_phil = df_meta.loc[df_meta['discipline'].eq('Philosophy')].index.tolist()
ids_lit = df_meta.loc[df_meta['discipline'].eq('Literature')].index.tolist()

In [None]:
from scipy.stats import fisher_exact

def fisher_test_pos(df_pos, target_col='_target', g1='Philosophy', g2='Literature'):
    """
    Run Fisher's exact test for each feature column, comparing two groups.

    For each feature (every column of ``df_pos`` except ``target_col``) a 2x2
    contingency table is built from the group-wise column sums:

                    | this feature | all other features |
        Group 1     |      a       |         b          |
        Group 2     |      c       |         d          |

    Parameters
    ----------
    df_pos : pandas.DataFrame
        One row per observation, one numeric column per feature, plus the
        group label column ``target_col``.
    target_col : str
        Name of the column holding the group label.
    g1, g2 : hashable
        Label values in ``target_col`` selecting group 1 and group 2.

    Returns
    -------
    pandas.DataFrame
        Indexed by ``feat`` with columns ``sum1``, ``sum2``, ``odds_ratio``,
        ``p_value`` and ``sig`` ('***' for p < 0.001, '**' for p < 0.01,
        '*' for p < 0.05, '' otherwise), sorted by ``odds_ratio`` descending.
        An input without feature columns yields an empty frame with these
        columns instead of raising.
    """
    def _stars(p):
        # Strictest threshold first; checking 0.05 first would make the
        # '**' and '***' levels unreachable.
        if p < 0.001:
            return '***'
        if p < 0.01:
            return '**'
        if p < 0.05:
            return '*'
        return ''

    # Per-feature totals summed over all rows of each group
    g1_totals = df_pos[df_pos[target_col] == g1].drop(columns=[target_col]).sum()
    g2_totals = df_pos[df_pos[target_col] == g2].drop(columns=[target_col]).sum()

    # Grand totals per group (across all features)
    g1_total = g1_totals.sum()
    g2_total = g2_totals.sum()

    results = []
    for pos in g1_totals.index:
        a = g1_totals[pos]           # feature count in g1
        b = g1_total - a             # everything else in g1
        c = g2_totals[pos]           # feature count in g2
        d = g2_total - c             # everything else in g2

        odds_ratio, p_value = fisher_exact([[a, b], [c, d]])

        results.append({
            'feat': pos,
            'sum1': int(a),
            'sum2': int(c),
            'odds_ratio': odds_ratio,
            'p_value': p_value,
            'sig': _stars(p_value),
        })

    # Explicit columns keep set_index/sort_values working when `results` is empty.
    columns = ['feat', 'sum1', 'sum2', 'odds_ratio', 'p_value', 'sig']
    result_df = pd.DataFrame(results, columns=columns).set_index('feat')
    return result_df.sort_values('odds_ratio', ascending=False)


In [41]:
import numpy as np

# (label, document ids) pairs for the two disciplines being compared
groups = list(zip(('Philosophy', 'Literature'), (ids_phil, ids_lit)))

def get_mdw_pos(ids1, ids2, name1="Group 1", name2="Group 2", feat_n=FEAT_N, feat_min_count=FEAT_MIN_COUNT, incl_deprel=True, incl_pos=True):
    """
    Compare feature usage between two sets of documents.

    Combines the per-feature Fisher's exact test from ``fisher_test_pos`` with
    per-group means and example words for every feature column returned by
    ``get_pos_counts``.

    Parameters
    ----------
    ids1, ids2 : document ids for group 1 and group 2 (passed through to the
        ``get_pos_counts`` / ``get_pos_word_counts`` / ``get_pos_word_egs``
        helpers).
    name1, name2 : labels for the two groups; they appear in the renamed
        output columns and must differ from each other.
    feat_n, feat_min_count : forwarded to ``get_egs`` as ``n`` and
        ``min_count``.
    incl_deprel, incl_pos : forwarded to ``get_pos_counts``.

    Returns
    -------
    pandas.DataFrame
        Indexed by ``(feat, feat_desc)``, sorted by ``odds_ratio`` descending,
        with ``p_value`` and ``sig`` as the last two columns. The group sums
        and means are renamed to ``# 1 (name1)``, ``# 2 (name2)``,
        ``#/K 1 (name1)`` and ``#/K 2 (name2)``.
    """
    df_pos_grp1 = get_pos_counts(ids1, incl_deprel=incl_deprel, incl_pos=incl_pos)
    df_pos_grp2 = get_pos_counts(ids2, incl_deprel=incl_deprel, incl_pos=incl_pos)

    words_grp1 = get_pos_word_counts(ids1)
    words_grp2 = get_pos_word_counts(ids2)

    egs_grp1 = get_pos_word_egs(ids1)
    egs_grp2 = get_pos_word_egs(ids2)

    # Stack both groups into one frame, tagging every row with its group label.
    df_pos = pd.concat([df_pos_grp1.assign(_target=name1), df_pos_grp2.assign(_target=name2)])
    fisher_results = fisher_test_pos(df_pos, target_col='_target', g1=name1, g2=name2)

    # {feature: {group label: mean of that feature column within the group}}
    feat2grp2mean = df_pos.groupby('_target').mean().to_dict()

    colname1 = f'1 ({name1})'
    colname2 = f'2 ({name2})'

    ld = []
    for feat, grp2mean in feat2grp2mean.items():
        # NOTE(review): assumes every feature is a key in the word-count and
        # example lookups of both groups - confirm against the helpers.
        ld.append({
            'feat': feat,
            'fpk1': grp2mean[name1],
            'fpk2': grp2mean[name2],
            'top1': get_egs(words_grp1[feat], n=feat_n, min_count=feat_min_count),
            'top2': get_egs(words_grp2[feat], n=feat_n, min_count=feat_min_count),
            'egs1': get_egs(words_grp1[feat], n=feat_n, min_count=feat_min_count, word2eg=egs_grp1[feat]),
            'egs2': get_egs(words_grp2[feat], n=feat_n, min_count=feat_min_count, word2eg=egs_grp2[feat]),
        })

    # Explicit columns keep set_index working when no feature rows were built.
    feat_cols = ['feat', 'fpk1', 'fpk2', 'top1', 'top2', 'egs1', 'egs2']
    odf = pd.DataFrame(ld, columns=feat_cols).dropna().set_index('feat')
    odf['fpk1-fpk2'] = odf['fpk1'] - odf['fpk2']

    # Left-join onto the test results so every tested feature is kept.
    odf = fisher_results.join(odf).sort_values('odds_ratio', ascending=False)
    odf['feat_desc'] = [POS2DESC.get(feat, '?') for feat in odf.index]
    odf = odf.reset_index().set_index(['feat', 'feat_desc'])

    # Move the significance columns to the far right.
    end_cols = ['p_value', 'sig']
    odf = odf[[c for c in odf.columns if c not in end_cols] + end_cols]
    return odf.rename(columns={
        'sum1': f'# {colname1}',
        'sum2': f'# {colname2}',
        'fpk1': f'#/K {colname1}',
        'fpk2': f'#/K {colname2}',
    })


In [42]:
# Show full cell contents so the example strings are not truncated.
pd.set_option('display.max_colwidth', None)

# Philosophy vs Literature; keep only the '***' rows.
odf = get_mdw_pos(
    ids_phil,
    ids_lit,
    'Philosophy',
    'Literature',
    feat_n=10,
    feat_min_count=1,
    incl_deprel=False,
)
odf = odf.round(2).query('sig == "***"')
odf

Unnamed: 0_level_0,Unnamed: 1_level_0,sum1 (Philosophy),sum2 (Literature),odds_ratio,#/K 1 (Philosophy),#/K 2 (Literature),top1,top2,egs1,egs2,fpk1-fpk2,p_value,sig
feat,feat_desc,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
NFP,Superfluous punctuation,547,71,3.91,0.33,0.08,* { ~ | - · -> > \ **,- * | { *** ;- > z ~ ^,"""G < / >(*****), we call physical"" ""Axiom P **{** ext (S) S G"" ""estimate of p (**~** I / M) is still"" ""PF(q) cF **|** EH"" ""not freeze (**-** > £ —"" ""Statistical evidence **·** Epistemic impurism"" ""for interpreting **->** as a material conditional"" ""case that y **>** xx, where xx is"" ""Iu{H) - mod? **\**, we see that"" ""Both (i ******) the stick is""","""OF MORE - AND **-** LESS : LETTER"" ""in what manner ***** Pyrrhus and f Cineas"" ""la muerte, **|** la eternajuventud"" ""impetus, hurls **{** rapii) all the"" ""Madame de L *******, of whom he has"" ""than formerly **;-** the rage of retiring"" ""alternating with **>** Leireso > or"" ""later writers. **z"" ""get? L-?n~* **~** S~"" ""STf fS, **^** Rlb^ The earth""",0.25,0.0,***
LS,List item marker,624,87,3.64,0.38,0.1,ii a i iii v k vi b iv xlvi,ii a vi v i viii iii iv xli xxxi,"""behind criterion (**ii"" ""claim above : (**a**) All the ingredients"" ""satisfaction of (**i**) is not only an"" ""motivates condition (**iii"" ""V"" ""now think of (**K** /) relative to"" ""see Section **VI**), such a search"" ""past), (**b**) it has been confirmed"" ""atypical, (**iv**) others often"" ""XLVI""","""II** fut tout puissant"" ""A**. In fact Jaggard"" ""VI"" ""V"" ""above, note **i"" ""vnI, and **vIIi**) greatly widen"" ""take effect (**iII"" ""Vandals (**IV"" ""XLI**. XLI"" ""XXXI""",0.28,0.0,***
FW,Foreign word,1553,223,3.54,0.94,0.26,i.e. e.g. etc i.e etc. e.g eds. cf pp. c.,"e.g. i.e. etc etc. pp. i.e e.g , so v.","""modern physics, **i.e.**, classical mechanics"" ""attributions, **e.g.**, mood attributions"" ""experiences, **etc"" ""inconsistent with, **i.e"" ""architecture, **etc.**, than of philosophy"" ""properties (**e.g"" ""van Koppen (**Eds.** Amsterdam : John"" ""natural language (**cf"" ""Nature and Mind, **pp.** But I shall not"" ""counterfactually to imply **C.** Fourthly, if I""","""detailed notebooks (**e.g.** Published by permission"" ""to Cleanness, **i.e.**, printing the"" ""parable, **etc"" ""another, **etc"" ""thinks Varnhagen (**pp.** But John is a shade"" ""apostrophe (**i.e"" ""the longer, **e.g"" ""el campo anima **,** tu claro verde"" ""beidenthalp bewar **so** daz ich rehte gevar"" ""Sarrazin suggests (**v.** his Kyd p. Ioo""",0.68,0.0,***
SYM,Symbol,592,120,2.51,0.36,0.14,= / + { \ /- ^ // /( x,/ = + .is,"""predicates, £ **=** { E, F,. Individual"" ""of sentences < **/** >v that are verifiable"" ""means, if K **+** is the next cardinal"" ""predicates, £ = **{** E, F,. Individual"" ""fte] |, **\** chairman\)) and"" ""difference in **/-** values of two point"" ""G t^x and x **^** t"" ""likewise acquire **//** - clauses as"" ""simply write **/(** C /) for it"" ""y) V (x **x** z) x x (y A z""","""first body [we **/** they] have any"" ""protocomes (**=** admiral or provost"" ""thus beta **+** Ν versus alpha"" ""the third.., **.is** denoted by the""",0.22,0.0,***
EX,Existential there,4933,1323,1.9,2.99,1.55,there,there,"""property, then **there** is no reason to""","""There** is still room for""",1.44,0.0,***
MD,Modal,27650,8459,1.67,16.75,9.91,can would will may must should might could shall ought,can would may will must could might should shall 'd,"""way, but it **can** be provided with"" ""specifically, he **would** observe that we"" ""The answer **will** be so context"" ""however, one **may** wonder whether"" ""holding responsible **must** answer to certain"" ""to say, and **should** have said, on"" ""While anger **might** be a good epistemic"" ""quantum physicist **could** argue that it recovers"" ""of brevity we **shall** understand the"" ""think that Nagel **ought** to drop his strong""","""the topographies **can** be understood as"" ""Holocaust victims **would** have raised objections"" ""a given piece **may** be spectacular"" ""what follows I **will** discuss how the"" ""characters **must** be plausible"" ""such spirits as **could** contemn death"" ""We **might** hesitate about"" ""The poet **should** please both; but"" ""possesses which **shall** qualify him to"" ""Gold it turn ** d** to Dross (Psyche""",6.84,0.0,***
-RRB-,Right parenthesis,7754,2617,1.51,4.7,3.06,) ] > },) ] > [.,"""with certainty **)**, defining a physical"" ""face of [vor"" ""G £ U Sf I **>** ext (A) G ¿"" ""p (~ G/D)""","""the perpetrator"" ""preclude [s **]** voyeuristic distance"" ""in Province **>** < Paris : topographies"" ""der Gegenwart""",1.63,0.0,***
VBP,"Verb, non-3rd person singular present",35260,12179,1.48,21.36,14.26,are have do am need think know believe take want,are have do am know think find seem need make,"""/ >v that **are** verifiable (or"" ""anger that we **have** in response to"" ""notions that **do** not coincide with"" ""First, if I **am** right, then investigating"" ""tradition, I **need** not go into here"" ""I **think** this is probably"" ""Sein und Zeit **know** well that Heidegger"" ""sensitive, I **believe**, as to be adequately"" ""general ideas **take** on in every new"" ""Hence I **want** to attempt to make""","""even if they **are** meant to overcome"" ""We **have** what might be termed"" ""surprisingly, **do** not end with last"" ""I **am** irresolute about"" ""But I **know** the plot"" ""by those who **think** only of his dramatic"" ""Here we **find** a complexity in"" ""and Barres **seem**, however, to"" ""about how we **need** to be tricked into"" ""direct involvement **make** way for what Silverman""",7.1,0.0,***
VB,"Verb, base form",63416,23312,1.4,38.41,27.3,be have say do see make let know take consider,be have see make say do take find let think,"""but it can **be** provided with a"" ""reason could we **have** to rely on angers"" ""Why not **say**, as both Michael"" ""but this wo nt **do**, as we also lack"" ""We can **see** this by running"" ""per se could **make** anger at the violator"" ""Let** us add now some"" ""be useful to **know** just what Marx"" ""in general, **take** longer than only"" ""Consider** the complex of""","""topographies can **be** understood as knots"" ""victims would **have** raised objections"" ""colonialism : **see** Palimpsestic Memory"" ""it tends to **make** of its symbols"" ""when men could **say** little for futurity"" ""his sons to **do** their duty to God"" ""you will either **take** the attitude of"" ""the ghetto to **find** her way around"" ""Let** us apply this to"" ""she is made to **think** of the trains that""",11.11,0.0,***
-LRB-,Left parenthesis,8234,3049,1.37,4.99,3.57,( [ < -( a( '( φ( o( { pr(,( [ <,"""are verifiable **(** or testable) according"" ""the face of **[** vor"" ""of sentences **<** / >v that are verifiable"" ""implies (i) **-(** v"" ""NxGx -> F=G) **A(** F= G - NxFx _ NxGx"" ""and I(Sj ** (** i)) = true otherwise"" ""causal set, then **φ(** p) in the spacetime"" ""¬ O (Aa) **O(** all - of - a"" ""variable a : x G **{** x} I > a (x"" ""Common Cause) > **Pr(** Obs | Coincidence""","""overcome them **(** for we tend to"" ""that preclude **[** s] voyeuristic"" ""in Province > **<** Paris : topographies""",1.42,0.0,***


In [39]:
# pd.options.display.max_colwidth = None
# # pprint(dict(odf.loc['cop']))
# odf

In [6]:
# Philosophy, split by year: [1920, 1970) vs [1970, 2025]
ids1 = df_meta.query(
    'discipline == "Philosophy" & 1920<=year<1970'
).index.tolist()
ids2 = df_meta.query(
    'discipline == "Philosophy" & 2025>=year>=1970'
).index.tolist()
odf = get_mdw_pos(
    ids1,
    ids2,
    'Early Philosophy',
    'Late Philosophy',
    feat_n=10,
    feat_min_count=1,
).round(2)
odf.loc[odf['sig'] == "***"]

Unnamed: 0_level_0,feat_desc,sum1,sum2,fpk1,fpk2,fpk1-fpk2,odds_ratio,sig,top1,top2,egs1,egs2,p_value
feat,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
'',?,3475,5826,8.56,4.96,3.6,1.73,***,""" ' '' '. »",""" ' « »","""by definition ** ** (Nature and Mind"" ""of existence ** ** - the phrase is"" ""agent : e.g., ** ** using a motor vehicle"" ""or things **"" ""sunflowers are yellow **»** are intelligible""","""word pure ** ** when referring"" ""ext (S) ** ** S G is a partition"" ""be - doo **«** now will turn"" ""Connectives A, V, **»**. Parentheses""",0.0
``,?,3339,5760,8.22,4.91,3.32,1.68,***,""" ' « ''",""" ' » « ¬","""and about ** ** generic traits"" ""the previous ** ** vulgar measure"" ""sentences like **«** all sunflowers"" ""which remains ** ** gently optimistic""","""understand the word ** ** pure when referring"" ""fi, U and ** ** set theoretical"" ""I am uttering **»** shoo - be - doo"" ""doo - be doo **«** now , then"" ""truth to P and **¬** P but falsity to""",0.0
VBD,"Verb, past tense",2888,5501,7.11,4.69,2.43,1.52,***,was were had did thought made saw said knew became,was were had did said thought saw made took gave,"""clearer what I **was** trying to say"" ""The Greeks **were** good at it, and"" ""Dewey decided he **had** been wrong in trying"" ""that, if we **did** not say this"" ""that I once **thought** scientific ethics"" ""In them Dewey **made** a conscious attempt"" ""conditions I **saw** only one spot"" ""puzzled by what I **said** or failed to say"" ""he not only **knew**, but also wanted"" ""others, it **became** increasingly clear""","""huge literature **was** produced on this"" ""If I **were** to attempt to do"" ""procedure that **had** as its input perceptual"" ""obtain : If one **did** not utter a token"" ""she explicitly **said"" ""showing that Marx **thought** there was no such"" ""We **saw** in the previous"" ""this same work **made** thoughtful entry"" ""the referring **took** place"" ""each of which **gave** priority to its""",0.0
conj,?,15145,32596,37.3,27.76,9.54,1.35,***,have false is what more one less not has etc,have not b etc one is what has on false,"""and one should **have"" ""and colors is **false"" ""and there **is** no difference between"" ""explanation and **what** is explained"" ""conditions one or **more** positive retroflex"" ""yet he is the **one** who is responsible"" ""claims more or **less** concrete"" ""discovered or **not"" ""degree, but **has** no specifiers which"" ""wood, metals, **etc""","""want to, or **have** some evidence to"" ""conscious or **not"" ""or just before **B**) all take the"" ""experiences, **etc"" ""but not by **one** who seeks, as"" ""but not / > **is** about o"" ""and completely **what** the difference"" ""every sentence **has** both a truth value"" ""flower, and so **on"" ""at worst, **false""",0.0
CC,Coordinating conjunction,14976,32531,36.89,27.71,9.18,1.34,***,and or but both nor either yet neither,and or but both either nor yet neither & plus,"""so formidable **and** practiced a controversialist"" ""what I said **or** failed to say in"" ""by its method **but** by its subject"" ""shall contend **both** that the form of"" ""Nor** would such debate"" ""can be said **either** to be, or to lead"" ""not true, and **yet** who pcssess a language"" ""applicability, but **neither** have value or use""","""given theory T **and** classical language"" ""verifiable (**or** testable) according"" ""formal way, **but** it can be provided"" ""sentence has **both** a truth value and"" ""evolutionary forces have **either** pushed our angry"" ""experiences, **nor** to our bodily states"" ""Yet** in this case Marthas"" ""other item, **neither** to our past experiences"" ""situation (Hubin **&** Lambeth"" ""Subsective **plus** coercion""",0.0
cc,?,14721,32114,36.26,27.35,8.9,1.33,***,and or but nor yet rather as /,and or but rather / nor yet as & plus,"""that traditional **and** conventional language"" ""new distinctions **or** the new concepts"" ""my teachers, **but** by its inherent"" ""Nor** would such debate"" ""not true, and **yet** who pcssess a language"" ""true geometry, **rather** than alternate"" ""Lockian doctrine, **as** well as the doctrine"" ""since accident **/** essence discrimination""","""x)) } **and** denote by V the"" ""Physical object, **or** individual example"" ""But** it also supports"" ""on detection **rather** than absolute"" ""FG) VxxVyi **/** VxVy (xx * yy"" ""Nor** do I deny that"" ""Yet** in this case Marthas"" ""philologically sound **as** well as more thoughtful"" ""situation (Hubin **&** Lambeth"" ""Subsective **plus** coercion""",0.0
aux:pass,?,5335,12840,13.14,10.94,2.2,1.2,***,be is are been was being were am 's get,be is are been being was were am get gets,"""whether they can **be** discovered is to"" ""of abstraction **is** called for; indeed"" ""upon whither we **are** led, and the only"" ""other scientist, **been** driven to invent"" ""version that **was** argued against"" ""terminological one **being** effected in his"" ""remainders **were** denied is obvious"" ""failed - and I **am** hardly sufficiently"" ""determine is what ** s** called influence"" ""anything; events **get** connected in the""","""however, it can **be** extended to account"" ""new perspective **is** adopted, our derivation"" ""States **are** defined as equivalence"" ""model has **been** recently worked"" ""not in fact **being** violated"" ""huge literature **was** produced on this"" ""experi ences if, **were** I confronted with"" ""bargain and I **am** motivated only"" ""intuitions that **get** studied and the"" ""the data that **gets** reported are not""",0.0
nsubj:pass,?,4438,11005,10.93,9.37,1.56,1.17,***,it which they that we he this what one i,it that they which we this he i one what,"""It** should be pointed"" ""considerations **which** have been adduced"" ""test of whether **they** can be discovered"" ""of new ideas **that** can be so generalized"" ""upon whither **we** are led, and the"" ""the fact that **he** has not been aimed"" ""This** is well illustrated"" ""What** the relation is"" ""One** might be led to"" ""failed - and **I** am hardly sufficiently""","""however, **it** can be extended"" ""disjunctive analysis **that** is more plausibly"" ""objects, and that **they** can be exhibited"" ""political culture, **which** is critically influenced"" ""In a word, **we** are acquainted"" ""This** might be regarded"" ""language, so that **he** would not be satisfied"" ""we bargain and **I** am motivated only"" ""but of the way **one** is situated in"" ""What** is meant by justice""",0.0
VBN,"Verb, past participle",10207,25347,25.14,21.59,3.55,1.17,***,been given made called known taken said used found involved,given been made based used taken seen said called justified,"""they have not **been** made sufficiently"" ""defensible, no **given** percept will be"" ""have not been **made** sufficiently clear"" ""be more aptly **called** pro demonstratives"" ""on the then **known** laws and facts"" ""for any percept **taken** singly"" ""and should have **said**, on these two"" ""wherever he had **used** experience"" ""generalization have been **found** in the past, is"" ""analysis of what is **involved** in the making of""","""choosing, for a **given** theory T and classical"" ""It has **been** shown that the"" ""I have **made** the case for fitting"" ""criterion is **based** on paradigmatic"" ""kind knowledge **used** in the sociology"" ""Taken** together, the"" ""clearer language is **seen** repeatedly by the"" ""as well have **said"" ""theoretical proposal **called** extended semantic"" ""probabilistic, are **justified** in terms of their""",0.0
",",?,22601,56553,55.67,48.17,7.5,1.16,***,", ; - ...",", ; - / ...","""VOLUME L **,** No"" ""metaphysical **;** he also resolved"" ""of existence **-** the phrase is of"" ""but preservation **...** thus the law of""","""consists in choosing **,** for a given theory"" ""come forth **;** φ ως, from φ άος"" ""criteria (i) **-** (iv"" ""The triple (< **/** >v, -<, L) is"" ""in dia lectic **...**, looked at from""",0.0


In [7]:
# Literature, split by year: [1920, 1970) vs [1970, 2025]
ids1 = df_meta.query(
    'discipline == "Literature" & 1920<=year<1970'
).index.tolist()
ids2 = df_meta.query(
    'discipline == "Literature" & 2025>=year>=1970'
).index.tolist()
odf = get_mdw_pos(
    ids1,
    ids2,
    'Early Literature',
    'Late Literature',
    feat_n=10,
    feat_min_count=1,
).round(2)
odf.loc[odf['sig'] == "***"]

Unnamed: 0_level_0,feat_desc,sum1,sum2,fpk1,fpk2,fpk1-fpk2,odds_ratio,sig,top1,top2,egs1,egs2,p_value
feat,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
csubj:pass,?,33,20,0.14,0.04,0.1,3.81,***,extension writing made come provide indicates led used superseded spelled,used done impaired establishing have threatened thing act seems promise,"""system was an **extension** of the worship"" ""Writing** and speaking should"" ""play had already **made** use of the Standards"" ""not themselves **come** in with dancing"" ""of analogy can **provide** the key to"" ""Paradise as a place **indicates** the fading in Adam"" ""themselves had **led** the way and were"" ""word is not **used** when there is a"" ""not entirely **superseded** the earlier use"" ""nearly always **spelled** in full, and that""","""cognates - are **used** extensively as"" ""done, have **done** a good, good thing"" ""individual was **impaired** in his or her"" ""Indeed **establishing** the metaphoric"" ""language, **have** a potential for"" ""society could be **threatened**, not just by rising"" ""the furthest **thing** from the minds"" ""but a simple **act** of transmission"" ""problem **seems** appropriate to"" ""is the latent **promise** of a twentieth""",0.0
LS,List item marker,40,34,0.16,0.06,0.1,2.72,***,ii a vi v i viii iii xli xxxi xi,ii vi v i a iii iv vii viii,"""second Sunday (**ii"" ""A**. In fact Jaggard"" ""VI"" ""V"" ""following : (**i**) Therefore see"" ""I. **viii**) attack on Pantalobus"" ""take effect (**iII"" ""XLI**. XLI"" ""XXXI"" ""XI""","""II**. READING THE"" ""VI"" ""V"" ""above, note **i"" ""A**. The exact relation"" ""others (v. **iii"" ""Vandals (**IV"" ""VII"" ""vnI, and **vIIi**) greatly widen""",0.0
NFP,Superfluous punctuation,36,31,0.15,0.06,0.09,2.68,***,- * ;- { | > z ~~,| - * { *** ~ ^ ') \ /.,"""Tintern Abbey : **-** If this Be but"" ""front inalte ***** rable, Contemple"" ""than formerly **;-** the rage of retiring"" ""and studies **{** Disc"" ""en castellano **|** por I A. R. y R"" ""alternating with **>** Leireso > or"" ""later writers. **z"" ""than discreet;""","""la muerte, **|** la eternajuventud"" ""OF MORE - AND **-** LESS : LETTER"" ""in what manner ***** Pyrrhus and f Cineas"" ""impetus, hurls **{** rapii) all the"" ""Madame de L *******, of whom he has"" ""get? L-?n~* **~** S~"" ""STf fS, **^** Rlb^ The earth"" ""without a dowry. **"" ""softened, in **\** Alen\con"" ""in / fiycard""",0.0
vocative,?,124,124,0.51,0.22,0.29,2.31,***,cf i sir vol op lady patkul j. god you,i sir lydia radlova anna me mutandis booth cf pauline,"""Cf**. The Duke s men"" ""I**. The principal"" ""Sir**, I am vexed"" ""Misc., **Vol"" ""Spondanus, **op"" ""Fair **lady**, do you think"" ""Now, **Patkul**, may thine injured"" ""J.** I think Milton"" ""God** he never becomes"" ""God b w i **you**, an you talk in""","""to reject (**I**) because of my"" ""Sir** you may now perceyve"" ""Lydia** I ll see you out"" ""Radlova** I ll expect you"" ""in any case, **Anna** Andreyevna"" ""Bless me, **me** too, father"" ""Mutatis **mutandis**, the Ottoman historian"" ""own account, **Booth**, the method of"" ""Cf**. The precedence"" ""Pauline** Listen, you d""",0.0
UH,Interjection,314,364,1.29,0.65,0.64,1.99,***,no oh like o well ich xli alas please nay,no yes like well oh o say um iv please,"""perhaps that **no** ( ) interpreter"" ""belief, yet, **oh"" ""secret brow, And **like** a beaten hound"" ""st not,? **O** twas a dreadful"" ""or The World **Well** Lost"" ""clear focus : **ich** darf wol guoter"" ""XLI"" ""Why, **alas**, will my young"" ""The poet should **please** both; but be sure"" ""omission of one, **nay** sometimes of two""","""annotations (**No**, Norah"" ""H.C. : **Yes**, but it means"" ""grew together, **Like** to a double cherry"" ""Well**, is it about how"" ""people share : **Oh**, if men s secret"" ""O** fieble moone"" ""and not, **say**, the natural"" ""verkiirzt oder **um** eine Geringfugigkeit"" ""IV"" ""please all, **please** none at all""",0.0
discourse,?,310,371,1.28,0.66,0.61,1.93,***,no so oh ii like o sir well ich please,no so yes oh well like o say ii um,"""perhaps that **no** ( ) interpreter"" ""So** in The Prelude"" ""belief, yet, **oh"" ""second Sunday (**ii"" ""secret brow, And **like** a beaten hound"" ""s version, **O**, thou"" ""sir, sir, **sir**, I would have"" ""or The World **Well** Lost"" ""clear focus : **ich** darf wol guoter"" ""The poet should **please** both; but be sure""","""annotations (**No**, Norah"" ""So**, you do not know"" ""H.C. : **Yes**, but it means"" ""people share : **Oh**, if men s secret"" ""Well**, is it about how"" ""which both **like**; while each apologises"" ""O** fieble moone"" ""and not, **say**, the natural"" ""II**. READING THE"" ""verkiirzt oder **um** eine Geringfugigkeit""",0.0
FW,Foreign word,92,118,0.38,0.21,0.17,1.8,***,e.g. etc. i.e. etc i.e so e.g pp. ibid. v.,"e.g. i.e. etc etc. pp. , i.e ed. e.g esp.","""E.G.** Both arguments"" ""a purity , **etc.**, a few lines earlier"" ""to Cleanness, **i.e.**, printing the"" ""education, life **etc"" ""apostrophe (**i.e"" ""beidenthalp bewar **so** daz ich rehte gevar"" ""the longer, **e.g"" ""my moan (**pp.** John Eliot writes"" ""I **Ibid.**, Preface, by"" ""Sarrazin suggests (**v.** his Kyd p. Ioo""","""detailed notebooks (**e.g.** Published by permission"" ""not arrested, **i.e.**, guilty or innocent"" ""parable, **etc"" ""Racial Laws, **etc"" ""Giamatti, **pp.** Chateaubriand s"" ""el campo anima **,** tu claro verde"" ""literary work (**i.e"" ""Euvres completes, **ed.** All subsequent"" ""hopes for (**e.g"" ""computer storage; **esp.** McGann s insistence""",0.0
CD,Cardinal number,1638,2326,6.74,4.15,2.59,1.63,***,one two three ii four five iii twenty six seven,one two three four ii five iii twenty ten hundred,"""beauty; he is **one** of the most quotable"" ""without a girl or **two** which Richard used"" ""But Rymer s **three** notes suffice for"" ""ACT **II"" ""a multiple of **four"" ""from two to **five** lines"" ""ACT **III"" ""Only **twenty** - seven Names make"" ""an octave and **six** lines"" ""Only twenty - **seven** Names make up the""","""as a place — **one** where its violent"" ""in his massive **two** - volume History"" ""two of the **three** Christian virtues"" ""published the first **four** sonnets in the"" ""in Sonnet **ii**, the flower metaphors"" ""Christ s Hospital **Five** and Thirty Years"" ""III"" ""Twenty** - seven times in"" ""to death after **ten** years of pursuit"" ""looks out over a **hundred** gardens, the same""",0.0
list,?,108,157,0.44,0.28,0.16,1.59,***,vol. op pp. art. art chaucer ed. p. vol e,ed trans pp. vol. ed. university studies pp p. college,"""Bond, **Vol.** I, p"" ""I Grandgent, **op"" ""Rev., ix, **pp"" ""CCLXXIV, **art.** CCLXXV, art"" ""Eliz. CCLXXVIII, **art"" ""English III (**Chaucer"" ""Ho -Elianw, **ed"" ""Court Masque, **p"" ""R. E. S., **VOL"" ""weil ich mein [**e""","""Bruce Fink, **ed"" ""of Judgment, **trans"" ""Remembered, **pp"" ""Bradbook, Muriel. **Vol.** Brisman, Leslie"" ""ed. Burchell, **ed"" ""i - Azam **University**, Pakistan"" ""Yale French **Studies**, no"" ""Mathieu - Job, **pp"" ""du bezahlen (**p"" ""Nichols Dickinson **College""",0.0
nummod,?,1020,1563,4.2,2.79,1.41,1.51,***,two one three four five ii six seven hundred twenty,two one three four five hundred twenty seven ten six,"""The **two** great obstacles"" ""The pagans had **one** advantage over"" ""But Rymer s **three** notes suffice for"" ""blank verse in **four** - line groups"" ""the Stage these **five** Years"" ""lines in I. **ii"" ""out of forty - **six** divisions, the"" ""Only twenty - **seven** Names make up the"" ""the sum of a **hundred** naughts is still"" ""in storms some **twenty** years after - would""","""in his massive **two** - volume History"" ""final line with **one** more narrative"" ""being some **three** - quarters Hungarian"" ""Of the first **four** sonnets, three"" ""Christ s Hospital **Five** and Thirty Years"" ""looks out over a **hundred** gardens, the same"" ""MLA for over **twenty** years"" ""and Harris s **seven** - year - old boy"" ""to death after **ten** years of pursuit"" ""Of the **six** tortuous signs""",0.0
