In [154]:
import pandas as pd
import re
import regex

# Reading cells 

The cell tables in `etc/` were built by hand; here we only annotate each of them with its POS and concatenate them into a single table:

In [155]:
# Load each cell table and tag it with its part of speech as it is read.
LatInfLexi_verbs_cells = pd.read_csv("etc/LatInfLexi-verbs_cells.csv").assign(POS="verb")
LatInfLexi_nouns_cells = pd.read_csv("etc/LatInfLexi-nouns_cells.csv").assign(POS="noun")

# Stack the verb cells on top of the noun cells (row labels are kept as read).
LatInfLexi_cells = pd.concat([LatInfLexi_verbs_cells, LatInfLexi_nouns_cells])

# Reading forms

We retrieve the forms tables of the previous release (v1.1) from the git history:

In [156]:
%%bash
# Check out the v1.1 copy of each forms table into the working directory.
for table in LatInfLexi-nouns.csv LatInfLexi-verbs.csv; do
    git checkout v1.1 -- "$table"
done

We read these tables

In [157]:
# Load the verb and noun forms tables, in that order.
LatInfLexi_verbs, LatInfLexi_nouns = (
    pd.read_csv(path) for path in ("LatInfLexi-verbs.csv", "LatInfLexi-nouns.csv")
)

# Making some corrections

In [158]:
# Manual corrections to the verb forms table.
# Each outer key is a row label of `LatInfLexi_verbs`; the inner dict maps a
# column name ("form" and/or "form_IPA") to the corrected value that is written
# into that row.
changes = {
# vowel length in the present system of lexeme "praescisco" (and consequently IPA transcriptions) 
633730: { "form" : "praescīscō" , "form_IPA" : "prajskiːskoː" } ,
633731: { "form" : "praescīscis" , "form_IPA" : "prajskiːskis" } ,
633732: { "form" : "praescīscit" , "form_IPA" : "prajskiːskit" } ,
633733: { "form" : "praescīscimus" , "form_IPA" : "prajskiːskimus" } ,
633734: { "form" : "praescīscitis" , "form_IPA" : "prajskiːskitis" } ,
633735: { "form" : "praescīscunt" , "form_IPA" : "prajskiːskunt" } ,
633736: { "form" : "praescīscēbam" , "form_IPA" : "prajskiːskeːbam" } ,
633737: { "form" : "praescīscēbās" , "form_IPA" : "prajskiːskeːbaːs" } ,
633738: { "form" : "praescīscēbat" , "form_IPA" : "prajskiːskeːbat" } ,
633739: { "form" : "praescīscēbāmus" , "form_IPA" : "prajskiːskeːbaːmus" } ,
633740: { "form" : "praescīscēbātis" , "form_IPA" : "prajskiːskeːbaːtis" } ,
633741: { "form" : "praescīscēbant" , "form_IPA" : "prajskiːskeːbant" } ,
633742: { "form" : "praescīscam" , "form_IPA" : "prajskiːskam" } ,
633743: { "form" : "praescīscēs" , "form_IPA" : "prajskiːskeːs" } ,
633744: { "form" : "praescīscet" , "form_IPA" : "prajskiːsket" } ,
633745: { "form" : "praescīscēmus" , "form_IPA" : "prajskiːskeːmus" } ,
633746: { "form" : "praescīscētis" , "form_IPA" : "prajskiːskeːtis" } ,
633747: { "form" : "praescīscent" , "form_IPA" : "prajskiːskent" } ,
633748: { "form" : "praescīscam" , "form_IPA" : "prajskiːskam" } ,
633749: { "form" : "praescīscās" , "form_IPA" : "prajskiːskaːs" } ,
633750: { "form" : "praescīscat" , "form_IPA" : "prajskiːskat" } ,
633751: { "form" : "praescīscāmus" , "form_IPA" : "prajskiːskaːmus" } ,
633752: { "form" : "praescīscātis" , "form_IPA" : "prajskiːskaːtis" } ,
633753: { "form" : "praescīscant" , "form_IPA" : "prajskiːskant" } ,
633754: { "form" : "praescīscerem" , "form_IPA" : "prajskiːskerem" } ,
633755: { "form" : "praescīscerēs" , "form_IPA" : "prajskiːskereːs" } ,
633756: { "form" : "praescīsceret" , "form_IPA" : "prajskiːskeret" } ,
633757: { "form" : "praescīscerēmus" , "form_IPA" : "prajskiːskereːmus" } ,
633758: { "form" : "praescīscerētis" , "form_IPA" : "prajskiːskereːtis" } ,
633759: { "form" : "praescīscerent" , "form_IPA" : "prajskiːskerent" } ,
633760: { "form" : "praescīsce" , "form_IPA" : "prajskiːske" } ,
633761: { "form" : "praescīscite" , "form_IPA" : "prajskiːskite" } ,
633762: { "form" : "praescīscitō" , "form_IPA" : "prajskiːskitoː" } ,
633763: { "form" : "praescīscitō" , "form_IPA" : "prajskiːskitoː" } ,
633764: { "form" : "praescīscitōte" , "form_IPA" : "prajskiːskitoːte" } ,
633765: { "form" : "praescīscuntō" , "form_IPA" : "prajskiːskuntoː" } ,
633766: { "form" : "praescīscere" , "form_IPA" : "prajskiːskere" } ,
633767: { "form" : "praescīscor" , "form_IPA" : "prajskiːskor" } ,
633768: { "form" : "praescīsceris" , "form_IPA" : "prajskiːskeris" } ,
633769: { "form" : "praescīscitur" , "form_IPA" : "prajskiːskitur" } ,
633770: { "form" : "praescīscimur" , "form_IPA" : "prajskiːskimur" } ,
633771: { "form" : "praescīsciminī" , "form_IPA" : "prajskiːskiminiː" } ,
633772: { "form" : "praescīscuntur" , "form_IPA" : "prajskiːskuntur" } ,
633773: { "form" : "praescīscēbar" , "form_IPA" : "prajskiːskeːbar" } ,
633774: { "form" : "praescīscēbāris" , "form_IPA" : "prajskiːskeːbaːris" } ,
633775: { "form" : "praescīscēbātur" , "form_IPA" : "prajskiːskeːbaːtur" } ,
633776: { "form" : "praescīscēbāmur" , "form_IPA" : "prajskiːskeːbaːmur" } ,
633777: { "form" : "praescīscēbāminī" , "form_IPA" : "prajskiːskeːbaːminiː" } ,
633778: { "form" : "praescīscēbantur" , "form_IPA" : "prajskiːskeːbantur" } ,
633779: { "form" : "praescīscar" , "form_IPA" : "prajskiːskar" } ,
633780: { "form" : "praescīscēris" , "form_IPA" : "prajskiːskeːris" } ,
633781: { "form" : "praescīscētur" , "form_IPA" : "prajskiːskeːtur" } ,
633782: { "form" : "praescīscēmur" , "form_IPA" : "prajskiːskeːmur" } ,
633783: { "form" : "praescīscēminī" , "form_IPA" : "prajskiːskeːminiː" } ,
633784: { "form" : "praescīscentur" , "form_IPA" : "prajskiːskentur" } ,
633785: { "form" : "praescīscar" , "form_IPA" : "prajskiːskar" } ,
633786: { "form" : "praescīscāris" , "form_IPA" : "prajskiːskaːris" } ,
633787: { "form" : "praescīscātur" , "form_IPA" : "prajskiːskaːtur" } ,
633788: { "form" : "praescīscāmur" , "form_IPA" : "prajskiːskaːmur" } ,
633789: { "form" : "praescīscāminī" , "form_IPA" : "prajskiːskaːminiː" } ,
633790: { "form" : "praescīscantur" , "form_IPA" : "prajskiːskantur" } ,
633791: { "form" : "praescīscerer" , "form_IPA" : "prajskiːskerer" } ,
633792: { "form" : "praescīscerēris" , "form_IPA" : "prajskiːskereːris" } ,
633793: { "form" : "praescīscerētur" , "form_IPA" : "prajskiːskereːtur" } ,
633794: { "form" : "praescīscerēmur" , "form_IPA" : "prajskiːskereːmur" } ,
633795: { "form" : "praescīscerēminī" , "form_IPA" : "prajskiːskereːminiː" } ,
633796: { "form" : "praescīscerentur" , "form_IPA" : "prajskiːskerentur" } ,
633797: { "form" : "praescīscere" , "form_IPA" : "prajskiːskere" } ,
633798: { "form" : "praescīsciminī" , "form_IPA" : "prajskiːskiminiː" } ,
633799: { "form" : "praescīscitor" , "form_IPA" : "prajskiːskitor" } ,
633800: { "form" : "praescīscitor" , "form_IPA" : "prajskiːskitor" } ,
633801: { "form" : "praescīscuntor" , "form_IPA" : "prajskiːskuntor" } ,
633802: { "form" : "praescīscī" , "form_IPA" : "prajskiːskiː" } ,
633803: { "form" : "praescīscendī" , "form_IPA" : "prajskiːskendiː" } ,
633804: { "form" : "praescīscendō" , "form_IPA" : "prajskiːskendoː" } ,
633805: { "form" : "praescīscendum" , "form_IPA" : "prajskiːskendum" } ,
633806: { "form" : "praescīscendō" , "form_IPA" : "prajskiːskendoː" } ,
633807: { "form" : "praescīscendus" , "form_IPA" : "prajskiːskendus" } ,
633808: { "form" : "praescīscenda" , "form_IPA" : "prajskiːskenda" } ,
633809: { "form" : "praescīscendum" , "form_IPA" : "prajskiːskendum" } ,
633810: { "form" : "praescīscendī" , "form_IPA" : "prajskiːskendiː" } ,
633811: { "form" : "praescīscendae" , "form_IPA" : "prajskiːskendaj" } ,
633812: { "form" : "praescīscendī" , "form_IPA" : "prajskiːskendiː" } ,
633813: { "form" : "praescīscendō" , "form_IPA" : "prajskiːskendoː" } ,
633814: { "form" : "praescīscendae" , "form_IPA" : "prajskiːskendaj" } ,
633815: { "form" : "praescīscendō" , "form_IPA" : "prajskiːskendoː" } ,
633816: { "form" : "praescīscendum" , "form_IPA" : "prajskiːskendum" } ,
633817: { "form" : "praescīscendam" , "form_IPA" : "prajskiːskendam" } ,
633818: { "form" : "praescīscendum" , "form_IPA" : "prajskiːskendum" } ,
633819: { "form" : "praescīscende" , "form_IPA" : "prajskiːskende" } ,
633820: { "form" : "praescīscenda" , "form_IPA" : "prajskiːskenda" } ,
633821: { "form" : "praescīscendum" , "form_IPA" : "prajskiːskendum" } ,
633822: { "form" : "praescīscendō" , "form_IPA" : "prajskiːskendoː" } ,
633823: { "form" : "praescīscendā" , "form_IPA" : "prajskiːskendaː" } ,
633824: { "form" : "praescīscendō" , "form_IPA" : "prajskiːskendoː" } ,
633825: { "form" : "praescīscendī" , "form_IPA" : "prajskiːskendiː" } ,
633826: { "form" : "praescīscendae" , "form_IPA" : "prajskiːskendaj" } ,
633827: { "form" : "praescīscenda" , "form_IPA" : "prajskiːskenda" } ,
633828: { "form" : "praescīscendōrum" , "form_IPA" : "prajskiːskendoːrum" } ,
633829: { "form" : "praescīscendārum" , "form_IPA" : "prajskiːskendaːrum" } ,
633830: { "form" : "praescīscendōrum" , "form_IPA" : "prajskiːskendoːrum" } ,
633831: { "form" : "praescīscendīs" , "form_IPA" : "prajskiːskendiːs" } ,
633832: { "form" : "praescīscendīs" , "form_IPA" : "prajskiːskendiːs" } ,
633833: { "form" : "praescīscendīs" , "form_IPA" : "prajskiːskendiːs" } ,
633834: { "form" : "praescīscendōs" , "form_IPA" : "prajskiːskendoːs" } ,
633835: { "form" : "praescīscendās" , "form_IPA" : "prajskiːskendaːs" } ,
633836: { "form" : "praescīscenda" , "form_IPA" : "prajskiːskenda" } ,
633837: { "form" : "praescīscendī" , "form_IPA" : "prajskiːskendiː" } ,
633838: { "form" : "praescīscendae" , "form_IPA" : "prajskiːskendaj" } ,
633839: { "form" : "praescīscenda" , "form_IPA" : "prajskiːskenda" } ,
633840: { "form" : "praescīscendīs" , "form_IPA" : "prajskiːskendiːs" } ,
633841: { "form" : "praescīscendīs" , "form_IPA" : "prajskiːskendiːs" } ,
633842: { "form" : "praescīscendīs" , "form_IPA" : "prajskiːskendiːs" } ,
633843: { "form" : "praescīscēns" , "form_IPA" : "prajskiːskeːns" } ,
633844: { "form" : "praescīscēns" , "form_IPA" : "prajskiːskeːns" } ,
633845: { "form" : "praescīscēns" , "form_IPA" : "prajskiːskeːns" } ,
633846: { "form" : "praescīscentis" , "form_IPA" : "prajskiːskentis" } ,
633847: { "form" : "praescīscentis" , "form_IPA" : "prajskiːskentis" } ,
633848: { "form" : "praescīscentis" , "form_IPA" : "prajskiːskentis" } ,
633849: { "form" : "praescīscentī" , "form_IPA"  : "prajskiːskentiː" } ,
633850: { "form" : "praescīscentī" , "form_IPA" : "prajskiːskentiː" } ,
633851: { "form" : "praescīscentī" , "form_IPA" : "prajskiːskentiː" } ,
633852: { "form" : "praescīscentem" , "form_IPA" : "prajskiːskentem" } ,
633853: { "form" : "praescīscentem" , "form_IPA" : "prajskiːskentem" } ,
633854: { "form" : "praescīscēns" , "form_IPA" : "prajskiːskeːns" } ,
633855: { "form" : "praescīscēns" , "form_IPA" : "prajskiːskeːns" } ,
633856: { "form" : "praescīscēns" , "form_IPA" : "prajskiːskeːns" } ,
633857: { "form" : "praescīscēns" , "form_IPA" : "prajskiːskeːns" } ,
633858: { "form" : "praescīscente" , "form_IPA" : "prajskiːskente" } ,
633859: { "form" : "praescīscente" , "form_IPA" : "prajskiːskente" } ,
633860: { "form" : "praescīscente" , "form_IPA" : "prajskiːskente" } ,
633861: { "form" : "praescīscentēs" , "form_IPA" : "prajskiːskenteːs" } ,
633862: { "form" : "praescīscentēs" , "form_IPA" : "prajskiːskenteːs" } ,
633863: { "form" : "praescīscentia" , "form_IPA" : "prajskiːskentia" } ,
633864: { "form" : "praescīscentium" , "form_IPA" : "prajskiːskentium" } ,
633865: { "form" : "praescīscentium" , "form_IPA" : "prajskiːskentium" } ,
633866: { "form" : "praescīscentium" , "form_IPA" : "prajskiːskentium" } ,
633867: { "form" : "praescīscentibus" , "form_IPA" : "prajskiːskentibus" } ,
633868: { "form" : "praescīscentibus" , "form_IPA" : "prajskiːskentibus" } ,
633869: { "form" : "praescīscentibus" , "form_IPA" : "prajskiːskentibus" } ,
633870: { "form" : "praescīscentēs" , "form_IPA" : "prajskiːskenteːs" } ,
633871: { "form" : "praescīscentēs" , "form_IPA" : "prajskiːskenteːs" } ,
633872: { "form" : "praescīscentia" , "form_IPA" : "prajskiːskentia" } ,
633873: { "form" : "praescīscentēs" , "form_IPA" : "prajskiːskenteːs" } ,
633874: { "form" : "praescīscentēs" , "form_IPA" : "prajskiːskenteːs" } ,
633875: { "form" : "praescīscentia" , "form_IPA" : "prajskiːskentia" } ,
633876: { "form" : "praescīscentibus" , "form_IPA" : "prajskiːskentibus" } ,
633877: { "form" : "praescīscentibus" , "form_IPA" : "prajskiːskentibus" } ,
633878: { "form" : "praescīscentibus" , "form_IPA" : "prajskiːskentibus" } ,
# "i" in prs.act.ind.2/3sg of lexeme "aio" is vowel, not glide (cf. Bennett grammar, §135)
41149: { "form_IPA" : "aiːs" } ,
41150: { "form_IPA" : "ait" }
}

In [159]:
# Apply the manual corrections to the verbs table.
#
# `.loc[row, column] = value` silently *appends* a new row (or column) when the
# label does not exist yet, so every target is checked before anything is
# written: a mistyped row number or column name must fail instead of adding a
# mostly empty row to the table.
unknown_rows = [row for row in changes if row not in LatInfLexi_verbs.index]
unknown_columns = sorted(
    {column for corrections in changes.values() for column in corrections}
    - set(LatInfLexi_verbs.columns)
)
if unknown_rows or unknown_columns:
    raise KeyError(
        "Corrections target labels missing from the verbs table: "
        f"rows={unknown_rows}, columns={unknown_columns}"
    )

for row, corrections in changes.items():
    for column, value in corrections.items():
        LatInfLexi_verbs.loc[row, column] = value

# Combining forms tables

Adding POS

In [160]:
# Tag every row of each forms table with its part of speech.
for forms_table, pos_tag in ((LatInfLexi_verbs, "verb"), (LatInfLexi_nouns, "noun")):
    forms_table["POS"] = pos_tag

Generating form_id

In [161]:
def add_form_id(df, suffix=""):
    """Turn the current row labels of ``df`` into a ``form_id`` column, in place.

    The existing index is moved into a new first column named ``form_id`` and
    replaced by a fresh default index; each identifier is then rewritten as
    ``"form_<old label><suffix>"``.

    Args:
        df: table to modify (mutated in place).
        suffix: text appended to every identifier.

    Returns:
        None.
    """
    df.index.name = "form_id"
    df.reset_index(inplace=True)
    old_labels = df["form_id"].map(str)
    df["form_id"] = "form_" + old_labels + suffix

# Verb identifiers end in "_v", noun identifiers in "_n".
for forms_table, pos_suffix in ((LatInfLexi_verbs, "_v"), (LatInfLexi_nouns, "_n")):
    add_form_id(forms_table, pos_suffix)

Concatenating forms tables:

In [162]:
# Stack verbs on top of nouns. `ignore_index=True` gives the combined table a
# fresh 0..n-1 index: without it the noun rows reuse row labels already taken by
# verb rows, which makes any label-based selection or assignment ambiguous.
LatInfLexi_forms = pd.concat([LatInfLexi_verbs, LatInfLexi_nouns], ignore_index=True)

# Converting forms to Paralex format

Obtaining columns in Paralex format:

In [163]:
# LatInfLexi column name -> Paralex column name.
col_map = {
    "form": "orth_form",
    "form_IPA": "phon_form",
    "PoSTag:features": "cell",
    "freqTFTL": "frequency",
    "freqAntiquitas": "frequency_Antiquitas",
    "freqAetasPatrum": "frequency_AetasPatrum",
    "freqMediumAeuum": "frequency_MediumAeuum",
    "freqRecentiorLatinitas": "frequency_RecentiorLatinitas",
}
LatInfLexi_forms = LatInfLexi_forms.rename(columns=col_map)

Mapping cells to new scheme:

In [164]:
# Lookup from the original LatInfLexi cell tag to the new cell identifier.
LatInfLexi_cells_mapper = dict(
    zip(LatInfLexi_cells["LatInFlexi-cell"], LatInfLexi_cells["cell_id"])
)
# Translate the cell tag of every form; a tag absent from the lookup becomes NaN.
LatInfLexi_forms.loc[:, "cell"] = LatInfLexi_forms["cell"].map(LatInfLexi_cells_mapper)

In [165]:
# Spot-check five randomly drawn rows (no seed is set, so the rows differ between runs).
LatInfLexi_forms.sample(n=5)

Unnamed: 0,form_id,lexeme,cell,orth_form,phon_form,frequency,frequency_Antiquitas,frequency_AetasPatrum,frequency_MediumAeuum,frequency_RecentiorLatinitas,POS
544518,form_544518_v,obuersor/-o,prf.pass.ptcp.voc.n.sg,obuersātum,obwersaːtum,3,1,0,1,1,verb
164603,form_164603_v,conspiro,iprf.act.ind.3.pl,cōnspīrābant,koːnspiːraːbant,4,0,2,2,0,verb
21837,form_21837_v,adiungo,fut.act.ptcp.acc.n.pl,adiūnctūra,adjuːnktuːra,0,0,0,0,0,verb
148862,form_148862_v,confero,prs.act.sbjv.1.sg,cōnferam,koːnferam,90,46,21,19,4,verb
69937,form_69937_v,assequor,gdv.acc.f.sg,assequendam,assekwendam,21,0,0,7,14,verb


Setting form_id as index

In [166]:
# form_id is the primary key of the forms table, so refuse duplicates:
# `verify_integrity=True` raises if two rows end up with the same identifier.
LatInfLexi_forms = LatInfLexi_forms.set_index("form_id", verify_integrity=True)

In [167]:
# Display the forms table, now indexed by form_id.
LatInfLexi_forms

Unnamed: 0_level_0,lexeme,cell,orth_form,phon_form,frequency,frequency_Antiquitas,frequency_AetasPatrum,frequency_MediumAeuum,frequency_RecentiorLatinitas,POS
form_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
form_0_v,abalieno,prs.act.ind.1.sg,abaliēnō,abalieːnoː,0,0,0,0,0,verb
form_1_v,abalieno,prs.act.ind.2.sg,abaliēnās,abalieːnaːs,0,0,0,0,0,verb
form_2_v,abalieno,prs.act.ind.3.sg,abaliēnat,abalieːnat,5,2,2,1,0,verb
form_3_v,abalieno,prs.act.ind.1.pl,abaliēnāmus,abalieːnaːmus,0,0,0,0,0,verb
form_4_v,abalieno,prs.act.ind.2.pl,abaliēnātis,abalieːnaːtis,2,1,1,0,0,verb
...,...,...,...,...,...,...,...,...,...,...
form_12451_n,uxor,gen.pl,uxōrum,uksoːrum,265,15,124,122,4,noun
form_12452_n,uxor,dat.pl,uxōribus,uksoːribus,725,30,317,373,5,noun
form_12453_n,uxor,acc.pl,uxōrēs,uksoːreːs,2073,90,1064,903,16,noun
form_12454_n,uxor,voc.pl,uxōrēs,uksoːreːs,2073,90,1064,903,16,noun


Adding stress

In [168]:
# Alternation of the consonant symbols recognised in a phonological form.
C = r"b|d|ɡ|m|n|l|r|z|p|pʰ|f|t|tʰ|s|k|kʰ|h"
# Alternation of the vowel units: the sequence "waj", glide + long vowel,
# a bare vowel (optionally long), glide + vowel, or vowel + glide.
V = r"waj|[jw][aeiouy]ː|[aeiouy]ː?|[jw][aeiouy]|[aeiouy][jw]"
# Lazily repeated run of consonants and vowel units. Only the vowel alternative
# is wrapped in a capturing group (group 1).
segmenter = f"(?:{C}|({V}))*?"


def search_vowels(word):
    """Return the ``(start, end)`` span of every vowel unit found in ``word``.

    The whole word is matched against ``segmenter``; the spans of all captures
    of its group 1 (the vowel alternative) are returned.

    Args:
        word: phonological form, without spaces or stress mark.

    Returns:
        List of ``(start, end)`` index pairs, one per vowel unit.

    Raises:
        ValueError: if ``word`` cannot be matched in full by ``segmenter``.
    """
    segmented = regex.fullmatch(segmenter, word)
    if segmented is None:
        # Without this guard the failure surfaces as the opaque
        # "'NoneType' object has no attribute 'spans'".
        raise ValueError(f"Cannot segment phonological form {word!r}")
    return segmented.spans(1)

def find_latin_stress(word):
    """Insert the stress mark (ˈ) into a phonological form.

    The mark is written immediately before the vowel of the stressed vowel
    unit; if that unit starts with a glide (w/j), the mark goes after the glide.

    Rules, applied in this order:
      1. ``"#DEF#"`` and words without any vowel unit are returned unchanged.
      2. One or two vowel units: the first one is stressed.
      3. The penult ends in ``ː`` or a glide (long vowel or diphthong):
         the penult is stressed.
      4. The penult is followed by at most one consonant, or by a stop + liquid
         cluster: the antepenult is stressed.
      5. Otherwise the penult is stressed.

    Args:
        word: phonological form as segmented by ``search_vowels``.

    Returns:
        The form with one ``ˈ`` inserted (or the input itself, see rule 1).
    """
    if word == "#DEF#":
        return word

    def stress(span):
        # Place the mark before the vowel itself, skipping a leading glide.
        position = span[0]
        if word[position] in "wj":
            position += 1
        return word[:position] + "ˈ" + word[position:]

    indexes = search_vowels(word)

    # Nothing to stress (e.g. empty string): previously an IndexError.
    if not indexes:
        return word

    # 2 syllables or less => stress first syllable
    if len(indexes) <= 2:
        return stress(indexes[0])

    *_, antepenult, penult, ultimate = indexes

    # Long vowel or diphthong in the penult => stress penult
    if word[penult[1] - 1] in {"ː", "j", "w"}:
        return stress(penult)

    # Consonants standing between the penult and the ultimate vowel units.
    # Glides that belong to a vowel unit are inside the spans and therefore
    # not part of this sequence.
    c_seq = word[penult[1]:ultimate[0]]

    # The aspiration mark is not a consonant of its own: "pʰ", "tʰ" and "kʰ"
    # are single consonants although they take two characters.
    # At most one consonant => short penult => stress antepenult
    if len(c_seq.replace("ʰ", "")) < 2:
        return stress(antepenult)

    # Stop + liquid cluster => short penult => stress antepenult.
    # The stop class uses the IPA symbol "k"; the orthographic "c" never
    # occurs in the transcriptions, so "kr"/"kl" were previously missed.
    if re.fullmatch("[bdɡptk]ʰ?[rl]", c_seq):
        return stress(antepenult)

    # other C sequence => long penult => stress penult
    return stress(penult)


# Stress-mark every phonological form of the table.
LatInfLexi_forms.loc[:, "phon_form"] = LatInfLexi_forms["phon_form"].map(find_latin_stress)

Separating sounds with spaces

In [169]:
def splitter(series, split_pattern):
    """Separate the segments of each string in ``series`` with single spaces.

    Each string is split on the regular expression ``split_pattern``; empty
    pieces are discarded and the remaining ones are joined with a space.
    When the pattern has a capturing group, the matched separators are part
    of the pieces.

    Args:
        series: pandas Series of strings.
        split_pattern: regular expression passed to ``Series.str.split``.

    Returns:
        A Series of space-separated strings.
    """
    pieces = series.str.split(pat=split_pattern, regex=True)
    return pieces.apply(lambda chunks: " ".join(filter(None, chunks)))

# Inventory of segments: consonants, glides, plain vowels and stressed vowels.
# The unstressed 'y' / 'yː' are listed alongside their stressed counterparts,
# so that every vowel accepted by the stress step is an explicit member of the
# inventory rather than surviving only as unmatched leftover text.
sounds = [
    'b', 'd', 'ɡ', 'm', 'n', 'l', 'r', 'z', 'p', 'pʰ', 'f', 't', 'tʰ', 's', 'k', 'kʰ', 'h',
    'j', 'w',
    'a', 'aː', 'e', 'eː', 'i', 'iː', 'o', 'oː', 'u', 'uː', 'y', 'yː',
    'ˈa', 'ˈaː', 'ˈe', 'ˈeː', 'ˈi', 'ˈiː', 'ˈo', 'ˈoː', 'ˈu', 'ˈuː', 'ˈy', 'ˈyː',
]
# Longest alternatives first, so that e.g. 'ˈaː' is tried before 'a'.
split_pattern = "(" + "|".join(sorted(sounds, key=len, reverse=True)) + ")"
LatInfLexi_forms["phon_form"] = splitter(LatInfLexi_forms["phon_form"], split_pattern)

# Adjusting the transcription

In [170]:
# Adding frequencies to the cells

In [171]:
# Sum every frequency column over all the forms that share a cell.
# The aggregation is named by the string "sum": passing the builtin `sum` to
# `agg` is deprecated in pandas and emits a FutureWarning.
cells_freq = LatInfLexi_forms.groupby("cell")[
    [
        "frequency",
        "frequency_Antiquitas",
        "frequency_AetasPatrum",
        "frequency_MediumAeuum",
        "frequency_RecentiorLatinitas",
    ]
].agg("sum")
cells_freq.index.name = "cell_id"

  "frequency_RecentiorLatinitas"]].agg(sum)


In [172]:
# Display the per-cell frequency totals.
cells_freq

Unnamed: 0_level_0,frequency,frequency_Antiquitas,frequency_AetasPatrum,frequency_MediumAeuum,frequency_RecentiorLatinitas
cell_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
abl.pl,835283,115180,310927,382000,27176
abl.sg,2816933,297275,962861,1463839,92958
acc.pl,1565479,216650,557840,734860,56129
acc.sg,2663992,246582,945900,1391128,80382
dat.pl,835283,115180,310927,382000,27176
...,...,...,...,...,...
prs.pass.sbjv.3.sg,168334,9945,66246,83631,8512
sup.abl,156341,17168,41767,88748,8658
sup.acc,591907,50305,198546,323267,19789
voc.pl,2113371,236521,758001,1044528,74321


In [173]:
# Use cell_id as the row label of the cells table.
LatInfLexi_cells = LatInfLexi_cells.set_index("cell_id")

In [174]:
# Attach the frequency totals to the cells, matching on the index of both tables.
LatInfLexi_cells = LatInfLexi_cells.merge(cells_freq, left_index=True, right_index=True)

# Creating the lexemes table

In [175]:
# One row per (lexeme, POS) pair with the summed frequencies of all its forms.
# The aggregation is named by the string "sum": passing the builtin `sum` to
# `agg` is deprecated in pandas and emits a FutureWarning.
# NOTE(review): POS is moved back to a column, leaving the lexeme alone as the
# index; a lexeme occurring with both POS values would yield two rows with the
# same lexeme_id - confirm this cannot happen in the data.
LatInfLexi_lexemes = (
    LatInfLexi_forms
    .groupby(["lexeme", "POS"])[
        [
            "frequency",
            "frequency_Antiquitas",
            "frequency_AetasPatrum",
            "frequency_MediumAeuum",
            "frequency_RecentiorLatinitas",
        ]
    ]
    .agg("sum")
    .reset_index("POS", drop=False)
)
LatInfLexi_lexemes.index.name = "lexeme_id"

  "frequency_RecentiorLatinitas"]].agg(sum).reset_index("POS", drop=False)


In [176]:
# Display the lexemes table.
LatInfLexi_lexemes

Unnamed: 0_level_0,POS,frequency,frequency_Antiquitas,frequency_AetasPatrum,frequency_MediumAeuum,frequency_RecentiorLatinitas
lexeme_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
abalieno,verb,393,101,176,113,3
abdico,verb,2256,773,760,683,40
abdo,verb,4850,971,2111,1644,124
abduco,verb,3194,937,1057,1116,84
abeo,verb,6626,1723,1466,3229,208
...,...,...,...,...,...,...
uulgus,noun,7917,1588,2524,2311,1494
uulnero,verb,10975,1167,4903,4784,121
uulnus,noun,18041,3618,7531,6637,255
uultus,noun,25464,4688,9117,11269,390


# Output

Writing it all to file

In [None]:
# Write the three tables next to the notebook, one CSV file each.
for table, csv_path in (
    (LatInfLexi_lexemes, "LatInfLexi-lexemes.csv"),
    (LatInfLexi_forms, "LatInfLexi-forms.csv"),
    (LatInfLexi_cells, "LatInfLexi-cells.csv"),
):
    table.to_csv(csv_path)

Remove temporary files from v1.1

In [None]:
%%bash
# Delete the v1.1 tables that were checked out for the conversion.
for table in LatInfLexi-nouns.csv LatInfLexi-verbs.csv; do
    rm "$table"
done
