In [129]:
import pandas as pd
import numpy as np
import re
import regex

from tqdm import tqdm
tqdm.pandas()

# Reading cells 

We have manually made tables in `etc/`, we just annotate them with POS and concatenate to make a single table:

In [130]:
LatInfLexi_verbs_cells = pd.read_csv("etc/LatInfLexi-verbs_cells.csv")
LatInfLexi_verbs_cells["POS"] = "verb"
LatInfLexi_nouns_cells = pd.read_csv("etc/LatInfLexi-nouns_cells.csv")
LatInfLexi_nouns_cells["POS"] = "noun"
LatInfLexi_cells = pd.concat([LatInfLexi_verbs_cells, LatInfLexi_nouns_cells])

# Reading forms

We get the previous version's forms

In [131]:
%%bash 
git checkout v1.1 -- LatInfLexi-nouns.csv
git checkout v1.1 -- LatInfLexi-verbs.csv

We read these tables

In [132]:
LatInfLexi_verbs = pd.read_csv("LatInfLexi-verbs.csv")
LatInfLexi_nouns = pd.read_csv("LatInfLexi-nouns.csv")

# Making some corrections

In [133]:
changes_verbs = {
# vowel length in the present system of lexeme "praescisco" (and consequently IPA transcriptions) 
633730: { "form" : "praescīscō" , "form_IPA" : "prajskiːskoː" } ,
633731: { "form" : "praescīscis" , "form_IPA" : "prajskiːskis" } ,
633732: { "form" : "praescīscit" , "form_IPA" : "prajskiːskit" } ,
633733: { "form" : "praescīscimus" , "form_IPA" : "prajskiːskimus" } ,
633734: { "form" : "praescīscitis" , "form_IPA" : "prajskiːskitis" } ,
633735: { "form" : "praescīscunt" , "form_IPA" : "prajskiːskunt" } ,
633736: { "form" : "praescīscēbam" , "form_IPA" : "prajskiːskeːbam" } ,
633737: { "form" : "praescīscēbās" , "form_IPA" : "prajskiːskeːbaːs" } ,
633738: { "form" : "praescīscēbat" , "form_IPA" : "prajskiːskeːbat" } ,
633739: { "form" : "praescīscēbāmus" , "form_IPA" : "prajskiːskeːbaːmus" } ,
633740: { "form" : "praescīscēbātis" , "form_IPA" : "prajskiːskeːbaːtis" } ,
633741: { "form" : "praescīscēbant" , "form_IPA" : "prajskiːskeːbant" } ,
633742: { "form" : "praescīscam" , "form_IPA" : "prajskiːskam" } ,
633743: { "form" : "praescīscēs" , "form_IPA" : "prajskiːskeːs" } ,
633744: { "form" : "praescīscet" , "form_IPA" : "prajskiːsket" } ,
633745: { "form" : "praescīscēmus" , "form_IPA" : "prajskiːskeːmus" } ,
633746: { "form" : "praescīscētis" , "form_IPA" : "prajskiːskeːtis" } ,
633747: { "form" : "praescīscent" , "form_IPA" : "prajskiːskent" } ,
633748: { "form" : "praescīscam" , "form_IPA" : "prajskiːskam" } ,
633749: { "form" : "praescīscās" , "form_IPA" : "prajskiːskaːs" } ,
633750: { "form" : "praescīscat" , "form_IPA" : "prajskiːskat" } ,
633751: { "form" : "praescīscāmus" , "form_IPA" : "prajskiːskaːmus" } ,
633752: { "form" : "praescīscātis" , "form_IPA" : "prajskiːskaːtis" } ,
633753: { "form" : "praescīscant" , "form_IPA" : "prajskiːskant" } ,
633754: { "form" : "praescīscerem" , "form_IPA" : "prajskiːskerem" } ,
633755: { "form" : "praescīscerēs" , "form_IPA" : "prajskiːskereːs" } ,
633756: { "form" : "praescīsceret" , "form_IPA" : "prajskiːskeret" } ,
633757: { "form" : "praescīscerēmus" , "form_IPA" : "prajskiːskereːmus" } ,
633758: { "form" : "praescīscerētis" , "form_IPA" : "prajskiːskereːtis" } ,
633759: { "form" : "praescīscerent" , "form_IPA" : "prajskiːskerent" } ,
633760: { "form" : "praescīsce" , "form_IPA" : "prajskiːske" } ,
633761: { "form" : "praescīscite" , "form_IPA" : "prajskiːskite" } ,
633762: { "form" : "praescīscitō" , "form_IPA" : "prajskiːskitoː" } ,
633763: { "form" : "praescīscitō" , "form_IPA" : "prajskiːskitoː" } ,
633764: { "form" : "praescīscitōte" , "form_IPA" : "prajskiːskitoːte" } ,
633765: { "form" : "praescīscuntō" , "form_IPA" : "prajskiːskuntoː" } ,
633766: { "form" : "praescīscere" , "form_IPA" : "prajskiːskere" } ,
633767: { "form" : "praescīscor" , "form_IPA" : "prajskiːskor" } ,
633768: { "form" : "praescīsceris" , "form_IPA" : "prajskiːskeris" } ,
633769: { "form" : "praescīscitur" , "form_IPA" : "prajskiːskitur" } ,
633770: { "form" : "praescīscimur" , "form_IPA" : "prajskiːskimur" } ,
633771: { "form" : "praescīsciminī" , "form_IPA" : "prajskiːskiminiː" } ,
633772: { "form" : "praescīscuntur" , "form_IPA" : "prajskiːskuntur" } ,
633773: { "form" : "praescīscēbar" , "form_IPA" : "prajskiːskeːbar" } ,
633774: { "form" : "praescīscēbāris" , "form_IPA" : "prajskiːskeːbaːris" } ,
633775: { "form" : "praescīscēbātur" , "form_IPA" : "prajskiːskeːbaːtur" } ,
633776: { "form" : "praescīscēbāmur" , "form_IPA" : "prajskiːskeːbaːmur" } ,
633777: { "form" : "praescīscēbāminī" , "form_IPA" : "prajskiːskeːbaːminiː" } ,
633778: { "form" : "praescīscēbantur" , "form_IPA" : "prajskiːskeːbantur" } ,
633779: { "form" : "praescīscar" , "form_IPA" : "prajskiːskar" } ,
633780: { "form" : "praescīscēris" , "form_IPA" : "prajskiːskeːris" } ,
633781: { "form" : "praescīscētur" , "form_IPA" : "prajskiːskeːtur" } ,
633782: { "form" : "praescīscēmur" , "form_IPA" : "prajskiːskeːmur" } ,
633783: { "form" : "praescīscēminī" , "form_IPA" : "prajskiːskeːminiː" } ,
633784: { "form" : "praescīscentur" , "form_IPA" : "prajskiːskentur" } ,
633785: { "form" : "praescīscar" , "form_IPA" : "prajskiːskar" } ,
633786: { "form" : "praescīscāris" , "form_IPA" : "prajskiːskaːris" } ,
633787: { "form" : "praescīscātur" , "form_IPA" : "prajskiːskaːtur" } ,
633788: { "form" : "praescīscāmur" , "form_IPA" : "prajskiːskaːmur" } ,
633789: { "form" : "praescīscāminī" , "form_IPA" : "prajskiːskaːminiː" } ,
633790: { "form" : "praescīscantur" , "form_IPA" : "prajskiːskantur" } ,
633791: { "form" : "praescīscerer" , "form_IPA" : "prajskiːskerer" } ,
633792: { "form" : "praescīscerēris" , "form_IPA" : "prajskiːskereːris" } ,
633793: { "form" : "praescīscerētur" , "form_IPA" : "prajskiːskereːtur" } ,
633794: { "form" : "praescīscerēmur" , "form_IPA" : "prajskiːskereːmur" } ,
633795: { "form" : "praescīscerēminī" , "form_IPA" : "prajskiːskereːminiː" } ,
633796: { "form" : "praescīscerentur" , "form_IPA" : "prajskiːskerentur" } ,
633797: { "form" : "praescīscere" , "form_IPA" : "prajskiːskere" } ,
633798: { "form" : "praescīsciminī" , "form_IPA" : "prajskiːskiminiː" } ,
633799: { "form" : "praescīscitor" , "form_IPA" : "prajskiːskitor" } ,
633800: { "form" : "praescīscitor" , "form_IPA" : "prajskiːskitor" } ,
633801: { "form" : "praescīscuntor" , "form_IPA" : "prajskiːskuntor" } ,
633802: { "form" : "praescīscī" , "form_IPA" : "prajskiːskiː" } ,
633803: { "form" : "praescīscendī" , "form_IPA" : "prajskiːskendiː" } ,
633804: { "form" : "praescīscendō" , "form_IPA" : "prajskiːskendoː" } ,
633805: { "form" : "praescīscendum" , "form_IPA" : "prajskiːskendum" } ,
633806: { "form" : "praescīscendō" , "form_IPA" : "prajskiːskendoː" } ,
633807: { "form" : "praescīscendus" , "form_IPA" : "prajskiːskendus" } ,
633808: { "form" : "praescīscenda" , "form_IPA" : "prajskiːskenda" } ,
633809: { "form" : "praescīscendum" , "form_IPA" : "prajskiːskendum" } ,
633810: { "form" : "praescīscendī" , "form_IPA" : "prajskiːskendiː" } ,
633811: { "form" : "praescīscendae" , "form_IPA" : "prajskiːskendaj" } ,
633812: { "form" : "praescīscendī" , "form_IPA" : "prajskiːskendiː" } ,
633813: { "form" : "praescīscendō" , "form_IPA" : "prajskiːskendoː" } ,
633814: { "form" : "praescīscendae" , "form_IPA" : "prajskiːskendaj" } ,
633815: { "form" : "praescīscendō" , "form_IPA" : "prajskiːskendoː" } ,
633816: { "form" : "praescīscendum" , "form_IPA" : "prajskiːskendum" } ,
633817: { "form" : "praescīscendam" , "form_IPA" : "prajskiːskendam" } ,
633818: { "form" : "praescīscendum" , "form_IPA" : "prajskiːskendum" } ,
633819: { "form" : "praescīscende" , "form_IPA" : "prajskiːskende" } ,
633820: { "form" : "praescīscenda" , "form_IPA" : "prajskiːskenda" } ,
633821: { "form" : "praescīscendum" , "form_IPA" : "prajskiːskendum" } ,
633822: { "form" : "praescīscendō" , "form_IPA" : "prajskiːskendoː" } ,
633823: { "form" : "praescīscendā" , "form_IPA" : "prajskiːskendaː" } ,
633824: { "form" : "praescīscendō" , "form_IPA" : "prajskiːskendoː" } ,
633825: { "form" : "praescīscendī" , "form_IPA" : "prajskiːskendiː" } ,
633826: { "form" : "praescīscendae" , "form_IPA" : "prajskiːskendaj" } ,
633827: { "form" : "praescīscenda" , "form_IPA" : "prajskiːskenda" } ,
633828: { "form" : "praescīscendōrum" , "form_IPA" : "prajskiːskendoːrum" } ,
633829: { "form" : "praescīscendārum" , "form_IPA" : "prajskiːskendaːrum" } ,
633830: { "form" : "praescīscendōrum" , "form_IPA" : "prajskiːskendoːrum" } ,
633831: { "form" : "praescīscendīs" , "form_IPA" : "prajskiːskendiːs" } ,
633832: { "form" : "praescīscendīs" , "form_IPA" : "prajskiːskendiːs" } ,
633833: { "form" : "praescīscendīs" , "form_IPA" : "prajskiːskendiːs" } ,
633834: { "form" : "praescīscendōs" , "form_IPA" : "prajskiːskendoːs" } ,
633835: { "form" : "praescīscendās" , "form_IPA" : "prajskiːskendaːs" } ,
633836: { "form" : "praescīscenda" , "form_IPA" : "prajskiːskenda" } ,
633837: { "form" : "praescīscendī" , "form_IPA" : "prajskiːskendiː" } ,
633838: { "form" : "praescīscendae" , "form_IPA" : "prajskiːskendaj" } ,
633839: { "form" : "praescīscenda" , "form_IPA" : "prajskiːskenda" } ,
633840: { "form" : "praescīscendīs" , "form_IPA" : "prajskiːskendiːs" } ,
633841: { "form" : "praescīscendīs" , "form_IPA" : "prajskiːskendiːs" } ,
633842: { "form" : "praescīscendīs" , "form_IPA" : "prajskiːskendiːs" } ,
633843: { "form" : "praescīscēns" , "form_IPA" : "prajskiːskeːns" } ,
633844: { "form" : "praescīscēns" , "form_IPA" : "prajskiːskeːns" } ,
633845: { "form" : "praescīscēns" , "form_IPA" : "prajskiːskeːns" } ,
633846: { "form" : "praescīscentis" , "form_IPA" : "prajskiːskentis" } ,
633847: { "form" : "praescīscentis" , "form_IPA" : "prajskiːskentis" } ,
633848: { "form" : "praescīscentis" , "form_IPA" : "prajskiːskentis" } ,
633849: { "form" : "praescīscentī" , "form_IPA" : "prajskiːskentiː" } ,
633850: { "form" : "praescīscentī" , "form_IPA" : "prajskiːskentiː" } ,
633851: { "form" : "praescīscentī" , "form_IPA" : "prajskiːskentiː" } ,
633852: { "form" : "praescīscentem" , "form_IPA" : "prajskiːskentem" } ,
633853: { "form" : "praescīscentem" , "form_IPA" : "prajskiːskentem" } ,
633854: { "form" : "praescīscēns" , "form_IPA" : "prajskiːskeːns" } ,
633855: { "form" : "praescīscēns" , "form_IPA" : "prajskiːskeːns" } ,
633856: { "form" : "praescīscēns" , "form_IPA" : "prajskiːskeːns" } ,
633857: { "form" : "praescīscēns" , "form_IPA" : "prajskiːskeːns" } ,
633858: { "form" : "praescīscente" , "form_IPA" : "prajskiːskente" } ,
633859: { "form" : "praescīscente" , "form_IPA" : "prajskiːskente" } ,
633860: { "form" : "praescīscente" , "form_IPA" : "prajskiːskente" } ,
633861: { "form" : "praescīscentēs" , "form_IPA" : "prajskiːskenteːs" } ,
633862: { "form" : "praescīscentēs" , "form_IPA" : "prajskiːskenteːs" } ,
633863: { "form" : "praescīscentia" , "form_IPA" : "prajskiːskentia" } ,
633864: { "form" : "praescīscentium" , "form_IPA" : "prajskiːskentium" } ,
633865: { "form" : "praescīscentium" , "form_IPA" : "prajskiːskentium" } ,
633866: { "form" : "praescīscentium" , "form_IPA" : "prajskiːskentium" } ,
633867: { "form" : "praescīscentibus" , "form_IPA" : "prajskiːskentibus" } ,
633868: { "form" : "praescīscentibus" , "form_IPA" : "prajskiːskentibus" } ,
633869: { "form" : "praescīscentibus" , "form_IPA" : "prajskiːskentibus" } ,
633870: { "form" : "praescīscentēs" , "form_IPA" : "prajskiːskenteːs" } ,
633871: { "form" : "praescīscentēs" , "form_IPA" : "prajskiːskenteːs" } ,
633872: { "form" : "praescīscentia" , "form_IPA" : "prajskiːskentia" } ,
633873: { "form" : "praescīscentēs" , "form_IPA" : "prajskiːskenteːs" } ,
633874: { "form" : "praescīscentēs" , "form_IPA" : "prajskiːskenteːs" } ,
633875: { "form" : "praescīscentia" , "form_IPA" : "prajskiːskentia" } ,
633876: { "form" : "praescīscentibus" , "form_IPA" : "prajskiːskentibus" } ,
633877: { "form" : "praescīscentibus" , "form_IPA" : "prajskiːskentibus" } ,
633878: { "form" : "praescīscentibus" , "form_IPA" : "prajskiːskentibus" } ,
# "i" in prs.act.ind.2/3sg of lexeme "aio" is vowel, not glide (cf. Bennett grammar, §135)
41149: { "form_IPA" : "aiːs" } ,
41150: { "form_IPA" : "ait" } ,
# fix to switch sbjv.1pl/3pl of "aio"
41169: { "form" : "#DEF#" , "form_IPA" : "#DEF#" },
41171: { "form" : "aiant" , "form_IPA" : "ajant" },
# recovering prf.3sg of "aio"
41299: { "form" : "ait" , "form_IPA" : "ait" },
# recovering passive forms of transitive derivatives of irregular "eo" (cf. Bennett grammar, §132.1, identification of transitives based on lemlat's codles
18579: { "form" : "adeor" , "form_IPA" : "adeor" } ,
18580: { "form" : "adīris" , "form_IPA" : "adiːris" } ,
18582: { "form" : "adīmur" , "form_IPA" : "adiːmur" } ,
18583: { "form" : "adīminī" , "form_IPA" : "adiːminiː" } ,
18584: { "form" : "adeuntur" , "form_IPA" : "adeuntur" } ,
18585: { "form" : "adībar" , "form_IPA" : "adiːbar" } ,
18586: { "form" : "adībāris" , "form_IPA" : "adiːbaːris" } ,
18588: { "form" : "adībāmur" , "form_IPA" : "adiːbaːmur" } ,
18589: { "form" : "adībāminī" , "form_IPA" : "adiːbaːminiː" } ,
18590: { "form" : "adībantur" , "form_IPA" : "adiːbantur" } ,
18591: { "form" : "adībor" , "form_IPA" : "adiːbor" } ,
18592: { "form" : "adīberis" , "form_IPA" : "adiːberis" } ,
18594: { "form" : "adībimur" , "form_IPA" : "adiːbimur" } ,
18595: { "form" : "adībiminī" , "form_IPA" : "adiːbiminiː" } ,
18596: { "form" : "adībuntur" , "form_IPA" : "adiːbuntur" } ,
18597: { "form" : "adear" , "form_IPA" : "adear" } ,
18598: { "form" : "adeāris" , "form_IPA" : "adeaːris" } ,
18600: { "form" : "adeāmur" , "form_IPA" : "adeaːmur" } ,
18601: { "form" : "adeāminī" , "form_IPA" : "adeaːminiː" } ,
18602: { "form" : "adeantur" , "form_IPA" : "adeantur" } ,
18603: { "form" : "adīrer" , "form_IPA" : "adiːrer" } ,
18604: { "form" : "adīrēris" , "form_IPA" : "adiːreːris" } ,
18606: { "form" : "adīrēmur" , "form_IPA" : "adiːreːmur" } ,
18607: { "form" : "adīrēminī" , "form_IPA" : "adiːreːminiː" } ,
18608: { "form" : "adīrentur" , "form_IPA" : "adiːrentur" } ,
18609: { "form" : "adīre" , "form_IPA" : "adiːre" } ,
18610: { "form" : "adīminī" , "form_IPA" : "adiːminiː" } ,
18611: { "form" : "adītor" , "form_IPA" : "adiːtor" } ,
18612: { "form" : "adītor" , "form_IPA" : "adiːtor" } ,
18613: { "form" : "adeuntor" , "form_IPA" : "adeuntor" } ,
18619: { "form" : "adeundus" , "form_IPA" : "adeundus" } ,
18620: { "form" : "adeunda" , "form_IPA" : "adeunda" } ,
18621: { "form" : "adeundum" , "form_IPA" : "adeundum" } ,
18622: { "form" : "adeundī" , "form_IPA" : "adeundiː" } ,
18623: { "form" : "adeundae" , "form_IPA" : "adeundaj" } ,
18624: { "form" : "adeundī" , "form_IPA" : "adeundiː" } ,
18625: { "form" : "adeundō" , "form_IPA" : "adeundoː" } ,
18626: { "form" : "adeundae" , "form_IPA" : "adeundaj" } ,
18627: { "form" : "adeundō" , "form_IPA" : "adeundoː" } ,
18628: { "form" : "adeundum" , "form_IPA" : "adeundum" } ,
18629: { "form" : "adeundam" , "form_IPA" : "adeundam" } ,
18631: { "form" : "adeunde" , "form_IPA" : "adeunde" } ,
18632: { "form" : "adeunda" , "form_IPA" : "adeunda" } ,
18633: { "form" : "adeundum" , "form_IPA" : "adeundum" } ,
18634: { "form" : "adeundō" , "form_IPA" : "adeundoː" } ,
18635: { "form" : "adeundā" , "form_IPA" : "adeundaː" } ,
18636: { "form" : "adeundō" , "form_IPA" : "adeundoː" } ,
18637: { "form" : "adeundī" , "form_IPA" : "adeundiː" } ,
18638: { "form" : "adeundae" , "form_IPA" : "adeundaj" } ,
18639: { "form" : "adeunda" , "form_IPA" : "adeunda" } ,
18640: { "form" : "adeundōrum" , "form_IPA" : "adeundoːrum" } ,
18641: { "form" : "adeundārum" , "form_IPA" : "adeundaːrum" } ,
18642: { "form" : "adeundōrum" , "form_IPA" : "adeundoːrum" } ,
18643: { "form" : "adeundīs" , "form_IPA" : "adeundiːs" } ,
18644: { "form" : "adeundīs" , "form_IPA" : "adeundiːs" } ,
18645: { "form" : "adeundīs" , "form_IPA" : "adeundiːs" } ,
18646: { "form" : "adeundōs" , "form_IPA" : "adeundoːs" } ,
18647: { "form" : "adeundās" , "form_IPA" : "adeundaːs" } ,
18648: { "form" : "adeunda" , "form_IPA" : "adeunda" } ,
18649: { "form" : "adeundī" , "form_IPA" : "adeundiː" } ,
18650: { "form" : "adeundae" , "form_IPA" : "adeundaj" } ,
18651: { "form" : "adeunda" , "form_IPA" : "adeunda" } ,
18652: { "form" : "adeundīs" , "form_IPA" : "adeundiːs" } ,
18653: { "form" : "adeundīs" , "form_IPA" : "adeundiːs" } ,
18654: { "form" : "adeundīs" , "form_IPA" : "adeundiːs" } ,
53377: { "form" : "anteeor" , "form_IPA" : "anteeor" } ,
53378: { "form" : "anteīris" , "form_IPA" : "anteiːris" } ,
53380: { "form" : "anteīmur" , "form_IPA" : "anteiːmur" } ,
53381: { "form" : "anteīminī" , "form_IPA" : "anteiːminiː" } ,
53382: { "form" : "anteeuntur" , "form_IPA" : "anteeuntur" } ,
53383: { "form" : "anteībar" , "form_IPA" : "anteiːbar" } ,
53384: { "form" : "anteībāris" , "form_IPA" : "anteiːbaːris" } ,
53386: { "form" : "anteībāmur" , "form_IPA" : "anteiːbaːmur" } ,
53387: { "form" : "anteībāminī" , "form_IPA" : "anteiːbaːminiː" } ,
53388: { "form" : "anteībantur" , "form_IPA" : "anteiːbantur" } ,
53389: { "form" : "anteībor" , "form_IPA" : "anteiːbor" } ,
53390: { "form" : "anteīberis" , "form_IPA" : "anteiːberis" } ,
53392: { "form" : "anteībimur" , "form_IPA" : "anteiːbimur" } ,
53393: { "form" : "anteībiminī" , "form_IPA" : "anteiːbiminiː" } ,
53394: { "form" : "anteībuntur" , "form_IPA" : "anteiːbuntur" } ,
53395: { "form" : "anteear" , "form_IPA" : "anteear" } ,
53396: { "form" : "anteeāris" , "form_IPA" : "anteeaːris" } ,
53398: { "form" : "anteeāmur" , "form_IPA" : "anteeaːmur" } ,
53399: { "form" : "anteeāminī" , "form_IPA" : "anteeaːminiː" } ,
53400: { "form" : "anteeantur" , "form_IPA" : "anteeantur" } ,
53401: { "form" : "anteīrer" , "form_IPA" : "anteiːrer" } ,
53402: { "form" : "anteīrēris" , "form_IPA" : "anteiːreːris" } ,
53404: { "form" : "anteīrēmur" , "form_IPA" : "anteiːreːmur" } ,
53405: { "form" : "anteīrēminī" , "form_IPA" : "anteiːreːminiː" } ,
53406: { "form" : "anteīrentur" , "form_IPA" : "anteiːrentur" } ,
53407: { "form" : "anteīre" , "form_IPA" : "anteiːre" } ,
53408: { "form" : "anteīminī" , "form_IPA" : "anteiːminiː" } ,
53409: { "form" : "anteītor" , "form_IPA" : "anteiːtor" } ,
53410: { "form" : "anteītor" , "form_IPA" : "anteiːtor" } ,
53411: { "form" : "anteeuntor" , "form_IPA" : "anteeuntor" } ,
53417: { "form" : "anteeundus" , "form_IPA" : "anteeundus" } ,
53418: { "form" : "anteeunda" , "form_IPA" : "anteeunda" } ,
53419: { "form" : "anteeundum" , "form_IPA" : "anteeundum" } ,
53420: { "form" : "anteeundī" , "form_IPA" : "anteeundiː" } ,
53421: { "form" : "anteeundae" , "form_IPA" : "anteeundaj" } ,
53422: { "form" : "anteeundī" , "form_IPA" : "anteeundiː" } ,
53423: { "form" : "anteeundō" , "form_IPA" : "anteeundoː" } ,
53424: { "form" : "anteeundae" , "form_IPA" : "anteeundaj" } ,
53425: { "form" : "anteeundō" , "form_IPA" : "anteeundoː" } ,
53426: { "form" : "anteeundum" , "form_IPA" : "anteeundum" } ,
53427: { "form" : "anteeundam" , "form_IPA" : "anteeundam" } ,
53429: { "form" : "anteeunde" , "form_IPA" : "anteeunde" } ,
53430: { "form" : "anteeunda" , "form_IPA" : "anteeunda" } ,
53431: { "form" : "anteeundum" , "form_IPA" : "anteeundum" } ,
53432: { "form" : "anteeundō" , "form_IPA" : "anteeundoː" } ,
53433: { "form" : "anteeundā" , "form_IPA" : "anteeundaː" } ,
53434: { "form" : "anteeundō" , "form_IPA" : "anteeundoː" } ,
53435: { "form" : "anteeundī" , "form_IPA" : "anteeundiː" } ,
53436: { "form" : "anteeundae" , "form_IPA" : "anteeundaj" } ,
53437: { "form" : "anteeunda" , "form_IPA" : "anteeunda" } ,
53438: { "form" : "anteeundōrum" , "form_IPA" : "anteeundoːrum" } ,
53439: { "form" : "anteeundārum" , "form_IPA" : "anteeundaːrum" } ,
53440: { "form" : "anteeundōrum" , "form_IPA" : "anteeundoːrum" } ,
53441: { "form" : "anteeundīs" , "form_IPA" : "anteeundiːs" } ,
53442: { "form" : "anteeundīs" , "form_IPA" : "anteeundiːs" } ,
53443: { "form" : "anteeundīs" , "form_IPA" : "anteeundiːs" } ,
53444: { "form" : "anteeundōs" , "form_IPA" : "anteeundoːs" } ,
53445: { "form" : "anteeundās" , "form_IPA" : "anteeundaːs" } ,
53446: { "form" : "anteeunda" , "form_IPA" : "anteeunda" } ,
53447: { "form" : "anteeundī" , "form_IPA" : "anteeundiː" } ,
53448: { "form" : "anteeundae" , "form_IPA" : "anteeundaj" } ,
53449: { "form" : "anteeunda" , "form_IPA" : "anteeunda" } ,
53450: { "form" : "anteeundīs" , "form_IPA" : "anteeundiːs" } ,
53451: { "form" : "anteeundīs" , "form_IPA" : "anteeundiːs" } ,
53452: { "form" : "anteeundīs" , "form_IPA" : "anteeundiːs" } ,
101129: { "form" : "circumeor" , "form_IPA" : "kirkumeor" } ,
101130: { "form" : "circumīris" , "form_IPA" : "kirkumiːris" } ,
101132: { "form" : "circumīmur" , "form_IPA" : "kirkumiːmur" } ,
101133: { "form" : "circumīminī" , "form_IPA" : "kirkumiːminiː" } ,
101134: { "form" : "circumeuntur" , "form_IPA" : "kirkumeuntur" } ,
101135: { "form" : "circumībar" , "form_IPA" : "kirkumiːbar" } ,
101136: { "form" : "circumībāris" , "form_IPA" : "kirkumiːbaːris" } ,
101138: { "form" : "circumībāmur" , "form_IPA" : "kirkumiːbaːmur" } ,
101139: { "form" : "circumībāminī" , "form_IPA" : "kirkumiːbaːminiː" } ,
101140: { "form" : "circumībantur" , "form_IPA" : "kirkumiːbantur" } ,
101141: { "form" : "circumībor" , "form_IPA" : "kirkumiːbor" } ,
101142: { "form" : "circumīberis" , "form_IPA" : "kirkumiːberis" } ,
101144: { "form" : "circumībimur" , "form_IPA" : "kirkumiːbimur" } ,
101145: { "form" : "circumībiminī" , "form_IPA" : "kirkumiːbiminiː" } ,
101146: { "form" : "circumībuntur" , "form_IPA" : "kirkumiːbuntur" } ,
101147: { "form" : "circumear" , "form_IPA" : "kirkumear" } ,
101148: { "form" : "circumeāris" , "form_IPA" : "kirkumeaːris" } ,
101150: { "form" : "circumeāmur" , "form_IPA" : "kirkumeaːmur" } ,
101151: { "form" : "circumeāminī" , "form_IPA" : "kirkumeaːminiː" } ,
101152: { "form" : "circumeantur" , "form_IPA" : "kirkumeantur" } ,
101153: { "form" : "circumīrer" , "form_IPA" : "kirkumiːrer" } ,
101154: { "form" : "circumīrēris" , "form_IPA" : "kirkumiːreːris" } ,
101156: { "form" : "circumīrēmur" , "form_IPA" : "kirkumiːreːmur" } ,
101157: { "form" : "circumīrēminī" , "form_IPA" : "kirkumiːreːminiː" } ,
101158: { "form" : "circumīrentur" , "form_IPA" : "kirkumiːrentur" } ,
101159: { "form" : "circumīre" , "form_IPA" : "kirkumiːre" } ,
101160: { "form" : "circumīminī" , "form_IPA" : "kirkumiːminiː" } ,
101161: { "form" : "circumītor" , "form_IPA" : "kirkumiːtor" } ,
101162: { "form" : "circumītor" , "form_IPA" : "kirkumiːtor" } ,
101163: { "form" : "circumeuntor" , "form_IPA" : "kirkumeuntor" } ,
101169: { "form" : "circumeundus" , "form_IPA" : "kirkumeundus" } ,
101170: { "form" : "circumeunda" , "form_IPA" : "kirkumeunda" } ,
101171: { "form" : "circumeundum" , "form_IPA" : "kirkumeundum" } ,
101172: { "form" : "circumeundī" , "form_IPA" : "kirkumeundiː" } ,
101173: { "form" : "circumeundae" , "form_IPA" : "kirkumeundaj" } ,
101174: { "form" : "circumeundī" , "form_IPA" : "kirkumeundiː" } ,
101175: { "form" : "circumeundō" , "form_IPA" : "kirkumeundoː" } ,
101176: { "form" : "circumeundae" , "form_IPA" : "kirkumeundaj" } ,
101177: { "form" : "circumeundō" , "form_IPA" : "kirkumeundoː" } ,
101178: { "form" : "circumeundum" , "form_IPA" : "kirkumeundum" } ,
101179: { "form" : "circumeundam" , "form_IPA" : "kirkumeundam" } ,
101181: { "form" : "circumeunde" , "form_IPA" : "kirkumeunde" } ,
101182: { "form" : "circumeunda" , "form_IPA" : "kirkumeunda" } ,
101183: { "form" : "circumeundum" , "form_IPA" : "kirkumeundum" } ,
101184: { "form" : "circumeundō" , "form_IPA" : "kirkumeundoː" } ,
101185: { "form" : "circumeundā" , "form_IPA" : "kirkumeundaː" } ,
101186: { "form" : "circumeundō" , "form_IPA" : "kirkumeundoː" } ,
101187: { "form" : "circumeundī" , "form_IPA" : "kirkumeundiː" } ,
101188: { "form" : "circumeundae" , "form_IPA" : "kirkumeundaj" } ,
101189: { "form" : "circumeunda" , "form_IPA" : "kirkumeunda" } ,
101190: { "form" : "circumeundōrum" , "form_IPA" : "kirkumeundoːrum" } ,
101191: { "form" : "circumeundārum" , "form_IPA" : "kirkumeundaːrum" } ,
101192: { "form" : "circumeundōrum" , "form_IPA" : "kirkumeundoːrum" } ,
101193: { "form" : "circumeundīs" , "form_IPA" : "kirkumeundiːs" } ,
101194: { "form" : "circumeundīs" , "form_IPA" : "kirkumeundiːs" } ,
101195: { "form" : "circumeundīs" , "form_IPA" : "kirkumeundiːs" } ,
101196: { "form" : "circumeundōs" , "form_IPA" : "kirkumeundoːs" } ,
101197: { "form" : "circumeundās" , "form_IPA" : "kirkumeundaːs" } ,
101198: { "form" : "circumeunda" , "form_IPA" : "kirkumeunda" } ,
101199: { "form" : "circumeundī" , "form_IPA" : "kirkumeundiː" } ,
101200: { "form" : "circumeundae" , "form_IPA" : "kirkumeundaj" } ,
101201: { "form" : "circumeunda" , "form_IPA" : "kirkumeunda" } ,
101202: { "form" : "circumeundīs" , "form_IPA" : "kirkumeundiːs" } ,
101203: { "form" : "circumeundīs" , "form_IPA" : "kirkumeundiːs" } ,
101204: { "form" : "circumeundīs" , "form_IPA" : "kirkumeundiːs" } ,
637577: { "form" : "praetereor" , "form_IPA" : "prajtereor" } ,
637578: { "form" : "praeterīris" , "form_IPA" : "prajteriːris" } ,
637580: { "form" : "praeterīmur" , "form_IPA" : "prajteriːmur" } ,
637581: { "form" : "praeterīminī" , "form_IPA" : "prajteriːminiː" } ,
637582: { "form" : "praetereuntur" , "form_IPA" : "prajtereuntur" } ,
637583: { "form" : "praeterībar" , "form_IPA" : "prajteriːbar" } ,
637584: { "form" : "praeterībāris" , "form_IPA" : "prajteriːbaːris" } ,
637586: { "form" : "praeterībāmur" , "form_IPA" : "prajteriːbaːmur" } ,
637587: { "form" : "praeterībāminī" , "form_IPA" : "prajteriːbaːminiː" } ,
637588: { "form" : "praeterībantur" , "form_IPA" : "prajteriːbantur" } ,
637589: { "form" : "praeterībor" , "form_IPA" : "prajteriːbor" } ,
637590: { "form" : "praeterīberis" , "form_IPA" : "prajteriːberis" } ,
637592: { "form" : "praeterībimur" , "form_IPA" : "prajteriːbimur" } ,
637593: { "form" : "praeterībiminī" , "form_IPA" : "prajteriːbiminiː" } ,
637594: { "form" : "praeterībuntur" , "form_IPA" : "prajteriːbuntur" } ,
637595: { "form" : "praeterear" , "form_IPA" : "prajterear" } ,
637596: { "form" : "praetereāris" , "form_IPA" : "prajtereaːris" } ,
637598: { "form" : "praetereāmur" , "form_IPA" : "prajtereaːmur" } ,
637599: { "form" : "praetereāminī" , "form_IPA" : "prajtereaːminiː" } ,
637600: { "form" : "praetereantur" , "form_IPA" : "prajtereantur" } ,
637601: { "form" : "praeterīrer" , "form_IPA" : "prajteriːrer" } ,
637602: { "form" : "praeterīrēris" , "form_IPA" : "prajteriːreːris" } ,
637604: { "form" : "praeterīrēmur" , "form_IPA" : "prajteriːreːmur" } ,
637605: { "form" : "praeterīrēminī" , "form_IPA" : "prajteriːreːminiː" } ,
637606: { "form" : "praeterīrentur" , "form_IPA" : "prajteriːrentur" } ,
637607: { "form" : "praeterīre" , "form_IPA" : "prajteriːre" } ,
637608: { "form" : "praeterīminī" , "form_IPA" : "prajteriːminiː" } ,
637609: { "form" : "praeterītor" , "form_IPA" : "prajteriːtor" } ,
637610: { "form" : "praeterītor" , "form_IPA" : "prajteriːtor" } ,
637611: { "form" : "praetereuntor" , "form_IPA" : "prajtereuntor" } ,
637617: { "form" : "praetereundus" , "form_IPA" : "prajtereundus" } ,
637618: { "form" : "praetereunda" , "form_IPA" : "prajtereunda" } ,
637619: { "form" : "praetereundum" , "form_IPA" : "prajtereundum" } ,
637620: { "form" : "praetereundī" , "form_IPA" : "prajtereundiː" } ,
637621: { "form" : "praetereundae" , "form_IPA" : "prajtereundaj" } ,
637622: { "form" : "praetereundī" , "form_IPA" : "prajtereundiː" } ,
637623: { "form" : "praetereundō" , "form_IPA" : "prajtereundoː" } ,
637624: { "form" : "praetereundae" , "form_IPA" : "prajtereundaj" } ,
637625: { "form" : "praetereundō" , "form_IPA" : "prajtereundoː" } ,
637626: { "form" : "praetereundum" , "form_IPA" : "prajtereundum" } ,
637627: { "form" : "praetereundam" , "form_IPA" : "prajtereundam" } ,
637629: { "form" : "praetereunde" , "form_IPA" : "prajtereunde" } ,
637630: { "form" : "praetereunda" , "form_IPA" : "prajtereunda" } ,
637631: { "form" : "praetereundum" , "form_IPA" : "prajtereundum" } ,
637632: { "form" : "praetereundō" , "form_IPA" : "prajtereundoː" } ,
637633: { "form" : "praetereundā" , "form_IPA" : "prajtereundaː" } ,
637634: { "form" : "praetereundō" , "form_IPA" : "prajtereundoː" } ,
637635: { "form" : "praetereundī" , "form_IPA" : "prajtereundiː" } ,
637636: { "form" : "praetereundae" , "form_IPA" : "prajtereundaj" } ,
637637: { "form" : "praetereunda" , "form_IPA" : "prajtereunda" } ,
637638: { "form" : "praetereundōrum" , "form_IPA" : "prajtereundoːrum" } ,
637639: { "form" : "praetereundārum" , "form_IPA" : "prajtereundaːrum" } ,
637640: { "form" : "praetereundōrum" , "form_IPA" : "prajtereundoːrum" } ,
637641: { "form" : "praetereundīs" , "form_IPA" : "prajtereundiːs" } ,
637642: { "form" : "praetereundīs" , "form_IPA" : "prajtereundiːs" } ,
637643: { "form" : "praetereundīs" , "form_IPA" : "prajtereundiːs" } ,
637644: { "form" : "praetereundōs" , "form_IPA" : "prajtereundoːs" } ,
637645: { "form" : "praetereundās" , "form_IPA" : "prajtereundaːs" } ,
637646: { "form" : "praetereunda" , "form_IPA" : "prajtereunda" } ,
637647: { "form" : "praetereundī" , "form_IPA" : "prajtereundiː" } ,
637648: { "form" : "praetereundae" , "form_IPA" : "prajtereundaj" } ,
637649: { "form" : "praetereunda" , "form_IPA" : "prajtereunda" } ,
637650: { "form" : "praetereundīs" , "form_IPA" : "prajtereundiːs" } ,
637651: { "form" : "praetereundīs" , "form_IPA" : "prajtereundiːs" } ,
637652: { "form" : "praetereundīs" , "form_IPA" : "prajtereundiːs" } ,
672375: { "form" : "queor" , "form_IPA" : "kweor" } ,
672376: { "form" : "quīris" , "form_IPA" : "kwiːris" } ,
672378: { "form" : "quīmur" , "form_IPA" : "kwiːmur" } ,
672379: { "form" : "quīminī" , "form_IPA" : "kwiːminiː" } ,
672380: { "form" : "queuntur" , "form_IPA" : "kweuntur" } ,
672381: { "form" : "quībar" , "form_IPA" : "kwiːbar" } ,
672382: { "form" : "quībāris" , "form_IPA" : "kwiːbaːris" } ,
672384: { "form" : "quībāmur" , "form_IPA" : "kwiːbaːmur" } ,
672385: { "form" : "quībāminī" , "form_IPA" : "kwiːbaːminiː" } ,
672386: { "form" : "quībantur" , "form_IPA" : "kwiːbantur" } ,
672387: { "form" : "quībor" , "form_IPA" : "kwiːbor" } ,
672388: { "form" : "quīberis" , "form_IPA" : "kwiːberis" } ,
672390: { "form" : "quībimur" , "form_IPA" : "kwiːbimur" } ,
672391: { "form" : "quībiminī" , "form_IPA" : "kwiːbiminiː" } ,
672392: { "form" : "quībuntur" , "form_IPA" : "kwiːbuntur" } ,
672393: { "form" : "quear" , "form_IPA" : "kwear" } ,
672394: { "form" : "queāris" , "form_IPA" : "kweaːris" } ,
672396: { "form" : "queāmur" , "form_IPA" : "kweaːmur" } ,
672397: { "form" : "queāminī" , "form_IPA" : "kweaːminiː" } ,
672398: { "form" : "queantur" , "form_IPA" : "kweantur" } ,
672399: { "form" : "quīrer" , "form_IPA" : "kwiːrer" } ,
672400: { "form" : "quīrēris" , "form_IPA" : "kwiːreːris" } ,
672402: { "form" : "quīrēmur" , "form_IPA" : "kwiːreːmur" } ,
672403: { "form" : "quīrēminī" , "form_IPA" : "kwiːreːminiː" } ,
672404: { "form" : "quīrentur" , "form_IPA" : "kwiːrentur" } ,
672405: { "form" : "quīre" , "form_IPA" : "kwiːre" } ,
672406: { "form" : "quīminī" , "form_IPA" : "kwiːminiː" } ,
672407: { "form" : "quītor" , "form_IPA" : "kwiːtor" } ,
672408: { "form" : "quītor" , "form_IPA" : "kwiːtor" } ,
672409: { "form" : "queuntor" , "form_IPA" : "kweuntor" } ,
672415: { "form" : "queundus" , "form_IPA" : "kweundus" } ,
672416: { "form" : "queunda" , "form_IPA" : "kweunda" } ,
672417: { "form" : "queundum" , "form_IPA" : "kweundum" } ,
672418: { "form" : "queundī" , "form_IPA" : "kweundiː" } ,
672419: { "form" : "queundae" , "form_IPA" : "kweundaj" } ,
672420: { "form" : "queundī" , "form_IPA" : "kweundiː" } ,
672421: { "form" : "queundō" , "form_IPA" : "kweundoː" } ,
672422: { "form" : "queundae" , "form_IPA" : "kweundaj" } ,
672423: { "form" : "queundō" , "form_IPA" : "kweundoː" } ,
672424: { "form" : "queundum" , "form_IPA" : "kweundum" } ,
672425: { "form" : "queundam" , "form_IPA" : "kweundam" } ,
672427: { "form" : "queunde" , "form_IPA" : "kweunde" } ,
672428: { "form" : "queunda" , "form_IPA" : "kweunda" } ,
672429: { "form" : "queundum" , "form_IPA" : "kweundum" } ,
672430: { "form" : "queundō" , "form_IPA" : "kweundoː" } ,
672431: { "form" : "queundā" , "form_IPA" : "kweundaː" } ,
672432: { "form" : "queundō" , "form_IPA" : "kweundoː" } ,
672433: { "form" : "queundī" , "form_IPA" : "kweundiː" } ,
672434: { "form" : "queundae" , "form_IPA" : "kweundaj" } ,
672435: { "form" : "queunda" , "form_IPA" : "kweunda" } ,
672436: { "form" : "queundōrum" , "form_IPA" : "kweundoːrum" } ,
672437: { "form" : "queundārum" , "form_IPA" : "kweundaːrum" } ,
672438: { "form" : "queundōrum" , "form_IPA" : "kweundoːrum" } ,
672439: { "form" : "queundīs" , "form_IPA" : "kweundiːs" } ,
672440: { "form" : "queundīs" , "form_IPA" : "kweundiːs" } ,
672441: { "form" : "queundīs" , "form_IPA" : "kweundiːs" } ,
672442: { "form" : "queundōs" , "form_IPA" : "kweundoːs" } ,
672443: { "form" : "queundās" , "form_IPA" : "kweundaːs" } ,
672444: { "form" : "queunda" , "form_IPA" : "kweunda" } ,
672445: { "form" : "queundī" , "form_IPA" : "kweundiː" } ,
672446: { "form" : "queundae" , "form_IPA" : "kweundaj" } ,
672447: { "form" : "queunda" , "form_IPA" : "kweunda" } ,
672448: { "form" : "queundīs" , "form_IPA" : "kweundiːs" } ,
672449: { "form" : "queundīs" , "form_IPA" : "kweundiːs" } ,
672450: { "form" : "queundīs" , "form_IPA" : "kweundiːs" } ,
768895: { "form" : "subeor" , "form_IPA" : "subeor" } ,
768896: { "form" : "subīris" , "form_IPA" : "subiːris" } ,
768898: { "form" : "subīmur" , "form_IPA" : "subiːmur" } ,
768899: { "form" : "subīminī" , "form_IPA" : "subiːminiː" } ,
768900: { "form" : "subeuntur" , "form_IPA" : "subeuntur" } ,
768901: { "form" : "subībar" , "form_IPA" : "subiːbar" } ,
768902: { "form" : "subībāris" , "form_IPA" : "subiːbaːris" } ,
768904: { "form" : "subībāmur" , "form_IPA" : "subiːbaːmur" } ,
768905: { "form" : "subībāminī" , "form_IPA" : "subiːbaːminiː" } ,
768906: { "form" : "subībantur" , "form_IPA" : "subiːbantur" } ,
768907: { "form" : "subībor" , "form_IPA" : "subiːbor" } ,
768908: { "form" : "subīberis" , "form_IPA" : "subiːberis" } ,
768910: { "form" : "subībimur" , "form_IPA" : "subiːbimur" } ,
768911: { "form" : "subībiminī" , "form_IPA" : "subiːbiminiː" } ,
768912: { "form" : "subībuntur" , "form_IPA" : "subiːbuntur" } ,
768913: { "form" : "subear" , "form_IPA" : "subear" } ,
768914: { "form" : "subeāris" , "form_IPA" : "subeaːris" } ,
768916: { "form" : "subeāmur" , "form_IPA" : "subeaːmur" } ,
768917: { "form" : "subeāminī" , "form_IPA" : "subeaːminiː" } ,
768918: { "form" : "subeantur" , "form_IPA" : "subeantur" } ,
768919: { "form" : "subīrer" , "form_IPA" : "subiːrer" } ,
768920: { "form" : "subīrēris" , "form_IPA" : "subiːreːris" } ,
768922: { "form" : "subīrēmur" , "form_IPA" : "subiːreːmur" } ,
768923: { "form" : "subīrēminī" , "form_IPA" : "subiːreːminiː" } ,
768924: { "form" : "subīrentur" , "form_IPA" : "subiːrentur" } ,
768925: { "form" : "subīre" , "form_IPA" : "subiːre" } ,
768926: { "form" : "subīminī" , "form_IPA" : "subiːminiː" } ,
768927: { "form" : "subītor" , "form_IPA" : "subiːtor" } ,
768928: { "form" : "subītor" , "form_IPA" : "subiːtor" } ,
768929: { "form" : "subeuntor" , "form_IPA" : "subeuntor" } ,
768935: { "form" : "subeundus" , "form_IPA" : "subeundus" } ,
768936: { "form" : "subeunda" , "form_IPA" : "subeunda" } ,
768937: { "form" : "subeundum" , "form_IPA" : "subeundum" } ,
768938: { "form" : "subeundī" , "form_IPA" : "subeundiː" } ,
768939: { "form" : "subeundae" , "form_IPA" : "subeundaj" } ,
768940: { "form" : "subeundī" , "form_IPA" : "subeundiː" } ,
768941: { "form" : "subeundō" , "form_IPA" : "subeundoː" } ,
768942: { "form" : "subeundae" , "form_IPA" : "subeundaj" } ,
768943: { "form" : "subeundō" , "form_IPA" : "subeundoː" } ,
768944: { "form" : "subeundum" , "form_IPA" : "subeundum" } ,
768945: { "form" : "subeundam" , "form_IPA" : "subeundam" } ,
768947: { "form" : "subeunde" , "form_IPA" : "subeunde" } ,
768948: { "form" : "subeunda" , "form_IPA" : "subeunda" } ,
768949: { "form" : "subeundum" , "form_IPA" : "subeundum" } ,
768950: { "form" : "subeundō" , "form_IPA" : "subeundoː" } ,
768951: { "form" : "subeundā" , "form_IPA" : "subeundaː" } ,
768952: { "form" : "subeundō" , "form_IPA" : "subeundoː" } ,
768953: { "form" : "subeundī" , "form_IPA" : "subeundiː" } ,
768954: { "form" : "subeundae" , "form_IPA" : "subeundaj" } ,
768955: { "form" : "subeunda" , "form_IPA" : "subeunda" } ,
768956: { "form" : "subeundōrum" , "form_IPA" : "subeundoːrum" } ,
768957: { "form" : "subeundārum" , "form_IPA" : "subeundaːrum" } ,
768958: { "form" : "subeundōrum" , "form_IPA" : "subeundoːrum" } ,
768959: { "form" : "subeundīs" , "form_IPA" : "subeundiːs" } ,
768960: { "form" : "subeundīs" , "form_IPA" : "subeundiːs" } ,
768961: { "form" : "subeundīs" , "form_IPA" : "subeundiːs" } ,
768962: { "form" : "subeundōs" , "form_IPA" : "subeundoːs" } ,
768963: { "form" : "subeundās" , "form_IPA" : "subeundaːs" } ,
768964: { "form" : "subeunda" , "form_IPA" : "subeunda" } ,
768965: { "form" : "subeundī" , "form_IPA" : "subeundiː" } ,
768966: { "form" : "subeundae" , "form_IPA" : "subeundaj" } ,
768967: { "form" : "subeunda" , "form_IPA" : "subeunda" } ,
768968: { "form" : "subeundīs" , "form_IPA" : "subeundiːs" } ,
768969: { "form" : "subeundīs" , "form_IPA" : "subeundiːs" } ,
768970: { "form" : "subeundīs" , "form_IPA" : "subeundiːs" } ,
814107: { "form" : "trānseor" , "form_IPA" : "traːnseor" } ,
814108: { "form" : "trānsīris" , "form_IPA" : "traːnsiːris" } ,
814110: { "form" : "trānsīmur" , "form_IPA" : "traːnsiːmur" } ,
814111: { "form" : "trānsīminī" , "form_IPA" : "traːnsiːminiː" } ,
814112: { "form" : "trānseuntur" , "form_IPA" : "traːnseuntur" } ,
814113: { "form" : "trānsībar" , "form_IPA" : "traːnsiːbar" } ,
814114: { "form" : "trānsībāris" , "form_IPA" : "traːnsiːbaːris" } ,
814116: { "form" : "trānsībāmur" , "form_IPA" : "traːnsiːbaːmur" } ,
814117: { "form" : "trānsībāminī" , "form_IPA" : "traːnsiːbaːminiː" } ,
814118: { "form" : "trānsībantur" , "form_IPA" : "traːnsiːbantur" } ,
814119: { "form" : "trānsībor" , "form_IPA" : "traːnsiːbor" } ,
814120: { "form" : "trānsīberis" , "form_IPA" : "traːnsiːberis" } ,
814122: { "form" : "trānsībimur" , "form_IPA" : "traːnsiːbimur" } ,
814123: { "form" : "trānsībiminī" , "form_IPA" : "traːnsiːbiminiː" } ,
814124: { "form" : "trānsībuntur" , "form_IPA" : "traːnsiːbuntur" } ,
814125: { "form" : "trānsear" , "form_IPA" : "traːnsear" } ,
814126: { "form" : "trānseāris" , "form_IPA" : "traːnseaːris" } ,
814128: { "form" : "trānseāmur" , "form_IPA" : "traːnseaːmur" } ,
814129: { "form" : "trānseāminī" , "form_IPA" : "traːnseaːminiː" } ,
814130: { "form" : "trānseantur" , "form_IPA" : "traːnseantur" } ,
814131: { "form" : "trānsīrer" , "form_IPA" : "traːnsiːrer" } ,
814132: { "form" : "trānsīrēris" , "form_IPA" : "traːnsiːreːris" } ,
814134: { "form" : "trānsīrēmur" , "form_IPA" : "traːnsiːreːmur" } ,
814135: { "form" : "trānsīrēminī" , "form_IPA" : "traːnsiːreːminiː" } ,
814136: { "form" : "trānsīrentur" , "form_IPA" : "traːnsiːrentur" } ,
814137: { "form" : "trānsīre" , "form_IPA" : "traːnsiːre" } ,
814138: { "form" : "trānsīminī" , "form_IPA" : "traːnsiːminiː" } ,
814139: { "form" : "trānsītor" , "form_IPA" : "traːnsiːtor" } ,
814140: { "form" : "trānsītor" , "form_IPA" : "traːnsiːtor" } ,
814141: { "form" : "trānseuntor" , "form_IPA" : "traːnseuntor" } ,
814147: { "form" : "trānseundus" , "form_IPA" : "traːnseundus" } ,
814148: { "form" : "trānseunda" , "form_IPA" : "traːnseunda" } ,
814149: { "form" : "trānseundum" , "form_IPA" : "traːnseundum" } ,
814150: { "form" : "trānseundī" , "form_IPA" : "traːnseundiː" } ,
814151: { "form" : "trānseundae" , "form_IPA" : "traːnseundaj" } ,
814152: { "form" : "trānseundī" , "form_IPA" : "traːnseundiː" } ,
814153: { "form" : "trānseundō" , "form_IPA" : "traːnseundoː" } ,
814154: { "form" : "trānseundae" , "form_IPA" : "traːnseundaj" } ,
814155: { "form" : "trānseundō" , "form_IPA" : "traːnseundoː" } ,
814156: { "form" : "trānseundum" , "form_IPA" : "traːnseundum" } ,
814157: { "form" : "trānseundam" , "form_IPA" : "traːnseundam" } ,
814159: { "form" : "trānseunde" , "form_IPA" : "traːnseunde" } ,
814160: { "form" : "trānseunda" , "form_IPA" : "traːnseunda" } ,
814161: { "form" : "trānseundum" , "form_IPA" : "traːnseundum" } ,
814162: { "form" : "trānseundō" , "form_IPA" : "traːnseundoː" } ,
814163: { "form" : "trānseundā" , "form_IPA" : "traːnseundaː" } ,
814164: { "form" : "trānseundō" , "form_IPA" : "traːnseundoː" } ,
814165: { "form" : "trānseundī" , "form_IPA" : "traːnseundiː" } ,
814166: { "form" : "trānseundae" , "form_IPA" : "traːnseundaj" } ,
814167: { "form" : "trānseunda" , "form_IPA" : "traːnseunda" } ,
814168: { "form" : "trānseundōrum" , "form_IPA" : "traːnseundoːrum" } ,
814169: { "form" : "trānseundārum" , "form_IPA" : "traːnseundaːrum" } ,
814170: { "form" : "trānseundōrum" , "form_IPA" : "traːnseundoːrum" } ,
814171: { "form" : "trānseundīs" , "form_IPA" : "traːnseundiːs" } ,
814172: { "form" : "trānseundīs" , "form_IPA" : "traːnseundiːs" } ,
814173: { "form" : "trānseundīs" , "form_IPA" : "traːnseundiːs" } ,
814174: { "form" : "trānseundōs" , "form_IPA" : "traːnseundoːs" } ,
814175: { "form" : "trānseundās" , "form_IPA" : "traːnseundaːs" } ,
814176: { "form" : "trānseunda" , "form_IPA" : "traːnseunda" } ,
814177: { "form" : "trānseundī" , "form_IPA" : "traːnseundiː" } ,
814178: { "form" : "trānseundae" , "form_IPA" : "traːnseundaj" } ,
814179: { "form" : "trānseunda" , "form_IPA" : "traːnseunda" } ,
814180: { "form" : "trānseundīs" , "form_IPA" : "traːnseundiːs" } ,
814181: { "form" : "trānseundīs" , "form_IPA" : "traːnseundiːs" } ,
814182: { "form" : "trānseundīs" , "form_IPA" : "traːnseundiːs" } ,
833157: { "form" : "uēneor" , "form_IPA" : "weːneor" } ,
833158: { "form" : "uēnīris" , "form_IPA" : "weːniːris" } ,
833160: { "form" : "uēnīmur" , "form_IPA" : "weːniːmur" } ,
833161: { "form" : "uēnīminī" , "form_IPA" : "weːniːminiː" } ,
833162: { "form" : "uēneuntur" , "form_IPA" : "weːneuntur" } ,
833163: { "form" : "uēnībar" , "form_IPA" : "weːniːbar" } ,
833164: { "form" : "uēnībāris" , "form_IPA" : "weːniːbaːris" } ,
833166: { "form" : "uēnībāmur" , "form_IPA" : "weːniːbaːmur" } ,
833167: { "form" : "uēnībāminī" , "form_IPA" : "weːniːbaːminiː" } ,
833168: { "form" : "uēnībantur" , "form_IPA" : "weːniːbantur" } ,
833169: { "form" : "uēnībor" , "form_IPA" : "weːniːbor" } ,
833170: { "form" : "uēnīberis" , "form_IPA" : "weːniːberis" } ,
833172: { "form" : "uēnībimur" , "form_IPA" : "weːniːbimur" } ,
833173: { "form" : "uēnībiminī" , "form_IPA" : "weːniːbiminiː" } ,
833174: { "form" : "uēnībuntur" , "form_IPA" : "weːniːbuntur" } ,
833175: { "form" : "uēnear" , "form_IPA" : "weːnear" } ,
833176: { "form" : "uēneāris" , "form_IPA" : "weːneaːris" } ,
833178: { "form" : "uēneāmur" , "form_IPA" : "weːneaːmur" } ,
833179: { "form" : "uēneāminī" , "form_IPA" : "weːneaːminiː" } ,
833180: { "form" : "uēneantur" , "form_IPA" : "weːneantur" } ,
833181: { "form" : "uēnīrer" , "form_IPA" : "weːniːrer" } ,
833182: { "form" : "uēnīrēris" , "form_IPA" : "weːniːreːris" } ,
833184: { "form" : "uēnīrēmur" , "form_IPA" : "weːniːreːmur" } ,
833185: { "form" : "uēnīrēminī" , "form_IPA" : "weːniːreːminiː" } ,
833186: { "form" : "uēnīrentur" , "form_IPA" : "weːniːrentur" } ,
833187: { "form" : "uēnīre" , "form_IPA" : "weːniːre" } ,
833188: { "form" : "uēnīminī" , "form_IPA" : "weːniːminiː" } ,
833189: { "form" : "uēnītor" , "form_IPA" : "weːniːtor" } ,
833190: { "form" : "uēnītor" , "form_IPA" : "weːniːtor" } ,
833191: { "form" : "uēneuntor" , "form_IPA" : "weːneuntor" } ,
833197: { "form" : "uēneundus" , "form_IPA" : "weːneundus" } ,
833198: { "form" : "uēneunda" , "form_IPA" : "weːneunda" } ,
833199: { "form" : "uēneundum" , "form_IPA" : "weːneundum" } ,
833200: { "form" : "uēneundī" , "form_IPA" : "weːneundiː" } ,
833201: { "form" : "uēneundae" , "form_IPA" : "weːneundaj" } ,
833202: { "form" : "uēneundī" , "form_IPA" : "weːneundiː" } ,
833203: { "form" : "uēneundō" , "form_IPA" : "weːneundoː" } ,
833204: { "form" : "uēneundae" , "form_IPA" : "weːneundaj" } ,
833205: { "form" : "uēneundō" , "form_IPA" : "weːneundoː" } ,
833206: { "form" : "uēneundum" , "form_IPA" : "weːneundum" } ,
833207: { "form" : "uēneundam" , "form_IPA" : "weːneundam" } ,
833209: { "form" : "uēneunde" , "form_IPA" : "weːneunde" } ,
833210: { "form" : "uēneunda" , "form_IPA" : "weːneunda" } ,
833211: { "form" : "uēneundum" , "form_IPA" : "weːneundum" } ,
833212: { "form" : "uēneundō" , "form_IPA" : "weːneundoː" } ,
833213: { "form" : "uēneundā" , "form_IPA" : "weːneundaː" } ,
833214: { "form" : "uēneundō" , "form_IPA" : "weːneundoː" } ,
833215: { "form" : "uēneundī" , "form_IPA" : "weːneundiː" } ,
833216: { "form" : "uēneundae" , "form_IPA" : "weːneundaj" } ,
833217: { "form" : "uēneunda" , "form_IPA" : "weːneunda" } ,
833218: { "form" : "uēneundōrum" , "form_IPA" : "weːneundoːrum" } ,
833219: { "form" : "uēneundārum" , "form_IPA" : "weːneundaːrum" } ,
833220: { "form" : "uēneundōrum" , "form_IPA" : "weːneundoːrum" } ,
833221: { "form" : "uēneundīs" , "form_IPA" : "weːneundiːs" } ,
833222: { "form" : "uēneundīs" , "form_IPA" : "weːneundiːs" } ,
833223: { "form" : "uēneundīs" , "form_IPA" : "weːneundiːs" } ,
833224: { "form" : "uēneundōs" , "form_IPA" : "weːneundoːs" } ,
833225: { "form" : "uēneundās" , "form_IPA" : "weːneundaːs" } ,
833226: { "form" : "uēneunda" , "form_IPA" : "weːneunda" } ,
833227: { "form" : "uēneundī" , "form_IPA" : "weːneundiː" } ,
833228: { "form" : "uēneundae" , "form_IPA" : "weːneundaj" } ,
833229: { "form" : "uēneunda" , "form_IPA" : "weːneunda" } ,
833230: { "form" : "uēneundīs" , "form_IPA" : "weːneundiːs" } ,
833231: { "form" : "uēneundīs" , "form_IPA" : "weːneundiːs" } ,
833232: { "form" : "uēneundīs" , "form_IPA" : "weːneundiːs" }
}

In [134]:
for row in changes_verbs:
    #print(row)
    for column in changes_verbs[row]:
        #print(column)
        #print(changes[row][column])
        LatInfLexi_verbs.loc[row,column] = changes_verbs[row][column]

In [135]:
changes_nouns = {
# removing capital letters introduced by excel for "falso" and "uero"
3638: { "form" : "falsō"},
3641: { "form" : "falsō"},
11870: { "form" : "uērō"},
11873: { "form" : "uērō"}
}

In [136]:
for row in changes_nouns:
    #print(row)
    for column in changes_nouns[row]:
        #print(column)
        #print(changes[row][column])
        LatInfLexi_nouns.loc[row,column] = changes_nouns[row][column]

# Distinguishing defective cells from missing data

In [137]:
defective_verbs = pd.read_csv("./etc/verbs_defectiveness_mapping_manual.csv", sep="\t", index_col="lexeme")
defective_nouns = pd.read_csv("./etc/nouns_defectiveness_mapping_manual.csv", sep="\t", index_col="lexeme")

In [138]:
for i in LatInfLexi_nouns.index:
    if LatInfLexi_nouns.loc[i,"form"] == "#DEF#" and LatInfLexi_nouns.loc[i,"form_IPA"] == "#DEF#" :
        if defective_nouns.loc[LatInfLexi_nouns.loc[i,"lexeme"],"systematicDefectiveness"] != "plurale tantum":
            LatInfLexi_nouns.loc[i,"form"] = ""
            LatInfLexi_nouns.loc[i,"form_IPA"] = ""
            #print(LatInfLexi_nouns.loc[i,"lexeme"],LatInfLexi_nouns.loc[i,"PoSTag:features"],LatInfLexi_nouns.loc[i,"form_IPA"],type(LatInfLexi_nouns.loc[i,"form_IPA"]))

In [139]:
LatInfLexi_nouns = LatInfLexi_nouns[LatInfLexi_nouns["form"] != ""]

In [140]:
never_defective_cells = ['VERB:Sup+-+-+-+Act+-+-+Acc+-', 'VERB:Sup+-+-+-+Pass+-+-+Abl+-', 'VERB:Part+-+Past+Perf+Pass+-+Sing+Nom+Masc', 'VERB:Part+-+Past+Perf+Pass+-+Sing+Nom+Fem', 'VERB:Part+-+Past+Perf+Pass+-+Sing+Nom+Neut', 'VERB:Part+-+Past+Perf+Pass+-+Sing+Gen+Masc', 'VERB:Part+-+Past+Perf+Pass+-+Sing+Gen+Fem', 'VERB:Part+-+Past+Perf+Pass+-+Sing+Gen+Neut', 'VERB:Part+-+Past+Perf+Pass+-+Sing+Dat+Masc', 'VERB:Part+-+Past+Perf+Pass+-+Sing+Dat+Fem', 'VERB:Part+-+Past+Perf+Pass+-+Sing+Dat+Neut', 'VERB:Part+-+Past+Perf+Pass+-+Sing+Acc+Masc', 'VERB:Part+-+Past+Perf+Pass+-+Sing+Acc+Fem', 'VERB:Part+-+Past+Perf+Pass+-+Sing+Acc+Neut', 'VERB:Part+-+Past+Perf+Pass+-+Sing+Voc+Masc', 'VERB:Part+-+Past+Perf+Pass+-+Sing+Voc+Fem', 'VERB:Part+-+Past+Perf+Pass+-+Sing+Voc+Neut', 'VERB:Part+-+Past+Perf+Pass+-+Sing+Abl+Masc', 'VERB:Part+-+Past+Perf+Pass+-+Sing+Abl+Fem', 'VERB:Part+-+Past+Perf+Pass+-+Sing+Abl+Neut', 'VERB:Part+-+Past+Perf+Pass+-+Plur+Nom+Masc', 'VERB:Part+-+Past+Perf+Pass+-+Plur+Nom+Fem', 'VERB:Part+-+Past+Perf+Pass+-+Plur+Nom+Neut', 'VERB:Part+-+Past+Perf+Pass+-+Plur+Gen+Masc', 'VERB:Part+-+Past+Perf+Pass+-+Plur+Gen+Fem', 'VERB:Part+-+Past+Perf+Pass+-+Plur+Gen+Neut', 'VERB:Part+-+Past+Perf+Pass+-+Plur+Dat+Masc', 'VERB:Part+-+Past+Perf+Pass+-+Plur+Dat+Fem', 'VERB:Part+-+Past+Perf+Pass+-+Plur+Dat+Neut', 'VERB:Part+-+Past+Perf+Pass+-+Plur+Acc+Masc', 'VERB:Part+-+Past+Perf+Pass+-+Plur+Acc+Fem', 'VERB:Part+-+Past+Perf+Pass+-+Plur+Acc+Neut', 'VERB:Part+-+Past+Perf+Pass+-+Plur+Voc+Masc', 'VERB:Part+-+Past+Perf+Pass+-+Plur+Voc+Fem', 'VERB:Part+-+Past+Perf+Pass+-+Plur+Voc+Neut', 'VERB:Part+-+Past+Perf+Pass+-+Plur+Abl+Masc', 'VERB:Part+-+Past+Perf+Pass+-+Plur+Abl+Fem', 'VERB:Part+-+Past+Perf+Pass+-+Plur+Abl+Neut', 'VERB:Part+-+Fut+-+Act+-+Sing+Nom+Masc', 'VERB:Part+-+Fut+-+Act+-+Sing+Nom+Fem', 'VERB:Part+-+Fut+-+Act+-+Sing+Nom+Neut', 'VERB:Part+-+Fut+-+Act+-+Sing+Gen+Masc', 'VERB:Part+-+Fut+-+Act+-+Sing+Gen+Fem', 'VERB:Part+-+Fut+-+Act+-+Sing+Gen+Neut', 'VERB:Part+-+Fut+-+Act+-+Sing+Dat+Masc', 'VERB:Part+-+Fut+-+Act+-+Sing+Dat+Fem', 'VERB:Part+-+Fut+-+Act+-+Sing+Dat+Neut', 'VERB:Part+-+Fut+-+Act+-+Sing+Acc+Masc', 'VERB:Part+-+Fut+-+Act+-+Sing+Acc+Fem', 'VERB:Part+-+Fut+-+Act+-+Sing+Acc+Neut', 'VERB:Part+-+Fut+-+Act+-+Sing+Voc+Masc', 'VERB:Part+-+Fut+-+Act+-+Sing+Voc+Fem', 'VERB:Part+-+Fut+-+Act+-+Sing+Voc+Neut', 'VERB:Part+-+Fut+-+Act+-+Sing+Abl+Masc', 'VERB:Part+-+Fut+-+Act+-+Sing+Abl+Fem', 'VERB:Part+-+Fut+-+Act+-+Sing+Abl+Neut', 'VERB:Part+-+Fut+-+Act+-+Plur+Nom+Masc', 'VERB:Part+-+Fut+-+Act+-+Plur+Nom+Fem', 'VERB:Part+-+Fut+-+Act+-+Plur+Nom+Neut', 'VERB:Part+-+Fut+-+Act+-+Plur+Gen+Masc', 'VERB:Part+-+Fut+-+Act+-+Plur+Gen+Fem', 'VERB:Part+-+Fut+-+Act+-+Plur+Gen+Neut', 'VERB:Part+-+Fut+-+Act+-+Plur+Dat+Masc', 'VERB:Part+-+Fut+-+Act+-+Plur+Dat+Fem', 'VERB:Part+-+Fut+-+Act+-+Plur+Dat+Neut', 'VERB:Part+-+Fut+-+Act+-+Plur+Acc+Masc', 'VERB:Part+-+Fut+-+Act+-+Plur+Acc+Fem', 'VERB:Part+-+Fut+-+Act+-+Plur+Acc+Neut', 'VERB:Part+-+Fut+-+Act+-+Plur+Voc+Masc', 'VERB:Part+-+Fut+-+Act+-+Plur+Voc+Fem', 'VERB:Part+-+Fut+-+Act+-+Plur+Voc+Neut', 'VERB:Part+-+Fut+-+Act+-+Plur+Abl+Masc', 'VERB:Part+-+Fut+-+Act+-+Plur+Abl+Fem', 'VERB:Part+-+Fut+-+Act+-+Plur+Abl+Neut']

In [None]:
for i in tqdm(LatInfLexi_verbs.index):
    if LatInfLexi_verbs.loc[i,"form"] == "#DEF#" and LatInfLexi_verbs.loc[i,"form_IPA"] == "#DEF#":
        if defective_verbs.loc[LatInfLexi_verbs.loc[i,"lexeme"],"systematicDefectiveness"] == "none" or LatInfLexi_verbs.loc[i,"PoSTag:features"] in never_defective_cells:
            LatInfLexi_verbs.loc[i,"form"] = ""
            LatInfLexi_verbs.loc[i,"form_IPA"] = ""
            #print(LatInfLexi_verbs.loc[i,"lexeme"],LatInfLexi_verbs.loc[i,"PoSTag:features"],LatInfLexi_verbs.loc[i,"form_IPA"],type(LatInfLexi_verbs.loc[i,"form_IPA"]))

 93%|████████████████████████████████████████████████████████████████████     | 793074/850392 [08:47<01:55, 496.91it/s]

In [None]:
LatInfLexi_verbs = LatInfLexi_verbs[LatInfLexi_verbs["form"] != ""]

# Combining forms tables

Adding POS

In [None]:
LatInfLexi_verbs["POS"] = "verb"
LatInfLexi_nouns["POS"] = "noun"

Generating form_id

In [None]:
def add_form_id(df, suffix=""):
    df.index.name = "form_id"
    df.reset_index(inplace=True)
    df["form_id"] = "form_" + df["form_id"].apply(str) + suffix

add_form_id(LatInfLexi_verbs, "_v")
add_form_id(LatInfLexi_nouns, "_n")

Concatenating forms tables:

In [None]:
LatInfLexi_forms = pd.concat([LatInfLexi_verbs, LatInfLexi_nouns])

# Converting forms to Paralex format

Obtaining columns in Paralex format:

In [None]:
col_map = {"form": "orth_form", "form_IPA": "phon_form", "PoSTag:features": "cell",
           "freqTFTL":"frequency",
           "freqAntiquitas":"frequency_Antiquitas" ,
           "freqAetasPatrum":"frequency_AetasPatrum",
            "freqMediumAeuum":"frequency_MediumAeuum",
            "freqRecentiorLatinitas":"frequency_RecentiorLatinitas"
           }
LatInfLexi_forms.rename(col_map, axis=1, inplace=True)

Mapping cells to new scheme:

In [None]:
LatInfLexi_cells_mapper = LatInfLexi_cells.set_index("LatInFlexi-cell").cell_id.to_dict()
LatInfLexi_forms.loc[:,"cell"] = LatInfLexi_forms.cell.map(LatInfLexi_cells_mapper)

In [None]:
LatInfLexi_forms.sample(5)

Setting form_id as index

In [None]:
LatInfLexi_forms = LatInfLexi_forms.set_index("form_id")

In [None]:
LatInfLexi_forms

Adding stress

In [None]:
C = r"b|d|ɡ|m|n|l|r|z|p|pʰ|f|t|tʰ|s|k|kʰ|h"
V = r"waj|[jw][aeiouy]ː|[aeiouy]ː?|[jw][aeiouy]|[aeiouy][jw]"
segmenter = f"(?:{C}|({V}))*?"


def search_vowels(word):
    segmented = regex.fullmatch(segmenter, word)
    return segmented.spans(1)

def find_latin_stress(word):

    if word == "#DEF#" or word == "":
        return word

    def stress(idxs):
        i = idxs[0]
        if word[i] in 'wj':
            i += 1
        return word[:i] + "ˈ" + word[i:]

    indexes = search_vowels(word)

    # 2 syllables or less => stress first syllable
    if len(indexes) <= 2:
        return stress(indexes[0])

    *_, antepenult, penult, ultimate = indexes

    # If the penult has a long vowel it is stressed
    if word[slice(*penult)][-1] in {"ː", "j", "w"}:
        return stress(penult)

    # Single C after the penult => short penult => stress antepenult
    if (ultimate[0] - penult[1]) < 2:
        return stress(antepenult)

    # For the rest, it depends on the consonant sequence
    c_seq = word[penult[1]:ultimate[0]]
    cl = re.compile("^[bdɡpctd]ʰ?[rl]$")

    # C seq is a liquid cluster => short penult => stress antepenult
    if cl.match(c_seq):
        return stress(antepenult)

    # other C sequence => long penult => stress penult
    return stress(penult)


LatInfLexi_forms.loc[:, "phon_form"] = LatInfLexi_forms["phon_form"].apply(find_latin_stress)

Separating sounds with spaces

In [None]:
def splitter(series, split_pattern):
    series = series.str.split(pat=split_pattern, regex=True)
    return series.apply(lambda x: " ".join([char for char in x if char]))

sounds = ['b', 'd', 'ɡ', 'm', 'n', 'l', 'r', 'z', 'p', 'pʰ', 'f', 't', 'tʰ', 's', 'k', 'kʰ', 'h', 'j', 'w', 'a', 'aː', 'e', 'eː', 'i', 'iː', 'o', 'oː', 'u', 'uː', 'ˈa', 'ˈaː', 'ˈe', 'ˈeː', 'ˈi', 'ˈiː', 'ˈo', 'ˈoː', 'ˈu', 'ˈuː', 'ˈy', 'ˈyː']
split_pattern = "(" + "|".join(sorted(sounds, key=len, reverse=True)) + ")"
LatInfLexi_forms["phon_form"] = splitter(LatInfLexi_forms["phon_form"], split_pattern)

# Adjusting the transcription

In [None]:
# Adding frequencies to the cells

In [None]:
cells_freq = LatInfLexi_forms.groupby("cell")[["frequency", 
                                              "frequency_Antiquitas",
                                              "frequency_AetasPatrum",
                                              "frequency_MediumAeuum",
                                              "frequency_RecentiorLatinitas"]].agg(sum)
cells_freq.index.name = "cell_id"

In [None]:
cells_freq

In [None]:
LatInfLexi_cells.set_index("cell_id", inplace=True)

In [None]:
LatInfLexi_cells = pd.merge(LatInfLexi_cells, cells_freq, left_index=True, right_index=True)

# Creating the lexemes table

In [None]:
LatInfLexi_lexemes = LatInfLexi_forms.groupby(["lexeme", "POS"])[["frequency", 
                                                                      "frequency_Antiquitas",
                                                                      "frequency_AetasPatrum",
                                                                      "frequency_MediumAeuum",
                                                                      "frequency_RecentiorLatinitas"]].agg(sum).reset_index("POS", drop=False)
LatInfLexi_lexemes.index.name = "lexeme_id"

In [None]:
LatInfLexi_lexemes

# Output

Writing it all to file

In [None]:
LatInfLexi_lexemes.to_csv("LatInfLexi-lexemes.csv")
LatInfLexi_forms.to_csv("LatInfLexi-forms.csv")
LatInfLexi_cells.to_csv("LatInfLexi-cells.csv")

Remove temporary files from v1.1

In [None]:
%%bash
rm LatInfLexi-nouns.csv
rm LatInfLexi-verbs.csv
