In [55]:
import pandas as pd
import numpy as np
import re
import regex

from tqdm import tqdm
tqdm.pandas()

# Reading cells 

We have manually made tables in `etc/`, we just annotate them with POS and concatenate to make a single table:

In [56]:
LatInfLexi_verbs_cells = pd.read_csv("etc/LatInfLexi-verbs_cells.csv")
LatInfLexi_verbs_cells["POS"] = "verb"
LatInfLexi_nouns_cells = pd.read_csv("etc/LatInfLexi-nouns_cells.csv")
LatInfLexi_nouns_cells["POS"] = "noun"
LatInfLexi_cells = pd.concat([LatInfLexi_verbs_cells, LatInfLexi_nouns_cells])

# Reading forms

We get the previous version's forms

In [57]:
%%bash 
git checkout v1.1 -- LatInfLexi-nouns.csv
git checkout v1.1 -- LatInfLexi-verbs.csv

We read these tables

In [58]:
LatInfLexi_verbs = pd.read_csv("LatInfLexi-verbs.csv")
LatInfLexi_nouns = pd.read_csv("LatInfLexi-nouns.csv")

# Making some corrections

In [59]:
changes = {
# vowel length in the present system of lexeme "praescisco" (and consequently IPA transcriptions) 
633730: { "form" : "praescīscō" , "form_IPA" : "prajskiːskoː" } ,
633731: { "form" : "praescīscis" , "form_IPA" : "prajskiːskis" } ,
633732: { "form" : "praescīscit" , "form_IPA" : "prajskiːskit" } ,
633733: { "form" : "praescīscimus" , "form_IPA" : "prajskiːskimus" } ,
633734: { "form" : "praescīscitis" , "form_IPA" : "prajskiːskitis" } ,
633735: { "form" : "praescīscunt" , "form_IPA" : "prajskiːskunt" } ,
633736: { "form" : "praescīscēbam" , "form_IPA" : "prajskiːskeːbam" } ,
633737: { "form" : "praescīscēbās" , "form_IPA" : "prajskiːskeːbaːs" } ,
633738: { "form" : "praescīscēbat" , "form_IPA" : "prajskiːskeːbat" } ,
633739: { "form" : "praescīscēbāmus" , "form_IPA" : "prajskiːskeːbaːmus" } ,
633740: { "form" : "praescīscēbātis" , "form_IPA" : "prajskiːskeːbaːtis" } ,
633741: { "form" : "praescīscēbant" , "form_IPA" : "prajskiːskeːbant" } ,
633742: { "form" : "praescīscam" , "form_IPA" : "prajskiːskam" } ,
633743: { "form" : "praescīscēs" , "form_IPA" : "prajskiːskeːs" } ,
633744: { "form" : "praescīscet" , "form_IPA" : "prajskiːsket" } ,
633745: { "form" : "praescīscēmus" , "form_IPA" : "prajskiːskeːmus" } ,
633746: { "form" : "praescīscētis" , "form_IPA" : "prajskiːskeːtis" } ,
633747: { "form" : "praescīscent" , "form_IPA" : "prajskiːskent" } ,
633748: { "form" : "praescīscam" , "form_IPA" : "prajskiːskam" } ,
633749: { "form" : "praescīscās" , "form_IPA" : "prajskiːskaːs" } ,
633750: { "form" : "praescīscat" , "form_IPA" : "prajskiːskat" } ,
633751: { "form" : "praescīscāmus" , "form_IPA" : "prajskiːskaːmus" } ,
633752: { "form" : "praescīscātis" , "form_IPA" : "prajskiːskaːtis" } ,
633753: { "form" : "praescīscant" , "form_IPA" : "prajskiːskant" } ,
633754: { "form" : "praescīscerem" , "form_IPA" : "prajskiːskerem" } ,
633755: { "form" : "praescīscerēs" , "form_IPA" : "prajskiːskereːs" } ,
633756: { "form" : "praescīsceret" , "form_IPA" : "prajskiːskeret" } ,
633757: { "form" : "praescīscerēmus" , "form_IPA" : "prajskiːskereːmus" } ,
633758: { "form" : "praescīscerētis" , "form_IPA" : "prajskiːskereːtis" } ,
633759: { "form" : "praescīscerent" , "form_IPA" : "prajskiːskerent" } ,
633760: { "form" : "praescīsce" , "form_IPA" : "prajskiːske" } ,
633761: { "form" : "praescīscite" , "form_IPA" : "prajskiːskite" } ,
633762: { "form" : "praescīscitō" , "form_IPA" : "prajskiːskitoː" } ,
633763: { "form" : "praescīscitō" , "form_IPA" : "prajskiːskitoː" } ,
633764: { "form" : "praescīscitōte" , "form_IPA" : "prajskiːskitoːte" } ,
633765: { "form" : "praescīscuntō" , "form_IPA" : "prajskiːskuntoː" } ,
633766: { "form" : "praescīscere" , "form_IPA" : "prajskiːskere" } ,
633767: { "form" : "praescīscor" , "form_IPA" : "prajskiːskor" } ,
633768: { "form" : "praescīsceris" , "form_IPA" : "prajskiːskeris" } ,
633769: { "form" : "praescīscitur" , "form_IPA" : "prajskiːskitur" } ,
633770: { "form" : "praescīscimur" , "form_IPA" : "prajskiːskimur" } ,
633771: { "form" : "praescīsciminī" , "form_IPA" : "prajskiːskiminiː" } ,
633772: { "form" : "praescīscuntur" , "form_IPA" : "prajskiːskuntur" } ,
633773: { "form" : "praescīscēbar" , "form_IPA" : "prajskiːskeːbar" } ,
633774: { "form" : "praescīscēbāris" , "form_IPA" : "prajskiːskeːbaːris" } ,
633775: { "form" : "praescīscēbātur" , "form_IPA" : "prajskiːskeːbaːtur" } ,
633776: { "form" : "praescīscēbāmur" , "form_IPA" : "prajskiːskeːbaːmur" } ,
633777: { "form" : "praescīscēbāminī" , "form_IPA" : "prajskiːskeːbaːminiː" } ,
633778: { "form" : "praescīscēbantur" , "form_IPA" : "prajskiːskeːbantur" } ,
633779: { "form" : "praescīscar" , "form_IPA" : "prajskiːskar" } ,
633780: { "form" : "praescīscēris" , "form_IPA" : "prajskiːskeːris" } ,
633781: { "form" : "praescīscētur" , "form_IPA" : "prajskiːskeːtur" } ,
633782: { "form" : "praescīscēmur" , "form_IPA" : "prajskiːskeːmur" } ,
633783: { "form" : "praescīscēminī" , "form_IPA" : "prajskiːskeːminiː" } ,
633784: { "form" : "praescīscentur" , "form_IPA" : "prajskiːskentur" } ,
633785: { "form" : "praescīscar" , "form_IPA" : "prajskiːskar" } ,
633786: { "form" : "praescīscāris" , "form_IPA" : "prajskiːskaːris" } ,
633787: { "form" : "praescīscātur" , "form_IPA" : "prajskiːskaːtur" } ,
633788: { "form" : "praescīscāmur" , "form_IPA" : "prajskiːskaːmur" } ,
633789: { "form" : "praescīscāminī" , "form_IPA" : "prajskiːskaːminiː" } ,
633790: { "form" : "praescīscantur" , "form_IPA" : "prajskiːskantur" } ,
633791: { "form" : "praescīscerer" , "form_IPA" : "prajskiːskerer" } ,
633792: { "form" : "praescīscerēris" , "form_IPA" : "prajskiːskereːris" } ,
633793: { "form" : "praescīscerētur" , "form_IPA" : "prajskiːskereːtur" } ,
633794: { "form" : "praescīscerēmur" , "form_IPA" : "prajskiːskereːmur" } ,
633795: { "form" : "praescīscerēminī" , "form_IPA" : "prajskiːskereːminiː" } ,
633796: { "form" : "praescīscerentur" , "form_IPA" : "prajskiːskerentur" } ,
633797: { "form" : "praescīscere" , "form_IPA" : "prajskiːskere" } ,
633798: { "form" : "praescīsciminī" , "form_IPA" : "prajskiːskiminiː" } ,
633799: { "form" : "praescīscitor" , "form_IPA" : "prajskiːskitor" } ,
633800: { "form" : "praescīscitor" , "form_IPA" : "prajskiːskitor" } ,
633801: { "form" : "praescīscuntor" , "form_IPA" : "prajskiːskuntor" } ,
633802: { "form" : "praescīscī" , "form_IPA" : "prajskiːskiː" } ,
633803: { "form" : "praescīscendī" , "form_IPA" : "prajskiːskendiː" } ,
633804: { "form" : "praescīscendō" , "form_IPA" : "prajskiːskendoː" } ,
633805: { "form" : "praescīscendum" , "form_IPA" : "prajskiːskendum" } ,
633806: { "form" : "praescīscendō" , "form_IPA" : "prajskiːskendoː" } ,
633807: { "form" : "praescīscendus" , "form_IPA" : "prajskiːskendus" } ,
633808: { "form" : "praescīscenda" , "form_IPA" : "prajskiːskenda" } ,
633809: { "form" : "praescīscendum" , "form_IPA" : "prajskiːskendum" } ,
633810: { "form" : "praescīscendī" , "form_IPA" : "prajskiːskendiː" } ,
633811: { "form" : "praescīscendae" , "form_IPA" : "prajskiːskendaj" } ,
633812: { "form" : "praescīscendī" , "form_IPA" : "prajskiːskendiː" } ,
633813: { "form" : "praescīscendō" , "form_IPA" : "prajskiːskendoː" } ,
633814: { "form" : "praescīscendae" , "form_IPA" : "prajskiːskendaj" } ,
633815: { "form" : "praescīscendō" , "form_IPA" : "prajskiːskendoː" } ,
633816: { "form" : "praescīscendum" , "form_IPA" : "prajskiːskendum" } ,
633817: { "form" : "praescīscendam" , "form_IPA" : "prajskiːskendam" } ,
633818: { "form" : "praescīscendum" , "form_IPA" : "prajskiːskendum" } ,
633819: { "form" : "praescīscende" , "form_IPA" : "prajskiːskende" } ,
633820: { "form" : "praescīscenda" , "form_IPA" : "prajskiːskenda" } ,
633821: { "form" : "praescīscendum" , "form_IPA" : "prajskiːskendum" } ,
633822: { "form" : "praescīscendō" , "form_IPA" : "prajskiːskendoː" } ,
633823: { "form" : "praescīscendā" , "form_IPA" : "prajskiːskendaː" } ,
633824: { "form" : "praescīscendō" , "form_IPA" : "prajskiːskendoː" } ,
633825: { "form" : "praescīscendī" , "form_IPA" : "prajskiːskendiː" } ,
633826: { "form" : "praescīscendae" , "form_IPA" : "prajskiːskendaj" } ,
633827: { "form" : "praescīscenda" , "form_IPA" : "prajskiːskenda" } ,
633828: { "form" : "praescīscendōrum" , "form_IPA" : "prajskiːskendoːrum" } ,
633829: { "form" : "praescīscendārum" , "form_IPA" : "prajskiːskendaːrum" } ,
633830: { "form" : "praescīscendōrum" , "form_IPA" : "prajskiːskendoːrum" } ,
633831: { "form" : "praescīscendīs" , "form_IPA" : "prajskiːskendiːs" } ,
633832: { "form" : "praescīscendīs" , "form_IPA" : "prajskiːskendiːs" } ,
633833: { "form" : "praescīscendīs" , "form_IPA" : "prajskiːskendiːs" } ,
633834: { "form" : "praescīscendōs" , "form_IPA" : "prajskiːskendoːs" } ,
633835: { "form" : "praescīscendās" , "form_IPA" : "prajskiːskendaːs" } ,
633836: { "form" : "praescīscenda" , "form_IPA" : "prajskiːskenda" } ,
633837: { "form" : "praescīscendī" , "form_IPA" : "prajskiːskendiː" } ,
633838: { "form" : "praescīscendae" , "form_IPA" : "prajskiːskendaj" } ,
633839: { "form" : "praescīscenda" , "form_IPA" : "prajskiːskenda" } ,
633840: { "form" : "praescīscendīs" , "form_IPA" : "prajskiːskendiːs" } ,
633841: { "form" : "praescīscendīs" , "form_IPA" : "prajskiːskendiːs" } ,
633842: { "form" : "praescīscendīs" , "form_IPA" : "prajskiːskendiːs" } ,
633843: { "form" : "praescīscēns" , "form_IPA" : "prajskiːskeːns" } ,
633844: { "form" : "praescīscēns" , "form_IPA" : "prajskiːskeːns" } ,
633845: { "form" : "praescīscēns" , "form_IPA" : "prajskiːskeːns" } ,
633846: { "form" : "praescīscentis" , "form_IPA" : "prajskiːskentis" } ,
633847: { "form" : "praescīscentis" , "form_IPA" : "prajskiːskentis" } ,
633848: { "form" : "praescīscentis" , "form_IPA" : "prajskiːskentis" } ,
633849: { "form" : "praescīscentī" , "form_IPA" : "prajskiːskentiː" } ,
633850: { "form" : "praescīscentī" , "form_IPA" : "prajskiːskentiː" } ,
633851: { "form" : "praescīscentī" , "form_IPA" : "prajskiːskentiː" } ,
633852: { "form" : "praescīscentem" , "form_IPA" : "prajskiːskentem" } ,
633853: { "form" : "praescīscentem" , "form_IPA" : "prajskiːskentem" } ,
633854: { "form" : "praescīscēns" , "form_IPA" : "prajskiːskeːns" } ,
633855: { "form" : "praescīscēns" , "form_IPA" : "prajskiːskeːns" } ,
633856: { "form" : "praescīscēns" , "form_IPA" : "prajskiːskeːns" } ,
633857: { "form" : "praescīscēns" , "form_IPA" : "prajskiːskeːns" } ,
633858: { "form" : "praescīscente" , "form_IPA" : "prajskiːskente" } ,
633859: { "form" : "praescīscente" , "form_IPA" : "prajskiːskente" } ,
633860: { "form" : "praescīscente" , "form_IPA" : "prajskiːskente" } ,
633861: { "form" : "praescīscentēs" , "form_IPA" : "prajskiːskenteːs" } ,
633862: { "form" : "praescīscentēs" , "form_IPA" : "prajskiːskenteːs" } ,
633863: { "form" : "praescīscentia" , "form_IPA" : "prajskiːskentia" } ,
633864: { "form" : "praescīscentium" , "form_IPA" : "prajskiːskentium" } ,
633865: { "form" : "praescīscentium" , "form_IPA" : "prajskiːskentium" } ,
633866: { "form" : "praescīscentium" , "form_IPA" : "prajskiːskentium" } ,
633867: { "form" : "praescīscentibus" , "form_IPA" : "prajskiːskentibus" } ,
633868: { "form" : "praescīscentibus" , "form_IPA" : "prajskiːskentibus" } ,
633869: { "form" : "praescīscentibus" , "form_IPA" : "prajskiːskentibus" } ,
633870: { "form" : "praescīscentēs" , "form_IPA" : "prajskiːskenteːs" } ,
633871: { "form" : "praescīscentēs" , "form_IPA" : "prajskiːskenteːs" } ,
633872: { "form" : "praescīscentia" , "form_IPA" : "prajskiːskentia" } ,
633873: { "form" : "praescīscentēs" , "form_IPA" : "prajskiːskenteːs" } ,
633874: { "form" : "praescīscentēs" , "form_IPA" : "prajskiːskenteːs" } ,
633875: { "form" : "praescīscentia" , "form_IPA" : "prajskiːskentia" } ,
633876: { "form" : "praescīscentibus" , "form_IPA" : "prajskiːskentibus" } ,
633877: { "form" : "praescīscentibus" , "form_IPA" : "prajskiːskentibus" } ,
633878: { "form" : "praescīscentibus" , "form_IPA" : "prajskiːskentibus" } ,
# "i" in prs.act.ind.2/3sg of lexeme "aio" is vowel, not glide (cf. Bennett grammar, §135)
41149: { "form_IPA" : "aiːs" } ,
41150: { "form_IPA" : "ait" } ,
# fix to switch sbjv.1pl/3pl of "aio"
41169: { "form" : "#DEF#" , "form_IPA" : "#DEF#" },
41171: { "form" : "aiant" , "form_IPA" : "ajant" },
# recovering prf.3sg of "aio"
41299: { "form" : "ait" , "form_IPA" : "ait" },
# recovering passive forms of transitive derivatives of irregular "eo" (cf. Bennett grammar, §132.1, identification of transitives based on lemlat's codles
1857: { "form" : "adeor" , "form_IPA" : "adeor" } ,
1858: { "form" : "adīris" , "form_IPA" : "adiːris" } ,
1858: { "form" : "adīmur" , "form_IPA" : "adiːmur" } ,
1858: { "form" : "adīminī" , "form_IPA" : "adiːminiː" } ,
1858: { "form" : "adeuntur" , "form_IPA" : "adeuntur" } ,
1858: { "form" : "adībar" , "form_IPA" : "adiːbar" } ,
1858: { "form" : "adībāris" , "form_IPA" : "adiːbaːris" } ,
1858: { "form" : "adībāmur" , "form_IPA" : "adiːbaːmur" } ,
1858: { "form" : "adībāminī" , "form_IPA" : "adiːbaːminiː" } ,
1859: { "form" : "adībantur" , "form_IPA" : "adiːbantur" } ,
1859: { "form" : "adībor" , "form_IPA" : "adiːbor" } ,
1859: { "form" : "adīberis" , "form_IPA" : "adiːberis" } ,
1859: { "form" : "adībimur" , "form_IPA" : "adiːbimur" } ,
1859: { "form" : "adībiminī" , "form_IPA" : "adiːbiminiː" } ,
1859: { "form" : "adībuntur" , "form_IPA" : "adiːbuntur" } ,
1859: { "form" : "adear" , "form_IPA" : "adear" } ,
1859: { "form" : "adeāris" , "form_IPA" : "adeaːris" } ,
1860: { "form" : "adeāmur" , "form_IPA" : "adeaːmur" } ,
1860: { "form" : "adeāminī" , "form_IPA" : "adeaːminiː" } ,
1860: { "form" : "adeantur" , "form_IPA" : "adeantur" } ,
1860: { "form" : "adīrer" , "form_IPA" : "adiːrer" } ,
1860: { "form" : "adīrēris" , "form_IPA" : "adiːreːris" } ,
1860: { "form" : "adīrēmur" , "form_IPA" : "adiːreːmur" } ,
1860: { "form" : "adīrēminī" , "form_IPA" : "adiːreːminiː" } ,
1860: { "form" : "adīrentur" , "form_IPA" : "adiːrentur" } ,
1860: { "form" : "adīre" , "form_IPA" : "adiːre" } ,
1861: { "form" : "adīminī" , "form_IPA" : "adiːminiː" } ,
1861: { "form" : "adītor" , "form_IPA" : "adiːtor" } ,
1861: { "form" : "adītor" , "form_IPA" : "adiːtor" } ,
1861: { "form" : "adeuntor" , "form_IPA" : "adeuntor" } ,
1861: { "form" : "adeundus" , "form_IPA" : "adeundus" } ,
1862: { "form" : "adeunda" , "form_IPA" : "adeunda" } ,
1862: { "form" : "adeundum" , "form_IPA" : "adeundum" } ,
1862: { "form" : "adeundī" , "form_IPA" : "adeundiː" } ,
1862: { "form" : "adeundae" , "form_IPA" : "adeundaj" } ,
1862: { "form" : "adeundī" , "form_IPA" : "adeundiː" } ,
1862: { "form" : "adeundō" , "form_IPA" : "adeundoː" } ,
1862: { "form" : "adeundae" , "form_IPA" : "adeundaj" } ,
1862: { "form" : "adeundō" , "form_IPA" : "adeundoː" } ,
1862: { "form" : "adeundum" , "form_IPA" : "adeundum" } ,
1862: { "form" : "adeundam" , "form_IPA" : "adeundam" } ,
1863: { "form" : "adeunde" , "form_IPA" : "adeunde" } ,
1863: { "form" : "adeunda" , "form_IPA" : "adeunda" } ,
1863: { "form" : "adeundum" , "form_IPA" : "adeundum" } ,
1863: { "form" : "adeundō" , "form_IPA" : "adeundoː" } ,
1863: { "form" : "adeundā" , "form_IPA" : "adeundaː" } ,
1863: { "form" : "adeundō" , "form_IPA" : "adeundoː" } ,
1863: { "form" : "adeundī" , "form_IPA" : "adeundiː" } ,
1863: { "form" : "adeundae" , "form_IPA" : "adeundaj" } ,
1863: { "form" : "adeunda" , "form_IPA" : "adeunda" } ,
1864: { "form" : "adeundōrum" , "form_IPA" : "adeundoːrum" } ,
1864: { "form" : "adeundārum" , "form_IPA" : "adeundaːrum" } ,
1864: { "form" : "adeundōrum" , "form_IPA" : "adeundoːrum" } ,
1864: { "form" : "adeundīs" , "form_IPA" : "adeundiːs" } ,
1864: { "form" : "adeundīs" , "form_IPA" : "adeundiːs" } ,
1864: { "form" : "adeundīs" , "form_IPA" : "adeundiːs" } ,
1864: { "form" : "adeundōs" , "form_IPA" : "adeundoːs" } ,
1864: { "form" : "adeundās" , "form_IPA" : "adeundaːs" } ,
1864: { "form" : "adeunda" , "form_IPA" : "adeunda" } ,
1864: { "form" : "adeundī" , "form_IPA" : "adeundiː" } ,
1865: { "form" : "adeundae" , "form_IPA" : "adeundaj" } ,
1865: { "form" : "adeunda" , "form_IPA" : "adeunda" } ,
1865: { "form" : "adeundīs" , "form_IPA" : "adeundiːs" } ,
1865: { "form" : "adeundīs" , "form_IPA" : "adeundiːs" } ,
1865: { "form" : "adeundīs" , "form_IPA" : "adeundiːs" } ,
5337: { "form" : "anteeor" , "form_IPA" : "anteeor" } ,
5337: { "form" : "anteīris" , "form_IPA" : "anteiːris" } ,
5338: { "form" : "anteīmur" , "form_IPA" : "anteiːmur" } ,
5338: { "form" : "anteīminī" , "form_IPA" : "anteiːminiː" } ,
5338: { "form" : "anteeuntur" , "form_IPA" : "anteeuntur" } ,
5338: { "form" : "anteībar" , "form_IPA" : "anteiːbar" } ,
5338: { "form" : "anteībāris" , "form_IPA" : "anteiːbaːris" } ,
5338: { "form" : "anteībāmur" , "form_IPA" : "anteiːbaːmur" } ,
5338: { "form" : "anteībāminī" , "form_IPA" : "anteiːbaːminiː" } ,
5338: { "form" : "anteībantur" , "form_IPA" : "anteiːbantur" } ,
5338: { "form" : "anteībor" , "form_IPA" : "anteiːbor" } ,
5339: { "form" : "anteīberis" , "form_IPA" : "anteiːberis" } ,
5339: { "form" : "anteībimur" , "form_IPA" : "anteiːbimur" } ,
5339: { "form" : "anteībiminī" , "form_IPA" : "anteiːbiminiː" } ,
5339: { "form" : "anteībuntur" , "form_IPA" : "anteiːbuntur" } ,
5339: { "form" : "anteear" , "form_IPA" : "anteear" } ,
5339: { "form" : "anteeāris" , "form_IPA" : "anteeaːris" } ,
5339: { "form" : "anteeāmur" , "form_IPA" : "anteeaːmur" } ,
5339: { "form" : "anteeāminī" , "form_IPA" : "anteeaːminiː" } ,
5340: { "form" : "anteeantur" , "form_IPA" : "anteeantur" } ,
5340: { "form" : "anteīrer" , "form_IPA" : "anteiːrer" } ,
5340: { "form" : "anteīrēris" , "form_IPA" : "anteiːreːris" } ,
5340: { "form" : "anteīrēmur" , "form_IPA" : "anteiːreːmur" } ,
5340: { "form" : "anteīrēminī" , "form_IPA" : "anteiːreːminiː" } ,
5340: { "form" : "anteīrentur" , "form_IPA" : "anteiːrentur" } ,
5340: { "form" : "anteīre" , "form_IPA" : "anteiːre" } ,
5340: { "form" : "anteīminī" , "form_IPA" : "anteiːminiː" } ,
5340: { "form" : "anteītor" , "form_IPA" : "anteiːtor" } ,
5341: { "form" : "anteītor" , "form_IPA" : "anteiːtor" } ,
5341: { "form" : "anteeuntor" , "form_IPA" : "anteeuntor" } ,
5341: { "form" : "anteeundus" , "form_IPA" : "anteeundus" } ,
5341: { "form" : "anteeunda" , "form_IPA" : "anteeunda" } ,
5341: { "form" : "anteeundum" , "form_IPA" : "anteeundum" } ,
5342: { "form" : "anteeundī" , "form_IPA" : "anteeundiː" } ,
5342: { "form" : "anteeundae" , "form_IPA" : "anteeundaj" } ,
5342: { "form" : "anteeundī" , "form_IPA" : "anteeundiː" } ,
5342: { "form" : "anteeundō" , "form_IPA" : "anteeundoː" } ,
5342: { "form" : "anteeundae" , "form_IPA" : "anteeundaj" } ,
5342: { "form" : "anteeundō" , "form_IPA" : "anteeundoː" } ,
5342: { "form" : "anteeundum" , "form_IPA" : "anteeundum" } ,
5342: { "form" : "anteeundam" , "form_IPA" : "anteeundam" } ,
5342: { "form" : "anteeunde" , "form_IPA" : "anteeunde" } ,
5343: { "form" : "anteeunda" , "form_IPA" : "anteeunda" } ,
5343: { "form" : "anteeundum" , "form_IPA" : "anteeundum" } ,
5343: { "form" : "anteeundō" , "form_IPA" : "anteeundoː" } ,
5343: { "form" : "anteeundā" , "form_IPA" : "anteeundaː" } ,
5343: { "form" : "anteeundō" , "form_IPA" : "anteeundoː" } ,
5343: { "form" : "anteeundī" , "form_IPA" : "anteeundiː" } ,
5343: { "form" : "anteeundae" , "form_IPA" : "anteeundaj" } ,
5343: { "form" : "anteeunda" , "form_IPA" : "anteeunda" } ,
5343: { "form" : "anteeundōrum" , "form_IPA" : "anteeundoːrum" } ,
5343: { "form" : "anteeundārum" , "form_IPA" : "anteeundaːrum" } ,
5344: { "form" : "anteeundōrum" , "form_IPA" : "anteeundoːrum" } ,
5344: { "form" : "anteeundīs" , "form_IPA" : "anteeundiːs" } ,
5344: { "form" : "anteeundīs" , "form_IPA" : "anteeundiːs" } ,
5344: { "form" : "anteeundīs" , "form_IPA" : "anteeundiːs" } ,
5344: { "form" : "anteeundōs" , "form_IPA" : "anteeundoːs" } ,
5344: { "form" : "anteeundās" , "form_IPA" : "anteeundaːs" } ,
5344: { "form" : "anteeunda" , "form_IPA" : "anteeunda" } ,
5344: { "form" : "anteeundī" , "form_IPA" : "anteeundiː" } ,
5344: { "form" : "anteeundae" , "form_IPA" : "anteeundaj" } ,
5344: { "form" : "anteeunda" , "form_IPA" : "anteeunda" } ,
5345: { "form" : "anteeundīs" , "form_IPA" : "anteeundiːs" } ,
5345: { "form" : "anteeundīs" , "form_IPA" : "anteeundiːs" } ,
5345: { "form" : "anteeundīs" , "form_IPA" : "anteeundiːs" } ,
10112: { "form" : "circumeor" , "form_IPA" : "kirkumeor" } ,
10113: { "form" : "circumīris" , "form_IPA" : "kirkumiːris" } ,
10113: { "form" : "circumīmur" , "form_IPA" : "kirkumiːmur" } ,
10113: { "form" : "circumīminī" , "form_IPA" : "kirkumiːminiː" } ,
10113: { "form" : "circumeuntur" , "form_IPA" : "kirkumeuntur" } ,
10113: { "form" : "circumībar" , "form_IPA" : "kirkumiːbar" } ,
10113: { "form" : "circumībāris" , "form_IPA" : "kirkumiːbaːris" } ,
10113: { "form" : "circumībāmur" , "form_IPA" : "kirkumiːbaːmur" } ,
10113: { "form" : "circumībāminī" , "form_IPA" : "kirkumiːbaːminiː" } ,
10114: { "form" : "circumībantur" , "form_IPA" : "kirkumiːbantur" } ,
10114: { "form" : "circumībor" , "form_IPA" : "kirkumiːbor" } ,
10114: { "form" : "circumīberis" , "form_IPA" : "kirkumiːberis" } ,
10114: { "form" : "circumībimur" , "form_IPA" : "kirkumiːbimur" } ,
10114: { "form" : "circumībiminī" , "form_IPA" : "kirkumiːbiminiː" } ,
10114: { "form" : "circumībuntur" , "form_IPA" : "kirkumiːbuntur" } ,
10114: { "form" : "circumear" , "form_IPA" : "kirkumear" } ,
10114: { "form" : "circumeāris" , "form_IPA" : "kirkumeaːris" } ,
10115: { "form" : "circumeāmur" , "form_IPA" : "kirkumeaːmur" } ,
10115: { "form" : "circumeāminī" , "form_IPA" : "kirkumeaːminiː" } ,
10115: { "form" : "circumeantur" , "form_IPA" : "kirkumeantur" } ,
10115: { "form" : "circumīrer" , "form_IPA" : "kirkumiːrer" } ,
10115: { "form" : "circumīrēris" , "form_IPA" : "kirkumiːreːris" } ,
10115: { "form" : "circumīrēmur" , "form_IPA" : "kirkumiːreːmur" } ,
10115: { "form" : "circumīrēminī" , "form_IPA" : "kirkumiːreːminiː" } ,
10115: { "form" : "circumīrentur" , "form_IPA" : "kirkumiːrentur" } ,
10115: { "form" : "circumīre" , "form_IPA" : "kirkumiːre" } ,
10116: { "form" : "circumīminī" , "form_IPA" : "kirkumiːminiː" } ,
10116: { "form" : "circumītor" , "form_IPA" : "kirkumiːtor" } ,
10116: { "form" : "circumītor" , "form_IPA" : "kirkumiːtor" } ,
10116: { "form" : "circumeuntor" , "form_IPA" : "kirkumeuntor" } ,
10116: { "form" : "circumeundus" , "form_IPA" : "kirkumeundus" } ,
10117: { "form" : "circumeunda" , "form_IPA" : "kirkumeunda" } ,
10117: { "form" : "circumeundum" , "form_IPA" : "kirkumeundum" } ,
10117: { "form" : "circumeundī" , "form_IPA" : "kirkumeundiː" } ,
10117: { "form" : "circumeundae" , "form_IPA" : "kirkumeundaj" } ,
10117: { "form" : "circumeundī" , "form_IPA" : "kirkumeundiː" } ,
10117: { "form" : "circumeundō" , "form_IPA" : "kirkumeundoː" } ,
10117: { "form" : "circumeundae" , "form_IPA" : "kirkumeundaj" } ,
10117: { "form" : "circumeundō" , "form_IPA" : "kirkumeundoː" } ,
10117: { "form" : "circumeundum" , "form_IPA" : "kirkumeundum" } ,
10117: { "form" : "circumeundam" , "form_IPA" : "kirkumeundam" } ,
10118: { "form" : "circumeunde" , "form_IPA" : "kirkumeunde" } ,
10118: { "form" : "circumeunda" , "form_IPA" : "kirkumeunda" } ,
10118: { "form" : "circumeundum" , "form_IPA" : "kirkumeundum" } ,
10118: { "form" : "circumeundō" , "form_IPA" : "kirkumeundoː" } ,
10118: { "form" : "circumeundā" , "form_IPA" : "kirkumeundaː" } ,
10118: { "form" : "circumeundō" , "form_IPA" : "kirkumeundoː" } ,
10118: { "form" : "circumeundī" , "form_IPA" : "kirkumeundiː" } ,
10118: { "form" : "circumeundae" , "form_IPA" : "kirkumeundaj" } ,
10118: { "form" : "circumeunda" , "form_IPA" : "kirkumeunda" } ,
10119: { "form" : "circumeundōrum" , "form_IPA" : "kirkumeundoːrum" } ,
10119: { "form" : "circumeundārum" , "form_IPA" : "kirkumeundaːrum" } ,
10119: { "form" : "circumeundōrum" , "form_IPA" : "kirkumeundoːrum" } ,
10119: { "form" : "circumeundīs" , "form_IPA" : "kirkumeundiːs" } ,
10119: { "form" : "circumeundīs" , "form_IPA" : "kirkumeundiːs" } ,
10119: { "form" : "circumeundīs" , "form_IPA" : "kirkumeundiːs" } ,
10119: { "form" : "circumeundōs" , "form_IPA" : "kirkumeundoːs" } ,
10119: { "form" : "circumeundās" , "form_IPA" : "kirkumeundaːs" } ,
10119: { "form" : "circumeunda" , "form_IPA" : "kirkumeunda" } ,
10119: { "form" : "circumeundī" , "form_IPA" : "kirkumeundiː" } ,
10120: { "form" : "circumeundae" , "form_IPA" : "kirkumeundaj" } ,
10120: { "form" : "circumeunda" , "form_IPA" : "kirkumeunda" } ,
10120: { "form" : "circumeundīs" , "form_IPA" : "kirkumeundiːs" } ,
10120: { "form" : "circumeundīs" , "form_IPA" : "kirkumeundiːs" } ,
10120: { "form" : "circumeundīs" , "form_IPA" : "kirkumeundiːs" } ,
63757: { "form" : "prajtereor" , "form_IPA" : "prajtereor" } ,
63757: { "form" : "prajterīris" , "form_IPA" : "prajteriːris" } ,
63758: { "form" : "prajterīmur" , "form_IPA" : "prajteriːmur" } ,
63758: { "form" : "prajterīminī" , "form_IPA" : "prajteriːminiː" } ,
63758: { "form" : "prajtereuntur" , "form_IPA" : "prajtereuntur" } ,
63758: { "form" : "prajterībar" , "form_IPA" : "prajteriːbar" } ,
63758: { "form" : "prajterībāris" , "form_IPA" : "prajteriːbaːris" } ,
63758: { "form" : "prajterībāmur" , "form_IPA" : "prajteriːbaːmur" } ,
63758: { "form" : "prajterībāminī" , "form_IPA" : "prajteriːbaːminiː" } ,
63758: { "form" : "prajterībantur" , "form_IPA" : "prajteriːbantur" } ,
63758: { "form" : "prajterībor" , "form_IPA" : "prajteriːbor" } ,
63759: { "form" : "prajterīberis" , "form_IPA" : "prajteriːberis" } ,
63759: { "form" : "prajterībimur" , "form_IPA" : "prajteriːbimur" } ,
63759: { "form" : "prajterībiminī" , "form_IPA" : "prajteriːbiminiː" } ,
63759: { "form" : "prajterībuntur" , "form_IPA" : "prajteriːbuntur" } ,
63759: { "form" : "prajterear" , "form_IPA" : "prajterear" } ,
63759: { "form" : "prajtereāris" , "form_IPA" : "prajtereaːris" } ,
63759: { "form" : "prajtereāmur" , "form_IPA" : "prajtereaːmur" } ,
63759: { "form" : "prajtereāminī" , "form_IPA" : "prajtereaːminiː" } ,
63760: { "form" : "prajtereantur" , "form_IPA" : "prajtereantur" } ,
63760: { "form" : "prajterīrer" , "form_IPA" : "prajteriːrer" } ,
63760: { "form" : "prajterīrēris" , "form_IPA" : "prajteriːreːris" } ,
63760: { "form" : "prajterīrēmur" , "form_IPA" : "prajteriːreːmur" } ,
63760: { "form" : "prajterīrēminī" , "form_IPA" : "prajteriːreːminiː" } ,
63760: { "form" : "prajterīrentur" , "form_IPA" : "prajteriːrentur" } ,
63760: { "form" : "prajterīre" , "form_IPA" : "prajteriːre" } ,
63760: { "form" : "prajterīminī" , "form_IPA" : "prajteriːminiː" } ,
63760: { "form" : "prajterītor" , "form_IPA" : "prajteriːtor" } ,
63761: { "form" : "prajterītor" , "form_IPA" : "prajteriːtor" } ,
63761: { "form" : "prajtereuntor" , "form_IPA" : "prajtereuntor" } ,
63761: { "form" : "prajtereundus" , "form_IPA" : "prajtereundus" } ,
63761: { "form" : "prajtereunda" , "form_IPA" : "prajtereunda" } ,
63761: { "form" : "prajtereundum" , "form_IPA" : "prajtereundum" } ,
63762: { "form" : "prajtereundī" , "form_IPA" : "prajtereundiː" } ,
63762: { "form" : "prajtereundae" , "form_IPA" : "prajtereundaj" } ,
63762: { "form" : "prajtereundī" , "form_IPA" : "prajtereundiː" } ,
63762: { "form" : "prajtereundō" , "form_IPA" : "prajtereundoː" } ,
63762: { "form" : "prajtereundae" , "form_IPA" : "prajtereundaj" } ,
63762: { "form" : "prajtereundō" , "form_IPA" : "prajtereundoː" } ,
63762: { "form" : "prajtereundum" , "form_IPA" : "prajtereundum" } ,
63762: { "form" : "prajtereundam" , "form_IPA" : "prajtereundam" } ,
63762: { "form" : "prajtereunde" , "form_IPA" : "prajtereunde" } ,
63763: { "form" : "prajtereunda" , "form_IPA" : "prajtereunda" } ,
63763: { "form" : "prajtereundum" , "form_IPA" : "prajtereundum" } ,
63763: { "form" : "prajtereundō" , "form_IPA" : "prajtereundoː" } ,
63763: { "form" : "prajtereundā" , "form_IPA" : "prajtereundaː" } ,
63763: { "form" : "prajtereundō" , "form_IPA" : "prajtereundoː" } ,
63763: { "form" : "prajtereundī" , "form_IPA" : "prajtereundiː" } ,
63763: { "form" : "prajtereundae" , "form_IPA" : "prajtereundaj" } ,
63763: { "form" : "prajtereunda" , "form_IPA" : "prajtereunda" } ,
63763: { "form" : "prajtereundōrum" , "form_IPA" : "prajtereundoːrum" } ,
63763: { "form" : "prajtereundārum" , "form_IPA" : "prajtereundaːrum" } ,
63764: { "form" : "prajtereundōrum" , "form_IPA" : "prajtereundoːrum" } ,
63764: { "form" : "prajtereundīs" , "form_IPA" : "prajtereundiːs" } ,
63764: { "form" : "prajtereundīs" , "form_IPA" : "prajtereundiːs" } ,
63764: { "form" : "prajtereundīs" , "form_IPA" : "prajtereundiːs" } ,
63764: { "form" : "prajtereundōs" , "form_IPA" : "prajtereundoːs" } ,
63764: { "form" : "prajtereundās" , "form_IPA" : "prajtereundaːs" } ,
63764: { "form" : "prajtereunda" , "form_IPA" : "prajtereunda" } ,
63764: { "form" : "prajtereundī" , "form_IPA" : "prajtereundiː" } ,
63764: { "form" : "prajtereundae" , "form_IPA" : "prajtereundaj" } ,
63764: { "form" : "prajtereunda" , "form_IPA" : "prajtereunda" } ,
63765: { "form" : "prajtereundīs" , "form_IPA" : "prajtereundiːs" } ,
63765: { "form" : "prajtereundīs" , "form_IPA" : "prajtereundiːs" } ,
63765: { "form" : "prajtereundīs" , "form_IPA" : "prajtereundiːs" } ,
67237: { "form" : "queor" , "form_IPA" : "kweor" } ,
67237: { "form" : "quīris" , "form_IPA" : "kwiːris" } ,
67237: { "form" : "quīmur" , "form_IPA" : "kwiːmur" } ,
67237: { "form" : "quīminī" , "form_IPA" : "kwiːminiː" } ,
67238: { "form" : "queuntur" , "form_IPA" : "kweuntur" } ,
67238: { "form" : "quībar" , "form_IPA" : "kwiːbar" } ,
67238: { "form" : "quībāris" , "form_IPA" : "kwiːbaːris" } ,
67238: { "form" : "quībāmur" , "form_IPA" : "kwiːbaːmur" } ,
67238: { "form" : "quībāminī" , "form_IPA" : "kwiːbaːminiː" } ,
67238: { "form" : "quībantur" , "form_IPA" : "kwiːbantur" } ,
67238: { "form" : "quībor" , "form_IPA" : "kwiːbor" } ,
67238: { "form" : "quīberis" , "form_IPA" : "kwiːberis" } ,
67239: { "form" : "quībimur" , "form_IPA" : "kwiːbimur" } ,
67239: { "form" : "quībiminī" , "form_IPA" : "kwiːbiminiː" } ,
67239: { "form" : "quībuntur" , "form_IPA" : "kwiːbuntur" } ,
67239: { "form" : "quear" , "form_IPA" : "kwear" } ,
67239: { "form" : "queāris" , "form_IPA" : "kweaːris" } ,
67239: { "form" : "queāmur" , "form_IPA" : "kweaːmur" } ,
67239: { "form" : "queāminī" , "form_IPA" : "kweaːminiː" } ,
67239: { "form" : "queantur" , "form_IPA" : "kweantur" } ,
67239: { "form" : "quīrer" , "form_IPA" : "kwiːrer" } ,
67240: { "form" : "quīrēris" , "form_IPA" : "kwiːreːris" } ,
67240: { "form" : "quīrēmur" , "form_IPA" : "kwiːreːmur" } ,
67240: { "form" : "quīrēminī" , "form_IPA" : "kwiːreːminiː" } ,
67240: { "form" : "quīrentur" , "form_IPA" : "kwiːrentur" } ,
67240: { "form" : "quīre" , "form_IPA" : "kwiːre" } ,
67240: { "form" : "quīminī" , "form_IPA" : "kwiːminiː" } ,
67240: { "form" : "quītor" , "form_IPA" : "kwiːtor" } ,
67240: { "form" : "quītor" , "form_IPA" : "kwiːtor" } ,
67240: { "form" : "queuntor" , "form_IPA" : "kweuntor" } ,
67241: { "form" : "queundus" , "form_IPA" : "kweundus" } ,
67241: { "form" : "queunda" , "form_IPA" : "kweunda" } ,
67241: { "form" : "queundum" , "form_IPA" : "kweundum" } ,
67241: { "form" : "queundī" , "form_IPA" : "kweundiː" } ,
67241: { "form" : "queundae" , "form_IPA" : "kweundaj" } ,
67242: { "form" : "queundī" , "form_IPA" : "kweundiː" } ,
67242: { "form" : "queundō" , "form_IPA" : "kweundoː" } ,
67242: { "form" : "queundae" , "form_IPA" : "kweundaj" } ,
67242: { "form" : "queundō" , "form_IPA" : "kweundoː" } ,
67242: { "form" : "queundum" , "form_IPA" : "kweundum" } ,
67242: { "form" : "queundam" , "form_IPA" : "kweundam" } ,
67242: { "form" : "queunde" , "form_IPA" : "kweunde" } ,
67242: { "form" : "queunda" , "form_IPA" : "kweunda" } ,
67242: { "form" : "queundum" , "form_IPA" : "kweundum" } ,
67243: { "form" : "queundō" , "form_IPA" : "kweundoː" } ,
67243: { "form" : "queundā" , "form_IPA" : "kweundaː" } ,
67243: { "form" : "queundō" , "form_IPA" : "kweundoː" } ,
67243: { "form" : "queundī" , "form_IPA" : "kweundiː" } ,
67243: { "form" : "queundae" , "form_IPA" : "kweundaj" } ,
67243: { "form" : "queunda" , "form_IPA" : "kweunda" } ,
67243: { "form" : "queundōrum" , "form_IPA" : "kweundoːrum" } ,
67243: { "form" : "queundārum" , "form_IPA" : "kweundaːrum" } ,
67243: { "form" : "queundōrum" , "form_IPA" : "kweundoːrum" } ,
67243: { "form" : "queundīs" , "form_IPA" : "kweundiːs" } ,
67244: { "form" : "queundīs" , "form_IPA" : "kweundiːs" } ,
67244: { "form" : "queundīs" , "form_IPA" : "kweundiːs" } ,
67244: { "form" : "queundōs" , "form_IPA" : "kweundoːs" } ,
67244: { "form" : "queundās" , "form_IPA" : "kweundaːs" } ,
67244: { "form" : "queunda" , "form_IPA" : "kweunda" } ,
67244: { "form" : "queundī" , "form_IPA" : "kweundiː" } ,
67244: { "form" : "queundae" , "form_IPA" : "kweundaj" } ,
67244: { "form" : "queunda" , "form_IPA" : "kweunda" } ,
67244: { "form" : "queundīs" , "form_IPA" : "kweundiːs" } ,
67244: { "form" : "queundīs" , "form_IPA" : "kweundiːs" } ,
67245: { "form" : "queundīs" , "form_IPA" : "kweundiːs" } ,
76889: { "form" : "subeor" , "form_IPA" : "subeor" } ,
76889: { "form" : "subīris" , "form_IPA" : "subiːris" } ,
76889: { "form" : "subīmur" , "form_IPA" : "subiːmur" } ,
76889: { "form" : "subīminī" , "form_IPA" : "subiːminiː" } ,
76890: { "form" : "subeuntur" , "form_IPA" : "subeuntur" } ,
76890: { "form" : "subībar" , "form_IPA" : "subiːbar" } ,
76890: { "form" : "subībāris" , "form_IPA" : "subiːbaːris" } ,
76890: { "form" : "subībāmur" , "form_IPA" : "subiːbaːmur" } ,
76890: { "form" : "subībāminī" , "form_IPA" : "subiːbaːminiː" } ,
76890: { "form" : "subībantur" , "form_IPA" : "subiːbantur" } ,
76890: { "form" : "subībor" , "form_IPA" : "subiːbor" } ,
76890: { "form" : "subīberis" , "form_IPA" : "subiːberis" } ,
76891: { "form" : "subībimur" , "form_IPA" : "subiːbimur" } ,
76891: { "form" : "subībiminī" , "form_IPA" : "subiːbiminiː" } ,
76891: { "form" : "subībuntur" , "form_IPA" : "subiːbuntur" } ,
76891: { "form" : "subear" , "form_IPA" : "subear" } ,
76891: { "form" : "subeāris" , "form_IPA" : "subeaːris" } ,
76891: { "form" : "subeāmur" , "form_IPA" : "subeaːmur" } ,
76891: { "form" : "subeāminī" , "form_IPA" : "subeaːminiː" } ,
76891: { "form" : "subeantur" , "form_IPA" : "subeantur" } ,
76891: { "form" : "subīrer" , "form_IPA" : "subiːrer" } ,
76892: { "form" : "subīrēris" , "form_IPA" : "subiːreːris" } ,
76892: { "form" : "subīrēmur" , "form_IPA" : "subiːreːmur" } ,
76892: { "form" : "subīrēminī" , "form_IPA" : "subiːreːminiː" } ,
76892: { "form" : "subīrentur" , "form_IPA" : "subiːrentur" } ,
76892: { "form" : "subīre" , "form_IPA" : "subiːre" } ,
76892: { "form" : "subīminī" , "form_IPA" : "subiːminiː" } ,
76892: { "form" : "subītor" , "form_IPA" : "subiːtor" } ,
76892: { "form" : "subītor" , "form_IPA" : "subiːtor" } ,
76892: { "form" : "subeuntor" , "form_IPA" : "subeuntor" } ,
76893: { "form" : "subeundus" , "form_IPA" : "subeundus" } ,
76893: { "form" : "subeunda" , "form_IPA" : "subeunda" } ,
76893: { "form" : "subeundum" , "form_IPA" : "subeundum" } ,
76893: { "form" : "subeundī" , "form_IPA" : "subeundiː" } ,
76893: { "form" : "subeundae" , "form_IPA" : "subeundaj" } ,
76894: { "form" : "subeundī" , "form_IPA" : "subeundiː" } ,
76894: { "form" : "subeundō" , "form_IPA" : "subeundoː" } ,
76894: { "form" : "subeundae" , "form_IPA" : "subeundaj" } ,
76894: { "form" : "subeundō" , "form_IPA" : "subeundoː" } ,
76894: { "form" : "subeundum" , "form_IPA" : "subeundum" } ,
76894: { "form" : "subeundam" , "form_IPA" : "subeundam" } ,
76894: { "form" : "subeunde" , "form_IPA" : "subeunde" } ,
76894: { "form" : "subeunda" , "form_IPA" : "subeunda" } ,
76894: { "form" : "subeundum" , "form_IPA" : "subeundum" } ,
76895: { "form" : "subeundō" , "form_IPA" : "subeundoː" } ,
76895: { "form" : "subeundā" , "form_IPA" : "subeundaː" } ,
76895: { "form" : "subeundō" , "form_IPA" : "subeundoː" } ,
76895: { "form" : "subeundī" , "form_IPA" : "subeundiː" } ,
76895: { "form" : "subeundae" , "form_IPA" : "subeundaj" } ,
76895: { "form" : "subeunda" , "form_IPA" : "subeunda" } ,
76895: { "form" : "subeundōrum" , "form_IPA" : "subeundoːrum" } ,
76895: { "form" : "subeundārum" , "form_IPA" : "subeundaːrum" } ,
76895: { "form" : "subeundōrum" , "form_IPA" : "subeundoːrum" } ,
76895: { "form" : "subeundīs" , "form_IPA" : "subeundiːs" } ,
76896: { "form" : "subeundīs" , "form_IPA" : "subeundiːs" } ,
76896: { "form" : "subeundīs" , "form_IPA" : "subeundiːs" } ,
76896: { "form" : "subeundōs" , "form_IPA" : "subeundoːs" } ,
76896: { "form" : "subeundās" , "form_IPA" : "subeundaːs" } ,
76896: { "form" : "subeunda" , "form_IPA" : "subeunda" } ,
76896: { "form" : "subeundī" , "form_IPA" : "subeundiː" } ,
76896: { "form" : "subeundae" , "form_IPA" : "subeundaj" } ,
76896: { "form" : "subeunda" , "form_IPA" : "subeunda" } ,
76896: { "form" : "subeundīs" , "form_IPA" : "subeundiːs" } ,
76896: { "form" : "subeundīs" , "form_IPA" : "subeundiːs" } ,
76897: { "form" : "subeundīs" , "form_IPA" : "subeundiːs" } ,
81410: { "form" : "trānseor" , "form_IPA" : "traːnseor" } ,
81410: { "form" : "trānsīris" , "form_IPA" : "traːnsiːris" } ,
81411: { "form" : "trānsīmur" , "form_IPA" : "traːnsiːmur" } ,
81411: { "form" : "trānsīminī" , "form_IPA" : "traːnsiːminiː" } ,
81411: { "form" : "trānseuntur" , "form_IPA" : "traːnseuntur" } ,
81411: { "form" : "trānsībar" , "form_IPA" : "traːnsiːbar" } ,
81411: { "form" : "trānsībāris" , "form_IPA" : "traːnsiːbaːris" } ,
81411: { "form" : "trānsībāmur" , "form_IPA" : "traːnsiːbaːmur" } ,
81411: { "form" : "trānsībāminī" , "form_IPA" : "traːnsiːbaːminiː" } ,
81411: { "form" : "trānsībantur" , "form_IPA" : "traːnsiːbantur" } ,
81411: { "form" : "trānsībor" , "form_IPA" : "traːnsiːbor" } ,
81412: { "form" : "trānsīberis" , "form_IPA" : "traːnsiːberis" } ,
81412: { "form" : "trānsībimur" , "form_IPA" : "traːnsiːbimur" } ,
81412: { "form" : "trānsībiminī" , "form_IPA" : "traːnsiːbiminiː" } ,
81412: { "form" : "trānsībuntur" , "form_IPA" : "traːnsiːbuntur" } ,
81412: { "form" : "trānsear" , "form_IPA" : "traːnsear" } ,
81412: { "form" : "trānseāris" , "form_IPA" : "traːnseaːris" } ,
81412: { "form" : "trānseāmur" , "form_IPA" : "traːnseaːmur" } ,
81412: { "form" : "trānseāminī" , "form_IPA" : "traːnseaːminiː" } ,
81413: { "form" : "trānseantur" , "form_IPA" : "traːnseantur" } ,
81413: { "form" : "trānsīrer" , "form_IPA" : "traːnsiːrer" } ,
81413: { "form" : "trānsīrēris" , "form_IPA" : "traːnsiːreːris" } ,
81413: { "form" : "trānsīrēmur" , "form_IPA" : "traːnsiːreːmur" } ,
81413: { "form" : "trānsīrēminī" , "form_IPA" : "traːnsiːreːminiː" } ,
81413: { "form" : "trānsīrentur" , "form_IPA" : "traːnsiːrentur" } ,
81413: { "form" : "trānsīre" , "form_IPA" : "traːnsiːre" } ,
81413: { "form" : "trānsīminī" , "form_IPA" : "traːnsiːminiː" } ,
81413: { "form" : "trānsītor" , "form_IPA" : "traːnsiːtor" } ,
81414: { "form" : "trānsītor" , "form_IPA" : "traːnsiːtor" } ,
81414: { "form" : "trānseuntor" , "form_IPA" : "traːnseuntor" } ,
81414: { "form" : "trānseundus" , "form_IPA" : "traːnseundus" } ,
81414: { "form" : "trānseunda" , "form_IPA" : "traːnseunda" } ,
81414: { "form" : "trānseundum" , "form_IPA" : "traːnseundum" } ,
81415: { "form" : "trānseundī" , "form_IPA" : "traːnseundiː" } ,
81415: { "form" : "trānseundae" , "form_IPA" : "traːnseundaj" } ,
81415: { "form" : "trānseundī" , "form_IPA" : "traːnseundiː" } ,
81415: { "form" : "trānseundō" , "form_IPA" : "traːnseundoː" } ,
81415: { "form" : "trānseundae" , "form_IPA" : "traːnseundaj" } ,
81415: { "form" : "trānseundō" , "form_IPA" : "traːnseundoː" } ,
81415: { "form" : "trānseundum" , "form_IPA" : "traːnseundum" } ,
81415: { "form" : "trānseundam" , "form_IPA" : "traːnseundam" } ,
81415: { "form" : "trānseunde" , "form_IPA" : "traːnseunde" } ,
81416: { "form" : "trānseunda" , "form_IPA" : "traːnseunda" } ,
81416: { "form" : "trānseundum" , "form_IPA" : "traːnseundum" } ,
81416: { "form" : "trānseundō" , "form_IPA" : "traːnseundoː" } ,
81416: { "form" : "trānseundā" , "form_IPA" : "traːnseundaː" } ,
81416: { "form" : "trānseundō" , "form_IPA" : "traːnseundoː" } ,
81416: { "form" : "trānseundī" , "form_IPA" : "traːnseundiː" } ,
81416: { "form" : "trānseundae" , "form_IPA" : "traːnseundaj" } ,
81416: { "form" : "trānseunda" , "form_IPA" : "traːnseunda" } ,
81416: { "form" : "trānseundōrum" , "form_IPA" : "traːnseundoːrum" } ,
81416: { "form" : "trānseundārum" , "form_IPA" : "traːnseundaːrum" } ,
81417: { "form" : "trānseundōrum" , "form_IPA" : "traːnseundoːrum" } ,
81417: { "form" : "trānseundīs" , "form_IPA" : "traːnseundiːs" } ,
81417: { "form" : "trānseundīs" , "form_IPA" : "traːnseundiːs" } ,
81417: { "form" : "trānseundīs" , "form_IPA" : "traːnseundiːs" } ,
81417: { "form" : "trānseundōs" , "form_IPA" : "traːnseundoːs" } ,
81417: { "form" : "trānseundās" , "form_IPA" : "traːnseundaːs" } ,
81417: { "form" : "trānseunda" , "form_IPA" : "traːnseunda" } ,
81417: { "form" : "trānseundī" , "form_IPA" : "traːnseundiː" } ,
81417: { "form" : "trānseundae" , "form_IPA" : "traːnseundaj" } ,
81417: { "form" : "trānseunda" , "form_IPA" : "traːnseunda" } ,
81418: { "form" : "trānseundīs" , "form_IPA" : "traːnseundiːs" } ,
81418: { "form" : "trānseundīs" , "form_IPA" : "traːnseundiːs" } ,
81418: { "form" : "trānseundīs" , "form_IPA" : "traːnseundiːs" } ,
83315: { "form" : "uēneor" , "form_IPA" : "weːneor" } ,
83315: { "form" : "uēnīris" , "form_IPA" : "weːniːris" } ,
83316: { "form" : "uēnīmur" , "form_IPA" : "weːniːmur" } ,
83316: { "form" : "uēnīminī" , "form_IPA" : "weːniːminiː" } ,
83316: { "form" : "uēneuntur" , "form_IPA" : "weːneuntur" } ,
83316: { "form" : "uēnībar" , "form_IPA" : "weːniːbar" } ,
83316: { "form" : "uēnībāris" , "form_IPA" : "weːniːbaːris" } ,
83316: { "form" : "uēnībāmur" , "form_IPA" : "weːniːbaːmur" } ,
83316: { "form" : "uēnībāminī" , "form_IPA" : "weːniːbaːminiː" } ,
83316: { "form" : "uēnībantur" , "form_IPA" : "weːniːbantur" } ,
83316: { "form" : "uēnībor" , "form_IPA" : "weːniːbor" } ,
83317: { "form" : "uēnīberis" , "form_IPA" : "weːniːberis" } ,
83317: { "form" : "uēnībimur" , "form_IPA" : "weːniːbimur" } ,
83317: { "form" : "uēnībiminī" , "form_IPA" : "weːniːbiminiː" } ,
83317: { "form" : "uēnībuntur" , "form_IPA" : "weːniːbuntur" } ,
83317: { "form" : "uēnear" , "form_IPA" : "weːnear" } ,
83317: { "form" : "uēneāris" , "form_IPA" : "weːneaːris" } ,
83317: { "form" : "uēneāmur" , "form_IPA" : "weːneaːmur" } ,
83317: { "form" : "uēneāminī" , "form_IPA" : "weːneaːminiː" } ,
83318: { "form" : "uēneantur" , "form_IPA" : "weːneantur" } ,
83318: { "form" : "uēnīrer" , "form_IPA" : "weːniːrer" } ,
83318: { "form" : "uēnīrēris" , "form_IPA" : "weːniːreːris" } ,
83318: { "form" : "uēnīrēmur" , "form_IPA" : "weːniːreːmur" } ,
83318: { "form" : "uēnīrēminī" , "form_IPA" : "weːniːreːminiː" } ,
83318: { "form" : "uēnīrentur" , "form_IPA" : "weːniːrentur" } ,
83318: { "form" : "uēnīre" , "form_IPA" : "weːniːre" } ,
83318: { "form" : "uēnīminī" , "form_IPA" : "weːniːminiː" } ,
83318: { "form" : "uēnītor" , "form_IPA" : "weːniːtor" } ,
83319: { "form" : "uēnītor" , "form_IPA" : "weːniːtor" } ,
83319: { "form" : "uēneuntor" , "form_IPA" : "weːneuntor" } ,
83319: { "form" : "uēneundus" , "form_IPA" : "weːneundus" } ,
83319: { "form" : "uēneunda" , "form_IPA" : "weːneunda" } ,
83319: { "form" : "uēneundum" , "form_IPA" : "weːneundum" } ,
83320: { "form" : "uēneundī" , "form_IPA" : "weːneundiː" } ,
83320: { "form" : "uēneundae" , "form_IPA" : "weːneundaj" } ,
83320: { "form" : "uēneundī" , "form_IPA" : "weːneundiː" } ,
83320: { "form" : "uēneundō" , "form_IPA" : "weːneundoː" } ,
83320: { "form" : "uēneundae" , "form_IPA" : "weːneundaj" } ,
83320: { "form" : "uēneundō" , "form_IPA" : "weːneundoː" } ,
83320: { "form" : "uēneundum" , "form_IPA" : "weːneundum" } ,
83320: { "form" : "uēneundam" , "form_IPA" : "weːneundam" } ,
83320: { "form" : "uēneunde" , "form_IPA" : "weːneunde" } ,
83321: { "form" : "uēneunda" , "form_IPA" : "weːneunda" } ,
83321: { "form" : "uēneundum" , "form_IPA" : "weːneundum" } ,
83321: { "form" : "uēneundō" , "form_IPA" : "weːneundoː" } ,
83321: { "form" : "uēneundā" , "form_IPA" : "weːneundaː" } ,
83321: { "form" : "uēneundō" , "form_IPA" : "weːneundoː" } ,
83321: { "form" : "uēneundī" , "form_IPA" : "weːneundiː" } ,
83321: { "form" : "uēneundae" , "form_IPA" : "weːneundaj" } ,
83321: { "form" : "uēneunda" , "form_IPA" : "weːneunda" } ,
83321: { "form" : "uēneundōrum" , "form_IPA" : "weːneundoːrum" } ,
83321: { "form" : "uēneundārum" , "form_IPA" : "weːneundaːrum" } ,
83322: { "form" : "uēneundōrum" , "form_IPA" : "weːneundoːrum" } ,
83322: { "form" : "uēneundīs" , "form_IPA" : "weːneundiːs" } ,
83322: { "form" : "uēneundīs" , "form_IPA" : "weːneundiːs" } ,
83322: { "form" : "uēneundīs" , "form_IPA" : "weːneundiːs" } ,
83322: { "form" : "uēneundōs" , "form_IPA" : "weːneundoːs" } ,
83322: { "form" : "uēneundās" , "form_IPA" : "weːneundaːs" } ,
83322: { "form" : "uēneunda" , "form_IPA" : "weːneunda" } ,
83322: { "form" : "uēneundī" , "form_IPA" : "weːneundiː" } ,
83322: { "form" : "uēneundae" , "form_IPA" : "weːneundaj" } ,
83322: { "form" : "uēneunda" , "form_IPA" : "weːneunda" } ,
83323: { "form" : "uēneundīs" , "form_IPA" : "weːneundiːs" } ,
83323: { "form" : "uēneundīs" , "form_IPA" : "weːneundiːs" } ,
83323: { "form" : "uēneundīs" , "form_IPA" : "weːneundiːs" }
}

In [60]:
for row in changes:
    #print(row)
    for column in changes[row]:
        #print(column)
        #print(changes[row][column])
        LatInfLexi_verbs.loc[row,column] = changes[row][column]

# Distinguishing defective cells from missing data

In [61]:
defective_verbs = pd.read_csv("./etc/verbs_defectiveness_mapping_manual.csv", sep="\t", index_col="lexeme")
defective_nouns = pd.read_csv("./etc/nouns_defectiveness_mapping_manual.csv", sep="\t", index_col="lexeme")

In [62]:
for i in LatInfLexi_nouns.index:
    if LatInfLexi_nouns.loc[i,"form"] == "#DEF#" and LatInfLexi_nouns.loc[i,"form_IPA"] == "#DEF#" :
        if defective_nouns.loc[LatInfLexi_nouns.loc[i,"lexeme"],"systematicDefectiveness"] != "plurale tantum":
            LatInfLexi_nouns.loc[i,"form"] = ""
            LatInfLexi_nouns.loc[i,"form_IPA"] = ""
            #print(LatInfLexi_nouns.loc[i,"lexeme"],LatInfLexi_nouns.loc[i,"PoSTag:features"],LatInfLexi_nouns.loc[i,"form_IPA"],type(LatInfLexi_nouns.loc[i,"form_IPA"]))

In [63]:
LatInfLexi_nouns = LatInfLexi_nouns[LatInfLexi_nouns["form"] != ""]

In [64]:
never_defective_cells = ['VERB:Sup+-+-+-+Act+-+-+Acc+-', 'VERB:Sup+-+-+-+Pass+-+-+Abl+-', 'VERB:Part+-+Past+Perf+Pass+-+Sing+Nom+Masc', 'VERB:Part+-+Past+Perf+Pass+-+Sing+Nom+Fem', 'VERB:Part+-+Past+Perf+Pass+-+Sing+Nom+Neut', 'VERB:Part+-+Past+Perf+Pass+-+Sing+Gen+Masc', 'VERB:Part+-+Past+Perf+Pass+-+Sing+Gen+Fem', 'VERB:Part+-+Past+Perf+Pass+-+Sing+Gen+Neut', 'VERB:Part+-+Past+Perf+Pass+-+Sing+Dat+Masc', 'VERB:Part+-+Past+Perf+Pass+-+Sing+Dat+Fem', 'VERB:Part+-+Past+Perf+Pass+-+Sing+Dat+Neut', 'VERB:Part+-+Past+Perf+Pass+-+Sing+Acc+Masc', 'VERB:Part+-+Past+Perf+Pass+-+Sing+Acc+Fem', 'VERB:Part+-+Past+Perf+Pass+-+Sing+Acc+Neut', 'VERB:Part+-+Past+Perf+Pass+-+Sing+Voc+Masc', 'VERB:Part+-+Past+Perf+Pass+-+Sing+Voc+Fem', 'VERB:Part+-+Past+Perf+Pass+-+Sing+Voc+Neut', 'VERB:Part+-+Past+Perf+Pass+-+Sing+Abl+Masc', 'VERB:Part+-+Past+Perf+Pass+-+Sing+Abl+Fem', 'VERB:Part+-+Past+Perf+Pass+-+Sing+Abl+Neut', 'VERB:Part+-+Past+Perf+Pass+-+Plur+Nom+Masc', 'VERB:Part+-+Past+Perf+Pass+-+Plur+Nom+Fem', 'VERB:Part+-+Past+Perf+Pass+-+Plur+Nom+Neut', 'VERB:Part+-+Past+Perf+Pass+-+Plur+Gen+Masc', 'VERB:Part+-+Past+Perf+Pass+-+Plur+Gen+Fem', 'VERB:Part+-+Past+Perf+Pass+-+Plur+Gen+Neut', 'VERB:Part+-+Past+Perf+Pass+-+Plur+Dat+Masc', 'VERB:Part+-+Past+Perf+Pass+-+Plur+Dat+Fem', 'VERB:Part+-+Past+Perf+Pass+-+Plur+Dat+Neut', 'VERB:Part+-+Past+Perf+Pass+-+Plur+Acc+Masc', 'VERB:Part+-+Past+Perf+Pass+-+Plur+Acc+Fem', 'VERB:Part+-+Past+Perf+Pass+-+Plur+Acc+Neut', 'VERB:Part+-+Past+Perf+Pass+-+Plur+Voc+Masc', 'VERB:Part+-+Past+Perf+Pass+-+Plur+Voc+Fem', 'VERB:Part+-+Past+Perf+Pass+-+Plur+Voc+Neut', 'VERB:Part+-+Past+Perf+Pass+-+Plur+Abl+Masc', 'VERB:Part+-+Past+Perf+Pass+-+Plur+Abl+Fem', 'VERB:Part+-+Past+Perf+Pass+-+Plur+Abl+Neut', 'VERB:Part+-+Fut+-+Act+-+Sing+Nom+Masc', 'VERB:Part+-+Fut+-+Act+-+Sing+Nom+Fem', 'VERB:Part+-+Fut+-+Act+-+Sing+Nom+Neut', 'VERB:Part+-+Fut+-+Act+-+Sing+Gen+Masc', 'VERB:Part+-+Fut+-+Act+-+Sing+Gen+Fem', 'VERB:Part+-+Fut+-+Act+-+Sing+Gen+Neut', 'VERB:Part+-+Fut+-+Act+-+Sing+Dat+Masc', 'VERB:Part+-+Fut+-+Act+-+Sing+Dat+Fem', 'VERB:Part+-+Fut+-+Act+-+Sing+Dat+Neut', 'VERB:Part+-+Fut+-+Act+-+Sing+Acc+Masc', 'VERB:Part+-+Fut+-+Act+-+Sing+Acc+Fem', 'VERB:Part+-+Fut+-+Act+-+Sing+Acc+Neut', 'VERB:Part+-+Fut+-+Act+-+Sing+Voc+Masc', 'VERB:Part+-+Fut+-+Act+-+Sing+Voc+Fem', 'VERB:Part+-+Fut+-+Act+-+Sing+Voc+Neut', 'VERB:Part+-+Fut+-+Act+-+Sing+Abl+Masc', 'VERB:Part+-+Fut+-+Act+-+Sing+Abl+Fem', 'VERB:Part+-+Fut+-+Act+-+Sing+Abl+Neut', 'VERB:Part+-+Fut+-+Act+-+Plur+Nom+Masc', 'VERB:Part+-+Fut+-+Act+-+Plur+Nom+Fem', 'VERB:Part+-+Fut+-+Act+-+Plur+Nom+Neut', 'VERB:Part+-+Fut+-+Act+-+Plur+Gen+Masc', 'VERB:Part+-+Fut+-+Act+-+Plur+Gen+Fem', 'VERB:Part+-+Fut+-+Act+-+Plur+Gen+Neut', 'VERB:Part+-+Fut+-+Act+-+Plur+Dat+Masc', 'VERB:Part+-+Fut+-+Act+-+Plur+Dat+Fem', 'VERB:Part+-+Fut+-+Act+-+Plur+Dat+Neut', 'VERB:Part+-+Fut+-+Act+-+Plur+Acc+Masc', 'VERB:Part+-+Fut+-+Act+-+Plur+Acc+Fem', 'VERB:Part+-+Fut+-+Act+-+Plur+Acc+Neut', 'VERB:Part+-+Fut+-+Act+-+Plur+Voc+Masc', 'VERB:Part+-+Fut+-+Act+-+Plur+Voc+Fem', 'VERB:Part+-+Fut+-+Act+-+Plur+Voc+Neut', 'VERB:Part+-+Fut+-+Act+-+Plur+Abl+Masc', 'VERB:Part+-+Fut+-+Act+-+Plur+Abl+Fem', 'VERB:Part+-+Fut+-+Act+-+Plur+Abl+Neut']

In [65]:
for i in tqdm(LatInfLexi_verbs.index):
    if LatInfLexi_verbs.loc[i,"form"] == "#DEF#" and LatInfLexi_verbs.loc[i,"form_IPA"] == "#DEF#":
        if defective_verbs.loc[LatInfLexi_verbs.loc[i,"lexeme"],"systematicDefectiveness"] == "none" or LatInfLexi_verbs.loc[i,"PoSTag:features"] in never_defective_cells:
            LatInfLexi_verbs.loc[i,"form"] = ""
            LatInfLexi_verbs.loc[i,"form_IPA"] = ""
            #print(LatInfLexi_verbs.loc[i,"lexeme"],LatInfLexi_verbs.loc[i,"PoSTag:features"],LatInfLexi_verbs.loc[i,"form_IPA"],type(LatInfLexi_verbs.loc[i,"form_IPA"]))

100%|████████████████████████████████████████████████████████████████████████| 850392/850392 [13:37<00:00, 1040.68it/s]


In [66]:
LatInfLexi_verbs = LatInfLexi_verbs[LatInfLexi_verbs["form"] != ""]

# Combining forms tables

Adding POS

In [67]:
LatInfLexi_verbs["POS"] = "verb"
LatInfLexi_nouns["POS"] = "noun"

Generating form_id

In [68]:
def add_form_id(df, suffix=""):
    df.index.name = "form_id"
    df.reset_index(inplace=True)
    df["form_id"] = "form_" + df["form_id"].apply(str) + suffix

add_form_id(LatInfLexi_verbs, "_v")
add_form_id(LatInfLexi_nouns, "_n")

Concatenating forms tables:

In [69]:
LatInfLexi_forms = pd.concat([LatInfLexi_verbs, LatInfLexi_nouns])

# Converting forms to Paralex format

Obtaining columns in Paralex format:

In [70]:
col_map = {"form": "orth_form", "form_IPA": "phon_form", "PoSTag:features": "cell",
           "freqTFTL":"frequency",
           "freqAntiquitas":"frequency_Antiquitas" ,
           "freqAetasPatrum":"frequency_AetasPatrum",
            "freqMediumAeuum":"frequency_MediumAeuum",
            "freqRecentiorLatinitas":"frequency_RecentiorLatinitas"
           }
LatInfLexi_forms.rename(col_map, axis=1, inplace=True)

Mapping cells to new scheme:

In [71]:
LatInfLexi_cells_mapper = LatInfLexi_cells.set_index("LatInFlexi-cell").cell_id.to_dict()
LatInfLexi_forms.loc[:,"cell"] = LatInfLexi_forms.cell.map(LatInfLexi_cells_mapper)

In [72]:
LatInfLexi_forms.sample(5)

Unnamed: 0,form_id,lexeme,cell,orth_form,phon_form,frequency,frequency_Antiquitas,frequency_AetasPatrum,frequency_MediumAeuum,frequency_RecentiorLatinitas,POS
669556,form_706029_v,repleo,fprf.act.ind.3.sg,replēuerit,repleːwerit,117,1,49,67,0,verb
165231,form_173628_v,contraho,prs.act.ptcp.abl.m.pl,contrahentibus,kontrahentibus,18,1,5,10,2,verb
336540,form_351614_v,farcio,gdv.nom.f.sg,farcienda,farkienda,0,0,0,0,0,verb
278788,form_292169_v,ementior,fut.pass.imp.2.sg,ēmentītor,eːmentiːtor,0,0,0,0,0,verb
499121,form_524513_v,nuto,prs.act.ind.1.pl,nūtāmus,nuːtaːmus,1,0,1,0,0,verb


Setting form_id as index

In [73]:
LatInfLexi_forms = LatInfLexi_forms.set_index("form_id")

In [74]:
LatInfLexi_forms

Unnamed: 0_level_0,lexeme,cell,orth_form,phon_form,frequency,frequency_Antiquitas,frequency_AetasPatrum,frequency_MediumAeuum,frequency_RecentiorLatinitas,POS
form_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
form_0_v,abalieno,prs.act.ind.1.sg,abaliēnō,abalieːnoː,0,0,0,0,0,verb
form_1_v,abalieno,prs.act.ind.2.sg,abaliēnās,abalieːnaːs,0,0,0,0,0,verb
form_2_v,abalieno,prs.act.ind.3.sg,abaliēnat,abalieːnat,5,2,2,1,0,verb
form_3_v,abalieno,prs.act.ind.1.pl,abaliēnāmus,abalieːnaːmus,0,0,0,0,0,verb
form_4_v,abalieno,prs.act.ind.2.pl,abaliēnātis,abalieːnaːtis,2,1,1,0,0,verb
...,...,...,...,...,...,...,...,...,...,...
form_12451_n,uxor,gen.pl,uxōrum,uksoːrum,265,15,124,122,4,noun
form_12452_n,uxor,dat.pl,uxōribus,uksoːribus,725,30,317,373,5,noun
form_12453_n,uxor,acc.pl,uxōrēs,uksoːreːs,2073,90,1064,903,16,noun
form_12454_n,uxor,voc.pl,uxōrēs,uksoːreːs,2073,90,1064,903,16,noun


Adding stress

In [75]:
C = r"b|d|ɡ|m|n|l|r|z|p|pʰ|f|t|tʰ|s|k|kʰ|h"
V = r"waj|[jw][aeiouy]ː|[aeiouy]ː?|[jw][aeiouy]|[aeiouy][jw]"
segmenter = f"(?:{C}|({V}))*?"


def search_vowels(word):
    segmented = regex.fullmatch(segmenter, word)
    return segmented.spans(1)

def find_latin_stress(word):

    if word == "#DEF#" or word == "":
        return word

    def stress(idxs):
        i = idxs[0]
        if word[i] in 'wj':
            i += 1
        return word[:i] + "ˈ" + word[i:]

    indexes = search_vowels(word)

    # 2 syllables or less => stress first syllable
    if len(indexes) <= 2:
        return stress(indexes[0])

    *_, antepenult, penult, ultimate = indexes

    # If the penult has a long vowel it is stressed
    if word[slice(*penult)][-1] in {"ː", "j", "w"}:
        return stress(penult)

    # Single C after the penult => short penult => stress antepenult
    if (ultimate[0] - penult[1]) < 2:
        return stress(antepenult)

    # For the rest, it depends on the consonant sequence
    c_seq = word[penult[1]:ultimate[0]]
    cl = re.compile("^[bdɡpctd]ʰ?[rl]$")

    # C seq is a liquid cluster => short penult => stress antepenult
    if cl.match(c_seq):
        return stress(antepenult)

    # other C sequence => long penult => stress penult
    return stress(penult)


LatInfLexi_forms.loc[:, "phon_form"] = LatInfLexi_forms["phon_form"].apply(find_latin_stress)

Separating sounds with spaces

In [76]:
def splitter(series, split_pattern):
    series = series.str.split(pat=split_pattern, regex=True)
    return series.apply(lambda x: " ".join([char for char in x if char]))

sounds = ['b', 'd', 'ɡ', 'm', 'n', 'l', 'r', 'z', 'p', 'pʰ', 'f', 't', 'tʰ', 's', 'k', 'kʰ', 'h', 'j', 'w', 'a', 'aː', 'e', 'eː', 'i', 'iː', 'o', 'oː', 'u', 'uː', 'ˈa', 'ˈaː', 'ˈe', 'ˈeː', 'ˈi', 'ˈiː', 'ˈo', 'ˈoː', 'ˈu', 'ˈuː', 'ˈy', 'ˈyː']
split_pattern = "(" + "|".join(sorted(sounds, key=len, reverse=True)) + ")"
LatInfLexi_forms["phon_form"] = splitter(LatInfLexi_forms["phon_form"], split_pattern)

# Adjusting the transcription

In [77]:
# Adding frequencies to the cells

In [78]:
cells_freq = LatInfLexi_forms.groupby("cell")[["frequency", 
                                              "frequency_Antiquitas",
                                              "frequency_AetasPatrum",
                                              "frequency_MediumAeuum",
                                              "frequency_RecentiorLatinitas"]].agg(sum)
cells_freq.index.name = "cell_id"

  "frequency_RecentiorLatinitas"]].agg(sum)


In [79]:
cells_freq

Unnamed: 0_level_0,frequency,frequency_Antiquitas,frequency_AetasPatrum,frequency_MediumAeuum,frequency_RecentiorLatinitas
cell_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
abl.pl,835283,115180,310927,382000,27176
abl.sg,2816933,297275,962861,1463839,92958
acc.pl,1565479,216650,557840,734860,56129
acc.sg,2663992,246582,945900,1391128,80382
dat.pl,835283,115180,310927,382000,27176
...,...,...,...,...,...
prs.pass.sbjv.3.sg,168334,9945,66246,83631,8512
sup.abl,156341,17168,41767,88748,8658
sup.acc,591906,50304,198546,323267,19789
voc.pl,2113371,236521,758001,1044528,74321


In [80]:
LatInfLexi_cells.set_index("cell_id", inplace=True)

In [81]:
LatInfLexi_cells = pd.merge(LatInfLexi_cells, cells_freq, left_index=True, right_index=True)

# Creating the lexemes table

In [82]:
LatInfLexi_lexemes = LatInfLexi_forms.groupby(["lexeme", "POS"])[["frequency", 
                                                                      "frequency_Antiquitas",
                                                                      "frequency_AetasPatrum",
                                                                      "frequency_MediumAeuum",
                                                                      "frequency_RecentiorLatinitas"]].agg(sum).reset_index("POS", drop=False)
LatInfLexi_lexemes.index.name = "lexeme_id"

  "frequency_RecentiorLatinitas"]].agg(sum).reset_index("POS", drop=False)


In [83]:
LatInfLexi_lexemes

Unnamed: 0_level_0,POS,frequency,frequency_Antiquitas,frequency_AetasPatrum,frequency_MediumAeuum,frequency_RecentiorLatinitas
lexeme_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
abalieno,verb,393,101,176,113,3
abdico,verb,2256,773,760,683,40
abdo,verb,4850,971,2111,1644,124
abduco,verb,3194,937,1057,1116,84
abeo,verb,6626,1723,1466,3229,208
...,...,...,...,...,...,...
uulgus,noun,7917,1588,2524,2311,1494
uulnero,verb,10975,1167,4903,4784,121
uulnus,noun,18041,3618,7531,6637,255
uultus,noun,25464,4688,9117,11269,390


# Output

Writing it all to file

In [84]:
LatInfLexi_lexemes.to_csv("LatInfLexi-lexemes.csv")
LatInfLexi_forms.to_csv("LatInfLexi-forms.csv")
LatInfLexi_cells.to_csv("LatInfLexi-cells.csv")

Remove temporary files from v1.1

In [85]:
%%bash
rm LatInfLexi-nouns.csv
rm LatInfLexi-verbs.csv
