# Typology of Morphological Exponence (pyTomex)

An implementation of the model of exponence as informativeness from Carroll (2021)

Packages:

In [1]:
from itertools import combinations

import pandas as pd


Data:

In [2]:
# Load the example inflected lexicon. The functions below read the lexeme
# column 'l_0', the 'wordform' column, feature columns prefixed 'c_' and
# formative-position columns prefixed 'f_'.
ngkolmpu_df = pd.read_csv("data/ngkolmpu_data_example.csv")


The `i_sig` function returns the $I_\sigma$ relations of a given formative, for one feature of one lexeme, as a set. Its arguments are:

- sig = feature column of dataframe
- pos = position data of formative
- form = phonological component of formative
- lex = lexeme
- df = data frame of inflected lexicon

In [3]:
def i_sig(sig, pos, form, lex, df):
    """Return the I_sigma relations of one formative for one feature.

    Parameters
    ----------
    sig : name of the feature column to inspect.
    pos : name of the position column the formative occupies.
    form : value of the formative within ``pos``.
    lex : lexeme, matched against the ``l_0`` column.
    df : data frame of the inflected lexicon.

    Returns
    -------
    set
        The values of ``sig`` found in rows where ``pos`` holds ``form`` and
        ``l_0`` equals ``lex``, provided they form a proper subset of every
        value ``sig`` takes in ``df``; otherwise an empty set.
    """
    all_values = set(df[sig])
    with_form = df[df[pos] == form]
    observed = set(with_form.loc[with_form.l_0 == lex, sig])
    # A formative that co-occurs with every value of the feature tells us
    # nothing about that feature, so only a proper subset is reported.
    return observed if observed < all_values else set()
    

In [4]:
# Example: formative 's' in position 'f_p2', feature 'c_plur', lexeme TOUCH.
i_sig('c_plur', 'f_p2', 's', 'TOUCH', ngkolmpu_df)

set()

The `i_sig_all` function returns all the $I_\sigma$ relations of a formative for a given paradigm as a dictionary keyed by feature column.

In [5]:
def i_sig_all(pos, form, lex, df):
    """Return the I_sigma relations of one formative for every feature.

    Every column of ``df`` whose name starts with ``'c_'`` is treated as a
    feature column. The result maps each such column name to the set that
    ``i_sig`` returns for it, in column order.
    """
    # NOTE: the feature-column list (and its values) could be defined once
    # outside the function at some point.
    feature_columns = (name for name in df if name.startswith('c_'))
    return {name: i_sig(name, pos, form, lex, df) for name in feature_columns}
        

In [6]:
# Example: the same formative ('s' in 'f_p2', lexeme TOUCH) across every 'c_' feature column.
i_sig_all('f_p2', 's', 'TOUCH', ngkolmpu_df)

{'c_tnsmood': {'futign', 'futirr', 'hod', 'imp', 'rct', 'rmt'},
 'c_aspect': {'dur', 'pfv'},
 'c_Aper': set(),
 'c_Anum': set(),
 'c_Uper': {'2U', '3U'},
 'c_Unum': {'nsgU'},
 'c_plur': set()}

Paradigm analyser:

Computes the $I_\sigma$ relations of every formative in a paradigm and returns them as a dictionary keyed by `(position, form)` pairs.


In [7]:
def lexAnalyse(lex, df):
    """Collect the I_sigma relations of every formative for lexeme ``lex``.

    Every column of ``df`` whose name starts with ``'f_'`` is treated as a
    formative position. For each non-missing form found in such a column the
    result holds an entry keyed by ``(position, form)`` whose value is the
    dictionary returned by ``i_sig_all`` for that formative.
    """
    position_columns = [name for name in df if name.startswith('f_')]
    return {
        (pos, form): i_sig_all(pos, form, lex, df)
        for pos in position_columns
        for form in df[pos].unique()
        if not pd.isna(form)  # an empty slot is not a formative
    }


Classifies a language-specific dataframe with respect to the typologies of multiple exponence.

In [10]:
def classify_VE(lex, df):
    """Classify every pair of co-occurring formatives of lexeme ``lex``.

    For each word form of ``lex`` and each pair of formatives in it, the
    I_sigma sets of the two formatives (from ``lexAnalyse``) are compared
    feature by feature. Whenever the two sets share at least one value a
    row is recorded with a type label:

    - ``'ME'`` when the two sets are equal,
    - ``'SE'`` when one set is a subset of the other,
    - ``'DE'`` when they overlap without either containing the other.

    Parameters
    ----------
    lex : lexeme, matched against the ``l_0`` column.
    df : data frame of the inflected lexicon; columns prefixed ``'c_'`` are
        features, columns prefixed ``'f_'`` are formative positions, and
        ``'wordform'`` holds the full word form.

    Returns
    -------
    list of dict
        One dict per recorded comparison with the keys ``'Wordform'``,
        ``'Cell'``, ``'Value'``, ``'FormA'``, ``'FormB'`` and ``'Type'``.
    """
    # The column lists do not change while iterating, so compute them once.
    feature_cols = [col for col in df if col.startswith('c_')]
    position_cols = [col for col in df if col.startswith('f_')]

    formval = lexAnalyse(lex, df)
    table = []

    # ``formval`` describes ``lex`` only, so restrict the rows to that lexeme;
    # otherwise word forms of other lexemes would be classified with the
    # wrong I_sigma sets.
    for _, word in df[df['l_0'] == lex].iterrows():
        formatives = [(pos, word[pos])
                      for pos in position_cols
                      if not pd.isna(word[pos])]

        # Full feature bundle of this word form, built before any row refers to it.
        cell = {word[sig] for sig in feature_cols}

        for forma, formb in combinations(formatives, 2):
            for sig in feature_cols:
                vals_a = formval[forma][sig]
                vals_b = formval[formb][sig]

                # Only formatives that share information about this feature are classified.
                if not vals_a & vals_b:
                    continue

                if vals_a == vals_b:
                    typ = 'ME'
                elif vals_a <= vals_b or vals_b <= vals_a:
                    typ = 'SE'
                else:
                    typ = 'DE'

                table.append({'Wordform': word['wordform'],
                              'Cell': set(cell),  # copy so rows do not share one mutable set
                              'Value': word[sig],
                              'FormA': forma,
                              'FormB': formb,
                              'Type': typ})
    return table
        

In [16]:
# Empty frame that will collect the classification rows.
ve_df = pd.DataFrame()

# classify_VE returns a list of row dicts (not a DataFrame) for the lexeme TOUCH.
df_ve = classify_VE('TOUCH', ngkolmpu_df)

In [17]:
# DataFrame.append was removed in pandas 2.0; pd.concat is the supported way
# to add the list of row dicts to the collected frame.
ve_df = pd.concat([ve_df, pd.DataFrame(df_ve)])

In [18]:
# Preview the first classified formative pairs.
ve_df.head()

Unnamed: 0,Wordform,Cell,Value,FormA,FormB,Type
0,ntpinont,"{2U, sgA, sgU, npl, futirr, dur, 1A}",futirr,"(f_p2, nt)","(f_s1, nt)",SE
1,ntpinont,"{2U, sgA, sgU, npl, futirr, dur, 1A}",dur,"(f_p2, nt)","(f_s1, nt)",ME
2,ntpinont,"{2U, sgA, pl, sgU, futirr, dur, 1A}",futirr,"(f_p2, nt)","(f_s1, nt)",SE
3,ntpinont,"{2U, sgA, pl, sgU, futirr, dur, 1A}",dur,"(f_p2, nt)","(f_s1, nt)",ME
4,srpinont,"{2U, sgA, npl, futirr, nsgU, dur, 1A}",futirr,"(f_p2, s)","(f_p3, r)",SE


In [19]:
# Total number of recorded comparisons (rows).
len(ve_df)

4780