# Danis 2019 Python supplement
June 1, 2021

Danis, Nick. 2019. [Long-distance major place harmony](https://doi.org/10.1017/S0952675719000307). *Phonology* 36.4. 573-604.

This is intended as a supplement to Danis (2019). This notebook uses the same coded wordlist as the original article ([danis2019-word-list.csv](danis2019-word-list.csv)). However, in the original article, the wordlist was manipulated in Excel to create the 2x2 contingency tables and these were then tested in R. Here, pandas is used to do all the manipulations in a clear and reproducible way, for posterity.

In [1]:
import pandas as pd
import numpy as np
import scipy
import scipy.stats as stats
from collections import defaultdict

In [2]:
# Display the installed SciPy version (the statistical tests below come from scipy.stats)
scipy.__version__

'1.4.1'

## Helper functions

In [3]:
def build_2x2(crosstab, c1, c2):
    '''
    Collapse a crosstab into the 2x2 contingency table for a single cell.

    crosstab must carry 'All' margins (row and column totals), since the
    complements are derived from them. The result is a numpy array laid out as

        [[ c1 & c2,       c1 & not-c2     ],
         [ not-c1 & c2,   not-c1 & not-c2 ]]
    '''
    both = crosstab.loc[c1, c2]
    # row total minus the target cell: c1 paired with anything but c2
    row_rest = crosstab.loc[c1, 'All'] - both
    # column total minus the target cell: c2 paired with anything but c1
    col_rest = crosstab.loc['All', c2] - both
    # whatever is left of the grand total involves neither c1 nor c2
    neither = crosstab.loc['All', 'All'] - both - row_rest - col_rest
    top_row = [both, row_rest]
    bottom_row = [col_rest, neither]
    return np.array([top_row, bottom_row])

In [4]:
# (C1 place, C2 place) cells of interest, in the order they are tested later
places = ['lab', 'dor', 'cor', 'labdor']
# homorganic pairs: both consonants have the same place
combos = [(place, place) for place in places]
# semihomorganic pairs: a labial-dorsal followed by a plain labial or dorsal ...
combos += [('labdor', simple) for simple in ['lab', 'dor']]
# ... and the reverse order
combos += [(simple, 'labdor') for simple in ['lab', 'dor']]

def highlight(x, cells=None):
    '''
    Styler function to highlight relevant cells.

    Meant to be used table-wise, i.e. ``df.style.apply(highlight, axis=None)``.

    Parameters
    ----------
    x : DataFrame
        The frame being styled.
    cells : iterable of (row label, column label) pairs, optional
        Cells to highlight. Defaults to the module-level ``combos`` list.

    Returns
    -------
    DataFrame
        CSS strings with the same index and columns as ``x``; '' for cells
        that are not highlighted.
    '''
    color = 'background-color: yellow; font-weight: bold'
    if cells is None:
        cells = combos
    df1 = pd.DataFrame('', index=x.index, columns=x.columns)
    for row, col in cells:
        # Assigning to a missing label through .loc would enlarge df1, and the
        # Styler requires the result to have exactly the shape of x, so skip
        # pairs that are absent from this table.
        if row in x.index and col in x.columns:
            df1.loc[row, col] = color
    return df1
    

## Observed place values for all stop combinations

In [5]:
# Load the coded wordlist (same file as used for the original article)
df = pd.read_csv('danis2019-word-list.csv')

# Peek at five random rows; no seed is set, so the rows shown differ between runs
df.sample(n=5)

Unnamed: 0,ID,IPA,POS,Alt form,Def,defFr,defDu,defEn,C1 IPA,C2 IPA,...,Homorganic,C1 Manner,C2 Manner,T-D,N-ND,D-ND,KP-K,T-N,T-ND,D-N
122,2282,duŋu,(v.),,"rester, être, se trouver. Blijven, zijn, verbl...","rester, être, se trouver","Blijven, zijn, verblijven",,d,ŋ,...,non-homorganic,D,N,No,No,No,No,No,No,No
329,2722,kɔ̀la̍,(s.),,poule. Hen.,poule,Hen,,k,l,...,non-homorganic,T,other,No,No,No,No,No,No,No
651,3344,sanza,(v.),,"pousser des rejetons (arbres, buissons) ; se m...","pousser des rejetons (arbres, buissons) ; se m...","W ortelscheuten maken, nieuwe loten schieten; ...",,s,nz,...,homorganic,other,other,No,No,No,No,No,No,No
479,3005,mɔ̃yɔ̃,(v.),,"amincir, rétrécir ; s’amincir, se rétrécir, s’...","amincir, rétrécir ; s’amincir, se rétrécir, s’...","Verkleinen, dun maken, nauw maken; dun zijn, s...",,m,y,...,non-homorganic,N,other,No,No,No,No,No,No,No
812,3633,yolo,(v.),,"se trouver, être debout. Zich bevinden, staan.","se trouver, être debout","Zich bevinden, staan",,y,l,...,homorganic,other,other,No,No,No,No,No,No,No


In [6]:
# Observed counts of C1 place by C2 place; margins=True appends the 'All'
# row/column totals that build_2x2 relies on
stops = pd.crosstab(index=df['C1 place'],
                    columns=df['C2 place'],
                    margins=True)
# axis=None hands the whole table to highlight() in one call
stops.style.apply(highlight, axis=None)

C2 place,cor,dor,lab,labdor,other,All
C1 place,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
cor,28,34,17,11,70,160
dor,43,26,27,1,105,202
lab,33,31,12,0,76,152
labdor,15,13,2,11,45,86
other,37,67,35,14,127,280
All,156,171,93,37,423,880


## O/E place values for all stop combinations

In [7]:
# Observed / expected ratio for every cell of the place crosstab.
# stops still contains its 'All' margins here. Including them doubles every row
# and column sum and quadruples the grand total, so the expected count of each
# interior cell (row sum * column sum / total) is unchanged, and the margin
# cells simply come out with a ratio of 1.
obs = stops.to_numpy()
# element [3] of the chi2_contingency result is the table of expected frequencies
exp = stats.chi2_contingency(stops)[3]
oe_ratios = pd.DataFrame(data=np.divide(obs, exp),
                         index=stops.index,
                         columns=stops.columns)
oe_ratios.style.background_gradient(cmap='hot', axis=None, vmax=1.0)

C2 place,cor,dor,lab,labdor,other,All
C1 place,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
cor,0.987179,1.093567,1.005376,1.635135,0.910165,1.0
dor,1.200812,0.662382,1.264772,0.117742,1.081385,1.0
lab,1.224696,1.049554,0.747029,0.0,1.040189,1.0
labdor,0.9839,0.777914,0.220055,3.042112,1.08857,1.0
other,0.745421,1.231412,1.182796,1.189189,0.9436,1.0
All,1.0,1.0,1.0,1.0,1.0,1.0


## Testing individual place combinations

### Results

In [8]:
# Test each homorganic and semihomorganic place combination: every (c1, c2)
# cell of the crosstab is collapsed into a 2x2 table against all remaining
# combinations and tested with Fisher's exact test and a chi-squared test.
places = ['lab','dor','cor','labdor']
combos = [(place, place) for place in places]
combos += [('labdor', simple) for simple in ['lab','dor']]
combos += [(simple, 'labdor') for simple in ['lab','dor']]

# Bonferroni-style adjustment: split the 0.05 threshold over the tests run
comparisons = len(combos)
alpha = 0.05/comparisons

# column name -> list of values, one entry per tested combination
results = defaultdict(list)

for c1, c2 in combos:
    obs = build_2x2(stops,c1,c2)
    # fisher_exact result: element [1] is the p-value
    fisher_results = stats.fisher_exact(obs)
    # chi2_contingency result: [1] is the p-value, [3] the expected frequencies
    chi_results = stats.chi2_contingency(obs)

    results['c1'].append(c1)
    results['c2'].append(c2)
    # cell [0,0] of the 2x2 table is the (c1, c2) combination itself
    results['obs'].append(obs[0,0])
    results['exp'].append(chi_results[3][0,0])
    results['fisher p'].append(fisher_results[1])
    results['chi^2 p'].append(chi_results[1])
    results['alpha'].append(alpha)

place_results = pd.DataFrame(results)
place_results['o/e'] = place_results['obs'].div(place_results['exp'])
# significant when the p-value falls below the adjusted threshold
place_results['fisher sig'] = place_results['fisher p'] < alpha
place_results['chi^2 sig'] = place_results['chi^2 p'] < alpha

print(f"alpha of 0.05 adjusted for {len(combos)} comparisons = {np.round(alpha,4)}")
place_results.style.background_gradient(cmap ='Reds_r',axis=None,vmax=alpha)

alpha of 0.05 adjusted for 8 comparisons = 0.0062


Unnamed: 0,c1,c2,obs,exp,fisher p,chi^2 p,alpha,o/e,fisher sig,chi^2 sig
0,lab,lab,12,16.063636,0.309155,0.301269,0.00625,0.747029,False,False
1,dor,dor,26,39.252273,0.006298,0.009782,0.00625,0.662382,False,False
2,cor,cor,28,28.363636,1.0,0.975104,0.00625,0.987179,False,False
3,labdor,labdor,11,3.615909,0.000421,9.9e-05,0.00625,3.042112,True,True
4,labdor,lab,2,9.088636,0.005082,0.014977,0.00625,0.220055,True,False
5,labdor,dor,13,16.711364,0.318467,0.356859,0.00625,0.777914,False,False
6,lab,labdor,0,6.390909,0.001304,0.008855,0.00625,0.0,True,False
7,dor,labdor,1,8.493182,0.001054,0.00522,0.00625,0.117742,True,True


### Words

#### labial-labial

In [9]:
# Words whose C1 and C2 are both labial
df.loc[(df['C1 place'] == 'lab') & (df['C2 place'] == 'lab'),
       ['ID','IPA','POS','Alt form','Def']]

Unnamed: 0,ID,IPA,POS,Alt form,Def
0,2064,bàbá,(s. voc.),,père ! Vader !
1,2065,’ba’ba,(v.),,"tordre, être tordu. Tot wrong draaien of rolle..."
8,2075,’bama,(v.),,"serrer, pincer. Knijpen, vastknellen."
9,2076,bàmbú,(s.),,ceinture large des mVres aprVs l’enfantement. ...
52,2155,bɔ̀bá,(s. cfr.),,papa ! Vader!
58,2161,’bɔmɔ,(v.),,"serrer, pincer ; être serré, être pincé. Toekn..."
431,2910,mbá’bó-,(s.),mbá’bó kɔ̃̀,", : fosse pour prendre des animaux. Valput voo..."
454,2951,mbɔ́bì,(s.),,: rotin mince dônnant des liens forts. Dunne ...
477,2993,mɔmɔ,(v.),,"rire; se moquer de. Lachen, uitlachen."
478,2994,mɔ̀mù,(s.),màmù ; tè dò mòmù,": éclater de rire. In lach uitbarsten, schate..."


#### dorsal-dorsal

In [10]:
# Words whose C1 and C2 are both dorsal
df.loc[(df['C1 place'] == 'dor') & (df['C2 place'] == 'dor'),
       ['ID','IPA','POS','Alt form','Def']]

Unnamed: 0,ID,IPA,POS,Alt form,Def
159,2380,gàgá,(s.),,culture en saison sVche en terrain marécageux....
160,2381,gàga̍,(s.),,gouttes de rosée. Lekende dauwdroppels. •*
161,2382,ga̍ga̍,(s.),,cornet. Hoornfluitje met slechts een toon.
170,2393,gàŋga̍,(s.),,"crochet, barbillon. Weerhaak."
225,2509,gòkò,(s.),gògò,", = : dent. Tand."
236,2529,gɔ̀ŋgɔ̀,(s.),,#NAME?
269,2608,kàkà,(s.),,ancien couteau Ngbaka à lame et poignée larges...
270,2609,kàkà,(s.),,"arriVre grand-pVre, — grand’mVre. Overgrootvad..."
284,2632,káŋgé,(s.),,"grand serpent ‘jaune vert, vénimeux. Grote gee..."
294,2647,kèŋgè,(s.),,coin (vu de l’exférieur). Hoek (van buiten).


#### coronal-coronal

In [11]:
# Words whose C1 and C2 are both coronal
df.loc[(df['C1 place'] == 'cor') & (df['C2 place'] == 'cor'),
       ['ID','IPA','POS','Alt form','Def']]

Unnamed: 0,ID,IPA,POS,Alt form,Def
79,2199,dànì,(s.),,"plaie, blessure. Wonde, kwetsuur."
110,2264,dɔ̀ndɔ̀,(s.),,"surface glissante. Gladheid, glibberigheid."
111,2266,dɔ̀nɔ̀,(s.),,"marteau. Hamer, vroeger: Zang ijzer dikker aan..."
119,2279,dunu,(v.),,être rempli ; remplir. Vol zijn; vullen.
128,2295,’dɛnɛ,(v.),,"buter contre, donner des coups de poing, — de ..."
129,2302,’dò’dò-,(s.),,testicules. Teelballen.
130,2303,’dó’dó,(s.),,maladie des cheveux et de la peau. Huid- en ha...
135,2315,’du̍’du̍,(s.),,pian au pied. Pian- wond£ aan de voetzool.
490,3031,ndànà,(s.),,mal aux reins. Lendenpijn.
496,3041,ndɛ̀ndɛ̀,(s.),,fougères. Varenplanten.


#### labial-dorsal - labial-dorsal

In [12]:
# Words whose C1 and C2 are both labial-dorsal
df.loc[(df['C1 place'] == 'labdor') & (df['C2 place'] == 'labdor'),
       ['ID','IPA','POS','Alt form','Def']]

Unnamed: 0,ID,IPA,POS,Alt form,Def
173,2412,gbàgbà,(s.),,pont. Brug.
174,2413,gbágbà,(s.),,clôture en pieux ou claies. Afsluiting uit sta...
175,2414,gbàgbá,(s.),,claie de lattes de rachis de palmier ; lattes ...
188,2435,gbàŋbà,(s.),,piVge avec poids écrasant l’animal ou fermant ...
204,2466,gbo̍ŋbo̍,(s.),,rigole. Geul.
205,2468,gbɔ̀gbɔ̀,(s.),,lion. Leeuw.
358,2781,kpòŋbò,(s.),,tabouret indigène. Stoeltje zonder leuning' - ...
359,2785,kpɔ̀kpɔ̀,(s.),,chaussures. Schoeisel.
558,3162,ŋbàŋbò,(s. étr.),,bâton de portage. Draagstok.
566,3175,ŋbéŋbé,(s.),,grelot. Belletje uit samengeplooid ijzeren pla...


#### labial-dorsal - labial

In [13]:
# Words with a labial-dorsal C1 followed by a labial C2
df.loc[(df['C1 place'] == 'labdor') & (df['C2 place'] == 'lab'),
       ['ID','IPA','POS','Alt form','Def']]

Unnamed: 0,ID,IPA,POS,Alt form,Def
203,2465,gbòmbè,(s.),,"marabout. Mara- boet, kropooivaar."
556,3160,ŋbámù,(s.),,antilope huppée à raie dorsale noire. Rugstree...


#### labial-dorsal - dorsal

In [14]:
# Words with a labial-dorsal C1 followed by a dorsal C2
df.loc[(df['C1 place'] == 'labdor') & (df['C2 place'] == 'dor'),
       ['ID','IPA','POS','Alt form','Def']]

Unnamed: 0,ID,IPA,POS,Alt form,Def
176,2416,gbaka,(v.),,"aider, secourir. Helpen."
177,2419,gbákɔ̍-,(s.),,"branche d’arbre. Tak, vertakking van boom."
187,2434,gbaŋa,(v.),,s’effrayer ; survenir à l’improviste. Verschie...
189,2436,gbàŋgà,(s.),,"battant de porte, panneau d’écorce ou de planc..."
346,2756,kpáŋgà,(s. étr.),,pain de manioc. Maniokbrood; gekookte maniok- ...
349,2765,kpɛ̀kà,(s.),,bistouri pour tatouage. Tatoeëermesje.
350,2766,kpɛ̀kà,"(s., cfr.)",pɛ̀tɛ́kɛ̀lɛ̍,: déclic ■d'un piège. Springstokje van strop.
352,2769,kpɛ̀ŋga̍,(s.),,"lame —, fer d’arme ou d'outil sans manche.. Ij..."
552,3155,ŋbàkà,(s.),,"Ngbaka, peuple ■—, langue Ngbaka. Ngbaka (man)..."
559,3164,ŋbàŋgà,(s. étr.),,"palabre, cause, litige. Geschil, zaak."


#### labial - labial-dorsal

In [15]:
# Words with a labial C1 followed by a labial-dorsal C2
df.loc[(df['C1 place'] == 'lab') & (df['C2 place'] == 'labdor'),
       ['ID','IPA','POS','Alt form','Def']]

Unnamed: 0,ID,IPA,POS,Alt form,Def


#### dorsal - labial-dorsal

In [16]:
# Words with a dorsal C1 followed by a labial-dorsal C2
df.loc[(df['C1 place'] == 'dor') & (df['C2 place'] == 'labdor'),
       ['ID','IPA','POS','Alt form','Def']]

Unnamed: 0,ID,IPA,POS,Alt form,Def
385,2822,ku̍ŋba̍,(s.),,gros mortier pour piler le maïs. Brede en lage...


## Voicing and Manner restrictions

Definitions:

- **T-D**: voiceless and voiced stop combinations
- **N-ND**: nasal and prenasalized stop combinations
- **D-ND**: oral and prenasalized stop combinations

For all, order is irrelevant.

### Voicing Agreement

In [17]:
# Homorganicity of the consonant pair against the T-D coding
# (voiceless + voiced stop combination)
voicing = pd.crosstab(index=df['Homorganic'],
                      columns=df['T-D'])

voicing

T-D,No,Yes
Homorganic,Unnamed: 1_level_1,Unnamed: 2_level_1
homorganic,203,1
non-homorganic,654,22


The one observed token for a homorganic pair that disagrees in voicing is shown below. It has an alternate pronunciation ('Alt form') that does agree in voicing. Stats are run for both forms. The published paper includes the alt form in the main statistics, but notes that this was an alternate form. As will be shown, by Fisher's exact test both results are significant with an unadjusted alpha of 0.05, but only the alt form is significant if alpha is adjusted for the three tests (voicing and two nasal restrictions).

In [18]:
# Homorganic pairs coded as a voiceless + voiced stop combination
df.loc[(df['T-D'] == 'Yes') & (df['Homorganic'] == 'homorganic'),
       ['ID','IPA','POS','Alt form','Def']]

Unnamed: 0,ID,IPA,POS,Alt form,Def
225,2509,gòkò,(s.),gògò,", = : dent. Tand."


In [19]:
# construct the alt form 2x2: the single homorganic T-D token is recoded under
# its alternate form, which agrees in voicing
voicing_alt = voicing.copy()
# Move exactly one count between the two homorganic cells, addressed by label
# rather than position, so the table total is preserved whatever the counts are.
# remove a count from homorganic + T-D=Yes
voicing_alt.loc['homorganic', 'Yes'] -= 1
# add a count to homorganic + T-D=No
voicing_alt.loc['homorganic', 'No'] += 1

voicing_alt

T-D,No,Yes
Homorganic,Unnamed: 1_level_1,Unnamed: 2_level_1
homorganic,204,0
non-homorganic,654,22


### Nasal Agreement



In [20]:
# Homorganicity of the consonant pair against the N-ND coding
# (nasal + prenasalized stop combination)
nasality = pd.crosstab(index=df['Homorganic'],
                       columns=df['N-ND'])

nasality

N-ND,No,Yes
Homorganic,Unnamed: 1_level_1,Unnamed: 2_level_1
homorganic,203,1
non-homorganic,667,9


In [21]:
# Homorganic pairs coded as a nasal + prenasalized stop combination
df.loc[(df['N-ND'] == 'Yes') & (df['Homorganic'] == 'homorganic'),
       ['ID','IPA','POS','Alt form','Def']]

Unnamed: 0,ID,IPA,POS,Alt form,Def
490,3031,ndànà,(s.),,mal aux reins. Lendenpijn.


In [22]:
# Homorganicity of the consonant pair against the D-ND coding
# (oral + prenasalized stop combination)
prenasality = pd.crosstab(index=df['Homorganic'],
                          columns=df['D-ND'])

prenasality

D-ND,No,Yes
Homorganic,Unnamed: 1_level_1,Unnamed: 2_level_1
homorganic,196,8
non-homorganic,653,23


In [23]:
# Homorganic pairs coded as an oral + prenasalized stop combination
df.loc[(df['D-ND'] == 'Yes') & (df['Homorganic'] == 'homorganic'),
       ['ID','IPA','POS','Alt form','Def']]

Unnamed: 0,ID,IPA,POS,Alt form,Def
9,2076,bàmbú,(s.),,ceinture large des mVres aprVs l’enfantement. ...
110,2264,dɔ̀ndɔ̀,(s.),,"surface glissante. Gladheid, glibberigheid."
170,2393,gàŋga̍,(s.),,"crochet, barbillon. Weerhaak."
188,2435,gbàŋbà,(s.),,piVge avec poids écrasant l’animal ou fermant ...
204,2466,gbo̍ŋbo̍,(s.),,rigole. Geul.
236,2529,gɔ̀ŋgɔ̀,(s.),,#NAME?
454,2951,mbɔ́bì,(s.),,: rotin mince dônnant des liens forts. Dunne ...
868,2198,dándèà,(s.),,petite hirondelle noire. Gewone zwarte holenz...


### Results

In [24]:
# Test each voicing / nasality restriction: is the disagreeing pair type
# (column 'Yes') distributed differently over homorganic and non-homorganic
# consonant pairs?
voi_nas_combos = {'Voicing Agreement' : voicing, 
                  'Voicing Agreement (alt)' : voicing_alt,
                  'N-ND Agreement' : nasality, 
                  'D-ND Agreement' : prenasality}

# The two voicing tables are alternative codings of one and the same test, so
# they count as a single comparison (voicing + the two nasal restrictions = 3).
comparisons = len(voi_nas_combos) - 1
alpha = 0.05/comparisons

# column name -> list of values, one entry per tested table
results = defaultdict(list)

for name, table in voi_nas_combos.items():
    obs = table.to_numpy()
    # fisher_exact result: element [1] is the p-value
    fisher_results = stats.fisher_exact(obs)
    # chi2_contingency result: [1] is the p-value, [3] the expected frequencies
    chi_results = stats.chi2_contingency(obs)
    results['restriction'].append(name)
    # cell [0,1] is the homorganic row, 'Yes' column: the restricted combination
    results['obs'].append(obs[0,1])
    results['exp'].append(chi_results[3][0,1])
    results['fisher p'].append(fisher_results[1])
    results['chi^2 p'].append(chi_results[1])
    results['alpha'].append(alpha)
    
voi_nas_results = pd.DataFrame(results)
voi_nas_results['o/e'] = voi_nas_results['obs'] / voi_nas_results['exp'] 
# significant when the p-value falls below the adjusted threshold
voi_nas_results['fisher sig'] = voi_nas_results['fisher p'] < alpha
voi_nas_results['chi^2 sig'] = voi_nas_results['chi^2 p'] < alpha

# report the number of comparisons actually used for this adjustment
# (not len(combos), which belongs to the place tests above)
print(f"alpha of 0.05 adjusted for {comparisons} comparisons = {np.round(alpha,4)}")
voi_nas_results.style.background_gradient(cmap ='Reds_r',axis=None,vmax=alpha)

alpha of 0.05 adjusted for 8 comparisons = 0.0167


Unnamed: 0,restriction,obs,exp,fisher p,chi^2 p,alpha,o/e,fisher sig,chi^2 sig
0,Voicing Agreement,1,5.331818,0.024894,0.055034,0.016667,0.187553,False,False
1,Voicing Agreement (alt),0,5.1,0.004027,0.018591,0.016667,0.0,True,False
2,N-ND Agreement,1,2.318182,0.468088,0.537478,0.016667,0.431373,False,False
3,D-ND Agreement,8,7.186364,0.670087,0.891898,0.016667,1.113219,False,False
