# Phylogenetic collocation analysis

In [None]:
import numpy as np
import pandas as pd
from scipy.stats import chi2_contingency

import matplotlib.pyplot as plt
import seaborn as sns

import networkx as nx

import svgutils.compose as sc
import svgutils.transform as sg

% matplotlib inline

In [None]:
df0 = pd.read_csv('fish-patterns-db-img.csv')

In [None]:
print(len(set(df0['family'])), " families")
print(len(set(df0['genus'])), " genera")
print(len(set(df0['species'])), " species")
print(len(df0), "images")

In [None]:
df_sp = df0.groupby(['genus', 'species']).sum().applymap(lambda x: 1 if x>0 else 0)
spnum = df_sp.reset_index().groupby('genus').count()['species'].rename('sp_num')
df_gen = df_sp.groupby(['genus']).sum().applymap(lambda x: 1 if x>0 else 0)

df_gen_spnum = pd.merge(spnum.to_frame(), df_gen, on='genus')
df_gen_sp2 = df_gen[df_gen_spnum['sp_num']>=2]
print(len(df_gen_sp2), " genera (incl. >=2 sp.)")

df_gen_sp3 = df_gen[df_gen_spnum['sp_num']>=3]
print(len(df_gen_sp3), " genera (incl. >=3 sp.)")


In [None]:
df_gen_comat = df_gen.T.dot(df_gen)
df_gen_comat

In [None]:
df_gen_sp2_comat = df_gen_sp2.T.dot(df_gen_sp2)
df_gen_sp2_comat

In [None]:
df_gen_sp3_comat = df_gen_sp3.T.dot(df_gen_sp3)
df_gen_sp3_comat

### Contingency table

| Motifs        | B present (1)  | B absent (0)   | Total |
|:-------------:|:--------------:|:--------------:|:-----:|
| A present (1) | O<sub>11</sub> | O<sub>10</sub> | A1    |
| A absent  (0) | O<sub>01</sub> | O<sub>00</sub> | A0    |
| Total         | B1             | B0             | N     |

In [None]:
# The four lists below are parallel: position k in each refers to the same motif.

# Motif identifiers used to index the co-occurrence tables.
ptns = [
    "Mono", "Bltc", "Sp_D", "Sp_L", "Maze", "St_H",
    "St_D", "St_V", "Sddl", "Eyes", "Area",
]

# Display names used for tick labels and network node names.
labels = [
    "Mono", "Bltc", "Sp-D", "Sp-L", "Maze", "St-H",
    "St-D", "St-V", "Sddl", "Eyes", "Area",
]

# SVG icon drawn on top of each network node.
svgfiles = [
    "ptns_svg/ptn_mono.svg",
    "ptns_svg/ptn_blotch.svg",
    "ptns_svg/ptn_spot_dark.svg",
    "ptns_svg/ptn_spot_light.svg",
    "ptns_svg/ptn_stripe_maze.svg",
    "ptns_svg/ptn_stripe_horz.svg",
    "ptns_svg/ptn_stripe_diag.svg",
    "ptns_svg/ptn_stripe_vert.svg",
    "ptns_svg/ptn_saddle.svg",
    "ptns_svg/ptn_eyespot.svg",
    "ptns_svg/ptn_area_fill.svg",
]

# Outline colour of each network node.
colors = [
    "silver", "dimgray", "mediumseagreen", "tomato", "gold", "turquoise",
    "royalblue", "darkslateblue", "mediumorchid", "pink", "sienna",
]



In [None]:
def contingency_mat(ptnA, ptnB):
    """Build the 2x2 genus-level contingency table for two motifs.

    Counts come from ``df_gen_sp2`` / ``df_gen_sp2_comat``.

    Parameters
    ----------
    ptnA, ptnB : str
        Row/column labels of ``df_gen_sp2_comat``.

    Returns
    -------
    numpy.ndarray
        2x2 array indexed ``[A present?, B present?]``: ``[0, 0]`` neither,
        ``[0, 1]`` B only, ``[1, 0]`` A only, ``[1, 1]`` both.
    """
    total = len(df_gen_sp2)
    both = df_gen_sp2_comat.loc[ptnA, ptnB]
    # The diagonal of the co-occurrence matrix holds each motif's own count.
    only_A = df_gen_sp2_comat.loc[ptnA, ptnA] - both
    only_B = df_gen_sp2_comat.loc[ptnB, ptnB] - both
    neither = total - both - only_A - only_B

    return np.array([[neither, only_B],
                     [only_A, both]])



In [None]:
# Each measure table starts as a float copy of the (integer) co-occurrence
# matrix, so the real-valued scores written below match the column dtype
# instead of relying on implicit int -> float upcasting during assignment.

# Jaccard index
df_gen_Jaccard = df_gen_sp2_comat.astype(float)

# Sørensen-Dice coefficient
df_gen_Dice = df_gen_sp2_comat.astype(float)

# Overlap coefficient (Szymkiewicz–Simpson coefficient)
df_gen_OC = df_gen_sp2_comat.astype(float)

# log-likelihood
df_gen_LL = df_gen_sp2_comat.astype(float)

# Z-score (Dennis 1965)
df_gen_Z = df_gen_sp2_comat.astype(float)

# T-score (Church et al.1991)
df_gen_T = df_gen_sp2_comat.astype(float)

for ptnA in ptns:
    for ptnB in ptns:
        # obs[a, b]: number of genera with A present (a=1) / absent (a=0)
        # and B present (b=1) / absent (b=0).
        obs = contingency_mat(ptnA, ptnB)

        # Marginal totals, named as in the contingency table above.
        N = obs[1,1] + obs[1,0] + obs[0,1] + obs[0,0]
        A1 = obs[1,1] + obs[1,0]
        A0 = obs[0,1] + obs[0,0]
        B1 = obs[1,1] + obs[0,1]
        B0 = obs[1,0] + obs[0,0]

        df_gen_Jaccard.loc[ptnA, ptnB] = obs[1, 1]/(obs[1, 1]+obs[1, 0]+obs[0, 1])
        df_gen_Dice.loc[ptnA, ptnB] = 2*obs[1, 1]/(A1 + B1)
        df_gen_OC.loc[ptnA, ptnB] = obs[1, 1]/min(A1, B1)

        # G-test statistic; `ex` holds the expected counts under independence.
        LL, p, dof, ex = chi2_contingency(obs, correction=False, lambda_='log-likelihood')
        df_gen_LL.loc[ptnA, ptnB] = LL

        # df_gen_Z.loc[ptnA, ptnB] = (obs[1, 1] - ex[1, 1]) / np.sqrt(ex[1, 1]*(1-(A1*B1/N**2)))
        df_gen_Z.loc[ptnA, ptnB] = (obs[1, 1] - ex[1, 1]) / np.sqrt(ex[1, 1])

        # NOTE: not finite when the two motifs never co-occur (obs[1, 1] == 0).
        df_gen_T.loc[ptnA, ptnB] = (obs[1, 1] - ex[1, 1]) / np.sqrt(obs[1, 1])

        

In [None]:
def svg_node(svgfile, nodepos=(0, 0), scale=1.0, r=1.0, mx=0.0, my=0.0, rot=0.0):
    """Load an SVG icon and position it over a node of the current plot.

    Parameters
    ----------
    svgfile : str
        Path of the SVG icon.
    nodepos : sequence of two floats
        Node position in data coordinates of the current axes.
    scale : float
        Scale factor applied to the icon.
    r : float
        Multiplier applied to ``nodepos`` before it is transformed.
    mx, my : float
        Extra horizontal / vertical offsets added to the final position.
    rot : float
        Rotation passed to the icon's ``rotate`` call, about the icon centre.

    Returns
    -------
    The rotated, scaled and moved ``svgutils.compose.SVG`` element.
    """
    # Icon size in px: convert each dimension, then strip the last two
    # characters of its string form to obtain a float.
    width_px, height_px = (
        float(str(sc.Unit(dim).to('px'))[:-2])
        for dim in sg.fromfile(svgfile).get_size()
    )

    # Map the (radially scaled, y-negated) node position through the current
    # axes' data transform.
    to_display = plt.gca().transData.transform
    center_x, center_y = to_display((nodepos[0]*r, -nodepos[1]*r))

    icon = sc.SVG(svgfile)
    icon.rotate(rot, width_px/2.0, height_px/2.0)
    icon = icon.scale(scale)
    # Shift by half the scaled size so the icon is centred on the target point.
    icon = icon.move(center_x - (width_px*scale)/2.0 + mx,
                     center_y - (height_px*scale)/2.0 + my)

    return icon



In [None]:
def make_nx_11(measure, dfgen, th, cmap):
    sns.set(style="white", context="talk")
    fig = plt.figure(figsize=(8, 8))
    ax = fig.add_subplot(111)

    tempfile = 'temp_11_' + measure + '.svg'
    svgfile = 'ptns_network_11_' + measure + '.svg'
    pdffile = 'ptns_network_11_' + measure + '.pdf'

    G = nx.Graph()

    for i in range(11):
        G.add_node(labels[i])
    
    for i in range(11):
        for j in range(11):
            if (i!=j):
                if dfgen.iloc[i,j] >= th:
                    G.add_edge(labels[i], labels[j], weight=dfgen.iloc[i,j])

    edge_Ws = np.array([edge[2]['weight'] for edge in G.edges(data=True)])

    pos = nx.circular_layout(G, scale=0.8)

    nx.draw_networkx_nodes(G, pos,
                           node_shape='o',
                           node_size=500,
                           node_color='white',
                           linewidths=5.0,
                           edgecolors=colors)
    nx.draw_networkx_edges(G, pos,
                           width=15*edge_Ws/max(edge_Ws),
                           alpha=0.8,
                           edge_cmap=cmap,
                           edge_vmin=0.0,
                           edge_vmax=max(edge_Ws),
                           edge_color=edge_Ws)

    plt.axis('off')

    plt.savefig(tempfile, transparent=True)

    scale=0.9
    r=1.35
    mx=30
    my=30

    svg_nodes = [svg_node(svgfiles[i], pos[labels[i]], scale, r, mx, my, 0) for i in range(11)]
    svg_nodes[0] = svg_node(svgfiles[0], pos[labels[0]], scale, r, mx, my, -3)     # Mono
    svg_nodes[1] = svg_node(svgfiles[1], pos[labels[1]], scale, r, mx, my, 10)     # Blch
    svg_nodes[2] = svg_node(svgfiles[2], pos[labels[2]], scale, r, mx, my, 5)     # Sp_D
    svg_nodes[3] = svg_node(svgfiles[3], pos[labels[3]], scale, r, mx, my+5, 15)     # Sp_L
    svg_nodes[4] = svg_node(svgfiles[4], pos[labels[4]], scale, r, mx, my, 5)     # Maze
    svg_nodes[5] = svg_node(svgfiles[5], pos[labels[5]], scale, r, mx, my, 5)     # St_H
    svg_nodes[6] = svg_node(svgfiles[6], pos[labels[6]], scale, r, mx, my, -10)     # St_D
    svg_nodes[7] = svg_node(svgfiles[7], pos[labels[7]], scale, r, mx, my, 0)     # St_V
    svg_nodes[8] = svg_node(svgfiles[8], pos[labels[8]], scale, r, mx, my-10, -5)     # Sddl
    svg_nodes[9] = svg_node(svgfiles[9], pos[labels[9]], scale, r, mx, my-8, 0)     # Eyes
    svg_nodes[10] = svg_node(svgfiles[10], pos[labels[10]], scale, r, mx, my, 10)     # Area

    sc.Figure("18.5cm", "18cm",
              sc.SVG(tempfile).move(mx, my),
              svg_nodes[0],
              svg_nodes[1],
              svg_nodes[2],
              svg_nodes[3],
              svg_nodes[4],
              svg_nodes[5],
              svg_nodes[6],
              svg_nodes[7],
              svg_nodes[8],
              svg_nodes[9],
              svg_nodes[10],
             ).save(svgfile)
    
    !inkscape --export-pdf=$pdffile $svgfile

    return svgfile


In [None]:
def make_nx_7(measure, dfgen, th, cmap):
    sns.set(style="white", context="talk")
    fig = plt.figure(figsize=(8, 8))
    ax = fig.add_subplot(111)

    tempfile = 'temp_7_' + measure + '.svg'
    svgfile = 'ptns_network_7_' + measure + '.svg'
    pdffile = 'ptns_network_7_' + measure + '.pdf'

    G = nx.DiGraph()

    for i in [1, 2, 3, 4, 5, 6, 7]:
        G.add_node(labels[i])
    
    for i in [1, 2, 3, 4, 5, 6]:
        for j in range(i, 7):
            if (i!=j):
                if dfgen.iloc[i,j] >= th:
                    G.add_edge(labels[i], labels[j], weight=dfgen.iloc[i,j])
    for i in [1, 2, 3, 4, 5]:
        for j in [7]:
            if dfgen.iloc[j,i] >= th:
                G.add_edge(labels[j], labels[i], weight=dfgen.iloc[j,i])
    for i in [6]:
        for j in [7]:
            if dfgen.iloc[i,j] >= th:
                G.add_edge(labels[i], labels[j], weight=dfgen.iloc[i,j])

    edge_Ws = np.array([edge[2]['weight'] for edge in G.edges(data=True)])

    pos = nx.shell_layout(G, scale=1.0)

    nx.draw_networkx_nodes(G, pos,
                           node_shape='o',
                           node_size=800,
                           node_color='white',
                           linewidths=10.0,
                           edgecolors=colors[1:8])

    nx.draw_networkx_edges(G, pos,
                           width=15*edge_Ws/max(edge_Ws),
                           alpha=0.8,
                           edge_cmap=cmap,
                           edge_vmin=0.0,
                           edge_vmax=max(edge_Ws),
                           edge_color=edge_Ws,
                           arrows=True,
                           arrowstyle='-',
                           arrowsize=10,
                           connectionstyle='arc3, rad=-0.12')

    # nx.draw_networkx_labels(G, pos, font_size=16)

    plt.axis('off')

    plt.savefig(tempfile, transparent=True)

    scale=0.9
    r=1.35
    mx=30
    my=30

    svg_nodes = [svg_node(svgfiles[i], pos[labels[i]], scale, r, mx, my, 0) for i in range(1, 8)]
    svg_nodes[0] = svg_node(svgfiles[1], pos[labels[1]], scale, r, mx+8, my, 15)     # Blch
    svg_nodes[1] = svg_node(svgfiles[2], pos[labels[2]], scale, r, mx, my-5, 15)     # Sp_D
    svg_nodes[2] = svg_node(svgfiles[3], pos[labels[3]], scale, r, mx+5, my, 10)     # Sp_L
    svg_nodes[3] = svg_node(svgfiles[4], pos[labels[4]], scale, r, mx+5, my-7, -15)     # Maze
    svg_nodes[4] = svg_node(svgfiles[5], pos[labels[5]], scale, r, mx+7, my+5, 5)     # St_H
    svg_nodes[5] = svg_node(svgfiles[6], pos[labels[6]], scale, r, mx+10, my-5, 0)     # St_D
    svg_nodes[6] = svg_node(svgfiles[7], pos[labels[7]], scale, r, mx, my-5, 0)     # St_V

    sc.Figure("18.5cm", "18cm",
              sc.SVG(tempfile).move(mx, my),
              svg_nodes[0],
              svg_nodes[1],
              svg_nodes[2],
              svg_nodes[3],
              svg_nodes[4],
              svg_nodes[5],
              svg_nodes[6],
             ).save(svgfile)

    !inkscape --export-pdf=$pdffile $svgfile

    return svgfile


### Co-occurrence frequency

In [None]:
df_gen_comat

In [None]:
sns.set_context('talk')
plt.figure(figsize=(15, 12))

mask = np.eye(df_gen_comat.shape[0], dtype=np.bool)
ax = sns.heatmap(df_gen_comat,
                 mask=mask,
                 vmax=700,
                 square=True,
                 fmt='d',
                 annot=True,
                 linewidth=1.0,
                 cmap='Greys',
                 rasterized=True)

plt.title('Co-occurrence frequency', fontsize=24)

ax.set_xticklabels(labels, rotation=0, fontsize=22)
ax.set_yticklabels(labels, rotation=0, fontsize=22)

plt.show()

In [None]:
make_nx_11('Freq', df_gen_comat, 0, plt.cm.Greys)

In [None]:
make_nx_7('Freq', df_gen_comat, 0, plt.cm.Greys)

### Jaccard index

In [None]:
df_gen_Jaccard

In [None]:
sns.set_context('talk')
plt.figure(figsize=(15, 12))

mask = np.eye(df_gen_Jaccard.shape[0], dtype=np.bool)
ax = sns.heatmap(df_gen_Jaccard,
                 mask=mask,
                 vmax = 0.40,
                 square=True,
                 fmt='.2f',
                 annot=True,
                 linewidth=1.0,
                 cmap='Purples',
                 rasterized=True)

plt.title('Jaccard index', fontsize=24)

ax.set_xticklabels(labels, rotation=0, fontsize=22)
ax.set_yticklabels(labels, rotation=0, fontsize=22)

plt.show()

In [None]:
make_nx_11('Jaccard', df_gen_Jaccard, 0.12, plt.cm.Purples)

In [None]:
make_nx_7('Jaccard', df_gen_Jaccard, 0.0, plt.cm.Purples)

### Sørensen–Dice coefficient

In [None]:
df_gen_Dice

In [None]:
sns.set_context('talk')
plt.figure(figsize=(15, 12))

mask = np.eye(df_gen_Dice.shape[0], dtype=np.bool)
ax = sns.heatmap(df_gen_Dice,
                 mask=mask,
                 vmax = 0.6,
                 square=True,
                 fmt='.2f',
                 annot=True,
                 linewidth=1.0,
                 cmap='Greens',
                 rasterized=True)

plt.title('Sørensen–Dice coefficient', fontsize=24)

ax.set_xticklabels(labels, rotation=0, fontsize=22)
ax.set_yticklabels(labels, rotation=0, fontsize=22)

plt.show()

In [None]:
make_nx_11('Dice', df_gen_Dice, 0.25, plt.cm.Greens)

In [None]:
make_nx_7('Dice', df_gen_Dice, 0.0, plt.cm.Greens)

### Overlap coefficient (Szymkiewicz–Simpson coefficient)

In [None]:
df_gen_OC

In [None]:
sns.set_context('talk')
plt.figure(figsize=(15, 12))

mask = np.eye(df_gen_OC.shape[0], dtype=np.bool)
ax = sns.heatmap(df_gen_OC,
                 mask=mask,
                 vmax = 0.8,
                 square=True,
                 fmt='.2f',
                 annot=True,
                 linewidth=1.0,
                 cmap='RdPu',
                 rasterized=True)

plt.title('Overlap coefficient', fontsize=24)

ax.set_xticklabels(labels, rotation=0, fontsize=22)
ax.set_yticklabels(labels, rotation=0, fontsize=22)

plt.show()

In [None]:
make_nx_11('OC', df_gen_OC, 0.3, plt.cm.RdPu)

In [None]:
make_nx_7('OC', df_gen_OC, 0.0, plt.cm.RdPu)

### Log-likelihood

In [None]:
df_gen_LL

In [None]:
sns.set_context('talk')
plt.figure(figsize=(15, 12))

mask = np.eye(df_gen_LL.shape[0], dtype=np.bool)
ax = sns.heatmap(df_gen_LL,
                 mask=mask,
                 vmax = 270,
                 square=True,
                 fmt='.1f',
                 annot=True,
                 linewidth=1.0,
                 cmap='Reds',
                 rasterized=True)

plt.title('Log-likelihood', fontsize=24)

ax.set_xticklabels(labels, rotation=0, fontsize=22)
ax.set_yticklabels(labels, rotation=0, fontsize=22)

plt.show()

In [None]:
make_nx_11('LL', df_gen_LL, 3.84, plt.cm.Reds)

In [None]:
make_nx_7('LL', df_gen_LL, 0, plt.cm.Reds)

### Z-score (Dennis1965)

In [None]:
df_gen_Z

In [None]:
sns.set_context('talk')
plt.figure(figsize=(15, 12))

mask = np.eye(df_gen_Z.shape[0], dtype=np.bool)
ax = sns.heatmap(df_gen_Z, mask=mask,
                 vmin = 0.0,
                 vmax = 12.5,
                 square=True,
                 fmt='.1f',
                 annot=True,
                 linewidth=1.0,
                 cmap='Blues',
                 rasterized=True)
plt.title('Z-score (Dennis1965)', fontsize=24)

ax.set_xticklabels(labels, rotation=0, fontsize=22)
ax.set_yticklabels(labels, rotation=0, fontsize=22)

plt.show()

In [None]:
make_nx_11('Z', df_gen_Z, 1.96, plt.cm.Blues)

In [None]:
make_nx_7('Z', df_gen_Z, 0, plt.cm.Blues)

### T-score

In [None]:
df_gen_T

In [None]:
sns.set_context('talk')
plt.figure(figsize=(15, 12))

mask = np.eye(df_gen_T.shape[0], dtype=np.bool)
ax = sns.heatmap(df_gen_T, mask=mask,
                 vmin = 0.0,
                 vmax = 8.5,
                 square=True,
                 fmt='.1f',
                 annot=True,
                 linewidth=1.0,
                 cmap='Oranges',
                 rasterized=True)
plt.title('T-score', fontsize=24)

ax.set_xticklabels(labels, rotation=0, fontsize=22)
ax.set_yticklabels(labels, rotation=0, fontsize=22)

plt.show()

In [None]:
make_nx_11('T', df_gen_T, 1.96, plt.cm.Oranges)

In [None]:
make_nx_7('T', df_gen_T, 0.0, plt.cm.Oranges)

In [None]:
from mpl_toolkits.axes_grid1.axes_divider import make_axes_locatable
try:
    from mpl_toolkits.axes_grid1.colorbar import colorbar
except ImportError:
    # The axes_grid1 colorbar module is absent from newer Matplotlib releases;
    # pyplot's colorbar accepts the same (mappable, cax=..., orientation=...) call.
    from matplotlib.pyplot import colorbar

def combi_hm(df_l, measure_l, label_l, cmap_l, vmax_l, 
             df_u, measure_u, label_u, cmap_u, vmax_u):
    """Plot two pairwise measures in one square heatmap and save it as PDF.

    ``df_l`` fills the cells below the diagonal and ``df_u`` the cells above
    it; the diagonal itself is left blank. Each measure gets its own colorbar
    (``df_l``: horizontal, below; ``df_u``: vertical, right). The figure is
    written to ``co-occ_<measure_l>-<measure_u>_200713.pdf``.

    Parameters
    ----------
    df_l, df_u : pandas.DataFrame
        Square tables of the same size; tick labels come from ``labels``.
    measure_l, measure_u : str
        Short tags used in the output file name.
    label_l, label_u : str
        Colorbar captions.
    cmap_l, cmap_u : str or matplotlib colormap
        Colormaps for the two triangles.
    vmax_l, vmax_u : float
        Upper limits of the two colour scales.
    """
    sns.set_context('talk')
    plt.figure(figsize=(15, 12))

    pdffile = 'co-occ_' + measure_l + '-' + measure_u + '_200713.pdf'

    n = df_l.shape[0]
    # True = hidden. Each mask includes the diagonal. The built-in bool is
    # used because the np.bool alias no longer exists in NumPy >= 1.24.
    hide_upper = np.triu(np.ones((n, n), dtype=bool))
    hide_lower = np.tril(np.ones((n, n), dtype=bool))

    ax = sns.heatmap(df_l,
                     mask=hide_upper,
                     vmax=vmax_l, 
                     square=True,
                     linewidth=0.0,
                     cmap=cmap_l,
                     cbar=False)

    sns.heatmap(df_u,
                mask=hide_lower,
                vmax=vmax_u, 
                square=True,
                linewidth=0.0,
                ax=ax,
                cmap=cmap_u,
                cbar=False)

    # The two heatmap meshes, in drawing order (df_l first, then df_u).
    mesh_l = ax.collections[0]
    mesh_u = ax.collections[1]

    ax_divider = make_axes_locatable(ax)
    cax1 = ax_divider.append_axes('bottom', size='5%', pad='5%')
    cax2 = ax_divider.append_axes('right', size='5%', pad='5%')

    cbar1 = colorbar(mesh_l, cax=cax1, orientation='horizontal')
    cbar1.solids.set_rasterized(True)
    cax1.set_xlabel(label_l, fontsize=32)
    cax1.yaxis.set_label_position('right')
    cax1.tick_params(length=0)

    cbar2 = colorbar(mesh_u, cax=cax2, orientation='vertical')
    cbar2.solids.set_rasterized(True)
    cax2.set_ylabel(label_u, fontsize=32)
    cax2.xaxis.set_label_position('bottom')
    cax2.tick_params(length=0)

    # Black grid lines between neighbouring rows and columns.
    for k in range(1, n):
        ax.axhline(k, linewidth=2.0, color='black')
        ax.axvline(k, linewidth=2.0, color='black')

    # ax.hlines([1, 2], *ax.get_xlim())
    ax.xaxis.set_ticks_position('top')
    ax.set_xticklabels(labels, fontsize=32, rotation=60, fontname='monospace')
    ax.set_yticklabels(labels, fontsize=32, rotation=0, fontname='monospace')

    # plt.show()
    plt.savefig(pdffile, bbox_inches="tight", pad_inches=0.1)

In [None]:
# Below the diagonal: log-likelihood; above the diagonal: Jaccard index.
combi_hm(df_gen_LL, 'LL', 'Log-likelihood', 'Reds', 270,
         df_gen_Jaccard, 'JI', 'Jaccard', 'Purples', 0.35)

In [None]:
# Below the diagonal: T-score; above the diagonal: Sørensen–Dice coefficient.
combi_hm(df_gen_T, 'T', 'T-score', 'Oranges', 8.5,
         df_gen_Dice, 'Dice', 'Sørensen–Dice coefficient', 'Greens', 0.50)

In [None]:
# Below the diagonal: raw co-occurrence counts; above: overlap coefficient.
combi_hm(df_gen_comat, 'Freq', 'Co-occurrence frequency', 'Greys', 700,
         df_gen_OC, 'OC', 'Simpson similarity index', 'RdPu', 0.8)

In [None]:
# Below the diagonal: Z-score; above the diagonal: raw co-occurrence counts.
combi_hm(df_gen_Z, 'Z', 'Z-score', 'Blues', 12.5,
         df_gen_comat, 'Freq', 'Co-occurrence frequency', 'Greys', 700)

## Triple co-occurrence analysis
### (Sp_L & Sp_D) vs pattern C

In [None]:
def co_occ_sp3_measures_vs_AB(A='Sp_L', B='Sp_D', C='Maze'):
    """Association measures between the motif pair (A and B) and motif C.

    Rows of ``df_gen_sp3`` are split by whether A and B are both present and
    by whether C is present, giving a 2x2 table from which the measures are
    derived.

    Parameters
    ----------
    A, B, C : str
        Column names of ``df_gen_sp3``.

    Returns
    -------
    tuple
        ``(cntmat, Freq, JI, Dice, OC, LL, Z, T)`` where ``cntmat`` is
        ``[[AB & C, not-AB & C], [AB & not-C, not-AB & not-C]]`` and ``Freq``
        is the count of rows with A, B and C all present.
    """
    has_AB = (df_gen_sp3[A] == 1) & (df_gen_sp3[B] == 1)
    lacks_AB = (df_gen_sp3[A] == 0) | (df_gen_sp3[B] == 0)
    has_C = df_gen_sp3[C] == 1
    lacks_C = df_gen_sp3[C] == 0

    # Cell counts of the 2x2 table (upper case = present, lower case = absent).
    ABC = int((has_AB & has_C).sum())
    abC = int((lacks_AB & has_C).sum())
    ABc = int((has_AB & lacks_C).sum())
    abc = int((lacks_AB & lacks_C).sum())

    cntmat = np.array([[ABC, abC], [ABc, abc]])

    with_C = ABC + abC      # rows where C is present
    with_AB = ABC + ABc     # rows where A and B are both present

    Freq = ABC
    JI = ABC / (ABC + abC + ABc)
    Dice = 2*ABC / (with_C + with_AB)
    OC = ABC / min(with_AB, with_C)

    # G-test statistic; `ex` holds the expected counts under independence.
    LL, p, dof, ex = chi2_contingency(cntmat, correction=False, lambda_='log-likelihood')
    excess = ABC - ex[0, 0]
    Z = excess / np.sqrt(ex[0, 0])
    T = excess / np.sqrt(ABC)

    return cntmat, Freq, JI, Dice, OC, LL, Z, T

In [None]:
A = 'Sp_L'
B = 'Sp_D'

df_coocc_sp3_vs_Sp_LD = pd.DataFrame(
    [co_occ_sp3_measures_vs_AB(A, B, 'Mono')[1:],
     co_occ_sp3_measures_vs_AB(A, B, 'Bltc')[1:],
     co_occ_sp3_measures_vs_AB(A, B, 'Maze')[1:],
     co_occ_sp3_measures_vs_AB(A, B, 'St_H')[1:],
     co_occ_sp3_measures_vs_AB(A, B, 'St_D')[1:],
     co_occ_sp3_measures_vs_AB(A, B, 'St_V')[1:],
     co_occ_sp3_measures_vs_AB(A, B, 'Sddl')[1:],
     co_occ_sp3_measures_vs_AB(A, B, 'Eyes')[1:],
     co_occ_sp3_measures_vs_AB(A, B, 'Area')[1:]],
    index=["Mono", "Bltc", 'Maze', 'St-H', 'St-D', 'St-V', 'Sddl', "Eyes", "Area"],
    columns=['Freq', 'Jaccard', 'Dice', 'Simpson', 'Log-likelihood', 'Z-score', 'T-score']
)


In [None]:
df_coocc_sp3_vs_Sp_LD

In [None]:
sns.set_context("talk")
sns.set_style("ticks")

fig, axes = plt.subplots(3, 1, figsize=(3.5, 7), sharex=True)

# (measure column / y label, palette, subplot row); drawn from the bottom row up.
for measure, palette, row in (('Z-score', 'Blues', 2),
                              ('Log-likelihood', 'Reds', 1),
                              ('Jaccard', 'Purples', 0)):
    sns.barplot(x=df_coocc_sp3_vs_Sp_LD[measure].index,
                y=df_coocc_sp3_vs_Sp_LD[measure],
                color=sns.color_palette(palette)[3],
                ax=axes[row])
    axes[row].set_ylabel(measure, fontsize=18)
    axes[row].tick_params(labelsize=11)

# The x axis is shared, so setting the limits on one subplot sets them on all.
# (The former `plt.xticks=True` did nothing except overwrite pyplot.xticks.)
axes[2].set_xlim(-0.6, 8.6)
plt.setp(axes[2].xaxis.get_majorticklabels(), rotation=90, fontsize=18)


plt.show()
# plt.savefig('co-occ_triple_Z-LL-JI.pdf', bbox_inches="tight", pad_inches=0.1)

In [None]:
sns.set_context("talk")
sns.set_style("ticks")

fig, axes = plt.subplots(1, 4, figsize=(12, 4), sharey=True)

# (measure column, x label, palette, palette shade index), left to right.
for axis, (measure, xlabel, palette, shade) in zip(axes, (
        ('Freq', "Raw freq", 'Greys', 2),
        ('Simpson', "Simpson", 'RdPu', 3),
        ('T-score', "T-score", 'Oranges', 3),
        ('Dice', "Sørensen–Dice", 'Greens', 3))):
    # Rows are reversed before plotting, as in every panel of this figure.
    values = df_coocc_sp3_vs_Sp_LD[measure].iloc[::-1]
    sns.barplot(x=values,
                y=values.index,
                color=sns.color_palette(palette)[shade],
                ax=axis)
    axis.set_xlabel(xlabel)

# The y axis is shared, so setting the limits on one subplot sets them on all.
# (The former `plt.yticks=True` did nothing except overwrite pyplot.yticks.)
axes[3].set_ylim(-0.6, 8.6)
plt.setp(axes[3].yaxis.get_majorticklabels(), rotation=90)

plt.show()
# plt.savefig('co-occ_triple_Freq-OC-T-Dice.pdf', bbox_inches="tight", pad_inches=0.1)

#### Response to Reviews
## Triple co-occurrence analysis
### (motif A & motif B) vs motif C

In [None]:
def tri_allplots(df, ptns, savefile="out.pdf"):
    """Show all seven triple co-occurrence measures as side-by-side bar plots.

    Parameters
    ----------
    df : pandas.DataFrame
        One row per motif C, with columns 'Freq', 'Simpson', 'Jaccard',
        'Dice', 'Z-score', 'T-score' and 'Log-likelihood'.
    ptns : str
        Caption for the shared y axis (describes the conditioning motif pair).
    savefile : str
        Intended output path; currently unused because the ``savefig`` call
        at the end is commented out.
    """
    sns.set_context("talk")
    sns.set_style("ticks")

    # (column, x label, palette, palette shade index, x limits), left to right.
    panels = [
        ('Freq', "Raw freq", 'Greys', 2, (0, 280)),
        ('Simpson', "Simpson", 'RdPu', 3, (0, 0.8)),
        ('Jaccard', "Jaccard", 'Purples', 3, (0, 0.35)),
        ('Dice', "Dice", 'Greens', 3, (0, 0.52)),
        ('Z-score', "Z-score", 'Blues', 3, (-3.0, 12.5)),
        ('T-score', "T-score", 'Oranges', 3, (-3.5, 8.0)),
        ('Log-likelihood', "Log-likelihood", 'Reds', 3, (0, 200)),
    ]

    fig, axes = plt.subplots(1, len(panels), figsize=(16, 4), sharey=True)

    for axis, (column, xlabel, palette, shade, xlim) in zip(axes, panels):
        # Rows are reversed before plotting, as in every panel of this figure.
        values = df[column].iloc[::-1]
        sns.barplot(x=values,
                    y=values.index,
                    color=sns.color_palette(palette)[shade],
                    ax=axis)
        axis.set_xlabel(xlabel)
        axis.set_xlim(*xlim)

    axes[0].set_ylabel(ptns)

    # plt.suptitle('Triple co-occurrence with ' + ptns)
    # The y axis is shared, so limits set on one panel apply to all of them;
    # the upper limit follows the number of rows (8.6 for nine motifs).
    # (The former `plt.yticks=True` did nothing except overwrite pyplot.yticks.)
    axes[-1].set_ylim(-0.6, len(df) - 0.4)
    plt.setp(axes[-1].yaxis.get_majorticklabels(), rotation=90)

    plt.show()
    # plt.savefig(savefile, bbox_inches="tight", pad_inches=0.1)

In [None]:
tri_allplots(df_coocc_sp3_vs_Sp_LD, "(Sp-D, Sp-L) +", "co-occ_triple_Sp-LD_Raw-OC-JI-Dice-Z-T-LL.pdf")

In [None]:
A = 'St_H'
B = 'St_V'

df_coocc_sp3_vs_St_HV = pd.DataFrame(
    [co_occ_sp3_measures_vs_AB(A, B, 'Mono')[1:],
     co_occ_sp3_measures_vs_AB(A, B, 'Bltc')[1:],
     co_occ_sp3_measures_vs_AB(A, B, 'Sp_D')[1:],
     co_occ_sp3_measures_vs_AB(A, B, 'Sp_L')[1:],
     co_occ_sp3_measures_vs_AB(A, B, 'Maze')[1:],
     co_occ_sp3_measures_vs_AB(A, B, 'St_D')[1:],
     co_occ_sp3_measures_vs_AB(A, B, 'Sddl')[1:],
     co_occ_sp3_measures_vs_AB(A, B, 'Eyes')[1:],
     co_occ_sp3_measures_vs_AB(A, B, 'Area')[1:]],
    index=["Mono", "Bltc", 'Sp-D', 'Sp-L', 'Maze', 'St-D', 'Sddl', "Eyes", "Area"],
    columns=['Freq', 'Jaccard', 'Dice', 'Simpson', 'Log-likelihood', 'Z-score', 'T-score']
)

tri_allplots(df_coocc_sp3_vs_St_HV, "(St-H, St-V) +", "co-occ_triple_St-HV_Raw-OC-JI-Dice-Z-T-LL.pdf")

In [None]:
# Conditioning pair: horizontal stripes together with diagonal stripes.
A = 'St_H'
B = 'St_D'

# One row of measures per remaining motif; the leading contingency matrix
# returned by the helper is dropped with [1:].
df_coocc_sp3_vs_St_HD = pd.DataFrame(
    [co_occ_sp3_measures_vs_AB(A, B, motif)[1:]
     for motif in ('Mono', 'Bltc', 'Sp_D', 'Sp_L', 'Maze', 'St_V', 'Sddl', 'Eyes', 'Area')],
    index=["Mono", "Bltc", 'Sp-D', 'Sp-L', 'Maze', 'St-V', 'Sddl', "Eyes", "Area"],
    columns=['Freq', 'Jaccard', 'Dice', 'Simpson', 'Log-likelihood', 'Z-score', 'T-score']
)

tri_allplots(df_coocc_sp3_vs_St_HD, "(St-H, St-D) +", "co-occ_triple_St-HD_Raw-OC-JI-Dice-Z-T-LL.pdf")

In [None]:
# Conditioning pair: vertical stripes together with diagonal stripes.
A = 'St_V'
B = 'St_D'

# One row of measures per remaining motif; the leading contingency matrix
# returned by the helper is dropped with [1:].
df_coocc_sp3_vs_St_VD = pd.DataFrame(
    [co_occ_sp3_measures_vs_AB(A, B, motif)[1:]
     for motif in ('Mono', 'Bltc', 'Sp_D', 'Sp_L', 'Maze', 'St_H', 'Sddl', 'Eyes', 'Area')],
    index=["Mono", "Bltc", 'Sp-D', 'Sp-L', 'Maze', 'St-H', 'Sddl', "Eyes", "Area"],
    columns=['Freq', 'Jaccard', 'Dice', 'Simpson', 'Log-likelihood', 'Z-score', 'T-score']
)

tri_allplots(df_coocc_sp3_vs_St_VD, "(St-V, St-D) +", "co-occ_triple_St-VD_Raw-OC-JI-Dice-Z-T-LL.pdf")

In [None]:
# Conditioning pair: horizontal stripes together with dark spots.
A = 'St_H'
B = 'Sp_D'

# One row of measures per remaining motif; the leading contingency matrix
# returned by the helper is dropped with [1:].
df_coocc_sp3_vs_St_H_Sp_D = pd.DataFrame(
    [co_occ_sp3_measures_vs_AB(A, B, motif)[1:]
     for motif in ('Mono', 'Bltc', 'Sp_L', 'Maze', 'St_D', 'St_V', 'Sddl', 'Eyes', 'Area')],
    index=["Mono", "Bltc", 'Sp-L', 'Maze', 'St-D', 'St-V', 'Sddl', "Eyes", "Area"],
    columns=['Freq', 'Jaccard', 'Dice', 'Simpson', 'Log-likelihood', 'Z-score', 'T-score']
)

# Plot the table built in this cell; the call previously passed
# df_coocc_sp3_vs_St_HD, so the (St-H, St-D) data appeared under this caption.
tri_allplots(df_coocc_sp3_vs_St_H_Sp_D, "(St-H, Sp-D) +", "co-occ_triple_St-H_Sp-D_Raw-OC-JI-Dice-Z-T-LL.pdf")

In [None]:
# Conditioning pair: horizontal stripes together with light spots.
A = 'St_H'
B = 'Sp_L'

# One row of measures per remaining motif; the leading contingency matrix
# returned by the helper is dropped with [1:].
df_coocc_sp3_vs_St_H_Sp_L = pd.DataFrame(
    [co_occ_sp3_measures_vs_AB(A, B, motif)[1:]
     for motif in ('Mono', 'Bltc', 'Sp_D', 'Maze', 'St_D', 'St_V', 'Sddl', 'Eyes', 'Area')],
    index=["Mono", "Bltc", 'Sp-D', 'Maze', 'St-D', 'St-V', 'Sddl', "Eyes", "Area"],
    columns=['Freq', 'Jaccard', 'Dice', 'Simpson', 'Log-likelihood', 'Z-score', 'T-score']
)

tri_allplots(df_coocc_sp3_vs_St_H_Sp_L, "(St-H, Sp-L) +", "co-occ_triple_St-H_Sp-L_Raw-OC-JI-Dice-Z-T-LL.pdf")

In [None]:
# Conditioning pair: vertical stripes together with dark spots.
A = 'St_V'
B = 'Sp_D'

# One row of measures per remaining motif; the leading contingency matrix
# returned by the helper is dropped with [1:].
df_coocc_sp3_vs_St_V_Sp_D = pd.DataFrame(
    [co_occ_sp3_measures_vs_AB(A, B, motif)[1:]
     for motif in ('Mono', 'Bltc', 'Sp_L', 'Maze', 'St_H', 'St_D', 'Sddl', 'Eyes', 'Area')],
    index=["Mono", "Bltc", 'Sp-L', 'Maze', 'St-H', 'St-D', 'Sddl', "Eyes", "Area"],
    columns=['Freq', 'Jaccard', 'Dice', 'Simpson', 'Log-likelihood', 'Z-score', 'T-score']
)

tri_allplots(df_coocc_sp3_vs_St_V_Sp_D, "(St-V, Sp-D) +", "co-occ_triple_St-V_Sp-D_Raw-OC-JI-Dice-Z-T-LL.pdf")

In [None]:
# Conditioning pair: vertical stripes together with light spots.
A = 'St_V'
B = 'Sp_L'

# One row of measures per remaining motif; the leading contingency matrix
# returned by the helper is dropped with [1:].
df_coocc_sp3_vs_St_V_Sp_L = pd.DataFrame(
    [co_occ_sp3_measures_vs_AB(A, B, motif)[1:]
     for motif in ('Mono', 'Bltc', 'Sp_D', 'Maze', 'St_H', 'St_D', 'Sddl', 'Eyes', 'Area')],
    index=["Mono", "Bltc", 'Sp-D', 'Maze', 'St-H', 'St-D','Sddl', "Eyes", "Area"],
    columns=['Freq', 'Jaccard', 'Dice', 'Simpson', 'Log-likelihood', 'Z-score', 'T-score']
)

tri_allplots(df_coocc_sp3_vs_St_V_Sp_L, "(St-V, Sp-L) +", "co-occ_triple_St-V_Sp-L_Raw-OC-JI-Dice-Z-T-LL.pdf")