# Pattern diversity among fish families

In [None]:
from ete3 import Tree, TreeStyle, add_face_to_node, RectFace, CircleFace
import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
import seaborn as sns

## Backbone tree from Betancur-R et al. (2017) BMC Evol Biol 17:162,
## loaded from a Newick file. Leaf names appear to follow
## "<Family>_<Genus>_<species>_<id>" (see the names listed in remove_fams);
## the code below relies only on the first "_"-separated token being the family.
t = Tree("12862_2017_958_MOESM2_ESM.nwk")


In [None]:
# Group the leaves of the backbone tree by family, then count how many
# families are represented by a single leaf, how many form a monophyletic
# group, and how many do not.

leaves = [leaf.name for leaf in t.get_leaves()]

# The family is the first "_"-separated token of a leaf name. Leaves are
# grouped on that exact token (not with str.startswith) so that a family whose
# name is a prefix of another family's name cannot pick up the other family's
# leaves. A dict keeps insertion order, so families stay in order of first
# appearance among the leaves.
fams_leaves = {}
for leaf in leaves:
    fams_leaves.setdefault(leaf.split("_")[0], []).append(leaf)

# Unique family names, in order of first appearance.
fams = list(fams_leaves)

num_single = 0
num_monophyly = 0
num_non_monophyly = 0

non_monophyletic_fams = []

for fam, fam_leaves in fams_leaves.items():
    if len(fam_leaves) <= 1:
        # A single leaf cannot be tested for monophyly.
        num_single += 1
        continue

    # Only the first item of check_monophyly's result (the boolean verdict)
    # is used here.
    is_monophyly = t.check_monophyly(values=fam_leaves, target_attr="name")
    if is_monophyly[0]:
        num_monophyly += 1
    else:
        num_non_monophyly += 1
        non_monophyletic_fams.append(fam)

print("single: ", num_single)
print("monophyletic: ", num_monophyly)
print("non_monophyletic: ", num_non_monophyly)
print("------")
print("total fam: ", num_single + num_monophyly + num_non_monophyly)
print("------")
print("non_monophyletic_fams:")
print(non_monophyletic_fams)


In [None]:
# For every family that failed the monophyly check, print its name followed
# by the node returned as the common ancestor of all its leaves, so the taxa
# that break up the family can be inspected.
for problem_fam in non_monophyletic_fams:
    members = fams_leaves[problem_fam]
    print(problem_fam)
    print(t.get_common_ancestor(members))

In [None]:
## Leaves to remove from the tree for convenience's sake, as an expedient
## to keep the remaining families monophyletic.
## NOTE: despite the name, the entries are full leaf names (individual
## tips), not family names; each one is looked up by node name and deleted.

remove_fams = [
    "Clupeidae_Jenkinsia_lamprotaenia_G01339",
    "Alepocephalidae_Talismania_bifurcata_E00359",
    "Alepocephalidae_Rouleina_attrita_E00977",
    "Alepocephalidae_Xenodermichthys_copei_E00779",
    "Phosichthyidae_Polymetme_corythaeola_sp_E00036",
    "Synodontidae_Saurida_gracilis_E00703",
    "Synodontidae_Harpadon_microchir_G01313",
    "Synodontidae_Saurida_undosquamis_G01475",
    "Ipnopidae_Bathypterois_atricolor_G01225",
    "Scopelarchidae_Scopelarchus_sp_E00493",
    "Chlorophthalmidae_Parasudis_truculenta_E01034",
    "Paralepididae_Stemonosudis_intermedia_macrura_E00973",
    "Trachichthyidae_Hoplostethus_occidentalis_atlanticus_E01018",
    "Trachichthyidae_Gephyroberyx_darwinii_E00649",
    "Hemiramphidae_Oxyporhamphus_micropterus_E00397",
    "Hemiramphidae_Hemiramphus_brasiliensis_E00098",
    "Zenarchopteridae_Dermogenys_collettei_G01275",
    "Grammatidae_Gramma_loreto_E00280",
    "Chaenopsidae_Neoclinus_blanchardi_E00326",
    "Labrisomidae_Paraclinus_marmoratus_E00309",
    "Labrisomidae_Stathmonotus_stahli_E00317",
    "Cheilodactylidae_Cheilodactylus_fasciatus_E00795",
    "Cheilodactylidae_Cheilodactylus_pixi_E00797",
    "Acropomatidae_Acropoma_japonicum_G01188",
    "Acropomatidae_Malakichthys_elegans_N01922",
    "Scorpaenidae_Scorpaenodes_albaiensis_E00532",
    "Scorpaenidae_Dendrochirus_zebra_E00897",
    "Scorpaenidae_Pterois_radiata_E00850",
    "Scorpaenidae_Pterois_antennata_E00705",
    "Scorpaenidae_Pterois_miles_E00882",
    "Scorpaenidae_Scorpaenodes_guamensis_E00870",
    "Scorpaenidae_Neomerinthe_hemingwayi_E00619",
    "Scorpaenidae_Pontinus_rathbuni_E00463",
    "Scorpaenidae_Pontinus_longispinis_E01010",
    "Bathymasteridae_Rathbunella_hypoplecta_E00128",
    "Stichaeidae_Cebidichthys_violaceus_N30217",
    "Stichaeidae_Bryozoichthys_marjorius_E00442",
    "Nototheniidae_Aethotaxis_mitopteryx_G01528",
    "Nototheniidae_Dissostichus_eleginoides_G01279",
    "Nototheniidae_Notothenia_coriiceps_G01526",
    "Bathydraconidae_Parachaenichthys_charcoti_E00157",
    "Scombridae_Scomber_scombrus_E00626",
    "Scombridae_Scomber_japonicus_E00247",
    "Gempylidae_Paradiplospinus_gracilis_N15143"
]

# Delete the leaves listed in remove_fams from a copy of the tree, then repeat
# the single / monophyletic / non-monophyletic tally on the reduced tree.

# Work on a copy so the original tree `t` is left untouched.
t2 = t.copy()

for leaf_name in remove_fams:
    matches = t2.search_nodes(name=leaf_name)
    if not matches:
        # Name the offending entry instead of failing with a bare
        # "list index out of range" from matches[0].
        raise IndexError("leaf not found in tree: {}".format(leaf_name))
    matches[0].delete()

leaves_2 = [leaf.name for leaf in t2.get_leaves()]

# Regroup the remaining leaves by family. Every family in `fams` gets an
# entry (possibly an empty list). Leaves are matched on the exact first
# "_"-separated token rather than with str.startswith, so a family whose name
# is a prefix of another family's name cannot pick up foreign leaves.
fams_leaves_2 = {fam: [] for fam in fams}
for leaf in leaves_2:
    fams_leaves_2.setdefault(leaf.split("_")[0], []).append(leaf)

num_single_2 = 0
num_monophyly_2 = 0
num_non_monophyly_2 = 0

non_monophyletic_fams_2 = []

for fam, fam_leaves in fams_leaves_2.items():
    if len(fam_leaves) <= 1:
        # Families with fewer than two remaining leaves (including none)
        # are all counted as "single".
        num_single_2 += 1
        continue

    # Only the first item of check_monophyly's result (the boolean verdict)
    # is used here.
    is_monophyly = t2.check_monophyly(values=fam_leaves, target_attr="name")
    if is_monophyly[0]:
        num_monophyly_2 += 1
    else:
        num_non_monophyly_2 += 1
        non_monophyletic_fams_2.append(fam)

print("single: ", num_single_2)
print("monophyletic: ", num_monophyly_2)
print("non_monophyletic: ", num_non_monophyly_2)
print("------")
print("total fam: ", num_single_2 + num_monophyly_2 + num_non_monophyly_2)
print("------")
print("non_monophyletic_fams:")
print(non_monophyletic_fams_2)

In [None]:
# Collapse the species-level tree t2 into a family-level tree t3.

# Step 1: label the common ancestor of each multi-leaf family with the
# family name.
for fam_name, members in fams_leaves_2.items():
    if len(members) > 1:
        clade_root = t2.get_common_ancestor(members)
        clade_root.name = fam_name

# Step 2: shorten every leaf name to its family (first "_"-separated token).
# Iterating over an ete3 tree yields its leaves.
for tip in t2:
    tip.name = tip.name.split("_")[0]

# Step 3: for every node, cache the set of (now family-level) leaf names
# found beneath it. This must run after the renaming above.
node2labels = t2.get_cached_content(store_attr="name")


def collapsed_leaf(node):
    """Return True when only one distinct leaf name is cached for `node`.

    Passed as `is_leaf_fn` when writing the tree, so such nodes are written
    as leaves.
    """
    return len(node2labels[node]) == 1


# Step 4: round-trip through Newick, treating single-family nodes as leaves.
collapsed_newick = t2.write(is_leaf_fn=collapsed_leaf, format=3)
t3 = Tree(collapsed_newick, format=3)


In [None]:
# Load the family-level pattern table and keep only the families that also
# occur in the tree.
df0 = pd.read_csv('fish-patterns-db-fam.csv')

# Unique family names in the table, sorted.
fishdb_fams = sorted(set(df0['family']))
print("fams in FishDB: ", len(fishdb_fams))

# Families present both in the tree (`fams`) and in the table.
fams_in_tree_fishdb = set(fams).intersection(fishdb_fams)
print("fams in Tree & FishDB: ", len(fams_in_tree_fishdb))

# Rows for the shared families, indexed by family name for direct lookup.
shared_rows = df0['family'].isin(fams_in_tree_fishdb)
df_inTree_fam = df0[shared_rows].set_index('family')

In [None]:
# Restrict the family-level tree to the families that also have pattern data
# (modifies t3 in place), then print the pruned tree. The names are passed in
# sorted order so the argument is deterministic.
shared_fams = sorted(fams_in_tree_fishdb)
t3.prune(shared_fams)
print(t3)

In [None]:
### Circle_Tree

# One entry per pattern: (column of df_inTree_fam, face colour, extra left
# margin). The position in this tuple is the aligned face column (0-10).
_PATTERN_FACES = (
    ("Area", "sienna", 10),        # area_fill (extra gap before first column)
    ("Eyes", "pink", 0),           # eyespot
    ("Sddl", "mediumorchid", 0),   # saddle
    ("St_V", "darkslateblue", 0),  # stripe_vert
    ("St_D", "royalblue", 0),      # stripe_diag
    ("St_H", "turquoise", 0),      # stripe_horz
    ("Maze", "#FAF500", 0),        # stripe_maze
    ("Sp_L", "orange", 0),         # spot_light
    ("Sp_D", "#35A16B", 0),        # spot_dark
    ("Bltc", "dimgray", 0),        # blotch
    ("Mono", "silver", 0),         # mono
)


def layout(node):
    """Style one tree node and attach its pattern faces.

    Every node gets steel-blue branch lines of width 4 and no node marker.
    For a leaf, the row of `df_inTree_fam` indexed by the leaf's name is
    looked up once, and a coloured rectangle is added in the aligned column
    of each pattern whose value equals 1. The leaf named 'Tetraodontidae'
    additionally gets a small orange-red marker in column 11.

    Raises KeyError if a leaf name is missing from `df_inTree_fam`.
    """
    node.img_style['vt_line_color'] = "steelblue"
    node.img_style['hz_line_color'] = "steelblue"
    node.img_style['size'] = 0
    node.img_style['vt_line_width'] = 4
    node.img_style['hz_line_width'] = 4

    if not node.is_leaf():
        return

    rF_w = 60
    rF_h = 12
    marginL = 2

    if node.name == 'Tetraodontidae':
        # Highlight marker, placed after all pattern columns.
        rectF = RectFace(width=30, height=6, fgcolor='white', bgcolor='orangered')
        rectF.margin_left = 20
        add_face_to_node(rectF, node, column=11, position="aligned")

    # Single lookup per leaf instead of one per pattern.
    patterns = df_inTree_fam.loc[node.name]

    for column, (pattern, color, extra_margin) in enumerate(_PATTERN_FACES):
        if patterns[pattern] == 1:
            rectF = RectFace(width=rF_w, height=rF_h, fgcolor='white', bgcolor=color)
            rectF.margin_left = marginL + extra_margin
            add_face_to_node(rectF, node, column=column, position="aligned")

                        
        
# Tree style for the figure: per-node styling and faces come from layout().
ts = TreeStyle()
ts.layout_fn = layout
# Leaf names are not drawn; only the faces added by layout() are shown.
ts.show_leaf_name = False
# "c" selects ete3's circular drawing mode.
ts.mode = "c"
ts.show_scale=False
ts.optimal_scale_level="full"
# Angles for the circular layout (degrees per the ete3 TreeStyle docs):
# start at 0 and span 270, matching the "270" in the output file name.
ts.arc_start = 0
ts.arc_span = 270

# Write the figure to a PDF 180 mm wide ...
t3.render(file_name="fam-tree-270.pdf", units='mm', w=180, tree_style=ts)
# ... and render it again with the "%%inline" target for display in the
# notebook. Kept as the last expression of the cell so its result is shown.
t3.render(file_name="%%inline", w=800, tree_style=ts)


In [None]:
# Load the pattern tables at the three taxonomic levels: family, genus and
# species (the family file is the same one read earlier into df0).
df_fam, df_gen, df_sp = map(
    pd.read_csv,
    (
        'fish-patterns-db-fam.csv',
        'fish-patterns-db-gen.csv',
        'fish-patterns-db-sp.csv',
    ),
)


In [None]:
# Preview the first rows of the family-level table.
df_fam.head()

In [None]:
# Preview the first rows of the genus-level table.
df_gen.head()

In [None]:
# Preview the first rows of the species-level table.
df_sp.head()

In [None]:
# Summary table: one row per pattern column, one column per taxonomic level,
# each cell being the column sum of the corresponding table (the name column
# is dropped before summing).
family_totals = df_fam.drop(columns=['family']).sum()
genus_totals = df_gen.drop(columns=['genus']).sum()
species_totals = df_sp.drop(columns=['species']).sum()

df = family_totals.to_frame(name='Family')
# Column assignment aligns the genus/species totals on the family row labels.
df['Genus'] = genus_totals
df['Species'] = species_totals

df

In [None]:
# Row labels of the summary table (the pattern names) ...
ptns = df.index.values
# ... and the same labels in reverse order, printed for inspection.
ptns_r = ptns[::-1]
print(ptns_r)

In [None]:
# Pattern occurrence counts per taxonomic level, with colour-code indicators.
#
# Five panels share one y axis (one row per pattern):
#   axes[0]    colour swatch for each pattern
#   axes[1]    left empty (only its x ticks are removed)
#   axes[2:5]  horizontal bars with the Family / Genus / Species counts

sns.set_context("talk")
sns.set_style("white")

fig, axes = plt.subplots(1, 5, figsize=(10, 12), sharey=True)
plt.subplots_adjust(wspace=0.6)

# Font size of the count written at the end of each bar.
FS=16

# One swatch colour per row, applied by position. The labels are the
# author's annotation of which pattern each colour stands for; this assumes
# the rows of df come in this order.
PALETTE = [
    'silver',         # Mono
    'dimgray',        # Bltc
    '#35A16B',        # Sp_D
    'orange',         # Sp_L
    '#FAF500',        # Maze
    'turquoise',      # St_H
    'royalblue',      # St_D
    'darkslateblue',  # St_V
    'mediumorchid',   # Sddl
    'pink',           # Eyes
    'sienna',         # Area
]

sns.despine(top=True, right=True, left=True, bottom=True)

# Count panels: Family, Genus, Species on axes[2], axes[3], axes[4], drawn
# with entries 2, 3 and 4 of the 'Blues' palette.
blues = sns.color_palette('Blues')
for rank, ax, shade in zip(('Family', 'Genus', 'Species'), axes[2:], blues[2:5]):
    counts = df[rank]
    sns.barplot(x=counts,
                y=counts.index,
                color=shade,
                ax=ax)
    # The rank name goes on top as a panel title; x ticks are removed.
    ax.set_xlabel(rank, labelpad=10, size=20)
    ax.xaxis.set_label_position('top')
    ax.xaxis.set_ticks_position('bottom')
    ax.set_xticks([])
    # Write each count at the end of its bar, with thousands separators.
    for row, count in enumerate(counts):
        ax.text(count, row, "{:,d}".format(count), color='black', ha="left", va="center", size=FS)

# Swatch panel: constant-length bars (10 on an x axis limited to 0-30),
# coloured from PALETTE.
sns.barplot(x=np.full_like(df['Family'], 10),
            y=df['Family'].index,
            palette=PALETTE,
            ax=axes[0])
axes[0].set_xticks([])
axes[0].set_xlim([0, 30])

axes[1].set_xticks([])

# Hide the row labels on the swatch panel and show them on the Family panel.
axes[0].tick_params(labelleft=False)
axes[2].tick_params(labelsize=20, labelleft=True)

# Offset the Family panel's labels by a pad of 60 and left-align their text.
axes[2].tick_params(axis='y', which='major', pad=60)
for tick_label in axes[2].yaxis.get_majorticklabels():
    tick_label.set_horizontalalignment('left')

plt.savefig('ptn_occurrences_with_color.pdf', bbox_inches="tight", pad_inches=0.0)
plt.show()
