## Sequence Alignment and phylogenetic tree

I wanted to create an image of a phylogenetic tree. To do this, I used the ETE Toolkit (http://etetoolkit.org/docs/latest/tutorial/tutorial_trees.html).

The protein sequence alignment was generated with MUSCLE on the EBI website, from which I downloaded the .clw and .nh files. To make Figure 2 for the publication, the alignment was combined with data from Table S3.

In [8]:
#just the imports and a quick test
from ete3 import Tree, NodeStyle
# Smoke test: parse a tiny three-leaf Newick string and print the tree
# as ASCII art to confirm that ete3 is importable and working.
newick = "((a,b),c);"
t = Tree(newick)
print(t)
# t.show()  # left disabled



      /-a
   /-|
--|   \-b
  |
   \-c


In [20]:
## file i/o
# Load the rplB tree from the Newick file downloaded from Muscle@EBI.
rplB_tree = Tree("rplB.nh")


def _bg_style(hex_color):
    """Return a new NodeStyle whose "bgcolor" entry is set to hex_color."""
    style = NodeStyle()
    style["bgcolor"] = hex_color
    return style


# --- Actinobacteria -------------------------------------------------------
actino_list = [
    "Bifidobacterium_longum",
    "Bifidobacterium_bifidum",
    "Micrococcus_luteus",
    "Mycobacterium_smegmatis",
    "Rhodococcus_jostii",
    "Cellulomonas_gilvus",
    "Streptomyces_griseorubens",
    "Streptomyces_venezuelae",
]
a_style = _bg_style("#b8ab88")
# The style is attached to the common ancestor of the listed leaves.
actino_root = rplB_tree.get_common_ancestor(actino_list)
actino_root.set_style(a_style)

# --- Firmicutes -----------------------------------------------------------
# Two separate leaf lists, each with its own common ancestor; both ancestors
# receive the same style object.
firmicutes_list1 = [  # the bacilli side
    "Anaerococcus_hydrogenalis",
    "Lactobacillus_casei",
    "Streptococcus_agalactiae",
    "Paenibacillus_polymyxa",
    "Bacillus_subtilis",
    "Bacillus_cereus",
]
firmicutes_list2 = [  # the clostridium side
    "Faecalibacterium_prausnitzii",
    "Clostridium_ljungdahlii",
    "Coprococcus_comes",
    "Ruminococcus_gnavus",
    "Dorea_longicatena",
    "Sulfobacillus_thermosulfidooxidans",
]
f_style = _bg_style("#ffffc0")
firmicutes1_root = rplB_tree.get_common_ancestor(firmicutes_list1)
firmicutes1_root.set_style(f_style)
firmicutes2_root = rplB_tree.get_common_ancestor(firmicutes_list2)
firmicutes2_root.set_style(f_style)

# --- Bacteroidetes --------------------------------------------------------
bacteroides_list = [
    "Algoriphagus_HL-49",
    "Prevotella_ruminicola",
    "Bacteroides_fragilis",
    "Bacteroides_thetaiotaomicron",
    "Cellulophaga_baltica",
    "Chryseobacterium_indologenes",
]
b_style = _bg_style("#d7914d")
bacteroides_root = rplB_tree.get_common_ancestor(bacteroides_list)
bacteroides_root.set_style(b_style)

# --- Proteobacteria -------------------------------------------------------
g_proteo_list = [
    "Francisella_novicida",
    "Legionella_pneumophila",
    "Citrobacter_freundii",
    "Halomonas_HL-48",
    "Halomonas_HL-93",
    "Pseudomonas_putida",
]
b_proteo_list = [
    "Delftia_acidovorans",
    "Cupriavidus_necator",
    "Alcaligenes_faecalis",
]
a_proteo_list = [
    "Acidiphilium_cryptum",
    "Erythrobacter_HL-111",
    "Paracoccus_denitrificans",
    "Roseibaca_HL-91",
    "Agrobacterium_tumefaciens",
    "Rhodopseudomonas_palustris",
]
# The delta group is currently not styled:
# d_proteo_list = ["Myxococcus_xanthus", "Stigmatella_aurantiaca"]
# d_proteo_root = rplB_tree.get_common_ancestor(d_proteo_list)

# Resolve the three ancestors first (gamma, beta, alpha order), then style.
g_proteo_root, b_proteo_root, a_proteo_root = (
    rplB_tree.get_common_ancestor(leaves)
    for leaves in (g_proteo_list, b_proteo_list, a_proteo_list)
)

# The gamma and beta groups share one style ...
p_style = _bg_style("#6fabbf")
for _node in (g_proteo_root, b_proteo_root):
    _node.set_style(p_style)

# ... while the alpha group (the other part of proteobacteria) gets its own.
p2_style = _bg_style("#c48d94")
a_proteo_root.set_style(p2_style)

# Print the ASCII tree, then write the styled tree to an SVG file. render()
# is kept as the last expression so the notebook echoes its return value.
print(rplB_tree)
# rplB_tree.show()
rplB_tree.render("test.svg", dpi=300)


                  /-Fibrobacter_succinogenes
                 |
               /-|   /-Acidiphilium_cryptum
              |  |  |
              |  |  |      /-Erythrobacter_HL-111
              |   \-|   /-|
              |     |  |  |   /-Paracoccus_denitrificans
              |     |  |   \-|
              |      \-|      \-Roseibaca_HL-91
            /-|        |
           |  |        |   /-Agrobacterium_tumefaciens
           |  |         \-|
           |  |            \-Rhodopseudomonas_palustris
           |  |
           |  |   /-Francisella_novicida
           |  |  |
           |   \-|   /-Legionella_pneumophila
           |     |  |
           |     |  |      /-Citrobacter_freundii
           |      \-|     |
           |        |   /-|      /-Halomonas_HL-48
           |        |  |  |   /-|
         /-|        |  |   \-|   \-Halomonas_HL-93
        |  |         \-|     |
        |  |           |      \-Pseudomonas_putida
        |  |           |
        |  |           |  

{'faces': [[198.97081592107963,
   209.0,
   309.97081592107963,
   225.0,
   31,
   'Delftia_acidovorans'],
  [186.5470302116873,
   145.0,
   297.5470302116873,
   161.0,
   24,
   'Citrobacter_freundii'],
  [204.17811879153248,
   225.0,
   320.1781187915325,
   241.0,
   33,
   'Cupriavidus_necator'],
  [188.72699869836265,
   241.0,
   306.72699869836265,
   257.0,
   34,
   'Alcaligenes_faecalis'],
  [188.15708707268612, 177.0, 294.1570870726861, 193.0, 28, 'Halomonas_HL-93'],
  [174.51003630883054,
   257.0,
   290.51003630883054,
   273.0,
   36,
   'Algoriphagus_HL-49'],
  [184.9739672535452,
   273.0,
   306.9739672535452,
   289.0,
   39,
   'Prevotella_ruminicola'],
  [182.85647735836125,
   289.0,
   295.85647735836125,
   305.0,
   41,
   'Bacteroides_fragilis'],
  [183.70733712406656,
   305.0,
   355.70733712406656,
   321.0,
   42,
   'Bacteroides_thetaiotaomicron'],
  [201.59656093717882,
   321.0,
   320.5965609371788,
   337.0,
   44,
   'Cellulophaga_baltica'],
  [