# Brief Tutorial on iPython Notebooks
+ What is an iPython Notebook?

Briefly, an iPython notebook is a format for running and visualizing code. It takes advantage of your browser's ability to render attractive HTML documents in order to weave together words, code fragments and output.

+ What is it good for?

Teaching, since notebooks allow notes to be interspersed with the code. They are also good for visualizing and sharing notes.


Which is what we're going to do today.

In [2]:
import dendropy
from dendropy.calculate import treemeasure
import pandas
import sys
import glob
import numpy as np

Above, I imported a handful of modules. I thought I might need them.

In [5]:
def initializer(path='test/p3p511.tre'):
    """Build the reference dataframe of branch lengths from a Nexus tree.

    Args:
        path: Nexus tree file to read. Defaults to the tutorial's
            'test/p3p511.tre'.

    Returns:
        A pandas DataFrame with one column, 'true', holding the edge lengths
        in preorder. The same edge lengths are also used as the row index.
    """
    # Load the tree as unrooted.
    tree = dendropy.Tree.get(path=path, schema="nexus", rooting="default-unrooted")
    # Collect the edge lengths in preorder.
    edges = [edge.length for edge in tree.preorder_edge_iter()]
    # The first edge in preorder is forced to length 0.
    # NOTE(review): presumably this is the root edge, which has no length
    # of its own -- confirm against the input trees.
    edges[0] = 0
    # The edge lengths serve both as the 'true' column and as the row labels.
    return pandas.DataFrame({'true': edges}, index=edges)

Above, I read in a tree from a Nexus file (`test/p3p511.tre`), extracted the branch lengths, and loaded them into a pandas dataframe.

In [6]:
# Build the reference dataframe; the notebook displays the returned value below.
initializer()

Unnamed: 0,true
0.0,0.0
9.356401,9.356401
9.356401,9.356401
10.137514,10.137514
1.043598,1.043598
8.312804,8.312804
1.720851,1.720851
1.5123,1.5123
2.448403,2.448403
0.303546,0.303546


In [8]:
def get_tree_list(pattern='test/*.con'):
    """Read every tree file matching a glob pattern into one TreeList.

    Args:
        pattern: glob pattern selecting the Nexus files to read. Defaults to
            the tutorial's 'test/*.con'.

    Returns:
        A (treelist, container) tuple: the dendropy TreeList of parsed trees
        and the list of filenames they were read from, in the same order.
    """
    # glob returns matches in arbitrary order; sorting keeps the
    # filename/tree pairing reproducible between runs.
    container = sorted(glob.glob(pattern))
    treelist = dendropy.TreeList()
    for filename in container:
        print("processing file %s" % filename)
        # extract_comment_metadata=True keeps the node annotations that are
        # later looked up by name (e.g. 'length_hpd95').
        tree = dendropy.Tree.get(path=filename, schema="nexus", extract_comment_metadata=True, rooting="default-unrooted")
        treelist.append(tree)
    return (treelist, container)

Above, we use glob to find all the files with a certain extension. Then, we iterate over those files, reading them in with Dendropy and parsing any annotations on them. Finally, we return the list of trees together with the list of filenames, to use in the next functions.

In [10]:
# Load the trees matching test/*.con; the notebook echoes the returned (treelist, filenames) tuple.
get_tree_list()

processing file test/0.phy.out.ant.tre.con
processing file test/11.phy.out.ant.tre.con


(<dendropy.datamodel.treecollectionmodel.TreeList at 0x108ca9310>,
 ['test/0.phy.out.ant.tre.con', 'test/11.phy.out.ant.tre.con'])

In [28]:
def proc_trees(treelist, true_df=None):
    """Compare each tree's annotated branch-length estimates with the truth.

    For every tree, the 'length_hpd95' and 'length_median' node annotations
    are read in preorder and written into the reference dataframe as these
    columns:

        min, max    -- first and second element of the 'length_hpd95' value
        boolcol     -- min < true
        boolcolmax  -- max > true
        med         -- the 'length_median' value
        devcol      -- med - true

    Args:
        treelist: iterable of trees whose nodes expose
            ``annotations.findall(name=...)``.
        true_df: dataframe with a 'true' column and one row per node. When
            omitted, the module-level ``df`` is used.

    Returns:
        A list with one dataframe per tree, in the order of ``treelist``.
        Each entry is an independent copy, so it keeps that tree's values.

    Note:
        The reference dataframe is still filled in place, so after the call
        it holds the columns of the last tree processed.
        NOTE(review): assumes every node carries both annotations and that
        the HPD value is a (lower, upper) pair; a node without them raises
        IndexError -- confirm against the input files.
    """
    frame = df if true_df is None else true_df
    print(treelist)
    df_list = []
    for tree in treelist:
        print('Calculating tree: %s' % tree)
        nodes = list(tree.preorder_node_iter())
        node_hpd = [nd.annotations.findall(name='length_hpd95') for nd in nodes]
        node_med = [nd.annotations.findall(name='length_median') for nd in nodes]

        # list(...) is needed because dict views cannot be indexed on Python 3.
        hpd_bounds = [list(found.values_as_dict().values())[0] for found in node_hpd]
        lower = [float(bounds[0]) for bounds in hpd_bounds]
        upper = [float(bounds[1]) for bounds in hpd_bounds]
        frame['min'] = pandas.Series(lower, index=frame.index)
        frame['max'] = pandas.Series(upper, index=frame.index)
        frame['boolcol'] = frame['min'] < frame['true']
        frame['boolcolmax'] = frame['max'] > frame['true']

        medians = [float(list(found.values_as_dict().values())[0]) for found in node_med]
        frame['med'] = pandas.Series(medians, index=frame.index)
        frame['devcol'] = frame['med'] - frame['true']

        # Append a snapshot: appending `frame` itself would make every list
        # entry alias one object that the next iteration overwrites.
        df_list.append(frame.copy())
    return df_list

This takes the values in node_hpd and breaks them apart into individual lists of values.

Then we crunch them into a pandas dataframe and create two columns, which are boolean.

In [31]:

def count_correct(df_list, container):
    """Export each tree's dataframe to CSV and report how many nodes pass.

    Dataframes and filenames are paired by position: the i-th dataframe is
    written to '<i-th filename>.csv'. For each pair, the number of True
    values in 'boolcol' and 'boolcolmax' is printed.

    Args:
        df_list: dataframes with boolean 'boolcol' and 'boolcolmax' columns.
        container: filenames, in the same order as ``df_list``.

    Returns:
        A (min_true, max_true) tuple of ints for the last dataframe
        processed, or (0, 0) when there is nothing to process.
    """
    min_true = max_true = 0
    # zip pairs each file with its own dataframe. A nested loop would write
    # every dataframe to every file, leaving only the last one on disk.
    for file_name, frame in zip(container, df_list):
        print('Exporting %s' % file_name)
        min_true = int((frame['boolcol'] == True).sum())
        max_true = int((frame['boolcolmax'] == True).sum())
        frame.to_csv("%s.csv" % file_name)
        # The whitespace in this message is spelled out with escapes so it
        # does not depend on how the source line is indented.
        print('Number of nodes above minimum age %s \n \t\tNumber of nodes under maximum: %s' % (min_true, max_true))
    return (min_true, max_true)
		

So there, all our functions are defined. Now we can call them all.

In [32]:
if __name__ == "__main__":
    # Run the whole pipeline. `df` must stay a module-level name because
    # proc_trees reads it as a global when called with one argument.
    df = initializer()
    treelist, container = get_tree_list()
    df_list = proc_trees(treelist)
    # Named min_true/max_true so the builtins min() and max() are not rebound.
    min_true, max_true = count_correct(df_list, container)


processing file test/0.phy.out.ant.tre.con
processing file test/11.phy.out.ant.tre.con
<TreeList 0x109cc8590 'None': [<Tree object at 0x1099dbb10>, <Tree object at 0x109c31d90>]>
Calculating tree: (T1:9.35640137939,(T25:10.1375140233,(T2:8.31280356775,((((((T23:2.18950116362,T24:2.18950116362)1.00000000:0.0300579732931,((T22:0.882878879368,(T20:0.323950072248,T21:0.323950072248)1.00000000:0.558928807595)1.00000000:0.86679296346,((T16:0.715844268354,(T19:0.383669684338,(T17:0.103228071259,T18:0.103228071259)1.00000000:0.280441612419)1.00000000:0.332174582897)1.00000000:0.296698209859,(T14:0.0225187611628,T15:0.0225187611628)1.00000000:0.990023706061)1.00000000:0.737129361769)1.00000000:0.469887298606)1.00000000:0.108143996076,(T12:0.494656263466,T13:0.494656263466)1.00000000:1.83304687432)1.00000000:0.303545696931,(T10:0.606499327399,T11:0.606499327399)1.00000000:2.0247494974)1.00000000:2.44840335077,(T5:2.90347583317,((T8:0.903562158024,T9:0.903562158024)1.00000000:0.786355655195,(T6:0

In [30]:
# Inspect the module-level dataframe, which proc_trees fills in place.
print(df)

                true           min        max boolcol boolcolmax       med  \
0.000000    0.000000  0.000000e+00   0.000000   False      False  0.000000   
9.356401    9.356401  8.275043e+00  10.564130    True       True  9.285030   
9.356401    9.356401  8.275043e+00  10.564130    True       True  9.285030   
10.137514  10.137514  8.380176e+00  12.342090    True       True  9.978896   
1.043598    1.043598  1.476971e-01   2.238679    True       True  1.082332   
8.312804    8.312804  7.626370e+00   8.818500    True       True  8.208650   
1.720851    1.720851  1.115550e+00   2.694770    True       True  1.928980   
1.512300    1.512300  7.712150e-02   1.346300    True      False  0.687868   
2.448403    2.448403  2.645940e+00   3.761650   False       True  3.179720   
0.303546    0.303546  6.014360e-02   0.586637    True       True  0.256691   
0.108144    0.108144  4.056030e-06   0.146360    True       True  0.056308   
0.030058    0.030058  5.823260e-08   0.113640    True       True