In [17]:
# Load modules
import pandas as pd
import numpy as np
from Ontology import Ontology
from IPython.display import display

# Read Ontology  

In [18]:
## Build the ontology from its two input files
# -- 'example/yeast_child_2_parent.txt' is a 2-column table of (child term, parent term) pairs
# -- 'example/yeast_gene_2_term.txt' is a 2-column table of (gene, term) annotations
# -- NOTE(review): judging by the file names, the child-to-parent table is passed first and the
#    gene-to-term table second; confirm this order against the Ontology constructor's signature.
# -- This example is the S. cerevisiae (budding yeast) Gene Ontology used in Yu et al. Translation of Genotype to Phenotype by a Hierarchy of Cell Subsystems. Cell Syst. 2016 Feb 24;2(2):77-88.
ont = Ontology(
    'example/yeast_child_2_parent.txt',
    'example/yeast_gene_2_term.txt',
)

### Propagate gene-to-term annotations up the hierarchy.
# -- If gene g is annotated to term t, this ensures g is also annotated to every ancestral term of t
ont.propagate_annotations()

Done constructing ontology
0


# Make Genotypes

In [19]:
# One entry per genotype: the pair of genes that were knocked out in that genotype.
# (No line-continuation backslashes are needed inside the brackets.)
double_knockouts = [
    ('YGR135W', 'YER065C'),
    ('YOR085W', 'YKL133C'),
    ('YPL017C', 'YOR380W'),
    ('YDL192W', 'YAL005C'),
    ('YJL214W', 'YGR203W'),
    ('YKL213C', 'YPR109W'),
    ('YKL213C', 'YDR458C'),
    ('YDR185C', 'YDL048C'),
    ('YPL213W', 'YJR127C'),
    ('YLR242C', 'YOL095C'),
]

# Calculate Ontotypes

In [23]:
## Build the strain-by-term ontotype matrix
# -- Each row is the ontotype of one genetic strain; each column is one term in the ontology.
# -- Each value is calculated by simply summing the functional status of the genes in that term.
#    In this example, a term's value is 0 if none of its genes have been knocked out,
#    -1 if exactly one gene has been knocked out, and -2 if two genes have been knocked out.
# -- This simple calculation was used in Yu et al. Translation of Genotype to Phenotype by a Hierarchy of Cell Subsystems. Cell Syst. 2016 Feb 24;2(2):77-88.
#    However, one can imagine different formulas for calculating the ontotype.
# -- format='scipy.csr' requests the result as a sparse matrix.
ontotypes = ont.get_features(
    double_knockouts,
    prop='genes',
    format='scipy.csr',
)

Creating features 2016-07-28 13:15:49.389418
scipy.csr True
Making indices, indptr, data 2016-07-28 13:15:49.389829
Making sparse CSR matrix 2016-07-28 13:15:49.416848
Done creating features 2016-07-28 13:15:49.417492


In [24]:
# The ontotypes are currently represented as a scipy.sparse matrix; show the concrete type.
# print(...) with a single argument gives the same output on Python 2 and Python 3,
# whereas the bare `print x` statement is a SyntaxError on Python 3.
print(type(ontotypes))

# If needed, convert to a dense array. Note that this representation will probably require much more memory.
# The guard makes the cell safe to re-run: after the first conversion `ontotypes` is a dense
# array with no .toarray() method, so the conversion is skipped instead of raising AttributeError.
if hasattr(ontotypes, 'toarray'):
    ontotypes = ontotypes.toarray()

<class 'scipy.sparse.csr.csr_matrix'>
