In [1]:
# Load modules
import pandas as pd
import numpy as np
from Ontology import Ontology
from IPython.display import display

# Read Ontology  

In [2]:
## Read ontology files
# -- First argument: a 2-column table of (child term, parent term) pairs, i.e. the term hierarchy
# -- Second argument: a 2-column table of (gene, term) annotations
#    NOTE(review): this order is read off the file names passed in the call below; confirm it against the Ontology constructor's signature.
# -- This example is the S. cerevisiae (budding yeast) Gene Ontology used in Yu et al. Translation of Genotype to Phenotype by a Hierarchy of Cell Subsystems. Cell Syst. 2016 Feb 24;2(2):77-88.
ont = Ontology('example/yeast_child_2_parent.txt', 'example/yeast_gene_2_term.txt')

### Propagate gene-to-term annotations.
# -- If gene g is annotated to term t, then we ensure that g is also annotated to all ancestral terms of t
ont.propagate_annotations()

Done constructing ontology
0


# Make Genotypes

In [3]:
# Pairs of genes that were knocked out together (one 2-tuple of gene names per pair)
double_knockouts = [
    ('YGR135W', 'YER065C'),
    ('YOR085W', 'YKL133C'),
    ('YPL017C', 'YOR380W'),
    ('YDL192W', 'YAL005C'),
    ('YJL214W', 'YGR203W'),
    ('YKL213C', 'YPR109W'),
    ('YKL213C', 'YDR458C'),
    ('YDR185C', 'YDL048C'),
    ('YPL213W', 'YJR127C'),
    ('YLR242C', 'YOL095C'),
]

In [4]:
### Build a strain-by-gene dataframe from the list of double knockouts.
# -- Each row is one genetic strain's "genotype"; each column is one of the genes in the ontology (ont.genes).
# -- Each value represents the functional status of that gene in that strain.
#    In this example, a knocked-out gene has status -1 (complete loss-of-function), and every other gene has status 0 (wildtype or normal function).
#    More generally, one can imagine setting a fractional value between -1 and 0 to indicate a partial loss-of-function, or a value above 0 to indicate a gain-of-function.
strain_names = ['genotype_%s' % k for k in range(len(double_knockouts))]
genotypes = pd.DataFrame(0, index=strain_names, columns=ont.genes, dtype=np.int8)
for strain, knocked_out in zip(strain_names, double_knockouts):
    # Pass the column labels as a list so both genes of the pair are set in one assignment
    genotypes.loc[strain, list(knocked_out)] = -1

In [5]:
# For intuition, show the genotypes dataframe (rows are strains, columns are genes)
display(genotypes)

Unnamed: 0,15S_rRNA,21S_rRNA,AWA1,ENA6,ENS2,FPG1,HRA1,ICR1,KHR1,KHS1,...,tX(XXX)L,tY(GUA)D,tY(GUA)F1,tY(GUA)F2,tY(GUA)J1,tY(GUA)J2,tY(GUA)M1,tY(GUA)M2,tY(GUA)O,tY(GUA)Q
genotype_0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
genotype_1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
genotype_2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
genotype_3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
genotype_4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
genotype_5,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
genotype_6,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
genotype_7,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
genotype_8,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
genotype_9,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [6]:
# Sanity check: every strain is a double knockout, so each row of the genotypes
# dataframe must sum to exactly -2 (two genes at -1, every other gene at 0).
knockout_row_sums = genotypes.sum(axis=1)
display(knockout_row_sums)
# Enforce the invariant instead of relying on the reader to eyeball the output;
# the sums are displayed first so a failure can be diagnosed from the table above.
assert (knockout_row_sums == -2).all(), "each genotype should have exactly two knocked-out genes"

genotype_0   -2
genotype_1   -2
genotype_2   -2
genotype_3   -2
genotype_4   -2
genotype_5   -2
genotype_6   -2
genotype_7   -2
genotype_8   -2
genotype_9   -2
dtype: int64

# Compute Ontotypes

In [7]:
## Calculate a strain-by-term dataframe
# -- Each row represents a genetic strain's ontotype, and each column represents a term in the ontology.
# -- Each value is calculated by simply summing the functional status of the genes in that term.
#    In this example, a term's value is 0 if none of its genes have been knocked out, -1 if exactly one gene has been knocked out, and -2 if two genes have been knocked out.
# -- This simple calculation was used in Yu et al. Translation of Genotype to Phenotype by a Hierarchy of Cell Subsystems. Cell Syst. 2016 Feb 24;2(2):77-88.
#    However, one can imagine different formulas for calculating the ontotype.
# -- NOTE(review): the resulting dataframe is displayed with a 0..9 integer index rather than the 'genotype_N' row labels;
#    presumably the rows keep the same order as `genotypes` -- confirm against Ontology.get_features.
ontotypes = ont.get_features(genotypes, prop='matrix_mult', format=pd.DataFrame)

Removing 4698 terms with no mutations among samples


In [8]:
# For intuition, show the ontotypes dataframe (rows are strains, columns are ontology terms)
display(ontotypes)

Unnamed: 0,GO_0000002,GO_0000003,GO_0000060,GO_0000070,GO_0000087,GO_0000139,GO_0000166,GO_0000226,GO_0000267,GO_0000278,...,GO_0071842,GO_0071844,GO_0071944,GO_0072594,GO_0072655,GO_0080090,GO_0080129,GO_0090304,GO_00SUPER,GO_2000112
0,0,0,0,0,0,0,0,0,0,0,...,-1,-1,0,0,0,0,-1,0,-2,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,-2,0
2,0,-1,0,-1,-1,0,0,-1,0,-1,...,-1,0,0,0,0,-1,0,-2,-2,-1
3,0,0,-1,0,0,0,-2,0,-1,0,...,-1,0,-1,-1,-1,0,0,0,-2,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,-1,0,0,0,0,0,-2,0
5,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,-1,-2,0
6,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,-1,-2,0
7,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,-2,0
8,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,-1,0,-2,-2,-1
9,-1,-1,0,0,0,-1,-1,0,0,0,...,-1,0,0,0,0,0,0,-1,-2,0
