In [1]:
#for questions: karenmei@ucsd.edu

#Goal: To evaluate the custom hierarchy: #Metric 1
#	   Metric #1: How well does the model capture novel synapse proteins? (some of which have been recently uncovered by our collaborators using AP/MS/MS)

#	   code for customizing ontologies: DDOT: https://github.com/michaelkyu/ddot/blob/master/examples/Tutorial.ipynb
import numpy as np
from igraph import *
import pandas as pd
import sys

import sys
sys.path.append('C:\\Users\\Anubhav\\Documents\\GitHub\\ddot')

import ddot
from ddot import Ontology
import matplotlib
#matplotlib.use("TKAgg")
#print(matplotlib.get_backend())
from matplotlib import pyplot as plt
import networkx as nx
import csv
import scipy.stats as ss
from scipy.stats import hypergeom
from statsmodels.sandbox.stats.multicomp import multipletests

from collections import defaultdict
# `os` is used here but is not imported by any line of this cell, so a fresh
# kernel would stop with a NameError; import it right where it is needed.
import os
# Flag read by the Intel OpenMP runtime: tolerate the runtime library being
# loaded more than once instead of aborting the process.
os.environ['KMP_DUPLICATE_LIB_OK']='True'

import networkx as nx 

import matplotlib.pyplot as plt 
%matplotlib inline

In [2]:
def jaccard(a, b):
	"""Return the Jaccard index |A & B| / |A | B| of two collections.

	Both arguments are converted to sets first, so duplicates and ordering
	are ignored.

	Parameters
	----------
	a, b : iterable of hashable items

	Returns
	-------
	float
		A value between 0 and 1. When both collections are empty the union
		is empty as well; 0.0 is returned in that case (no overlap) instead
		of raising ZeroDivisionError.
	"""
	s1=set(a)
	s2=set(b)
	union_size = len(s1 | s2)
	if union_size == 0:
		# Two empty collections: the ratio is undefined, report "no overlap".
		return 0.0
	return len(s1 & s2) / union_size

def metric_1(ont_file, test_gene_list):
	"""Score how well the genes of an ontology overlap a test gene list.

	The ontology is loaded from ``ont_file`` with ``Ontology.from_table``; its
	``genes`` are compared with ``test_gene_list`` through ``jaccard``. The
	score is printed and returned.
	"""
	ontology = Ontology.from_table(ont_file)
	recovery = jaccard(ontology.genes, test_gene_list)
	print('recovery of test genes:', recovery)
	return recovery

### Analyzing synapse ontology

In [3]:
def ontology_recursion(node, ontology, dic):
    """Collect the genes attached to ``node`` or to any term below it.

    Parameters
    ----------
    node : hashable
        Term whose gene set is wanted.
    ontology : pandas.DataFrame
        Edge table with ``Parent``, ``Child`` and ``EdgeType`` columns. A row
        whose ``EdgeType`` is ``'Gene-Term'`` adds the value in ``Child`` to
        the gene set of ``Parent``; a row whose ``EdgeType`` is
        ``'Child-Parent'`` makes the term in ``Child`` a sub-term of
        ``Parent``. Rows with any other edge type are ignored.
    dic : dict
        Mapping term -> set of genes, filled in place for ``node`` and every
        term reachable from it. A plain ``dict`` works as well as a
        ``defaultdict(set)``. It doubles as the memo: a term that is already
        a key is treated as resolved and is not traversed again, so pass a
        fresh mapping for every ontology.

    Returns
    -------
    set
        The set stored under ``dic[node]`` (the same object, not a copy).

    Notes
    -----
    Because resolved terms are reused, a term reachable through several
    parents is expanded only once, and a cyclic edge table terminates instead
    of recursing without end (terms on a cycle then receive whatever had been
    gathered when the cycle was closed).
    """
    if node in dic:
        return dic[node]

    # Register the term before descending so that a cycle back to it is
    # answered from the memo instead of recursing again.
    genes = dic.setdefault(node, set())

    # Get children of node
    children_df = ontology[ontology.Parent == node]

    for child, edge_type in zip(children_df.Child, children_df.EdgeType):
        if edge_type == 'Gene-Term':
            genes.add(child)
        elif edge_type == 'Child-Parent':
            genes.update(ontology_recursion(child, ontology, dic))

    return genes

In [4]:
# Build, for the root term and every term below it, the set of genes
# annotated to that term or to any of its descendants.
root = 'GO:0045202'
synapse_ont = pd.read_table("synapse.txt")
synapse_dic = defaultdict(set)
x = ontology_recursion(root, synapse_ont, synapse_dic)
# Keep only the terms that ended up with at least one gene; the result is a
# plain dict, so later look-ups can no longer create empty entries.
synapse_dic = {term: genes for term, genes in synapse_dic.items() if genes}

  This is separate from the ipykernel package so we can avoid doing imports until


### Analyzing CliXo ontology

In [5]:
#Loading clixo data
clixo_ont = pd.read_table("onttest.txt", dtype=str)
# Rename the edge types to the labels ontology_recursion looks for.
# `.str.replace` rewrites the substring 'gene'; the chained `Series.replace`
# then swaps values that are exactly 'default'.
clixo_ont.loc[:, 'EdgeType'] = clixo_ont.EdgeType.str.replace('gene', 'Gene-Term').replace('default', 'Child-Parent')

# Adding an artificial root that is the direct parent of every value found in
# the Parent column.
root = 'root'
Parents = set(clixo_ont.Parent)
# Use first-appearance order for the new rows: iterating the set directly
# yields an order that can change from run to run for strings, which made the
# row order of the exported table irreproducible.
root_children = list(dict.fromkeys(clixo_ont.Parent))
data = pd.DataFrame(
    {
        "Parent": [root] * len(root_children),
        "Child": root_children,
        "EdgeType": ["Child-Parent"] * len(root_children),
    },
    dtype=str,
)

# DataFrame.append was removed in pandas 2.0; pd.concat is its replacement and,
# like append, keeps the original index labels of both frames.
clixo_ont = pd.concat([clixo_ont, data])

  


In [6]:
# Gather the gene set of every term reachable from the current `root`
# into clixo_dic (term -> set of genes).
clixo_dic = defaultdict(set)
x = ontology_recursion(root, clixo_ont, clixo_dic)

In [7]:
# Number of terms that received an entry in clixo_dic
len(clixo_dic)

59

In [8]:
# Number of distinct values in the Parent column, for comparison with len(clixo_dic)
len(clixo_ont.Parent.unique())

59

In [9]:
# Will map each key of clixo_dic to the synapse_dic key chosen for it
new_labels = {}

In [10]:
# For every CliXO term pick the synapse term whose gene set has the highest
# Jaccard score with it. Ties keep the first term encountered; a term with no
# overlap at all (best score stays 0) falls back to "GO:0045202".
for clixo_term, clixo_genes in clixo_dic.items():
    best_term, best_score = "GO:0045202", 0
    for synapse_term, synapse_genes in synapse_dic.items():
        score = jaccard(clixo_genes, synapse_genes)
        if score > best_score:
            best_term, best_score = synapse_term, score
    new_labels[clixo_term] = best_term
    

In [11]:
# Display the mapping from clixo_dic keys to their assigned labels
new_labels

{'825': 'GO:0098999',
 'root': 'GO:0045202',
 '809': 'GO:0060076',
 '804': 'GO:0099092',
 '811': 'GO:0099092',
 '813': 'GO:0031594',
 '842': 'GO:0045202',
 '806': 'GO:0044327',
 '840': 'GO:0098978',
 '841': 'GO:0045202',
 '838': 'GO:0099056',
 '839': 'GO:0098833',
 '814': 'GO:0098691',
 '831': 'GO:0098850',
 '787': 'GO:0043679',
 '820': 'GO:0030672',
 '798': 'GO:0030285',
 '789': 'GO:0042734',
 '812': 'GO:0098688',
 '792': 'GO:0099092',
 '815': 'GO:0098897',
 '810': 'GO:0098982',
 '791': 'GO:0045202',
 '819': 'GO:0098563',
 '796': 'GO:0043197',
 '830': 'GO:0044326',
 '790': 'GO:0008021',
 '808': 'GO:0031594',
 '805': 'GO:0045202',
 '800': 'GO:0099524',
 '823': 'GO:0098837',
 '822': 'GO:0098691',
 '836': 'GO:0043197',
 '794': 'GO:0044305',
 '802': 'GO:0098844',
 '801': 'GO:0098982',
 '835': 'GO:0098843',
 '799': 'GO:0098794',
 '795': 'GO:0099524',
 '816': 'GO:0044305',
 '807': 'GO:0060077',
 '837': 'GO:0099059',
 '797': 'GO:0098981',
 '828': 'GO:0098891',
 '824': 'GO:1990026',
 '834': '

In [12]:
def replace(x):
    """Return the label stored for ``x`` in ``new_labels``; return ``x`` unchanged when it has none."""
    return new_labels.get(x, x)

# The frame was assembled from two tables and may carry repeated index labels;
# give it a clean positional index so the masked assignments below align
# unambiguously.
clixo_ont = clixo_ont.reset_index(drop=True)

# Every Parent value is a term, so the whole column can be relabelled.
clixo_ont.loc[:, 'Parent'] = clixo_ont.Parent.map(replace)

# On 'Gene-Term' rows the Child column holds a gene, not a term. Leave those
# untouched so that a gene identifier that happens to equal a term id is never
# rewritten into a label.
is_term_edge = clixo_ont.EdgeType != 'Gene-Term'
clixo_ont.loc[is_term_edge, 'Child'] = clixo_ont.loc[is_term_edge, 'Child'].map(replace)

# Several terms can receive the same label (new_labels is many-to-one), so
# relabelling can turn a term-to-term edge into a self-loop and can produce
# identical rows. Neither belongs in a hierarchy table: drop both.
is_self_loop = is_term_edge & (clixo_ont.Parent == clixo_ont.Child)
clixo_ont = clixo_ont.loc[~is_self_loop].drop_duplicates().reset_index(drop=True)

In [15]:
# Export the ontology table as tab-separated text, without the index column.
fn = '../output/final_clixo_ontology.txt'
clixo_ont.to_csv(fn, sep='\t', index=False)