# Annotate Ontology

In [44]:
import numpy as np
from igraph import *
import pandas as pd
import sys

import sys
sys.path.append('C:\\Users\\Anubhav\\Documents\\GitHub\\ddot')

import ddot
from ddot import Ontology
import matplotlib
from matplotlib import pyplot as plt
import networkx as nx
import csv
import scipy.stats as ss
from scipy.stats import hypergeom
from statsmodels.sandbox.stats.multicomp import multipletests

from collections import defaultdict
# `os` is not imported by any of the surrounding import lines, so bring it into
# scope here; without it the assignment below raises NameError.
import os

# Set KMP_DUPLICATE_LIB_OK so that loading a second copy of the OpenMP runtime
# is tolerated instead of aborting the process.
os.environ['KMP_DUPLICATE_LIB_OK'] = 'True'

import networkx as nx 

import matplotlib.pyplot as plt 
%matplotlib inline

In [45]:
def jaccard(a, b):
    """Return the Jaccard similarity of two collections.

    Both arguments are converted to sets, so duplicates and ordering are
    ignored. The score is ``|A & B| / |A | B|``.

    Args:
        a: Iterable of hashable items.
        b: Iterable of hashable items.

    Returns:
        The size of the intersection divided by the size of the union. When
        both inputs are empty the union is empty and the ratio is undefined;
        ``0.0`` is returned in that case instead of raising
        ``ZeroDivisionError``.
    """
    s1 = set(a)
    s2 = set(b)
    union = s1 | s2
    if not union:
        # Two empty collections: nothing is shared, report no similarity.
        return 0.0
    return len(s1 & s2) / len(union)

def metric_1(ont_file, test_gene_list):
    """Score how well an ontology's gene set matches a list of test genes.

    The ontology is loaded from ``ont_file`` with ``Ontology.from_table`` and
    its ``genes`` attribute is compared to ``test_gene_list`` using
    ``jaccard``. The score is printed and returned.

    Args:
        ont_file: Ontology table passed straight to ``Ontology.from_table``.
        test_gene_list: Collection of genes to compare against.

    Returns:
        The Jaccard score between the ontology's genes and ``test_gene_list``.
    """
    ontology_genes = Ontology.from_table(ont_file).genes
    test_recovery = jaccard(ontology_genes, test_gene_list)
    print('recovery of test genes:', test_recovery)
    return test_recovery

### Analyzing synapse ontology

In [46]:
def ontology_recursion(node, ontology, dic):
    """Collect every gene annotated to ``node`` or to any of its descendants.

    Args:
        node: Identifier of the term to resolve.
        ontology: DataFrame with ``Parent``, ``Child`` and ``EdgeType``
            columns. A row whose ``EdgeType`` is ``'Gene-Term'`` attaches the
            gene in ``Child`` to the term in ``Parent``; a row whose
            ``EdgeType`` is ``'Child-Parent'`` makes ``Child`` a sub-term of
            ``Parent``. Rows with any other edge type are ignored.
        dic: Mapping from term to gene set, filled in as a side effect. It must
            create an empty set for unseen keys (e.g. ``defaultdict(set)``).
            A sub-term that is already a key is treated as resolved and its
            stored set is reused rather than expanded again.

    Returns:
        The set stored in ``dic[node]``.

    Note:
        If the table contains a cycle, the recursion stops at the term that is
        already being expanded, so terms on the cycle may end up with partial
        gene sets instead of the call recursing without bound.
    """
    # Register the node before descending: this both creates its entry and
    # marks it as "seen" so a cycle leading back here is not followed again.
    genes = dic[node]

    # Get children of node
    children_df = ontology[ontology.Parent == node]

    for child, edge_type in zip(children_df.Child, children_df.EdgeType):
        if edge_type == 'Gene-Term':
            genes.add(child)
        elif edge_type == 'Child-Parent':
            if child in dic:
                # Sub-term reached through another parent earlier: reuse its
                # gene set instead of walking the whole subtree again.
                genes.update(dic[child])
            else:
                genes.update(ontology_recursion(child, ontology, dic))

    return genes

In [47]:
# Resolve the gene set of every term reachable from the synapse root term.
root = 'GO:0045202'
synapse_ont = pd.read_table("synapse.txt", comment='#')
synapse_dic = defaultdict(set)
x = ontology_recursion(root, synapse_ont, synapse_dic)

# Keep only the terms that ended up with at least one gene; the result is a
# plain dict, so later lookups no longer create entries for unknown terms.
synapse_dic = {term: genes for term, genes in synapse_dic.items() if genes}

### Analyzing CliXo ontology

In [92]:
# Load the CliXo output as a headerless, tab-separated table of strings.
# Lines starting with '#' are skipped; the fourth column is read under the
# placeholder name 'drop' and discarded immediately.
our_ont = '../output/StringDB/string_synapse_interactions_combined_score.clixo_alpha0.2_beta0.6.txt'
clixo_ont = pd.read_table(
    our_ont,
    dtype=str,
    comment='#',
    header=None,
    names=['Parent', 'Child', 'EdgeType', 'drop'],
).drop('drop', axis=1)

In [93]:
# Rename the edge types found in the raw table ('gene', 'default') to the names
# that ontology_recursion checks for ('Gene-Term', 'Child-Parent').
clixo_ont.loc[:, 'EdgeType'] = clixo_ont.EdgeType.str.replace('gene', 'Gene-Term').replace('default', 'Child-Parent')

# Adding an artificial root
# NOTE(review): the root is linked to every term that appears in the Parent
# column, not only to terms that have no parent of their own -- confirm that
# this is intended.
root = 'root'
Parents = set(clixo_ont.Parent)
data = [[root]*len(Parents), list(Parents), ["Child-Parent"]*len(Parents)]
# Three rows (Parent, Child, EdgeType) transposed into one row per new edge.
data = pd.DataFrame(data, index=["Parent", "Child", "EdgeType"], dtype=str).T
# DataFrame.append was removed in pandas 2.0; pd.concat stacks the same rows
# and, like append, keeps each frame's original index labels.
clixo_ont = pd.concat([clixo_ont, data])

In [94]:
# Gene sets for every CliXo term reachable from `root`, filled in by
# ontology_recursion as it walks the table.
clixo_dic = defaultdict(set)
x = ontology_recursion(root, clixo_ont, clixo_dic)

In [95]:
# Label each CliXo module with the synapse term whose gene set overlaps it most
# (highest Jaccard score). The first term reaching the best score wins ties,
# and a module with no overlap at all keeps the default label "GO:0045202".
new_labels = {}
for module, module_genes in clixo_dic.items():
    best_term, best_score = "GO:0045202", 0
    for term, term_genes in synapse_dic.items():
        score = jaccard(module_genes, term_genes)
        if score <= best_score:
            continue
        best_term, best_score = term, score
    new_labels[module] = best_term

def replace(x):
    """Return the label stored for ``x`` in ``new_labels``, or ``x`` itself if it has none."""
    return new_labels.get(x, x)

# Rewrite both ends of every edge with the labels chosen in `new_labels`.
clixo_ont = clixo_ont.assign(
    Parent=clixo_ont.Parent.map(replace),
    Child=clixo_ont.Child.map(replace),
)

# Several modules can receive the same label, which produces repeated edges and
# edges from a term to itself; keep the first copy of each (Parent, Child) pair
# and discard the self-loops.
clixo_ont = clixo_ont.drop_duplicates(['Parent', 'Child'])
clixo_ont = clixo_ont[~(clixo_ont.Parent == clixo_ont.Child)]

# Write the relabelled ontology as a tab-separated file without the index.
fn = '../output/StringDB/final_clixo_ontology.txt'
clixo_ont.to_csv(fn, sep='\t', index=None)

In [96]:
# Sanity check on the written file: print the first line whose first two
# whitespace-separated fields are identical, then stop. Nothing is printed when
# no such line exists.
with open(fn) as f:
    for line in f:
        line = line.split()
        if line[0] != line[1]:
            continue
        print(line)
        break