In [193]:
import networkx
import matplotlib
import obonet
import csv
%matplotlib inline
from itertools import chain


In [12]:
# Read the annotation table into a list of plain dicts, one per CSV row,
# keyed by the column names from the header line.
# newline='' hands line-ending handling to the csv module, which the csv docs
# require so that quoted fields containing line breaks are parsed correctly.
with open('AGLA_test_100.csv', newline='') as file:
    interaction_list = [dict(row)
                        for row in csv.DictReader(file, skipinitialspace=True)]

In [13]:
# Preview only the first few parsed records instead of dumping the whole list.
interaction_list[:3]

[{'GO_term': 'GO:0003677',
  'GO_terms': 'GO:0003677',
  'Owner': 'I5K',
  'Scaffold': 'Scaffold1',
  'Scaffold_Again': 'Scaffold1',
  'Species': 'AGLA',
  'TE_finder': 'RepeatMasker',
  'dot': '.',
  'function': 'DNA binding',
  'gene_name': 'AGLA000006',
  'go_info': 'Any molecular function by which a gene product interacts selectively and non-covalently with DNA (deoxyribonucleic acid).',
  'info_type': 'similarity',
  'lovedots': '.',
  'maths': '-',
  'more_info': 'NA',
  'more_math': '+',
  'name': 'NA',
  'no': 'NA',
  'no_idea': 'NA',
  'np': 'NA',
  'number': '37',
  'ontology': 'molecular_function',
  'other_go_term': 'GO:0043566',
  'score': '15.8',
  'start': '194896',
  'stop': '201012',
  'te_name': 'NA',
  'te_start': '199138',
  'te_stop': '199174',
  'te_type': '(TTTTA)n',
  'term_': 'GO:0003676 (nucleic acid binding)',
  'type_of_transcript': 'gene',
  'wha': '.',
  'where_it_happens': 'microtubule/chromatin interaction(RELATED)|plasmid binding(NARROW)|structure speci

In [150]:
# Load the Gene Ontology (go-basic) from its OBO file into a networkx graph.
url = 'http://purl.obolibrary.org/obo/go/go-basic.obo'
graph = obonet.read_obo(url)
# Give every GO term an empty {te_name: count} tally for the counting cell to fill.
# nodes(data=True) yields each node together with its attribute dict, which
# avoids the `graph.node` alias that was removed in networkx 2.4.
for node, attributes in graph.nodes(data=True):
    attributes['transposable_elements'] = {}


In [151]:
# Tally TE/GO associations: each row counts once for its own GO term and once
# for every higher-level term, so counts roll up towards the ontology roots.
# The higher-level terms are obtained with networkx.descendants() because the
# graph's edges point from a term to its parent (child -> parent).
# NOTE: tallies are incremented in place, so re-running this cell without first
# re-running the cell that resets 'transposable_elements' double-counts.
# `graph.nodes[...]` is used instead of the `graph.node` alias, which was
# removed in networkx 2.4.
for entry in interaction_list:
    go_term = entry["GO_term"]
    te_name = entry["te_name"]

    # Count the association on the annotated term itself.
    direct_counts = graph.nodes[go_term]['transposable_elements']
    direct_counts[te_name] = direct_counts.get(te_name, 0) + 1

    # Propagate the same association to every higher-level term.
    for higherlevelterm in networkx.descendants(graph, go_term):
        inherited_counts = graph.nodes[higherlevelterm]['transposable_elements']
        inherited_counts[te_name] = inherited_counts.get(te_name, 0) + 1
        
    
    
    
    

In [86]:
# Inspect one term's attributes (`graph.nodes` replaces the `graph.node` alias removed in networkx 2.4).
graph.nodes['GO:0003723']

{'alt_id': ['GO:0044822'],
 'def': '"Interacting selectively and non-covalently with an RNA molecule or a portion thereof." [GOC:jl, GOC:mah]',
 'is_a': ['GO:0003676'],
 'name': 'RNA binding',
 'namespace': 'molecular_function',
 'subset': ['goslim_agr',
  'goslim_aspergillus',
  'goslim_candida',
  'goslim_chembl',
  'goslim_generic',
  'goslim_mouse',
  'goslim_plant',
  'goslim_yeast',
  'gosubset_prok'],
 'synonym': ['"poly(A) RNA binding" RELATED []',
  '"poly(A)-RNA binding" RELATED []',
  '"poly-A RNA binding" RELATED []'],
 'transposable_elements': {'DNA/Sola': 3,
  'DNA/TcMar-Mariner': 3,
  'DNA/TcMar-Tc1': 1,
  'NA': 3,
  'Unknown': 9},
 'xref': ['Reactome:REACT_101703 "Exportin-5 recognizes 3\' overhang of pre-miRNA, Xenopus tropicalis"',
  'Reactome:REACT_103323 "Exportin-5 recognizes 3\' overhang of pre-miRNA, Dictyostelium discoideum"',
  'Reactome:REACT_106430 "Exportin-5 recognizes 3\' overhang of pre-miRNA, Mus musculus"',
  'Reactome:REACT_107757 "Exportin-5 recognize

In [152]:
# Inspect one term's attributes (`graph.nodes` replaces the `graph.node` alias removed in networkx 2.4).
graph.nodes['GO:0003723']

{'alt_id': ['GO:0044822'],
 'def': '"Interacting selectively and non-covalently with an RNA molecule or a portion thereof." [GOC:jl, GOC:mah]',
 'is_a': ['GO:0003676'],
 'name': 'RNA binding',
 'namespace': 'molecular_function',
 'subset': ['goslim_agr',
  'goslim_aspergillus',
  'goslim_candida',
  'goslim_chembl',
  'goslim_generic',
  'goslim_mouse',
  'goslim_plant',
  'goslim_yeast',
  'gosubset_prok'],
 'synonym': ['"poly(A) RNA binding" RELATED []',
  '"poly(A)-RNA binding" RELATED []',
  '"poly-A RNA binding" RELATED []'],
 'transposable_elements': {'DNA/Sola': 3,
  'DNA/TcMar-Mariner': 3,
  'DNA/TcMar-Tc1': 1,
  'NA': 5,
  'Unknown': 11},
 'xref': ['Reactome:REACT_101703 "Exportin-5 recognizes 3\' overhang of pre-miRNA, Xenopus tropicalis"',
  'Reactome:REACT_103323 "Exportin-5 recognizes 3\' overhang of pre-miRNA, Dictyostelium discoideum"',
  'Reactome:REACT_106430 "Exportin-5 recognizes 3\' overhang of pre-miRNA, Mus musculus"',
  'Reactome:REACT_107757 "Exportin-5 recogniz

In [153]:
# Inspect the parent term's attributes (`graph.nodes` replaces the `graph.node` alias removed in networkx 2.4).
graph.nodes['GO:0003676']

{'def': '"Interacting selectively and non-covalently with any nucleic acid." [GOC:jl]',
 'is_a': ['GO:0097159', 'GO:1901363'],
 'name': 'nucleic acid binding',
 'namespace': 'molecular_function',
 'subset': ['goslim_chembl',
  'goslim_metagenomics',
  'goslim_pir',
  'goslim_plant',
  'gosubset_prok'],
 'transposable_elements': {'DNA/Sola': 3,
  'DNA/TcMar-Mariner': 3,
  'DNA/TcMar-Tc1': 1,
  'NA': 8,
  'Unknown': 13}}

In [154]:
# Inspect this term's attributes (`graph.nodes` replaces the `graph.node` alias removed in networkx 2.4).
graph.nodes['GO:1901363']

{'def': '"Interacting selectively and non-covalently with heterocyclic compound." [GOC:TermGenie]',
 'is_a': ['GO:0005488'],
 'name': 'heterocyclic compound binding',
 'namespace': 'molecular_function',
 'transposable_elements': {'DNA/Sola': 3,
  'DNA/TcMar-Mariner': 3,
  'DNA/TcMar-Tc1': 1,
  'NA': 8,
  'Unknown': 13}}

In [89]:
# Inspect this term's attributes (`graph.nodes` replaces the `graph.node` alias removed in networkx 2.4).
graph.nodes['GO:0005488']

{'comment': "Note that this term is in the subset of terms that should not be used for direct, manual gene product annotation. Please choose a more specific child term, or request a new one if no suitable term is available. For ligands that bind to signal transducing receptors, consider the molecular function term 'receptor binding ; GO:0005102' and its children.",
 'def': '"The selective, non-covalent, often stoichiometric, interaction of a molecule with one or more specific sites on another molecule." [GOC:ceb, GOC:mah, ISBN:0198506732]',
 'is_a': ['GO:0003674'],
 'name': 'binding',
 'namespace': 'molecular_function',
 'subset': ['gocheck_do_not_manually_annotate',
  'goslim_pir',
  'goslim_plant',
  'gosubset_prok'],
 'synonym': ['"ligand" NARROW []'],
 'xref': ['Wikipedia:Binding_(molecular)']}

In [155]:
# Inspect this term's attributes (`graph.nodes` replaces the `graph.node` alias removed in networkx 2.4).
graph.nodes['GO:0003674']

{'alt_id': ['GO:0005554'],
 'comment': "Note that, in addition to forming the root of the molecular function ontology, this term is recommended for use for the annotation of gene products whose molecular function is unknown. When this term is used for annotation, it indicates that no information was available about the molecular function of the gene product annotated as of the date the annotation was made; the evidence code ND, no data, is used to indicate this. Despite its name, this is not a type of 'function' in the sense typically defined by upper ontologies such as Basic Formal Ontology (BFO). It is instead a BFO:process carried out by a single gene product or complex.",
 'def': '"The actions of a single gene product or complex at the molecular level consisting of a single biochemical activity or multiple causally linked biochemical activities. A given gene product may exhibit one or more molecular functions." [GOC:go_curators]',
 'name': 'molecular_function',
 'namespace': 'molec

In [200]:
# Sanity check on the small subset: gather every GO term that appears in the
# data plus all of their higher-level terms, to confirm the upward roll-up.
# Unique GO terms annotated directly in the interaction table.
terms = list({entry["GO_term"] for entry in interaction_list})
# One set of higher-level terms per annotated term (reached via descendants).
higherterms = [networkx.descendants(graph, term) for term in terms]
# Flatten the per-term sets and drop duplicates shared between lineages.
highterms = list(set(chain.from_iterable(higherterms)))
# Node list for the subgraph: annotated terms followed by their higher-level terms.
graphGOs = terms + highterms

In [204]:
# Restrict the ontology to the annotated GO terms and the higher-level terms collected in graphGOs.
subgraph = graph.subgraph(graphGOs)

In [216]:
# Print every term in the subgraph with its TE tallies to eyeball the roll-up.
# nodes(data=True) yields (term, attribute dict) pairs and avoids the
# `subgraph.node` alias, which was removed in networkx 2.4.
for GO, attributes in subgraph.nodes(data=True):
    print(GO, attributes['transposable_elements'])

GO:0000049 {'NA': 2, 'Unknown': 3}
GO:0003674 {'NA': 23, 'Unknown': 35, 'DNA/TcMar-Mariner': 7, 'DNA/Sola': 10, 'DNA/TcMar-Tc1': 9, 'DNA': 1, 'LINE/Penelope': 2}
GO:0003676 {'NA': 8, 'Unknown': 13, 'DNA/TcMar-Mariner': 3, 'DNA/Sola': 3, 'DNA/TcMar-Tc1': 1}
GO:0003677 {'NA': 3, 'Unknown': 2}
GO:0003723 {'NA': 5, 'Unknown': 11, 'DNA/TcMar-Mariner': 3, 'DNA/Sola': 3, 'DNA/TcMar-Tc1': 1}
GO:0003755 {'NA': 2, 'Unknown': 1}
GO:0003824 {'NA': 11, 'Unknown': 18, 'DNA/TcMar-Tc1': 8, 'DNA/Sola': 6, 'DNA': 1, 'DNA/TcMar-Mariner': 2, 'LINE/Penelope': 2}
GO:0004478 {'Unknown': 14, 'DNA/TcMar-Tc1': 3, 'NA': 7, 'DNA/TcMar-Mariner': 2, 'DNA/Sola': 1, 'LINE/Penelope': 2}
GO:0004871 {'DNA/TcMar-Mariner': 2, 'Unknown': 4, 'NA': 4, 'DNA/Sola': 1}
GO:0004872 {'DNA/TcMar-Mariner': 2, 'Unknown': 4, 'NA': 4, 'DNA/Sola': 1}
GO:0004888 {'DNA/TcMar-Mariner': 2, 'Unknown': 4, 'NA': 4, 'DNA/Sola': 1}
GO:0004930 {'DNA/TcMar-Mariner': 2, 'Unknown': 4, 'NA': 4, 'DNA/Sola': 1}
GO:0005488 {'NA': 8, 'Unknown': 13, 'DNA/