/
go-uniprot-to-topGO.py
executable file
·42 lines (38 loc) · 1.38 KB
/
go-uniprot-to-topGO.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
#!/usr/bin/python
# -*- coding: utf-8 -*-
#
# go-uniprot-to-topGO.py
#==============================================================================
import argparse
import sys
from collections import defaultdict
#==============================================================================
#Command line options==========================================================
#==============================================================================
# Command-line interface: a single required positional argument naming the
# gene association file to convert.
parser = argparse.ArgumentParser()
parser.add_argument(
    "gene_association",
    help="A Uniprot gene association file",
    type=str,
)

# Run with no arguments at all: show the full help text and stop with a
# non-zero status before argument parsing is attempted.
if len(sys.argv) == 1:
    parser.print_help()
    sys.exit(1)

# Parsed options; read by main() as a module-level name.
args = parser.parse_args()
#==============================================================================
def read_gene_association(source):
    """Group the fourth-column values of a gene association file by gene.

    Lines starting with "!" are treated as comments and skipped. Every other
    line is split on runs of whitespace; the second field is the key and the
    fourth field is appended to that key's list, in file order.

    NOTE(review): splitting on whitespace collapses empty tab-separated
    columns. If the input has a populated column between the symbol and the
    GO ID (e.g. a qualifier), the fourth field will be that column rather
    than the GO ID -- confirm against the files this script is run on.

    Args:
        source: Path of the gene association file to read.

    Returns:
        A ``defaultdict(list)`` mapping each second-column identifier to the
        list of fourth-column values recorded for it.

    Raises:
        SystemExit: If the file cannot be opened or read. The message goes
            to stderr and the exit status is non-zero.
    """
    GOdict = defaultdict(list)
    try:
        with open(source, "r") as handle:
            for line in handle:
                if line.startswith("!"):
                    continue
                # split() with no argument already ignores leading and
                # trailing whitespace, including the newline.
                fields = line.split()
                # Blank or truncated lines have no fourth field; skip them
                # rather than failing with an IndexError mid-file.
                if len(fields) < 4:
                    continue
                GOdict[fields[1]].append(fields[3])
    except IOError as err:
        # Falling through here would hand None to the caller, which then
        # fails with an unrelated TypeError. Stop with the real cause
        # instead; sys.exit(str) writes to stderr so the message cannot be
        # mistaken for data on stdout.
        sys.exit("Cannot read gene association file '%s': %s" % (source, err))
    return GOdict
#==============================================================================
def main():
    """Print one line per gene: identifier, a tab, then its comma-joined terms.

    Reads the file named by the module-level ``args.gene_association`` via
    ``read_gene_association`` and writes the result to standard output, one
    gene per line, in the mapping's iteration order.
    """
    GOdict = read_gene_association(args.gene_association)
    if GOdict is None:
        # The reader reports an unreadable file itself and yields no
        # mapping; there is nothing to print, so exit with a failure status
        # instead of crashing while iterating None.
        sys.exit(1)
    for g in GOdict:
        # Build the whole line as one string and print it in call form so it
        # runs on both Python 2 and Python 3. Joining explicitly also keeps
        # the separator a bare tab, with no space before it.
        print("\t".join((g, ",".join(GOdict[g]))))


if __name__ == "__main__":
    main()