/
golr-exporter.py
88 lines (69 loc) · 2.53 KB
/
golr-exporter.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
#!/usr/bin/python
import os
import getopt
import requests
import json
import csv
import shutil
import argparse
def uniqAndSort(output):
    """Rewrite the text file at ``output`` with its lines de-duplicated and sorted.

    The file is read as UTF-8, duplicate lines are dropped, and the remaining
    lines are sorted with Python's default string ordering. The result is
    written to a temporary sibling file (``output`` + ``.sorted2``) which then
    replaces the original.

    Args:
        output: Path of the file to rewrite in place.
    """
    sorted_path = output + '.sorted2'

    # Context managers guarantee both handles are flushed and closed before
    # the move: moving a file that is still open for writing fails on Windows
    # and otherwise depends on garbage collection to flush buffered data.
    with open(output, "r", encoding='utf-8') as src:
        # A final line without a trailing newline would otherwise be glued to
        # whichever line sorts after it, so every line is normalised to end
        # with "\n" before de-duplication.
        unique_lines = sorted(
            {line if line.endswith("\n") else line + "\n" for line in src}
        )

    with open(sorted_path, "w", encoding='utf-8') as dst:
        dst.writelines(unique_lines)

    shutil.move(sorted_path, output)
def transformLabel(data, output):
    """Write a de-duplicated, sorted TSV of subject id / subject label pairs.

    Each entry of ``data`` contributes one tab-separated row made of its
    ``"subject"`` and ``"subject_label"`` values. Once the file is written it
    is passed to ``uniqAndSort`` to drop duplicate rows and sort the rest.

    Args:
        data: Iterable of entries indexable by the string keys ``"subject"``
            and ``"subject_label"``.
        output: Path of the TSV file to create (truncated if it exists).

    Raises:
        KeyError: If a dict entry lacks one of the required keys.
    """
    # newline='' hands line-ending control to the csv module, as its
    # documentation requires; without it Windows writes "\r\r\n", which shows
    # up as a spurious blank line once the file is re-read for sorting.
    with open(output, 'w', encoding='utf-8', newline='') as tsvfile:
        writer = csv.writer(tsvfile, delimiter='\t')
        writer.writerows(
            [entry["subject"], entry["subject_label"]] for entry in data
        )
    # Sort only after the with-block has flushed and closed the file.
    uniqAndSort(output)
def transformAssociation(data, output):
    """Write a de-duplicated, sorted TSV of subject id / object pairs.

    Each entry of ``data`` contributes one tab-separated row per element of
    its ``"objects"`` value, pairing the entry's ``"subject"`` with that
    element. Once the file is written it is passed to ``uniqAndSort`` to drop
    duplicate rows and sort the rest.

    Args:
        data: Iterable of entries indexable by the string keys ``"subject"``
            and ``"objects"``; ``"objects"`` must itself be iterable.
        output: Path of the TSV file to create (truncated if it exists).

    Raises:
        KeyError: If a dict entry lacks one of the required keys.
    """
    # newline='' hands line-ending control to the csv module, as its
    # documentation requires; without it Windows writes "\r\r\n", which shows
    # up as a spurious blank line once the file is re-read for sorting.
    with open(output, 'w', encoding='utf-8', newline='') as tsvfile:
        writer = csv.writer(tsvfile, delimiter='\t')
        for entry in data:
            subject = entry["subject"]
            writer.writerows([subject, obj] for obj in entry["objects"])
    # Sort only after the with-block has flushed and closed the file.
    uniqAndSort(output)
def main():
    """Fetch subject-phenotype associations per species and export them as TSV.

    For every requested species prefix, a directory named after the prefix is
    created in the current working directory. For each subject type
    (``disease`` and ``case`` for ``Hs``, ``gene`` for every other prefix) the
    ``/mart/<subject>/phenotype/NCBITaxon:<id>`` endpoint is fetched and two
    files are written:

    * ``<prefix>/<subject>-phenotype.tsv`` via ``transformAssociation``
    * ``<prefix>/<subject>-label.tsv`` via ``transformLabel``

    Command line:
        -t / --taxon  Comma-separated species prefixes; when omitted, every
                      prefix in the built-in taxon map is processed.

    Raises:
        SystemExit: From argparse, on invalid arguments or an unknown prefix.
        requests.HTTPError: If the API answers with an error status.
    """
    # Alternative endpoint when running against a local instance:
    # biolink = "http://localhost:5000/api" #mart/labels/gene/phenotype/NCBITaxon%3A7955
    biolink = "https://api.monarchinitiative.org/api"

    # Species prefix -> NCBI Taxonomy numeric identifier.
    taxon_map = {
        'Hs': 9606,
        'Mm': 10090,
        'Dr': 7955,
        'Dm': 7227,
        'Ce': 6239,
    }

    parser = argparse.ArgumentParser(description='Fetcher from biolink for monarch data',
                                     formatter_class=argparse.RawTextHelpFormatter)
    parser.add_argument('-t', '--taxon', type=str, required=False,
                        help='species prefix: ' + ",".join(taxon_map.keys()))
    args = parser.parse_args()

    if args.taxon is None:
        tax_list = list(taxon_map)
    else:
        tax_list = [prefix.strip() for prefix in args.taxon.split(',')]
        # Reject unknown prefixes up front; otherwise the lookup would yield
        # None and the script would request ".../NCBITaxon:None".
        unknown = [prefix for prefix in tax_list if prefix not in taxon_map]
        if unknown:
            parser.error("unknown species prefix: " + ",".join(unknown))

    print("Running for: " + ",".join(tax_list))

    for tax in tax_list:
        # exist_ok avoids the check-then-create race of exists() + makedirs().
        os.makedirs(tax, exist_ok=True)

        # Subject types are chosen per species, so they are reset on every
        # iteration rather than carried over from the previous prefix.
        subjs = ["disease", "case"] if tax == "Hs" else ["gene"]
        obj = "phenotype"

        for subj in subjs:
            assocFileTsv = tax + "/" + subj + "-" + obj + ".tsv"
            labelFileTsv = tax + "/" + subj + "-label.tsv"
            assocURL = biolink + "/mart/" + subj + "/" + obj + "/NCBITaxon:" + str(taxon_map[tax])
            print("fetching " + assocURL)
            req = requests.get(assocURL)
            # Fail loudly on HTTP errors instead of trying to parse an error
            # page as JSON and writing misleading output files.
            req.raise_for_status()
            data = req.json()
            transformAssociation(data, assocFileTsv)
            transformLabel(data, labelFileTsv)
# Run the export only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()