Skip to content

Commit

Permalink
Merge pull request #42 from pbashyal-nmdp/master
Browse files Browse the repository at this point in the history
Use given data_dir for mac files.
  • Loading branch information
mmaiers-nmdp committed Jul 22, 2020
2 parents 39b59a1 + 8b875ab commit 4bb7c24
Show file tree
Hide file tree
Showing 3 changed files with 12 additions and 31 deletions.
9 changes: 4 additions & 5 deletions pyard/pyard.py
Original file line number Diff line number Diff line change
Expand Up @@ -128,15 +128,14 @@ def __init__(self, dbversion: str='Latest',

imgt_hla_url = 'https://raw.githubusercontent.com/ANHIG/IMGTHLA/'
ars_url = imgt_hla_url + dbversion + '/wmda/hla_nom_g.txt'
allele_url = imgt_hla_url + dbversion + "/Allelelist.txt"

ars_file = data_dir + '/hla_nom_g.' + str(dbversion) + ".txt"
allele_file = data_dir + '/AlleleList.' + str(dbversion) + ".txt"
mac_file = data_dir + "/mac.txt"
mac_pickle = data_dir + "/mac.pickle"
# dna_relshp.csv is part of the codebase
broad_file = os.path.dirname(__file__) + "/dna_relshp.csv"
#print("mac_file:", mac_file)

allele_url = "https://raw.githubusercontent.com/ANHIG/IMGTHLA/" \
+ dbversion + "/Allelelist.txt"

# Downloading ARS file
if not os.path.isfile(ars_file):
Expand All @@ -155,7 +154,7 @@ def __init__(self, dbversion: str='Latest',
if not os.path.isfile(mac_pickle):
if verbose:
logging.info("Downloading MAC file")
self.mac = all_macs(mac_file)
self.mac = all_macs(mac_file, data_dir=data_dir)

# Writing dict to pickle file
with open(mac_pickle, 'wb') as handle:
Expand Down
32 changes: 7 additions & 25 deletions pyard/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,37 +21,19 @@
# > http://www.fsf.org/licensing/licenses/lgpl.html
# > http://www.opensource.org/licenses/lgpl-license.php
#
import os
import string
import random as r
from datetime import datetime, date
from six import integer_types, iteritems
import pandas as pd
import copy
import http.client
import pickle
import re
import urllib.request
import zipfile
import re
from datetime import datetime, date

import pandas as pd
from six import integer_types, iteritems


# def all_macs(csv_file, url='hml.nmdp.org'):
# # conn = http.client.HTTPSConnection(url, 443)
# # conn.putrequest('GET', '/mac/api/codes')
# # conn.endheaders()
# # response = conn.getresponse().read().decode('utf8').splitlines()
# data = [l.split("\t")[1:3] for l in response]
# urllib.request.urlretrieve(url, 'numeric.v3.zip')
# df = pd.DataFrame(data, columns=['Code','Alleles'])
# df.to_csv(csv_file, header=True, index=False)
# df['Alleles'] = df['Alleles'].apply(lambda x: x.split("/"))
# mac_dict = df.set_index("Code").to_dict('index')
# return mac_dict

def all_macs(csv_file, url='https://hml.nmdp.org/mac/files/numer.v3.zip'):
def all_macs(csv_file, data_dir, url='https://hml.nmdp.org/mac/files/numer.v3.zip'):
urllib.request.urlretrieve(url, 'numeric.v3.zip')
zip_ref = zipfile.ZipFile('numeric.v3.zip', 'r')
data_dir = os.path.dirname(__file__)
zip_ref.extractall(data_dir)
zip_ref.close()
data = []
Expand All @@ -62,7 +44,7 @@ def all_macs(csv_file, url='https://hml.nmdp.org/mac/files/numer.v3.zip'):
if re.search("^\D", line) and not re.search("CODE", line) and not re.search("LAST", line):
data.append(line.split("\t"))
f.close()
df = pd.DataFrame(data, columns=['Code','Alleles'])
df = pd.DataFrame(data, columns=['Code', 'Alleles'])
df.to_csv(csv_file, header=True, index=False)
df['Alleles'] = df['Alleles'].apply(lambda x: x.split("/"))
mac_dict = df.set_index("Code").to_dict('index')
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@

setup(
name='py-ard',
version='0.0.16',
version='0.0.17',
description="ARD reduction for HLA with python",
long_description=readme + '\n\n' + history,
author="CIBMTR",
Expand Down

0 comments on commit 4bb7c24

Please sign in to comment.