Skip to content

Commit

Permalink
Merge pull request #471 from TomConlin/master
Browse files Browse the repository at this point in the history
mgi running and host change for fly
  • Loading branch information
TomConlin committed May 30, 2017
2 parents 6be4f47 + f48f07e commit 629adf8
Show file tree
Hide file tree
Showing 2 changed files with 45 additions and 10 deletions.
2 changes: 1 addition & 1 deletion dipper/sources/FlyBase.py
Original file line number Diff line number Diff line change
Expand Up @@ -164,7 +164,7 @@ def fetch(self, is_dl_forced=False):

# create the connection details for Flybase
cxn = {
'host': 'flybase.org', 'database': 'flybase', 'port': 5432,
'host': 'chado.flybase.org', 'database': 'flybase', 'port': 5432,
'user': 'flybase', 'password': 'no password'}

self.dataset.setFileAccessUrl(
Expand Down
53 changes: 44 additions & 9 deletions dipper/sources/MGI.py
Original file line number Diff line number Diff line change
Expand Up @@ -418,6 +418,7 @@ def _process_gxd_genotype_view(self, limit=None):
'strain', re.sub(r':', '', str(strain_id)))
strain_id += re.sub(r':', '', str(mgiid))
strain_id = re.sub(r'^_', '_:', strain_id)
strain_id = re.sub(r'::', ':', strain_id)
model.addDescription(
strain_id,
"This genomic background is unknown. " +
Expand Down Expand Up @@ -540,16 +541,42 @@ def _process_all_summary_view(self, limit):
model = Model(g)
line_counter = 0
raw = '/'.join((self.rawdir, 'all_summary_view'))
logger.info("getting alleles and their labels and descriptions")
logger.info(
"alleles with labels and descriptions from all_summary_view")
with open(raw, 'r') as f:
f.readline() # read the header row; skip
col_count = f.readline().count('\t') # read the header row; skip
# head -1 workspace/build-mgi-ttl/dipper/raw/mgi/all_summary_view|\
# tr '\t' '\n' | grep -n . | \
# awk -F':' '{col=$1;$1="";print $0,",\t #" col}'
for line in f:
line_counter += 1

(accession_key, accid, prefixpart, numericpart, logicaldb_key,
object_key, mgitype_key, private, preferred, createdby_key,
modifiedby_key, creation_date, modification_date, mgiid,
subtype, description, short_description) = line.split('\t')
cols = line.count('\t')
# bail if the row is malformed
if cols != col_count:
logger.warning('Expected ' + str(col_count) + ' columns.')
logger.warning('Recieved ' + str(cols) + ' columns.')
logger.warning(line.format())
continue
# no stray tab in the description column
(
accession_key,
accid,
prefixpart,
numericpart,
logicaldb_key,
object_key,
mgitype_key,
private,
preferred,
createdby_key,
modifiedby_key,
creation_date,
modification_date,
mgiid,
subtype,
description,
short_description
) = line.split('\t')
# NOTE: May want to filter alleles based on the preferred field
# (preferred = 1) or will get duplicates
# (24288, to be exact...
Expand Down Expand Up @@ -617,12 +644,20 @@ def _process_all_allele_view(self, limit):
line_counter = 0
logger.info(
"adding alleles, mapping to markers, " +
"extracting their sequence alterations")
"extracting their sequence alterations " +
"from all_allele_view")
raw = '/'.join((self.rawdir, 'all_allele_view'))
with open(raw, 'r') as f:
f.readline() # read the header row; skip
col_count = f.readline().count('\t') # read the header row; skip
for line in f:
line_counter += 1
cols = line.count('\t')
# bail if the row is malformed
if cols != col_count:
logger.warning('Expected ' + str(col_count) + ' columns.')
logger.warning('Recieved ' + str(cols) + ' columns.')
logger.warning(line.format())
continue

(allele_key, marker_key, strain_key, mode_key, allele_type_key,
allele_status_key, transmission_key, collection_key, symbol,
Expand Down

0 comments on commit 629adf8

Please sign in to comment.