Skip to content

Commit

Permalink
bug fix relative path output type names
Browse files Browse the repository at this point in the history
  • Loading branch information
didillysquat committed Aug 8, 2018
1 parent df809ed commit d56634b
Show file tree
Hide file tree
Showing 2 changed files with 19 additions and 4 deletions.
12 changes: 10 additions & 2 deletions create_data_submission.py
Original file line number Diff line number Diff line change
Expand Up @@ -269,11 +269,13 @@ def worker(input, output, wkd, dataSubID, e_val_collection_dict, reference_db_na
lastSummary = readDefinedFileToList('{}{}.trim.contigs.summary'.format(currentDir, rootName))
number_of_seqs_contig_absolute = len(lastSummary) - 1
dataSetSampleInstanceInQ.initialTotSeqNum = number_of_seqs_contig_absolute
print('Sample: {}; dataSetSampleInstanceInQ.initialTotSeqNum = {}'.format(sampleName, number_of_seqs_contig_absolute))

# Get number of sequences after unique
lastSummary = readDefinedFileToList('{}{}.trim.contigs.good.unique.abund.pcr.unique.summary'.format(currentDir, rootName))
number_of_seqs_contig_unique = len(lastSummary) - 1
dataSetSampleInstanceInQ.initialUniqueSeqNum = number_of_seqs_contig_unique
print('Sample: {}; dataSetSampleInstanceInQ.initialUniqueSeqNum = {}'.format(sampleName, number_of_seqs_contig_unique))

# Get absolute number of sequences after sequence QC
last_summary = readDefinedFileToList('{}{}.trim.contigs.good.unique.abund.pcr.unique.summary'.format(currentDir, rootName))
Expand All @@ -282,6 +284,8 @@ def worker(input, output, wkd, dataSubID, e_val_collection_dict, reference_db_na
absolute_count += int(line.split('\t')[6])
dataSetSampleInstanceInQ.post_seq_qc_absolute_num_seqs = absolute_count
dataSetSampleInstanceInQ.save()
print('Sample: {}; dataSetSampleInstanceInQ.post_seq_qc_absolute_num_seqs = {}'.format(sampleName,
absolute_count))

if sampleName == 'P7-F05_P7-F05_N705-S520':
apples = 'asdf'
Expand Down Expand Up @@ -336,7 +340,9 @@ def worker(input, output, wkd, dataSubID, e_val_collection_dict, reference_db_na
#Add any seqs that did not return a blast match to the throwAwaySeq list
diff = set(fastaDict.keys()) - set(blastDict.keys())
throwAwaySeqs.extend(list(diff))

print(
'Sample {}: {} sequences thrown out initially due to being too divergent from reference sequences'.format(
sampleName, len(list(diff))))
## 030518 We are starting to throw away Symbiodinium sequences here, especially in the non-coral samples
# I think we will need to severely relax the e value cut off in order to incorporate more sequences

Expand Down Expand Up @@ -540,7 +546,9 @@ def worker(input, output, wkd, dataSubID, e_val_collection_dict, reference_db_na
# Now update the data_set_sample instance to set initialProcessingComplete to True
dataSetSampleInstanceInQ.initialProcessingComplete = True
dataSetSampleInstanceInQ.save()
print('{}: initial processing complete'.format(sampleName))
print('{}: initial processing complete\n'
'dataSetSampleInstanceInQ.finalUniqueSeqNum = {}\n'
'dataSetSampleInstanceInQ.finalTotSeqNum = {}'.format(sampleName, len(nameDict), count))

os.chdir(currentDir)
fileList = [f for f in os.listdir(currentDir) if f.endswith((".names", ".fasta", ".qual", ".summary", ".oligos",
Expand Down
11 changes: 9 additions & 2 deletions data_sub_collection_run.py
Original file line number Diff line number Diff line change
Expand Up @@ -6437,12 +6437,19 @@ def getMajList(atype):
name = atype.name
count = name.count('/')
majList = []
# list of the seqs in order of abundance across the type's samples
seqsInOrderOfAbunIDs = atype.orderedFootprintList.split(',')
# list of the maj seqs in the type
majSeqsIDs = atype.MajRefSeqSet.split(',')
for index in range(count + 1):
for item in range(len(seqsInOrderOfAbunIDs)):
if seqsInOrderOfAbunIDs[item] in majSeqsIDs:
majList.append(reference_sequence.objects.get(id=int(seqsInOrderOfAbunIDs[item])).name)
maj_seq_obj = reference_sequence.objects.get(id=int(seqsInOrderOfAbunIDs[item]))
maj_seq_obj_name = maj_seq_obj.name
if maj_seq_obj_name != 'noName':
majList.append(maj_seq_obj_name)
else:
majList.append(str(maj_seq_obj.id))
del seqsInOrderOfAbunIDs[item]
break
majStringOutput = '/'.join(majList)
Expand Down Expand Up @@ -6545,7 +6552,7 @@ def namingRefSeqsUsedInDefs():
# but that ref seqname has already been associated with a different seq

# Now assign names to those that aren't exact matches
with open('sp_config') as f:
with open('{}/sp_config'.format(os.path.dirname(__file__))) as f:
config_dict = json.load(f)
if config_dict['system_type'] == 'remote':
for bo in blastOutputFile:
Expand Down

0 comments on commit d56634b

Please sign in to comment.