From d56634b975c2c237c97f6ef7c8817f3ea08911d7 Mon Sep 17 00:00:00 2001 From: didillysquat Date: Wed, 8 Aug 2018 13:49:46 +0300 Subject: [PATCH] bug fix relative path output type names --- create_data_submission.py | 12 ++++++++++-- data_sub_collection_run.py | 11 +++++++++-- 2 files changed, 19 insertions(+), 4 deletions(-) diff --git a/create_data_submission.py b/create_data_submission.py index 9d54718..c82c5d6 100755 --- a/create_data_submission.py +++ b/create_data_submission.py @@ -269,11 +269,13 @@ def worker(input, output, wkd, dataSubID, e_val_collection_dict, reference_db_na lastSummary = readDefinedFileToList('{}{}.trim.contigs.summary'.format(currentDir, rootName)) number_of_seqs_contig_absolute = len(lastSummary) - 1 dataSetSampleInstanceInQ.initialTotSeqNum = number_of_seqs_contig_absolute + print('Sample: {}; dataSetSampleInstanceInQ.initialTotSeqNum = {}'.format(sampleName, number_of_seqs_contig_absolute)) # Get number of sequences after unique lastSummary = readDefinedFileToList('{}{}.trim.contigs.good.unique.abund.pcr.unique.summary'.format(currentDir, rootName)) number_of_seqs_contig_unique = len(lastSummary) - 1 dataSetSampleInstanceInQ.initialUniqueSeqNum = number_of_seqs_contig_unique + print('Sample: {}; dataSetSampleInstanceInQ.initialUniqueSeqNum = {}'.format(sampleName, number_of_seqs_contig_unique)) # Get absolute number of sequences after after sequence QC last_summary = readDefinedFileToList('{}{}.trim.contigs.good.unique.abund.pcr.unique.summary'.format(currentDir, rootName)) @@ -282,6 +284,8 @@ def worker(input, output, wkd, dataSubID, e_val_collection_dict, reference_db_na absolute_count += int(line.split('\t')[6]) dataSetSampleInstanceInQ.post_seq_qc_absolute_num_seqs = absolute_count dataSetSampleInstanceInQ.save() + print('Sample: {}; dataSetSampleInstanceInQ.post_seq_qc_absolute_num_seqs = {}'.format(sampleName, + absolute_count)) if sampleName == 'P7-F05_P7-F05_N705-S520': apples = 'asdf' @@ -336,7 +340,9 @@ def worker(input, output, wkd, dataSubID, e_val_collection_dict, reference_db_na #Add any seqs that did not return a blast match to the throwAwaySeq list diff = set(fastaDict.keys()) - set(blastDict.keys()) throwAwaySeqs.extend(list(diff)) - + print( + 'Sample {}: {} sequences thrown out initially due to being too divergent from reference sequences'.format( + sampleName, len(list(diff)))) ## 030518 We are starting to throw away Symbiodinium sequences here, especially in the non-coral samples # I think we will need to severely relax the e value cut off in order to incorporate more sequences @@ -540,7 +546,9 @@ def worker(input, output, wkd, dataSubID, e_val_collection_dict, reference_db_na # Now update the data_set_sample instance to set initialProcessingComplete to True dataSetSampleInstanceInQ.initialProcessingComplete = True dataSetSampleInstanceInQ.save() - print('{}: initial processing complete'.format(sampleName)) + print('{}: initial processing complete\n' + 'dataSetSampleInstanceInQ.finalUniqueSeqNum = {}\n' + 'dataSetSampleInstanceInQ.finalTotSeqNum = {}'.format(sampleName, len(nameDict), count)) os.chdir(currentDir) fileList = [f for f in os.listdir(currentDir) if f.endswith((".names", ".fasta", ".qual", ".summary", ".oligos", diff --git a/data_sub_collection_run.py b/data_sub_collection_run.py index edb6b24..6ed85fe 100755 --- a/data_sub_collection_run.py +++ b/data_sub_collection_run.py @@ -6437,12 +6437,19 @@ def getMajList(atype): name = atype.name count = name.count('/') majList = [] + # list of the seqs in order of abundance across the type's samples seqsInOrderOfAbunIDs = atype.orderedFootprintList.split(',') + # list of the maj seqs in the type majSeqsIDs = atype.MajRefSeqSet.split(',') for index in range(count + 1): for item in range(len(seqsInOrderOfAbunIDs)): if seqsInOrderOfAbunIDs[item] in majSeqsIDs: - majList.append(reference_sequence.objects.get(id=int(seqsInOrderOfAbunIDs[item])).name) + maj_seq_obj = reference_sequence.objects.get(id=int(seqsInOrderOfAbunIDs[item])) + maj_seq_obj_name = maj_seq_obj.name + if maj_seq_obj_name != 'noName': + majList.append(maj_seq_obj_name) + else: + majList.append(str(maj_seq_obj.id)) del seqsInOrderOfAbunIDs[item] break majStringOutput = '/'.join(majList) @@ -6545,7 +6552,7 @@ def namingRefSeqsUsedInDefs(): # but that ref seqname has aleady been associated with a different seq # Now assign names to those that aren't exact matches - with open('sp_config') as f: + with open('{}/sp_config'.format(os.path.dirname(__file__))) as f: config_dict = json.load(f) if config_dict['system_type'] == 'remote': for bo in blastOutputFile: