Skip to content

Commit

Permalink
Merge 578da03 into e26c59b
Browse files Browse the repository at this point in the history
  • Loading branch information
charles-cowart committed May 26, 2023
2 parents e26c59b + 578da03 commit 2c1824b
Show file tree
Hide file tree
Showing 8 changed files with 599 additions and 252 deletions.
21 changes: 17 additions & 4 deletions .github/workflows/qiita-plugin-ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,7 @@ jobs:
pip --quiet install https://github.com/qiita-spots/qtp-job-output-folder/archive/refs/heads/main.zip
pip --quiet install .
pip --quiet install coveralls
export QP_KLP_CONFIG_FP=`pwd`/configuration.json
configure_qtp_job_output_folder --env-script "source /home/runner/.profile; conda activate klp" --server-cert $QIITA_SERVER_CERT
Expand Down Expand Up @@ -134,12 +135,13 @@ jobs:
export QIITA_CONFIG_FP=`pwd`/qiita-dev/qiita_core/support_files/config_test_local.cfg
export QP_KLP_CONFIG_FP=`pwd`/configuration.json
export PYTHONWARNINGS="ignore:Certificate for localhost has no \`subjectAltName\`"
nosetests --with-doctest --with-coverage -v --cover-package=qp_klp
nosetests --with-coverage --cover-inclusive --cover-package qp_klp
- uses: codecov/codecov-action@v3
- name: Coveralls
uses: AndreMiras/coveralls-python-action@develop
with:
token: ${{ secrets.CODECOV_TOKEN }}
file: codecov.yml
parallel: true
flag-name: Unit Test

lint:
runs-on: ubuntu-latest
Expand All @@ -156,3 +158,14 @@ jobs:
run: |
pip install -q flake8
flake8 qp_klp setup.py scripts/*
coveralls_finish:
needs: main
runs-on: ubuntu-latest
steps:
- name: Coveralls Finished
uses: AndreMiras/coveralls-python-action@develop
with:
github-token: ${{ secrets.github_token }}
parallel-finished: true

49 changes: 40 additions & 9 deletions qp_klp/Amplicon.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,10 @@


class Amplicon(Step):
def __init__(self, pipeline, master_qiita_job_id, sn_tid_map_by_project,
def __init__(self, pipeline, master_qiita_job_id,
status_update_callback=None):
super().__init__(pipeline,
master_qiita_job_id,
sn_tid_map_by_project,
status_update_callback)

if pipeline.pipeline_type != Step.AMPLICON_TYPE:
Expand Down Expand Up @@ -41,7 +40,12 @@ def quality_control(self):
output_folder = join(self.pipeline.output_path,
'QCJob',
project_name,
Step.AMPLICON_TYPE)
# for legacy purposes, output folders are
# either 'trimmed_sequences', 'amplicon', or
# 'filtered_sequences'. Hence, this folder
# is not defined using AMPLICON_TYPE as that
# value may or may not equal the needed value.
'amplicon')

makedirs(output_folder)

Expand Down Expand Up @@ -81,23 +85,50 @@ def quality_control(self):
new_path = join(output_folder, basename(raw_fastq_file))
copyfile(raw_fastq_file, new_path)

def generate_reports(self, input_file_path):
super()._generate_reports(self.pipeline.mapping_file)
def generate_reports(self):
    """Generate FastQC reports via the base-class implementation.

    Returns None explicitly: unlike the metagenomic workflow, the
    amplicon workflow has no project names to hand back to the caller.
    """
    super()._generate_reports()
    return None

def _get_data_type(self, prep_file_path):
    """Determine the amplicon data-type recorded in a prep-file.

    :param prep_file_path: Path to a generated prep-file.
    :return: The single target_gene value, if it is a valid
             amplicon sub-type (see Step.AMPLICON_SUB_TYPES).
    :raises ValueError: if the 'target_gene' column is absent, holds
             more than one distinct value, or holds an unknown type.
    """
    metadata = Step.parse_prep_file(prep_file_path)

    # guard clause: without a target_gene column we cannot classify.
    if 'target_gene' not in metadata.columns:
        raise ValueError("'target_gene' column not present in "
                         "generated prep-files")

    # remove duplicate values, then convert back to list for
    # accession.
    # NOTE: the column accessed must be the same one tested above;
    # indexing a different name (e.g. 'sample target_gene') would
    # raise KeyError whenever the guard passed.
    tg = list(set(metadata['target_gene']))
    if len(tg) != 1:
        raise ValueError("More than one value for target_gene")

    if tg[0] in Step.AMPLICON_SUB_TYPES:
        return tg[0]

    raise ValueError(f"'{tg[0]}' is not a valid type - valid data-"
                     f"types are {Step.AMPLICON_SUB_TYPES}")

def generate_touched_studies(self, qclient):
    """Register each prep-file with Qiita, keyed to its data-type.

    The data-type for each prep-file is derived from its
    'target_gene' column via _get_data_type().
    """
    # self.prep_file_paths maps study-ids to lists of prep-file
    # paths; the study-id itself is not needed here, only the paths.
    results = {pf_path: self._get_data_type(pf_path)
               for pf_paths in self.prep_file_paths.values()
               for pf_path in pf_paths}

    super()._generate_touched_studies(qclient, results)

def generate_prep_file(self):
config = self.pipeline.configuration['seqpro']

seqpro_path = config['seqpro_path'].replace('seqpro', 'seqpro_mf')
project_names = [x['project_name'] for x in
self.pipeline.get_project_info()]

job = super()._generate_prep_file(config,
self.pipeline.mapping_file,
self.pipeline.mapping_file_path,
seqpro_path,
self.project_names)
project_names)

self.prep_file_paths = job.prep_file_paths

def generate_commands(self):
def generate_commands(self, qclient):
super()._generate_commands()
self.cmds.append(f'cd {self.pipeline.output_path}; '
'tar zcvf reports-ConvertJob.tgz ConvertJob/Reports')
Expand Down
37 changes: 24 additions & 13 deletions qp_klp/Metagenomic.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,17 @@
from os import walk
from os.path import exists, join, basename
from os.path import exists
from sequence_processing_pipeline.PipelineError import PipelineError
import pandas as pd
from qp_klp.Step import Step, FailedSamplesRecord
from qp_klp.Step import FailedSamplesRecord
from os.path import join, basename
from qp_klp.Step import Step


class Metagenomic(Step):
def __init__(self, pipeline, master_qiita_job_id, sn_tid_map_by_project,
def __init__(self, pipeline, master_qiita_job_id,
status_update_callback=None):
super().__init__(pipeline,
master_qiita_job_id,
sn_tid_map_by_project,
status_update_callback)

if pipeline.pipeline_type not in Step.META_TYPES:
Expand All @@ -22,7 +23,6 @@ def __init__(self, pipeline, master_qiita_job_id, sn_tid_map_by_project,
# is not available.
self.fsr = FailedSamplesRecord(self.pipeline.output_path,
pipeline.sample_sheet.samples)
self.project_names = None

def convert_bcl_to_fastq(self):
# The 'bcl-convert' key is a convention hard-coded into mg-scripts and
Expand All @@ -41,7 +41,7 @@ def quality_control(self):
self.fsr.write(job.audit(self.pipeline.get_sample_ids()), 'QCJob')

def generate_reports(self):
job = super()._generate_reports(self.pipeline.sample_sheet.path)
job = super()._generate_reports()
self.fsr.write(job.audit(self.pipeline.get_sample_ids()), 'FastQCJob')

self.project_names = job.project_names
Expand All @@ -59,8 +59,19 @@ def generate_prep_file(self):

self.prep_file_paths = job.prep_file_paths

def generate_commands(self, special_map, server_url,
touched_studies_prep_info):
def generate_touched_studies(self, qclient):
    """Register each prep-file with Qiita, keyed to its data-type."""
    # Every prep-file in this run shares one data-type — either
    # metagenomic or metatranscriptomic — as recorded on the
    # pipeline itself, so look it up once outside the loop.
    data_type = self.pipeline.pipeline_type

    results = {}
    for pf_paths in self.prep_file_paths.values():
        for pf_path in pf_paths:
            results[pf_path] = data_type

    super()._generate_touched_studies(qclient, results)

def generate_commands(self, qclient):
super()._generate_commands()

out_dir = self.pipeline.output_path
Expand Down Expand Up @@ -90,7 +101,7 @@ def generate_commands(self, special_map, server_url,

touched_studies = []

for project, upload_dir, qiita_id in special_map:
for project, upload_dir, qiita_id in self.special_map:
# sif filenames are of the form:
blanks_file = f'{self.pipeline.run_id}_{project}_blanks.tsv'
if self.sifs and [x for x in self.sifs if blanks_file in x]:
Expand Down Expand Up @@ -134,12 +145,12 @@ def generate_commands(self, special_map, server_url,

data = []
for qiita_id, project in touched_studies:
for prep_id in touched_studies_prep_info[qiita_id]:
study_url = f'{server_url}/study/description/{qiita_id}'
prep_url = (f'{server_url}/study/description/'
for prep_id in self.touched_studies_prep_info[qiita_id]:
surl = f'{qclient._server_url}/study/description/{qiita_id}'
prep_url = (f'{qclient._server_url}/study/description/'
f'{qiita_id}?prep_id={prep_id}')
data.append({'Project': project, 'Qiita Study ID': qiita_id,
'Qiita Prep ID': prep_id, 'Qiita URL': study_url,
'Qiita Prep ID': prep_id, 'Qiita URL': surl,
'Prep URL': prep_url})

df = pd.DataFrame(data)
Expand Down

0 comments on commit 2c1824b

Please sign in to comment.