Skip to content

Commit

Permalink
Merge pull request #193 from sigven/docker_unwrap
Browse files Browse the repository at this point in the history
Remove docker command wrappers; CPSR refactor
  • Loading branch information
sigven committed Jul 11, 2022
2 parents fa03c91 + 1e560fc commit 79e27ee
Show file tree
Hide file tree
Showing 12 changed files with 1,710 additions and 2,098 deletions.
292 changes: 237 additions & 55 deletions pcgr/arg_checker.py

Large diffs are not rendered by default.

990 changes: 338 additions & 652 deletions pcgr/cpsr.py

Large diffs are not rendered by default.

344 changes: 126 additions & 218 deletions pcgr/main.py

Large diffs are not rendered by default.

63 changes: 55 additions & 8 deletions pcgr/pcgr_vars.py
Expand Up @@ -2,6 +2,14 @@

from pcgr._version import __version__

# Version identifiers and limits defined once at module level.
PCGR_VERSION = __version__  # mirrors pcgr._version.__version__
DB_VERSION = '20220203'  # NOTE(review): looks like a YYYYMMDD release stamp for the data bundle — confirm
VEP_VERSION = '105'
GENCODE_VERSION = '39'
NCBI_BUILD_MAF = 'GRCh38'
VEP_ASSEMBLY = 'GRCh38'
MAX_VARIANTS_FOR_REPORT = 500_000  # name suggests a cap on variants per report; where it is enforced is not visible here

tsites = {
0: 'Any',
1: 'Adrenal Gland',
Expand Down Expand Up @@ -37,12 +45,51 @@
}

# One "<code> = <site name>" line per entry of tsites, shown in the CLI help text.
tumor_sites = '\n'.join(f'{code} = {site}' for code, site in tsites.items())
PCGR_VERSION = __version__
DB_VERSION = '20220203'
VEP_VERSION = '105'
GENCODE_VERSION = '39'
NCBI_BUILD_MAF = 'GRCh38'
VEP_ASSEMBLY = 'GRCh38'
MAX_VARIANTS_FOR_REPORT = 500000
DOCKER_IMAGE_VERSION = f'sigven/pcgr:{PCGR_VERSION}'

# Panel number -> display name of each selectable virtual gene panel.
GE_panels = {
    0: "CPSR exploratory cancer predisposition panel (n = 433, GEP / TCGA Germline Study / Cancer Gene Census / Other)",
    1: "Adult solid tumours cancer susceptibility (GEP)",
    2: "Adult solid tumours for rare disease (GEP)",
    3: "Bladder cancer pertinent cancer susceptibility (GEP)",
    4: "Brain cancer pertinent cancer susceptibility (GEP)",
    5: "Breast cancer pertinent cancer susceptibility (GEP)",
    6: "Childhood solid tumours cancer susceptibility (GEP)",
    7: "Colorectal cancer pertinent cancer susceptibility (GEP)",
    8: "Endometrial cancer pertinent cancer susceptibility (GEP)",
    9: "Familial Tumours Syndromes of the central & peripheral Nervous system (GEP)",
    10: "Familial breast cancer (GEP)",
    11: "Familial melanoma (GEP)",
    12: "Familial prostate cancer (GEP)",
    13: "Familial rhabdomyosarcoma (GEP)",
    14: "GI tract tumours (GEP)",
    15: "Genodermatoses with malignancies (GEP)",
    16: "Haematological malignancies cancer susceptibility (GEP)",
    17: "Haematological malignancies for rare disease (GEP)",
    18: "Head and neck cancer pertinent cancer susceptibility (GEP)",
    19: "Inherited MMR deficiency (Lynch Syndrome) - GEP",
    20: "Inherited non-medullary thyroid cancer (GEP)",
    21: "Inherited ovarian cancer (without breast cancer) (GEP)",
    22: "Inherited pancreatic cancer (GEP)",
    23: "Inherited polyposis (GEP)",
    24: "Inherited predisposition to acute myeloid leukaemia (AML) (GEP)",
    25: "Inherited predisposition to GIST (GEP)",
    26: "Inherited renal cancer (GEP)",
    27: "Inherited phaeochromocytoma and paraganglioma (GEP)",
    28: "Melanoma pertinent cancer susceptibility (GEP)",
    29: "Multiple endocrine tumours (GEP)",
    30: "Multiple monogenic benign skin tumours (GEP)",
    31: "Neuroendocrine cancer pertinent cancer susceptibility (GEP)",
    32: "Neurofibromatosis Type 1 (GEP)",
    33: "Ovarian cancer pertinent cancer susceptibility (GEP)",
    34: "Parathyroid Cancer (GEP)",
    35: "Prostate cancer pertinent cancer susceptibility (GEP)",
    36: "Renal cancer pertinent cancer susceptibility (GEP)",
    37: "Rhabdoid tumour predisposition (GEP)",
    38: "Sarcoma cancer susceptibility (GEP)",
    39: "Sarcoma susceptbility (GEP)",
    40: "Thyroid cancer pertinent cancer susceptibility (GEP)",
    41: "Tumour predisposition - childhood onset (GEP)",
    42: "Upper gastrointestinal cancer pertinent cancer susceptibility (GEP)",
}

# One "<number> = <panel name>" line per panel, shown in the CLI help text.
panels = '\n'.join(f'{number} = {label}' for number, label in GE_panels.items())
49 changes: 23 additions & 26 deletions pcgr/utils.py
Expand Up @@ -6,23 +6,6 @@
import os
import platform

def get_docker_user_id(docker_user_id):
    """Return the user id (or user name) to run as.

    Resolution order:
      1. ``docker_user_id`` when the caller passes a truthy value;
      2. ``os.getuid()`` on Linux/macOS;
      3. ``getpass.getuser()`` on Windows/Cygwin;
      4. ``'root'`` (after logging a warning) when nothing could be determined.

    The return type therefore varies: whatever was passed in, an ``int`` uid,
    or a ``str`` user name.
    """
    system = platform.system()
    is_posix = system in ('Linux', 'Darwin') or sys.platform in ('darwin', 'linux2', 'linux')
    is_windows = system == 'Windows' or sys.platform in ('win32', 'cygwin')

    uid = ''
    if docker_user_id:
        uid = docker_user_id
    elif is_posix:
        uid = os.getuid()
    elif is_windows:
        uid = getpass.getuser()

    if uid == '':
        # Only build the logger when there is actually something to report.
        logger = getlogger('pcgr-get-OS')
        warn_msg = (f'Was not able to get user id/username for logged-in user on the underlying platform '
                    f'(platform.system(): {system}, sys.platform: {sys.platform}), now running PCGR as root')
        logger.warning(warn_msg)
        uid = 'root'
    return uid

def getlogger(logger_name):
logger = logging.getLogger(logger_name)
Expand Down Expand Up @@ -60,25 +43,25 @@ def check_subprocess(logger, command, debug):
print(e.output.decode())
exit(0)

def script_path(env, bin_script):
    """Join ``bin_script`` onto the directory of conda env ``env``.

    e.g. /path/conda/envs/{env}/{bin_script}
    """
    return os.path.join(conda_env_path(env), bin_script)

def conda_env_path(env):
    """Construct the absolute path to a sibling conda env.

    The currently activated env (read from ``$CONDA_PREFIX``) is used as the
    anchor: its parent directory is assumed to hold all envs,
    e.g. /path/to/conda/envs/{env}

    Raises:
        RuntimeError: if ``CONDA_PREFIX`` is unset or empty, i.e. no conda
            env is activated and there is nothing to anchor the path on.
    """
    active_prefix = os.environ.get('CONDA_PREFIX')
    if not active_prefix:
        # Without this guard os.path.normpath(None) fails with an opaque TypeError.
        raise RuntimeError(
            'CONDA_PREFIX is not set; activate a conda environment '
            f'so that the path to the "{env}" env can be resolved')
    cp = os.path.normpath(active_prefix)   # /path/to/conda/envs/FOO
    env_dir = os.path.dirname(cp)          # /path/to/conda/envs
    return os.path.join(env_dir, env)      # /path/to/conda/envs/{env}

def get_loftee_dir():
    """Return the ``share/loftee`` path inside the ``pcgr`` conda env."""
    loftee_subdir = "share/loftee"
    return script_path("pcgr", loftee_subdir)

def get_pcgr_bin():
"""Return abs path to e.g. conda/env/pcgr/bin
"""
Expand All @@ -101,3 +84,17 @@ def get_perl_exports():
perl_path_parent = os.path.dirname(perl_path) # /conda/env/pcgr/bin
out = f"unset PERL5LIB && export PATH={perl_path_parent}:\"$PATH\""
return out

def is_integer(n):
    """Return True when ``n`` represents a whole number.

    ``n`` may be a number or a numeric string (``3``, ``"3"``, ``"3.0"``).
    Anything that cannot be interpreted as a number (``None``, ``"abc"``,
    a list, ...) as well as ``inf``/``nan`` yields False instead of raising.
    """
    try:
        value = float(n)
    except (TypeError, ValueError):
        # TypeError: not a number-like object at all (e.g. None);
        # ValueError: a string that does not parse as a number.
        return False
    except OverflowError:
        # float() overflows for ints beyond float range; such a value is
        # still a whole number.
        return isinstance(n, int)
    return value.is_integer()

def get_cpsr_version():
    """Return the installed cpsr R package version as printed by R.

    Runs the ``Rscript`` of the ``pcgrr`` conda env and returns its decoded
    stdout, a line of the form ``cpsr <version>``.

    Raises:
        subprocess.CalledProcessError: if Rscript exits with a non-zero status.
    """
    # use pcgrr's Rscript to grab cpsr's R pkg version
    rscript = script_path("pcgrr", "bin/Rscript")
    r_expr = 'x <- paste0("cpsr ", as.character(packageVersion("cpsr"))); cat(x, "\n")'
    # Pass an argv list instead of a shell string: no shell quoting to get
    # wrong, and an interpreter path containing spaces keeps working.
    return subprocess.check_output([rscript, "-e", r_expr]).decode("utf-8")
26 changes: 0 additions & 26 deletions pcgrr/NEWS.md

This file was deleted.

4 changes: 2 additions & 2 deletions pcgrr/vignettes/output.Rmd
Expand Up @@ -320,8 +320,8 @@ A VCF file containing annotated, somatic calls (single nucleotide variants and i
| `COSMIC_MUTATION_ID` | Mutation identifier in [Catalog of somatic mutations in cancer](http://cancer.sanger.ac.uk/cancergenome/projects/cosmic/) database, as provided by VEP |
| `TCGA_PANCANCER_COUNT` | Raw variant count across all TCGA tumor types |
| `TCGA_FREQUENCY` | Frequency of variant across TCGA tumor types. Format: `tumortype| percent affected|affected cases|total cases` |
| `ICGC_PCAWG_OCCURRENCE` | Mutation occurrence in [ICGC|PCAWG](http://docs.icgc.org/pcawg/). By project: `project_code|affected_donors|tested_donors|frequency` |
| `ICGC_PCAWG_AFFECTED_DONORS` | Number of donors with the current mutation in [ICGC|PCAWG](http://docs.icgc.org/pcawg/) |
| `ICGC_PCAWG_OCCURRENCE` | Mutation occurrence in [ICGC-PCAWG](http://docs.icgc.org/pcawg/). By project: `project_code|tumor_type|affected_donors|tested_donors|frequency` |
| `ICGC_PCAWG_AFFECTED_DONORS` (**?**) | Number of donors with the current mutation in [ICGC-PCAWG](http://docs.icgc.org/pcawg/) |

##### _Clinical associations_

Expand Down

0 comments on commit 79e27ee

Please sign in to comment.