From 05ed18568ca9744c189a6bd48d895d9db1b82697 Mon Sep 17 00:00:00 2001
From: Sigve Nakken <sigven@ifi.uio.no>
Date: Sun, 18 Feb 2024 21:38:23 +0100
Subject: [PATCH] simplified tier assignment

---
 pcgr/arg_checker.py                           |    8 +-
 pcgr/biomarker.py                             |   28 +-
 pcgr/cna.py                                   |   16 +-
 pcgr/config.py                                |    2 +-
 pcgr/cpsr.py                                  |    4 +-
 pcgr/main.py                                  |    5 +-
 pcgr/pcgr_vars.py                             |    4 +-
 pcgrr/DESCRIPTION                             |    4 +-
 pcgrr/NAMESPACE                               |    7 +-
 pcgrr/R/acmg.R                                |  309 ++-
 pcgrr/R/biomarkers.R                          |   93 +-
 pcgrr/R/germline.R                            |  355 ++--
 pcgrr/R/input_data.R                          |  321 ++-
 pcgrr/R/kataegis.R                            |    5 +-
 pcgrr/R/main.R                                |   79 +-
 pcgrr/R/main2.R                               | 1801 +++++++++++++++++
 pcgrr/R/msi.R                                 |  216 +-
 pcgrr/R/mutation.R                            |   92 -
 pcgrr/R/mutational_burden.R                   |    4 +-
 pcgrr/R/mutational_signatures.R               |  327 +--
 pcgrr/R/reference_data.R                      |   84 +-
 pcgrr/R/report.R                              |  132 +-
 pcgrr/R/utils.R                               |  150 +-
 pcgrr/R/validate.R                            |   10 -
 pcgrr/R/value_boxes.R                         |    4 +-
 pcgrr/data-raw/data-raw.R                     |  196 ++
 pcgrr/data/cancer_phenotypes_regex.rda        |  Bin 539 -> 540 bytes
 pcgrr/data/color_palette.rda                  |  Bin 686 -> 686 bytes
 pcgrr/data/data_coltype_defs.rda              |  Bin 1898 -> 1922 bytes
 pcgrr/data/effect_prediction_algos.rda        |  Bin 1015 -> 1006 bytes
 pcgrr/data/evidence_levels.rda                |  Bin 110 -> 110 bytes
 pcgrr/data/evidence_types.rda                 |  Bin 138 -> 139 bytes
 pcgrr/data/tcga_cohorts.rda                   |  Bin 767 -> 762 bytes
 pcgrr/data/variant_db_url.rda                 |  Bin 549 -> 544 bytes
 .../pcgr_flexdb/flexdb_scna_tier1.Rmd         |   12 +-
 .../pcgr_flexdb/flexdb_scna_tier2.Rmd         |   12 +-
 .../pcgr_flexdb/flexdb_snv_tier1.Rmd          |   18 +-
 .../pcgr_flexdb/flexdb_snv_tier2.Rmd          |   18 +-
 .../pcgr_rmarkdown/cna_biomarkers.Rmd         |   16 +-
 .../templates/pcgr_rmarkdown/snv_tier1.Rmd    |   14 +-
 .../templates/pcgr_rmarkdown/snv_tier2.Rmd    |   10 +-
 pcgrr/man/append_cancer_gene_evidence.Rd      |   11 +-
 pcgrr/man/assign_acmg_tiers.Rd                |   29 +
 pcgrr/man/assign_germline_popfreq_status.Rd   |    2 +-
 pcgrr/man/assign_somatic_classification.Rd    |    6 +-
 pcgrr/man/assign_somatic_germline_evidence.Rd |    4 +-
 pcgrr/man/generate_pcgr_report2.Rd            |   14 +
 pcgrr/man/generate_report_data_msi.Rd         |   10 +-
 .../man/generate_report_data_signatures_mp.Rd |   20 +-
 pcgrr/man/generate_report_data_snv_indel.Rd   |   20 +-
 pcgrr/man/generate_report_data_snv_indel2.Rd  |   25 +
 pcgrr/man/generate_report_data_tumor_only.Rd  |   11 +-
 pcgrr/man/generate_tier_tsv.Rd                |  324 ++-
 pcgrr/man/get_population_tag.Rd               |   23 -
 pcgrr/man/get_prevalent_site_signatures.Rd    |    8 +-
 pcgrr/man/get_proper_maf_alleles.Rd           |   23 -
 pcgrr/man/init_kataegis_content.Rd            |   11 +
 pcgrr/man/init_msi_content.Rd                 |   11 +
 pcgrr/man/load_dna_variants.Rd                |   14 +-
 pcgrr/man/load_somatic_cna.Rd                 |    4 +-
 pcgrr/man/make_upset_plot_data.Rd             |    5 +-
 pcgrr/man/predict_msi_status.Rd               |    8 +-
 pcgrr/man/write_report_output.Rd              |   12 +-
 scripts/pcgrr.R                               |   34 +-
 64 files changed, 3932 insertions(+), 1053 deletions(-)
 create mode 100644 pcgrr/R/main2.R
 delete mode 100644 pcgrr/R/validate.R
 create mode 100644 pcgrr/man/assign_acmg_tiers.Rd
 create mode 100644 pcgrr/man/generate_pcgr_report2.Rd
 create mode 100644 pcgrr/man/generate_report_data_snv_indel2.Rd
 delete mode 100644 pcgrr/man/get_population_tag.Rd
 delete mode 100644 pcgrr/man/get_proper_maf_alleles.Rd
 create mode 100644 pcgrr/man/init_kataegis_content.Rd
 create mode 100644 pcgrr/man/init_msi_content.Rd

diff --git a/pcgr/arg_checker.py b/pcgr/arg_checker.py
index af13ad8c..bf20378e 100644
--- a/pcgr/arg_checker.py
+++ b/pcgr/arg_checker.py
@@ -106,12 +106,12 @@ def check_args(arg_dict):
 
     # if assay is targeted or mode is Tumor-Only, MSI prediction will not be performed/switched off
     assay_type = 'Tumor-Control'
-    if arg_dict['estimate_msi_status'] is True and (arg_dict['assay'] == 'TARGETED' or arg_dict['tumor_only'] is True):
+    if arg_dict['estimate_msi'] is True and (arg_dict['assay'] == 'TARGETED' or arg_dict['tumor_only'] is True):
         if arg_dict['tumor_only'] is True:
             assay_type = 'Tumor-Only'
         warn_msg = f"MSI status prediction can be applied for WGS/WES tumor-control assays only (query type: {arg_dict['assay']}|{assay_type}) - analysis will be omitted"
         warn_message(warn_msg, logger)
-        arg_dict['estimate_msi_status'] = 0
+        arg_dict['estimate_msi'] = 0
 
     # minimum number of mutations required for mutational signature reconstruction cannot be less than 100 (somewhat arbitrary lower threshold, recommended value is 200)
     if int(arg_dict['min_mutations_signatures']) < int(pcgr_vars.RECOMMENDED_N_MUT_SIGNATURE):
@@ -124,8 +124,8 @@ def check_args(arg_dict):
             error_message(err_msg, logger)
 
     # if MSI status is to be estimated, mutational burden must be turned on
-    if arg_dict['estimate_msi_status'] is True and arg_dict['estimate_tmb'] is False:
-        err_msg = "Prediction of MSI status ('--estimate_msi_status') requires mutational burden analysis ('--estimate_tmb')"
+    if arg_dict['estimate_msi'] is True and arg_dict['estimate_tmb'] is False:
+        err_msg = "Prediction of MSI status ('--estimate_msi') requires mutational burden analysis ('--estimate_tmb')"
         error_message(err_msg, logger)
 
     if arg_dict['tumor_only'] is True:
diff --git a/pcgr/biomarker.py b/pcgr/biomarker.py
index 01c627a1..31e3ea02 100644
--- a/pcgr/biomarker.py
+++ b/pcgr/biomarker.py
@@ -21,7 +21,7 @@ def load_biomarkers(logger, biomarker_variant_fname, biomarker_clinical_fname, b
 
    Returns:
    - variant_biomarkers: A dictionary containing variant biomarkers. The keys are variant alias types 
-     ('dbsnp', 'hgvsp', 'hgvsc', 'genomic', 'exon', 'other', 'aa_region'), and the values are 
+     ('dbsnp', 'hgvsp', 'hgvsc', 'genomic', 'exon', 'other_gene', 'aa_region'), and the values are 
      dictionaries containing variant information.
 
    Note:
@@ -33,7 +33,7 @@ def load_biomarkers(logger, biomarker_variant_fname, biomarker_clinical_fname, b
    """
 
    variant_biomarkers = {} ##dictionary to return
-   for variant_alias_type in ['dbsnp','hgvsp','hgvsc','genomic','exon','other','aa_region']:
+   for variant_alias_type in ['dbsnp','hgvsp','hgvsc','genomic','exon','other_gene','aa_region']:
       variant_biomarkers[variant_alias_type] = {}
    check_file_exists(biomarker_clinical_fname, logger)
    
@@ -92,12 +92,12 @@ def load_biomarkers(logger, biomarker_variant_fname, biomarker_clinical_fname, b
                entry_alias_type = str(row['alias_type']).replace("_grch37", "")
                entry_alias_type = entry_alias_type.replace("_grch38", "")
               
-               if entry_alias_type == "other":
+               if entry_alias_type == "other_gene":
                   if bool(re.search(r'^((ACTIVATING )?MUTATION|LOSS|START LOSS)$', row['variant_alias'])) is True:
                      varkey = str(row['entrezgene'])
-                     if not varkey in variant_biomarkers['other']:
-                        variant_biomarkers['other'][varkey] = []
-                     variant_biomarkers['other'][varkey].append(row)
+                     if not varkey in variant_biomarkers['other_gene']:
+                        variant_biomarkers['other_gene'][varkey] = []
+                     variant_biomarkers['other_gene'][varkey].append(row)
 
                if entry_alias_type == 'exon':
                   exons = row['variant_exon']
@@ -131,20 +131,18 @@ def load_biomarkers(logger, biomarker_variant_fname, biomarker_clinical_fname, b
                if biomarker_vartype == 'CNA' and (row['alteration_type'].startswith('CNA')):
                   row['clinical_evidence_items'] = '.'
                   if row['variant_id'] in variant_to_clinical_evidence.keys():
-                     row['clinical_evidence_items'] = variant_to_clinical_evidence[row['variant_id']]                 
-                  entry_alias_type = str(row['alias_type']).replace("_grch37", "")
-                  entry_alias_type = entry_alias_type.replace("_grch38", "")
+                     row['clinical_evidence_items'] = variant_to_clinical_evidence[row['variant_id']]                                 
                   
-                  if entry_alias_type == "other":
+                  if row['alias_type'] == "other_gene":
                      if bool(re.search(r'^(AMPLIFICATION|DELETION)$', row['variant_alias'])) is True:
                         varkey = str(row['entrezgene']) + "_" + \
                            re.sub(r"transcript_","",str(row['variant_consequence']))
-                        if not varkey in variant_biomarkers['other']:
-                           variant_biomarkers['other'][varkey] = []
+                        if not varkey in variant_biomarkers['other_gene']:
+                           variant_biomarkers['other_gene'][varkey] = []
                         del row['variant_exon']
                         del row['gene']
                         del row['alias_type']
-                        variant_biomarkers['other'][varkey].append(row)
+                        variant_biomarkers['other_gene'][varkey].append(row)
 
                
 
@@ -327,8 +325,8 @@ def match_csq_biomarker(transcript_csq_elements, variant_biomarkers, rec, princi
 
       ## Match biomarkers indicated by gene only - "gene level" resolution
       if entrezgene != "." and principal_csq_entrezgene is True:
-         if str(entrezgene) in variant_biomarkers['other'].keys():
-            hits_gene = variant_biomarkers['other'][str(entrezgene)]
+         if str(entrezgene) in variant_biomarkers['other_gene'].keys():
+            hits_gene = variant_biomarkers['other_gene'][str(entrezgene)]
             for ghit in hits_gene:
                bkey3 = f"{ghit['biomarker_source']}|{ghit['variant_id']}|{ghit['clinical_evidence_items']}"
                ## match biomarkers annotated as "Mutation" only for a given gene - 
diff --git a/pcgr/cna.py b/pcgr/cna.py
index 478deafd..fc63b332 100644
--- a/pcgr/cna.py
+++ b/pcgr/cna.py
@@ -10,7 +10,7 @@
 from pcgr import utils
 from pybedtools import BedTool
 from pcgr.annoutils import nuclear_chromosomes
-from pcgr.utils import error_message, warn_message, check_file_exists
+from pcgr.utils import error_message, warn_message, check_file_exists, remove_file
 from pcgr.biomarker import load_biomarkers
 
 def annotate_cna_segments(output_fname: str, 
@@ -129,8 +129,8 @@ def annotate_cna_segments(output_fname: str,
         biomarkers[db] = load_biomarkers(
             logger, variant_fname, clinical_fname, biomarker_vartype = 'CNA')
         
-        for key in biomarkers[db]['other']:
-            biomarker_data = biomarkers[db]['other'][key]
+        for key in biomarkers[db]['other_gene']:
+            biomarker_data = biomarkers[db]['other_gene'][key]
             biomarker_item = str(db) + '|' + str(biomarker_data[0]['variant_id']) + \
                     '|' + str(biomarker_data[0]['clinical_evidence_items']) + '|by_cna_segment'
             if not key in cna_actionable_dict:               
@@ -154,6 +154,10 @@ def annotate_cna_segments(output_fname: str,
     cna_query_segment_df.loc[cna_query_segment_df['n_major'] + cna_query_segment_df['n_minor'] > 0,"loss_cond"] = False
     cna_query_segment_df.loc[cna_query_segment_df['n_major'] + cna_query_segment_df['n_minor'] == 0,"loss_cond"] = True
     
+    cna_query_segment_df['variant_class'] = 'undefined'
+    cna_query_segment_df.loc[cna_query_segment_df.amp_cond, 'variant_class'] = 'gain'
+    cna_query_segment_df.loc[cna_query_segment_df.loss_cond, 'variant_class'] = 'homdel'
+    
     cna_query_segment_df.loc[cna_query_segment_df.loss_cond, 'aberration_key'] =  \
         cna_query_segment_df.loc[cna_query_segment_df.loss_cond, 'entrezgene'].astype(str) + '_ablation'
 
@@ -165,7 +169,7 @@ def annotate_cna_segments(output_fname: str,
     
     ## remove all temporary files
     for fname in temp_files:
-        utils.remove(fname)
+        remove_file(fname)
         
     cna_query_segment_df.columns = map(str.upper, cna_query_segment_df.columns)
     cna_query_segment_df.rename(columns = {'CHROMOSOME':'CHROM','SEGMENT_ID':'VAR_ID'}, inplace = True)
@@ -253,7 +257,7 @@ def annotate_cytoband(cna_segments_bt: BedTool, output_dir: str, pcgr_build_db_d
     
     ## remove all temporary files
     for fname in temp_files:
-        utils.remove(fname)
+        remove_file(fname)
             
     return cytoband_annotated_segments
 
@@ -363,7 +367,7 @@ def annotate_transcripts(cna_segments_bt: BedTool, output_dir: str,
     
     ## remove all temporary files
     for fname in temp_files:
-        utils.remove(fname)
+        remove_file(fname)
     
     return(cna_segments_annotated)
         
diff --git a/pcgr/config.py b/pcgr/config.py
index c17e4d4d..636a6d3f 100644
--- a/pcgr/config.py
+++ b/pcgr/config.py
@@ -94,7 +94,7 @@ def create_config(arg_dict, workflow = "PCGR"):
             'exclude_nonexonic': int(arg_dict['exclude_nonexonic'])
         }
         conf_options['somatic_snv']['msi'] = {
-            'run': int(arg_dict['estimate_msi_status'])
+            'run': int(arg_dict['estimate_msi'])
         }
         conf_options['somatic_snv']['tmb'] = {
             'run': int(arg_dict['estimate_tmb']),            
diff --git a/pcgr/cpsr.py b/pcgr/cpsr.py
index ab33ef4d..07834b97 100755
--- a/pcgr/cpsr.py
+++ b/pcgr/cpsr.py
@@ -38,7 +38,7 @@ def get_args():
     optional_panel.add_argument('--panel_id',dest = "virtual_panel_id",type = str, default = "-1", help="Comma-separated string with identifier(s) of predefined virtual cancer predisposition gene panels,\nchoose any combination of the following identifiers (GEP = Genomics England PanelApp):\n" + str(pcgr_vars.panels))
     optional_panel.add_argument('--custom_list',dest = "custom_list",help="Provide custom list of genes from virtual panel 0 (single-column .txt/.tsv file with Ensembl gene identifiers),\n alternative to predefined panels provided with --panel_id)")
     optional_panel.add_argument('--custom_list_name',dest = "custom_list_name", default="None", help="Set name for custom made panel/list (single word - no whitespace), will be displayed in the report")
-    optional_panel.add_argument('--diagnostic_grade_only', action="store_true",help="For panel_id's 1-42 (Genomics England PanelApp) - consider genes with a GREEN status only, default: %(default)s")
+    optional_panel.add_argument('--diagnostic_grade_only', action="store_true",help="For panel_id's 1-44 (Genomics England PanelApp) - consider genes with a GREEN status only, default: %(default)s")
 
     optional_other.add_argument('--force_overwrite', action = "store_true", help='By default, the script will fail with an error if any output file already exists.\n You can force the overwrite of existing result files by using this flag, default: %(default)s')
     optional_other.add_argument('--version', action='version', version=str(utils.get_cpsr_version()))
@@ -204,7 +204,6 @@ def run_cpsr(conf_options, cpsr_paths):
                                       output_vcf = vep_vcf)
 
         logger = getlogger('cpsr-vep')
-        #print(str(vep_command["main"]))
 
         logger.info((
             f"CPSR - STEP 1: Basic variant annotation with Variant Effect Predictor (version {pcgr_vars.VEP_VERSION}, "
@@ -224,7 +223,6 @@ def run_cpsr(conf_options, cpsr_paths):
         check_subprocess(logger, vep_command["tabix"], debug)
         logger.info("Finished cpsr-vep")
         print('----')
-        #exit(0)
 
         ## CPSR|vcfanno - run vcfanno on query VCF with a number of relevant annotated VCFs
         logger = getlogger('cpsr-vcfanno')
diff --git a/pcgr/main.py b/pcgr/main.py
index 3e010e91..130ad3b6 100755
--- a/pcgr/main.py
+++ b/pcgr/main.py
@@ -62,10 +62,9 @@ def cli():
     optional_allelic_support.add_argument("--control_af_max", type=float, default=1, dest="control_af_max", help="If VCF INFO tag for variant allelic fraction (control) is specified and found, set maximum tolerated AF for inclusion in report (default: %(default)s)")
 
     optional_tmb_msi.add_argument("--estimate_tmb", action="store_true", help="Estimate tumor mutational burden from the total number of somatic mutations and target region size, default: %(default)s")
-    #optional_tmb_msi.add_argument("--tmb_algorithm", dest="tmb_algorithm", default="all_coding", choices=[ "all_coding", "nonsyn"], help="Method for calculation of TMB, all coding variants (Chalmers et al., Genome Medicine, 2017), or non-synonymous variants only, default: %(default)s")
     optional_tmb_msi.add_argument("--tmb_dp_min", dest="tmb_dp_min", default=0, help="If VCF INFO tag for sequencing depth (tumor) is specified and found, set minimum required sequencing depth for TMB calculation: default: %(default)s")
     optional_tmb_msi.add_argument("--tmb_af_min", dest="tmb_af_min", default=0, help="If VCF INFO tag for allelic fraction (tumor) is specified and found, set minimum required allelic fraction for TMB calculation: default: %(default)s")
-    optional_tmb_msi.add_argument("--estimate_msi_status", action="store_true", help="Predict microsatellite instability status from patterns of somatic mutations/indels, default: %(default)s")
+    optional_tmb_msi.add_argument("--estimate_msi", action="store_true", help="Predict microsatellite instability status from patterns of somatic mutations/indels, default: %(default)s")
 
 
     optional_assay.add_argument("--assay", dest="assay", default="WES", choices=[ "WGS", "WES","TARGETED"], help="Type of DNA sequencing assay performed for input data (VCF), default: %(default)s")
@@ -77,7 +76,7 @@ def cli():
     optional_signatures.add_argument("--min_mutations_signatures", type=int, default=200, dest="min_mutations_signatures", help="Minimum number of SNVs required for reconstruction of mutational signatures (SBS) by MutationalPatterns (default: %(default)s, minimum n = 100)")
     optional_signatures.add_argument("--all_reference_signatures", action="store_true", help="Use all reference mutational signatures (SBS, n = 67) in signature reconstruction rather than only those already attributed to the tumor type (default: %(default)s)")
     optional_signatures.add_argument("--include_artefact_signatures", action="store_true", help="Include sequencing artefacts in the collection of reference signatures (default: %(default)s")
-    optional_signatures.add_argument("--prevalence_reference_signatures", type=int, default=5, choices=[1,2,5,10,15,20], help="Minimum tumor-type prevalence (in percent) of reference signatures to be included in refitting procedure (default: %(default)s)")
+    optional_signatures.add_argument("--prevalence_reference_signatures", type=int, default=1, choices=[1,2,5,10,15,20], help="Minimum tumor-type prevalence (in percent) of reference signatures to be included in refitting procedure (default: %(default)s)")
 
     optional_other.add_argument("--cpsr_report", dest="cpsr_report", help="CPSR report file (Gzipped JSON - file ending with 'cpsr.<genome_assembly>.json.gz' -  germline report of patient's blood/control sample")
     optional_other.add_argument("--vcf2maf", action="store_true", help="Generate a MAF file for input VCF using https://github.com/mskcc/vcf2maf (default: %(default)s)")
diff --git a/pcgr/pcgr_vars.py b/pcgr/pcgr_vars.py
index aff86bc3..0a959d20 100644
--- a/pcgr/pcgr_vars.py
+++ b/pcgr/pcgr_vars.py
@@ -3,7 +3,7 @@
 from pcgr._version import __version__
 
 PCGR_VERSION = __version__
-DB_VERSION = '20240203'
+DB_VERSION = '20240209'
 
 ## MISCELLANEOUS
 NCBI_BUILD_MAF = 'GRCh38'
@@ -103,7 +103,7 @@
       37: "Renal cancer pertinent cancer susceptibility (GEP)",
       38: "Rhabdoid tumour predisposition (GEP)",
       39: "Sarcoma cancer susceptibility (GEP)",
-      40: "Sarcoma susceptbility (GEP)",
+      40: "Sarcoma susceptibility (GEP)",
       41: "Thyroid cancer pertinent cancer susceptibility (GEP)",
       42: "Tumour predisposition - childhood onset (GEP)",
       43: "Upper gastrointestinal cancer pertinent cancer susceptibility (GEP)",
diff --git a/pcgrr/DESCRIPTION b/pcgrr/DESCRIPTION
index d6f71612..a42c4e84 100644
--- a/pcgrr/DESCRIPTION
+++ b/pcgrr/DESCRIPTION
@@ -2,7 +2,7 @@ Package: pcgrr
 Type: Package
 Title: Personal Cancer Genome ReporteR
 Version: 1.4.1.9001
-Date: 2023-12-30
+Date: 2024-02-18
 Authors@R:
     c(person(given = "Sigve",
              family = "Nakken",
@@ -69,5 +69,5 @@ Suggests:
     BSgenome.Hsapiens.UCSC.hg38
 Encoding: UTF-8
 LazyData: true
-RoxygenNote: 7.2.3
+RoxygenNote: 7.3.1
 Roxygen: list(markdown = TRUE)
diff --git a/pcgrr/NAMESPACE b/pcgrr/NAMESPACE
index 954a4b6a..b15fa866 100644
--- a/pcgrr/NAMESPACE
+++ b/pcgrr/NAMESPACE
@@ -9,6 +9,7 @@ export(append_gwas_citation_phenotype)
 export(append_otargets_pheno_link)
 export(append_tcga_var_link)
 export(append_tfbs_annotation)
+export(assign_acmg_tiers)
 export(assign_germline_popfreq_status)
 export(assign_mutation_type)
 export(assign_somatic_classification)
@@ -28,11 +29,13 @@ export(filter_eitems_by_site)
 export(filter_read_support)
 export(generate_annotation_link)
 export(generate_pcgr_report)
+export(generate_pcgr_report2)
 export(generate_report_data_kataegis)
 export(generate_report_data_msi)
 export(generate_report_data_rainfall)
 export(generate_report_data_signatures_mp)
 export(generate_report_data_snv_indel)
+export(generate_report_data_snv_indel2)
 export(generate_report_data_tmb)
 export(generate_report_data_trials)
 export(generate_report_data_tumor_only)
@@ -45,15 +48,15 @@ export(get_cna_overlapping_transcripts)
 export(get_genome_obj)
 export(get_oncogene_tsgene_target_sets)
 export(get_ordinary_chromosomes)
-export(get_population_tag)
 export(get_prevalent_site_signatures)
-export(get_proper_maf_alleles)
 export(get_valid_chromosomes)
 export(het_af_germline_status)
 export(hom_af_status)
 export(init_cna_content)
 export(init_germline_content)
+export(init_kataegis_content)
 export(init_m_signature_content)
+export(init_msi_content)
 export(init_rainfall_content)
 export(init_report)
 export(init_report_display_content)
diff --git a/pcgrr/R/acmg.R b/pcgrr/R/acmg.R
index 34aec75f..e3c9630a 100644
--- a/pcgrr/R/acmg.R
+++ b/pcgrr/R/acmg.R
@@ -18,8 +18,8 @@ assign_tier1_tier2_acmg <- function(pcg_report_snv_indel) {
   unique_variants_tier2 <- data.frame()
 
   ## eitems
-  eitems_specific_ttype <-
-    pcg_report_snv_indel[["clin_eitem"]][["specific_ttype"]]
+  eitems_query_ttype <-
+    pcg_report_snv_indel[["clin_eitem"]][["query_ttype"]]
   eitems_any_ttype <-
     pcg_report_snv_indel[["clin_eitem"]][["any_ttype"]]
   eitems_other_ttype <-
@@ -27,9 +27,9 @@ assign_tier1_tier2_acmg <- function(pcg_report_snv_indel) {
 
 
   for (etype in c("diagnostic", "predictive", "prognostic")) {
-    if (nrow(eitems_specific_ttype[[etype]][["A_B"]]) > 0) {
+    if (nrow(eitems_query_ttype[[etype]][["A_B"]]) > 0) {
       vars <-
-        dplyr::select(eitems_specific_ttype[[etype]][["A_B"]],
+        dplyr::select(eitems_query_ttype[[etype]][["A_B"]],
                       .data$GENOMIC_CHANGE) |>
         dplyr::distinct()
       unique_variants_tier1 <-
@@ -43,16 +43,16 @@ assign_tier1_tier2_acmg <- function(pcg_report_snv_indel) {
       eitems_other_ttype[[etype]][["A_B"]] <-
         eitems_any_ttype[[etype]][["A_B"]]
 
-      if (nrow(eitems_specific_ttype[[etype]][["A_B"]]) > 0) {
+      if (nrow(eitems_query_ttype[[etype]][["A_B"]]) > 0) {
 
         if (pcgrr::check_common_colnames(
           df1 = eitems_any_ttype[[etype]][["A_B"]],
-          df2 = eitems_specific_ttype[[etype]][["A_B"]],
+          df2 = eitems_query_ttype[[etype]][["A_B"]],
           cnames = c("GENOMIC_CHANGE"))) {
 
           eitems_other_ttype[[etype]][["A_B"]] <-
             dplyr::anti_join(eitems_any_ttype[[etype]][["A_B"]],
-                             eitems_specific_ttype[[etype]][["A_B"]],
+                             eitems_query_ttype[[etype]][["A_B"]],
                              by = c("GENOMIC_CHANGE"))
         }
       }
@@ -61,7 +61,7 @@ assign_tier1_tier2_acmg <- function(pcg_report_snv_indel) {
           if (pcgrr::check_common_colnames(
             df1 = unique_variants_tier1,
             df2 = eitems_other_ttype[[etype]][["A_B"]],
-            cnames = c("GENOMIC_CHANGE"))){
+            cnames = c("GENOMIC_CHANGE"))) {
             eitems_other_ttype[[etype]][["A_B"]] <-
               dplyr::anti_join(eitems_other_ttype[[etype]][["A_B"]],
                                unique_variants_tier1,
@@ -77,22 +77,22 @@ assign_tier1_tier2_acmg <- function(pcg_report_snv_indel) {
         }
       }
     }
-    if (nrow(eitems_specific_ttype[[etype]][["C_D_E"]]) > 0) {
+    if (nrow(eitems_query_ttype[[etype]][["C_D_E"]]) > 0) {
       if (nrow(unique_variants_tier1) > 0) {
         if (pcgrr::check_common_colnames(
           df1 = unique_variants_tier1,
-          df2 = eitems_specific_ttype[[etype]][["C_D_E"]],
+          df2 = eitems_query_ttype[[etype]][["C_D_E"]],
           cnames = c("GENOMIC_CHANGE"))) {
-          eitems_specific_ttype[[etype]][["C_D_E"]] <-
+          eitems_query_ttype[[etype]][["C_D_E"]] <-
             dplyr::anti_join(
-              eitems_specific_ttype[[etype]][["C_D_E"]],
+              eitems_query_ttype[[etype]][["C_D_E"]],
               unique_variants_tier1, by = c("GENOMIC_CHANGE"))
         }
       }
-      if (nrow(eitems_specific_ttype[[etype]][["C_D_E"]]) > 0) {
+      if (nrow(eitems_query_ttype[[etype]][["C_D_E"]]) > 0) {
         unique_variants_tier2 <- unique_variants_tier2 |>
           dplyr::bind_rows(
-            dplyr::select(eitems_specific_ttype[[etype]][["C_D_E"]],
+            dplyr::select(eitems_query_ttype[[etype]][["C_D_E"]],
                           .data$GENOMIC_CHANGE)) |>
           dplyr::distinct()
       }
@@ -103,8 +103,8 @@ assign_tier1_tier2_acmg <- function(pcg_report_snv_indel) {
     unique_variants_tier1
   pcg_report_snv_indel[["disp"]][["tier2"]] <-
     unique_variants_tier2
-  pcg_report_snv_indel[["clin_eitem"]][["specific_ttype"]] <-
-    eitems_specific_ttype
+  pcg_report_snv_indel[["clin_eitem"]][["query_ttype"]] <-
+    eitems_query_ttype
   pcg_report_snv_indel[["clin_eitem"]][["any_ttype"]] <-
     eitems_any_ttype
   pcg_report_snv_indel[["clin_eitem"]][["other_ttype"]] <-
@@ -167,18 +167,18 @@ assign_tier1_tier2_acmg_cna <- function(pcg_report_cna) {
   unique_variants_tier2 <- data.frame()
 
   ## eitems
-  eitems_specific_ttype <- pcg_report_cna[["clin_eitem"]][["specific_ttype"]]
+  eitems_query_ttype <- pcg_report_cna[["clin_eitem"]][["query_ttype"]]
   eitems_any_ttype <- pcg_report_cna[["clin_eitem"]][["any_ttype"]]
   eitems_other_ttype <- pcg_report_cna[["clin_eitem"]][["other_ttype"]]
 
   for (etype in c("diagnostic", "predictive", "prognostic")) {
-    if (nrow(eitems_specific_ttype[[etype]][["A_B"]]) > 0) {
+    if (nrow(eitems_query_ttype[[etype]][["A_B"]]) > 0) {
 
-      assertable::assert_colnames(eitems_specific_ttype[[etype]][["A_B"]],
+      assertable::assert_colnames(eitems_query_ttype[[etype]][["A_B"]],
                                   c("SYMBOL", "SEGMENT", "CNA_TYPE"),
                                   only_colnames = F, quiet = T)
 
-      vars <- dplyr::select(eitems_specific_ttype[[etype]][["A_B"]],
+      vars <- dplyr::select(eitems_query_ttype[[etype]][["A_B"]],
                             .data$SYMBOL, .data$SEGMENT, .data$CNA_TYPE) |>
         dplyr::distinct()
       unique_variants_tier1 <- rbind(unique_variants_tier1, vars) |>
@@ -191,16 +191,16 @@ assign_tier1_tier2_acmg_cna <- function(pcg_report_cna) {
       eitems_other_ttype[[etype]][["A_B"]] <-
         eitems_any_ttype[[etype]][["A_B"]]
 
-      if (nrow(eitems_specific_ttype[[etype]][["A_B"]]) > 0) {
+      if (nrow(eitems_query_ttype[[etype]][["A_B"]]) > 0) {
 
         if (pcgrr::check_common_colnames(
           df1 = eitems_any_ttype[[etype]][["A_B"]],
-          df2 = eitems_specific_ttype[[etype]][["A_B"]],
+          df2 = eitems_query_ttype[[etype]][["A_B"]],
           cnames = c("SYMBOL", "SEGMENT", "CNA_TYPE"))) {
 
           eitems_other_ttype[[etype]][["A_B"]] <-
             dplyr::anti_join(eitems_any_ttype[[etype]][["A_B"]],
-                             eitems_specific_ttype[[etype]][["A_B"]],
+                             eitems_query_ttype[[etype]][["A_B"]],
                              by = c("SYMBOL", "SEGMENT", "CNA_TYPE"))
         }
       }
@@ -230,28 +230,28 @@ assign_tier1_tier2_acmg_cna <- function(pcg_report_cna) {
         }
       }
     }
-    if (nrow(eitems_specific_ttype[[etype]][["C_D_E"]]) > 0) {
+    if (nrow(eitems_query_ttype[[etype]][["C_D_E"]]) > 0) {
       if (nrow(unique_variants_tier1) > 0) {
         if (pcgrr::check_common_colnames(
           df1 = unique_variants_tier1,
-          df2 = eitems_specific_ttype[[etype]][["C_D_E"]],
+          df2 = eitems_query_ttype[[etype]][["C_D_E"]],
           cnames = c("SYMBOL", "SEGMENT", "CNA_TYPE"))) {
-          eitems_specific_ttype[[etype]][["C_D_E"]] <-
+          eitems_query_ttype[[etype]][["C_D_E"]] <-
             dplyr::anti_join(
-              eitems_specific_ttype[[etype]][["C_D_E"]],
+              eitems_query_ttype[[etype]][["C_D_E"]],
               unique_variants_tier1,
               by = c("SYMBOL", "SEGMENT", "CNA_TYPE"))
         }
       }
-      if (nrow(eitems_specific_ttype[[etype]][["C_D_E"]]) > 0) {
+      if (nrow(eitems_query_ttype[[etype]][["C_D_E"]]) > 0) {
 
-        assertable::assert_colnames(eitems_specific_ttype[[etype]][["C_D_E"]],
+        assertable::assert_colnames(eitems_query_ttype[[etype]][["C_D_E"]],
                                     c("SYMBOL", "SEGMENT", "CNA_TYPE"),
                                     only_colnames = F, quiet = T)
 
         unique_variants_tier2 <- unique_variants_tier2 |>
           dplyr::bind_rows(
-            dplyr::select(eitems_specific_ttype[[etype]][["C_D_E"]],
+            dplyr::select(eitems_query_ttype[[etype]][["C_D_E"]],
                           .data$SYMBOL, .data$SEGMENT, .data$CNA_TYPE)) |>
           dplyr::distinct()
       }
@@ -260,10 +260,257 @@ assign_tier1_tier2_acmg_cna <- function(pcg_report_cna) {
 
   pcg_report_cna[["disp"]][["tier1"]] <- unique_variants_tier1
   pcg_report_cna[["disp"]][["tier2"]] <- unique_variants_tier2
-  pcg_report_cna[["clin_eitem"]][["specific_ttype"]] <- eitems_specific_ttype
+  pcg_report_cna[["clin_eitem"]][["query_ttype"]] <- eitems_query_ttype
   pcg_report_cna[["clin_eitem"]][["any_ttype"]] <- eitems_any_ttype
   pcg_report_cna[["clin_eitem"]][["other_ttype"]] <- eitems_other_ttype
 
   return(pcg_report_cna)
 
 }
+
+#' Function that assigns tier classifications to somatic CNA segments and
+#' SNVs/InDels, based on the presence of biomarker evidence found in
+#' the variant set
+#'
+#' Tier 1: evidence level A/B for the query tumor site. Tier 2: level A/B
+#' for another site, or level C/D/E for the query site. Tier 3: oncogene or
+#' tumor suppressor hit (coding SNVs/InDels; oncogene gain or tumor
+#' suppressor homdel for CNAs). Other coding SNVs/InDels: tier 4.
+#' Noncoding SNVs/InDels are always assigned tier 5.
+#'
+#' @param vartype variant type ('snv_indel' or 'cna')
+#' @param primary_site primary tumor site
+#' @param variants_df data frame with variants (SNVs/InDels or CNAs)
+#' @param biomarker_items data frame with biomarker evidence items
+#'
+#' @return list with elements 'variant' (variants_df with TIER and
+#'   TIER_GUIDELINE) and 'biomarker_evidence' ('items', 'tier_classification')
+#'
+#' @export
+assign_acmg_tiers <- function(
+    vartype = "snv_indel",
+    primary_site = "Any",
+    variants_df = NULL,
+    biomarker_items = NULL) {
+
+  invisible(assertthat::assert_that(
+    vartype %in% c("snv_indel", "cna"),
+    msg = "Argument 'vartype' must be either 'snv_indel' or 'cna'"))
+  invisible(assertthat::assert_that(
+    is.data.frame(variants_df),
+    msg = paste0("Argument variants_df needs be of type data.frame")))
+  assertable::assert_colnames(
+    variants_df, c("TUMOR_SUPPRESSOR",
+                   "VAR_ID",
+                   "VARIANT_CLASS",
+                   "ONCOGENE",
+                   "ENTREZGENE"),
+    only_colnames = FALSE, quiet = TRUE)
+  ## SNV/InDel tiering depends on coding status - fail early if it is missing
+  if (vartype == "snv_indel") {
+    assertable::assert_colnames(
+      variants_df, "CODING_STATUS", only_colnames = FALSE, quiet = TRUE)
+  }
+  invisible(assertthat::assert_that(
+    is.data.frame(biomarker_items),
+    msg = paste0("Argument 'biomarker_items' needs be of type data.frame")))
+  assertable::assert_colnames(
+    biomarker_items,
+    c("VAR_ID",
+      "ENTREZGENE",
+      "BM_EVIDENCE_LEVEL",
+      "BM_PRIMARY_SITE",
+      "BM_RATING"),
+    only_colnames = FALSE, quiet = TRUE)
+
+  results_acmg <- list()
+  ## typed zero-row default - keeps the final renaming to TIER valid
+  tier_classification <- biomarker_items |>
+    dplyr::select(dplyr::any_of(c("VAR_ID", "VARIANT_CLASS", "ENTREZGENE"))) |>
+    dplyr::slice(0) |>
+    dplyr::mutate(ACMG_AMP_TIER = integer())
+
+  if (NROW(biomarker_items) > 0) {
+    tier_classification <-
+      biomarker_items |>
+      dplyr::select(
+        c("VAR_ID",
+          "VARIANT_CLASS",
+          "ENTREZGENE",
+          "BM_EVIDENCE_LEVEL",
+          "BM_PRIMARY_SITE")) |>
+      dplyr::distinct() |>
+      dplyr::mutate(ACMG_AMP_TIER = dplyr::case_when(
+        .data$BM_PRIMARY_SITE == primary_site &
+          primary_site != "Any" &
+          stringr::str_detect(
+            .data$BM_EVIDENCE_LEVEL, "^(A|B)"
+          ) ~ 1L,
+        ## note: this rule also applies when primary_site is 'Any'
+        .data$BM_PRIMARY_SITE != primary_site &
+          stringr::str_detect(
+            .data$BM_EVIDENCE_LEVEL, "^(A|B)"
+          ) ~ 2L,
+        .data$BM_PRIMARY_SITE == primary_site &
+          primary_site != "Any" &
+          stringr::str_detect(
+            .data$BM_EVIDENCE_LEVEL, "^(C|D|E)"
+          ) ~ 2L,
+        ## 100 = sentinel for 'no tier', mapped back to NA after min()
+        TRUE ~ 100L
+      )) |>
+      dplyr::group_by(
+        .data$VAR_ID,
+        .data$ENTREZGENE,
+        .data$VARIANT_CLASS) |>
+      dplyr::summarise(
+        ACMG_AMP_TIER = min(.data$ACMG_AMP_TIER, na.rm = TRUE),
+        .groups = "drop") |>
+      dplyr::mutate(ACMG_AMP_TIER = dplyr::if_else(
+        .data$ACMG_AMP_TIER == 100,
+        NA_integer_,
+        as.integer(.data$ACMG_AMP_TIER)
+      ))
+
+    if (vartype == "snv_indel") {
+      variants_df <- variants_df |>
+        dplyr::left_join(
+          tier_classification,
+          by = c("VAR_ID", "ENTREZGENE", "VARIANT_CLASS")) |>
+        ## fallback tier when no biomarker tier exists: 3 = coding variant
+        ## in oncogene/tumor suppressor, 4 = otherwise (noncoding: see below)
+        dplyr::mutate(ACMG_TIER2 = dplyr::if_else(
+          ((!is.na(.data$TUMOR_SUPPRESSOR) &
+              .data$TUMOR_SUPPRESSOR == TRUE) |
+             (!is.na(.data$ONCOGENE) &
+                .data$ONCOGENE == TRUE)) &
+            .data$CODING_STATUS == "coding",
+          3L,
+          4L,
+          missing = 4L
+        )) |>
+        dplyr::mutate(ACMG_AMP_TIER = dplyr::if_else(
+          .data$CODING_STATUS == "noncoding",
+          5L,
+          as.integer(.data$ACMG_AMP_TIER)
+        )) |>
+        dplyr::mutate(ACMG_AMP_TIER = dplyr::case_when(
+          is.na(.data$ACMG_AMP_TIER) &
+            !is.na(.data$ACMG_TIER2) ~ .data$ACMG_TIER2,
+          TRUE ~ as.integer(.data$ACMG_AMP_TIER)
+        )) |>
+        dplyr::select(-c("ACMG_TIER2")) |>
+        dplyr::arrange(.data$ACMG_AMP_TIER)
+    } else {
+      ## vartype == "cna"
+      variants_df <- variants_df |>
+        dplyr::left_join(
+          tier_classification,
+          by = c("VAR_ID", "ENTREZGENE", "VARIANT_CLASS")) |>
+        dplyr::mutate(ACMG_TIER2 = dplyr::if_else(
+          (!is.na(.data$TUMOR_SUPPRESSOR) &
+             .data$TUMOR_SUPPRESSOR == TRUE &
+             .data$VARIANT_CLASS == "homdel") |
+            (!is.na(.data$ONCOGENE) &
+               .data$ONCOGENE == TRUE &
+               .data$VARIANT_CLASS == "gain"),
+          3L,
+          as.integer(.data$ACMG_AMP_TIER)
+        )) |>
+        dplyr::mutate(ACMG_AMP_TIER = dplyr::case_when(
+          is.na(.data$ACMG_AMP_TIER) &
+            !is.na(.data$ACMG_TIER2) ~ .data$ACMG_TIER2,
+          TRUE ~ as.integer(.data$ACMG_AMP_TIER)
+        )) |>
+        dplyr::select(-c("ACMG_TIER2")) |>
+        dplyr::arrange(.data$ACMG_AMP_TIER) |>
+        dplyr::distinct()
+    }
+  } else {
+    if (vartype == "snv_indel") {
+      variants_df <- variants_df |>
+        dplyr::mutate(ACMG_AMP_TIER = dplyr::if_else(
+          ((!is.na(.data$TUMOR_SUPPRESSOR) &
+              .data$TUMOR_SUPPRESSOR == TRUE) |
+             (!is.na(.data$ONCOGENE) &
+                .data$ONCOGENE == TRUE)) &
+            .data$CODING_STATUS == "coding",
+          3L,
+          NA_integer_
+        )) |>
+        dplyr::mutate(ACMG_AMP_TIER = dplyr::if_else(
+          is.na(.data$ACMG_AMP_TIER) &
+            .data$CODING_STATUS == "coding",
+          4L,
+          as.integer(.data$ACMG_AMP_TIER)
+        )) |>
+        dplyr::mutate(ACMG_AMP_TIER = dplyr::if_else(
+          .data$CODING_STATUS == "noncoding",
+          5L,
+          as.integer(.data$ACMG_AMP_TIER)
+        )) |>
+        dplyr::arrange(.data$ACMG_AMP_TIER) |>
+        dplyr::distinct()
+    }
+    if (vartype == "cna") {
+      variants_df <- variants_df |>
+        dplyr::mutate(ACMG_AMP_TIER = dplyr::if_else(
+          (!is.na(.data$TUMOR_SUPPRESSOR) &
+             .data$TUMOR_SUPPRESSOR == TRUE &
+             .data$VARIANT_CLASS == "homdel") |
+            (!is.na(.data$ONCOGENE) &
+               .data$ONCOGENE == TRUE &
+               .data$VARIANT_CLASS == "gain"),
+          3L,
+          NA_integer_
+        )) |>
+        dplyr::distinct() |>
+        dplyr::arrange(.data$ACMG_AMP_TIER)
+    }
+  }
+
+  ## tag evidence items with their variant's tier; NA if item cannot back it
+  biomarker_items <- biomarker_items |>
+    dplyr::left_join(
+      dplyr::select(variants_df, c("VAR_ID", "ENTREZGENE", "ACMG_AMP_TIER")),
+      by = c("VAR_ID", "ENTREZGENE")
+    ) |>
+    dplyr::mutate(ACMG_AMP_TIER = dplyr::case_when(
+      .data$BM_PRIMARY_SITE == primary_site &
+        primary_site != "Any" &
+        as.integer(.data$ACMG_AMP_TIER) == 1 &
+        stringr::str_detect(
+          .data$BM_EVIDENCE_LEVEL, "^(C|D|E)"
+        ) ~ NA_integer_,
+      .data$BM_PRIMARY_SITE != primary_site &
+        primary_site != "Any" &
+        .data$ACMG_AMP_TIER == 2 &
+        stringr::str_detect(
+          .data$BM_EVIDENCE_LEVEL, "^(C|D|E)"
+        ) ~ NA_integer_,
+      .data$BM_PRIMARY_SITE != primary_site &
+        primary_site != "Any" &
+        .data$ACMG_AMP_TIER == 1 ~ NA_integer_,
+      TRUE ~ as.integer(.data$ACMG_AMP_TIER)
+    )) |>
+    dplyr::arrange(.data$ACMG_AMP_TIER,
+                   .data$BM_EVIDENCE_LEVEL,
+                   dplyr::desc(.data$BM_RATING)) |>
+    dplyr::distinct()
+
+  results_acmg[["variant"]] <- variants_df |>
+    dplyr::rename(TIER = "ACMG_AMP_TIER") |>
+    dplyr::mutate(TIER_GUIDELINE = "ACMG_AMP")
+
+  results_acmg[["biomarker_evidence"]][["items"]] <-
+    biomarker_items |>
+    dplyr::rename(TIER = "ACMG_AMP_TIER") |>
+    dplyr::mutate(TIER_GUIDELINE = "ACMG_AMP")
+
+  results_acmg[["biomarker_evidence"]][["tier_classification"]] <-
+    tier_classification |>
+    dplyr::rename(TIER = "ACMG_AMP_TIER") |>
+    dplyr::mutate(TIER_GUIDELINE = "ACMG_AMP")
+
+  return(results_acmg)
+}
diff --git a/pcgrr/R/biomarkers.R b/pcgrr/R/biomarkers.R
index 1887a793..47bd82c3 100644
--- a/pcgrr/R/biomarkers.R
+++ b/pcgrr/R/biomarkers.R
@@ -104,7 +104,7 @@ get_clin_assocs_snv_indel <- function(sample_calls,
   pcgrr::log_var_eitem_stats(var_eitems = var_eitems, target_type = "exon")
 
   ## Organize all variants in a list object 'clin_items', organized through
-  ## 1) tumor type (specific_ttype|any_ttype|other_ttype)
+  ## 1) tumor type (query_ttype|any_ttype|other_ttype)
   ## 2) evidence type (diagnostic|prognostic|predictive)
   ## 3) clinical significance ('A_B','C_D_E','any')
 
@@ -145,7 +145,7 @@ get_clin_assocs_snv_indel <- function(sample_calls,
 #'
 get_clin_assocs_cna <- function(onco_ts_sets,
                                 annotation_tags = NULL,
-                                eitems = NULL){
+                                eitems = NULL) {
 
   assertthat::assert_that(
     "oncogene_gain" %in% names(onco_ts_sets) &
@@ -193,7 +193,7 @@ get_clin_assocs_cna <- function(onco_ts_sets,
 
 
   ## Organize all variants in a list object 'clin_items', organized through
-  ## 1) tumor type (specific_ttype|any_ttype|other_ttype)
+  ## 1) tumor type (query_ttype|any_ttype|other_ttype)
   ## 2) evidence type (diagnostic|prognostic|predictive)
   ## 3) clinical significance ('A_B','C_D_E','any')
 
@@ -243,7 +243,7 @@ load_eitems <- function(eitems_raw = NULL,
                    "two values: 'Germline' or 'Somatic' and NOT: ",
                    origin)))
 
-  if(origin == "Somatic"){
+  if (origin == "Somatic") {
     invisible(
       assertthat::assert_that(
         !is.null(tumor_type_specificity),
@@ -291,7 +291,7 @@ assertthat::assert_that(
   ## mutation type and origin
   eitems_all <- data.frame()
 
-  for(alteration_type in alteration_types){
+  for(alteration_type in alteration_types) {
     eitems_alteration_type <-
       pcgrr::load_all_eitems(
         eitems_raw = eitems_raw,
@@ -397,7 +397,7 @@ load_all_eitems <- function(eitems_raw = NULL,
       only_colnames = F,
       quiet = T)
 
-    if(alteration_type == "CNA") {
+    if (alteration_type == "CNA") {
       selected_eitems[[db]] <-
         eitems_raw[[db]] |>
           dplyr::filter(.data$ALTERATION_TYPE == alteration_type &
@@ -489,7 +489,7 @@ load_all_eitems <- function(eitems_raw = NULL,
 #     dplyr::distinct()
 #
 #
-#   if(db == "cgi"){
+#   if (db == "cgi") {
 #     evidence_identifiers <- c("CGI_ID", "CGI_ID_SEGMENT")
 #     if (region_marker == T) {
 #       evidence_identifiers <- c("CGI_ID_SEGMENT", "CGI_ID")
@@ -529,7 +529,7 @@ load_all_eitems <- function(eitems_raw = NULL,
 #             cnames = c("HGVS_ALIAS", evidence_identifiers))
 #       )
 #
-#       if(NROW(var_eitems[['by_id']]) > 0){
+#       if (NROW(var_eitems[['by_id']]) > 0) {
 #         var_eitems[['all']] <- var_eitems[['by_id']]
 #       }
 #
@@ -539,11 +539,11 @@ load_all_eitems <- function(eitems_raw = NULL,
 #   ## Add additional var_eitems based on matching against
 #   ## HGVS (protein_change) + SYMBOL
 #
-#   if(region_marker == F){
+#   if (region_marker == F) {
 #     eitems_hgvs <- eitems_db |>
 #       dplyr::filter(!is.na(.data$HGVS_ALIAS))
 #
-#     if(NROW(eitems_hgvs) > 0){
+#     if (NROW(eitems_hgvs) > 0) {
 #       eitems_hgvs <- eitems_hgvs |>
 #         tidyr::separate_rows(
 #           .data$HGVS_ALIAS, sep = "\\|") |>
@@ -555,7 +555,7 @@ load_all_eitems <- function(eitems_raw = NULL,
 #         dplyr::filter(!is.na(.data$PROTEIN_CHANGE)) |>
 #         dplyr::select(dplyr::one_of(colset))
 #
-#       if(NROW(vars_hgvs_mapped) > 0){
+#       if (NROW(vars_hgvs_mapped) > 0) {
 #         var_eitems_hgvs_mapped <- as.data.frame(vars_hgvs_mapped |>
 #           dplyr::inner_join(
 #             eitems_hgvs, by = c("SYMBOL","PROTEIN_CHANGE")) |>
@@ -565,15 +565,15 @@ load_all_eitems <- function(eitems_raw = NULL,
 #
 #         ## skip duplicate evidence items already found from
 #         ## exact matching at genomic level
-#         if(NROW(var_eitems_hgvs_mapped) > 0){
-#           if(NROW(var_eitems[['by_id']]) > 0){
+#         if (NROW(var_eitems_hgvs_mapped) > 0) {
+#           if (NROW(var_eitems[['by_id']]) > 0) {
 #             var_eitems_hgvs_mapped <-
 #               var_eitems_hgvs_mapped |>
 #               dplyr::anti_join(
 #                 var_eitems[['by_id']], by = c("GENOMIC_CHANGE"))
 #           }
 #
-#           if(NROW(var_eitems_hgvs_mapped) > 0){
+#           if (NROW(var_eitems_hgvs_mapped) > 0) {
 #             var_eitems[['all']] <- var_eitems_exact |>
 #               dplyr::bind_rows(var_eitems_hgvs_mapped) |>
 #               dplyr::distinct()
@@ -591,7 +591,7 @@ load_all_eitems <- function(eitems_raw = NULL,
 #       dplyr::filter(!is.na(.data$HGVS_ALIAS)) |>
 #       dplyr::filter(BIOMARKER_MAPPING == "codon")
 #
-#     if(NROW(eitems_hgvs_codon) > 0){
+#     if (NROW(eitems_hgvs_codon) > 0) {
 #       eitems_hgvs_codon <- eitems_hgvs_codon |>
 #         tidyr::separate_rows(.data$HGVS_ALIAS, sep = "\\|") |>
 #         dplyr::filter(
@@ -615,7 +615,7 @@ load_all_eitems <- function(eitems_raw = NULL,
 #           )) |>
 #         dplyr::select(dplyr::one_of(colset))
 #
-#       if(NROW(vars_codon_mapped) > 0){
+#       if (NROW(vars_codon_mapped) > 0) {
 #         var_eitems_codon_mapped <- as.data.frame(
 #           vars_codon_mapped |>
 #             dplyr::inner_join(
@@ -627,8 +627,8 @@ load_all_eitems <- function(eitems_raw = NULL,
 #
 #         ## skip duplicate evidence items already found from
 #         ## exact matching at genomic level
-#         if(nrow(var_eitems_codon_mapped) > 0){
-#           if(NROW(var_eitems[['by_id']]) > 0){
+#         if (nrow(var_eitems_codon_mapped) > 0) {
+#           if (NROW(var_eitems[['by_id']]) > 0) {
 #             var_eitems_codon_mapped <- var_eitems_codon_mapped |>
 #               dplyr::select(-c("AA_CODON")) |>
 #               dplyr::anti_join(
@@ -719,8 +719,8 @@ qc_var_eitems <- function(var_eitems = NULL,
 
   if (nrow(filtered_var_eitems) > 0) {
 
-    if("LOSS_OF_FUNCTION" %in% colnames(filtered_var_eitems) &
-       "ALTERATION_TYPE" %in% colnames(filtered_var_eitems)){
+    if ("LOSS_OF_FUNCTION" %in% colnames(filtered_var_eitems) &
+       "ALTERATION_TYPE" %in% colnames(filtered_var_eitems)) {
 
       filtered_var_eitems <- filtered_var_eitems |>
         dplyr::filter((.data$LOSS_OF_FUNCTION == T &
@@ -877,7 +877,7 @@ structure_var_eitems <- function(var_eitems,
 #' @export
 deduplicate_eitems <- function(var_eitems = NULL,
                                target_type = "exact",
-                               target_other = c("codon","exon","gene")){
+                               target_other = c("codon","exon","gene")) {
 
   invisible(
     assertthat::assert_that(!is.null(var_eitems),
@@ -887,7 +887,7 @@ deduplicate_eitems <- function(var_eitems = NULL,
                             msg = paste0("Argument 'target_type' can only",
                                          "take on values 'codon' or 'exact'")))
 
-  if(target_type == "exact"){
+  if (target_type == "exact") {
     invisible(
       assertthat::assert_that(
         ("codon" %in% target_other &
@@ -945,7 +945,7 @@ deduplicate_eitems <- function(var_eitems = NULL,
 
 #' @export
 log_var_eitem_stats <- function(var_eitems = NULL,
-                               target_type = "exact"){
+                               target_type = "exact") {
 
   invisible(
     assertthat::assert_that(!is.null(var_eitems),
@@ -981,7 +981,7 @@ log_var_eitem_stats <- function(var_eitems = NULL,
                          var_eitems[[target_type]]$PROTEIN_CHANGE,
                          sep = ":")),
             collapse = ", ")
-    if(nchar(variants_found_log) <= 200){
+    if (nchar(variants_found_log) <= 200) {
       pcgrr::log4r_info(
         variants_found_log
       )
@@ -1001,51 +1001,62 @@ log_var_eitem_stats <- function(var_eitems = NULL,
 expand_biomarker_items <- function(
     callset = NULL,
     variant_origin = "somatic",
-    target_genes = NULL){
+    target_genes = NULL) {
 
-  if("variant" %in% names(callset) &
-     "biomarker_evidence" %in% names(callset)){
+  if ("variant" %in% names(callset) &
+     "biomarker_evidence" %in% names(callset)) {
 
     variant_properties <-
       c("VAR_ID",
         "GENOMIC_CHANGE",
         "GENOME_VERSION",
         "SAMPLE_ID",
-        "GENOTYPE",
         "VARIANT_CLASS",
         "SYMBOL",
         "GENENAME",
         "ENTREZGENE",
+        "REFSEQ_TRANSCRIPT_ID",
+        "ENSEMBL_TRANSCRIPT_ID",
+        "ENSEMBL_PROTEIN_ID",
         "CONSEQUENCE",
         "PROTEIN_CHANGE",
         "MUTATION_HOTSPOT",
+        "MUTATION_HOTSPOT_CANCERTYPE",
         "CDS_CHANGE",
         "LOSS_OF_FUNCTION",
+        "ONCOGENICITY",
+        "ONCOGENICITY_CLASSIFICATION_CODE",
+        "ONCOGENICITY_SCORE",
         "HGVSc",
         "HGVSp",
         "REFSEQ",
         "OFFICIAL_GENENAME",
+        "TARGETED_CANCER_DRUGS",
         "PREDICTED_EFFECT",
         "PROTEIN_DOMAIN",
+        "TCGA_FREQUENCY",
         "DBSNP",
         "CLINVAR",
         "COSMIC",
         "VEP_ALL_CSQ")
 
-    if(variant_origin == "germline"){
+    if (variant_origin == "germline") {
       variant_properties <- c(
         variant_properties,
+        "GENOTYPE",
         "CLINVAR_CLASSIFICATION",
         "CPSR_CLASSIFICATION"
       )
     }
-    if(variant_origin == "somatic"){
+    if (variant_origin == "somatic") {
       variant_properties <- c(
         variant_properties,
+        "CALL_CONFIDENCE",
         "DP_TUMOR",
         "AF_TUMOR",
         "DP_CONTROL",
-        "AF_CONTROL"
+        "AF_CONTROL",
+        "GENOME_VERSION"
       )
     }
 
@@ -1054,7 +1065,7 @@ expand_biomarker_items <- function(
     for (type in c(pcgrr::evidence_types,
                    "all")) {
       for (elevel in c("any", "A_B", "C_D_E")) {
-        if(NROW(callset[['biomarker_evidence']][[type]][[elevel]]) > 0){
+        if (NROW(callset[['biomarker_evidence']][[type]][[elevel]]) > 0) {
           callset[['biomarker_evidence']][[type]][[elevel]] <-
             callset[['biomarker_evidence']][[type]][[elevel]] |>
             dplyr::left_join(
@@ -1068,23 +1079,23 @@ expand_biomarker_items <- function(
               dplyr::desc(
                 .data$RATING))
 
-          if(variant_origin == "germline"){
+          if (variant_origin == "germline") {
             callset[['biomarker_evidence']][[type]][[elevel]] <-
               callset[['biomarker_evidence']][[type]][[elevel]] |>
               dplyr::filter(
-                (!is.na(CLINVAR_CLASSIFICATION) &
+                (!is.na(.data$CLINVAR_CLASSIFICATION) &
                    stringr::str_detect(
-                     tolower(CLINVAR_CLASSIFICATION), "pathogenic")) |
-                  (is.na(CLINVAR_CLASSIFICATION) &
-                     !is.na(CPSR_CLASSIFICATION) &
+                     tolower(.data$CLINVAR_CLASSIFICATION), "pathogenic")) |
+                  (is.na(.data$CLINVAR_CLASSIFICATION) &
+                     !is.na(.data$CPSR_CLASSIFICATION) &
                      stringr::str_detect(
-                       tolower(CPSR_CLASSIFICATION), "pathogenic"))
+                       tolower(.data$CPSR_CLASSIFICATION), "pathogenic"))
               )
 
-            if(NROW(callset[['biomarker_evidence']][[type]][[elevel]]) > 0 &
+            if (NROW(callset[['biomarker_evidence']][[type]][[elevel]]) > 0 &
                is.data.frame(target_genes) &
                NROW(target_genes) > 0 &
-               "ENTREZGENE" %in% colnames(target_genes)){
+               "ENTREZGENE" %in% colnames(target_genes)) {
               callset[['biomarker_evidence']][[type]][[elevel]] <-
                 callset[['biomarker_evidence']][[type]][[elevel]] |>
                 dplyr::semi_join(target_genes, by = "ENTREZGENE")
@@ -1099,3 +1110,5 @@ expand_biomarker_items <- function(
   return(callset)
 
 }
+
+#assign_classification <-
diff --git a/pcgrr/R/germline.R b/pcgrr/R/germline.R
index afec4089..72576f16 100644
--- a/pcgrr/R/germline.R
+++ b/pcgrr/R/germline.R
@@ -5,7 +5,7 @@
 #' @param sample_calls data frame with sample variant calls
 #'
 #' @export
-max_af_gnomad <- function(sample_calls){
+max_af_gnomad <- function(sample_calls) {
   invisible(
     assertthat::assert_that(is.data.frame(sample_calls),
                             msg = paste0("Argument 'sample_calls' must be of ",
@@ -23,7 +23,7 @@ max_af_gnomad <- function(sample_calls){
                    "gnomADe_OTH_AF")
   sample_calls$MAX_AF_GNOMAD <- 0
   for (c in gnomad_cols) {
-    if(c %in% colnames(sample_calls)){
+    if (c %in% colnames(sample_calls)) {
       if (nrow(
         sample_calls[!is.na(sample_calls[, c]) &
                      sample_calls[, c] > sample_calls$MAX_AF_GNOMAD, ]) > 0) {
@@ -48,7 +48,7 @@ max_af_gnomad <- function(sample_calls){
 #'
 #'
 #' @export
-clinvar_germline_status <- function(sample_calls){
+clinvar_germline_status <- function(sample_calls) {
 
   invisible(
     assertthat::assert_that(is.data.frame(sample_calls),
@@ -77,7 +77,7 @@ clinvar_germline_status <- function(sample_calls){
 #' @param sample_calls data frame with sample variant calls
 #'
 #' @export
-dbsnp_germline_status <- function(sample_calls){
+dbsnp_germline_status <- function(sample_calls) {
 
   invisible(
     assertthat::assert_that(is.data.frame(sample_calls),
@@ -87,18 +87,12 @@ dbsnp_germline_status <- function(sample_calls){
   ## assign STATUS_DBSNP_GERMLINE status to all calls recorded in
   ## dbSNP (except relevant in a somatic setting, as defined by ClinVar/DoCM)
   if ("DBSNPRSID" %in% colnames(sample_calls) &
-      "DOCM_PMID" %in% colnames(sample_calls) &
       "CLINVAR_MSID" %in% colnames(sample_calls) &
       "CLINVAR_VARIANT_ORIGIN" %in% colnames(sample_calls)) {
     sample_calls <- sample_calls |>
       dplyr::mutate(
         STATUS_DBSNP_GERMLINE =
           dplyr::if_else(!is.na(.data$DBSNPRSID), TRUE, FALSE)) |>
-      dplyr::mutate(
-        STATUS_DBSNP_GERMLINE =
-          dplyr::if_else(.data$STATUS_DBSNP_GERMLINE == T &
-                           !is.na(.data$DOCM_PMID),
-                         FALSE, .data$STATUS_DBSNP_GERMLINE)) |>
       dplyr::mutate(
         STATUS_DBSNP_GERMLINE =
           dplyr::if_else(
@@ -116,7 +110,7 @@ dbsnp_germline_status <- function(sample_calls){
 #' @param sample_calls data frame with sample variant calls
 #'
 #' @export
-tcga_somatic_status <- function(sample_calls){
+tcga_somatic_status <- function(sample_calls) {
 
   invisible(
     assertthat::assert_that(is.data.frame(sample_calls),
@@ -129,7 +123,9 @@ tcga_somatic_status <- function(sample_calls){
     sample_calls <- sample_calls |>
       dplyr::mutate(
         STATUS_TCGA_SOMATIC =
-          dplyr::if_else(!is.na(.data$TCGA_PANCANCER_COUNT), TRUE, FALSE))
+          dplyr::if_else(
+            !is.na(.data$TCGA_PANCANCER_COUNT),
+            TRUE, FALSE))
   }
   return(sample_calls)
 
@@ -141,7 +137,7 @@ tcga_somatic_status <- function(sample_calls){
 #' @param sample_calls data frame with sample variant calls
 #'
 #' @export
-cosmic_somatic_status <- function(sample_calls){
+cosmic_somatic_status <- function(sample_calls) {
 
   invisible(
     assertthat::assert_that(is.data.frame(sample_calls),
@@ -154,7 +150,9 @@ cosmic_somatic_status <- function(sample_calls){
     sample_calls <- sample_calls |>
       dplyr::mutate(
         STATUS_COSMIC =
-          dplyr::if_else(!is.na(.data$COSMIC_MUTATION_ID), TRUE, FALSE))
+          dplyr::if_else(
+            !is.na(.data$COSMIC_MUTATION_ID),
+            TRUE, FALSE))
   }
   return(sample_calls)
 
@@ -166,7 +164,7 @@ cosmic_somatic_status <- function(sample_calls){
 #' @param sample_calls data frame with sample variant calls
 #'
 #' @export
-hom_af_status <- function(sample_calls){
+hom_af_status <- function(sample_calls) {
 
 
   invisible(
@@ -180,8 +178,11 @@ hom_af_status <- function(sample_calls){
     sample_calls <- sample_calls |>
       dplyr::mutate(
         STATUS_LIKELY_GERMLINE_HOMOZYGOUS =
-          dplyr::if_else(!is.na(.data$AF_TUMOR) & .data$AF_TUMOR == 1,
-                         TRUE, FALSE))
+          dplyr::if_else(
+            !is.na(.data$AF_TUMOR) &
+              .data$AF_TUMOR == 1,
+            TRUE,
+            FALSE))
   }
   return(sample_calls)
 }
@@ -193,7 +194,7 @@ hom_af_status <- function(sample_calls){
 #' @param sample_calls data frame with sample variant calls
 #'
 #' @export
-pon_status <- function(sample_calls){
+pon_status <- function(sample_calls) {
 
   invisible(
     assertthat::assert_that(is.data.frame(sample_calls),
@@ -206,8 +207,10 @@ pon_status <- function(sample_calls){
     sample_calls <- sample_calls |>
       dplyr::mutate(
         STATUS_PON =
-          dplyr::if_else(.data$PANEL_OF_NORMALS == TRUE,
-                         TRUE, FALSE))
+          dplyr::if_else(
+            .data$PANEL_OF_NORMALS == TRUE,
+            TRUE,
+            FALSE))
   }
   return(sample_calls)
 }
@@ -219,7 +222,7 @@ pon_status <- function(sample_calls){
 #' @param sample_calls data frame with sample variant calls
 #'
 #' @export
-het_af_germline_status <- function(sample_calls){
+het_af_germline_status <- function(sample_calls) {
 
   invisible(
     assertthat::assert_that(is.data.frame(sample_calls),
@@ -238,12 +241,13 @@ het_af_germline_status <- function(sample_calls){
     sample_calls <- sample_calls |>
       dplyr::mutate(
         STATUS_LIKELY_GERMLINE_HETEROZYGOUS =
-          dplyr::if_else(!is.na(.data$MAX_AF_GNOMAD) &
-                           .data$STATUS_DBSNP_GERMLINE == TRUE &
-                           !is.na(.data$AF_TUMOR) &
-                           .data$AF_TUMOR >= 0.40 & .data$AF_TUMOR <= 0.60 &
-                           .data$STATUS_TCGA_SOMATIC == FALSE &
-                           .data$STATUS_COSMIC == FALSE, TRUE, FALSE))
+          dplyr::if_else(
+            !is.na(.data$MAX_AF_GNOMAD) &
+              .data$STATUS_DBSNP_GERMLINE == TRUE &
+              !is.na(.data$AF_TUMOR) &
+              .data$AF_TUMOR >= 0.40 & .data$AF_TUMOR <= 0.60 &
+              .data$STATUS_TCGA_SOMATIC == FALSE &
+              .data$STATUS_COSMIC == FALSE, TRUE, FALSE))
   }
   return(sample_calls)
 }
@@ -253,14 +257,14 @@ het_af_germline_status <- function(sample_calls){
 #' based on evidence found in variant set,
 #' potentially limited by user-defined options
 #'
-#' @param sample_calls data frame with variants
-#' @param config configuration object
+#' @param sample_calls data frame with putative somatic variants
+#' @param settings PCGR configuration settings
 #'
 #' @return sample_calls
 #'
 #' @export
 
-assign_somatic_classification <- function(sample_calls, config) {
+assign_somatic_classification <- function(sample_calls, settings) {
 
   sample_calls$SOMATIC_CLASSIFICATION <- "SOMATIC"
 
@@ -285,7 +289,7 @@ assign_somatic_classification <- function(sample_calls, config) {
   sample_calls <- sample_calls |>
     dplyr::mutate(
       SOMATIC_CLASSIFICATION =
-        dplyr::if_else(.data$STATUS_POPFREQ_GNOMAD_ABOVE_TOLERATED == TRUE &
+        dplyr::if_else(.data$gnomADe_AF_ABOVE_TOLERATED == TRUE &
                          .data$SOMATIC_CLASSIFICATION == "SOMATIC",
                        "GERMLINE_GNOMAD", .data$SOMATIC_CLASSIFICATION)) |>
     dplyr::mutate(
@@ -296,7 +300,7 @@ assign_somatic_classification <- function(sample_calls, config) {
     dplyr::mutate(
       SOMATIC_CLASSIFICATION =
         dplyr::if_else(.data$STATUS_PON == TRUE &
-                         config[["tumor_only"]][["exclude_pon"]] == TRUE &
+                         settings$conf$somatic_snv[["exclude_pon"]] == TRUE &
                          .data$SOMATIC_CLASSIFICATION == "SOMATIC",
                        "GERMLINE_PON", .data$SOMATIC_CLASSIFICATION)) |>
 
@@ -304,28 +308,31 @@ assign_somatic_classification <- function(sample_calls, config) {
       SOMATIC_CLASSIFICATION =
         dplyr::if_else(
           .data$STATUS_LIKELY_GERMLINE_HOMOZYGOUS == TRUE &
-            config[["tumor_only"]][["exclude_likely_hom_germline"]] == TRUE &
+            settings$conf$somatic_snv[["exclude_likely_hom_germline"]] == TRUE &
             .data$SOMATIC_CLASSIFICATION == "SOMATIC",
-          "GERMLINE_HOMOZYGOUS", .data$SOMATIC_CLASSIFICATION)) |>
+          "GERMLINE_HOMOZYGOUS",
+          .data$SOMATIC_CLASSIFICATION)) |>
     dplyr::mutate(
       SOMATIC_CLASSIFICATION =
         dplyr::if_else(
           .data$STATUS_LIKELY_GERMLINE_HETEROZYGOUS == TRUE &
-            config[["tumor_only"]][["exclude_likely_het_germline"]] == TRUE &
+            settings$conf$somatic_snv[["exclude_likely_het_germline"]] == TRUE &
             .data$SOMATIC_CLASSIFICATION == "SOMATIC",
-          "GERMLINE_HETEROZYGOUS", .data$SOMATIC_CLASSIFICATION))
+          "GERMLINE_HETEROZYGOUS",
+          .data$SOMATIC_CLASSIFICATION))
 
   ## set variants found in DBSNP as germline if this option is set to TRUE
-  if (config[["tumor_only"]][["exclude_dbsnp_nonsomatic"]] == TRUE) {
+  if (settings$conf$somatic_snv[["exclude_dbsnp_nonsomatic"]] == TRUE) {
 
     sample_calls <- sample_calls |>
       dplyr::mutate(
         SOMATIC_CLASSIFICATION =
-          dplyr::if_else(.data$STATUS_DBSNP_GERMLINE == TRUE &
-                           .data$STATUS_TCGA_SOMATIC == FALSE &
-                           .data$STATUS_COSMIC == FALSE &
-                           .data$SOMATIC_CLASSIFICATION == "SOMATIC",
-                         "GERMLINE_DBSNP", .data$SOMATIC_CLASSIFICATION))
+          dplyr::if_else(
+            .data$STATUS_DBSNP_GERMLINE == TRUE &
+              .data$STATUS_TCGA_SOMATIC == FALSE &
+              .data$STATUS_COSMIC == FALSE &
+              .data$SOMATIC_CLASSIFICATION == "SOMATIC",
+            "GERMLINE_DBSNP", .data$SOMATIC_CLASSIFICATION))
 
   }
 
@@ -336,13 +343,15 @@ assign_somatic_classification <- function(sample_calls, config) {
 #' evidence for somatic/germline status of variants
 #'
 #' @param sample_calls data frame with variants
-#' @param config configuration object
+#' @param settings PCGR configuration settings
 #'
 #' @return sample_calls
 #'
 #' @export
 
-assign_somatic_germline_evidence <- function(sample_calls, config) {
+assign_somatic_germline_evidence <- function(
+    sample_calls,
+    settings = NULL) {
 
   invisible(
     assertthat::assert_that(
@@ -360,7 +369,7 @@ assign_somatic_germline_evidence <- function(sample_calls, config) {
         pop = pop,
         dbquery = "gnomADe",
         max_tolerated_af =
-          config[["tumor_only"]][[paste0("maf_gnomad_", tolower(pop))]])
+          settings$conf$somatic_snv$tumor_only[[paste0("maf_gnomad_", tolower(pop))]])
   }
 
   sample_calls <- sample_calls |>
@@ -389,26 +398,26 @@ assign_somatic_germline_evidence <- function(sample_calls, config) {
 #'
 #' @export
 assign_germline_popfreq_status <- function(sample_calls,
-                                           pop = "EUR",
+                                           pop = "NFE",
                                            dbquery = "gnomADe",
                                            max_tolerated_af = 0.01) {
 
 
   if (dbquery == "gnomADe") {
-    if (!("STATUS_POPFREQ_GNOMAD_ABOVE_TOLERATED" %in% colnames(sample_calls))) {
-      sample_calls$STATUS_POPFREQ_GNOMAD_ABOVE_TOLERATED <- FALSE
+    if (!("gnomADe_AF_ABOVE_TOLERATED" %in% colnames(sample_calls))) {
+      sample_calls$gnomADe_AF_ABOVE_TOLERATED <- FALSE
     }
     col <- paste0(dbquery,"_",pop, "_AF")
     if (any(grepl(paste0("^", col, "$"), names(sample_calls)))) {
 
       sample_calls$max_tolerated_af <- max_tolerated_af
 
-      if(nrow(
+      if (nrow(
         sample_calls[!is.na(sample_calls[, col]) &
-                     sample_calls[, col] > sample_calls$max_tolerated_af, ]) > 0){
+                     sample_calls[, col] > sample_calls$max_tolerated_af, ]) > 0) {
         sample_calls[!is.na(sample_calls[, col]) &
                        sample_calls[, col] > sample_calls$max_tolerated_af,
-                     "STATUS_POPFREQ_GNOMAD_ABOVE_TOLERATED"] <- TRUE
+                     "gnomADe_AF_ABOVE_TOLERATED"] <- TRUE
       }
       sample_calls$max_tolerated_af <- NULL
     }
@@ -421,132 +430,126 @@ assign_germline_popfreq_status <- function(sample_calls,
 #' Function that retrieves name of VCF INFO tag and
 #' population description for gnomad/1000G population
 #'
-#' @param population_code three-letter code
-#' @param db 1KG or GNOMAD
-#' @param subset NA or "non_cancer" (for GNOMAD)
 #'
-#' @return pop_tag_info
-#'
-#' @export
-get_population_tag <- function(population_code, db = "1KG", subset = NA) {
-  pop_tag_info <-
-    list("vcf_tag" = paste0(toupper(population_code), "_AF_", db),
-         "pop_description" = NA)
-  if (db == "GNOMAD"){
-    if(!is.na(subset)){
-      if(subset == "non_cancer"){
-        pop_tag_info <-
-          list("vcf_tag" =
-                 paste0("NON_CANCER_AF_", toupper(population_code)),
-               "pop_description" = NA)
-      }
-    }
-  }
-
-  pop_descriptions_1KG <-
-    data.frame(code = "afr",
-               pop_description = "African", stringsAsFactors = F) |>
-    rbind(data.frame(
-      code = "amr",
-      pop_description = "Admixed American", stringsAsFactors = F)) |>
-    rbind(data.frame(
-      code = "eur",
-      pop_description = "European", stringsAsFactors = F)) |>
-    rbind(data.frame(
-      code = "eas",
-      pop_description = "East Asian", stringsAsFactors = F)) |>
-    rbind(data.frame(
-      code = "sas",
-      pop_description = "South Asian", stringsAsFactors = F)) |>
-    rbind(data.frame(
-      code = "global",
-      pop_description = "global", stringsAsFactors = F))
-
-  pop_descriptions_gnomad <-
-    data.frame(code = "afr",
-               pop_description = "African", stringsAsFactors = F) |>
-    rbind(data.frame(
-      code = "amr",
-      pop_description = "Admixed American", stringsAsFactors = F)) |>
-    rbind(data.frame(
-      code = "nfe",
-      pop_description = "Non-Finnish European", stringsAsFactors = F)) |>
-    rbind(data.frame(
-      code = "fin",
-      pop_description = "Finnish", stringsAsFactors = F)) |>
-    rbind(data.frame(
-      code = "oth",
-      pop_description = "Other", stringsAsFactors = F)) |>
-    rbind(data.frame(
-      code = "asj",
-      pop_description = "Ashkenazi Jewish", stringsAsFactors = F)) |>
-    rbind(data.frame(
-      code = "eas",
-      pop_description = "East Asian", stringsAsFactors = F)) |>
-    rbind(data.frame(
-      code = "sas",
-      pop_description = "South Asian", stringsAsFactors = F)) |>
-    rbind(data.frame(
-      code = "global",
-      pop_description = "global", stringsAsFactors = F))
-
-  pop_descriptions_gnomad_non_cancer <-
-    data.frame(code = "afr",
-               pop_description = "African non-cancer subset",
-               stringsAsFactors = F) |>
-    rbind(data.frame(
-      code = "amr",
-      pop_description = "Admixed American non-cancer subset",
-      stringsAsFactors = F)) |>
-    rbind(data.frame(
-      code = "nfe",
-      pop_description = "Non-Finnish European non-cancer subset",
-      stringsAsFactors = F)) |>
-    rbind(data.frame(
-      code = "fin",
-      pop_description = "Finnish non-cancer subset",
-      stringsAsFactors = F)) |>
-    rbind(data.frame(
-      code = "oth",
-      pop_description = "Other non-cancer subset",
-      stringsAsFactors = F)) |>
-    rbind(data.frame(
-      code = "asj",
-      pop_description = "Ashkenazi Jewish non-cancer subset",
-      stringsAsFactors = F)) |>
-    rbind(data.frame(
-      code = "eas",
-      pop_description = "East Asian non-cancer subset",
-      stringsAsFactors = F)) |>
-    rbind(data.frame(
-      code = "sas",
-      pop_description = "South Asian non-cancer subset",
-      stringsAsFactors = F)) |>
-    rbind(data.frame(
-      code = "global",
-      pop_description = "Global non-cancer subset",
-      stringsAsFactors = F))
-
-  if (db == "1KG") {
-    pop_entry <- dplyr::filter(pop_descriptions_1KG,
-                               .data$code == population_code)
-    pop_tag_info[["pop_description"]] <- pop_entry$pop_description
-  }
-  if (db == "GNOMAD") {
-    pop_entry <- dplyr::filter(pop_descriptions_gnomad,
-                               .data$code == tolower(population_code))
-    pop_tag_info[["pop_description"]] <- pop_entry$pop_description
-    if(!is.na(subset)){
-      if (subset == "non_cancer") {
-        pop_entry <- dplyr::filter(pop_descriptions_gnomad_non_cancer,
-                                   .data$code == tolower(population_code))
-        pop_tag_info[["pop_description"]] <- pop_entry$pop_description
-      }
-    }
-
-  }
-  return(pop_tag_info)
-}
+# get_population_tag <- function(population_code, db = "1KG", subset = NA) {
+#   pop_tag_info <-
+#     list("vcf_tag" = paste0(toupper(population_code), "_AF_", db),
+#          "pop_description" = NA)
+#   if (db == "GNOMAD") {
+#     if (!is.na(subset)) {
+#       if (subset == "non_cancer") {
+#         pop_tag_info <-
+#           list("vcf_tag" =
+#                  paste0("NON_CANCER_AF_", toupper(population_code)),
+#                "pop_description" = NA)
+#       }
+#     }
+#   }
+#
+#   pop_descriptions_1KG <-
+#     data.frame(code = "afr",
+#                pop_description = "African", stringsAsFactors = F) |>
+#     rbind(data.frame(
+#       code = "amr",
+#       pop_description = "Admixed American", stringsAsFactors = F)) |>
+#     rbind(data.frame(
+#       code = "eur",
+#       pop_description = "European", stringsAsFactors = F)) |>
+#     rbind(data.frame(
+#       code = "eas",
+#       pop_description = "East Asian", stringsAsFactors = F)) |>
+#     rbind(data.frame(
+#       code = "sas",
+#       pop_description = "South Asian", stringsAsFactors = F)) |>
+#     rbind(data.frame(
+#       code = "global",
+#       pop_description = "global", stringsAsFactors = F))
+#
+#   pop_descriptions_gnomad <-
+#     data.frame(code = "afr",
+#                pop_description = "African", stringsAsFactors = F) |>
+#     rbind(data.frame(
+#       code = "amr",
+#       pop_description = "Admixed American", stringsAsFactors = F)) |>
+#     rbind(data.frame(
+#       code = "nfe",
+#       pop_description = "Non-Finnish European", stringsAsFactors = F)) |>
+#     rbind(data.frame(
+#       code = "fin",
+#       pop_description = "Finnish", stringsAsFactors = F)) |>
+#     rbind(data.frame(
+#       code = "oth",
+#       pop_description = "Other", stringsAsFactors = F)) |>
+#     rbind(data.frame(
+#       code = "asj",
+#       pop_description = "Ashkenazi Jewish", stringsAsFactors = F)) |>
+#     rbind(data.frame(
+#       code = "eas",
+#       pop_description = "East Asian", stringsAsFactors = F)) |>
+#     rbind(data.frame(
+#       code = "sas",
+#       pop_description = "South Asian", stringsAsFactors = F)) |>
+#     rbind(data.frame(
+#       code = "global",
+#       pop_description = "global", stringsAsFactors = F))
+#
+#   pop_descriptions_gnomad_non_cancer <-
+#     data.frame(code = "afr",
+#                pop_description = "African non-cancer subset",
+#                stringsAsFactors = F) |>
+#     rbind(data.frame(
+#       code = "amr",
+#       pop_description = "Admixed American non-cancer subset",
+#       stringsAsFactors = F)) |>
+#     rbind(data.frame(
+#       code = "nfe",
+#       pop_description = "Non-Finnish European non-cancer subset",
+#       stringsAsFactors = F)) |>
+#     rbind(data.frame(
+#       code = "fin",
+#       pop_description = "Finnish non-cancer subset",
+#       stringsAsFactors = F)) |>
+#     rbind(data.frame(
+#       code = "oth",
+#       pop_description = "Other non-cancer subset",
+#       stringsAsFactors = F)) |>
+#     rbind(data.frame(
+#       code = "asj",
+#       pop_description = "Ashkenazi Jewish non-cancer subset",
+#       stringsAsFactors = F)) |>
+#     rbind(data.frame(
+#       code = "eas",
+#       pop_description = "East Asian non-cancer subset",
+#       stringsAsFactors = F)) |>
+#     rbind(data.frame(
+#       code = "sas",
+#       pop_description = "South Asian non-cancer subset",
+#       stringsAsFactors = F)) |>
+#     rbind(data.frame(
+#       code = "global",
+#       pop_description = "Global non-cancer subset",
+#       stringsAsFactors = F))
+#
+#   if (db == "1KG") {
+#     pop_entry <- dplyr::filter(pop_descriptions_1KG,
+#                                .data$code == population_code)
+#     pop_tag_info[["pop_description"]] <- pop_entry$pop_description
+#   }
+#   if (db == "GNOMAD") {
+#     pop_entry <- dplyr::filter(pop_descriptions_gnomad,
+#                                .data$code == tolower(population_code))
+#     pop_tag_info[["pop_description"]] <- pop_entry$pop_description
+#     if (!is.na(subset)) {
+#       if (subset == "non_cancer") {
+#         pop_entry <- dplyr::filter(pop_descriptions_gnomad_non_cancer,
+#                                    .data$code == tolower(population_code))
+#         pop_tag_info[["pop_description"]] <- pop_entry$pop_description
+#       }
+#     }
+#
+#   }
+#   return(pop_tag_info)
+# }
 
 #' Function that makes input data for an UpSet plot
 #' (filtering/intersection results) for the somatic-germline
diff --git a/pcgrr/R/input_data.R b/pcgrr/R/input_data.R
index 399c9ae3..2f6f7b50 100644
--- a/pcgrr/R/input_data.R
+++ b/pcgrr/R/input_data.R
@@ -3,21 +3,45 @@
 #'
 #' @param fname Path to file name
 #' @param ref_data Object with reference data
+#' @param settings Object with PCGR report configuration
 #'
 #' @export
-load_somatic_cna <- function(fname, ref_data = NULL){
+load_somatic_cna <- function(
+    fname,
+    ref_data = NULL,
+    settings = NULL) {
 
   log4r_info(paste0(
     "Reading annotated molecular dataset (DNA) - somatic copy number aberrations"))
 
-  callset <- load_dna_variants(
+  callset_cna <- pcgrr::load_dna_variants(
     fname = fname,
     cols = pcgrr::data_coltype_defs$cna_somatic_raw,
     ref_data = ref_data,
+    vartype = 'cna',
+    primary_site = settings[['conf']][['sample_properties']]$site,
     retained_info_tags = "None",
     variant_origin = "Somatic")
 
-  return(callset)
+  tumor_site <-
+    settings[['conf']][['sample_properties']][['site']]
+
+  if (NROW(callset_cna$variant) > 0) {
+    callset_cna[['variant']] <- callset_cna[['variant']] |>
+      pcgrr::append_cancer_gene_evidence(
+        ref_data = ref_data,
+        site = tumor_site,
+        pos_var = 'SEGMENT_START') |>
+      pcgrr::append_drug_var_link(
+        ref_data = ref_data
+      ) |>
+      dplyr::arrange(
+        .data$TIER,
+        dplyr::desc(.data$TISSUE_ASSOC_RANK),
+        dplyr::desc(.data$GLOBAL_ASSOC_RANK))
+  }
+
+  return(callset_cna)
 
 }
 
@@ -32,19 +56,26 @@ load_somatic_cna <- function(fname, ref_data = NULL){
 load_somatic_snv_indel <- function(
     fname = NA,
     ref_data = NULL,
-    settings = NULL){
+    settings = NULL) {
 
   log4r_info(paste0(
     "Reading annotated molecular dataset (DNA) - somatic SNV/InDels"))
 
-  callset <- load_dna_variants(
+  callset <- pcgrr::load_dna_variants(
     fname = fname,
     cols = pcgrr::data_coltype_defs$snv_indel_somatic_raw,
     ref_data = ref_data,
+    vartype = 'snv_indel',
+    primary_site = settings[['conf']][['sample_properties']]$site,
     retained_info_tags =
       settings[['conf']][['other']]$retained_vcf_info_tags,
     variant_origin = "Somatic")
 
+  tumor_site <-
+    settings[['conf']][['sample_properties']][['site']]
+
+
+  callset[['variant_unfiltered']] <- data.frame()
   callset[['variant']] <- callset[['variant']] |>
     pcgrr::append_dbnsfp_var_link() |>
     pcgrr::append_dbmts_var_link() |>
@@ -52,12 +83,34 @@ load_somatic_snv_indel <- function(
     pcgrr::append_annotation_links() |>
     pcgrr::append_drug_var_link(ref_data = ref_data) |>
     pcgrr::append_tfbs_annotation() |>
-    pcgrr::append_cancer_gene_evidence(ref_data = ref_data)
+    pcgrr::append_cancer_gene_evidence(ref_data = ref_data,
+                                       site = tumor_site)
+
+  if (settings$conf$assay_properties$vcf_tumor_only == 1) {
+    callset[['variant_unfiltered']] <- callset[['variant']]
+    callset[['variant']] <- callset[['variant']] |>
+      ## assign evidence tags for germline/somatic state of variants,
+      ## partially based on user-defined criteria
+      ## (population allele frequency thresholds)
+        pcgrr::assign_somatic_germline_evidence2(
+          settings = settings) |>
+
+      ## assign somatic classification based on accumulation
+      ## of evidence tags and user-defined options
+        pcgrr::assign_somatic_classification(
+          settings = settings)
+  }
+
+  callset[['variant']] <- callset[['variant']] |>
+    dplyr::arrange(.data$TIER,
+                   dplyr::desc(.data$ONCOGENICITY_SCORE),
+                   dplyr::desc(.data$TISSUE_ASSOC_RANK),
+                   dplyr::desc(.data$GLOBAL_ASSOC_RANK))
 
-  callset <-
-    pcgrr::expand_biomarker_items(
-      callset = callset,
-      variant_origin = "somatic")
+  # callset <-
+  #   pcgrr::expand_biomarker_items(
+  #     callset = callset,
+  #     variant_origin = "somatic")
 
   return(callset)
 
@@ -65,11 +118,13 @@ load_somatic_snv_indel <- function(
 }
 
 #' Function that reads and validates CNA or SNV/InDel TSV files
-#' file from PCGR/CPSR pre-report pipeline
+#' from the PCGR/CPSR pre-report (Python) pipeline
 #'
-#' @param fname Path to raw file with DNA aberrations (PCGR/CPSR)
-#' @param cols column type definitions of input
+#' @param fname Path to raw input file with DNA aberrations (PCGR/CPSR)
+#' @param cols column type definitions of raw input file
 #' @param ref_data reference data object
+#' @param vartype type of DNA aberrations ('snv_indel','cna')
+#' @param primary_site primary site of tumor
 #' @param retained_info_tags VCF INFO tags to be retained in output (SNVs/InDels)
 #' @param variant_origin Germline/Somatic
 #'
@@ -79,8 +134,10 @@ load_dna_variants <- function(
     fname = NA,
     cols = NULL,
     ref_data = NULL,
+    vartype = 'snv_indel',
+    primary_site = "Any",
     retained_info_tags = "None",
-    variant_origin = "Somatic"){
+    variant_origin = "Somatic") {
 
   pcgrr::check_file_exists(fname)
   calls_raw <- suppressWarnings(
@@ -101,7 +158,7 @@ load_dna_variants <- function(
 
   raw_col_check <-
     tibble::has_name(calls_raw, compulsary_cols)
-  if(FALSE %in% raw_col_check){
+  if (FALSE %in% raw_col_check) {
     missing_cols <-
       compulsary_cols[!raw_col_check]
     log4r_fatal(
@@ -112,11 +169,11 @@ load_dna_variants <- function(
 
   cols_including_retained <- cols
   retained_cols <- NULL
-  if(retained_info_tags != "None"){
+  if (retained_info_tags != "None") {
     retained_cols <- stringr::str_split(
       retained_info_tags, pattern = ",")[[1]]
-    for(c in retained_cols){
-      if(c %in% colnames(calls_raw)){
+    for(c in retained_cols) {
+      if (c %in% colnames(calls_raw)) {
         col_retain <- readr::cols_only(
           !!rlang::sym(c) := readr::col_character()
         )
@@ -139,9 +196,9 @@ load_dna_variants <- function(
   )
 
   retained_cols_renamed <- c()
-  if(!is.null(retained_cols)){
-    for(c in retained_cols){
-      if(c %in% colnames(calls)){
+  if (!is.null(retained_cols)) {
+    for(c in retained_cols) {
+      if (c %in% colnames(calls)) {
         new_col <- paste0('VCF_INFO_', c)
         retained_cols_renamed <- c(
           retained_cols_renamed, new_col
@@ -155,30 +212,27 @@ load_dna_variants <- function(
   results <- list()
   results[['variant']] <- calls
   results[['biomarker_evidence']] <- list()
-  results[['biomarker_evidence']][['all']] <- list()
-  for (elevel in pcgrr::evidence_levels) {
-    results[['biomarker_evidence']][['all']][[elevel]] <- data.frame()
-  }
+  results[['biomarker_evidence']][['items']] <-
+    data.frame()
 
-  for (type in pcgrr::evidence_types) {
-    results[['biomarker_evidence']][[type]] <- list()
-    for (elevel in pcgrr::evidence_levels) {
-      results[['biomarker_evidence']][[type]][[elevel]] <- data.frame()
-    }
+  ## Rename annotations for more clarity
+  if ("TSG" %in% colnames(results[['variant']])) {
+    results[['variant']] <-
+      results[['variant']] |>
+      dplyr::rename(
+        TUMOR_SUPPRESSOR = "TSG"
+      )
   }
 
-  results[['retained_info_tags']] <- paste(
-    retained_cols_renamed, collapse=","
-  )
-
-  if("TSG" %in% colnames(results[['variant']])){
+  if ("ONCOGENICITY_CLASSIFICATION" %in% colnames(results[['variant']])) {
     results[['variant']] <-
       results[['variant']] |>
       dplyr::rename(
-        TUMOR_SUPPRESSOR = "TSG"
+        ONCOGENICITY = "ONCOGENICITY_CLASSIFICATION"
       )
   }
-  if("VEP_ALL_CSQ" %in% colnames(results[['variant']])){
+
+  if ("VEP_ALL_CSQ" %in% colnames(results[['variant']])) {
     results[['variant']] <-
       results[['variant']] |>
       dplyr::mutate(
@@ -187,14 +241,16 @@ load_dna_variants <- function(
         )
       )
   }
-  if("HGVSp_short" %in% colnames(results[['variant']])){
+
+  if ("HGVSp_short" %in% colnames(results[['variant']])) {
     results[['variant']] <-
       results[['variant']] |>
       dplyr::rename(
         HGVSP = "HGVSp_short"
       )
   }
-  if("TSG_RANK" %in% colnames(results[['variant']])){
+
+  if ("TSG_RANK" %in% colnames(results[['variant']])) {
     results[['variant']] <-
       results[['variant']] |>
       dplyr::rename(
@@ -202,39 +258,75 @@ load_dna_variants <- function(
       )
   }
 
-  if("BIOMARKER_MATCH" %in% colnames(calls) &
-     "VAR_ID" %in% colnames(calls)){
+  if (vartype == 'cna') {
 
-    biomarker_set <-
-      calls |>
-      dplyr::filter(!is.na(.data$BIOMARKER_MATCH))
-
-    citations <- as.data.frame(
-      ref_data[['biomarker']][['literature']] |>
+    results[['variant']] <-
+      results[['variant']] |>
+      dplyr::mutate(REFSEQ_TRANSCRIPT_ID = dplyr::if_else(
+        is.na(.data$REFSEQ_TRANSCRIPT_ID),
+        "",
+        as.character(.data$REFSEQ_TRANSCRIPT_ID)
+      )) |>
+      dplyr::mutate(
+        TRANSCRIPT_OVERLAP = paste(
+          .data$ENSEMBL_TRANSCRIPT_ID,
+          .data$REFSEQ_TRANSCRIPT_ID,
+          .data$TRANSCRIPT_START,
+          .data$TRANSCRIPT_END,
+          .data$TRANSCRIPT_OVERLAP_PERCENT, sep = "|"
+        )) |>
       dplyr::select(
-        c("EVIDENCE_ID",
-        "LINK")
-      ) |>
-      tidyr::separate_rows(
-        .data$EVIDENCE_ID, sep=";"
-      ) |>
+        -c("ENSEMBL_TRANSCRIPT_ID",
+           "REFSEQ_TRANSCRIPT_ID",
+           "TRANSCRIPT_START",
+           "TRANSCRIPT_END")) |>
       dplyr::group_by(
-        EVIDENCE_ID
-      ) |>
+        dplyr::across(-c("TRANSCRIPT_OVERLAP",
+                         "TRANSCRIPT_OVERLAP_PERCENT"))) |>
       dplyr::summarise(
-        CITATION = paste(
-          unique(.data$LINK), collapse = ", "
-        )
+        TRANSCRIPT_OVERLAP = paste(.data$TRANSCRIPT_OVERLAP, collapse = ", "),
+        MAX_TRANSCRIPT_OVERLAP_PERCENT =
+          max(.data$TRANSCRIPT_OVERLAP_PERCENT, na.rm = TRUE),
+        .groups = "drop"
       )
-    )
 
-    if(NROW(biomarker_set) > 0){
-      results[['biomarker_evidence']][['all']][['any']] <-
+  }
+
+  if ("BIOMARKER_MATCH" %in% colnames(results[['variant']]) &&
+     "VAR_ID" %in% colnames(results[['variant']])) {
+
+    biomarker_set <-
+      results[['variant']] |>
+      dplyr::filter(!is.na(.data$BIOMARKER_MATCH))
+
+    if (NROW(biomarker_set) > 0) {
+
+      citations <- as.data.frame(
+        ref_data[['biomarker']][['literature']] |>
+          dplyr::select(
+            c("EVIDENCE_ID",
+              "LINK")
+          ) |>
+          tidyr::separate_rows(
+            c("EVIDENCE_ID"),
+            sep = ";"
+          ) |>
+          dplyr::group_by(
+            .data$EVIDENCE_ID
+          ) |>
+          dplyr::summarise(
+            CITATION = paste(
+              unique(.data$LINK), collapse = ", "
+            )
+          )
+      )
+      results[['biomarker_evidence']][['items']] <-
         as.data.frame(
           biomarker_set |>
             dplyr::select(
               c("VAR_ID",
-              "BIOMARKER_MATCH"),
+                "VARIANT_CLASS",
+                "BIOMARKER_MATCH"),
               ) |>
             dplyr::distinct() |>
             tidyr::separate_rows(
@@ -247,8 +339,15 @@ load_dna_variants <- function(
                        "BIOMARKER_MATCHTYPE"),
               sep = "\\|"
             ) |>
-            dplyr::rename(BIOMARKER_MATCH = BIOMARKER_MATCHTYPE) |>
+            dplyr::mutate(VARIANT_ID = as.character(.data$VARIANT_ID)) |>
+            dplyr::left_join(
+              dplyr::select(
+                ref_data[['biomarker']][['variant']],
+                c("VARIANT_ID", "ENTREZGENE", "BIOMARKER_SOURCE")),
+              by = c("VARIANT_ID", "BIOMARKER_SOURCE")) |>
+            dplyr::rename(BIOMARKER_MATCH = "BIOMARKER_MATCHTYPE") |>
             dplyr::mutate(BIOMARKER_RESOLUTION = dplyr::case_when(
+              stringr::str_detect(.data$BIOMARKER_MATCH,"by_cna_segment") ~ "gene",
               stringr::str_detect(.data$BIOMARKER_MATCH,"by_genomic_coord") ~ "genomic",
               !stringr::str_detect(.data$BIOMARKER_MATCH,"by_genomic_coord") &
                 stringr::str_detect(.data$BIOMARKER_MATCH,"by_hgvsp_principal") ~ "hgvsp",
@@ -310,54 +409,70 @@ load_dna_variants <- function(
               ), by = c("EVIDENCE_ID"),
               relationship = "many-to-many"
             ) |>
+            dplyr::rename(
+              BM_VARIANT_ID = "VARIANT_ID",
+              BM_EVIDENCE_ID = "EVIDENCE_ID",
+              BM_SOURCE = "BIOMARKER_SOURCE",
+              BM_RESOLUTION = "BIOMARKER_RESOLUTION",
+              BM_MATCH = "BIOMARKER_MATCH",
+              BM_PRIMARY_SITE = "PRIMARY_SITE",
+              BM_EVIDENCE_TYPE = "EVIDENCE_TYPE",
+              BM_CANCER_TYPE = "CANCER_TYPE",
+              BM_VARIANT_ORIGIN = "VARIANT_ORIGIN",
+              BM_EVIDENCE_LEVEL = "EVIDENCE_LEVEL",
+              BM_EVIDENCE_DESCRIPTION = "EVIDENCE_DESCRIPTION",
+              BM_THERAPEUTIC_CONTEXT = "THERAPEUTIC_CONTEXT",
+              BM_CLINICAL_SIGNIFICANCE = "CLINICAL_SIGNIFICANCE",
+              BM_CITATION = "CITATION",
+              BM_RATING = "RATING",
+              BM_EVIDENCE_DIRECTION = "EVIDENCE_DIRECTION",
+              BM_MOLECULAR_PROFILE_NAME = "MOLECULAR_PROFILE_NAME",
+              BM_MOLECULAR_PROFILE_TYPE = "MOLECULAR_PROFILE_TYPE"
+            ) |>
+            dplyr::select(
+              c("VAR_ID",
+                "VARIANT_CLASS",
+                "ENTREZGENE",
+                "BM_SOURCE",
+                "BM_VARIANT_ID",
+                "BM_EVIDENCE_ID",
+                "BM_EVIDENCE_TYPE",
+                "BM_EVIDENCE_LEVEL",
+                "BM_EVIDENCE_DESCRIPTION",
+                "BM_EVIDENCE_DIRECTION",
+                "BM_CLINICAL_SIGNIFICANCE",
+                "BM_VARIANT_ORIGIN",
+                "BM_CANCER_TYPE",
+                "BM_PRIMARY_SITE",
+                "BM_MATCH",
+                "BM_RESOLUTION"),
+              dplyr::everything()
+            ) |>
+            dplyr::filter(
+                .data$BM_VARIANT_ORIGIN == variant_origin &
+                  .data$BM_MOLECULAR_PROFILE_TYPE == "Any") |>
             dplyr::distinct()
         )
+    }
 
-      if(NROW(results[['biomarker_evidence']][['all']][['any']]) > 0){
-
-        for (type in pcgrr::evidence_types) {
-          results[['biomarker_evidence']][[type]][["any"]] <-
-            results[['biomarker_evidence']][['all']][['any']] |>
-            dplyr::filter(
-              .data$VARIANT_ORIGIN == variant_origin &
-                .data$EVIDENCE_TYPE == stringr::str_to_title(type))
-          if (NROW(results[['biomarker_evidence']][[type]][["any"]]) > 0) {
-            results[['biomarker_evidence']][[type]][["A_B"]] <-
-              results[['biomarker_evidence']][[type]][["any"]] |>
-              dplyr::filter(
-                stringr::str_detect(
-                  .data$EVIDENCE_LEVEL, "^(A|B|B1|B2):"))
-
-            if (NROW(results[['biomarker_evidence']][[type]][["A_B"]]) > 0) {
-              results[['biomarker_evidence']][[type]][["A_B"]] <-
-                results[['biomarker_evidence']][[type]][["A_B"]] |>
-                dplyr::arrange(
-                  .data$EVIDENCE_LEVEL,
-                  dplyr::desc(
-                    .data$RATING))
-            }
-
-            results[['biomarker_evidence']][[type]][["C_D_E"]] <-
-              results[['biomarker_evidence']][[type]][["any"]] |>
-              dplyr::filter(
-                stringr::str_detect(
-                  .data$EVIDENCE_LEVEL, "^(C|D|E):"))
-
-            if (NROW(results[['biomarker_evidence']][[type]][["C_D_E"]]) > 0) {
-              results[['biomarker_evidence']][[type]][["C_D_E"]] <-
-                results[['biomarker_evidence']][[type]][["C_D_E"]] |>
-                dplyr::arrange(
-                  .data$EVIDENCE_LEVEL,
-                  dplyr::desc(.data$RATING))
-            }
-          }
-        }
-      }
+    if (variant_origin == "Somatic") {
+      results <- pcgrr::assign_acmg_tiers(
+        vartype = vartype,
+        variants_df = results$variant,
+        primary_site = primary_site,
+        biomarker_items =
+          results$biomarker_evidence$items
+      )
     }
+
   }else{
     log4r_fatal("Input data does not contain 'BIOMARKER_MATCH' column - fatal")
   }
 
+  results[['retained_info_tags']] <- paste(
+    retained_cols_renamed, collapse=","
+  )
+
   return(results)
 
 }
diff --git a/pcgrr/R/kataegis.R b/pcgrr/R/kataegis.R
index 6e739f3c..95ec58c9 100644
--- a/pcgrr/R/kataegis.R
+++ b/pcgrr/R/kataegis.R
@@ -205,8 +205,9 @@ generate_report_data_kataegis <- function(variant_set,
                                           sample_name = "SampleX",
                                           build = "grch37") {
 
-  pcg_report_kataegis <- pcgrr::init_report(class = "kataegis")
-  if(NROW(variant_set) == 0){
+  pcg_report_kataegis <-
+    pcgrr::init_kataegis_content()
+  if (NROW(variant_set) == 0) {
     return(pcg_report_kataegis)
   }
 
diff --git a/pcgrr/R/main.R b/pcgrr/R/main.R
index ad155b40..220b29ee 100644
--- a/pcgrr/R/main.R
+++ b/pcgrr/R/main.R
@@ -44,7 +44,7 @@ generate_pcgr_report <-
 
 
     if (!is.null(cna_segments_tsv)) {
-      if(length(cna_segments_tsv) > 0){
+      if (length(cna_segments_tsv) > 0) {
         invisible(assertthat::assert_that(
           file.exists(cna_segments_tsv),
           msg = paste0("Filename provided for argument 'cna_segments_tsv' (",
@@ -67,7 +67,7 @@ generate_pcgr_report <-
 
     }
     if (!is.null(cpsr_report_fname)) {
-      if(length(cpsr_report_fname) > 0){
+      if (length(cpsr_report_fname) > 0) {
 
         invisible(assertthat::assert_that(
           file.exists(cpsr_report_fname),
@@ -204,7 +204,7 @@ generate_pcgr_report <-
       ## Estimate contribution of mutational signatures
       if (pcg_report[["metadata"]][["config"]][["msigs"]][["run"]] == T) {
 
-        if(NROW(pcg_report$content$snv_indel$variant_set$tsv) > 0){
+        if (NROW(pcg_report$content$snv_indel$variant_set$tsv) > 0) {
           pcgrr::write_processed_vcf(
             calls = pcg_report$content$snv_indel$variant_set$tsv,
             sample_name = sample_name,
@@ -329,7 +329,7 @@ generate_pcgr_report <-
     pcg_report[["content"]][["snv_indel"]][["variant_set"]][["noncoding"]] <- NULL
     pcg_report[["content"]][["snv_indel"]][["variant_set"]][["coding"]] <- NULL
     pcg_report[["content"]][["snv_indel"]][["variant_set"]][["all"]] <- NULL
-    if(!is.null(pcg_report[["content"]][["tumor_only"]])){
+    if (!is.null(pcg_report[["content"]][["tumor_only"]])) {
       pcg_report[["content"]][["snv_indel"]][["variant_set"]][["tsv_unfiltered"]] <-
         pcg_report[["content"]][["tumor_only"]][["variant_set"]][["tsv_unfiltered"]]
       pcg_report[["content"]][["tumor_only"]][["variant_set"]][["tsv_unfiltered"]] <- NULL
@@ -350,24 +350,16 @@ generate_pcgr_report <-
 
 #' Function that generates tiered variant sets for SNVs/InDels
 #'
-#' @param sample_calls variant calls subject to mutational signature analysis
-#' @param pcgr_data object with PCGR annotation data
-#' @param sample_name sample identifier
-#' @param config Object with PCGR configuration parameters
-#' @param callset type of calls
-#' @param biomarker_mapping_stringency quality level for biomarkers
+#' @param pcg_report PCGR report object
+#' @param callset Object with input calls (CNA, SNV/InDel)
 #' @param tier_model tier model (pcgr_acmg)
 #'
 #' @return pcg_report_data data frame with all report elements
 #'
 #' @export
 generate_report_data_snv_indel <- function(
-  sample_calls,
-  pcgr_data,
-  sample_name,
-  config,
-  callset = "somatic calls",
-  biomarker_mapping_stringency = 1,
+  pcg_report = NULL,
+  callset = NULL,
   tier_model = "pcgr_acmg") {
 
   pcgrr::log4r_info("------")
@@ -375,8 +367,7 @@ generate_report_data_snv_indel <- function(
     paste0("Generating data for tiered cancer genome report - ",
            callset, " tier model '", tier_model, "'"))
 
-  pcg_report_snv_indel <- pcgrr::init_report(config = config,
-                                             class = "snv_indel")
+  pcg_report_snv_indel <- pcg_report[['content']][['snv_indel']]
   pcg_report_snv_indel[["eval"]] <- TRUE
   pcg_report_snv_indel[["variant_set"]][["all"]] <- sample_calls
 
@@ -411,10 +402,10 @@ generate_report_data_snv_indel <- function(
 
   ## remove REGULATORY_ANNOTATION from display tags
   ## if regulatory annotation is not turned on
-  if(!is.null(config)){
-    if(config[['other']][['vep_regulatory']] == FALSE){
+  if (!is.null(config)) {
+    if (config[['other']][['vep_regulatory']] == FALSE) {
       for(e in c('all','tier4_display','tier5_display',
-                 'tsv')){
+                 'tsv')) {
         annotation_tags[[e]] <-
           annotation_tags[[e]][!annotation_tags[[e]] == "REGULATORY_ANNOTATION"]
       }
@@ -469,7 +460,7 @@ generate_report_data_snv_indel <- function(
           eitems = eitems_specific_tt)
 
       ## Assign putative TIER 1 variant set
-      pcg_report_snv_indel[["clin_eitem"]][["specific_ttype"]] <-
+      pcg_report_snv_indel[["clin_eitem"]][["query_ttype"]] <-
         biomarker_hits_snv_indels_specific$clin_eitem
       pcg_report_snv_indel[["variant_set"]][["tier1"]] <-
         biomarker_hits_snv_indels_specific$variant_set
@@ -1041,7 +1032,7 @@ generate_report_data_tumor_only <-
 #'           eitems = eitems_specific_tt)
 #'
 #'       ## Assign putative TIER 1 variant set
-#'       pcg_report_cna[["clin_eitem"]][["specific_ttype"]] <-
+#'       pcg_report_cna[["clin_eitem"]][["query_ttype"]] <-
 #'         biomarker_hits_cna_specific$clin_eitem
 #'       pcg_report_cna[["variant_set"]][["tier1"]] <-
 #'         biomarker_hits_cna_specific$variant_set
@@ -1345,7 +1336,7 @@ generate_report_data_tumor_only <-
 #           eitems = eitems_specific_tt)
 #
 #       ## Assign putative TIER 1 variant set
-#       pcg_report_cna[["clin_eitem"]][["specific_ttype"]] <-
+#       pcg_report_cna[["clin_eitem"]][["query_ttype"]] <-
 #         biomarker_hits_cna_specific$clin_eitem
 #       pcg_report_cna[["variant_set"]][["tier1"]] <-
 #         biomarker_hits_cna_specific$variant_set
@@ -1538,7 +1529,7 @@ write_report_output <- function(report,
     settings[["conf"]][["visual_reporting"]][["visual_theme"]]
 
   ## Somatic/tumor report settings
-  if (tier_model == "pcgr_acmg"){
+  if (tier_model == "pcgr_acmg") {
     pcgrr_tmpl <- system.file("templates", package = "pcgrr")
 
     disclaimer <- file.path(pcgrr_tmpl, "disclaimer.md")
@@ -1563,7 +1554,7 @@ write_report_output <- function(report,
       sequencing_design <- "Tumor-Only"
       css_fname <- file.path(pcgrr_tmpl, "pcgr_flexdb_tumor_only.css")
 
-      if (flexdb == FALSE){
+      if (flexdb == FALSE) {
         css_fname <- file.path(pcgrr_tmpl, "pcgr_rmarkdown_tumor_only.css")
       }
     }
@@ -1612,7 +1603,7 @@ write_report_output <- function(report,
       toc_depth <- 3
 
       ## Ignore collapsing menu for CPSR
-      if(tier_model == 'cpsr'){
+      if (tier_model == 'cpsr') {
         toc_float <-
           list(collapsed = FALSE,
                smooth_scroll = FALSE,
@@ -1622,7 +1613,7 @@ write_report_output <- function(report,
       ## If nonfloating TOC is chosen (PCGR/CPSR), set toc_float to FALSE
       nonfloating_toc <-
         as.logical(settings[["conf"]][["visual_reporting"]][["nonfloating_toc"]])
-      if(nonfloating_toc == T){
+      if (nonfloating_toc == T) {
         toc_float <- F
       }
 
@@ -1635,7 +1626,7 @@ write_report_output <- function(report,
          "templates",
          "_header.html",
          package = "pcgrr")
-      if(tier_model == "cpsr"){
+      if (tier_model == "cpsr") {
         header <- system.file(
           "templates",
           "_header.html",
@@ -1681,28 +1672,28 @@ write_report_output <- function(report,
 
     report_strip <- report
 
-    if(tier_model != "cpsr"){
-      if(!is.null(report_strip$content$rainfall)){
+    if (tier_model != "cpsr") {
+      if (!is.null(report_strip$content$rainfall)) {
         report_strip$content$rainfall <- NULL
       }
-      if(!is.null(report_strip$content$tmb)){
+      if (!is.null(report_strip$content$tmb)) {
         report_strip$content$tmb$tcga_tmb <- NULL
       }
-      if(!is.null(report_strip$content$clinicaltrials)){
+      if (!is.null(report_strip$content$clinicaltrials)) {
         report_strip$content$clinicaltrials <- NULL
       }
-      if(!is.null(report_strip$content$msi)){
-        if(!is.null(report_strip$content$msi$prediction)){
+      if (!is.null(report_strip$content$msi)) {
+        if (!is.null(report_strip$content$msi$prediction)) {
           report_strip$content$msi$prediction$tcga_dataset <- NULL
         }
       }
 
-      if(!is.null(report_strip$content$snv_indel$disp)){
+      if (!is.null(report_strip$content$snv_indel$disp)) {
         report_strip$content$snv_indel$disp <- NULL
       }
 
-      if(!is.null(report_strip$content$snv_indel$variant_set)){
-        if(!is.null(report_strip$content$snv_indel$variant_set$maf)){
+      if (!is.null(report_strip$content$snv_indel$variant_set)) {
+        if (!is.null(report_strip$content$snv_indel$variant_set$maf)) {
           report_strip$content$snv_indel$variant_set$maf <- NULL
         }
       }
@@ -1730,13 +1721,13 @@ write_report_output <- function(report,
                         "AF_CONTROL",
                         "TIER")
 
-      if(!is.null(report_strip$content$snv_indel$variant_set)){
+      if (!is.null(report_strip$content$snv_indel$variant_set)) {
 
-        for(o in c('tsv')){
+        for(o in c('tsv')) {
 
-          if(!is.null(report_strip$content$snv_indel$variant_set[[o]])){
+          if (!is.null(report_strip$content$snv_indel$variant_set[[o]])) {
 
-            if(nrow(report_strip$content$snv_indel$variant_set[[o]]) == 0){
+            if (nrow(report_strip$content$snv_indel$variant_set[[o]]) == 0) {
               next
             }
             assertable::assert_colnames(
@@ -1766,7 +1757,7 @@ write_report_output <- function(report,
 
     ## NOTE: set max size of report object to 750 Mb - have not figured out
     ## what the exact size should be for jsonlite::toJSON to succeed/fail
-    if(utils::object.size(report_strip) < 750000000){
+    if (utils::object.size(report_strip) < 750000000) {
 
       pcgr_json <- jsonlite::toJSON(
         report_strip, pretty = T, na = "string",
@@ -1802,7 +1793,7 @@ write_report_output <- function(report,
         file = fnames[[output_format]], sep = "\t", col.names = T,
         row.names = F, quote = F)
 
-      # if(tier_model == "pcgr_acmg"){
+      # if (tier_model == "pcgr_acmg") {
       #   pcgrr::log4r_info(
       #     paste0("Writing SNV/InDel Excel output file with ",
       #            "PCGR annotations"))
diff --git a/pcgrr/R/main2.R b/pcgrr/R/main2.R
new file mode 100644
index 00000000..12bb5db4
--- /dev/null
+++ b/pcgrr/R/main2.R
@@ -0,0 +1,1801 @@
+#' Function that generates all contents of the cancer genome report (PCGR)
+#'
+#' @param yaml_fname Name of PCGR configuration file (yaml)
+#' @return PCGR report object (`pcg_report`) updated with generated content
+#' @export
+
+generate_pcgr_report2 <-
+  function(yaml_fname = NULL) {
+
+    invisible(assertthat::assert_that(
+      !is.null(yaml_fname),
+      msg = "Object 'yaml_fname' cannot be NULL"
+    ))
+    pcgrr::check_file_exists(yaml_fname)
+
+    pcg_report <- pcgrr::init_report(
+      yaml_fname = yaml_fname,
+      report_mode = "PCGR")
+
+    settings <- pcg_report$settings
+    ref_data <- pcg_report$ref_data
+
+    callset_snv <-
+      pcgrr::load_somatic_snv_indel(
+        fname = settings$molecular_data$fname_mut_tsv,
+        ref_data = ref_data,
+        settings = settings
+      )
+
+    callset_cna <- NULL
+    if (settings$molecular_data$fname_cna_tsv != "None") {
+      callset_cna <-
+        pcgrr::load_somatic_cna(
+          fname = settings$molecular_data$fname_cna_tsv,
+          ref_data = ref_data,
+          settings = settings
+        )
+    }
+
+
+    conf_somatic_snv <-
+      settings$conf$somatic_snv
+    conf_somatic_cna <-
+      settings$conf$somatic_cna
+    conf_other <-
+      settings$conf$other
+    assay_properties <-
+      settings$conf$assay_properties
+    sample_properties <-
+      settings$conf$sample_properties
+
+    #pcgrr::log4r_info(paste0("Initializing PCGR report - sample ", sample_name))
+    #pcgrr::log4r_info("------")
+
+    # if (!is.null(cpsr_report_fname)) {
+    #   if (length(cpsr_report_fname) > 0) {
+    #
+    #     invisible(assertthat::assert_that(
+    #       file.exists(cpsr_report_fname),
+    #       msg = paste0("Filename provided for argument 'cpsr_report' (",
+    #                    cpsr_report_fname, ") does not exist")))
+    #     invisible(assertthat::assert_that(
+    #       file.size(cpsr_report_fname) > 0,
+    #       msg = paste0("File provided for argument 'cpsr_report' (",
+    #                    cpsr_report_fname, ") has a filesize of zero")))
+    #   }
+    #
+    # }
+
+    ## Retrieve relevant clinical trials for the tumor type in question
+
+    if (as.logical(settings$conf$clinicaltrials$run) == TRUE) {
+      # pcg_report_trials <-
+      #   pcgrr::generate_report_data_trials(
+      #     ref_data = ref_data,
+      #     settings = settings)
+      # ## Update genome report with trial data
+      # pcg_report <-
+      #   pcgrr::update_report(pcg_report, pcg_report_trials,
+      #                        a_elem = "clinicaltrials")
+    }
+
+    if (NROW(callset_snv$variant) > 0) {
+
+      ## Tumor-only mode (NOTE(review): sample_calls/sample_name/config unset here)
+      if (assay_properties[["vcf_tumor_only"]] == TRUE) {
+        pcg_report_tumor_only <-
+          pcgrr::generate_report_data_tumor_only(
+            sample_calls,
+            sample_name, config)
+
+        ## Generate data for SNVs/InDels - NOTE(review): call is commented out,
+        ## so 'pcg_report_snv_indel_filtered' used further down is never created
+        # pcg_report_snv_indel_filtered <-
+        #   pcgrr::generate_report_data_snv_indel(
+        #     pcg_report_tumor_only[["variant_set"]][["filtered"]],
+        #     pcgr_data,
+        #     sample_name,
+        #     config,
+        #     callset = "germline-filtered callset",
+        #     tier_model = tier_model)
+
+        pcg_report_tumor_only[["upset_data"]] <-
+          pcgrr::make_upset_plot_data(
+            pcg_report_tumor_only$variant_set$tsv_unfiltered, config)
+        num_upset_sources <- 0
+        for (col in colnames(pcg_report_tumor_only[["upset_data"]])) {
+          if (col != "VAR_ID") {
+            if (sum(pcg_report_tumor_only[["upset_data"]][, col]) > 0) {
+              num_upset_sources <- num_upset_sources + 1
+            }
+          }
+        }
+        if (num_upset_sources >= 2) {
+          pcg_report_tumor_only[["upset_plot_valid"]] <- TRUE
+        }
+
+        ## Update genome report with SNV/InDels (display, tiers etc)
+        pcg_report <-
+          pcgrr::update_report(pcg_report, pcg_report_snv_indel_filtered,
+                               a_elem = "snv_indel")
+        pcg_report <-
+          pcgrr::update_report(pcg_report, pcg_report_tumor_only,
+                               a_elem = "tumor_only")
+
+        ## Generate data for rainfall plot (SNVs)
+        pcg_report_rainfall <-
+          pcgrr::generate_report_data_rainfall(
+            pcg_report$content$snv_indel$variant_set$tsv,
+            build = pcg_report$metadata$genome_assembly)
+        ## Update genome report
+        pcg_report <-
+          pcgrr::update_report(pcg_report, pcg_report_rainfall,
+                               a_elem = "rainfall")
+
+      } else {
+        ## Generate report data for SNVs/InDels
+        pcg_report_snv_indel <-
+          pcgrr::generate_report_data_snv_indel(
+            pcg_report,
+            callset = callset_snv,
+            tier_model = "pcgr_acmg")
+
+        ## Update genome report
+        pcg_report <- pcgrr::update_report(
+          pcg_report, pcg_report_snv_indel,
+          a_elem = "snv_indel")
+      }
+
+      ## Estimate contribution of mutational signatures
+      if (conf_somatic_snv[["mutational_signatures"]][["run"]] == TRUE) {
+
+        if (NROW(callset_snv$variant) > 0) {
+          pcg_report_signatures <-
+            pcgrr::generate_report_data_signatures_mp(
+              callset_snv = callset_snv,
+              settings = pcg_report$settings,
+              ref_data = pcg_report$ref_data)
+
+          ## Update genome report with signature info
+          pcg_report <- pcgrr::update_report(
+            pcg_report,
+            pcg_report_signatures,
+            a_elem = "m_signature_mp")
+        }
+
+        ## Generate report data for rainfall plot
+        pcg_report_rainfall <-
+          pcgrr::generate_report_data_rainfall(
+            variant_set = callset_snv$variant,
+            build = pcg_report$settings$genome_assembly)
+
+        ## Update genome report
+        pcg_report <-
+          pcgrr::update_report(pcg_report,
+                               pcg_report_rainfall,
+                               a_elem = "rainfall")
+
+        ## Generate report data for kataegis events (for WES/WGS runs)
+        if (stringr::str_detect(
+          assay_properties[["type"]],
+          "WGS|WES")) {
+          pcg_report_kataegis <-
+            pcgrr::generate_report_data_kataegis(
+              variant_set = callset_snv$variant,
+              sample_name = settings$sample_id,
+              build = settings$genome_assembly)
+          ## Update genome report
+          pcg_report <- pcgrr::update_report(
+            pcg_report,
+            pcg_report_kataegis,
+            a_elem = "kataegis")
+        }
+      }
+
+      ## If assay is Tumor-Control and WES/WGS - perform MSI prediction
+      if (as.logical(settings$conf$somatic_snv$msi$run) == TRUE &&
+          stringr::str_detect(assay_properties[["type"]], "WGS|WES") &&
+          as.logical(assay_properties[["vcf_tumor_only"]]) == FALSE) {
+        pcg_report_msi <-
+          pcgrr::generate_report_data_msi(
+            variant_set = callset_snv$variant,
+            ref_data = ref_data,
+            settings = settings)
+
+        ## Update genome report with MSI info
+        pcg_report <-
+          pcgrr::update_report(
+            pcg_report,
+            pcg_report_msi,
+            a_elem = "msi")
+      }
+
+      ## Generate report contents for analysis of mutational burden (TMB)
+      if (settings$conf$somatic_snv$tmb$run == TRUE) {
+        pcg_report_tmb <-
+          pcgrr::generate_report_data_tmb(
+            pcg_report[["content"]][["snv_indel"]][["variant_set"]][["tsv"]],
+            pcgr_data, sample_name, config)
+
+        ## Update genome report with TMB info
+        pcg_report <- pcgrr::update_report(
+          pcg_report,
+          pcg_report_tmb,
+          a_elem = "tmb")
+      }
+    } else {
+      pcg_report[["content"]][["snv_indel"]][["zero"]] <- TRUE
+      pcg_report[["metadata"]][["config"]][["other"]][["list_noncoding"]] <- FALSE
+    }
+
+    # if (!is.null(cpsr_report_fname)) {
+    #   pcg_report[["content"]][["cpsr"]][['eval']] <- TRUE
+    #
+    #   pcg_report[['content']][['cpsr']][['report']] <-
+    #     jsonlite::fromJSON(
+    #       gzfile(cpsr_report_fname)
+    #     )
+    #
+    #   ## append report elements in pcg_report[['content']][['cpsr]][['cpsr_json']]
+    # }
+    ## NOTE(review): arguments of the CNA call below are not defined here - confirm
+    if (!is.null(callset_cna)) {
+      pcg_report_cna <-
+        pcgrr::generate_report_data_cna(
+          cna_segments_tsv,
+          pcgr_data,
+          sample_name,
+          config,
+          oncotree = pcg_report[["metadata"]][["phenotype"]][["oncotree_query"]],
+          transcript_overlap_pct = config[["cna"]][["cna_overlap_pct"]])
+      pcg_report <-
+        pcgrr::update_report(pcg_report,
+                             pcg_report_cna,
+                             a_elem = "cna")
+
+    }
+
+    pcg_report_value_box <- pcgrr::generate_report_data_value_box(
+      pcg_report, pcgr_data, sample_name, config)
+    pcg_report <- pcgrr::update_report(
+      pcg_report, pcg_report_value_box,
+      a_elem = "value_box")
+
+    for (elem in c("tier1", "tier2", "tier3", "tier4")) {
+      stat <- paste0("n_", elem)
+      pcg_report[["content"]][["snv_indel"]][["v_stat"]][[stat]] <-
+        NROW(pcg_report[["content"]][["snv_indel"]][["variant_set"]][[elem]])
+      pcg_report[["content"]][["snv_indel"]][["variant_set"]][[elem]] <- NULL
+    }
+    pcg_report[["content"]][["snv_indel"]][["variant_set"]][["noncoding"]] <- NULL
+    pcg_report[["content"]][["snv_indel"]][["variant_set"]][["coding"]] <- NULL
+    pcg_report[["content"]][["snv_indel"]][["variant_set"]][["all"]] <- NULL
+    if (!is.null(pcg_report[["content"]][["tumor_only"]])) {
+      pcg_report[["content"]][["snv_indel"]][["variant_set"]][["tsv_unfiltered"]] <-
+        pcg_report[["content"]][["tumor_only"]][["variant_set"]][["tsv_unfiltered"]]
+      pcg_report[["content"]][["tumor_only"]][["variant_set"]][["tsv_unfiltered"]] <- NULL
+      pcg_report[["content"]][["tumor_only"]][["variant_set"]][["filtered"]] <- NULL
+    }
+    pcg_report[["content"]][["snv_indel"]][["variant_set"]][["all"]] <- NULL
+    pcg_report[["content"]][["cna"]][["variant_set"]][["cna_print"]] <- NULL
+    pcg_report[["metadata"]][["phenotype"]] <- list()
+    gc()
+
+    # if (!is.null(cna_plot) && cna_plot != "None") {
+    #   pcg_report[["content"]][["cna_plot"]][["png"]] <- cna_plot
+    #   pcg_report[["content"]][["cna_plot"]][["eval"]] <- TRUE
+    # }
+    return(pcg_report)
+  }
+
+
+#' Function that initializes SNV/InDel report content (tiering is disabled)
+#'
+#' @param pcg_report PCGR report object (its `content$snv_indel` element is used)
+#' @param callset Object with input calls; variants are read from `callset$variant`
+#' @param tier_model tier model (pcgr_acmg) - only used in the log message
+#'
+#' @return SNV/InDel report element with `eval`, `variant_set$all` and `v_stat` set
+#'
+#' @export
+generate_report_data_snv_indel2 <- function(
+    pcg_report = NULL,
+    callset = NULL,
+    tier_model = "pcgr_acmg") {
+
+  pcgrr::log4r_info("------")
+  header_msg <- paste0(
+    "Generating data for tiered cancer genome report - ",
+    " tier model '", tier_model, "'")
+  pcgrr::log4r_info(header_msg)
+
+  ## Start from the SNV/InDel element of the report and attach all calls
+  pcg_report_snv_indel <- pcg_report[["content"]][["snv_indel"]]
+  pcg_report_snv_indel[["eval"]] <- TRUE
+  variants <- callset[["variant"]]
+  pcg_report_snv_indel[["variant_set"]][["all"]] <- variants
+
+  ## Copy basic variant statistics (type, coding status) into the report
+  variant_summary <- pcgrr::variant_stats_report(variants, name = "v_stat")
+  for (stat_name in c("n", "n_snv", "n_indel", "n_coding", "n_noncoding")) {
+    pcg_report_snv_indel[["v_stat"]][[stat_name]] <-
+      variant_summary[["v_stat"]][[stat_name]]
+  }
+  pcgrr::log4r_info(paste0("Number of protein-coding variants: ",
+                           pcg_report_snv_indel[["v_stat"]][["n_coding"]]))
+
+  # if (pcg_report_snv_indel[["v_stat"]][["n"]] > 0) {
+  #
+  #   tumor_type <-
+  #     pcg_report[["settings"]][["conf"]][["sample_properties"]][["site"]]
+  #
+  #   ## Assign putative TIER 2 variant set
+  #   for(etype in c('predictive','prognostic','diagnostic')) {
+  #     pcg_report_snv_indel[["clin_eitem"]][["any_ttype"]][[etype]] <-
+  #       callset[['biomarker_evidence']][[etype]]
+  #   }
+  #
+  #   pcg_report_snv_indel[["variant_set"]][["tier2"]] <-
+  #     callset[["biomarker_evidence"]][["all"]][["any"]] |>
+  #     dplyr::filter(EVIDENCE_TYPE == "Prognostic" |
+  #                     EVIDENCE_TYPE == "Predictive" |
+  #                     EVIDENCE_TYPE == "Diagnostic")
+  #
+  #   if (NROW(pcg_report_snv_indel[["variant_set"]][["tier2"]]) > 0) {
+  #     pcg_report_snv_indel[["variant_set"]][["tier2"]] <-
+  #       pcg_report_snv_indel[["variant_set"]][["tier2"]] |>
+  #       dplyr::select("VAR_ID") |>
+  #       dplyr::distinct() |>
+  #       dplyr::inner_join(
+  #         pcg_report_snv_indel[["variant_set"]][["all"]],
+  #         by = "VAR_ID")
+  #   }
+  #
+  #   ## Get all clinical evidence items that
+  #   ## overlap query set (if tumor type is specified)
+  #   if (tumor_type != "Cancer, NOS") {
+  #
+  #     ## Assign putative TIER 1 variant set
+  #     for(etype in c('predictive','prognostic','diagnostic')) {
+  #       for(elevel in c('any','A_B','C_D_E')) {
+  #         if (NROW(callset[['biomarker_evidence']][[etype]][[elevel]]) > 0) {
+  #           pcg_report_snv_indel[["clin_eitem"]][["query_ttype"]][[etype]][[elevel]] <-
+  #             callset[['biomarker_evidence']][[etype]][[elevel]] |>
+  #             dplyr::filter(!is.na(PRIMARY_SITE) & PRIMARY_SITE == tumor_type)
+  #
+  #           pcg_report_snv_indel[["clin_eitem"]][["other_ttype"]][[etype]][[elevel]] <-
+  #             callset[['biomarker_evidence']][[etype]][[elevel]] |>
+  #             dplyr::filter(is.na(PRIMARY_SITE) | PRIMARY_SITE != tumor_type)
+  #         }
+  #       }
+  #     }
+  #
+  #     pcg_report_snv_indel[["variant_set"]][["tier1"]] <-
+  #       callset[["biomarker_evidence"]][["all"]][["any"]] |>
+  #       dplyr::filter(PRIMARY_SITE == tumor_type &
+  #                       stringr::str_detect(
+  #                         EVIDENCE_LEVEL,"^(A|B)") &
+  #                       (EVIDENCE_TYPE == "Prognostic" |
+  #                          EVIDENCE_TYPE == "Predictive" |
+  #                          EVIDENCE_TYPE == "Diagnostic"))
+  #
+  #     if (NROW(pcg_report_snv_indel[["variant_set"]][["tier1"]]) > 0) {
+  #       pcg_report_snv_indel[["variant_set"]][["tier1"]] <-
+  #         pcg_report_snv_indel[["variant_set"]][["tier1"]] |>
+  #         dplyr::select("VAR_ID") |>
+  #         dplyr::distinct() |>
+  #         dplyr::inner_join(
+  #           pcg_report_snv_indel[["variant_set"]][["all"]],
+  #           by = "VAR_ID")
+  #
+  #       if (NROW(pcg_report_snv_indel[["variant_set"]][["tier1"]]) > 0) {
+  #         pcg_report_snv_indel[["variant_set"]][["tier2"]] <-
+  #           pcg_report_snv_indel[["variant_set"]][["tier2"]] |>
+  #           dplyr::anti_join(
+  #             dplyr::select(pcg_report_snv_indel[["variant_set"]][["tier1"]],
+  #                           VAR_ID),
+  #             by = "VAR_ID"
+  #           )
+  #       }
+  #     }
+  #   }
+  #
+  #   ## Remove potential overlap/redundancies and assign final
+  #   ## TIER1/TIER2 classification
+  #   #pcg_report_snv_indel <- pcgrr::assign_tier1_tier2_acmg(pcg_report_snv_indel)
+  #   tier12 <- dplyr::bind_rows(
+  #     data.frame(
+  #       'VAR_ID' = unique(
+  #         pcg_report_snv_indel[["variant_set"]][["tier1"]]$VAR_ID)),
+  #     data.frame(
+  #       'VAR_ID' = unique(
+  #         pcg_report_snv_indel[["variant_set"]][["tier2"]]$VAR_ID)))
+  #
+  #   ## Determine TIER 3 variant set: coding mutations in
+  #   ## oncogenes/tumor suppressors/cancer census genes
+  #   pcg_report_snv_indel[["variant_set"]][["tier3"]] <-
+  #     pcg_report_snv_indel[["variant_set"]][["all"]] |>
+  #     dplyr::filter(.data$CODING_STATUS == "coding") |>
+  #     dplyr::filter(
+  #       (!is.na(.data$ONCOGENE) & .data$ONCOGENE == TRUE) |
+  #         (!is.na(.data$TUMOR_SUPPRESSOR) & .data$TUMOR_SUPPRESSOR == TRUE))
+  #
+  #   if (NROW(tier12) > 0 &
+  #       NROW(pcg_report_snv_indel[["variant_set"]][["tier3"]]) > 0) {
+  #     pcg_report_snv_indel[["variant_set"]][["tier3"]] <-
+  #       dplyr::anti_join(pcg_report_snv_indel[["variant_set"]][["tier3"]],
+  #                        tier12, by = c("VAR_ID"))
+  #   }
+  #   tier123 <- tier12
+  #   if (nrow(pcg_report_snv_indel[["variant_set"]][["tier3"]]) > 0) {
+  #     pcg_report_snv_indel[["variant_set"]][["tier3"]] <-
+  #       pcg_report_snv_indel[["variant_set"]][["tier3"]] |>
+  #       dplyr::arrange(dplyr::desc(.data$ONCOGENICITY_SCORE),
+  #                      dplyr::desc(.data$TISSUE_ASSOC_RANK),
+  #                      dplyr::desc(.data$GLOBAL_ASSOC_RANK))
+  #     tier123 <- tier12 |>
+  #       dplyr::bind_rows(
+  #         dplyr::select(pcg_report_snv_indel[["variant_set"]][["tier3"]],
+  #                       "VAR_ID")) |>
+  #       dplyr::distinct()
+  #     pcg_report_snv_indel[["disp"]][["tier3"]][["proto_oncogene"]] <-
+  #       dplyr::select(
+  #         pcg_report_snv_indel[["variant_set"]][["tier3"]],
+  #         dplyr::any_of(annotation_tags[["tier3_display"]])) |>
+  #       dplyr::filter(.data$ONCOGENE == TRUE &
+  #                       (is.na(.data$TUMOR_SUPPRESSOR) |
+  #                          .data$TUMOR_SUPPRESSOR == FALSE))
+  #     pcg_report_snv_indel[["disp"]][["tier3"]][["tumor_suppressor"]] <-
+  #       dplyr::select(
+  #         pcg_report_snv_indel[["variant_set"]][["tier3"]],
+  #         dplyr::any_of(annotation_tags[["tier3_display"]])) |>
+  #       dplyr::filter(!is.na(.data$TUMOR_SUPPRESSOR) &
+  #                       .data$TUMOR_SUPPRESSOR == TRUE)
+  #   }
+  #
+  #   ## Determine TIER 4: Other coding mutations
+  #   pcg_report_snv_indel[["variant_set"]][["tier4"]] <-
+  #     dplyr::select(pcg_report_snv_indel[["variant_set"]][["all"]],
+  #                   dplyr::any_of(annotation_tags[["all"]])) |>
+  #     dplyr::filter(.data$CODING_STATUS == "coding")
+  #   if (NROW(tier123) > 0 &
+  #       NROW(pcg_report_snv_indel[["variant_set"]][["tier4"]]) > 0) {
+  #     pcg_report_snv_indel[["variant_set"]][["tier4"]] <-
+  #       dplyr::anti_join(pcg_report_snv_indel[["variant_set"]][["tier4"]],
+  #                        tier123, by = c("GENOMIC_CHANGE"))
+  #   }
+  #   if (nrow(pcg_report_snv_indel[["variant_set"]][["tier4"]]) > 0) {
+  #     pcg_report_snv_indel[["variant_set"]][["tier4"]] <-
+  #       pcg_report_snv_indel[["variant_set"]][["tier4"]] |>
+  #       dplyr::arrange(dplyr::desc(.data$TISSUE_ASSOC_RANK),
+  #                      dplyr::desc(.data$GLOBAL_ASSOC_RANK))
+  #     pcg_report_snv_indel[["disp"]][["tier4"]] <-
+  #       dplyr::select(
+  #         pcg_report_snv_indel[["variant_set"]][["tier4"]],
+  #         dplyr::any_of(annotation_tags[["tier4_display"]]))
+  #   }
+  #
+  #   ## Determine non-coding mutation set
+  #   pcg_report_snv_indel[["variant_set"]][["noncoding"]] <-
+  #     dplyr::select(pcg_report_snv_indel[["variant_set"]][["all"]],
+  #                   dplyr::any_of(annotation_tags[["all"]])) |>
+  #     dplyr::filter(.data$CODING_STATUS == "noncoding")
+  #   if (nrow(pcg_report_snv_indel[["variant_set"]][["noncoding"]]) > 0) {
+  #     if (nrow(tier123) > 0) {
+  #       pcg_report_snv_indel[["variant_set"]][["noncoding"]] <-
+  #         dplyr::anti_join(pcg_report_snv_indel[["variant_set"]][["noncoding"]],
+  #                          tier123,
+  #                          by = c("VAR_ID"))
+  #     }
+  #     pcg_report_snv_indel[["variant_set"]][["noncoding"]] <-
+  #       pcg_report_snv_indel[["variant_set"]][["noncoding"]] |>
+  #       dplyr::arrange(dplyr::desc(.data$OPENTARGETS_RANK))
+  #     pcg_report_snv_indel[["disp"]][["noncoding"]] <-
+  #       dplyr::select(
+  #         pcg_report_snv_indel[["variant_set"]][["noncoding"]],
+  #         dplyr::any_of(annotation_tags[["tier5_display"]]))
+  #   }
+
+    ## Make TSV content with variant set
+    # pcg_report_snv_indel[["v_stat"]][["n_noncoding"]] <-
+    #   pcg_report_snv_indel[["variant_set"]][["noncoding"]] |> nrow()
+    # pcg_report_snv_indel[["variant_set"]][["tsv"]] <-
+    #   pcgrr::generate_tier_tsv(
+    #     pcg_report_snv_indel[["variant_set"]],
+    #     config,
+    #     annotation_tags,
+    #     sample_name = sample_name)
+
+ # }
+
+  pcg_report_snv_indel
+
+}
+
+
+#' Function that generates germline-filtered callset and PCGR
+#' report statistics for a given tumor-only callsets
+#'
+#' @param unfiltered_sample_calls variant calls
+#' @param sample_name sample identifier
+#' @param pcgr_config Object with PCGR configuration parameters
+#'
+#' @export
+generate_report_data_tumor_only <-
+  function(unfiltered_sample_calls,
+           sample_name,
+           pcgr_config) {
+
+  sample_calls <- unfiltered_sample_calls
+  gline_filter_stats <- list()
+  for (m in c("remain_post_gnomad",
+              "remain_post_clinvar",
+              "remain_post_dbsnp",
+              "remain_post_pon",
+              "remain_post_nonexonic",
+              "remain_post_hom",
+              "remain_post_het")) {
+    gline_filter_stats[m] <- 0
+  }
+
+  ## initiate report
+  pcg_report_to <-
+    pcgrr::init_tumor_only_content()
+
+  ## assign evidence tags for germline/somatic state of variants,
+  ## partially based on user-defined criteria
+  ## (population allele frequency thresholds)
+  vcalls <-
+    pcgrr::assign_somatic_germline_evidence(sample_calls, pcgr_config)
+
+  ## assign somatic classification based on accumulation
+  ## of evidence tags and user-defined options
+  vcalls <-
+    pcgrr::assign_somatic_classification(vcalls, pcgr_config)
+
+  ## Assign statistics to successive filtering levels for
+  ## different evidence criteria
+  ## excluded germline calls found in gnomAD
+  gline_filter_stats[["remain_post_gnomad"]] <-
+    nrow(vcalls) -
+    nrow(vcalls[vcalls$SOMATIC_CLASSIFICATION == "GERMLINE_GNOMAD", ])
+  pcgrr::log4r_info(paste0("Excluding coinciding germline variants in ",
+                           "gnomAD populations"))
+  pcgrr::log4r_info(paste0("Total sample calls remaining: ",
+                           gline_filter_stats[["remain_post_gnomad"]]))
+
+  ## excluded germline calls found in ClinVar
+  gline_filter_stats[["remain_post_clinvar"]] <-
+    gline_filter_stats[["remain_post_gnomad"]] -
+    nrow(vcalls[vcalls$SOMATIC_CLASSIFICATION == "GERMLINE_CLINVAR", ])
+  pcgrr::log4r_info(paste0("Excluding coinciding germline variants in ClinVar"))
+  pcgrr::log4r_info(paste0("Total sample calls remaining: ",
+                           gline_filter_stats[["remain_post_clinvar"]]))
+
+
+  ## excluded germline calls found in panel of normals (if provided)
+  gline_filter_stats[["remain_post_pon"]] <-
+    gline_filter_stats[["remain_post_clinvar"]]
+  if (pcgr_config[["tumor_only"]][["exclude_pon"]] == TRUE) {
+    gline_filter_stats[["remain_post_pon"]] <-
+      gline_filter_stats[["remain_post_pon"]] -
+      nrow(vcalls[vcalls$SOMATIC_CLASSIFICATION == "GERMLINE_PON", ])
+    pcgrr::log4r_info(
+      paste0("Excluding putative germline variants found in calls ",
+      "from panel-of-normals (PON)"))
+    pcgrr::log4r_info(
+      paste0("Total sample calls remaining: ",
+             gline_filter_stats[["remain_post_pon"]]))
+  }
+
+  ## excluded germline calls found with 100% allelic fraction
+  ## (likely homozygous germline variants)
+  gline_filter_stats[["remain_post_hom"]] <-
+    gline_filter_stats[["remain_post_pon"]]
+  if (pcgr_config[["tumor_only"]][["exclude_likely_hom_germline"]] == TRUE) {
+    gline_filter_stats[["remain_post_hom"]] <-
+      gline_filter_stats[["remain_post_hom"]] -
+      nrow(vcalls[vcalls$SOMATIC_CLASSIFICATION == "GERMLINE_HOMOZYGOUS", ])
+    pcgrr::log4r_info(
+      paste0("Excluding likely homozygous germline variants found ",
+             "as variants with 100% allelic fraction"))
+    pcgrr::log4r_info(paste0("Total sample calls remaining: ",
+                             gline_filter_stats[["remain_post_hom"]]))
+  }
+
+  ## excluded germline calls found as likely heterozygous germline variants
+  gline_filter_stats[["remain_post_het"]] <-
+    gline_filter_stats[["remain_post_hom"]]
+  if (pcgr_config[["tumor_only"]][["exclude_likely_het_germline"]] == TRUE) {
+    gline_filter_stats[["remain_post_het"]] <-
+      gline_filter_stats[["remain_post_het"]] -
+      nrow(vcalls[vcalls$SOMATIC_CLASSIFICATION == "GERMLINE_HETEROZYGOUS", ])
+    pcgrr::log4r_info(paste0(
+      "Excluding likely heterozygous germline variants found as variants ",
+      "with 40-60% allelic fraction and recorded in gnomAD + dbSNP"))
+    pcgrr::log4r_info(paste0("Total sample calls remaining: ",
+                             gline_filter_stats[["remain_post_het"]]))
+  }
+
+  ## excluded calls with dbSNP germline status (if set in config)
+  gline_filter_stats[["remain_post_dbsnp"]] <-
+    gline_filter_stats[["remain_post_het"]]
+  if (pcgr_config[["tumor_only"]][["exclude_dbsnp_nonsomatic"]] == TRUE) {
+
+    pcgrr::log4r_info(
+      paste0("Excluding non-somatically associated dbSNP variants ",
+             "(dbSNP - not recorded as somatic in DoCM/ClinVar",
+             "and not registered in COSMIC or found in TCGA"))
+
+    gline_filter_stats[["remain_post_dbsnp"]] <-
+      gline_filter_stats[["remain_post_dbsnp"]] -
+      nrow(vcalls[vcalls$SOMATIC_CLASSIFICATION == "GERMLINE_DBSNP", ])
+    pcgrr::log4r_info(paste0("Total sample calls remaining: ",
+                             gline_filter_stats[["remain_post_dbsnp"]]))
+  }
+
+  unfiltered_sample_calls <- vcalls
+  vcalls <- vcalls |>
+    dplyr::filter(.data$SOMATIC_CLASSIFICATION == "SOMATIC")
+
+  gline_filter_stats[["remain_post_nonexonic"]] <-
+    gline_filter_stats[["remain_post_dbsnp"]]
+  if (pcgr_config[["tumor_only"]][["exclude_nonexonic"]] == TRUE) {
+    pcgrr::log4r_info(paste0("Excluding non-exonic variants"))
+    vcalls <- dplyr::filter(vcalls, .data$EXONIC_STATUS == "exonic")
+    pcgrr::log4r_info(paste0("Total sample calls remaining: ",
+                             nrow(vcalls)))
+    gline_filter_stats[["remain_post_nonexonic"]] <- nrow(vcalls)
+  }
+
+  pcg_report_to[["eval"]] <- TRUE
+  pcg_report_to[["variant_set"]][["tsv_unfiltered"]] <- unfiltered_sample_calls |>
+    dplyr::select(.data$GENOMIC_CHANGE,
+                  .data$VAR_ID,
+                  .data$DP_TUMOR,
+                  .data$AF_TUMOR,
+                  .data$SYMBOL,
+                  .data$EXONIC_STATUS,
+                  .data$CONSEQUENCE,
+                  .data$STATUS_PON,
+                  .data$STATUS_LIKELY_GERMLINE_HOMOZYGOUS,
+                  .data$STATUS_LIKELY_GERMLINE_HETEROZYGOUS,
+                  .data$STATUS_DBSNP_GERMLINE,
+                  .data$STATUS_POPFREQ_1KG_ABOVE_TOLERATED,
+                  .data$STATUS_POPFREQ_GNOMAD_ABOVE_TOLERATED,
+                  .data$STATUS_CLINVAR_GERMLINE,
+                  .data$SOMATIC_CLASSIFICATION)
+  pcg_report_to[["variant_set"]][["filtered"]] <- vcalls
+  pcg_report_to[["v_stat"]][["unfiltered_n"]] <-
+    nrow(unfiltered_sample_calls)
+  pcg_report_to[["v_stat"]][["onekg_n_remain"]] <-
+    gline_filter_stats[["remain_post_onekg"]]
+  pcg_report_to[["v_stat"]][["gnomad_n_remain"]] <-
+    gline_filter_stats[["remain_post_gnomad"]]
+  pcg_report_to[["v_stat"]][["clinvar_n_remain"]] <-
+    gline_filter_stats[["remain_post_clinvar"]]
+  pcg_report_to[["v_stat"]][["pon_n_remain"]] <-
+    gline_filter_stats[["remain_post_pon"]]
+  pcg_report_to[["v_stat"]][["hom_n_remain"]] <-
+    gline_filter_stats[["remain_post_hom"]]
+  pcg_report_to[["v_stat"]][["het_n_remain"]] <-
+    gline_filter_stats[["remain_post_het"]]
+  pcg_report_to[["v_stat"]][["dbsnp_n_remain"]] <-
+    gline_filter_stats[["remain_post_dbsnp"]]
+  pcg_report_to[["v_stat"]][["nonexonic_n_remain"]] <-
+    gline_filter_stats[["remain_post_nonexonic"]]
+  for (db_filter in c("onekg", "gnomad", "dbsnp", "pon",
+                      "clinvar", "hom", "het", "nonexonic")) {
+    if (pcg_report_to[["v_stat"]][[paste0(db_filter, "_n_remain")]] > 0 &
+        pcg_report_to[["v_stat"]][["unfiltered_n"]] > 0) {
+      pcg_report_to[["v_stat"]][[paste0(db_filter, "_frac_remain")]] <-
+        round((as.numeric(pcg_report_to[["v_stat"]][[paste0(db_filter,
+                                                            "_n_remain")]]) /
+                 pcg_report_to[["v_stat"]][["unfiltered_n"]]) * 100, digits = 2)
+    }
+  }
+  return(pcg_report_to)
+
+}
+
+#' Function that annotates CNV segment files
+#'
+#' param cna_segments_tsv CNV file name with chromosomal log(2)-ratio segments
+#' param pcgr_data object with PCGR annotation data
+#' param sample_name sample identifier
+#' param pcgr_config Object with PCGR configuration parameters
+#' param oncotree Data frame with phenotype terms relevant for tumor type
+#' param transcript_overlap_pct required aberration overlap fraction
+#' (percent) for reported transcripts (default 100 percent)
+#'
+#' export
+#' generate_report_data_cna <-
+#'   function(cna_segments_tsv,
+#'            pcgr_data,
+#'            sample_name,
+#'            pcgr_config,
+#'            oncotree,
+#'            transcript_overlap_pct = 100) {
+#'
+#'     invisible(
+#'       assertthat::assert_that(
+#'         file.exists(cna_segments_tsv),
+#'         msg = paste0("File 'cna_segments_tsv' (",
+#'                      cna_segments_tsv, ") does not exist")))
+#'     pcg_report_cna <- pcgrr::init_report(config = pcgr_config,
+#'                                          class = "cna")
+#'     log_r_homdel <- pcgr_config[["cna"]][["log_r_homdel"]]
+#'     log_r_gain <- pcgr_config[["cna"]][["log_r_gain"]]
+#'     tumor_type <- pcgr_config[["t_props"]][["tumor_type"]]
+#'     MEGABASE <- 1000000
+#'
+#'     pcgrr::log4r_info("------")
+#'     pcgrr::log4r_info(paste0("Generating report data for copy number segment file ",
+#'                       cna_segments_tsv))
+#'
+#'     ## READ INPUT FILE, VALIDATE INPUT CHROMOSOMES AND SEGMENTS, ADD CYTOBAND INFO
+#'     cna_df <- utils::read.table(file = cna_segments_tsv, header = T,
+#'                          stringsAsFactors = F, sep = "\t",
+#'                          comment.char = "", quote = "") |>
+#'       dplyr::rename(chromosome = Chromosome,
+#'                     LogR = Segment_Mean,
+#'                     segment_start = Start,
+#'                     segment_end = End) |>
+#'       dplyr::distinct() |>
+#'       dplyr::select(
+#'         c("chromosome","LogR",
+#'           "segment_start","segment_end")) |>
+#'       dplyr::mutate(
+#'         chromosome = stringr::str_replace(
+#'           .data$chromosome, "^chr", "")) |>
+#'       pcgrr::get_valid_chromosomes(
+#'         chromosome_column = "chromosome",
+#'         bsg = pcgr_data[["assembly"]][["bsg"]]) |>
+#'       pcgrr::get_valid_chromosome_segments(
+#'         genome_assembly = pcgr_data[["assembly"]][["grch_name"]],
+#'         bsg = pcgr_data[["assembly"]][["bsg"]]) |>
+#'       dplyr::filter(!is.na(.data$LogR)) |>
+#'       dplyr::mutate(LogR = round(as.numeric(.data$LogR), digits = 3)) |>
+#'       dplyr::mutate(SEGMENT_ID = paste0(.data$chromosome, ":",
+#'                                         .data$segment_start, "-",
+#'                                         .data$segment_end)) |>
+#'       pcgrr::get_cna_cytoband(pcgr_data = pcgr_data) |>
+#'       dplyr::mutate(SAMPLE_ID = sample_name) |>
+#'       pcgrr::append_ucsc_segment_link(
+#'         hgname = pcgr_data[["assembly"]][["hg_name"]],
+#'         chrom = "chromosome",
+#'         start = "segment_start",
+#'         end = "segment_end") |>
+#'       dplyr::mutate(
+#'         SEGMENT_LENGTH_MB =
+#'           round((as.numeric((.data$segment_end - .data$segment_start) /
+#'                               MEGABASE)),
+#'                 digits = 5)) |>
+#'       dplyr::rename(SEGMENT = .data$SEGMENT_LINK, LOG_R = .data$LogR)
+#'
+#'     ## MAKE SIMPLE SEGMENTS DATA FRAME FOR FILTERING IN REPORT
+#'     cna_segments <- cna_df |>
+#'       dplyr::select(.data$SEGMENT,
+#'                     .data$SEGMENT_LENGTH_MB,
+#'                     .data$CYTOBAND,
+#'                     .data$LOG_R,
+#'                     .data$EVENT_TYPE) |>
+#'       dplyr::distinct()
+#'
+#'     #### FIND AND APPEND GENCODE TRANSCRIPTS THAT OVERLAP
+#'     cna_transcript_df <-
+#'       pcgrr::get_cna_overlapping_transcripts(
+#'          cna_df, pcgr_data = pcgr_data)
+#'     #get_cna_overlapping_transcripts(
+#'     #  cna_df, pcgr_data = pcgr_data)
+#'
+#'     #### GENERATE DATAFRAME OF UNIQUE TRANSCRIPT-CNA SEGMENTS FOR OUTPUT TSV
+#'     cna_transcript_df_print <- cna_transcript_df |>
+#'       dplyr::select(.data$chrom,
+#'                     .data$segment_start,
+#'                     .data$segment_end,
+#'                     .data$SEGMENT_ID,
+#'                     .data$SEGMENT_LENGTH_MB,
+#'                     .data$EVENT_TYPE,
+#'                     .data$CYTOBAND,
+#'                     .data$LOG_R,
+#'                     .data$SAMPLE_ID,
+#'                     .data$ensembl_gene_id,
+#'                     .data$symbol,
+#'                     .data$ensembl_transcript_id,
+#'                     .data$transcript_start,
+#'                     .data$transcript_end,
+#'                     .data$transcript_overlap_percent,
+#'                     .data$name,
+#'                     .data$biotype,
+#'                     .data$tumor_suppressor,
+#'                     .data$oncogene,
+#'                     .data$intogen_driver,
+#'                     .data$chembl_compound_id,
+#'                     .data$gencode_tag,
+#'                     .data$gencode_release) |>
+#'       magrittr::set_colnames(tolower(names(.)))
+#'
+#'     avg_transcript_overlap <- as.data.frame(
+#'       cna_transcript_df |>
+#'         dplyr::filter(.data$biotype == "protein_coding") |>
+#'         dplyr::group_by(.data$SEGMENT_ID, .data$symbol) |>
+#'         dplyr::summarise(
+#'           MEAN_TRANSCRIPT_CNA_OVERLAP = mean(
+#'             .data$transcript_overlap_percent),
+#'           TRANSCRIPTS = paste0(.data$ensembl_transcript_id, collapse = ", "),
+#'           .groups = "drop") |>
+#'         dplyr::rename(SYMBOL = .data$symbol) |>
+#'         dplyr::mutate(
+#'           MEAN_TRANSCRIPT_CNA_OVERLAP =
+#'             round(.data$MEAN_TRANSCRIPT_CNA_OVERLAP, digits = 2))
+#'     )
+#'
+#'     cna_transcript_df <-
+#'       dplyr::select(cna_transcript_df, -.data$ensembl_transcript_id) |>
+#'       dplyr::filter(.data$biotype == "protein_coding") |>
+#'       dplyr::distinct() |>
+#'       dplyr::mutate(VAR_ID = as.character(rep(1:nrow(.)))) |>
+#'       magrittr::set_colnames(toupper(names(.))) |>
+#'       pcgrr::append_otargets_pheno_link(
+#'         pcgr_data = pcgr_data,
+#'         oncotree = oncotree) |>
+#'       dplyr::rename(OPENTARGETS_ASSOCIATIONS =
+#'                       .data$OT_DISEASE_LINK) |>
+#'       dplyr::select(.data$VAR_ID,
+#'                     .data$SEGMENT_ID,
+#'                     .data$SYMBOL,
+#'                     .data$ONCOGENE,
+#'                     .data$ONCOGENE_EVIDENCE,
+#'                     .data$TUMOR_SUPPRESSOR,
+#'                     .data$TUMOR_SUPPRESSOR_EVIDENCE,
+#'                     .data$CANCERGENE_SUPPORT,
+#'                     .data$OPENTARGETS_ASSOCIATIONS,
+#'                     .data$OPENTARGETS_RANK,
+#'                     .data$ENTREZ_ID,
+#'                     .data$CHROM,
+#'                     .data$NAME,
+#'                     .data$EVENT_TYPE,
+#'                     .data$SEGMENT_LENGTH_MB,
+#'                     .data$SEGMENT,
+#'                     .data$TRANSCRIPT_OVERLAP_PERCENT,
+#'                     .data$LOG_R) |>
+#'       dplyr::mutate(ENTREZ_ID = as.character(.data$ENTREZ_ID)) |>
+#'       dplyr::rename(GENENAME = .data$NAME,
+#'                     TRANSCRIPT_OVERLAP = .data$TRANSCRIPT_OVERLAP_PERCENT,
+#'                     CHROMOSOME = .data$CHROM) |>
+#'       dplyr::left_join(pcgr_data[["kegg"]][["pathway_links"]],
+#'                        by = c("ENTREZ_ID" = "gene_id")) |>
+#'       dplyr::rename(KEGG_PATHWAY = .data$kegg_pathway_urls)
+#'
+#'     ## Get gene annotation links
+#'     entrezgene_annotation_links <-
+#'       pcgrr::generate_annotation_link(
+#'         cna_transcript_df,
+#'         vardb = "GENE_NAME",
+#'         group_by_var = "VAR_ID",
+#'         link_key_var = "ENTREZ_ID",
+#'         link_display_var = "GENENAME",
+#'         url_prefix = "http://www.ncbi.nlm.nih.gov/gene/")
+#'
+#'     cna_transcript_df <- cna_transcript_df |>
+#'       dplyr::left_join(
+#'         dplyr::rename(entrezgene_annotation_links,
+#'                       GENE_NAME = .data$link),
+#'         by = c("VAR_ID")) |>
+#'       dplyr::select(.data$SEGMENT_ID,
+#'                     .data$CHROMOSOME,
+#'                     .data$SYMBOL,
+#'                     .data$GENE_NAME,
+#'                     .data$KEGG_PATHWAY,
+#'                     .data$TUMOR_SUPPRESSOR,
+#'                     .data$TUMOR_SUPPRESSOR_EVIDENCE,
+#'                     .data$ONCOGENE,
+#'                     .data$ONCOGENE_EVIDENCE,
+#'                     .data$CANCERGENE_SUPPORT,
+#'                     .data$OPENTARGETS_ASSOCIATIONS,
+#'                     .data$OPENTARGETS_RANK,
+#'                     .data$SEGMENT_LENGTH_MB,
+#'                     .data$SEGMENT,
+#'                     .data$EVENT_TYPE,
+#'                     .data$LOG_R) |>
+#'       dplyr::distinct() |>
+#'       dplyr::left_join(avg_transcript_overlap,
+#'                        by = c("SEGMENT_ID", "SYMBOL"))
+#'
+#'
+#'     n_cna_loss <-
+#'       dplyr::filter(cna_segments, .data$LOG_R <= log_r_homdel) |>
+#'       nrow()
+#'     n_cna_gain <-
+#'       dplyr::filter(cna_segments, .data$LOG_R >= log_r_gain) |>
+#'       nrow()
+#'     cna_segments_filtered <- cna_segments |>
+#'       dplyr::filter(.data$LOG_R >= log_r_gain | .data$LOG_R <= log_r_homdel) |>
+#'       dplyr::arrange(dplyr::desc(.data$LOG_R))
+#'     pcgrr::log4r_info(
+#'       paste0("Detected ", nrow(cna_segments_filtered),
+#'              " segments subject to amplification/deletion (",
+#'              n_cna_loss, " deletions, ", n_cna_gain,
+#'              " gains according to user-defined log(2) ratio thresholds)"))
+#'
+#'
+#'     ## Get aberration sets related to tumor suppressor genes
+#'     ## /oncogenes/drug targets
+#'     onco_ts_sets <-
+#'       get_oncogene_tsgene_target_sets(
+#'         cna_transcript_df,
+#'         transcript_overlap_pct = transcript_overlap_pct,
+#'         log_r_homdel = log_r_homdel,
+#'         log_r_gain = log_r_gain,
+#'         tumor_type = tumor_type,
+#'         pcgr_data = pcgr_data)
+#'
+#'     ## load all clinical evidence items ()
+#'     eitems_any_tt <- pcgrr::load_eitems(
+#'       eitems_raw = pcgr_data$biomarkers,
+#'       alteration_types = "CNA",
+#'       ontology =
+#'         pcgr_data$phenotype$oncotree,
+#'       origin = "Somatic",
+#'       tumor_type_specificity = "any")
+#'
+#'
+#'
+#'     ## Get all clinical evidence items that are related to
+#'     ## tumor suppressor genes/oncogenes/drug targets (NOT tumor-type specific)
+#'     biomarker_hits_cna_any <-
+#'       pcgrr::get_clin_assocs_cna(
+#'         onco_ts_sets,
+#'         annotation_tags = pcgr_data$annotation_tags,
+#'         eitems = eitems_any_tt)
+#'
+#'     pcg_report_cna[["clin_eitem"]][["any_ttype"]] <-
+#'       biomarker_hits_cna_any[["clin_eitem"]]
+#'     pcg_report_cna[["variant_set"]][["tier2"]] <-
+#'       biomarker_hits_cna_any$variant_set
+#'
+#'     ## Get all clinical evidence items that
+#'     ## overlap query set (if tumor type is specified)
+#'     if (tumor_type != "Cancer, NOS") {
+#'
+#'       ## load tumor-type specific evidence items ()
+#'       eitems_specific_tt <- pcgrr::load_eitems(
+#'         eitems_raw = pcgr_data$biomarkers,
+#'         alteration_types = "CNA",
+#'         ontology =
+#'           pcgr_data$phenotype$oncotree,
+#'         origin = "Somatic",
+#'         tumor_type_specificity = "specific",
+#'         tumor_type = tumor_type)
+#'
+#'       biomarker_hits_cna_specific <-
+#'         pcgrr::get_clin_assocs_cna(
+#'           onco_ts_sets,
+#'           annotation_tags = pcgr_data$annotation_tags,
+#'           eitems = eitems_specific_tt)
+#'
+#'       ## Assign putative TIER 1 variant set
+#'       pcg_report_cna[["clin_eitem"]][["query_ttype"]] <-
+#'         biomarker_hits_cna_specific$clin_eitem
+#'       pcg_report_cna[["variant_set"]][["tier1"]] <-
+#'         biomarker_hits_cna_specific$variant_set
+#'     }
+#'
+#'     pcg_report_cna[["eval"]] <- T
+#'     pcg_report_cna[["variant_set"]][["tsv"]] <-
+#'       cna_transcript_df_print
+#'     pcg_report_cna[["v_stat"]][["n_cna_gain"]] <-
+#'       n_cna_gain
+#'     pcg_report_cna[["v_stat"]][["n_cna_loss"]] <-
+#'       n_cna_loss
+#'     pcg_report_cna[["disp"]][["segment"]] <-
+#'       cna_segments_filtered
+#'     pcg_report_cna[["disp"]][["oncogene_gain"]] <-
+#'       onco_ts_sets[["oncogene_gain"]]
+#'     pcg_report_cna[["disp"]][["tsgene_loss"]] <-
+#'       onco_ts_sets[["tsgene_loss"]]
+#'     pcg_report_cna[["disp"]][["other_target"]] <-
+#'       onco_ts_sets[["other_target"]]
+#'
+#'
+#'     pcg_report_cna <-
+#'       pcgrr::assign_tier1_tier2_acmg_cna(pcg_report_cna)
+#'
+#'     return(pcg_report_cna)
+#'   }
+#'
+
+#' Function that annotates CNV segment files
+#'
+#' param yaml_fname PCGR yaml file
+#' param ref_data PCGR/CPSR reference data object
+#'
+#' export
+# generate_report_data_cna2 <-
+#   function(yaml_fname,
+#            ref_data) {
+#
+#     ## 1. Validate CNA segments tsv
+#     ## - check file exists
+#     ## - check columns necessary
+#     ## - check types
+#
+#     invisible(
+#       assertthat::assert_that(
+#         file.exists(cna_segments_tsv),
+#         msg = paste0("File 'cna_segments_tsv' (",
+#                      cna_segments_tsv, ") does not exist")))
+#     pcg_report_cna <- pcgrr::init_report(
+#       yaml_fname, report_mode = "PCGR")
+#     #log_r_homdel <- pcgr_config[["cna"]][["log_r_homdel"]]
+#     #log_r_gain <- pcgr_config[["cna"]][["log_r_gain"]]
+#     tumor_type <- pcgr_config[["t_props"]][["tumor_type"]]
+#     MEGABASE <- 1000000
+#
+#     pcgrr::log4r_info("------")
+#     pcgrr::log4r_info(paste0("Generating report data for copy number segment file ",
+#                              cna_segments_tsv))
+#
+#     # ## READ INPUT FILE, VALIDATE INPUT CHROMOSOMES AND SEGMENTS, ADD CYTOBAND INFO
+#     # cna_df <- utils::read.table(file = cna_segments_tsv, header = T,
+#     #                             stringsAsFactors = F, sep = "\t",
+#     #                             comment.char = "", quote = "") |>
+#     #   dplyr::rename(chromosome = Chromosome,
+#     #                 LogR = Segment_Mean,
+#     #                 segment_start = Start,
+#     #                 segment_end = End) |>
+#     #   dplyr::distinct() |>
+#     #   dplyr::select(
+#     #     c("chromosome","LogR",
+#     #       "segment_start","segment_end")) |>
+#     #   dplyr::mutate(
+#     #     chromosome = stringr::str_replace(
+#     #       .data$chromosome, "^chr", "")) |>
+#     #   pcgrr::get_valid_chromosomes(
+#     #     chromosome_column = "chromosome",
+#     #     bsg = pcgr_data[["assembly"]][["bsg"]]) |>
+#     #   pcgrr::get_valid_chromosome_segments(
+#     #     genome_assembly = pcgr_data[["assembly"]][["grch_name"]],
+#     #     bsg = pcgr_data[["assembly"]][["bsg"]]) |>
+#     #   dplyr::filter(!is.na(.data$LogR)) |>
+#     #   dplyr::mutate(LogR = round(as.numeric(.data$LogR), digits = 3)) |>
+#     #   dplyr::mutate(SEGMENT_ID = paste0(.data$chromosome, ":",
+#     #                                     .data$segment_start, "-",
+#     #                                     .data$segment_end)) |>
+#     #   pcgrr::get_cna_cytoband(pcgr_data = pcgr_data) |>
+#       dplyr::mutate(SAMPLE_ID = sample_name) |>
+#       pcgrr::append_ucsc_segment_link(
+#         hgname = pcgr_data[["assembly"]][["hg_name"]],
+#         chrom = "chromosome",
+#         start = "segment_start",
+#         end = "segment_end") |>
+#       # dplyr::mutate(
+#       #   SEGMENT_LENGTH_MB =
+#       #     round((as.numeric((.data$segment_end - .data$segment_start) /
+#       #                         MEGABASE)),
+#       #           digits = 5)) |>
+#       dplyr::rename(SEGMENT = .data$SEGMENT_LINK, LOG_R = .data$LogR)
+#
+#     ## MAKE SIMPLE SEGMENTS DATA FRAME FOR FILTERING IN REPORT
+#     cna_segments <- cna_df |>
+#       dplyr::select(.data$SEGMENT,
+#                     .data$SEGMENT_LENGTH_MB,
+#                     .data$CYTOBAND,
+#                     .data$LOG_R,
+#                     .data$EVENT_TYPE) |>
+#       dplyr::distinct()
+#
+#     #### FIND AND APPEND GENCODE TRANSCRIPTS THAT OVERLAP
+#     cna_transcript_df <-
+#       pcgrr::get_cna_overlapping_transcripts(
+#         cna_df, pcgr_data = pcgr_data)
+#     #get_cna_overlapping_transcripts(
+#     #  cna_df, pcgr_data = pcgr_data)
+#
+#     #### GENERATE DATAFRAME OF UNIQUE TRANSCRIPT-CNA SEGMENTS FOR OUTPUT TSV
+#     cna_transcript_df_print <- cna_transcript_df |>
+#       dplyr::select(.data$chrom,
+#                     .data$segment_start,
+#                     .data$segment_end,
+#                     .data$SEGMENT_ID,
+#                     .data$SEGMENT_LENGTH_MB,
+#                     .data$EVENT_TYPE,
+#                     .data$CYTOBAND,
+#                     .data$LOG_R,
+#                     .data$SAMPLE_ID,
+#                     .data$ensembl_gene_id,
+#                     .data$symbol,
+#                     .data$ensembl_transcript_id,
+#                     .data$transcript_start,
+#                     .data$transcript_end,
+#                     .data$transcript_overlap_percent,
+#                     .data$name,
+#                     .data$biotype,
+#                     .data$tumor_suppressor,
+#                     .data$oncogene,
+#                     .data$intogen_driver,
+#                     .data$chembl_compound_id,
+#                     .data$gencode_tag,
+#                     .data$gencode_release) |>
+#       magrittr::set_colnames(tolower(names(.)))
+#
+#     avg_transcript_overlap <- as.data.frame(
+#       cna_transcript_df |>
+#         dplyr::filter(.data$biotype == "protein_coding") |>
+#         dplyr::group_by(.data$SEGMENT_ID, .data$symbol) |>
+#         dplyr::summarise(
+#           MEAN_TRANSCRIPT_CNA_OVERLAP = mean(
+#             .data$transcript_overlap_percent),
+#           TRANSCRIPTS = paste0(.data$ensembl_transcript_id, collapse = ", "),
+#           .groups = "drop") |>
+#         dplyr::rename(SYMBOL = .data$symbol) |>
+#         dplyr::mutate(
+#           MEAN_TRANSCRIPT_CNA_OVERLAP =
+#             round(.data$MEAN_TRANSCRIPT_CNA_OVERLAP, digits = 2))
+#     )
+#
+#     cna_transcript_df <-
+#       dplyr::select(cna_transcript_df, -.data$ensembl_transcript_id) |>
+#       dplyr::filter(.data$biotype == "protein_coding") |>
+#       dplyr::distinct() |>
+#       dplyr::mutate(VAR_ID = as.character(rep(1:nrow(.)))) |>
+#       magrittr::set_colnames(toupper(names(.))) |>
+#       pcgrr::append_otargets_pheno_link(
+#         pcgr_data = pcgr_data,
+#         oncotree = oncotree) |>
+#       dplyr::rename(OPENTARGETS_ASSOCIATIONS =
+#                       .data$OT_DISEASE_LINK) |>
+#       dplyr::select(.data$VAR_ID,
+#                     .data$SEGMENT_ID,
+#                     .data$SYMBOL,
+#                     .data$ONCOGENE,
+#                     .data$ONCOGENE_EVIDENCE,
+#                     .data$TUMOR_SUPPRESSOR,
+#                     .data$TUMOR_SUPPRESSOR_EVIDENCE,
+#                     .data$CANCERGENE_SUPPORT,
+#                     .data$OPENTARGETS_ASSOCIATIONS,
+#                     .data$OPENTARGETS_RANK,
+#                     .data$ENTREZ_ID,
+#                     .data$CHROM,
+#                     .data$NAME,
+#                     .data$EVENT_TYPE,
+#                     .data$SEGMENT_LENGTH_MB,
+#                     .data$SEGMENT,
+#                     .data$TRANSCRIPT_OVERLAP_PERCENT,
+#                     .data$LOG_R) |>
+#       dplyr::mutate(ENTREZ_ID = as.character(.data$ENTREZ_ID)) |>
+#       dplyr::rename(GENENAME = .data$NAME,
+#                     TRANSCRIPT_OVERLAP = .data$TRANSCRIPT_OVERLAP_PERCENT,
+#                     CHROMOSOME = .data$CHROM) |>
+#       dplyr::left_join(pcgr_data[["kegg"]][["pathway_links"]],
+#                        by = c("ENTREZ_ID" = "gene_id")) |>
+#       dplyr::rename(KEGG_PATHWAY = .data$kegg_pathway_urls)
+#
+#     ## Get gene annotation links
+#     entrezgene_annotation_links <-
+#       pcgrr::generate_annotation_link(
+#         cna_transcript_df,
+#         vardb = "GENE_NAME",
+#         group_by_var = "VAR_ID",
+#         link_key_var = "ENTREZ_ID",
+#         link_display_var = "GENENAME",
+#         url_prefix = "http://www.ncbi.nlm.nih.gov/gene/")
+#
+#     cna_transcript_df <- cna_transcript_df |>
+#       dplyr::left_join(
+#         dplyr::rename(entrezgene_annotation_links,
+#                       GENE_NAME = .data$link),
+#         by = c("VAR_ID")) |>
+#       dplyr::select(.data$SEGMENT_ID,
+#                     .data$CHROMOSOME,
+#                     .data$SYMBOL,
+#                     .data$GENE_NAME,
+#                     .data$KEGG_PATHWAY,
+#                     .data$TUMOR_SUPPRESSOR,
+#                     .data$TUMOR_SUPPRESSOR_EVIDENCE,
+#                     .data$ONCOGENE,
+#                     .data$ONCOGENE_EVIDENCE,
+#                     .data$CANCERGENE_SUPPORT,
+#                     .data$OPENTARGETS_ASSOCIATIONS,
+#                     .data$OPENTARGETS_RANK,
+#                     .data$SEGMENT_LENGTH_MB,
+#                     .data$SEGMENT,
+#                     .data$EVENT_TYPE,
+#                     .data$LOG_R) |>
+#       dplyr::distinct() |>
+#       dplyr::left_join(avg_transcript_overlap,
+#                        by = c("SEGMENT_ID", "SYMBOL"))
+#
+#
+#     n_cna_loss <-
+#       dplyr::filter(cna_segments, .data$LOG_R <= log_r_homdel) |>
+#       nrow()
+#     n_cna_gain <-
+#       dplyr::filter(cna_segments, .data$LOG_R >= log_r_gain) |>
+#       nrow()
+#     cna_segments_filtered <- cna_segments |>
+#       dplyr::filter(.data$LOG_R >= log_r_gain | .data$LOG_R <= log_r_homdel) |>
+#       dplyr::arrange(dplyr::desc(.data$LOG_R))
+#     pcgrr::log4r_info(
+#       paste0("Detected ", nrow(cna_segments_filtered),
+#              " segments subject to amplification/deletion (",
+#              n_cna_loss, " deletions, ", n_cna_gain,
+#              " gains according to user-defined log(2) ratio thresholds)"))
+#
+#
+#     ## Get aberration sets related to tumor suppressor genes
+#     ## /oncogenes/drug targets
+#     onco_ts_sets <-
+#       get_oncogene_tsgene_target_sets(
+#         cna_transcript_df,
+#         transcript_overlap_pct = transcript_overlap_pct,
+#         log_r_homdel = log_r_homdel,
+#         log_r_gain = log_r_gain,
+#         tumor_type = tumor_type,
+#         pcgr_data = pcgr_data)
+#
+#     ## load all clinical evidence items ()
+#     eitems_any_tt <- pcgrr::load_eitems(
+#       eitems_raw = pcgr_data$biomarkers,
+#       alteration_types = "CNA",
+#       ontology =
+#         pcgr_data$phenotype$oncotree,
+#       origin = "Somatic",
+#       tumor_type_specificity = "any")
+#
+#
+#
+#     ## Get all clinical evidence items that are related to
+#     ## tumor suppressor genes/oncogenes/drug targets (NOT tumor-type specific)
+#     biomarker_hits_cna_any <-
+#       pcgrr::get_clin_assocs_cna(
+#         onco_ts_sets,
+#         annotation_tags = pcgr_data$annotation_tags,
+#         eitems = eitems_any_tt)
+#
+#     pcg_report_cna[["clin_eitem"]][["any_ttype"]] <-
+#       biomarker_hits_cna_any[["clin_eitem"]]
+#     pcg_report_cna[["variant_set"]][["tier2"]] <-
+#       biomarker_hits_cna_any$variant_set
+#
+#     ## Get all clinical evidence items that
+#     ## overlap query set (if tumor type is specified)
+#     if (tumor_type != "Cancer, NOS") {
+#
+#       ## load tumor-type specific evidence items ()
+#       eitems_specific_tt <- pcgrr::load_eitems(
+#         eitems_raw = pcgr_data$biomarkers,
+#         alteration_types = "CNA",
+#         ontology =
+#           pcgr_data$phenotype$oncotree,
+#         origin = "Somatic",
+#         tumor_type_specificity = "specific",
+#         tumor_type = tumor_type)
+#
+#       biomarker_hits_cna_specific <-
+#         pcgrr::get_clin_assocs_cna(
+#           onco_ts_sets,
+#           annotation_tags = pcgr_data$annotation_tags,
+#           eitems = eitems_specific_tt)
+#
+#       ## Assign putative TIER 1 variant set
+#       pcg_report_cna[["clin_eitem"]][["query_ttype"]] <-
+#         biomarker_hits_cna_specific$clin_eitem
+#       pcg_report_cna[["variant_set"]][["tier1"]] <-
+#         biomarker_hits_cna_specific$variant_set
+#     }
+#
+#     pcg_report_cna[["eval"]] <- T
+#     pcg_report_cna[["variant_set"]][["tsv"]] <-
+#       cna_transcript_df_print
+#     pcg_report_cna[["v_stat"]][["n_cna_gain"]] <-
+#       n_cna_gain
+#     pcg_report_cna[["v_stat"]][["n_cna_loss"]] <-
+#       n_cna_loss
+#     pcg_report_cna[["disp"]][["segment"]] <-
+#       cna_segments_filtered
+#     pcg_report_cna[["disp"]][["oncogene_gain"]] <-
+#       onco_ts_sets[["oncogene_gain"]]
+#     pcg_report_cna[["disp"]][["tsgene_loss"]] <-
+#       onco_ts_sets[["tsgene_loss"]]
+#     pcg_report_cna[["disp"]][["other_target"]] <-
+#       onco_ts_sets[["other_target"]]
+#
+#
+#     pcg_report_cna <-
+#       pcgrr::assign_tier1_tier2_acmg_cna(pcg_report_cna)
+#
+#     return(pcg_report_cna)
+#   }
+#
+
+#' Function that generates dense and tiered annotated variant datasets
+#'
+#' Stacks the per-tier variant sets into one data frame, labels every row
+#' with its tier, and reduces HTML link markup in selected columns to
+#' plain text so that the result can be written as tab-separated values.
+#'
+#' @param variant_set List with tiered variants (data frames named
+#' "tier1", "tier2", "tier3", "tier4" and "noncoding"); tiers that are
+#' missing or have no rows are skipped
+#' @param config PCGR configuration settings; "preserved_info_tags" is read
+#' as a comma-separated string of extra columns to keep ("None" = no extras)
+#' @param annotation_tags List with display columns ("tsv" element is used)
+#' @param sample_name Sample identifier
+#'
+#' @return tsv_variants data frame with tier-annotated list of
+#' variants for tab-separated output (zero rows when no tier has variants)
+#'
+#' @export
+generate_tier_tsv <- function(variant_set,
+                              config,
+                              annotation_tags,
+                              sample_name = "test") {
+
+  ## TIER and TIER_DESCRIPTION values, keyed by element name in variant_set
+  tier_labels <- list(
+    tier1 = c("TIER 1", "Variants of strong clinical significance"),
+    tier2 = c("TIER 2", "Variants of potential clinical significance"),
+    tier3 = c("TIER 3", "Variants of uncertain significance"),
+    tier4 = c("TIER 4", "Other coding mutation"),
+    noncoding = c("NONCODING", "Noncoding mutation"))
+
+  ## extra columns requested by the user (trimmed; NULL when none)
+  tags <- NULL
+  preserved <- config[["preserved_info_tags"]]
+  if (!is.null(preserved) && !identical(preserved, "None")) {
+    tags <- stringr::str_trim(
+      stringr::str_split(preserved, pattern = ",")[[1]])
+  }
+
+  pcgrr::log4r_info(paste0(
+    "Generating tiered set of result variants for output",
+    " in tab-separated values (TSV) file"))
+
+  tier_sets <- lapply(names(tier_labels), function(tier) {
+    tierset <- variant_set[[tier]]
+    ## nrow(NULL) is NULL and would break a bare `if`; skip absent tiers
+    if (!is.data.frame(tierset) || nrow(tierset) == 0) {
+      return(NULL)
+    }
+    tierset$VCF_SAMPLE_ID <- sample_name
+    tsv_columns <- c(annotation_tags[["tsv"]],
+                     tags[tags %in% colnames(tierset)])
+    tierset$TIER_DESCRIPTION <- tier_labels[[tier]][2]
+    tierset$TIER <- tier_labels[[tier]][1]
+    tierset |>
+      dplyr::select(dplyr::any_of(tsv_columns)) |>
+      dplyr::distinct()
+  })
+
+  ## skipped tiers are NULL entries, which bind_rows() leaves out
+  tsv_variants <- dplyr::bind_rows(tier_sets)
+  if (nrow(tsv_variants) == 0) {
+    ## no variants in any tier: nothing to clean up below
+    return(tsv_variants)
+  }
+
+  ## Reduce HTML links to their display text. The text of every element
+  ## (">text</") is captured on its own, so a cell holding several links
+  ## becomes "text1,text2" instead of one greedy match that also drags in
+  ## the markup between the first and the last link.
+  link_columns <- intersect(c("GENE_NAME", "CLINVAR", "PROTEIN_DOMAIN"),
+                            colnames(tsv_variants))
+  for (link_column in link_columns) {
+    link_text <- stringr::str_match_all(
+      tsv_variants[[link_column]], ">([^<>]+)</")
+    tsv_variants[[link_column]] <- vapply(
+      link_text,
+      function(m) paste(m[, 2], collapse = ","),
+      character(1))
+  }
+
+  ## TCGA_FREQUENCY: drop the GDC project link tags, keep the cell text
+  if ("TCGA_FREQUENCY" %in% colnames(tsv_variants)) {
+    tsv_variants$TCGA_FREQUENCY <- stringr::str_replace_all(
+      tsv_variants$TCGA_FREQUENCY,
+      paste0("<a href='https://portal.gdc.cancer.gov/projects/",
+             "TCGA-[A-Z]{1,}' target=\"_blank\">|</a>"),
+      "")
+  }
+
+  return(dplyr::distinct(tsv_variants))
+}
+
+
+#' Write the contents of a PCGR/CPSR report object to one output format
+#' (Rmarkdown/flexdashboard HTML report, gzipped JSON, or tab-separated files)
+#'
+#' @param report List object with all report data (PCGR/CPSR), settings etc.
+#' @param tier_model type of tier model ("pcgr_acmg" or "cpsr")
+#' @param output_format contents/file format of output - one of "html",
+#' "json", "snv_tsv", "snv_tsv_unfiltered", "msigs_tsv" or "cna_tsv"
+#' @param flexdb logical indicating if HTML output should be dashboard
+#' @return No meaningful value; called for its side effect of writing files
+#' @export
+write_report_output <- function(report,
+                                tier_model = "pcgr_acmg",
+                                output_format = "html",
+                                flexdb = FALSE) {
+
+  settings <- report[["settings"]]
+  project_directory <- settings[["output_dir"]]
+  sample_name <- settings[["sample_id"]]
+  genome_assembly <- settings[["genome_assembly"]]
+
+  sample_fname_pattern <-
+    paste(sample_name, tier_model, genome_assembly, sep = ".")
+
+  fnames <- list()
+  fnames[["snv_tsv_unfiltered"]] <-
+    file.path(project_directory,
+              paste0(sample_fname_pattern,
+                     ".snvs_indels.unfiltered.tsv"))
+  fnames[["msigs_tsv"]] <-
+    file.path(project_directory,
+              paste0(sample_fname_pattern,
+                     ".mutational_signatures.tsv"))
+  fnames[["snv_tsv"]] <-
+    file.path(project_directory,
+              paste0(sample_fname_pattern,
+                     ".snvs_indels.tiers.tsv"))
+  fnames[["xlsx"]] <-
+    file.path(project_directory,
+              paste0(sample_fname_pattern,
+                     ".snvs_indels.tiers.xlsx"))
+  fnames[["cna_tsv"]] <-
+    file.path(project_directory,
+              paste0(sample_fname_pattern,
+                     ".cna_segments.tsv"))
+  fnames[["json"]] <-
+    file.path(project_directory,
+              paste0(sample_fname_pattern, ".json"))
+  fnames[["html"]] <-
+    file.path(project_directory,
+              paste0(sample_fname_pattern, ".html"))
+  if (flexdb == TRUE) {
+    fnames[["html"]] <-
+      file.path(project_directory,
+                paste0(sample_fname_pattern,
+                       ".flexdb.html"))
+  }
+
+  ## NOTE: every output_format handled below needs its path in 'fnames'
+
+  ## Set to CPSR/germline settings as default
+  sequencing_design <- "Germline"
+  cpsr_tmpl <- system.file("templates", package = "cpsr")
+  disclaimer <- file.path(cpsr_tmpl, "disclaimer_predisposition.md")
+  markdown_input <- file.path(cpsr_tmpl, "cpsr_rmarkdown_report.Rmd")
+  css_fname <- file.path(cpsr_tmpl, "cpsr.css")
+  report_theme <-
+    settings[["conf"]][["visual_reporting"]][["visual_theme"]]
+
+  ## Somatic/tumor report settings
+  if (tier_model == "pcgr_acmg") {
+    pcgrr_tmpl <- system.file("templates", package = "pcgrr")
+
+    disclaimer <- file.path(pcgrr_tmpl, "disclaimer.md")
+    assay_props <-
+      settings[["conf"]][["assay_properties"]]
+    sequencing_assay <-
+      assay_props[["type"]]
+
+    ## Flexdashboard layout
+    sequencing_design <- "Tumor-Control"
+    markdown_input <- file.path(pcgrr_tmpl, "pcgr_flexdb_report.Rmd")
+    css_fname <- file.path(pcgrr_tmpl, "pcgr_flexdb_tumor_control.css")
+
+    ## Rmarkdown layout
+    if (flexdb == FALSE) {
+      markdown_input <- file.path(pcgrr_tmpl, "pcgr_rmarkdown_report.Rmd")
+      css_fname <- file.path(pcgrr_tmpl, "pcgr_rmarkdown_tumor_control.css")
+    }
+
+    ## Tumor-only settings (CSS)
+    if (assay_props[["vcf_tumor_only"]] == TRUE) {
+      sequencing_design <- "Tumor-Only"
+      css_fname <- file.path(pcgrr_tmpl, "pcgr_flexdb_tumor_only.css")
+
+      if (flexdb == FALSE) {
+        css_fname <- file.path(pcgrr_tmpl, "pcgr_rmarkdown_tumor_only.css")
+      }
+    }
+  }
+
+  if (output_format == "html") {
+
+    if (flexdb == TRUE && tier_model == "pcgr_acmg") {
+      pcgrr::log4r_info("------")
+      pcgrr::log4r_info(
+        "Writing HTML file (.html) with report contents - flexdashboard")
+      navbar_items <- list()
+      navbar_items[[1]] <-
+        list("title" = paste0(
+          "<b>", sample_name, "</b> | <i>",
+          report[["metadata"]][["config"]][["t_props"]][["tumor_type"]],
+          "</i> | ", sequencing_design, " | ", sequencing_assay),
+          href = "", target = "_blank", align = "right")
+      navbar_items[[2]] <-
+        list("icon" = "fa-github",
+             href = "https://github.com/sigven/pcgr", target = "_blank",
+             align = "right")
+
+      rmarkdown::render(
+        markdown_input,
+        output_format =
+          flexdashboard::flex_dashboard(
+            orientation = "rows",
+            favicon = system.file(
+              "templates","favicon-16x16.png",
+              package = "pcgrr"),
+            theme = "cosmo",
+            css = css_fname,
+            navbar = navbar_items),
+        output_file = fnames[["html"]],
+        output_dir = project_directory,
+        clean = TRUE,
+        intermediates_dir = project_directory,
+        quiet = TRUE)
+    } else {
+
+      toc_float <-
+        list(collapsed = TRUE,
+             smooth_scroll = TRUE,
+             print = TRUE)
+      toc_depth <- 3
+
+      ## Ignore collapsing menu for CPSR
+      if (tier_model == "cpsr") {
+        toc_float <-
+          list(collapsed = FALSE,
+               smooth_scroll = FALSE,
+               print = TRUE)
+      }
+
+      ## If nonfloating TOC is chosen (PCGR/CPSR), set toc_float to FALSE
+      nonfloating_toc <-
+        as.logical(settings[["conf"]][["visual_reporting"]][["nonfloating_toc"]])
+      if (isTRUE(nonfloating_toc)) {
+        toc_float <- FALSE
+      }
+
+      disclaimer <- system.file(
+        "templates",
+        "disclaimer.md",
+        package = "pcgrr")
+
+      header <- system.file(
+         "templates",
+         "_header.html",
+         package = "pcgrr")
+      if (tier_model == "cpsr") {
+        header <- system.file(
+          "templates",
+          "_header.html",
+          package = "cpsr")
+      }
+
+      pcgrr::log4r_info("------")
+      pcgrr::log4r_info(paste0(
+        "Writing HTML file (.html) with report contents - rmarkdown (theme = '",
+        report_theme,"')"))
+      rmarkdown::render(
+        markdown_input,
+        output_format =
+          rmarkdown::html_document(
+            theme = report_theme,
+            fig_width = 5,
+            fig_height = 4,
+            toc = TRUE,
+            toc_depth = toc_depth,
+            toc_float = toc_float,
+            number_sections = FALSE,
+            css = css_fname,
+            includes =
+              rmarkdown::includes(
+                in_header = header,
+                after_body = disclaimer)),
+        output_file = fnames[["html"]],
+        output_dir = project_directory,
+        clean = TRUE,
+        intermediates_dir = project_directory,
+        quiet = TRUE)
+    }
+  }
+  if (output_format == "json") {
+    if (!is.null(report[["cna_plot"]][["png"]])) {
+      report[["cna_plot"]][["png"]] <- NULL
+    }
+    if (!is.null(report[["tmb"]][["tcga_tmb"]])) {
+      report[["tmb"]][["tcga_tmb"]] <- NULL
+    }
+    pcgrr::log4r_info("------")
+    pcgrr::log4r_info("Writing JSON file (.json) with key report contents")
+
+    report_strip <- report
+
+    if (tier_model != "cpsr") {
+      if (!is.null(report_strip$content$rainfall)) {
+        report_strip$content$rainfall <- NULL
+      }
+      if (!is.null(report_strip$content$tmb)) {
+        report_strip$content$tmb$tcga_tmb <- NULL
+      }
+      if (!is.null(report_strip$content$clinicaltrials)) {
+        report_strip$content$clinicaltrials <- NULL
+      }
+      if (!is.null(report_strip$content$msi)) {
+        if (!is.null(report_strip$content$msi$prediction)) {
+          report_strip$content$msi$prediction$tcga_dataset <- NULL
+        }
+      }
+
+      if (!is.null(report_strip$content$snv_indel$disp)) {
+        report_strip$content$snv_indel$disp <- NULL
+      }
+
+      if (!is.null(report_strip$content$snv_indel$variant_set)) {
+        if (!is.null(report_strip$content$snv_indel$variant_set$maf)) {
+          report_strip$content$snv_indel$variant_set$maf <- NULL
+        }
+      }
+
+      key_tsv_cols <- c("GENOMIC_CHANGE",
+                        "VARIANT_CLASS",
+                        "SYMBOL",
+                        "ENTREZ_ID",
+                        "ENSEMBL_TRANSCRIPT_ID",
+                        "TUMOR_SUPPRESSOR",
+                        "ONCOGENE",
+                        "CONSEQUENCE",
+                        "PROTEIN_CHANGE",
+                        "PROTEIN_DOMAIN",
+                        "CODING_STATUS",
+                        "EXONIC_STATUS",
+                        "HGVSp",
+                        "MUTATION_HOTSPOT",
+                        "DBSNPRSID",
+                        "COSMIC_MUTATION_ID",
+                        "CALL_CONFIDENCE",
+                        "DP_TUMOR",
+                        "AF_TUMOR",
+                        "DP_CONTROL",
+                        "AF_CONTROL",
+                        "TIER")
+
+      if (!is.null(report_strip$content$snv_indel$variant_set)) {
+
+        for (o in c("tsv")) {
+
+          if (!is.null(report_strip$content$snv_indel$variant_set[[o]])) {
+
+            if (nrow(report_strip$content$snv_indel$variant_set[[o]]) == 0) {
+              next
+            }
+            assertable::assert_colnames(
+              report_strip$content$snv_indel$variant_set[[o]],
+              colnames = key_tsv_cols,
+              only_colnames = FALSE,
+              quiet = TRUE
+            )
+
+            report_strip$content$snv_indel$variant_set[[o]] <-
+              dplyr::select(
+                report_strip$content$snv_indel$variant_set[[o]],
+                dplyr::any_of(key_tsv_cols)
+              )
+
+          }
+        }
+      }
+
+    } ## if tier_model != "cpsr"
+
+    ## Log the in-memory size of the stripped object before serializing it
+    size <- format(utils::object.size(report_strip), units = "auto")
+    #hsize <- R.utils::hsize.object_size(size)
+    pcgrr::log4r_info(paste0("Size of PCGR report object for JSON output: ", size))
+
+
+    ## NOTE: set max size of report object to 750 Mb - have not figured out
+    ## what the exact size should be for jsonlite::toJSON to succeed/fail
+    if (utils::object.size(report_strip) < 750000000) {
+
+      pcgr_json <- jsonlite::toJSON(
+        report_strip, pretty = TRUE, na = "string",
+        null = "null", force = TRUE)
+      write(pcgr_json, fnames[["json"]])
+      gzip_command <- paste0("gzip -f ", shQuote(fnames[["json"]]))
+      system(gzip_command, intern = FALSE)
+    } else {
+      pcgrr::log4r_info("JSON output not possible - report contents too large (> 750Mb)")
+
+    }
+  }
+
+  if (output_format == "snv_tsv" || output_format == "snv_tsv_unfiltered") {
+    output_format_slim <- stringr::str_replace(output_format, "snv_", "")
+    if (NROW(
+      report[["content"]][["snv_indel"]][["variant_set"]][[output_format_slim]]) > 0) {
+      pcgrr::log4r_info("------")
+      if (tier_model == "pcgr_acmg") {
+        pcgrr::log4r_info(
+          paste0("Writing SNV/InDel tab-separated output file with ",
+                 "PCGR annotations - ('",
+                 output_format_slim, "')"))
+      }
+      if (tier_model == "cpsr") {
+        pcgrr::log4r_info(
+          paste0("Writing SNV/InDel tab-separated output file ",
+                 "with CPSR annotations - ('",
+                 output_format_slim, "')"))
+      }
+      utils::write.table(
+        report[["content"]][["snv_indel"]][["variant_set"]][[output_format_slim]],
+        file = fnames[[output_format]], sep = "\t", col.names = TRUE,
+        row.names = FALSE, quote = FALSE)
+
+      # if (tier_model == "pcgr_acmg") {
+      #   pcgrr::log4r_info(
+      #     paste0("Writing SNV/InDel Excel output file with ",
+      #            "PCGR annotations"))
+      #   workbook <- openxlsx::createWorkbook()
+      #   openxlsx::addWorksheet(workbook,
+      #                          sheetName = "SNV_INDELS")
+      #
+      #   ## set automatic column widths
+      #   openxlsx::setColWidths(
+      #     workbook,
+      #     sheet = "SNV_INDELS",
+      #     cols = 1:ncol(report[["content"]][["snv_indel"]][["variant_set"]][[output_format_slim]]),
+      #     widths = "auto")
+      #
+      #   ## write with default Excel Table style
+      #   openxlsx::writeDataTable(
+      #     workbook,
+      #     sheet = "SNV_INDELS",
+      #     x = report[["content"]][["snv_indel"]][["variant_set"]][[output_format_slim]],
+      #     startRow = 1,
+      #     startCol = 1,
+      #     colNames = TRUE,
+      #     tableStyle = "TableStyleMedium15")
+      #
+      #   openxlsx::saveWorkbook(
+      #     workbook,
+      #     fnames[["xlsx"]],
+      #     overwrite = TRUE)
+      # }
+    }
+  }
+
+  if (output_format == "msigs_tsv") {
+    if (
+      NROW(report[["content"]][["m_signature_mp"]][["result"]][["tsv"]]) > 0) {
+      pcgrr::log4r_info("------")
+      pcgrr::log4r_info(paste0(
+        "Writing tab-separated output file with details ",
+        "of contributing mutational signatures - ('tsv')"))
+      utils::write.table(report[["content"]][["m_signature_mp"]][["result"]][["tsv"]],
+                  file = fnames[[output_format]], sep = "\t", col.names = TRUE,
+                  row.names = FALSE, quote = FALSE)
+    }
+  }
+
+  if (output_format == "cna_tsv") {
+    if (NROW(report[["content"]][["cna"]][["variant_set"]][["tsv"]]) > 0) {
+      pcgrr::log4r_info("------")
+      pcgrr::log4r_info(
+        "Writing CNA tab-separated output file with PCGR annotations (.tsv.gz)")
+      utils::write.table(report[["content"]][["cna"]][["variant_set"]][["tsv"]],
+                  file = fnames[["cna_tsv"]], sep = "\t", col.names = TRUE,
+                  row.names = FALSE, quote = FALSE)
+      gzip_command <- paste0("gzip -f ", shQuote(fnames[["cna_tsv"]]))
+      system(gzip_command, intern = FALSE)
+    }
+  }
+
+}
+
diff --git a/pcgrr/R/msi.R b/pcgrr/R/msi.R
index f1e2a7ae..06a489b5 100644
--- a/pcgrr/R/msi.R
+++ b/pcgrr/R/msi.R
@@ -1,7 +1,7 @@
 #' Function that predicts MSI status based on fraction of indels among calls
 #'
-#' @param vcf_data_df data frame with somatic mutations/indels
-#' @param pcgr_data object with PCGR datasets
+#' @param variant_set data frame with somatic mutations/indels
+#' @param ref_data PCGR reference data object
 #' @param msi_prediction_model statistical model for MSI prediction
 #' @param msi_prediction_dataset underlying dataset from TCGA used for
 #' development of statistical classifier
@@ -10,40 +10,66 @@
 #' @return msi_data
 #'
 #' @export
-predict_msi_status <- function(vcf_data_df, pcgr_data,
+predict_msi_status <- function(variant_set,
+                               ref_data,
                                msi_prediction_model,
                                msi_prediction_dataset,
-                               target_size_mb, sample_name = "Test") {
+                               target_size_mb,
+                               sample_name = "Test") {
 
   mutations_valid <- pcgrr::get_valid_chromosomes(
-    vcf_data_df,
+    variant_set,
     chromosome_column = "CHROM",
-    bsg = pcgr_data[["assembly"]][["bsg"]])
+    bsg = ref_data[["assembly"]][["bsg"]])
   mutations_valid <- mutations_valid |>
-    dplyr::select(.data$CHROM, .data$POS, .data$REF, .data$ALT, .data$CONSEQUENCE, .data$SYMBOL,
-                  .data$GENOMIC_CHANGE, .data$VARIANT_CLASS, .data$PROTEIN_DOMAIN,
-                  .data$GENE_NAME, .data$PROTEIN_CHANGE, .data$MUTATION_HOTSPOT,
-                  .data$CLINVAR, .data$TCGA_FREQUENCY, .data$AF_TUMOR, .data$DP_TUMOR,
-                  .data$AF_CONTROL, .data$DP_CONTROL, .data$CALL_CONFIDENCE,
-                  .data$SIMPLEREPEATS_HIT, .data$WINMASKER_HIT)
+    dplyr::select(
+      .data$CHROM,
+      .data$POS,
+      .data$REF,
+      .data$ALT,
+      .data$CONSEQUENCE,
+      .data$SYMBOL,
+      .data$GENOMIC_CHANGE,
+      .data$VARIANT_CLASS,
+      .data$PROTEIN_DOMAIN,
+      .data$GENENAME,
+      .data$PROTEIN_CHANGE,
+      .data$MUTATION_HOTSPOT,
+      .data$CLINVAR,
+      .data$TCGA_FREQUENCY,
+      .data$AF_TUMOR,
+      .data$DP_TUMOR,
+      .data$AF_CONTROL,
+      .data$DP_CONTROL,
+      .data$CALL_CONFIDENCE,
+      .data$SIMPLEREPEATS_HIT,
+      .data$WINMASKER_HIT)
 
   vcf_df_repeatAnnotated <- mutations_valid |>
-    dplyr::mutate(repeatStatus =
-                    dplyr::if_else(.data$SIMPLEREPEATS_HIT == T,
-                                   "simpleRepeat", as.character(NA))) |>
-    dplyr::mutate(winMaskStatus =
-                    dplyr::if_else(.data$WINMASKER_HIT == T,
-                                   "winMaskDust", as.character(NA)))
-
-  msi_stats <- data.frame("sample_name" = sample_name, stringsAsFactors = F)
+    dplyr::mutate(
+      repeatStatus =
+        dplyr::if_else(
+          .data$SIMPLEREPEATS_HIT == T,
+          "simpleRepeat", as.character(NA))) |>
+    dplyr::mutate(
+      winMaskStatus =
+        dplyr::if_else(
+          .data$WINMASKER_HIT == T,
+          "winMaskDust", as.character(NA)))
+
+  msi_stats <- data.frame(
+    "sample_name" = sample_name, stringsAsFactors = F)
 
   msi_stats1 <- vcf_df_repeatAnnotated |>
-    dplyr::filter(!is.na(.data$repeatStatus) & (.data$VARIANT_CLASS == "insertion" |
-                                            .data$VARIANT_CLASS == "deletion")) |>
+    dplyr::filter(
+      !is.na(.data$repeatStatus) &
+        (.data$VARIANT_CLASS == "insertion" |
+           .data$VARIANT_CLASS == "deletion")) |>
     dplyr::summarise(repeat_indels = dplyr::n())
 
   msi_stats2 <- vcf_df_repeatAnnotated |>
-    dplyr::filter(!is.na(.data$repeatStatus) & .data$VARIANT_CLASS == "SNV") |>
+    dplyr::filter(!is.na(.data$repeatStatus) &
+                    .data$VARIANT_CLASS == "SNV") |>
     dplyr::summarise(repeat_SNVs = dplyr::n())
 
   msi_stats3 <- vcf_df_repeatAnnotated |>
@@ -57,7 +83,8 @@ predict_msi_status <- function(vcf_data_df, pcgr_data,
     dplyr::summarise(winmask_indels = dplyr::n())
 
   winmask_snvs <- vcf_df_repeatAnnotated |>
-    dplyr::filter(!is.na(.data$winMaskStatus) & .data$VARIANT_CLASS == "SNV") |>
+    dplyr::filter(!is.na(.data$winMaskStatus) &
+                    .data$VARIANT_CLASS == "SNV") |>
     dplyr::summarise(winmask_SNVs = dplyr::n())
 
   winmask_tot <- vcf_df_repeatAnnotated |>
@@ -71,7 +98,8 @@ predict_msi_status <- function(vcf_data_df, pcgr_data,
     dplyr::summarise(nonRepeat_indels = dplyr::n())
 
   msi_stats5 <- vcf_df_repeatAnnotated |>
-    dplyr::filter(is.na(.data$repeatStatus) & .data$VARIANT_CLASS == "SNV") |>
+    dplyr::filter(is.na(.data$repeatStatus) &
+                    .data$VARIANT_CLASS == "SNV") |>
     dplyr::summarise(nonRepeat_SNVs = dplyr::n())
 
   msi_stats6 <- vcf_df_repeatAnnotated |>
@@ -118,42 +146,48 @@ predict_msi_status <- function(vcf_data_df, pcgr_data,
     dplyr::filter(
       .data$SYMBOL == "MSH3" &
         stringr::str_detect(
-          .data$CONSEQUENCE, "frameshift_|missense_|splice_|stop_|start_|frame_")) |>
+          .data$CONSEQUENCE,
+          "frameshift_|missense_|splice_|stop_|start_|frame_")) |>
     dplyr::summarise(MSH3 = dplyr::n())
 
   msi_stats14 <- vcf_df_repeatAnnotated |>
     dplyr::filter(
       .data$SYMBOL == "MSH6" &
         stringr::str_detect(
-          .data$CONSEQUENCE, "frameshift_|missense_|splice_|stop_|start_|frame_")) |>
+          .data$CONSEQUENCE,
+          "frameshift_|missense_|splice_|stop_|start_|frame_")) |>
     dplyr::summarise(MSH6 = dplyr::n())
 
   msi_stats15 <- vcf_df_repeatAnnotated |>
     dplyr::filter(
       .data$SYMBOL == "PMS1" &
         stringr::str_detect(
-          .data$CONSEQUENCE, "frameshift_|missense_|splice_|stop_|start_|frame_")) |>
+          .data$CONSEQUENCE,
+          "frameshift_|missense_|splice_|stop_|start_|frame_")) |>
     dplyr::summarise(PMS1 = dplyr::n())
 
   msi_stats16 <- vcf_df_repeatAnnotated |>
     dplyr::filter(
       .data$SYMBOL == "PMS2" &
         stringr::str_detect(
-          .data$CONSEQUENCE, "frameshift_|missense_|splice_|stop_|start_|frame_")) |>
+          .data$CONSEQUENCE,
+          "frameshift_|missense_|splice_|stop_|start_|frame_")) |>
     dplyr::summarise(PMS2 = dplyr::n())
 
   msi_stats17 <- vcf_df_repeatAnnotated |>
     dplyr::filter(
       .data$SYMBOL == "POLE" &
         stringr::str_detect(
-          .data$CONSEQUENCE, "frameshift_|missense_|splice_|stop_|start_|frame_")) |>
+          .data$CONSEQUENCE,
+          "frameshift_|missense_|splice_|stop_|start_|frame_")) |>
     dplyr::summarise(POLE = dplyr::n())
 
   msi_stats18 <- vcf_df_repeatAnnotated |>
     dplyr::filter(
       .data$SYMBOL == "POLD1" &
         stringr::str_detect(
-          .data$CONSEQUENCE, "frameshift_|missense_|splice_|stop_|start_|frame_")) |>
+          .data$CONSEQUENCE,
+          "frameshift_|missense_|splice_|stop_|start_|frame_")) |>
     dplyr::summarise(POLD1 = dplyr::n())
 
   msi_stats1$sample_name <- sample_name
@@ -214,9 +248,14 @@ predict_msi_status <- function(vcf_data_df, pcgr_data,
   msi_stats$tmb <- as.numeric(msi_stats$indelSNVs) / target_size_mb
   msi_stats$tmb_indel <- as.numeric(msi_stats$indels) / target_size_mb
   msi_stats$tmb_snv <- as.numeric(msi_stats$SNVs) / target_size_mb
-  for (stat in c("fracWinMaskIndels", "fracWinMaskSNVs", "fracRepeatIndels",
-                 "fracRepeatIndels", "fracNonRepeatIndels", "fracIndels",
-                 "tmb", "tmb_snv", "tmb_indel")) {
+  for (stat in c("fracWinMaskIndels",
+                 "fracWinMaskSNVs",
+                 "fracRepeatIndels",
+                 "fracRepeatIndels",
+                 "fracNonRepeatIndels",
+                 "fracIndels",
+                 "tmb", "tmb_snv",
+                 "tmb_indel")) {
     if (nrow(msi_stats[is.na(msi_stats[stat]), ]) > 0) {
       msi_stats[is.na(msi_stats[stat]), ][stat] <- 0
     }
@@ -224,34 +263,45 @@ predict_msi_status <- function(vcf_data_df, pcgr_data,
 
   mmr_pol_df <- mutations_valid |>
     dplyr::filter(
-      stringr::str_detect(.data$SYMBOL,
-                          "^(MLH1|MLH3|MSH2|MSH3|MSH6|PMS1|PMS2|POLD1|POLE)$") &
-        stringr::str_detect(.data$CONSEQUENCE,
-                            "frameshift_|missense_|splice_|stop_|inframe_"))
-  mmr_pol_df <- dplyr::select(mmr_pol_df, -c(.data$CHROM, .data$POS, .data$REF, .data$ALT))
-  mmr_pol_df <- dplyr::rename(mmr_pol_df, GENE = .data$SYMBOL)
-  mmr_pol_df <- mmr_pol_df |>
-    dplyr::select(.data$GENE, .data$CONSEQUENCE, .data$PROTEIN_CHANGE,
-                  .data$GENE_NAME, .data$VARIANT_CLASS, .data$PROTEIN_DOMAIN,
-                  dplyr::everything())
-
-  msi_predictors <- c("fracWinMaskIndels", "fracWinMaskSNVs",
-                      "fracRepeatIndels",
-                      "fracNonRepeatIndels",
-                      "fracIndels", "MLH1", "MLH3", "MSH2",
-                      "MSH3", "MSH6", "PMS1",
-                      "PMS2", "POLD1", "POLE", "tmb",
-                      "tmb_indel", "tmb_snv")
-  msi_class <- stats::predict(msi_prediction_model,
-                       dplyr::select(msi_stats, msi_predictors))
+      stringr::str_detect(
+        .data$SYMBOL,
+        "^(MLH1|MLH3|MSH2|MSH3|MSH6|PMS1|PMS2|POLD1|POLE)$") &
+        stringr::str_detect(
+          .data$CONSEQUENCE,
+          "frameshift_|missense_|splice_|stop_|inframe_")) |>
+    dplyr::select(-c("CHROM","POS","REF","ALT")) |>
+    dplyr::rename(GENE = .data$SYMBOL) |>
+    dplyr::select(
+      .data$GENE,
+      .data$CONSEQUENCE,
+      .data$PROTEIN_CHANGE,
+      .data$GENENAME,
+      .data$VARIANT_CLASS,
+      .data$PROTEIN_DOMAIN,
+      dplyr::everything())
+
+  msi_predictors <- c(
+    "fracWinMaskIndels",
+    "fracWinMaskSNVs",
+    "fracRepeatIndels",
+    "fracNonRepeatIndels",
+    "fracIndels", "MLH1",
+    "MLH3", "MSH2",
+    "MSH3", "MSH6", "PMS1",
+    "PMS2", "POLD1",
+    "POLE", "tmb",
+    "tmb_indel", "tmb_snv")
+
+  msi_class <- stats::predict(
+    msi_prediction_model,
+    dplyr::select(msi_stats, msi_predictors))
+
   if (msi_class == "MSS") {
     msi_stats$predicted_class <- "MSS (Microsatellite stable)"
-    #msi_stats$vb <- "MSI status:\nMSS"
     msi_stats$vb <- "MSS"
   }
   else{
     msi_stats$predicted_class <- "MSI.H (Microsatellite instability - high)"
-    #msi_stats$vb <- "MSI status:\nMSI - High"
     msi_stats$vb <- "MSI - High"
   }
   pcgrr::log4r_info(paste0("Predicted MSI status: ",
@@ -268,34 +318,38 @@ predict_msi_status <- function(vcf_data_df, pcgr_data,
 
 #' Function that generates MSI prediction data for PCGR report
 #'
-#' @param sample_calls variant calls subject to mutational signature analysis
-#' @param pcgr_data object with PCGR annotation data
-#' @param sample_name sample identifier
-#' @param pcgr_config Object with PCGR configuration parameters
+#' @param variant_set variant calls subject to MSI classification
+#' @param ref_data PCGR reference data object
+#' @param settings PCGR run configuration settings
 #'
 #' @export
-generate_report_data_msi <- function(sample_calls,
-                                     pcgr_data,
-                                     sample_name,
-                                     pcgr_config) {
+generate_report_data_msi <- function(
+    variant_set,
+    ref_data = NULL,
+    settings = NULL) {
+
+  pcg_report_msi <- pcgrr::init_msi_content()
 
-  pcg_report_msi <- pcgrr::init_report(config =pcgr_config,
-                                       class = "msi")
   pcgrr::log4r_info("------")
   pcgrr::log4r_info("Predicting microsatellite instability status")
 
-  msi_sample_calls <- sample_calls |> dplyr::filter(.data$EXONIC_STATUS == "exonic")
-  pcgrr::log4r_info(paste0("n = ",
-                           nrow(msi_sample_calls),
-                           " exonic variants used for MSI prediction"))
+  msi_sample_calls <- variant_set |>
+    dplyr::filter(.data$EXONIC_STATUS == "exonic")
+  pcgrr::log4r_info(
+    paste0("n = ",
+           nrow(msi_sample_calls),
+           " exonic variants used for MSI prediction"))
   if (nrow(msi_sample_calls) >= 1) {
     pcg_report_msi[["prediction"]] <-
       pcgrr::predict_msi_status(
-        msi_sample_calls, pcgr_data,
-        msi_prediction_model = pcgr_data[["msi"]][["model"]][["model"]],
-        msi_prediction_dataset = pcgr_data[["msi"]][["model"]][["tcga_dataset"]],
-        target_size_mb = pcgr_config$assay_props$target_size_mb,
-        sample_name = sample_name)
+        variant_set = msi_sample_calls,
+        ref_data,
+        msi_prediction_model = ref_data[["msi"]][["model"]],
+        msi_prediction_dataset = ref_data[["msi"]][["tcga_dataset"]],
+        target_size_mb =
+          settings$conf$assay_properties$effective_target_size_mb,
+        sample_name = settings$sample_id)
+
     pcg_report_msi[["eval"]] <- TRUE
   }
   else{
@@ -318,14 +372,16 @@ generate_report_data_msi <- function(sample_calls,
 
 msi_indel_fraction_plot <- function(tcga_msi_dataset, indel_fraction) {
 
-  color_vec <- utils::head(pcgrr::color_palette[["tier"]][["values"]],2)
+  color_vec <- utils::head(
+    pcgrr::color_palette[["tier"]][["values"]], 2)
   names(color_vec) <- c("MSS", "MSI.H")
 
   p <- ggplot2::ggplot(data = tcga_msi_dataset) +
-    ggplot2::geom_histogram(mapping = ggplot2::aes(x = .data$fracIndels,
-                                                   color = .data$MSI_status,
-                                                   fill = .data$MSI_status),
-                            position = "dodge", binwidth = 0.01) +
+    ggplot2::geom_histogram(
+      mapping = ggplot2::aes(x = .data$fracIndels,
+                             color = .data$MSI_status,
+                             fill = .data$MSI_status),
+      position = "dodge", binwidth = 0.01) +
     ggplot2::ylab("Number of TCGA samples") +
     ggplot2::scale_fill_manual(values = color_vec) +
     ggplot2::scale_color_manual(values = color_vec) +
diff --git a/pcgrr/R/mutation.R b/pcgrr/R/mutation.R
index 75056746..04721cff 100644
--- a/pcgrr/R/mutation.R
+++ b/pcgrr/R/mutation.R
@@ -44,95 +44,3 @@ assign_mutation_type <- function(var_df) {
   return(var_df)
 }
 
-
-#' Function that transforms a tier-structured variant data frame
-#' into a MAF-like data frame (for input to 2020plus, MutSigCV)
-#'
-#' @param maf_df data frame with somatic mutations
-#' @param genome_seq BSgenome object
-#' @param seqinfo seqinfo object
-
-#' @return maf_all
-#'
-#' @export
-get_proper_maf_alleles <- function(maf_df, genome_seq, seqinfo) {
-
-  maf_df_valid <-
-    pcgrr::get_valid_chromosomes(maf_df,
-                                 chromosome_column = "Chromosome",
-                                 bsg = genome_seq)
-  if ("end" %in% colnames(maf_df_valid)) {
-    maf_df_valid <- dplyr::select(maf_df_valid, -.data$end)
-  }
-
-  maf_snv <- maf_df_valid |>
-    dplyr::filter(.data$Variant_Type == "SNP") |>
-    dplyr::mutate(REF = .data$Reference_Allele,
-                  ALT = .data$Tumor_Seq_Allele2, POS = .data$Start_Position)
-
-  maf_all <- maf_snv
-  maf_ins <- dplyr::filter(maf_df_valid, .data$Variant_Type == "INS")
-  maf_del <- dplyr::filter(maf_df_valid, .data$Variant_Type == "DEL")
-
-  if (nrow(maf_del) > 0) {
-    ## get appropriate alleles (VCF-like) of reference and alternate (DELETIONS)
-    maf_del_gr <-
-      GenomicRanges::makeGRangesFromDataFrame(maf_del, keep.extra.columns = T,
-                                              seqinfo = seqinfo,
-                                              seqnames.field = "Chromosome",
-                                              start.field = "Start_Position",
-                                              end.field = "End_Position",
-                                              ignore.strand = T,
-                                              starts.in.df.are.0based = F)
-
-    maf_del_flank_gr <- GenomicRanges::flank(maf_del_gr, width = 1, start = T)
-    maf_del_flank_seq <- Biostrings::getSeq(genome_seq, maf_del_flank_gr)
-    maf_del_seq <- Biostrings::getSeq(genome_seq, maf_del_gr)
-    vcf_alleles_alt <-
-      data.frame(ALT =
-                   toupper(unlist(strsplit(toString(maf_del_flank_seq), ", "))),
-                 stringsAsFactors = F)
-    vcf_alleles_ref <-
-      data.frame(REF =
-                   toupper(unlist(strsplit(toString(maf_del_seq), ", "))),
-                 stringsAsFactors = F)
-    vcf_alleles <- cbind(vcf_alleles_ref, vcf_alleles_alt)
-    vcf_alleles$REF <- paste0(vcf_alleles$ALT, vcf_alleles$REF)
-    maf_del <- cbind(maf_del, vcf_alleles)
-    maf_del$POS <- maf_del$Start_Position - 1
-
-    maf_all <- rbind(maf_all, maf_del)
-  }
-
-  if (nrow(maf_ins) > 0) {
-    ## get appropriate alleles (VCF-like) of reference and alternate (INSERTIONS)
-    maf_ins_gr <-
-      GenomicRanges::makeGRangesFromDataFrame(maf_ins,
-                                              keep.extra.columns = T,
-                                              seqinfo = seqinfo,
-                                              seqnames.field = "Chromosome",
-                                              start.field = "Start_Position",
-                                              end.field = "Start_Position",
-                                              ignore.strand = T,
-                                              starts.in.df.are.0based = F)
-    maf_ins_seq <- Biostrings::getSeq(genome_seq, maf_ins_gr)
-    vcf_alleles_alt <-
-      data.frame(REF =
-                   toupper(unlist(strsplit(toString(maf_ins_seq), ", "))),
-                 stringsAsFactors = F)
-    maf_ins <- cbind(maf_ins, vcf_alleles_alt)
-    maf_ins$ALT <- paste0(maf_ins$REF, maf_ins$Tumor_Seq_Allele2)
-    maf_ins$POS <- maf_ins$Start_Position
-
-    maf_all <- rbind(maf_all, maf_ins)
-  }
-
-
-  maf_all$CHROM <- stringr::str_replace(maf_all$Chromosome, "chr", "")
-  maf_all$GENOMIC_CHANGE <-
-    paste(paste(paste(paste0("g.chr", maf_all$CHROM),
-                      maf_all$POS, sep = ":"), maf_all$REF, sep = ":"),
-          maf_all$ALT, sep = ">")
-  return(maf_all)
-
-}
diff --git a/pcgrr/R/mutational_burden.R b/pcgrr/R/mutational_burden.R
index 8b5c2469..64dcbd52 100644
--- a/pcgrr/R/mutational_burden.R
+++ b/pcgrr/R/mutational_burden.R
@@ -30,7 +30,7 @@ generate_report_data_tmb <- function(sample_calls,
 
   pcg_report_tmb[["eval"]] <- TRUE
 
-  if(NROW(sample_calls) > 0){
+  if (NROW(sample_calls) > 0) {
     pcg_report_tmb[["v_stat"]][["n_tmb"]] <-
       sample_calls |>
       dplyr::filter(
@@ -132,7 +132,7 @@ plot_tmb_primary_site_tcga <- function(tcga_tmb, p_site = "Liver",
                    legend.text = ggplot2::element_text(family = "Helvetica",
                                                        size = 14))
 
-  if(tmb_estimate > 0){
+  if (tmb_estimate > 0) {
     tmb_plot_site <- tmb_plot_site +
       ggplot2::geom_hline(
         yintercept = as.numeric(tmb_estimate), size = 0.9,
diff --git a/pcgrr/R/mutational_signatures.R b/pcgrr/R/mutational_signatures.R
index 5a2353f1..841debca 100644
--- a/pcgrr/R/mutational_signatures.R
+++ b/pcgrr/R/mutational_signatures.R
@@ -1,62 +1,84 @@
 #' Function that generates mutational signatures data for PCGR report
 #'
-#' @param vcf_fname VCF file processed with PCGR annotation pipeline -
-#' possibly filtered for depth/allelic fraction
-#' @param pcgr_data object with PCGR annotation data
-#' @param sample_name sample identifier
-#' @param pcgr_config Object with PCGR configuration parameters
-#' @param type_specific logical indicating if all reference signatures are to be
-#' included (F) rather than those known to be prevalent in the tumor (T)
+#' @param callset_snv Somatic callset (SNV)
+#' @param ref_data PCGR reference data object
+#' @param settings PCGR configuration settings object
 #'
 #' @export
 generate_report_data_signatures_mp <-
-  function(vcf_fname,
-           pcgr_data,
-           sample_name,
-           pcgr_config,
-           type_specific = T) {
+  function(callset_snv = NULL,
+           ref_data = NULL,
+           settings = NULL) {
+
+  cosmic_metadata <-
+    ref_data$metadata |>
+    dplyr::filter(.data$source_abbreviation == "cosmic_mutsigs") |>
+    dplyr::select(c("source_version")) |>
+    dplyr::mutate(
+      source_version = stringr::str_replace_all(
+        .data$source_version, "[\r\n]" , ""))
 
   pcgrr::log4r_info("------")
-  pcgrr::log4r_info(paste0("Identifying weighted contributions of reference ",
-                    "mutational signatures (COSMIC v3.2) using ",
-                    "MutationalPatterns"))
-  assay <- tolower(pcgr_config$assay_props$type)
+  pcgrr::log4r_info(
+    paste0("Identifying weighted contributions of reference ",
+           "mutational signatures (COSMIC ",
+           cosmic_metadata$source_version,") using ",
+           "MutationalPatterns"))
+  #assay <- tolower(pcgr_config$assay_props$type)
+  assay <- tolower(settings$conf$assay_properties$type)
+
+  vcf_name_mutsig_analysis <-
+    file.path(settings$output_dir,
+              paste(
+                settings$sample_id,
+                stringi::stri_rand_strings(
+                  1, 15, pattern = "[A-Za-z0-9]"),
+                "mutational_patterns_input.vcf",
+                sep="."))
+
+  pcgrr::write_processed_vcf(
+      calls = callset_snv$variant,
+      sample_name = settings$sample_id,
+      output_directory = settings$output_dir,
+      vcf_fname = vcf_name_mutsig_analysis)
 
 
   pcg_report_signatures <-
-    pcgrr::init_report(config = pcgr_config,
-                       class = "m_signature_mp")
+    pcgrr::init_m_signature_content()
+
+  fit_signatures_to_ttype <- !as.logical(
+    settings$conf$somatic_snv$mutational_signatures$all_reference_signatures
+  )
 
   ## Retrieve relevant signatures for the tumor in question
   prevalent_site_signatures <- NULL
-  if(type_specific == T){
+  if (fit_signatures_to_ttype == T) {
     prevalent_site_signatures <-
-      pcgrr::get_prevalent_site_signatures(
-        site = pcgr_config[["t_props"]][["tumor_type"]],
+      pcgrr::get_prevalent_site_signatures2(
+        site = settings$conf$sample_properties$site,
         min_prevalence_pct =
-          pcgr_config[["msigs"]][["prevalence_reference_signatures"]],
-        pcgr_data = pcgr_data,
+          settings$conf$somatic_snv$mutational_signatures$prevalence_reference_signatures,
+        ref_data = ref_data,
         incl_poss_artifacts =
-          pcgr_config[["msigs"]][["include_artefact_signatures"]])
-  }
-  if(type_specific == F){
+          settings$conf$somatic_snv$mutational_signatures$include_artefact_signatures)
+  }else{
     prevalent_site_signatures <-
-      pcgrr::get_prevalent_site_signatures(
+      pcgrr::get_prevalent_site_signatures2(
         site = "Any",
         min_prevalence_pct =
-          pcgr_config[["msigs"]][["prevalence_reference_signatures"]],
-        pcgr_data = pcgr_data,
+          settings$conf$somatic_snv$mutational_signatures$prevalence_reference_signatures,
+        ref_data = ref_data,
         incl_poss_artifacts =
-          pcgr_config[["msigs"]][["include_artefact_signatures"]])
+          settings$conf$somatic_snv$mutational_signatures$include_artefact_signatures)
   }
 
   ## read MutationalPattern VCF file
-  if(file.exists(vcf_fname)){
+  if (file.exists(glue::glue("{vcf_name_mutsig_analysis}.gz"))) {
     vcfs <- suppressMessages(suppressWarnings(
       MutationalPatterns::read_vcfs_as_granges(
-        vcf_files = vcf_fname,
-        sample_names = sample_name,
-        genome = pcgr_data[["assembly"]][["ref_genome"]],
+        vcf_files = glue::glue("{vcf_name_mutsig_analysis}.gz"),
+        sample_names = settings$sample_id,
+        genome = ref_data$assembly$bsg,
         predefined_dbs_mbs = T),
       )
     )
@@ -66,7 +88,7 @@ generate_report_data_signatures_mp <-
 
     pcg_report_signatures[["eval"]] <- TRUE
 
-    if (length(vcfs[[1]]) >= pcgr_config[["msigs"]][["mutation_limit"]]) {
+    if (length(vcfs[[1]]) >= settings$conf$somatic_snv$mutational_signatures[["mutation_limit"]]) {
 
       ## assign variants to variant set
       pcg_report_signatures[["variant_set"]][["all"]] <-
@@ -84,25 +106,27 @@ generate_report_data_signatures_mp <-
       mut_mat <-
         MutationalPatterns::mut_matrix(
           vcf_list = vcfs,
-          ref_genome = pcgr_data[["assembly"]][["ref_genome"]],
+          ref_genome = ref_data$assembly$bsg,
           extension = 1)
       mut_mat <- mut_mat + 0.0001
 
-      ## get reference signatures (COSMIC v3.2)
+      ## get reference signatures (COSMIC v3.4)
       all_reference_signatures <-
         MutationalPatterns::get_known_signatures(
         muttype = "snv",
         genome = stringr::str_replace(
-          pcgr_data[["assembly"]][["grch_name"]], "grc", "GRC"
+          ref_data$assembly$grch_name, "grc", "GRC"
         ),
         incl_poss_artifacts =
-          pcgr_config[["msigs"]][["include_artefact_signatures"]]
+          as.logical(
+            settings$conf$somatic_snv$mutational_signatures$include_artefact_signatures
+          )
       )
 
       ## select subset of signatures based on those prevalent in tumor type/tissue
       selected_sigs <- intersect(
         colnames(all_reference_signatures),
-        unique(prevalent_site_signatures$aetiology$signature_id)
+        unique(prevalent_site_signatures$aetiology$SIGNATURE_ID)
       )
       selected_reference_signatures <-
         all_reference_signatures[, selected_sigs]
@@ -124,33 +148,38 @@ generate_report_data_signatures_mp <-
       ## assess the relative contribution of each reference mutational signature
       tot <- as.data.frame(
         stats::setNames(reshape2::melt(colSums(fit_ref[["contribution"]])),
-                 c("tot"))) |>
-        dplyr::mutate(sample_id = as.character(rownames(.))) |>
-        magrittr::set_rownames(NULL)
+                 c("tot")))
+      tot$sample_id <- rownames(tot)
+      rownames(tot) <- NULL
 
       ## add information on aetiologies, and aggregate contributions
       ## pr. aetiology
       contributions_per_signature <-
         as.data.frame(stats::setNames(reshape2::melt(fit_ref[["contribution"]]),
-                               c("signature_id", "sample_id",
+                               c("SIGNATURE_ID", "sample_id",
                                  "contribution_raw"))) |>
-        dplyr::mutate(signature_id = as.character(.data$signature_id)) |>
+        dplyr::mutate(SIGNATURE_ID = as.character(.data$SIGNATURE_ID)) |>
         dplyr::mutate(sample_id = as.character(.data$sample_id)) |>
         dplyr::left_join(tot, by = "sample_id") |>
         dplyr::mutate(prop_signature = round(as.numeric(.data$contribution_raw) / tot,
                                              digits = 3)) |>
-        dplyr::select(.data$signature_id, .data$sample_id, .data$prop_signature) |>
+        dplyr::select(.data$SIGNATURE_ID,
+                      .data$sample_id,
+                      .data$prop_signature) |>
         dplyr::filter(.data$prop_signature > 0) |>
         dplyr::arrange(dplyr::desc(.data$prop_signature)) |>
         dplyr::left_join(
           dplyr::select(
-            pcgr_data[["mutational_signature"]],
-            .data$signature_id,
-            .data$aetiology,
-            .data$comments,
-            .data$aetiology_keyword),
-          by = c("signature_id")) |>
-        dplyr::rename(group = .data$aetiology_keyword) |>
+            ref_data$misc$mutational_signature,
+            .data$SIGNATURE_ID,
+            .data$AETIOLOGY,
+            .data$COMMENTS,
+            .data$AETIOLOGY_KEYWORD),
+          by = c("SIGNATURE_ID")) |>
+        dplyr::rename(group = .data$AETIOLOGY_KEYWORD,
+                      signature_id = .data$SIGNATURE_ID,
+                      aetiology = .data$AETIOLOGY,
+                      comments = .data$COMMENTS) |>
         dplyr::mutate(
           contribution =
             paste0(round(.data$prop_signature * 100, digits = 2), "%")) |>
@@ -160,7 +189,7 @@ generate_report_data_signatures_mp <-
         contributions_per_signature |>
           dplyr::group_by(.data$group) |>
           dplyr::summarise(prop_group = sum(.data$prop_signature),
-                         signature_id_group = paste(.data$signature_id, collapse=", "),
+                         signature_id_group = paste(.data$SIGNATURE_ID, collapse=", "),
                          .groups = "drop")
 
       )
@@ -187,7 +216,7 @@ generate_report_data_signatures_mp <-
       ## choose only signatures attributed to 25 different aetiologies
       missing_aetiologies <- contributions_per_signature |>
         dplyr::filter(is.na(.data$col))
-      if(nrow(missing_aetiologies) > 0){
+      if (nrow(missing_aetiologies) > 0) {
         log4r_warn(paste0("Found contributions from more than 25 aetiologies - ",
                           "showing signatures from 25 different aetiologies only"))
         contributions_per_signature <- contributions_per_signature |>
@@ -201,31 +230,33 @@ generate_report_data_signatures_mp <-
       contributions <- list()
       contributions[["per_group"]] <-  contributions_per_group
       contributions[["per_signature"]] <-  contributions_per_signature
+      tsv_data <- data.frame()
 
       ## Get output for tab-separated file
       ## - contribution per signature id and reference signatures used
-      if(!is.null(prevalent_site_signatures$aetiology) &
-         NROW(contributions[["per_signature"]]) > 0){
-        if("signature_id" %in% colnames(prevalent_site_signatures$aetiology)){
-          reference_sigs <- paste(sort(prevalent_site_signatures$aetiology$signature_id),
+      if (!is.null(prevalent_site_signatures$aetiology) &
+         NROW(contributions[["per_signature"]]) > 0) {
+        if ("SIGNATURE_ID" %in% colnames(prevalent_site_signatures$aetiology)) {
+          reference_sigs <- paste(sort(prevalent_site_signatures$aetiology$SIGNATURE_ID),
                                   collapse=",")
           tsv_data <- contributions[["per_signature"]] |>
             pcgrr::remove_cols_from_df(
-              cnames = c("contribution","col","aetiology","comments")) |>
+              cnames = c("contribution","col","AETIOLOGY","COMMENTS")) |>
             dplyr::mutate(
-              all_reference_signatures = !type_specific,
-              tumor_type = pcgr_config[["t_props"]][["tumor_type"]],
-              reference_collection = "COSMIC_v32",
+              all_reference_signatures = !fit_signatures_to_ttype,
+              tumor_type = settings$conf$sample_properties$site,
+              reference_collection = "COSMIC_v34",
               reference_signatures = reference_sigs,
               fitting_accuracy =
-                round(sim_original_reconstructed$cosine_sim * 100, digits = 1))
+                round(sim_original_reconstructed$cosine_sim * 100, digits = 1)) |>
+            dplyr::rename(signature_id = SIGNATURE_ID)
         }
       }
 
-      vr <- vcfs[[sample_name]]
+      vr <- vcfs[[settings$sample_id]]
       GenomeInfoDb::seqlengths(vr) <-
-        GenomeInfoDb::seqlengths(pcgr_data[["assembly"]][["bsg"]])[GenomeInfoDb::seqlevels(pcgr_data[["assembly"]][["bsg"]]) %in% unique(GenomeInfoDb::seqlevels(vr))]
-      chromosomes <- utils::head(GenomeInfoDb::seqnames(pcgr_data[["assembly"]][["bsg"]]), 24)
+        GenomeInfoDb::seqlengths(ref_data$assembly$bsg)[GenomeInfoDb::seqlevels(ref_data$assembly$bsg) %in% unique(GenomeInfoDb::seqlevels(vr))]
+      chromosomes <- utils::head(GenomeInfoDb::seqnames(ref_data$assembly$bsg), 24)
 
       pcg_report_signatures[["result"]][["vr"]] <- vr
       pcg_report_signatures[["result"]][["mut_mat"]] <- mut_mat
@@ -252,21 +283,23 @@ generate_report_data_signatures_mp <-
                  nrow(pcg_report_signatures[["variant_set"]][["all"]]),
                  ") for reconstruction of mutational signatures by ",
                  "MutationalPatterns, limit set to ",
-                 pcgr_config[["msigs"]][["mutation_limit"]]))
+                 settings$conf$somatic_snv$mutational_signatures$mutation_limit))
       }
     }
   }
 
+  system(glue::glue("rm -f {vcf_name_mutsig_analysis}*"))
+
   return(pcg_report_signatures)
 }
 
 
 #' Function that retrieves prevalent signatures for a given tumor type/primary site
-#' Data is collected from COSMIC v3.2.
+#' Data is collected from COSMIC v3.4.
 #'
 #' @param site Primary tumor site
 #' @param custom_collection Custom collection of signatures from COSMIC
-#' @param pcgr_data PCGR data object
+#' @param ref_data PCGR reference data object
 #' @param min_prevalence_pct Minimum prevalence (pct) of signature in
 #' cohorts associated with primary site -
 #' used to select reference signatures for inclusion in signature reconstruction
@@ -277,15 +310,25 @@ generate_report_data_signatures_mp <-
 get_prevalent_site_signatures <-
   function(site = "Any",
            custom_collection = NULL,
-           pcgr_data = NULL,
+           ref_data = NULL,
            min_prevalence_pct = 5,
            incl_poss_artifacts = T) {
 
-    if(is.null(custom_collection)){
+    cosmic_metadata <-
+      ref_data$metadata |>
+      dplyr::filter(source_abbreviation == "cosmic_mutsigs") |>
+      dplyr::select(source_version) |>
+      dplyr::mutate(
+        source_version = stringr::str_replace_all(
+          source_version, "[\r\n]" , ""))
+
+    if (is.null(custom_collection)) {
       pcgrr::log4r_info(paste0(
         "Retrieving prevalent (prevalence >= ",
         min_prevalence_pct, " percent) reference signatures for ",
-        site, ", using COSMIC v3.2 collection"))
+        site, ", using COSMIC ",
+        cosmic_metadata$source_version,
+        " collection"))
     }
     pcgrr::log4r_info(paste0(
       "Inclusion of mutational signature artefacts (e.g. sequencing artefacts): ",
@@ -293,136 +336,146 @@ get_prevalent_site_signatures <-
 
     invisible(
       assertthat::assert_that(
-        !is.null(pcgr_data[["mutational_signature"]]),
+        !is.null(ref_data$misc$mutational_signature),
         msg =
-          "Cannot load ref. aetiologies (COSMIC v3.2) of mutational signatures"))
+          paste0(
+            "Cannot load ref. aetiologies (COSMIC ",
+            cosmic_metadata$source_version,
+            ") of mutational signatures")))
     invisible(
       assertthat::assert_that(
-        is.data.frame(pcgr_data[["mutational_signature"]]),
+        is.data.frame(ref_data$misc$mutational_signature),
         msg = "Reference aetiologies must be of type data.frame()"))
     invisible(
       assertthat::assert_that(
         min_prevalence_pct == 1 |
-          min_prevalence_pct == 2 | min_prevalence_pct == 5 |
-          min_prevalence_pct == 10 | min_prevalence_pct == 15 |
+          min_prevalence_pct == 2 |
+          min_prevalence_pct == 5 |
+          min_prevalence_pct == 10 |
+          min_prevalence_pct == 15 |
           min_prevalence_pct == 20,
         msg = "Argument 'min_prevalence_pct' must be any of '0, 2, 5, 10, 15 or 20'"))
 
     valid_signature_ids <-
-      unique(pcgr_data[["mutational_signature"]]$signature_id)
+      unique(ref_data$misc$mutational_signature$SIGNATURE_ID)
     signatures_prevalence <- data.frame()
 
-    if(!is.null(custom_collection)){
+    if (!is.null(custom_collection)) {
       invisible(
         assertthat::assert_that(
           is.character(custom_collection),
           msg = "Argument 'custom_collection' must be a character vector"))
 
       pcgrr::log4r_info(paste0(
-        "Retrieving reference signatures from COSMIC v3.2 collection based on user-defined collection (",
+        "Retrieving reference signatures from COSMIC ",
+        cosmic_metadata$source_version,
+        " collection based on user-defined collection (",
         paste(unique(custom_collection), collapse=", "), ")")
       )
       i <- 1
-      while(i <= length(custom_collection)){
-        if(!(custom_collection[i] %in% valid_signature_ids)){
+      while(i <= length(custom_collection)) {
+        if (!(custom_collection[i] %in% valid_signature_ids)) {
           log4r_warn(paste0(
             "Could not find specified custom signature id  '",
-            custom_collection[i], "' in COSMIC v3.2 reference collection",
+            custom_collection[i], "' in COSMIC ",
+            cosmic_metadata$source_version,
+            " reference collection",
             " - ignoring"))
         }
         i <- i + 1
       }
 
       signatures_prevalence <-
-        pcgr_data[["mutational_signature"]] |>
-        dplyr::select(c("signature_id",
-                      "aetiology_keyword",
-                      "aetiology",
-                      "associated_signatures",
-                      "comments")) |>
-        dplyr::filter(.data$signature_id %in% custom_collection) |>
+        ref_data$misc$mutational_signature |>
+        dplyr::select(c("SIGNATURE_ID",
+                      "AETIOLOGY_KEYWORD",
+                      "AETIOLOGY",
+                      "ASSOCIATED_SIGNATURES",
+                      "COMMENTS")) |>
+        dplyr::filter(.data$SIGNATURE_ID %in% custom_collection) |>
         dplyr::distinct()
 
     }else{
 
       unique_sites_with_signature_prevalence <-
-        unique(pcgr_data[["mutational_signatures"]][["primary_site"]])
+        unique(ref_data$misc$mutational_signature[["PRIMARY_SITE"]])
       if (!(site %in% unique_sites_with_signature_prevalence)) {
         pcgrr::log4r_info(
           paste0("Primary tumor site '", site, "' ",
                  "does not have any signatures with significant ",
                  "prevalence - considering all"))
         signatures_prevalence <-
-          pcgr_data[["mutational_signature"]] |>
-          dplyr::select(.data$signature_id,
-                        .data$aetiology_keyword,
-                        .data$aetiology,
-                        .data$associated_signatures,
-                        .data$comments) |>
+          ref_data$misc$mutational_signature |>
+          dplyr::select(c("SIGNATURE_ID",
+                          "AETIOLOGY_KEYWORD",
+                          "AETIOLOGY",
+                          "ASSOCIATED_SIGNATURES",
+                          "COMMENTS")) |>
           dplyr::distinct()
       }else{
         signatures_prevalence <-
-          pcgr_data[["mutational_signature"]] |>
-          dplyr::filter(.data$primary_site == site) |>
-          dplyr::select(.data$signature_id,
-                        .data$primary_site,
-                        .data$prevalence_pct,
-                        .data$prevalence_above_5pct,
-                        .data$prevalence_above_10pct,
-                        .data$prevalence_above_15pct,
-                        .data$prevalence_above_20pct,
-                        .data$aetiology_keyword,
-                        .data$aetiology,
-                        .data$associated_signatures,
-                        .data$comments) |>
+          ref_data$misc$mutational_signature |>
+          dplyr::filter(.data$PRIMARY_SITE == site) |>
+          dplyr::select(.data$SIGNATURE_ID,
+                        .data$PRIMARY_SITE,
+                        .data$PREVALENCE_PCT,
+                        .data$PREVALENCE_ABOVE_5PCT,
+                        .data$PREVALENCE_ABOVE_10PCT,
+                        .data$PREVALENCE_ABOVE_15PCT,
+                        .data$PREVALENCE_ABOVE_20PCT,
+                        .data$AETIOLOGY_KEYWORD,
+                        .data$AETIOLOGY,
+                        .data$ASSOCIATED_SIGNATURES,
+                        .data$COMMENTS) |>
           dplyr::distinct()
 
         if (min_prevalence_pct > 0) {
           if (min_prevalence_pct == 5) {
             signatures_prevalence <- signatures_prevalence |>
-              dplyr::filter(.data$prevalence_above_5pct == T |
-                              is.na(.data$prevalence_above_5pct))
+              dplyr::filter(.data$PREVALENCE_ABOVE_5PCT == T |
+                              is.na(.data$PREVALENCE_ABOVE_5PCT))
           }else if (min_prevalence_pct == 10) {
             signatures_prevalence <- signatures_prevalence |>
-              dplyr::filter(.data$prevalence_above_10pct == T |
-                              is.na(.data$prevalence_above_10pct))
+              dplyr::filter(.data$PREVALENCE_ABOVE_10PCT == T |
+                              is.na(.data$PREVALENCE_ABOVE_10PCT))
           }
           else if (min_prevalence_pct == 15) {
             signatures_prevalence <- signatures_prevalence |>
-              dplyr::filter(.data$prevalence_above_15pct == T |
-                              is.na(.data$prevalence_above_15pct))
+              dplyr::filter(.data$PREVALENCE_ABOVE_15PCT == T |
+                              is.na(.data$PREVALENCE_ABOVE_15PCT))
           }else if (min_prevalence_pct == 20) {
             signatures_prevalence <- signatures_prevalence |>
-              dplyr::filter(.data$prevalence_above_20pct == T |
-                              is.na(.data$prevalence_above_20pct))
-          }else if (min_prevalence_pct == 2 | min_prevalence_pct == 1){
+              dplyr::filter(.data$PREVALENCE_ABOVE_20PCT == T |
+                              is.na(.data$PREVALENCE_ABOVE_20PCT))
+          }else if (min_prevalence_pct == 2 | min_prevalence_pct == 1) {
             signatures_prevalence <- signatures_prevalence |>
-              dplyr::filter(!is.na(.data$prevalence_pct)) |>
-              dplyr::filter(.data$prevalence_pct >= min_prevalence_pct)
+              dplyr::filter(!is.na(.data$PREVALENCE_PCT)) |>
+              dplyr::filter(.data$PREVALENCE_PCT >= min_prevalence_pct)
           }
         }
         signatures_prevalence <- signatures_prevalence |>
-          dplyr::select(-c(.data$primary_site,
-                           .data$prevalence_above_5pct,
-                           .data$prevalence_above_10pct,
-                           .data$prevalence_above_15pct,
-                           .data$prevalence_above_20pct)) |>
+          dplyr::select(-c(.data$PRIMARY_SITE,
+                           .data$PREVALENCE_ABOVE_5PCT,
+                           .data$PREVALENCE_ABOVE_10PCT,
+                           .data$PREVALENCE_ABOVE_15PCT,
+                           .data$PREVALENCE_ABOVE_20PCT)) |>
           dplyr::distinct() |>
-          dplyr::arrange(dplyr::desc(.data$prevalence_pct)) |>
-          dplyr::select(-.data$prevalence_pct)
+          dplyr::arrange(dplyr::desc(.data$PREVALENCE_PCT)) |>
+          dplyr::select(-.data$PREVALENCE_PCT)
       }
     }
 
-    if(incl_poss_artifacts == F){
+    if (incl_poss_artifacts == F) {
       signatures_prevalence <- signatures_prevalence |>
-        dplyr::filter(!stringr::str_detect(.data$aetiology_keyword,"artefact"))
+        dplyr::filter(!stringr::str_detect(
+          .data$AETIOLOGY_KEYWORD,"artefact"))
     }
     signatures_prevalence <- signatures_prevalence |>
       dplyr::distinct()
 
     ## Subset signature matrix - keeping only columns (signatures)
     ## to those defined by primary site/custom collection
-    sigs <- unique(signatures_prevalence$signature_id)
+    sigs <- unique(signatures_prevalence$SIGNATURE_ID)
     pcgrr::log4r_info(paste0("Limiting reference collection to signatures: ",
                               paste(sigs, collapse = ", ")))
 
@@ -443,11 +496,13 @@ get_prevalent_site_signatures <-
 #' @param build genome assembly (grch37/grch38)
 #'
 #' @export
-generate_report_data_rainfall <- function(variant_set, colors = NULL,
-                                          autosomes = FALSE, build = NULL) {
+generate_report_data_rainfall <- function(variant_set,
+                                          colors = NULL,
+                                          autosomes = FALSE,
+                                          build = NULL) {
 
-  pcg_report_rainfall <- pcgrr::init_report(class = "rainfall")
-  if(NROW(variant_set) == 0){
+  pcg_report_rainfall <- pcgrr::init_rainfall_content()
+  if (NROW(variant_set) == 0) {
     return(pcg_report_rainfall)
   }
 
diff --git a/pcgrr/R/reference_data.R b/pcgrr/R/reference_data.R
index 32013882..2889e5e8 100644
--- a/pcgrr/R/reference_data.R
+++ b/pcgrr/R/reference_data.R
@@ -7,7 +7,7 @@
 #'
 load_reference_data <- function(
     pcgr_db_assembly_dir = NULL,
-    genome_assembly = "grch38"){
+    genome_assembly = "grch38") {
 
   pcgr_ref_data <- list()
 
@@ -17,11 +17,11 @@ load_reference_data <- function(
 
   pcgr_ref_data[["assembly"]] <- list()
   pcgr_ref_data[["assembly"]][["grch_name"]] <- genome_assembly
-  pcgr_ref_data[["assembly"]][["grch_name"]] <- "hg19"
+  pcgr_ref_data[["assembly"]][["hg_name"]] <- "hg19"
   pcgr_ref_data[["assembly"]][["ref_genome"]] <- "BSgenome.Hsapiens.UCSC.hg19"
   if (genome_assembly == "grch38") {
     pcgr_ref_data[["assembly"]][["grch_name"]] <- genome_assembly
-    pcgr_ref_data[["assembly"]][["grch_name"]] <- "hg38"
+    pcgr_ref_data[["assembly"]][["hg_name"]] <- "hg38"
     pcgr_ref_data[["assembly"]][["ref_genome"]] <- "BSgenome.Hsapiens.UCSC.hg38"
   }
 
@@ -36,7 +36,7 @@ load_reference_data <- function(
 
 
   pcgr_ref_data[['vcf_infotags']] <- data.frame()
-  for(t in c('vep','other')){
+  for(t in c('vep','other')) {
     infotag_fname <- file.path(
       pcgr_db_assembly_dir,
       paste0("vcf_infotags_", t, ".tsv"))
@@ -53,32 +53,32 @@ load_reference_data <- function(
     )
   }
   for(cat in c('tcga','clinvar','gwas','gnomad_non_cancer',
-               'dbmts','dbnsfp','panel_of_normals')){
+               'dbmts','dbnsfp','panel_of_normals')) {
     vcfanno_fname <- file.path(
       pcgr_db_assembly_dir,"variant","vcf",cat,
       paste0(cat,".vcfanno.vcf_info_tags.txt"))
 
     raw_lines <- readLines(vcfanno_fname)
-    for(l in raw_lines){
-      if(startsWith(l,"##INFO")){
+    for(l in raw_lines) {
+      if (startsWith(l,"##INFO")) {
         tag <- stringr::str_replace(
           stringr::str_match(l,"ID=[A-Za-z|_]{1,}")[,1],
           "ID=","")
         number <- NA
-        if(stringr::str_detect(l, "Number=1,")){
+        if (stringr::str_detect(l, "Number=1,")) {
           number <- 1
         }
-        if(stringr::str_detect(l, "Number=0,")){
+        if (stringr::str_detect(l, "Number=0,")) {
           number <- 0
         }
         type <- "String"
-        if(stringr::str_detect(l, "Type=Integer,")){
+        if (stringr::str_detect(l, "Type=Integer,")) {
           type <- "Integer"
         }
-        if(stringr::str_detect(l, "Type=Float,")){
+        if (stringr::str_detect(l, "Type=Float,")) {
           type <- "Float"
         }
-        if(stringr::str_detect(l, "Type=Flag,")){
+        if (stringr::str_detect(l, "Type=Flag,")) {
           type <- "Flag"
         }
         description <-
@@ -89,10 +89,10 @@ load_reference_data <- function(
             "Description=\\\"|\\\">","")
 
         category <- "pcgr_cpsr"
-        if(cat == "dbmts" | cat == "gnomad_non_cancer"){
+        if (cat == "dbmts" | cat == "gnomad_non_cancer") {
           category <- "cpsr"
         }
-        if(cat == "panel_of_normals"){
+        if (cat == "panel_of_normals") {
           category <- "pcgr"
         }
         df <- data.frame(
@@ -116,6 +116,7 @@ load_reference_data <- function(
   pcgr_ref_data[["gene"]][["cpg"]] <- data.frame()
   pcgr_ref_data[['gene']][['gene_xref']] <- data.frame()
   pcgr_ref_data[['gene']][['transcript_xref']] <- data.frame()
+  pcgr_ref_data[['gene']][['otp_rank']] <- data.frame()
 
   cpg_tsv_fname <- file.path(
     pcgr_db_assembly_dir, "gene", "tsv",
@@ -170,9 +171,23 @@ load_reference_data <- function(
     ) |>
     dplyr::distinct()
 
-  colnames(pcgr_ref_data[['gene']][['transcript_xref']]) <-
-    toupper(colnames(pcgr_ref_data[['gene']][['transcript_xref']]))
+  otp_rank_tsv_fname <- file.path(
+    pcgr_db_assembly_dir, "gene", "tsv",
+    "gene_transcript_xref",
+    "otp_rank.tsv.gz"
+  )
+  check_file_exists(otp_rank_tsv_fname)
 
+  pcgr_ref_data[['gene']][['otp_rank']] <- as.data.frame(
+    readr::read_tsv(
+      otp_rank_tsv_fname, show_col_types = F,
+      na = c('.'))) |>
+    dplyr::filter(!is.na(.data$entrezgene)) |>
+    dplyr::mutate(entrezgene = as.character(.data$entrezgene)) |>
+    dplyr::distinct()
+
+  colnames(pcgr_ref_data[['gene']][['otp_rank']]) <-
+    toupper(colnames(pcgr_ref_data[['gene']][['otp_rank']]))
 
   pcgr_ref_data[['gene']][['gene_xref']] <- as.data.frame(
     readr::read_tsv(gene_xref_tsv_fname, show_col_types = F)) |>
@@ -193,7 +208,7 @@ load_reference_data <- function(
       "cancergene_evidence")
     ) |>
     dplyr::rename(
-      genename = name
+      genename = .data$name
     ) |>
     dplyr::mutate(
       entrezgene = as.character(.data$entrezgene)
@@ -273,14 +288,14 @@ load_reference_data <- function(
   ## Get variant statistics
   for(vardb in c('clinvar','gwas','tcga',
                  'gnomad_non_cancer','dbmts',
-                 'dbnsfp')){
+                 'dbnsfp')) {
     varstats_fname <-
       file.path(
         pcgr_db_assembly_dir, "variant", "vcf", vardb,
         paste0(vardb,".vcf_varstats.tsv")
       )
 
-    if(file.exists(varstats_fname)){
+    if (file.exists(varstats_fname)) {
       pcgr_ref_data[['variant']][['varstats']][[vardb]] <-
         as.data.frame(
           readr::read_tsv(
@@ -346,14 +361,14 @@ load_reference_data <- function(
                 'mutational_signature',
                 'pathway',
                 'hotspot',
-                'protein_domain')){
+                'protein_domain')) {
 
     fname_misc <- file.path(
       pcgr_db_assembly_dir, "misc", "tsv", elem,
       paste0(elem,".tsv.gz")
     )
 
-    # if(elem == 'hotspot'){
+    # if (elem == 'hotspot') {
     #   fname_misc <- file.path(
     #     pcgr_db_assembly_dir, "misc", "tsv", elem,
     #     paste0(elem,".tsv.gz")
@@ -409,9 +424,9 @@ load_reference_data <- function(
 
   ## 7. Biomarkers
   pcgr_ref_data[['biomarker']] <- list()
-  for(elem in c('clinical','variant','literature')){
+  for(elem in c('clinical','variant','literature')) {
     pcgr_ref_data[['biomarker']][[elem]] <- data.frame()
-    for(db in c('cgi','civic')){
+    for(db in c('cgi','civic')) {
       fname <-
         file.path(
           pcgr_db_assembly_dir, "biomarker", "tsv",
@@ -420,12 +435,23 @@ load_reference_data <- function(
       check_file_exists(fname)
       bm_data <- as.data.frame(
         readr::read_tsv(fname, show_col_types = F, na = "."))
-      if("source_id" %in% colnames(bm_data)){
+      if ("source_id" %in% colnames(bm_data)) {
         bm_data <- bm_data |>
           dplyr::mutate(
             source_id = as.character(.data$source_id))
       }
 
+      if ('entrezgene' %in% colnames(bm_data)) {
+        bm_data <- bm_data |>
+          dplyr::mutate(
+            entrezgene = as.character(.data$entrezgene))
+      }
+      if ('variant_id' %in% colnames(bm_data)) {
+        bm_data <- bm_data |>
+          dplyr::mutate(
+            variant_id = as.character(.data$variant_id))
+      }
+
       pcgr_ref_data[['biomarker']][[elem]] <- dplyr::bind_rows(
         pcgr_ref_data[['biomarker']][[elem]],
         bm_data
@@ -439,7 +465,7 @@ load_reference_data <- function(
   ## Metadata
   pcgr_ref_data[['metadata']] <- data.frame()
   for(dtype in c('gene','gwas','hotspot','other',
-                 'phenotype','biomarker','drug')){
+                 'phenotype','biomarker','drug')) {
 
     fname <- file.path(
       pcgr_db_assembly_dir, ".METADATA", "tsv",
@@ -451,12 +477,12 @@ load_reference_data <- function(
       dplyr::mutate(datatype = dtype) |>
       dplyr::mutate(wflow = dplyr::case_when(
         stringr::str_detect(
-          source_abbreviation,
+          .data$source_abbreviation,
           paste0(
             "^(gepa|cpg_other|maxwell2016|acmg_sf|dbmts|",
             "woods_dnarepair|gerp|tcga_pancan_2018|gwas_catalog)")) ~ "cpsr",
         stringr::str_detect(
-          source_abbreviation,
+          .data$source_abbreviation,
           "^(cytoband|mitelmandb|tcga|nci|intogen|opentargets|dgidb|pubchem)$") ~ "pcgr",
         TRUE ~ as.character("pcgr_cpsr")
       ))
@@ -465,8 +491,8 @@ load_reference_data <- function(
       pcgr_ref_data[['metadata']] |>
       dplyr::bind_rows(metadata_dtype) |>
       dplyr::filter(
-        source_abbreviation != "foundation_one" &
-          source_abbreviation != "illumina"
+        .data$source_abbreviation != "foundation_one" &
+          .data$source_abbreviation != "illumina"
       )
   }
 
diff --git a/pcgrr/R/report.R b/pcgrr/R/report.R
index f397b509..2e7a9dca 100644
--- a/pcgrr/R/report.R
+++ b/pcgrr/R/report.R
@@ -48,7 +48,7 @@ init_report <- function(yaml_fname = NULL,
         vcf_tag_AN <- "gnomADe_non_cancer_AN"
         vcf_tag_AC <- "gnomADe_non_cancer_AC"
         vcf_tag_NHOMALT <- "gnomADe_non_cancer_NHOMALT"
-        if(population != "global"){
+        if (population != "global") {
           vcf_tag_AF <-
             paste0("gnomADe_non_cancer_",toupper(population),"_AF")
           vcf_tag_AN <-
@@ -61,7 +61,7 @@ init_report <- function(yaml_fname = NULL,
         pop_desc_df <-
           report$ref_data$vcf_infotags[
             report$ref_data$vcf_infotags$tag == vcf_tag_AF,]
-        if(NROW(pop_desc_df) == 1){
+        if (NROW(pop_desc_df) == 1) {
           population_description <- pop_desc_df$description
           report[["settings"]][["conf"]][["variant_classification"]][["vcftag_gnomad_AF"]] <-
             vcf_tag_AF
@@ -98,7 +98,7 @@ init_report <- function(yaml_fname = NULL,
       report[["content"]][[a_elem]] <- list()
       report[["content"]][[a_elem]][["eval"]] <- FALSE
 
-      if(a_elem == "tumor_purity" | a_elem == "tumor_ploidy"){
+      if (a_elem == "tumor_purity" | a_elem == "tumor_ploidy") {
         report[["content"]][[a_elem]][["eval"]] <- TRUE
       }
 
@@ -203,7 +203,7 @@ update_report <- function(report, report_data,
 #'
 #' @export
 init_tmb_content <- function(tcga_tmb = NULL,
-                             config = NULL){
+                             config = NULL) {
 
   invisible(assertthat::assert_that(!is.null(tcga_tmb)))
   invisible(assertthat::assert_that(is.data.frame(tcga_tmb) &
@@ -235,7 +235,7 @@ init_tmb_content <- function(tcga_tmb = NULL,
 #
 #' @return rep updated PCGR report structure - initialized for CNA content
 #' @export
-init_cna_content <- function(rep = NULL){
+init_cna_content <- function(rep = NULL) {
 
   invisible(assertthat::assert_that(!is.null(rep)))
   invisible(assertthat::assert_that(!is.null(rep[['disp']])))
@@ -271,7 +271,7 @@ init_cna_content <- function(rep = NULL){
 #
 #' @return rep updated PCGR report structure - initialized for SNV/InDel content
 #' @export
-init_snv_indel_content <- function(rep = NULL){
+init_snv_indel_content <- function(rep = NULL) {
 
   invisible(assertthat::assert_that(!is.null(rep)))
   invisible(assertthat::assert_that(!is.null(rep[['disp']])))
@@ -306,7 +306,7 @@ init_snv_indel_content <- function(rep = NULL){
 #'
 #' @return rep Report structure initialized for signature data
 #' @export
-init_m_signature_content <- function(){
+init_m_signature_content <- function() {
 
   rep <- list()
   rep[["eval"]] <- FALSE
@@ -332,14 +332,38 @@ init_m_signature_content <- function(){
   return(rep)
 }
 
-#init_msi_content <- function(){}
-#init_kataegis_content <- function(){}
+#' Initialize the report element that holds the MSI classification
+#'
+#' @return rep list with `eval`, `missing_data` (both FALSE) and `prediction`
+#' @export
+init_msi_content <- function() {
+  msi_content <- list(
+    eval = FALSE,
+    missing_data = FALSE,
+    prediction = list()
+  )
+
+  return(msi_content)
+}
+
+#' Initialize the report element that holds kataegis results
+#'
+#' @return rep list with `eval` (FALSE) and `events` (empty data frame)
+#' @export
+init_kataegis_content <- function() {
+  kataegis_content <- list(
+    eval = FALSE,
+    events = data.frame()
+  )
+
+  return(kataegis_content)
+}
 
 #' Function that initiates report element with rainfall information
 #'
 #' @return rep Report structure initialized for rainfall data
 #' @export
-init_rainfall_content <- function(){
+init_rainfall_content <- function() {
 
   rep <- list()
 
@@ -367,7 +391,7 @@ init_rainfall_content <- function(){
 #'
 #' @return rep Report structure initialized for tumor-only data
 #' @export
-init_tumor_only_content <- function(){
+init_tumor_only_content <- function() {
 
   rep <- list()
   rep[["eval"]] <- FALSE
@@ -399,7 +423,7 @@ init_tumor_only_content <- function(){
 #'
 #' @return rep Report structure initialized for value box data
 #' @export
-init_valuebox_content <- function(){
+init_valuebox_content <- function() {
   rep <- list()
 
   rep[["eval"]] <- FALSE
@@ -431,7 +455,7 @@ init_valuebox_content <- function(){
 #'
 #' @return rep Report structure initialized for ranked display
 #' @export
-init_report_display_content <- function(){
+init_report_display_content <- function() {
 
   rep <- list()
   rep[["eval"]] <- FALSE
@@ -451,7 +475,7 @@ init_report_display_content <- function(){
 #'
 #' @return rep Report structure initialized for variant data
 #' @export
-init_var_content <- function(){
+init_var_content <- function() {
 
   rep <- list()
 
@@ -461,7 +485,7 @@ init_var_content <- function(){
   rep[["variant_set"]] <- list()
   rep[["v_stat"]] <- list()
   rep[["zero"]] <- FALSE
-  for (tumorclass in c("any_ttype", "other_ttype", "specific_ttype")) {
+  for (tumorclass in c("any_ttype", "other_ttype", "query_ttype")) {
     rep[["clin_eitem"]][[tumorclass]] <- list()
     for (e_type in c("prognostic", "diagnostic", "predictive")) {
       for (e_level in c("A_B", "C_D_E", "any")) {
@@ -477,7 +501,7 @@ init_var_content <- function(){
 #'
 #' @return rep Report structure initialized for germline data (CPSR)
 #' @export
-init_germline_content <- function(){
+init_germline_content <- function() {
   rep <- list()
 
   rep[["max_dt_rows"]] <- 0
@@ -498,12 +522,12 @@ init_germline_content <- function(){
   rep[["clin_eitem"]] <- list()
   for (evidence_type in pcgrr::evidence_types) {
     rep[["clin_eitem"]][[evidence_type]] <- list()
-    for(level in pcgrr::evidence_levels){
+    for(level in pcgrr::evidence_levels) {
       rep[["clin_eitem"]][[evidence_type]][[level]] <-
         data.frame()
     }
     rep[['clin_eitem']][['all']] <- list()
-    for(level in pcgrr::evidence_levels){
+    for(level in pcgrr::evidence_levels) {
       rep[["clin_eitem"]][['all']][[level]] <-
         data.frame()
     }
@@ -532,7 +556,7 @@ init_germline_content <- function(){
 #' @export
 load_yaml <- function(yml_fname, report_mode = "CPSR") {
 
-  if(!file.exists(yml_fname)){
+  if (!file.exists(yml_fname)) {
     log4r_fatal(
       paste0("YAML file '",yml_fname,"' does not exist - exiting"))
   }
@@ -541,11 +565,11 @@ load_yaml <- function(yml_fname, report_mode = "CPSR") {
   for(t in c('sample_id',
              'genome_assembly',
              'workflow',
-             'output_dir')){
-    if(is.null(report_settings[[t]])){
+             'output_dir')) {
+    if (is.null(report_settings[[t]])) {
       missing_yaml_info <- T
     }else{
-      if(identical(typeof(report_settings[[t]]),"character") == F){
+      if (identical(typeof(report_settings[[t]]),"character") == F) {
         missing_yaml_info <- T
       }
     }
@@ -553,16 +577,16 @@ load_yaml <- function(yml_fname, report_mode = "CPSR") {
   for(t in c('conf',
              'molecular_data',
              'reference_data',
-             'software')){
-    if(is.null(report_settings[[t]])){
+             'software')) {
+    if (is.null(report_settings[[t]])) {
       missing_yaml_info <- T
     }else{
-      if(identical(typeof(report_settings[[t]]),"list") == F){
+      if (identical(typeof(report_settings[[t]]),"list") == F) {
         missing_yaml_info <- T
       }
     }
   }
-  if(missing_yaml_info == F){
+  if (missing_yaml_info == F) {
     log4r_info(paste0(
       "Successfully parsed YAML configuration file - reporting mode: ", report_mode))
   }else{
@@ -574,7 +598,7 @@ load_yaml <- function(yml_fname, report_mode = "CPSR") {
   ## check that it matches the report_mode
   ## return it
 
-  if(report_settings[['workflow']] != report_mode){
+  if (report_settings[['workflow']] != report_mode) {
     log4r_fatal(
       paste0("Cannot read YAML file from ",
              report_settings[['workflow']],
@@ -583,9 +607,9 @@ load_yaml <- function(yml_fname, report_mode = "CPSR") {
   }
 
   ref_data <- list()
-  if(dir.exists(
+  if (dir.exists(
     report_settings[['reference_data']][['path']]
-  )){
+  )) {
     ref_data <- load_reference_data(
       pcgr_db_assembly_dir = report_settings[['reference_data']][['path']],
       genome_assembly = report_settings[['genome_assembly']]
@@ -609,7 +633,7 @@ load_yaml <- function(yml_fname, report_mode = "CPSR") {
     if (identical(
       typeof(
         report_settings[['conf']][['sample_properties']][['phenotype']]),
-      "list")){
+      "list")) {
       report_settings[['conf']][['sample_properties']][['phenotype']] <-
         as.data.frame(
           rrapply::rrapply(
@@ -619,26 +643,26 @@ load_yaml <- function(yml_fname, report_mode = "CPSR") {
 
     for(col in c('do_id','do_name','efo_id','efo_name',
                  'icd10_code','ot_name','ot_primary_site',
-                 'primary_site','ot_code','ot_code_path')){
+                 'primary_site','ot_code','ot_code_path')) {
 
-      if(NROW(report_settings[['conf']][['sample_properties']][['phenotype']][
-        report_settings[['conf']][['sample_properties']][['phenotype']][[col]] == "NaN",]) > 0){
+      if (NROW(report_settings[['conf']][['sample_properties']][['phenotype']][
+        report_settings[['conf']][['sample_properties']][['phenotype']][[col]] == "NaN",]) > 0) {
           report_settings[['conf']][['sample_properties']][['phenotype']][
             report_settings[['conf']][['sample_properties']][['phenotype']][[col]] == "NaN",col] <-
           as.character(NA)
       }
 
-      if(NROW(report_settings[['conf']][['sample_properties']][['phenotype']][
-        is.nan(report_settings[['conf']][['sample_properties']][['phenotype']][[col]]),]) > 0){
+      if (NROW(report_settings[['conf']][['sample_properties']][['phenotype']][
+        is.nan(report_settings[['conf']][['sample_properties']][['phenotype']][[col]]),]) > 0) {
         report_settings[['conf']][['sample_properties']][['phenotype']][
           is.nan(report_settings[['conf']][['sample_properties']][['phenotype']]),col] <-
           as.character(NA)
       }
     }
 
-    for(col in c('do_cancer_slim','ot_level')){
-      if(NROW(report_settings[['conf']][['sample_properties']][['phenotype']][
-        is.nan(report_settings[['conf']][['sample_properties']][['phenotype']][[col]]),]) > 0){
+    for(col in c('do_cancer_slim','ot_level')) {
+      if (NROW(report_settings[['conf']][['sample_properties']][['phenotype']][
+        is.nan(report_settings[['conf']][['sample_properties']][['phenotype']][[col]]),]) > 0) {
         report_settings[['conf']][['sample_properties']][['phenotype']][
           is.nan(report_settings[['conf']][['sample_properties']][['phenotype']][[col]]),col] <-
           as.numeric(NA)
@@ -656,47 +680,47 @@ load_yaml <- function(yml_fname, report_mode = "CPSR") {
                'source_license',
                'source_license_url',
                'source_url',
-               'source_citation')){
+               'source_citation')) {
 
-    if(NROW(report_settings[['reference_data']][['source_metadata']][
-      report_settings[['reference_data']][['source_metadata']][[col]] == "NaN",]) > 0){
+    if (NROW(report_settings[['reference_data']][['source_metadata']][
+      report_settings[['reference_data']][['source_metadata']][[col]] == "NaN",]) > 0) {
       report_settings[['reference_data']][['source_metadata']][
         report_settings[['reference_data']][['source_metadata']][[col]] == "NaN",col] <-
         as.character(NA)
     }
   }
 
-  if(report_mode == "CPSR"){
+  if (report_mode == "CPSR") {
     report_settings[['conf']][['gene_panel']][['panel_genes']] <-
       as.data.frame(
         rrapply::rrapply(
           report_settings$conf$gene_panel$panel_genes,
           how = "bind"))
 
-    if(NROW(report_settings[['conf']][['gene_panel']][['panel_genes']]) == 1){
-      for(e in c('panel_id','panel_url','panel_version')){
+    if (NROW(report_settings[['conf']][['gene_panel']][['panel_genes']]) == 1) {
+      for(e in c('panel_id','panel_url','panel_version')) {
         report_settings[['conf']][['gene_panel']][['panel_genes']][,e] <- NA
       }
-      for(e in c('mod','moi')){
-        if(is.nan(report_settings$conf$gene_panel$panel_genes[,e])){
+      for(e in c('mod','moi')) {
+        if (is.nan(report_settings$conf$gene_panel$panel_genes[,e])) {
           report_settings[['conf']][['gene_panel']][['panel_genes']][,e] <- NA
         }
       }
     }else{
 
-      for(col in c('panel_id','panel_version')){
+      for(col in c('panel_id','panel_version')) {
 
-        if(NROW(report_settings[['conf']][['gene_panel']][['panel_genes']][
-          is.nan(report_settings[['conf']][['gene_panel']][['panel_genes']][[col]]),]) > 0){
+        if (NROW(report_settings[['conf']][['gene_panel']][['panel_genes']][
+          is.nan(report_settings[['conf']][['gene_panel']][['panel_genes']][[col]]),]) > 0) {
           report_settings[['conf']][['gene_panel']][['panel_genes']][
             is.nan(report_settings[['conf']][['gene_panel']][['panel_genes']][[col]]),col] <-
             as.numeric(NA)
         }
       }
-      for(col in c('mod','moi')){
+      for(col in c('mod','moi')) {
 
-        if(NROW(report_settings[['conf']][['gene_panel']][['panel_genes']][
-          is.nan(report_settings[['conf']][['gene_panel']][['panel_genes']][[col]]),]) > 0){
+        if (NROW(report_settings[['conf']][['gene_panel']][['panel_genes']][
+          is.nan(report_settings[['conf']][['gene_panel']][['panel_genes']][[col]]),]) > 0) {
           report_settings[['conf']][['gene_panel']][['panel_genes']][
             is.nan(report_settings[['conf']][['gene_panel']][['panel_genes']][[col]]),col] <-
             as.character(NA)
@@ -717,9 +741,9 @@ load_yaml <- function(yml_fname, report_mode = "CPSR") {
     pcgrr::color_palette[["none"]][["values"]][1]
   report_settings$conf$visual_reporting[["color_value_box"]] <-
     pcgrr::color_palette[["report_color"]][["values"]][1]
-  if(report_mode == "PCGR" &
+  if (report_mode == "PCGR" &
      !is.null(report_settings$conf$assay_properties)) {
-    if(report_settings$conf$assay_properties$vcf_tumor_only == 1) {
+    if (report_settings$conf$assay_properties$vcf_tumor_only == 1) {
       report_settings$conf$visual_reporting[["color_value_box"]] <-
         pcgrr::color_palette[["report_color"]][["values"]][2]
     }
diff --git a/pcgrr/R/utils.R b/pcgrr/R/utils.R
index 5c2ca658..31f37a26 100644
--- a/pcgrr/R/utils.R
+++ b/pcgrr/R/utils.R
@@ -466,7 +466,7 @@ append_tcga_var_link <- function(var_df,
 #'
 #' @export
 append_tfbs_annotation <-
-  function(var_df){
+  function(var_df) {
 
     if (any(grepl(paste0("^CONSEQUENCE$"), names(var_df))) &
         any(grepl(paste0("^VAR_ID$"), names(var_df))) &
@@ -485,7 +485,7 @@ append_tfbs_annotation <-
                         )) |>
         dplyr::distinct()
 
-      if(nrow(var_df_unique_slim) > 0){
+      if (nrow(var_df_unique_slim) > 0) {
         var_df_unique_slim_melted <- as.data.frame(
           var_df_unique_slim |>
           tidyr::separate_rows(.data$REGULATORY_ANNOTATION, sep=",") |>
@@ -495,7 +495,7 @@ append_tfbs_annotation <-
             ))
         )
 
-        if(nrow(var_df_unique_slim_melted) > 0){
+        if (nrow(var_df_unique_slim_melted) > 0) {
 
           pcgrr::log4r_info(paste0(
             "Found TF binding site annotations for ",
@@ -595,7 +595,7 @@ append_dbmts_var_link <-
                             sep = "\\|", convert = T) |>
             dplyr::filter(.data$ens_trans_id == .data$ENSEMBL_TRANSCRIPT_ID)
         )
-        if(nrow(var_df_unique_slim_melted) > 0){
+        if (nrow(var_df_unique_slim_melted) > 0) {
           var_df_unique_slim_melted <- var_df_unique_slim_melted |>
             dplyr::select(-c(.data$ENSEMBL_TRANSCRIPT_ID, .data$algorithms_call)) |>
             dplyr::mutate(miRNA_TARGET_HIT = dplyr::case_when(
@@ -707,7 +707,7 @@ append_dbnsfp_var_link <- function(var_df) {
 #' @export
 append_drug_var_link <- function(
     vcf_data_df,
-    ref_data = NULL){
+    ref_data = NULL) {
 
   pcgrr::log4r_info("Adding annotation links - targeted cancer drugs")
 
@@ -719,27 +719,38 @@ append_drug_var_link <- function(
       dplyr::filter(!is.na(.data$SYMBOL)) |>
       dplyr::distinct()
     if (nrow(var_drug_df) > 0) {
+
       cancer_drugs <-
-        dplyr::select(ref_data[["drug"]],
-                      c("SYMBOL","DRUG_NAME",
+        dplyr::select(ref_data[["drug"]][['targeted']],
+                      c("SYMBOL",
+                        "ATC_TREATMENT_CATEGORY",
+                        "ATC_LEVEL3",
+                        "DRUG_NAME",
                         "DRUG_MAX_PHASE_INDICATION",
                         "DRUG_ACTION_TYPE",
                         "DRUG_LINK")) |>
+        dplyr::filter(
+          .data$ATC_TREATMENT_CATEGORY != "cancer_unclassified") |>
         dplyr::distinct() |>
         dplyr::mutate(
           DRUG_ACTION_TYPE = stringr::str_to_title(
             .data$DRUG_ACTION_TYPE
         )) |>
+        dplyr::filter(
+          .data$DRUG_ACTION_TYPE != "Other") |>
+        dplyr::filter(
+          .data$DRUG_MAX_PHASE_INDICATION > 1) |>
         dplyr::arrange(
           .data$SYMBOL,
           dplyr::desc(.data$DRUG_MAX_PHASE_INDICATION)) |>
         dplyr::select(-c("DRUG_MAX_PHASE_INDICATION")) |>
         dplyr::distinct() |>
         dplyr::group_by(
-          .data$SYMBOL,
-          .data$DRUG_ACTION_TYPE
+          .data$SYMBOL
         ) |>
         dplyr::summarise(
+          DRUG_ACTION_TYPE = paste(
+            unique(.data$DRUG_ACTION_TYPE), collapse=", "),
           TARGETED_CANCER_DRUGS = paste(
             .data$DRUG_LINK, collapse=", "),
           TARGETED_CANCER_DRUGS2 = paste(
@@ -749,13 +760,17 @@ append_drug_var_link <- function(
       var_drug_df <- var_drug_df |>
         dplyr::left_join(
           cancer_drugs,
-          by = c("SYMBOL" = "SYMBOL")) |>
+          by = c("SYMBOL")) |>
         dplyr::filter(!is.na(.data$TARGETED_CANCER_DRUGS2)) |>
+        dplyr::select(-c("DRUG_ACTION_TYPE")) |>
         dplyr::distinct()
       if (NROW(var_drug_df) > 0) {
-        vcf_data_df <- dplyr::left_join(
-          vcf_data_df, var_drug_df,
-          by = c("VAR_ID" = "VAR_ID"))
+        vcf_data_df <-
+          dplyr::left_join(
+            vcf_data_df,
+            var_drug_df,
+            by = c("VAR_ID","SYMBOL")
+          )
       }else{
         vcf_data_df$TARGETED_CANCER_DRUGS <- NA
         vcf_data_df$TARGETED_CANCER_DRUGS2 <- NA
@@ -896,18 +911,33 @@ append_otargets_pheno_link <- function(var_df,
 #'
 #' @param vcf_data_df Data frame of sample variants from VCF
 #' @param ref_data PCGR reference data bundle object
+#' @param site Primary tumor site
+#' @param pos_var variable reflecting chromosome order (POS/SEGMENT_START)
 #' @return vcf_data_df
 #'
 #' @export
 append_cancer_gene_evidence <-
   function(vcf_data_df = NULL,
-           ref_data = NULL){
+           ref_data = NULL,
+           site = 'Any',
+           pos_var = 'POS') {
 
     if (any(grepl(paste0("^ENTREZGENE$"), names(vcf_data_df))) &
-        any(grepl(paste0("^ENSEMBL_GENE_ID$"), names(vcf_data_df)))){
+        any(grepl(paste0("^ENSEMBL_GENE_ID$"), names(vcf_data_df)))) {
 
       pcgrr::log4r_info(paste0("Adding literature evidence for cancer-relevant genes"))
 
+
+      tissue_gene_ranks <- ref_data[['gene']][['otp_rank']] |>
+        dplyr::select(c("ENTREZGENE", "PRIMARY_SITE", "TISSUE_ASSOC_RANK")) |>
+        dplyr::filter(.data$PRIMARY_SITE == site) |>
+        dplyr::distinct()
+
+      global_gene_ranks <- ref_data[['gene']][['otp_rank']] |>
+        dplyr::select(c("ENTREZGENE", "GLOBAL_ASSOC_RANK")) |>
+        dplyr::distinct()
+
+
       vcf_data_df_1 <- vcf_data_df |>
         dplyr::filter(!is.na(.data$ENTREZGENE))
       vcf_data_df_2 <- vcf_data_df |>
@@ -919,7 +949,7 @@ append_cancer_gene_evidence <-
           is.na(.data$ENTREZGENE) &
             is.na(.data$ENSEMBL_GENE_ID))
 
-      if(NROW(vcf_data_df_1) > 0){
+      if (NROW(vcf_data_df_1) > 0) {
 
         vcf_data_df_1 <- vcf_data_df_1 |>
           dplyr::left_join(
@@ -931,20 +961,48 @@ append_cancer_gene_evidence <-
                   "CANCERGENE_EVIDENCE")),
               !is.na(.data$ENTREZGENE)),
             by = c("ENTREZGENE" = "ENTREZGENE",
-                   "ENSEMBL_GENE_ID" = "ENSEMBL_GENE_ID"))
+                   "ENSEMBL_GENE_ID" = "ENSEMBL_GENE_ID")) |>
+          dplyr::distinct()
+
+        ## Add gene ranks (Open Targets Platform)
+        ## - according to primary tumor types/sites
+        ## - globally (across all tumor types/sites)
+        vcf_data_df_1 <- vcf_data_df_1 |>
+          dplyr::left_join(
+            global_gene_ranks, by = "ENTREZGENE") |>
+          dplyr::mutate(GLOBAL_ASSOC_RANK = dplyr::if_else(
+            is.na(.data$GLOBAL_ASSOC_RANK),
+            as.numeric(0),
+            as.numeric(.data$GLOBAL_ASSOC_RANK)
+          ))
+        if (NROW(tissue_gene_ranks) > 0) {
+          tissue_gene_ranks$PRIMARY_SITE <- NULL
+          vcf_data_df_1 <- vcf_data_df_1 |>
+            dplyr::left_join(
+              tissue_gene_ranks, by = "ENTREZGENE") |>
+            dplyr::mutate(TISSUE_ASSOC_RANK = dplyr::if_else(
+              is.na(.data$TISSUE_ASSOC_RANK),
+              as.numeric(0),
+              as.numeric(.data$TISSUE_ASSOC_RANK)
+            ))
+        }else{
+          vcf_data_df_1 <- vcf_data_df_1 |>
+            dplyr::mutate(TISSUE_ASSOC_RANK = as.numeric(0))
+        }
 
       }
 
-      if(NROW(vcf_data_df_2) > 0){
+      if (NROW(vcf_data_df_2) > 0) {
 
         vcf_data_df_2 <- vcf_data_df_2 |>
           dplyr::left_join(
             dplyr::filter(
               dplyr::select(
                 ref_data[["gene"]][["gene_xref"]],
-                c("ENTREZGENE", "ENSEMBL_GENE_ID","CANCERGENE_EVIDENCE")),
-              !is.na(.data$ENTREZGENE)),
-            by = c("ENSEMBL_GENE_ID" = "ENSEMBL_GENE_ID"))
+                c("ENSEMBL_GENE_ID","CANCERGENE_EVIDENCE")),
+              !is.na(.data$ENSEMBL_GENE_ID)),
+            by = c("ENSEMBL_GENE_ID")) |>
+          dplyr::distinct()
 
       }
 
@@ -953,7 +1011,12 @@ append_cancer_gene_evidence <-
         vcf_data_df_2,
         vcf_data_df_3) |>
         dplyr::distinct() |>
-        pcgrr::order_variants()
+        dplyr::mutate(CANCERGENE_EVIDENCE = dplyr::if_else(
+          .data$CANCERGENE_EVIDENCE == ".",
+          as.character(NA),
+          as.character(.data$CANCERGENE_EVIDENCE)
+        )) |>
+        pcgrr::order_variants(pos_var = pos_var)
 
     }
 
@@ -971,7 +1034,7 @@ append_cancer_gene_evidence <-
 #' @export
 append_gwas_citation_phenotype <-
   function(vcf_data_df = NULL,
-           ref_data = NULL){
+           ref_data = NULL) {
 
 
     invisible(assertthat::assert_that(
@@ -1435,11 +1498,11 @@ get_calls <- function(tsv_gz_file,
 
 
   ## convert all columns with only NA values to character type
-  if(NROW(vcf_data_df) > 0){
+  if (NROW(vcf_data_df) > 0) {
     num_rows <- NROW(vcf_data_df)
-    for (n in colnames(vcf_data_df)){
-      if(length(vcf_data_df[is.na(vcf_data_df[,n]),n]) == num_rows){
-        if(typeof(vcf_data_df[,n]) == "logical"){
+    for (n in colnames(vcf_data_df)) {
+      if (length(vcf_data_df[is.na(vcf_data_df[,n]),n]) == num_rows) {
+        if (typeof(vcf_data_df[,n]) == "logical") {
           vcf_data_df[,n] <- as.character(vcf_data_df[,n])
         }
       }
@@ -1450,9 +1513,9 @@ get_calls <- function(tsv_gz_file,
   af_pop_columns_numeric <-
     colnames(vcf_data_df)[stringr::str_detect(colnames(vcf_data_df), "_AF_[0-9A-Z]{1,}$")]
 
-  if(NROW(vcf_data_df) > 0){
-    for (col in af_pop_columns_numeric){
-      if(typeof(vcf_data_df[, col]) != "double"){
+  if (NROW(vcf_data_df) > 0) {
+    for (col in af_pop_columns_numeric) {
+      if (typeof(vcf_data_df[, col]) != "double") {
         vcf_data_df[, col] <- as.numeric(vcf_data_df[, col])
       }
     }
@@ -1463,9 +1526,9 @@ get_calls <- function(tsv_gz_file,
       colnames(vcf_data_df),
       "NON_CANCER_(AC|AN|NHOMALT)")]
 
-  if(NROW(vcf_data_df) > 0){
-    for (col in af_pop_columns_integer){
-      if(typeof(vcf_data_df[, col]) != "integer"){
+  if (NROW(vcf_data_df) > 0) {
+    for (col in af_pop_columns_integer) {
+      if (typeof(vcf_data_df[, col]) != "integer") {
         vcf_data_df[, col] <- as.integer(vcf_data_df[, col])
       }
     }
@@ -1488,7 +1551,8 @@ get_calls <- function(tsv_gz_file,
 #'
 #'
 #' @export
-write_processed_vcf <- function(calls, sample_name = NULL,
+write_processed_vcf <- function(calls,
+                                sample_name = NULL,
                                 output_directory = NULL,
                                 vcf_fname = NULL) {
 
@@ -1512,7 +1576,9 @@ write_processed_vcf <- function(calls, sample_name = NULL,
   sample_vcf_content_fname <-
     file.path(output_directory,
               paste0(sample_name, ".",
-                     sample(100000, 1), ".vcf_content.tsv"))
+                     stringi::stri_rand_strings(
+                        1, 15, pattern = "[A-Za-z0-9]"),
+                     ".vcf_content.tsv"))
   write(header_lines, file = vcf_fname, sep = "\n")
 
   sample_vcf <- vcf_df[, c("CHROM", "POS", "ID", "REF",
@@ -1579,7 +1645,7 @@ detect_vcf_sample_name <- function(df, sample_name = NULL, cpsr = FALSE) {
 #' @export
 targeted_drugs_pr_ttype <- function(ttype,
                                     pcgr_data,
-                                    ignore_on_label_early_phase = T){
+                                    ignore_on_label_early_phase = T) {
 
   pcgrr::log4r_info(
     paste0("Retrieving targeted drugs (on-label and off-label) for ",
@@ -1604,7 +1670,7 @@ targeted_drugs_pr_ttype <- function(ttype,
     site_candidates[["off_label"]]
 
   ## If tumor type not specified, off-label indications make no sense
-  if(ttype == "Any"){
+  if (ttype == "Any") {
     drug_candidates[["off_label"]] <- data.frame()
   }
 
@@ -1634,7 +1700,7 @@ targeted_drugs_pr_ttype <- function(ttype,
   }
 
   all_candidates <- drug_candidates[["on_label_early_phase"]]
-  if(NROW(drug_candidates[['off_label']]) > 0){
+  if (NROW(drug_candidates[['off_label']]) > 0) {
     all_candidates <- dplyr::full_join(drug_candidates[["off_label"]],
                                        drug_candidates[["on_label_early_phase"]],
                                        by = "symbol")
@@ -1677,7 +1743,7 @@ targeted_drugs_pr_ttype <- function(ttype,
 targeted_drugs_summarise <- function(
   candidate_drugs = NULL,
   link_label = "DRUGS_ON_LABEL",
-  indication_label = "DRUGS_ON_LABEL_INDICATIONS"){
+  indication_label = "DRUGS_ON_LABEL_INDICATIONS") {
 
   invisible(assertthat::assert_that(!is.null(candidate_drugs)))
   invisible(assertthat::assert_that(is.data.frame(candidate_drugs)))
@@ -1814,10 +1880,10 @@ pkg_exists <- function(p) {
 #' @param fname Name of file to check
 #'
 #' @export
-check_file_exists <- function(fname){
+check_file_exists <- function(fname) {
 
-  if(file.exists(fname)){
-    if(file.size(fname) == 0){
+  if (file.exists(fname)) {
+    if (file.size(fname) == 0) {
       log4r_fatal(
         paste0("File ", fname, " has zero size - exiting")
       )
diff --git a/pcgrr/R/validate.R b/pcgrr/R/validate.R
deleted file mode 100644
index f6dedc1c..00000000
--- a/pcgrr/R/validate.R
+++ /dev/null
@@ -1,10 +0,0 @@
-#'
-#'
-#'
-validate_settings <- function(settings = NULL, type = "PCGR") {
-
-}
-
-validate_ref_data <- function(ref_data = NULL) {
-
-}
diff --git a/pcgrr/R/value_boxes.R b/pcgrr/R/value_boxes.R
index 44d259b2..68a28a6a 100644
--- a/pcgrr/R/value_boxes.R
+++ b/pcgrr/R/value_boxes.R
@@ -92,10 +92,10 @@ generate_report_data_value_box <- function(pcg_report,
     }
   }
 
-  if (rep_cont[['kataegis']][["eval"]]){
+  if (rep_cont[['kataegis']][["eval"]]) {
     pcg_report_value_box[["kataegis"]] <- "None"
       num_events <- NROW(rep_cont$kataegis$events)
-      if(num_events > 0){
+      if (num_events > 0) {
         num_events <- NROW(rep_cont$kataegis$events |>
                              dplyr::filter(.data$confidence == 3))
         # pcg_report_value_box[["kataegis"]] <-
diff --git a/pcgrr/data-raw/data-raw.R b/pcgrr/data-raw/data-raw.R
index 0dfc1a56..3683bf30 100755
--- a/pcgrr/data-raw/data-raw.R
+++ b/pcgrr/data-raw/data-raw.R
@@ -82,6 +82,7 @@ data_coltype_defs[['cna_somatic_raw']] <- readr::cols_only(
   SEGMENT_START = readr::col_double(),
   SEGMENT_END = readr::col_double(),
   VAR_ID = readr::col_character(),
+  VARIANT_CLASS = readr::col_character(),
   N_MAJOR = readr::col_integer(),
   N_MINOR = readr::col_integer(),
   CHROMOSOME_ARM = readr::col_character(),
@@ -178,6 +179,7 @@ data_coltype_defs[['snv_indel_somatic_raw']] <- readr::cols_only(
   CLINVAR_CONFLICTED = readr::col_logical(),
   CLINVAR_REVIEW_STATUS_STARS = readr::col_integer(),
   CLINVAR_NUM_SUBMITTERS = readr::col_integer(),
+  CLINVAR_VARIANT_ORIGIN = readr::col_character(),
   PANEL_OF_NORMALS = readr::col_logical(),
   DBSNPRSID = readr::col_character(),
   COSMIC_MUTATION_ID = readr::col_character(),
@@ -358,6 +360,200 @@ data_coltype_defs[['snv_indel_germline_raw']] <- readr::cols_only(
 
 usethis::use_data(data_coltype_defs, overwrite = T)
 
+# Column names for TSV variant output (presumably; confirm against the writer).
+# Each column name appears exactly once in this vector.
+tsv_cols <-
+  c('CHROM',
+    'POS',
+    'REF',
+    'ALT',
+    'GENOMIC_CHANGE',
+    'GENOME_VERSION',
+    'SAMPLE_ID',
+    'VARIANT_CLASS',
+    'SYMBOL',
+    'PROTEIN_CHANGE',
+    'CONSEQUENCE',
+    'LOSS_OF_FUNCTION',
+    'GENENAME',
+    'PROTEIN_DOMAIN',
+    'CDS_CHANGE',
+    'CODING_STATUS',
+    'EXONIC_STATUS',
+    'MUTATION_HOTSPOT',
+    'MUTATION_HOTSPOT_CANCERTYPE',
+    'HGVSc',
+    'HGVSp',
+    'ENTREZGENE',
+    'CANONICAL',
+    'CCDS',
+    'UNIPROT_ACC',
+    'ENSEMBL_TRANSCRIPT_ID',
+    'ENSEMBL_PROTEIN_ID',
+    'REFSEQ_TRANSCRIPT_ID',
+    'REFSEQ_PROTEIN_ID',
+    'TRANSCRIPT_MANE_SELECT',
+    'ONCOGENE',
+    'TUMOR_SUPPRESSOR',
+    'PREDICTED_EFFECT',
+    'REGULATORY_ANNOTATION',
+    'ONCOGENICITY',
+    'ONCOGENICITY_CLASSIFICATION_CODE',
+    'ONCOGENICITY_SCORE',
+    'VEP_ALL_CSQ',
+    'WINMASKER_HIT',
+    'SIMPLEREPEATS_HIT',
+    'gnomADe_AF',
+    'DBSNPRSID',
+    'COSMIC_MUTATION_ID',
+    'TCGA_FREQUENCY',
+    'TCGA_PANCANCER_COUNT',
+    'CLINVAR',
+    'CLINVAR_CLNSIG',
+    'BIOMARKER_MATCH',
+    'TARGETED_CANCER_DRUGS2',
+    'CALL_CONFIDENCE',
+    'DP_TUMOR',
+    'AF_TUMOR',
+    'DP_CONTROL',
+    'AF_CONTROL',
+    'TIER',
+    'TIER_DESCRIPTION')
+
+display_cols <- list()
+display_cols[['tier1_2']] <-
+  c('SYMBOL',
+  'PROTEIN_CHANGE',
+  'CONSEQUENCE',
+  'CANCER_TYPE',
+  'EVIDENCE_LEVEL',
+  'CLINICAL_SIGNIFICANCE',
+  'EVIDENCE_TYPE',
+  'THERAPEUTIC_CONTEXT',
+  'EVIDENCE_DIRECTION',
+  'VARIANT_ORIGIN',
+  'DISEASE_ONTOLOGY_ID',
+  'DESCRIPTION',
+  'BIOMARKER_MATCH',
+  'BIOMARKER_SOURCE_DB',
+  'EVIDENCE_ID',
+  'CITATION',
+  'RATING',
+  'GENENAME',
+  'PROTEIN_DOMAIN',
+  'CDS_CHANGE',
+  'MUTATION_HOTSPOT',
+  'MUTATION_HOTSPOT_CANCERTYPE',
+  'TCGA_FREQUENCY',
+  'HGVSc',
+  'HGVSp',
+  'ENSEMBL_TRANSCRIPT_ID',
+  'ENSEMBL_PROTEIN_ID',
+  'REFSEQ_TRANSCRIPT_ID',
+  'PREDICTED_EFFECT',
+  'ONCOGENICITY',
+  'ONCOGENICITY_CLASSIFICATION_CODE',
+  'ONCOGENICITY_SCORE',
+  'VEP_ALL_CSQ',
+  'DBSNP',
+  'COSMIC',
+  'CLINVAR',
+  'TARGETED_CANCER_DRUGS',
+  'CALL_CONFIDENCE',
+  'DP_TUMOR',
+  'AF_TUMOR',
+  'DP_CONTROL',
+  'AF_CONTROL',
+  'GENOMIC_CHANGE',
+  'GENOME_VERSION')
+
+display_cols[['tier3']] <-
+  c('SYMBOL',
+    'PROTEIN_CHANGE',
+    'GENENAME',
+    'CONSEQUENCE',
+    'ONCOGENICITY',
+    'PROTEIN_DOMAIN',
+    'MUTATION_HOTSPOT',
+    'COSMIC',
+    'CDS_CHANGE',
+    'HGVSc',
+    'HGVSp',
+    'MUTATION_HOTSPOT_CANCERTYPE',
+    'TCGA_FREQUENCY',
+    'ENSEMBL_TRANSCRIPT_ID',
+    'ENSEMBL_PROTEIN_ID',
+    'REFSEQ_TRANSCRIPT_ID',
+    'PREDICTED_EFFECT',
+    'ONCOGENICITY_CLASSIFICATION_CODE',
+    'ONCOGENICITY_SCORE',
+    'VEP_ALL_CSQ',
+    'DBSNP',
+    'CLINVAR',
+    'TARGETED_CANCER_DRUGS',
+    'ONCOGENE',
+    'TUMOR_SUPPRESSOR',
+    'CANCERGENE_EVIDENCE',
+    'CALL_CONFIDENCE',
+    'DP_TUMOR',
+    'AF_TUMOR',
+    'DP_CONTROL',
+    'AF_CONTROL',
+    'GENOMIC_CHANGE',
+    'GENOME_VERSION')
+
+display_cols[['tier4']] <-
+  c('SYMBOL',
+    'PROTEIN_CHANGE',
+    'GENENAME',
+    'CONSEQUENCE',
+    'ONCOGENICITY',
+    'PROTEIN_DOMAIN',
+    'COSMIC',
+    'CDS_CHANGE',
+    'TCGA_FREQUENCY',
+    'HGVSc',
+    'HGVSp',
+    'ENSEMBL_TRANSCRIPT_ID',
+    'ENSEMBL_PROTEIN_ID',
+    'REFSEQ_TRANSCRIPT_ID',
+    'PREDICTED_EFFECT',
+    'REGULATORY_ANNOTATION',
+    'ONCOGENICITY_CLASSIFICATION_CODE',
+    'ONCOGENICITY_SCORE',
+    'VEP_ALL_CSQ',
+    'DBSNP',
+    'CLINVAR',
+    'TARGETED_CANCER_DRUGS',
+    'CALL_CONFIDENCE',
+    'DP_TUMOR',
+    'AF_TUMOR',
+    'DP_CONTROL',
+    'AF_CONTROL',
+    'GENOMIC_CHANGE',
+    'GENOME_VERSION')
+
+display_cols[['tier5']] <-
+  c('SYMBOL',
+    'GENENAME',
+    'CONSEQUENCE',
+    'COSMIC',
+    'DBSNP',
+    'CLINVAR',
+    'TCGA_FREQUENCY',
+    'ENSEMBL_TRANSCRIPT_ID',
+    'ENSEMBL_PROTEIN_ID',
+    'REFSEQ_TRANSCRIPT_ID',
+    'REGULATORY_ANNOTATION',
+    'VEP_ALL_CSQ',
+    'CALL_CONFIDENCE',
+    'DP_TUMOR',
+    'AF_TUMOR',
+    'DP_CONTROL',
+    'AF_CONTROL',
+    'GENOMIC_CHANGE',
+    'GENOME_VERSION')
+
 #---- variant_db_url ----#
 variant_db_url <-
   data.frame(
diff --git a/pcgrr/data/cancer_phenotypes_regex.rda b/pcgrr/data/cancer_phenotypes_regex.rda
index d6b05ba290992d4886bbaf1dee218a850d11eb7e..381c553036f30093d8addab85afe820ff0eb37cc 100644
GIT binary patch
delta 525
zcmV+o0`mQv1e^p8LRx4!F+o`-Q(4Q`YUz;<8-M3ZUAEl~OeFM;G@hC%>7q1fH1eLN
zo{)*<GH7WUqttqU001-*Gent9l=PmE00006r=WsmHld0c8UO|Xvp;BB&N6eDpa|;*
z;7o2rFzh%O<-fk2b?dE(%U@3!bcB2D<4zrhlz!M&ul;)n=>%B}YclLXR!H|L49!Rq
zIDZv5@-RZ6ldI?M>?-0pF~O1SRJz5>OKVi1nxZviBDudBV(TikRdE~hKkn&JH7+nq
zh4<y0Sfgb%!WddNgz1aq^QGsFb5)7a#m*)&3s49}D4c=R3SS07RDyA?N~1Vn@y@Yp
zJK1RPmL&<(ewBuCcN;z*Z8VkT2CkGBIe%?ZWJ0>muUR`J`X?%hL!?B`=&Pu0hm#0v
zFsX$|N(q%yNsTOk;~+>DSYXu@Rh7VvuLUckI$;EuDi<Jn+AT?;sRE!?>4n}75m~Qz
zwo%}3YE_eNqkV*6iy4NlEs?;?HN{wKNh2{wU7J(`mSaLEZPa_cAtKYc?gb#iQ+9(f
zMBEGmrC5nwax1~fY<dxbM1cdU-xNn7=n%ri5Q@P`(n$9i<kMYj%oU;f=8uycbR*1^
ziZ}6bx<-QMtLL0hB&j|RvJi{xz#|z>>t$-A5L6i&GM(ng6D><rnWEQ(`G@dlYv(^-
PAL8yvrwS4odhJ~xXrAtM

delta 524
zcmV+n0`vWx1e*j7LRx4!F+o`-Q(0*zao~{-8-M*$*KN09s(2wDrjye}JvA}_Y3el7
z1WcJUG>u2700002CTM~Q={+WZ0002fK}<}hLliwgXa}fip=BuhourCUCP*`&RFMi%
zl|p5Tje2?f^4pfzw!3+D)6P%RpKZ5t8&$dGYPa)5KGH>)&eJYL6=scMkks6PBY9(o
zGJlF#5_fxiV&bqa$<6MuV#XTiFD@$u1sD;mP>Tru#)Au?daC9+@IU^E0BT$0m<(~#
zSBX8<{K{EgI?9#JrpH`QJ7}usTML7j&@2E#DY?uYz^*sClEf2*a%LH&@pg5&?Ga|A
zB(N$@ow`-kq||J*TU~Re<|wE@W>rhU<bNz`Z;Hjrfc)=tMH8jMXIq*WKLgFeZz)oh
zA!sI9jV3g*gM@)3Ap-`eqKvKtZFnm#m9xYNDxh2g*0EYzhNKFjXQ7WH6#&sEHkCnG
za!L_EwvwHNs6!%nq-iRTLooP?xZbi?a<JSuum>#0gihMhF^VLM&lA)NMTM5dn02IX
z1_92DM6S6N;OtmE2*DymfzfY~0+6f(35}oxF=7Y7z_S|Sy8GmaCXBkC3sP(_F~pRK
z@!w(DGeLtZQ&&Kftv5B&leO6diIp(Ecc^4EQ%57&Qvk$eHhiLp31jqSKgQ5%r$0Ua
Oi@744C`dGuxbOfWFzw9%

diff --git a/pcgrr/data/color_palette.rda b/pcgrr/data/color_palette.rda
index ebcda276f2b43c8fc0d7c41d8dfb07c561423fbc..3fd337f6dc478c20e85e26d48bf23b68576a52df 100644
GIT binary patch
delta 648
zcmV;30(bqc1+E1SLRx4!F+o`-Q(3cL=`4{B8Gn4phSUR6BTrKvsL7ycHlP3i$TVaP
z0BAKdm^D2nfKbrT27#fbfHY_T0me+4X@tng7!ir11Yrh^FaRKtK&FO`29)&CJv7jb
zsgMBEO{s)v4XJ`H4#c2>y6!ZhFhM9mB!)2){QY*s+!AMud>m;5lB8W4l}T>GfiTL+
zRDYDV5AEpL(aj640ZIxGAOO#mxX%SLmj?{EvFJuIGcz;ZV;M#Z6slCLuC*5wziu&Y
zb<j;<Ai&Iy$i2wWM)k$ZXf>GFFRRnv6s_yIs?9u<(+>DYLJBYeY(he52{EM7=71KA
zK`B5;1a~`F0U#K@NI$tlt~KFw#Dtp?o-+R({36u#jMC4U!qGMzRG6Z|1M?Z@?@(zg
ze1_H`h^I=Lh>028RWD9}sgP9S2#GX`lUV^De;o&KcoYj2Or<1=2igc1tut83r1F80
z#h^4+#jX#0?jX1e7AJs#kdg|P=hBm|@aotdbGv$LfPML>70eki2goV9CKo$vOG<O-
zUXpqM0{{>KB|!s3!hG$Z=nTO{TwX}_c7&#Dm|L$Mz2TxbffLW&%byi7ZITgQp5h=O
zf9!xFdQsZa9pLgpt&Qd%3&{Lqb#^e>L+C82jTlra$llk%#^Oi{Tz4K=F4$y=y_l7m
zQvVPag6S!P#|l_$GXaAe4ZRdME5z}t$$@tMF-}K0I1L!^-Y@ndx73r*PbzdGl#pJs
zBy2MrvKKu@1redDi1d$gjb&zdWZoXBI&j~-+Ty|4ptU*iTmjz#4sbWS%Q4-eGSJCY
i>o!>u?gX>I-%6w_3Cn^D?t{TE_`8xR!i0sI|4Crj$0e-*

delta 648
zcmV;30(bqc1+E1SLRx4!F+o`-Q(2nOrs0te8GoF}hSU(!h|@-TqfCLI+JFE9AkmOC
z01Zth4NpV?AT$A>XlbAg8UO%slO~#BGBO4PVran_L8D9n2qcgom`xf}#7&bzHl{!W
zO*W^aQR)L~R`x?$laN(Z%#+3h5Ob19VT4EX_1N=(L5~;jXUWGr=%8f@(&1tR!HtMc
zlz%mi_VjD(>2(ePN(vYV05j!oGj%jegN9sK@J2B+Gc(>}87(W+s#L75wHFk>ZZU0j
z$eO@GftekVdy$bF*B33I)?;A4uTOk1zq)NU@^j0P+noI9B)|gJgoMcg>0lf(Op^h4
z!U#RWKqJAgU;+RzzDPf~!@2cX(!zw9gfyeY&4R&cZ9^Q4e9a7quy)EKdji33ndk3N
zX)An&)**<eN}7m?8QfJbPC-*3sl*WyY6_EA0Uv)Hj^OYlELAj7LPUe@1Pj)gtYp+Y
zpk#4q4HdC#gWo%dE&|1g;2>luK~miMQgz-PTLZ3lZ%uFyzcjGSVKj_BVNJ0xx~;7x
zCSOAIQ_cVvz<>zq2pTpM<!KXWFu6TUypP-75Q(ZGeygwC8Q+8v9KIbHaLW$JAp>)n
zm=J&UKoLD5R<s9qypXA1aSvzUeIn@Z;Mv3EEhvkDK&l^3u0CdY03|a_*xb9_iRNsC
zEykDlaJR0)RruJIl^Fo8<_(>4I4Vfosv-p#E)-yO5pc-5%1j$kQ+A};W#vsF=|KfH
zynUuaq5|ritvqBhoPS{46Y455yB1-z!$vp?S(hz)bC;%%EWkDZ9MN%`ac$H|802D}
in|0dqiqgvCi7C*nBMvt&x((!C#oUoj6eK3JDY!tCCmzWF

diff --git a/pcgrr/data/data_coltype_defs.rda b/pcgrr/data/data_coltype_defs.rda
index fd1eff8b5cdb8c4681d687993819e471f4645b6a..fa531718093fa55b1227b69b9e8382127c2931f0 100644
GIT binary patch
literal 1922
zcmV-|2YvWLT4*^jL0KkKS)$8{@Bkl_f1v;W|GPi{5CA{_|M36s-=Gix00H0$e|ogS
zfHVLA0009hKpL7!>KZa>f&c&j0MGyc00#{;!3>6)WMsktrhqV-G|7Mj4GtP<f*B1o
z$jO8QO#opuX_Ej58VHJ&Jd;mT)Mx`EKmZ1S83(8UeyQqunGPChf*B1o$jO8QO#opu
zX_Ej58U;xRCP0MAqe*~N^lEKSQ`9u|Jx!w{3IG{9q#(PpRY*a{5)fdy#u`)scoY{H
zC?thMRLPQ|Y?!*cTZRcBe-(d8Yc0kkLBAQ!A`6d>K_}tCKm^MSh-DjbXweiA!XTnP
zBM8bI7jGnR_{u1%e!}80%uOZ%0tyHDnK3#GXE+WVdoV8~%<G3gEj2qY;D`p8iwMFn
zgo`4;k`aW7As_^ll!Anc*rig{Bvl~khc%~OQ~ck5ecP{RD(dFz!FLAJb={QVx|(dq
zZQ|NlUw!V%(?w5I=oxMIR~|Uvx2vb_-Hh|>T*nz9(>7SbLFdh;nmP2cn9@NNb8gSN
z^YUzP<L3B(4$fwLn^QdTuC}$UYg*fFw%cvVudHy6Trfkdro=&ez#xoL5fqX^-)IPz
zp?ZlD3sh1NRDvP-Qcmhb21yWLvJy=CXSM%H9+&OvyY(bWkj{hY`u=Y3N#w5sMN-by
zLzg)^#!1ZKXk^s&*{SCnrfk-deosV~`ubu`W}cGdSucswjH%0|N|j%ot(tXHc8aA?
z(Vdg8%v(RpkBt8ZyCM0#GD}Kwd6`0uHFm$fY#BYx9;S~Psxv(9r`Y=*wd08UC)>k>
z#6N=%xw7x%yY0I<_c#*MlAhB=otKhX=6cR;SF)F54$Pl2l%PP2kRXCFfe<h`7~$Mj
zH8Qr7$JU5Vp{CIwD>1d(U}0qN@Uzr<xFk<Miw|4pz9O#0p9M6@jF!KZ^p9V&biU7t
zk*VxdcNDr=X4|egT6@^0>_}}x=9@h8uXOA?G&kpWHI*J0b+>OkYcg56^`G0rj;>r>
zU7r`eF+4fZ!tPC;6jLgU>4i+*myWd}Ztf|-(J;-(zEV@u)g{@cah=!6bvIdda?7JZ
zmVczQ@|Tu}u)T}BwQg%qp)}re$!W$X$oDsn=5VrPUQcIdbeCp#9p$AxPXg_c$w^9v
zdOVnm6yZLFzGoIVxT<{|y$_S0zc@Naif*^jIl97G=I%O%`+6f&XGXI{@S4e2nOTF>
zYWG`~oFw@AJq+EBo>q2R8#*>;P**tMyZV=9QZ#VsHki86cJ6hh8{KGFso7}t-EBw0
zZ6}e8qs+GzZ0p{pe%Hy$ZzEQ&7Iwc|=U!%K+r`A~rc~Udv@5o?EiHAW7`ZcIVwh4{
zNlKKdl4UM0h9OKZDUp^cD_b%teA%>R)pl}Ai#a6jVA+;j3rj4-yrn%&n25<zu%Tj~
zLek5<UNeq{E=(NxN>6Fw4W|jz%Jb=#vOZJio_Xi|{T(LFoSQJJU1=4+>F8<CL%Dae
zx$H_^V<w(Uu-P?Ck1}JECTdew_t(?GrQOeBc;DK?Z4+ZA>HUi?p0wk6?9b%6%&x0R
zIlSI_94XsU{8>ZRs-;y|gJh{EJN_9ty1u95KBmoiEZ%Tiq?T;&l2=A1wUW<r+G?ky
zvsoGB(Ph-Uy-Kppyz-@0VKFLHrSE1HB|P2SUQY$x=N|;=^U5Z2PJUh=FNSQ<+Z=3@
zcy_q3XOo5(b~|LzXT{x;{4L_9>Ehr!G-Qj-Y(K7CXs3g{;e1nz57?P?n^ikYTe}xw
zT^7p_=EH%N+!1u?>w27%Q$)|3c1%a>t2<UOyG2T>Q^`Z$%);#*?p9CH?|z>i5_x<l
z!h5G;b~)3E$$vtZ#IeBb@4b?(b<3=`YZtv)_L%QsuY%7fC+u$7D>KxmYCM}g@|&l&
zRExFCcVhZ5=O%V?_SkiKy3UDzGSeyhuKii{%jdp}`LCn*v{`nIS}x(1cg6D0CVDDp
z@mlIMX_k8He{%Y2sl#`lM<$J#PA!?N$16B^*>+gi;L*E^o6ECb3Yqk7*J;VWAF9>G
zGv!ugEL|>HW|ZZN87W;cjyPSiu-(g=jNN(X%X{k0XD25Ol`YZ7X6!HNYPWpOZdSUr
z;@fpj-LTbab#He!1=xLV)#j59ZoO$ux7#c!qyGqVkVpFUvVH%}WP;=>1%5B$yw5ZD
zzaAM#BSaek4Jra<$3gscnt*SPcWBfL*#tPCgisSkj{~}(fr@~egSsdY0R6M^oIGR^
z%^?PV%43t1vUjJ_hm{b@xC#gy;MxTuPwYr8sQ|J|Xch?&tzr@kioy&avMC5HfXhWo
z0>V<rr$i9LXN&;m-x2a0pbg{v>5R3pk&&C#v6++v(q}V>>^xOOkLNO;D4iWA4#d?V
z1}A1o2sk2C_e%E45?TijeaLO-!3^g#iKz<`DhDSNO(H}dDH7@-)gcGP;+oHa{WP59
zAnhUwkb|6pIZNG(e7Ve{@ATR=?qx01k=Y=NDIo>-OUy_rc6{K6{X{|dC;yANBAh5l
IQDwyV0C#4#YybcN

literal 1898
zcmV-w2bK6jT4*^jL0KkKSx9-_e*hm?|DgZ>|GPi{5CA{_|M0)>-=Gix00H0$e|r1e
z=4R-1=l}o!20-QE1{$8F9-sgopc()G0000007^!aCX*(J^%?*G00z|2=zstKI0g}f
z(-6QQ82~ViGGxLq5YR%EDWGYiO#z?)000Jn0BG8PMMS3cK*$;cCV&9Zk)upa4^RQ7
zfC`cjOoYUlG-)sjo~G2vwKN$9pQ<+0fCf(ZAiJ!pf(|%>3>P@VN=YI=<ptY;1fu{I
zH8n9z%G-4Hyat6p|0RDJYc0kR2K;9^2rfQZ1fO5%ilHbJ5Rk?=7>q?gMw}E89#Z+z
zR&nwxOG!l*<Mv||H5pVvg9*mjj*M|m7KLp+It<u1?!a<)cxiS|wMYl<MTjCIDQRG+
zn5Qg?R<gnrDO{CQwMi97I$_Oe)0F<_*TU`Bvz2snbl|&#X}a#T;kue^$7`0-%KF~y
ztu$2j&fv>$@Tuo?M(0yyq0fEMXkjG?h!ntkoFV~v<gkHES}=4F0R|{dcM7M0Fh+W{
zZgpEn&`iPdI$2uQwXMv|%*@Qp%*{>q@ZB6((1%G)c?InV0x~LsaiPFe2}MEC0g@n~
zTB3pqks%MYB=4yp84(5oDx%9SSrcw4Q0UAiH^mqzN&pq?)~8TB)#7NXS=mtK&Q6h%
zb2wTVH9fX!dPeD+HKd=D)ffDGu^P=iW#lZE!@7*A%etjXueGh3bW?VUrBKnGlc(HU
zKg*Al{+Gic_Il)&l;rd>g&Jz?{YzlU@M!ilc~w!EUM>Y?W`L+p1!;mYM*)a&VKAp?
zrUiOfa3!ZDJ?4u#-brVn?K!nx%3X{)<olE$f(jWDLWn>lAS9t7q-YwTz0#Q))eUNd
zwMh=<FRzDY7Ed1=J%_V`MD+PE_P+b#D(qSEQ%so2Yx&<u_WQR>?D?4*p5;e*OQn`=
zy5p6n#fomkhSWakv(q|v&Zk`s`Q6QBM~&Sr+nzO<EZqKs@!`i;E-o&=H^wnMdC|h|
zO`bGUDvarcOy0MawIXisDZtS%&B(q|Q`gZY*`{%w*U5D^S$1;Eqd}H`zFK+9%R}5=
z#ohYuYfr&6-gC)m#wXDDH;(pjvSeOQcV76rGraIEDeQQcY>rAwR5Q`$#8{^X;J4ky
zjxH*nhc8p;=kd%tqscd0@SNQtEcADs1AV<wsk6DP(R?PdRpwS;_8Ptx<);Ms`n^ou
zUEX$ev^IA(XF*)!g74&Al}ORUx@|FaqV3%4N;kUDu~V(1+jX@cDYTwuGLJ&sRkNpp
znfu>IDZGtZxLMixpL+8%KHOYR)XJO6OG3L=rKPU4qZcM@Oj8O=DJfEwGEAk#@Wd&F
z<uWqGMQdh7FPk=ux-QO1ac3l*>>D!6foWx!UR6Dfm?I@h!i9={3rjBcc}_YQxiE9(
zDLv<mHk>CzE6>p_WPK;!Ju}nx^><C1IW}Qby3#9u-|A`4L%#2Kd)$<|#!WoeVX|tO
z9)!mxOw^{W@2{_eOS|6W^1r=?+9t+L)A`n2J*md?-Jj8OnOzo=b9y~AI8(Jt@nsKc
zs+Cn<Es~_1@Ab*e(fVJyewNL7EZ%Tix-GN9NnOlMYbBorwAD{pX0kJ6(QB!BdX;6H
zdSyzg!eUgZOWw>WDd^qi@pwEB9#tLRe4=M0=jY?}`DV=>qm7bp4$dqY<l%+gU8K=x
z&EJyzE##)@<l!e>86xvr59XH|DdO;W-xT7*^QK)U)lS8&-;1!W<+8*%u;68P1YJA5
zuVa#GXqoeF$%y>bXJw2o(Ne0E^HBKnFuO;Cm6PiDzTZ9)dHko!d?!@u=T0jn{YqaF
z#|hiQ_DZ$aF15Qcd)1%XW5I^LD?Hqvox5bL&tjdC=GpO9>F<<_waa%^^kL3S?B(yU
zTq?Mfh_ww#68U3|xLtnI$}U2{#6W1I2}lY?N2``c$UXxS23XY?5FsPwm30M#qY^fZ
zs20dJWa8PH%yP4bs>`y*#|DkuRNh^h_*BoMcDqhZ`2IGoDW59J>R7s5vdt;W7BW)1
zv5q)hvasFDnvC5!=gWKQ&1WYr8!B75$7bv==GAW*oZPK-YsI$eowmbOsnNaM+!tZ`
zotK(SIl6SEHs5TptsnS9oCyD3mQSznSs=LzfnSsS*O}&iTH*v|iE1EWKoW)t&zBX7
z0s1suL+lrH5TXb{07>Vr-atV3KoZE_6bK+4FN=?)gmXa#f4XCnl(Kjy=tI{;GPJq^
z2Ov2>q?Y86x@ZimGXTg*3~`X5i5P~3r6Ly~Y{63?$gs*&m<VXu;D9>6(H{xK0NOOa
z$+H}qX|}P+wNM1yHH~2%cYsha{3cV$6S>_ubxlw~iPt0$a73x_mG6`!v<@E|klWRQ
z8O~`FQWhjs4o)bVLLl-;mr)LgAo;wLS@J)QlbjHCLJEQoa3hqx*tgT1%09lkM!oE%
kx)M6Tizx&b+|1%A5OZ#dpkYuzoPWjKkxmpO5*~M-z`*jXvH$=8

diff --git a/pcgrr/data/effect_prediction_algos.rda b/pcgrr/data/effect_prediction_algos.rda
index 0f3e66f20ba697d5c15e44e3ee9274af4332bac2..dad8df10e53f247a00f2d7c296d6158178359be6 100644
GIT binary patch
delta 991
zcmV<510ekO2kr+BLRx4!F+o`-Q(3ocwC#}&AAh={>dm0)8YI(3nx3URh6y&Aq}14_
z>S*;RsL`g>^o;celgQF(gHI`-8YG^km>QUv(m(@1GyrL)hJa`Q^$$?cXc9<FfelTj
zr>Uk<>S?E>G-v<-Xk;`102*i#D5t7vp`bJaKmY&$GynhtKmnlPp`#{^8Z^+z$YB6v
z0Ds6d&;SD<WC$x4TVH1~daGgs)rp=Fr!h%13-+Z+=qEMZ#Ndd51sN6V^eS`no<tq^
zkJ$Skbw=#)DNZ8YiH%Pk=&G`RJJgaWKJHE)Iw6l{aXd5{g})|r$kR@^77P$8q(lHR
zAd?GtD6%aGNb+<Tf=orpy?(7}z>*E3+J9@+9V+^en-Z9L1m7k+JS|)dUWdGky6>&z
z1qGNBiKRF;pl6$75IBfxaO28Pvh2R}A;y5QYLBw3+erb8ENwLuSfKOH>6j51^&?S&
z%vH?;Ffgkk-2K}3C<uh;13KI@U0=YSV-XBQjmY%*recCa6BJ<`Oec9!hA_}t41XDs
z8IhGihrhUZ4L-`eeX+?aW{)}bW5<sc0~LhXZH(d~;H#-MFWwjG(|W<LdSL*`n*%M_
zGeMfSGP5QOOXR6{LuvDDPlYq(&wMV*1tEUgZbi8wj?Xm^@6iYnew==~B!GNRK#g!(
zP3q8Ax0sUFfejSCDT>^5UZIhDP=E19hkwm0gP&#vdaRLV<8@`iNCsniRkvI`6Ha13
z(19>8A&LBj(o=R&Dn07p!cB$LNG>D6S$M-RFwR-j6v?d%&$fy4>Abk~!s;0bGMR$J
zNU#*q9^D`%Y)G3cDkLe1O%CBI>tUIgcN0klK?Vkss&2X*aBPPOuoX~I1b>0$9Jg}I
zFHo3t{vd<TM`wSa(x<UXBCIyTwLm?d2<!o9mkC~;A6zOAxI~s;NGNmW*e9ZMaFt8D
z8eUdY0&ElQ;?r=!wP%8GnR+p}wuxbZc9;~&gbL+YG$SVvHDF-l(6sqgAn;E`OL7L-
zjhB$dC#y3UXvso|^&&*>VSmHZGT@+w@e)B+?ev-4MjV9&Fbr#KEdch!3t+FVN$S9)
ziDpt7;!Og*k%$-W<<=mTE?NlOYU3i$a<$J!|0!De*c1pmLY#WTIVPlI6Hkr>_ezAN
zld$Z?N*mcWS`sK6v--I<_}IXSt2Em(hD}%HCb+=z(TI?mXw*3|AxNT?10+Clkkn#_
z2{@BjP>twO5}Y)kgi*K$MR95s>&?u@Y45p;kx0YT?5r_KJ(CqUS%r*&LeYEwdklZ#
N?ntK!5*F>2ouE+o#Z~|S

delta 1000
zcmV<E0~h@62lodKLRx4!F+o`-Q&}hAzEhD7AAh}iJnK7M9zszjfK4gn(+LkuCZ~x%
zRMTobMk6PrH>ouB9#d*GVtQ>;)X*|S5vEM1qfMlPKxhVl4Kx5W&;g;LpaM}E1Vd7L
z6x7qyG%*1683Q2EkkDutKmn!+6jc3FO*GM_fEoY*000000iXcTB@$?e$?AHI57j+K
zsDJe|VgLYSGyr4(WY7RXP;;y3)lXH7Kzgw=!ZhlXlR&>}RGxxsUEEFxkVHsL%lDzW
zo>ylnFUFp!*8}uX`>{g}1zN7eh6K4vCsX5st4dHio0poI8~Pk?Eh!lc6KQPHD|W*I
zh$a+70YpJtG(Z}QAkc)5CuYPFVlEBq<$q~T1dwc%pMJZkXJ+EY*4*bO@<#dFcL5wa
z-|kMlJ^QR4AyA-_B?9rnyk`nD{TqGve;GF^k@z5k2|^^o9*T38n9)d7zauHe@cZu6
zP?b*fSCmZ&Wmsqwdf9r9qRt9Lr3i}m+$!ILfSd~ku#uWN-D4?2LXatlbxk*wn17<y
zih_!$im0hv<m~G^N}X0YzgVW#B>1$iPF%TiV^e6KJ>y9#aoE|OufLKD6`!=Q@J>W#
z(T+RTl~}6w{&@3a$qzkzI^?a<xklq?>eIYKZG}^M^KeVQGs|zHnD3>GCigi7mQ4}j
zU1lg^y4=~GeDff5v7pswmf=A9hJTXFRL^k>1<$^FZpa3NP#H_%)0bPPCNiLiHzi}$
zL;<%ZDfkp72n2Mf<1wgSu%S8+Im9FIvg%-9kaW)+D5?r85=3e~i$ENDOEyYwBpp${
zr$cCFS;K5s5YH*H&=R~VH(`eLLotSU?Gmz{5-N`5kr=2kLt)pX+hdLw$A1z?lE9LU
z4(wX6=hdsEocnP_7xQ0NXY|;uxmHx#JmcOFTl{#f1=NP(ZqXoy<HQakHayG}bK%h^
zNtuaOo;cL=0aFmeKE0ZkiXA-fEW!!x#-U2|vEVYw!-Z-UO))%3XeepmEhNsfM8re^
zyK}8n4=zbza+W<avY4rL5`QDoM2Wda4@FenpxcB=1$%iaBw+(`5vceIVbMuJ<-G)=
zPuV6skp<ZS6k~A&MPmk#qZu~zIb$MJ<k2H}pNxvV$!ng6t4%F@)DebuicET9nI))F
z#hx}YUn(L>?*p=xqi$WKts;YyoA2pc=Tiz0z-n6rYqQ^=Lh}pDMNuL&AsWXbk`^gV
z86pFehNBcXNyM1K&NqQbN@&u85lY-4R;pN)*D}VUVeP?_ky<h;v8n}x#TJ=50=g9u
Wi>*)3AK+1W1>BKN6eJ1wFO<MNvBNh2

diff --git a/pcgrr/data/evidence_levels.rda b/pcgrr/data/evidence_levels.rda
index d9869a9fcba399649baa8d09120fe26c155572ef..31d598d91a3866cd9e2ed0edc45d6d6fa97eb2c9 100644
GIT binary patch
delta 80
zcmV-W0I&aUZVp0PXgM)KSte6iVA@(Ykq#w0C#IktpwIxXWnyG^lYov>*~PUnxB>xX
m5;7<gSWvA<p^%-rvHXjRg$tyRhNekh#oUoj6eJinmW{yY8X;Q%

delta 80
zcmV-W0I&aUZVp0PXgM)KSte6iqKIpRkq#w0Ob}`64FJfozCr|hq(Dg&tQpS0;sHmM
mBI2b2Zpsz(Y8eUJs~^a?xKO%D2x?@N{9VZu;X*=+A+8YJJ0SZ2

diff --git a/pcgrr/data/evidence_types.rda b/pcgrr/data/evidence_types.rda
index cf30753dff6ddcedcf487ab4528b50c97fddb00c..50aef647f05ad60385292a556c15e28d80cb7142 100644
GIT binary patch
delta 121
zcmV-<0EYjH0gC|+LRx4!F+o`-Q(3=AyI+wG8)DQNV1Q&a$TFEmL7+WA8UZ0SR1+<r
zNrMezqQ&oZgt!pspjALXY!H-hqqcIHAdo>x6@3SK(s*T0O8<hcWYm%~X;tZHd)4F|
bEa-eoLmP<$3>C7@GW=c16yZWb{UYsr<YzJM

delta 120
zcmeBX>|*3|ipsDwbc|FBj_eOyuy!J!bfV+3jD{4G6Glo#Qp{Hv7!!Ua`CA>BEGFe`
zZkMT=vXIGRh0H1r-l|D!Gv3Zw=Aj}G<m%VFH`B&w`O^A>;bvK$hJ9MIVtB4Tk<<B6
Y<Lw;V$;Q@oGpA?2c#*3`zyy#b00f&YdH?_b

diff --git a/pcgrr/data/tcga_cohorts.rda b/pcgrr/data/tcga_cohorts.rda
index b5f4d6b5256ddf6b8513a837202f5b2c79fbaae0..5b7656cc2279caef5a5a29069058ed12d0357f21 100644
GIT binary patch
delta 725
zcmV;`0xJFg1^NXJLRx4!F+o`-Q&|&K9QctA9x+X+n=Fhp)iezh*iq>N(s>gRpa5y7
zks37F5YPdpp^-Er)YQsvDD_4JJwpbB0BMksVHtl3l+`^)qI#NSF&K|fFpU9(^*u%=
znhhRN3k$^;--V*hAU_S2$t00cftN;nA}rCPEy@Ui1H=K!ALQ($-=V&yTs_#aGShD;
z1{<N394$&Cje!m6$`V+bL}TrR8cA!QK+*)k_nIXrLP;FH*b<`<b940H)z8%_rvH5~
ziDG}7(C&&-)TLr7M`NJT4RWY~s5PIZ6O>Ls2TQx3tZxHvKg)Bkf3I(P?|+IV1)H2F
zBUs&d?83sZY!}qb5o=UyHd%@^Kr6<;$Wh>uvF9j%90ON8FU;KpPcqi`eAej@?P#WM
zIMWr?g&vb0hMB8YoGX!Xwla}95;m+dOt62^-*ji*o@gPZZ2GJ|&FlI~iOk)CoKD$o
zDsrbd8n(ew>#p=wUz@CUlaMB&5GRcZA1*tn^E}qM##uO=Eb5aynu`VaSvPx)iniD^
z*x1_oZ1?n4vd=eSS@W2K_(Bf1jCESEs3c(Pu<;13TrhMACRmsdsFt!Ye!%TlTYZ1g
z3!!ml&q3wYNSsQ7(K<9XApkAK#RiBRd_*%_R5A$jF>=Cg55L6)rKt*nuEH-cATzY6
z1P@AAAtlt#m=yh}T^0f$dSa8=hMVNX5J@DIKCHeF!WPSWLt~J*0kTBrGfF`K4$&q>
z&vD%~B330sCdZay_ZJbguOPAsfJ1)<&xS}gfbd$f@amd)@EzNdy^?ihSf<EiUj$B}
zR(V&uW9zcIgq0m5i?j#|IdMm#puO1SZ~+}{ADG1_g$C~e_Am|+HyWvUMl3LA3l;B?
zEPF>pAz+An<AdEj6}8>S`9DoG><d8OICtf%!L^*wNF1q?X3$@MFZQ58{}%~zML1B9
H6I2}dcKAx>

delta 730
zcmV<00ww+W1^)#OLRx4!F+o`-Q&}RW3ipu?9yO(@n@o%})ih|-)Yy^f14oH5003w`
zG@3m}i46c6YG@Nc)K4luDYY7m(mg{)o}kmz(WZcrT^WA}O;gf(nW3c7qY<=>5s9GF
zPgBt}Y3egc<uog<^jmG#iyA<G8!N*kkyyaRpEc1IG-t(9gg}DQ0OJ(+y#B+*%>9jl
z^zGW$K1u^0%+)-uuug0c-U&iV^GJ*OK}M2ZbO;!dJgpE6QiwuH9Ull1qY!>N`z^`W
zl2c9hc0zw8dT%?bDN89xG7clDjeG<1xxuD@`oB&Rj17PfgF6>+;0JQ=mv^UEOxQ4X
z<wWo;-`+Gik?TiR7L~uKUsG^JrBSTiZYzj@SCOGXNd%2bZ41cI`elw8pU_0JI+bV5
z+2TVtAep%#Je{){s#+dqc`H_YY>k(eGZ85ftyq8Kk%7aw@b%l4NFkk#`Y=A;99hbV
zrFP12J#}eQwL7}x%@s1b@WoasI!9SF1kgeR%g&?i&s82bj>?>3lZnF2sWWp?T(;{b
z?QyYNZG%mXjjo?YzIxj%c>Ge$n7ke2Aj<EKwSqMSi<x2+HV&i4)-;l1k!cP@t?2N(
zqpg3;uMF@Gz1cnYOmOs0B^Quw6J?7KfEeo{g2)|wL@Ak2#x7FT`)RN~{}d=pt|}LX
z(W!#NrORNlS`8NxVxL6TwB>9?VGv7vrkmTZ2)u#`B$9`1yg}i$$A<UiVPFG65|oVQ
z)By34WK{i5si_jMl?*!@vllT@8%oj(5KMmp7&d&c!L$dO%H`?6Xy3SM)RnB0t(CD&
zkjRq=l>&>SPX$X&RL~;iWKK-HG)<1l6&!;GWn%yc%I3L@5?IV|al5JDFhJ)tPXLcE
z7`=k0D#VwbvC!7kAQn)?Su5#&e#BU=x$E5(fvDrX9h>@YGZ6;VvqfOO{%`F7gZ>IG
M<ce^iAVo|S?&*M6iU0rr

diff --git a/pcgrr/data/variant_db_url.rda b/pcgrr/data/variant_db_url.rda
index a109ddf4a147d553f36271213119699884a76e14..87337e34a0ebc811919bc5ce675c32f4b7f75f2d 100644
GIT binary patch
delta 528
zcmV+r0`L8$1fT>CLRx4!F+o`-Q(5W9V5gA|9Dh)@SZZlZLrj6701W^QGzNeT21bnp
zO(7m6^qWu`00000000J>4l-oXh-smKni!fi$%q*kO&V#088DJUYB8yl^wU#Q)cqs|
zfHtE{(guyGrjI6|@`ozgnxiana%3<gTMSdle;F{A?fdIXvu+NkAOjl`yCKW!!iaNn
zxPOcqoe!4h%+qO&FmaQShN7_GWFv?KFoZB(<oCEq3*_Fm5+-3{?CagD@DX{3kP-q5
zVrztskY(k4chSyIl691VVJ<ph;y1!tPg7QPzY_b9J5X#m)J;DRpOuY-$t@hkN>x^z
zu{_qkG%2q(TcK^2Pd!m$08p6NX;Zs^#D6*yrxHxCtSQB$<JmwUIsq`+vnLo;7M)*B
zPa;sMt#se^A0sM=6PGKiF|OY`6m20nl~H7hN7g1U;kUU6nX0jw5E)=|^A~=wIY9Qw
zk#uckqi4#-fEpmH$Q<Y-1x>*v(*g2nI2bshb0MrBowPi-Jb`8*!Nj;IGy^OFmU$2Y
z%R2AJe$63fGR#4FTU&n@UULE6ptmlZuZT!H$j?r^-S!Eyvx7Lnf$xS#Ry2ojgI#TS
zD7G((H_1qAn#?ymkl{NFbQL03#006Nijvrfm<UyH0u;byQ78dOzJ_|RBmi>-6;_;g
SKX(KR_`8xR!i0xTI|Vp4v*B?7

delta 533
zcmV+w0_y#s1f>KHLRx4!F+o`-Q(4l0#io%C9Df4T4MJ#!jSVz3GH3t+ri~36001-w
zr=*iK(`t>XW}`p=01SXK0MHLp)Es2VqY%?W0W>i*X_F8#Fq$;e2r^+Lgw$;lDB7o~
z>SwAA4^g0PMwzI{$+a^{y){E+4pp@kMx1eSWH2MShA85<F))_RyK75T_&T6~3mB&(
z&VN6C1VfXJ#9-3S`Zhjhn?z`Xi#V8RRvtD&Jb)pPg4RQUlicAXFSC3YNSTF;S6<CM
zp%<dyh+x!Z*3*3jo58aG+nn24Tom9&P01hDUF~K*#le2BLhK-GL2;Qx(arbuv2ij>
zKOs`4s?(Myoy(p|YfG%oucxDysIdSfn18s?r*`3pbSF+JOt7pe#f0P8Kp;8+Fxq#M
zj4D<gT{>PwlA^WIZ{&R@RS+jKER~qnzhe@-gyvN(+DZr3CN1#U;)G4=w^@*IRe_DX
zTY<5;4^9yu15=L^E;ii(;)Ok&#yUVW_)5B9SZ%K@smt`}7=+W(`y>`T(Ry)xZ+@4$
z4rm=2KsB}3`CJVUD<~?U3<(8UiaZ$*g#^OOy1EGg7zT(Li`IQ2fQuyn&;$vZ*)Ju)
zU=u6@Rojz3?5&l8$T9%bV(OY85H2@P^iq<rEN!WZ(n)Y)N5nB49UWu=87EyCEfxTb
X5LMBq<NF{W|BJaIoG3_XKw{GX14ZUl

diff --git a/pcgrr/inst/templates/pcgr_flexdb/flexdb_scna_tier1.Rmd b/pcgrr/inst/templates/pcgr_flexdb/flexdb_scna_tier1.Rmd
index 8532115a..fa2c871a 100755
--- a/pcgrr/inst/templates/pcgr_flexdb/flexdb_scna_tier1.Rmd
+++ b/pcgrr/inst/templates/pcgr_flexdb/flexdb_scna_tier1.Rmd
@@ -31,7 +31,7 @@ flexdashboard::valueBox(length(unique(pcg_report$content$cna$disp$tier1$SEGMENT)
 ### Diagnostic evidence items
 
 ```{r}
-entries_diagnostic <- NROW(pcg_report$content$cna$clin_eitem$specific_ttype$diagnostic$A_B)
+entries_diagnostic <- NROW(pcg_report$content$cna$clin_eitem$query_ttype$diagnostic$A_B)
 flexdashboard::valueBox(entries_diagnostic, color = ifelse(entries_diagnostic > 0,
                         "#00a65a",
                         pcg_report$metadata$color_none), icon = "fa-file-prescription")
@@ -40,7 +40,7 @@ flexdashboard::valueBox(entries_diagnostic, color = ifelse(entries_diagnostic >
 ### Prognostic evidence items
 
 ```{r}
-entries_prognostic <- NROW(pcg_report$content$cna$clin_eitem$specific_ttype$prognostic$A_B)
+entries_prognostic <- NROW(pcg_report$content$cna$clin_eitem$query_ttype$prognostic$A_B)
 flexdashboard::valueBox(entries_prognostic, color = ifelse(entries_prognostic > 0,
                         "#00a65a",
                         pcg_report$metadata$color_none), icon = "fa-file-prescription")
@@ -49,7 +49,7 @@ flexdashboard::valueBox(entries_prognostic, color = ifelse(entries_prognostic >
 ### Predictive evidence items
 
 ```{r}
-entries_predictive <- NROW(pcg_report$content$cna$clin_eitem$specific_ttype$predictive$A_B)
+entries_predictive <- NROW(pcg_report$content$cna$clin_eitem$query_ttype$predictive$A_B)
 flexdashboard::valueBox(entries_predictive, color = ifelse(entries_predictive > 0,
                         "#00a65a",
                         pcg_report$metadata$color_none), icon = "fa-file-prescription")
@@ -69,9 +69,9 @@ Row
 cat('\nEvidence items associated with variants in tier 1 (right panel) can be filtered according to various criteria:\n')
 
 variants_tier1 <- dplyr::bind_rows(
-  pcg_report[['content']][['cna']][['clin_eitem']][['specific_ttype']][['diagnostic']][['A_B']],
-  pcg_report[['content']][['cna']][['clin_eitem']][['specific_ttype']][['predictive']][['A_B']],
-  pcg_report[['content']][['cna']][['clin_eitem']][['specific_ttype']][['prognostic']][['A_B']])
+  pcg_report[['content']][['cna']][['clin_eitem']][['query_ttype']][['diagnostic']][['A_B']],
+  pcg_report[['content']][['cna']][['clin_eitem']][['query_ttype']][['predictive']][['A_B']],
+  pcg_report[['content']][['cna']][['clin_eitem']][['query_ttype']][['prognostic']][['A_B']])
 
 variants_tier1_shared <- crosstalk::SharedData$new(variants_tier1)
 crosstalk::bscols(
diff --git a/pcgrr/inst/templates/pcgr_flexdb/flexdb_scna_tier2.Rmd b/pcgrr/inst/templates/pcgr_flexdb/flexdb_scna_tier2.Rmd
index d826ba35..6220c650 100755
--- a/pcgrr/inst/templates/pcgr_flexdb/flexdb_scna_tier2.Rmd
+++ b/pcgrr/inst/templates/pcgr_flexdb/flexdb_scna_tier2.Rmd
@@ -31,7 +31,7 @@ flexdashboard::valueBox(length(unique(pcg_report$content$cna$disp$tier2$SEGMENT)
 
 ```{r}
 entries_diagnostic <- NROW(pcg_report$content$cna$clin_eitem$other_ttype$diagnostic$A_B) + 
-  NROW(pcg_report$content$cna$clin_eitem$specific_ttype$diagnostic$C_D_E)
+  NROW(pcg_report$content$cna$clin_eitem$query_ttype$diagnostic$C_D_E)
 flexdashboard::valueBox(entries_diagnostic, color = ifelse(entries_diagnostic > 0,
                         pcg_report[['metadata']][['color_palette']][['success']][['values']][1],
                         pcg_report$metadata$color_none), icon = "fa-file-prescription")
@@ -41,7 +41,7 @@ flexdashboard::valueBox(entries_diagnostic, color = ifelse(entries_diagnostic >
 
 ```{r}
 entries_prognostic <- NROW(pcg_report$content$cna$clin_eitem$other_ttype$prognostic$A_B) + 
-  NROW(pcg_report$content$cna$clin_eitem$specific_ttype$prognosticc$C_D_E)
+  NROW(pcg_report$content$cna$clin_eitem$query_ttype$prognostic$C_D_E)
 flexdashboard::valueBox(entries_prognostic, color = ifelse(entries_prognostic > 0,
                         pcg_report[['metadata']][['color_palette']][['success']][['values']][1],
                         pcg_report$metadata$color_none), icon = "fa-file-prescription")
@@ -51,7 +51,7 @@ flexdashboard::valueBox(entries_prognostic, color = ifelse(entries_prognostic >
 
 ```{r}
 entries_predictive <- NROW(pcg_report$content$cna$clin_eitem$other_ttype$predictive$A_B) + 
-  NROW(pcg_report$content$cna$clin_eitem$specific_ttype$predictive$C_D_E)
+  NROW(pcg_report$content$cna$clin_eitem$query_ttype$predictive$C_D_E)
 flexdashboard::valueBox(entries_predictive, color = ifelse(entries_predictive > 0,
                         "#00a65a",
                         pcg_report$metadata$color_none), icon = "fa-file-prescription")
@@ -74,9 +74,9 @@ variants_tier2 <- dplyr::bind_rows(
   pcg_report[['content']][['cna']][['clin_eitem']][['other_ttype']][['diagnostic']][['A_B']],
   pcg_report[['content']][['cna']][['clin_eitem']][['other_ttype']][['predictive']][['A_B']],
   pcg_report[['content']][['cna']][['clin_eitem']][['other_ttype']][['prognostic']][['A_B']],
-  pcg_report[['content']][['cna']][['clin_eitem']][['specific_ttype']][['diagnostic']][['C_D_E']],
-  pcg_report[['content']][['cna']][['clin_eitem']][['specific_ttype']][['predictive']][['C_D_E']],
-  pcg_report[['content']][['cna']][['clin_eitem']][['specific_ttype']][['prognostic']][['C_D_E']]) |>
+  pcg_report[['content']][['cna']][['clin_eitem']][['query_ttype']][['diagnostic']][['C_D_E']],
+  pcg_report[['content']][['cna']][['clin_eitem']][['query_ttype']][['predictive']][['C_D_E']],
+  pcg_report[['content']][['cna']][['clin_eitem']][['query_ttype']][['prognostic']][['C_D_E']]) |>
   dplyr::arrange(EVIDENCE_LEVEL, RATING)
 
 
diff --git a/pcgrr/inst/templates/pcgr_flexdb/flexdb_snv_tier1.Rmd b/pcgrr/inst/templates/pcgr_flexdb/flexdb_snv_tier1.Rmd
index 9a1368b6..9b66cb86 100755
--- a/pcgrr/inst/templates/pcgr_flexdb/flexdb_snv_tier1.Rmd
+++ b/pcgrr/inst/templates/pcgr_flexdb/flexdb_snv_tier1.Rmd
@@ -14,9 +14,9 @@ flexdashboard::valueBox("TIER 1", caption = NULL, color = '#000000', icon = NULL
 
 ```{r}
 
-tier1_evidence_items <- pcg_report$content$snv_indel$clin_eitem$specific_ttype$diagnostic$A_B |>
-     dplyr::bind_rows(pcg_report$content$snv_indel$clin_eitem$specific_ttype$prognostic$A_B) |>
-     dplyr::bind_rows(pcg_report$content$snv_indel$clin_eitem$specific_ttype$predictive$A_B)
+tier1_evidence_items <- pcg_report$content$snv_indel$clin_eitem$query_ttype$diagnostic$A_B |>
+     dplyr::bind_rows(pcg_report$content$snv_indel$clin_eitem$query_ttype$prognostic$A_B) |>
+     dplyr::bind_rows(pcg_report$content$snv_indel$clin_eitem$query_ttype$predictive$A_B)
 
 flexdashboard::valueBox(length(unique(tier1_evidence_items$SYMBOL)), 
                         color = pcg_report$metadata$color_value_box)
@@ -34,7 +34,7 @@ flexdashboard::valueBox(pcg_report$content$snv_indel$v_stat$n_tier1,
 ### Diagnostic evidence items
 
 ```{r}
-entries_diagnostic <- NROW(pcg_report$content$snv_indel$clin_eitem$specific_ttype$diagnostic$A_B)
+entries_diagnostic <- NROW(pcg_report$content$snv_indel$clin_eitem$query_ttype$diagnostic$A_B)
 flexdashboard::valueBox(entries_diagnostic, color = ifelse(entries_diagnostic > 0,
                         pcg_report[['metadata']][['color_palette']][['success']][['values']][1],
                         pcg_report$metadata$color_none), icon = "fa-file-prescription")
@@ -43,7 +43,7 @@ flexdashboard::valueBox(entries_diagnostic, color = ifelse(entries_diagnostic >
 ### Prognostic evidence items
 
 ```{r}
-entries_prognostic <- NROW(pcg_report$content$snv_indel$clin_eitem$specific_ttype$prognostic$A_B)
+entries_prognostic <- NROW(pcg_report$content$snv_indel$clin_eitem$query_ttype$prognostic$A_B)
 flexdashboard::valueBox(entries_prognostic, color = ifelse(entries_prognostic > 0,
                         pcg_report[['metadata']][['color_palette']][['success']][['values']][1],
                         pcg_report$metadata$color_none), icon = "fa-file-prescription")
@@ -52,7 +52,7 @@ flexdashboard::valueBox(entries_prognostic, color = ifelse(entries_prognostic >
 ### Predictive evidence items
 
 ```{r}
-entries_predictive <- NROW(pcg_report$content$snv_indel$clin_eitem$specific_ttype$predictive$A_B)
+entries_predictive <- NROW(pcg_report$content$snv_indel$clin_eitem$query_ttype$predictive$A_B)
 flexdashboard::valueBox(entries_predictive, color = ifelse(entries_predictive > 0,
                         pcg_report[['metadata']][['color_palette']][['success']][['values']][1],
                         pcg_report$metadata$color_none), icon = "fa-file-prescription")
@@ -71,9 +71,9 @@ Row
 cat('\nEvidence items associated with variants in tier 1 (right panel) can be filtered according to various criteria:\n')
 
 variants_tier1 <- dplyr::bind_rows(
-  pcg_report[['content']][['snv_indel']][['clin_eitem']][['specific_ttype']][['diagnostic']][['A_B']],
-  pcg_report[['content']][['snv_indel']][['clin_eitem']][['specific_ttype']][['predictive']][['A_B']],
-  pcg_report[['content']][['snv_indel']][['clin_eitem']][['specific_ttype']][['prognostic']][['A_B']])
+  pcg_report[['content']][['snv_indel']][['clin_eitem']][['query_ttype']][['diagnostic']][['A_B']],
+  pcg_report[['content']][['snv_indel']][['clin_eitem']][['query_ttype']][['predictive']][['A_B']],
+  pcg_report[['content']][['snv_indel']][['clin_eitem']][['query_ttype']][['prognostic']][['A_B']])
 
 variants_tier1_shared <- crosstalk::SharedData$new(variants_tier1)
 crosstalk::bscols(
diff --git a/pcgrr/inst/templates/pcgr_flexdb/flexdb_snv_tier2.Rmd b/pcgrr/inst/templates/pcgr_flexdb/flexdb_snv_tier2.Rmd
index 96c8dc6b..325b2534 100755
--- a/pcgrr/inst/templates/pcgr_flexdb/flexdb_snv_tier2.Rmd
+++ b/pcgrr/inst/templates/pcgr_flexdb/flexdb_snv_tier2.Rmd
@@ -16,11 +16,11 @@ flexdashboard::valueBox("TIER 2", caption = NULL, color = '#000000', icon = NULL
 ```{r}
 # flexdashboard::valueBox(stringr::str_replace_all(pcg_report$content$value_box$tier2,"Tier 2 variants:\n",""), color = pcg_report$metadata$color_value_box, icon = "fa-dna")
 
-tier2_evidence_items <- dplyr::bind_rows(pcg_report$content$snv_indel$clin_eitem$specific_ttype$diagnostic$C_D_E,
+tier2_evidence_items <- dplyr::bind_rows(pcg_report$content$snv_indel$clin_eitem$query_ttype$diagnostic$C_D_E,
                                        pcg_report$content$snv_indel$clin_eitem$other_ttype$diagnostic$A_B,
-                                       pcg_report$content$snv_indel$clin_eitem$specific_ttype$prognostic$C_D_E,
+                                       pcg_report$content$snv_indel$clin_eitem$query_ttype$prognostic$C_D_E,
                                        pcg_report$content$snv_indel$clin_eitem$other_ttype$prognostic$A_B,
-                                       pcg_report$content$snv_indel$clin_eitem$specific_ttype$predictive$C_D_E,
+                                       pcg_report$content$snv_indel$clin_eitem$query_ttype$predictive$C_D_E,
                                        pcg_report$content$snv_indel$clin_eitem$other_ttype$predictive$A_B)
 
 
@@ -39,7 +39,7 @@ flexdashboard::valueBox(pcg_report$content$snv_indel$v_stat$n_tier2, color = pcg
 ### Diagnostic evidence items
 
 ```{r}
-entries_diagnostic <- NROW(pcg_report$content$snv_indel$clin_eitem$specific_ttype$diagnostic$C_D_E) +
+entries_diagnostic <- NROW(pcg_report$content$snv_indel$clin_eitem$query_ttype$diagnostic$C_D_E) +
                       NROW(pcg_report$content$snv_indel$clin_eitem$other_ttype$diagnostic$A_B)
 flexdashboard::valueBox(entries_diagnostic, color = ifelse(entries_diagnostic > 0,
                         "#00a65a",
@@ -49,7 +49,7 @@ flexdashboard::valueBox(entries_diagnostic, color = ifelse(entries_diagnostic >
 ### Prognostic evidence items
 
 ```{r}
-entries_prognostic <- NROW(pcg_report$content$snv_indel$clin_eitem$specific_ttype$prognostic$C_D_E) +
+entries_prognostic <- NROW(pcg_report$content$snv_indel$clin_eitem$query_ttype$prognostic$C_D_E) +
                       NROW(pcg_report$content$snv_indel$clin_eitem$other_ttype$prognostic$A_B)
 flexdashboard::valueBox(entries_prognostic, color = ifelse(entries_prognostic > 0,
                         "#00a65a",
@@ -59,7 +59,7 @@ flexdashboard::valueBox(entries_prognostic, color = ifelse(entries_prognostic >
 ### Predictive evidence items
 
 ```{r}
-entries_predictive <- NROW(pcg_report$content$snv_indel$clin_eitem$specific_ttype$predictive$C_D_E) +
+entries_predictive <- NROW(pcg_report$content$snv_indel$clin_eitem$query_ttype$predictive$C_D_E) +
                       NROW(pcg_report$content$snv_indel$clin_eitem$other_ttype$predictive$A_B)
 flexdashboard::valueBox(entries_predictive, color = ifelse(entries_predictive > 0,
                         "#00a65a",
@@ -79,9 +79,9 @@ Row
 cat('\nEvidence items associated with variants in TIER 2 (right panel) can be interactively explored according to various criteria\n:')
 
 variants_tier2 <- dplyr::bind_rows(
-  pcg_report[['content']][['snv_indel']][['clin_eitem']][['specific_ttype']][['diagnostic']][['C_D_E']],
-  pcg_report[['content']][['snv_indel']][['clin_eitem']][['specific_ttype']][['predictive']][['C_D_E']],
-  pcg_report[['content']][['snv_indel']][['clin_eitem']][['specific_ttype']][['diagnostic']][['C_D_E']],
+  pcg_report[['content']][['snv_indel']][['clin_eitem']][['query_ttype']][['diagnostic']][['C_D_E']],
+  pcg_report[['content']][['snv_indel']][['clin_eitem']][['query_ttype']][['predictive']][['C_D_E']],
+  pcg_report[['content']][['snv_indel']][['clin_eitem']][['query_ttype']][['prognostic']][['C_D_E']],  # prognostic C/D/E items (was a second copy of 'diagnostic')
   pcg_report[['content']][['snv_indel']][['clin_eitem']][['other_ttype']][['diagnostic']][['A_B']],
   pcg_report[['content']][['snv_indel']][['clin_eitem']][['other_ttype']][['predictive']][['A_B']],
   pcg_report[['content']][['snv_indel']][['clin_eitem']][['other_ttype']][['prognostic']][['A_B']]) |>
diff --git a/pcgrr/inst/templates/pcgr_rmarkdown/cna_biomarkers.Rmd b/pcgrr/inst/templates/pcgr_rmarkdown/cna_biomarkers.Rmd
index 45371e4b..21675328 100644
--- a/pcgrr/inst/templates/pcgr_rmarkdown/cna_biomarkers.Rmd
+++ b/pcgrr/inst/templates/pcgr_rmarkdown/cna_biomarkers.Rmd
@@ -3,10 +3,10 @@
 
 A total of __`r NROW(pcg_report[['content']][['cna']][["disp"]][['tier1']]) + NROW(pcg_report[['content']][['cna']][["disp"]][['tier2']])`__ aberrations are associated with clinical evidence items in the [database for clinical interpretations of variants in cancer, CIViC](https://civic.genome.wustl.edu/#/home), with the following number of evidence items:
 
- *  Predictive: __`r NROW(pcg_report[['content']][['cna']][['clin_eitem']][['specific_ttype']][['predictive']][['A_B']]) + NROW(pcg_report[['content']][['cna']][['clin_eitem']][['other_ttype']][['predictive']][['A_B']]) + NROW(pcg_report[['content']][['cna']][['clin_eitem']][['specific_ttype']][['predictive']][['C_D_E']])`__ evidence items
- *  Prognostic: __`r NROW(pcg_report[['content']][['cna']][['clin_eitem']][['specific_ttype']][['prognostic']][['A_B']]) + NROW(pcg_report[['content']][['cna']][['clin_eitem']][['other_ttype']][['prognostic']][['A_B']]) + NROW(pcg_report[['content']][['cna']][['clin_eitem']][['specific_ttype']][['prognostic']][['C_D_E']])`__ evidence items
- *  Diagnostic: __`r NROW(pcg_report[['content']][['cna']][['clin_eitem']][['specific_ttype']][['diagnostic']][['A_B']]) + NROW(pcg_report[['content']][['cna']][['clin_eitem']][['other_ttype']][['diagnostic']][['A_B']]) +
-NROW(pcg_report[['content']][['cna']][['clin_eitem']][['specific_ttype']][['diagnostic']][['C_D_E']])`__ evidence items
+ *  Predictive: __`r NROW(pcg_report[['content']][['cna']][['clin_eitem']][['query_ttype']][['predictive']][['A_B']]) + NROW(pcg_report[['content']][['cna']][['clin_eitem']][['other_ttype']][['predictive']][['A_B']]) + NROW(pcg_report[['content']][['cna']][['clin_eitem']][['query_ttype']][['predictive']][['C_D_E']])`__ evidence items
+ *  Prognostic: __`r NROW(pcg_report[['content']][['cna']][['clin_eitem']][['query_ttype']][['prognostic']][['A_B']]) + NROW(pcg_report[['content']][['cna']][['clin_eitem']][['other_ttype']][['prognostic']][['A_B']]) + NROW(pcg_report[['content']][['cna']][['clin_eitem']][['query_ttype']][['prognostic']][['C_D_E']])`__ evidence items
+ *  Diagnostic: __`r NROW(pcg_report[['content']][['cna']][['clin_eitem']][['query_ttype']][['diagnostic']][['A_B']]) + NROW(pcg_report[['content']][['cna']][['clin_eitem']][['other_ttype']][['diagnostic']][['A_B']]) +
+NROW(pcg_report[['content']][['cna']][['clin_eitem']][['query_ttype']][['diagnostic']][['C_D_E']])`__ evidence items
 
 <br><br>
 
@@ -23,17 +23,17 @@ for(sig in c('strong_significance','potential_significance')){
     show_cna_filters[[sig]][[type]] <- F
     missing_cna_variants[[sig]][[type]] <- T
     cna_evidence_items[[sig]][[type]] <- data.frame()
-    if(sig == 'strong_significance' & NROW(pcg_report[['content']][['cna']][['clin_eitem']][['specific_ttype']][[type]][['A_B']]) > 0){
+    if(sig == 'strong_significance' & NROW(pcg_report[['content']][['cna']][['clin_eitem']][['query_ttype']][[type]][['A_B']]) > 0){
       show_cna_filters[[sig]][[type]] <- T
       missing_cna_variants[[sig]][[type]] <- F
-      cna_evidence_items[[sig]][[type]] <- pcg_report[['content']][['cna']][['clin_eitem']][['specific_ttype']][[type]][['A_B']]
+      cna_evidence_items[[sig]][[type]] <- pcg_report[['content']][['cna']][['clin_eitem']][['query_ttype']][[type]][['A_B']]
     }
     
-    if(sig == 'potential_significance' & (NROW(pcg_report[['content']][['cna']][['clin_eitem']][['specific_ttype']][[type]][['C_D_E']]) > 0 | NROW(pcg_report[['content']][['cna']][['clin_eitem']][['other_ttype']][[type]][['A_B']]) > 0)){
+    if(sig == 'potential_significance' & (NROW(pcg_report[['content']][['cna']][['clin_eitem']][['query_ttype']][[type]][['C_D_E']]) > 0 | NROW(pcg_report[['content']][['cna']][['clin_eitem']][['other_ttype']][[type]][['A_B']]) > 0)){
       show_cna_filters[[sig]][[type]] <- T
       missing_cna_variants[[sig]][[type]] <- F
       cna_evidence_items[[sig]][[type]] <- 
-        dplyr::bind_rows(pcg_report[['content']][['cna']][['clin_eitem']][['other_ttype']][[type]][['A_B']], pcg_report[['content']][['cna']][['clin_eitem']][['specific_ttype']][[type]][['C_D_E']])
+        dplyr::bind_rows(pcg_report[['content']][['cna']][['clin_eitem']][['other_ttype']][[type]][['A_B']], pcg_report[['content']][['cna']][['clin_eitem']][['query_ttype']][[type]][['C_D_E']])
     }
   }
 }
diff --git a/pcgrr/inst/templates/pcgr_rmarkdown/snv_tier1.Rmd b/pcgrr/inst/templates/pcgr_rmarkdown/snv_tier1.Rmd
index b892d41b..eb843213 100644
--- a/pcgrr/inst/templates/pcgr_rmarkdown/snv_tier1.Rmd
+++ b/pcgrr/inst/templates/pcgr_rmarkdown/snv_tier1.Rmd
@@ -2,9 +2,9 @@
 ### Tier 1 - Variants of strong clinical significance {.tabset}
 
 - Considering evidence items with strongest evidence levels (A & B) for the query tumor type (_`r pcg_report[['metadata']][['config']][["t_props"]][['tumor_type']]`_)  in the [database for clinical interpretations of variants in cancer, CIViC](https://civic.genome.wustl.edu/#/home) or [Cancer Biomarkers database](https://www.cancergenomeinterpreter.org/biomarkers), a total of __`r NROW(pcg_report[['content']][['snv_indel']][["disp"]][['tier1']])`__ unique, somatic variants were found, with the following number of evidence items:
-    * Tier 1 - Predictive/Therapeutic: __`r NROW(pcg_report[['content']][['snv_indel']][['clin_eitem']][['specific_ttype']][['predictive']][['A_B']])`__ evidence items
-    * Tier 1 - Prognostic: __`r NROW(pcg_report[['content']][['snv_indel']][['clin_eitem']][['specific_ttype']][['prognostic']][['A_B']])`__ evidence items
-    * Tier 1 - Diagnostic: __`r NROW(pcg_report[['content']][['snv_indel']][['clin_eitem']][['specific_ttype']][['diagnostic']][['A_B']])`__ evidence items
+    * Tier 1 - Predictive/Therapeutic: __`r NROW(pcg_report[['content']][['snv_indel']][['clin_eitem']][['query_ttype']][['predictive']][['A_B']])`__ evidence items
+    * Tier 1 - Prognostic: __`r NROW(pcg_report[['content']][['snv_indel']][['clin_eitem']][['query_ttype']][['prognostic']][['A_B']])`__ evidence items
+    * Tier 1 - Diagnostic: __`r NROW(pcg_report[['content']][['snv_indel']][['clin_eitem']][['query_ttype']][['diagnostic']][['A_B']])`__ evidence items
 
 <br><br>
 
@@ -28,7 +28,7 @@ missing_tier1_items <- list()
 for(type in c('diagnostic','prognostic','predictive')){
   show_tier1_filters[[type]] <- F
   missing_tier1_items[[type]] <- T
-  if(NROW(pcg_report[['content']][['snv_indel']][['clin_eitem']][['specific_ttype']][[type]][['A_B']]) > 0){
+  if(NROW(pcg_report[['content']][['snv_indel']][['clin_eitem']][['query_ttype']][[type]][['A_B']]) > 0){
     show_tier1_filters[[type]] <- T
     missing_tier1_items[[type]] <- F
   }
@@ -55,7 +55,7 @@ if(missing_tier1_items[['predictive']] == F |
 
 ```{r tier1_predictive, echo=F, results = 'asis', eval = show_tier1_filters[['predictive']]}
 
-variants_tier1_predictive_shared <- crosstalk::SharedData$new(pcg_report[['content']][['snv_indel']][['clin_eitem']][['specific_ttype']][['predictive']][['A_B']])
+variants_tier1_predictive_shared <- crosstalk::SharedData$new(pcg_report[['content']][['snv_indel']][['clin_eitem']][['query_ttype']][['predictive']][['A_B']])
 crosstalk::bscols(
   list(
     crosstalk::filter_select("CANCER_TYPE", "Cancer type", variants_tier1_predictive_shared, ~CANCER_TYPE),
@@ -116,7 +116,7 @@ if(missing_tier1_items[['prognostic']] == F &
 
 ```{r tier1_prognostic, echo=F, results='asis', eval = show_tier1_filters[['prognostic']]}
 
-variants_tier1_prognostic_shared <- crosstalk::SharedData$new(pcg_report[['content']][['snv_indel']][['clin_eitem']][['specific_ttype']][['prognostic']][['A_B']])
+variants_tier1_prognostic_shared <- crosstalk::SharedData$new(pcg_report[['content']][['snv_indel']][['clin_eitem']][['query_ttype']][['prognostic']][['A_B']])
 crosstalk::bscols(
   list(
     crosstalk::filter_select("CONSEQUENCE", "Consequence", variants_tier1_prognostic_shared, ~CONSEQUENCE),
@@ -175,7 +175,7 @@ if(missing_tier1_items[['diagnostic']] == F &
 
 ```{r tier1_diagnostic, echo=F, results='asis', eval = show_tier1_filters[['diagnostic']]}
 
-variants_tier1_diagnostic_shared <- crosstalk::SharedData$new(pcg_report[['content']][['snv_indel']][['clin_eitem']][['specific_ttype']][['diagnostic']][['A_B']])
+variants_tier1_diagnostic_shared <- crosstalk::SharedData$new(pcg_report[['content']][['snv_indel']][['clin_eitem']][['query_ttype']][['diagnostic']][['A_B']])
 crosstalk::bscols(
   list(
     crosstalk::filter_select("CONSEQUENCE", "Consequence", variants_tier1_diagnostic_shared, ~CONSEQUENCE),
diff --git a/pcgrr/inst/templates/pcgr_rmarkdown/snv_tier2.Rmd b/pcgrr/inst/templates/pcgr_rmarkdown/snv_tier2.Rmd
index 0109a3d9..73029250 100644
--- a/pcgrr/inst/templates/pcgr_rmarkdown/snv_tier2.Rmd
+++ b/pcgrr/inst/templates/pcgr_rmarkdown/snv_tier2.Rmd
@@ -2,9 +2,9 @@
 ### Tier 2 - Variants of potential clinical significance {.tabset}
 
 - Tier 2 considers evidence items of _i)_ strong evidence levels (A & B) in other tumor types, and _ii)_ weak evidence levels (C, D & E) in the query tumor type (_`r pcg_report[['metadata']][['config']][["t_props"]][['tumor_type']]`_). Using the [database for clinical interpretations of variants in cancer (CIViC) ](https://civic.genome.wustl.edu/#/home) and [Cancer Biomarkers database](https://www.cancergenomeinterpreter.org/biomarkers), a total of __`r NROW(pcg_report[['content']][['snv_indel']][["disp"]][['tier2']])`__ unique, somatic variants were found in the tumor sample:
-    * Tier 2 - Predictive/Therapeutic: __`r NROW(pcg_report[['content']][['snv_indel']][['clin_eitem']][['other_ttype']][['predictive']][['A_B']]) + NROW(pcg_report[['content']][['snv_indel']][['clin_eitem']][['specific_ttype']][['predictive']][['C_D_E']])`__ evidence items
-    * Tier 2 - Prognostic: __`r NROW(pcg_report[['content']][['snv_indel']][['clin_eitem']][['other_ttype']][['prognostic']][['A_B']]) + NROW(pcg_report[['content']][['snv_indel']][['clin_eitem']][['specific_ttype']][['prognostic']][['C_D_E']])`__ evidence items
-    * Tier 2 - Diagnostic: __`r NROW(pcg_report[['content']][['snv_indel']][['clin_eitem']][['other_ttype']][['diagnostic']][['A_B']]) + NROW(pcg_report[['content']][['snv_indel']][['clin_eitem']][['specific_ttype']][['diagnostic']][['C_D_E']])`__ evidence items
+    * Tier 2 - Predictive/Therapeutic: __`r NROW(pcg_report[['content']][['snv_indel']][['clin_eitem']][['other_ttype']][['predictive']][['A_B']]) + NROW(pcg_report[['content']][['snv_indel']][['clin_eitem']][['query_ttype']][['predictive']][['C_D_E']])`__ evidence items
+    * Tier 2 - Prognostic: __`r NROW(pcg_report[['content']][['snv_indel']][['clin_eitem']][['other_ttype']][['prognostic']][['A_B']]) + NROW(pcg_report[['content']][['snv_indel']][['clin_eitem']][['query_ttype']][['prognostic']][['C_D_E']])`__ evidence items
+    * Tier 2 - Diagnostic: __`r NROW(pcg_report[['content']][['snv_indel']][['clin_eitem']][['other_ttype']][['diagnostic']][['A_B']]) + NROW(pcg_report[['content']][['snv_indel']][['clin_eitem']][['query_ttype']][['diagnostic']][['C_D_E']])`__ evidence items
 
 <br><br>
 
@@ -29,8 +29,8 @@ for(type in c('diagnostic','prognostic','predictive')){
   show_tier2_filters[[type]] <- F
   missing_tier2_items[[type]] <- T
   eitems[[type]] <- data.frame()
-  if(NROW(pcg_report[['content']][['snv_indel']][['clin_eitem']][['other_ttype']][[type]][['A_B']]) > 0 | NROW(pcg_report[['content']][['snv_indel']][['clin_eitem']][['specific_ttype']][[type]][['C_D_E']]) > 0){
-    eitems[[type]] <- dplyr::bind_rows(pcg_report[['content']][['snv_indel']][['clin_eitem']][['other_ttype']][[type]][['A_B']], pcg_report[['content']][['snv_indel']][['clin_eitem']][['specific_ttype']][[type]][['C_D_E']]) |>
+  if(NROW(pcg_report[['content']][['snv_indel']][['clin_eitem']][['other_ttype']][[type]][['A_B']]) > 0 | NROW(pcg_report[['content']][['snv_indel']][['clin_eitem']][['query_ttype']][[type]][['C_D_E']]) > 0){
+    eitems[[type]] <- dplyr::bind_rows(pcg_report[['content']][['snv_indel']][['clin_eitem']][['other_ttype']][[type]][['A_B']], pcg_report[['content']][['snv_indel']][['clin_eitem']][['query_ttype']][[type]][['C_D_E']]) |>
       dplyr::arrange(desc(RATING))
     show_tier2_filters[[type]] <- T
     missing_tier2_items[[type]] <- F
diff --git a/pcgrr/man/append_cancer_gene_evidence.Rd b/pcgrr/man/append_cancer_gene_evidence.Rd
index cb61d000..6c87e938 100644
--- a/pcgrr/man/append_cancer_gene_evidence.Rd
+++ b/pcgrr/man/append_cancer_gene_evidence.Rd
@@ -4,12 +4,21 @@
 \alias{append_cancer_gene_evidence}
 \title{Function that appends cancer gene evidence links}
 \usage{
-append_cancer_gene_evidence(vcf_data_df = NULL, ref_data = NULL)
+append_cancer_gene_evidence(
+  vcf_data_df = NULL,
+  ref_data = NULL,
+  site = "Any",
+  pos_var = "POS"
+)
 }
 \arguments{
 \item{vcf_data_df}{Data frame of sample variants from VCF}
 
 \item{ref_data}{PCGR reference data bundle object}
+
+\item{site}{Primary tumor site}
+
+\item{pos_var}{variable reflecting chromosome order (POS/SEGMENT_START)}
 }
 \value{
 vcf_data_df
diff --git a/pcgrr/man/assign_acmg_tiers.Rd b/pcgrr/man/assign_acmg_tiers.Rd
new file mode 100644
index 00000000..6b0e10d2
--- /dev/null
+++ b/pcgrr/man/assign_acmg_tiers.Rd
@@ -0,0 +1,29 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/acmg.R
+\name{assign_acmg_tiers}
+\alias{assign_acmg_tiers}
+\title{Function that assigns tier classifications to somatic CNA segments and
+SNVs/InDels, based on the presence of biomarker evidence found in
+the variant set}
+\usage{
+assign_acmg_tiers(
+  vartype = "snv_indel",
+  primary_site = "Any",
+  variants_df = NULL,
+  biomarker_items = NULL
+)
+}
+\arguments{
+\item{vartype}{variant type ('snv_indel' or 'cna')}
+
+\item{primary_site}{primary tumor site}
+
+\item{variants_df}{data frame with variants (SNVs/InDels or CNAs)}
+
+\item{biomarker_items}{data frame with biomarker evidence items}
+}
+\description{
+Function that assigns tier classifications to somatic CNA segments and
+SNVs/InDels, based on the presence of biomarker evidence found in
+the variant set
+}
diff --git a/pcgrr/man/assign_germline_popfreq_status.Rd b/pcgrr/man/assign_germline_popfreq_status.Rd
index 8eaa2aca..dc454718 100644
--- a/pcgrr/man/assign_germline_popfreq_status.Rd
+++ b/pcgrr/man/assign_germline_popfreq_status.Rd
@@ -8,7 +8,7 @@ if any population frequency exceeds max_tolerated_af}
 \usage{
 assign_germline_popfreq_status(
   sample_calls,
-  pop = "EUR",
+  pop = "NFE",
   dbquery = "gnomADe",
   max_tolerated_af = 0.01
 )
diff --git a/pcgrr/man/assign_somatic_classification.Rd b/pcgrr/man/assign_somatic_classification.Rd
index 2ae141f9..c0af6ed2 100644
--- a/pcgrr/man/assign_somatic_classification.Rd
+++ b/pcgrr/man/assign_somatic_classification.Rd
@@ -6,12 +6,12 @@
 based on evidence found in variant set,
 potentially limited by user-defined options}
 \usage{
-assign_somatic_classification(sample_calls, config)
+assign_somatic_classification(sample_calls, settings)
 }
 \arguments{
-\item{sample_calls}{data frame with variants}
+\item{sample_calls}{data frame with putative somatic variants}
 
-\item{config}{configuration object}
+\item{settings}{PCGR configuration settings}
 }
 \value{
 sample_calls
diff --git a/pcgrr/man/assign_somatic_germline_evidence.Rd b/pcgrr/man/assign_somatic_germline_evidence.Rd
index 705a2ec2..a94cb608 100644
--- a/pcgrr/man/assign_somatic_germline_evidence.Rd
+++ b/pcgrr/man/assign_somatic_germline_evidence.Rd
@@ -5,12 +5,12 @@
 \title{Function that appends several tags denoting
 evidence for somatic/germline status of variants}
 \usage{
-assign_somatic_germline_evidence(sample_calls, config)
+assign_somatic_germline_evidence(sample_calls, settings = NULL)
 }
 \arguments{
 \item{sample_calls}{data frame with variants}
 
-\item{config}{configuration object}
+\item{settings}{PCGR configuration settings}
 }
 \value{
 sample_calls
diff --git a/pcgrr/man/generate_pcgr_report2.Rd b/pcgrr/man/generate_pcgr_report2.Rd
new file mode 100644
index 00000000..84273f02
--- /dev/null
+++ b/pcgrr/man/generate_pcgr_report2.Rd
@@ -0,0 +1,14 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/main2.R
+\name{generate_pcgr_report2}
+\alias{generate_pcgr_report2}
+\title{Function that generates all contents of the cancer genome report (PCGR)}
+\usage{
+generate_pcgr_report2(yaml_fname = NULL)
+}
+\arguments{
+\item{yaml_fname}{Name of PCGR configuration file (yaml)}
+}
+\description{
+Function that generates all contents of the cancer genome report (PCGR)
+}
diff --git a/pcgrr/man/generate_report_data_msi.Rd b/pcgrr/man/generate_report_data_msi.Rd
index bac9da71..1fd7da9e 100644
--- a/pcgrr/man/generate_report_data_msi.Rd
+++ b/pcgrr/man/generate_report_data_msi.Rd
@@ -4,16 +4,14 @@
 \alias{generate_report_data_msi}
 \title{Function that generates MSI prediction data for PCGR report}
 \usage{
-generate_report_data_msi(sample_calls, pcgr_data, sample_name, pcgr_config)
+generate_report_data_msi(variant_set, ref_data = NULL, settings = NULL)
 }
 \arguments{
-\item{sample_calls}{variant calls subject to mutational signature analysis}
+\item{variant_set}{variant calls subject to MSI classification}
 
-\item{pcgr_data}{object with PCGR annotation data}
+\item{ref_data}{PCGR reference data object}
 
-\item{sample_name}{sample identifier}
-
-\item{pcgr_config}{Object with PCGR configuration parameters}
+\item{settings}{PCGR run configuration settings}
 }
 \description{
 Function that generates MSI prediction data for PCGR report
diff --git a/pcgrr/man/generate_report_data_signatures_mp.Rd b/pcgrr/man/generate_report_data_signatures_mp.Rd
index 07ecdbf6..36e27347 100644
--- a/pcgrr/man/generate_report_data_signatures_mp.Rd
+++ b/pcgrr/man/generate_report_data_signatures_mp.Rd
@@ -5,25 +5,17 @@
 \title{Function that generates mutational signatures data for PCGR report}
 \usage{
 generate_report_data_signatures_mp(
-  vcf_fname,
-  pcgr_data,
-  sample_name,
-  pcgr_config,
-  type_specific = T
+  callset_snv = NULL,
+  ref_data = NULL,
+  settings = NULL
 )
 }
 \arguments{
-\item{vcf_fname}{VCF file processed with PCGR annotation pipeline -
-possibly filtered for depth/allelic fraction}
+\item{callset_snv}{Somatic callset (SNV)}
 
-\item{pcgr_data}{object with PCGR annotation data}
+\item{ref_data}{PCGR reference data object}
 
-\item{sample_name}{sample identifier}
-
-\item{pcgr_config}{Object with PCGR configuration parameters}
-
-\item{type_specific}{logical indicating if all reference signatures are to be
-included (F) rather than those known to be prevalent in the tumor (T)}
+\item{settings}{PCGR configuration settings object}
 }
 \description{
 Function that generates mutational signatures data for PCGR report
diff --git a/pcgrr/man/generate_report_data_snv_indel.Rd b/pcgrr/man/generate_report_data_snv_indel.Rd
index f076c749..79b56a02 100644
--- a/pcgrr/man/generate_report_data_snv_indel.Rd
+++ b/pcgrr/man/generate_report_data_snv_indel.Rd
@@ -5,27 +5,15 @@
 \title{Function that generates tiered variant sets for SNVs/InDels}
 \usage{
 generate_report_data_snv_indel(
-  sample_calls,
-  pcgr_data,
-  sample_name,
-  config,
-  callset = "somatic calls",
-  biomarker_mapping_stringency = 1,
+  pcg_report = NULL,
+  callset = NULL,
   tier_model = "pcgr_acmg"
 )
 }
 \arguments{
-\item{sample_calls}{variant calls subject to mutational signature analysis}
+\item{pcg_report}{PCGR report object}
 
-\item{pcgr_data}{object with PCGR annotation data}
-
-\item{sample_name}{sample identifier}
-
-\item{config}{Object with PCGR configuration parameters}
-
-\item{callset}{type of calls}
-
-\item{biomarker_mapping_stringency}{quality level for biomarkers}
+\item{callset}{Object with input calls (CNA, SNV/InDel)}
 
 \item{tier_model}{tier model (pcgr_acmg)}
 }
diff --git a/pcgrr/man/generate_report_data_snv_indel2.Rd b/pcgrr/man/generate_report_data_snv_indel2.Rd
new file mode 100644
index 00000000..ed7c8d19
--- /dev/null
+++ b/pcgrr/man/generate_report_data_snv_indel2.Rd
@@ -0,0 +1,25 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/main2.R
+\name{generate_report_data_snv_indel2}
+\alias{generate_report_data_snv_indel2}
+\title{Function that generates tiered variant sets for SNVs/InDels}
+\usage{
+generate_report_data_snv_indel2(
+  pcg_report = NULL,
+  callset = NULL,
+  tier_model = "pcgr_acmg"
+)
+}
+\arguments{
+\item{pcg_report}{PCGR report object}
+
+\item{callset}{Object with input calls (CNA, SNV/InDel)}
+
+\item{tier_model}{tier model (pcgr_acmg)}
+}
+\value{
+pcg_report_data data frame with all report elements
+}
+\description{
+Function that generates tiered variant sets for SNVs/InDels
+}
diff --git a/pcgrr/man/generate_report_data_tumor_only.Rd b/pcgrr/man/generate_report_data_tumor_only.Rd
index 4ea8c3d6..e966b78d 100644
--- a/pcgrr/man/generate_report_data_tumor_only.Rd
+++ b/pcgrr/man/generate_report_data_tumor_only.Rd
@@ -1,10 +1,16 @@
 % Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/main.R
+% Please edit documentation in R/main.R, R/main2.R
 \name{generate_report_data_tumor_only}
 \alias{generate_report_data_tumor_only}
 \title{Function that generates germline-filtered callset and PCGR
 report statistics for a given tumor-only callsets}
 \usage{
+generate_report_data_tumor_only(
+  unfiltered_sample_calls,
+  sample_name,
+  pcgr_config
+)
+
 generate_report_data_tumor_only(
   unfiltered_sample_calls,
   sample_name,
@@ -19,6 +25,9 @@ generate_report_data_tumor_only(
 \item{pcgr_config}{Object with PCGR configuration parameters}
 }
 \description{
+Function that generates germline-filtered callset and PCGR
+report statistics for a given tumor-only callsets
+
 Function that generates germline-filtered callset and PCGR
 report statistics for a given tumor-only callsets
 }
diff --git a/pcgrr/man/generate_tier_tsv.Rd b/pcgrr/man/generate_tier_tsv.Rd
index f218cd5a..60df86f9 100644
--- a/pcgrr/man/generate_tier_tsv.Rd
+++ b/pcgrr/man/generate_tier_tsv.Rd
@@ -1,9 +1,11 @@
 % Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/main.R
+% Please edit documentation in R/main.R, R/main2.R
 \name{generate_tier_tsv}
 \alias{generate_tier_tsv}
 \title{Function that annotates CNV segment files}
 \usage{
+generate_tier_tsv(variant_set, config, annotation_tags, sample_name = "test")
+
 generate_tier_tsv(variant_set, config, annotation_tags, sample_name = "test")
 }
 \arguments{
@@ -16,10 +18,21 @@ generate_tier_tsv(variant_set, config, annotation_tags, sample_name = "test")
 \item{sample_name}{Sample identifier}
 }
 \value{
+tsv_variants data frame with tier-annotated list of
+variants for tab-separated output
+
 tsv_variants data frame with tier-annotated list of
 variants for tab-separated output
 }
 \description{
+param cna_segments_tsv CNV file name with chromosomal log(2)-ratio segments
+param pcgr_data object with PCGR annotation data
+param sample_name sample identifier
+param pcgr_config Object with PCGR configuration parameters
+param oncotree Data frame with phenotype terms relevant for tumor type
+param transcript_overlap_pct required aberration overlap fraction
+(percent) for reported transcripts (default 100 percent)
+
 param cna_segments_tsv CNV file name with chromosomal log(2)-ratio segments
 param pcgr_data object with PCGR annotation data
 param sample_name sample identifier
@@ -297,7 +310,314 @@ if (tumor_type != "Cancer, NOS") \{
       eitems = eitems_specific_tt)
 
   ## Assign putative TIER 1 variant set
-  pcg_report_cna[["clin_eitem"]][["specific_ttype"]] <-
+  pcg_report_cna[["clin_eitem"]][["query_ttype"]] <-
+    biomarker_hits_cna_specific$clin_eitem
+  pcg_report_cna[["variant_set"]][["tier1"]] <-
+    biomarker_hits_cna_specific$variant_set
+\}
+
+pcg_report_cna[["eval"]] <- T
+pcg_report_cna[["variant_set"]][["tsv"]] <-
+  cna_transcript_df_print
+pcg_report_cna[["v_stat"]][["n_cna_gain"]] <-
+  n_cna_gain
+pcg_report_cna[["v_stat"]][["n_cna_loss"]] <-
+  n_cna_loss
+pcg_report_cna[["disp"]][["segment"]] <-
+  cna_segments_filtered
+pcg_report_cna[["disp"]][["oncogene_gain"]] <-
+  onco_ts_sets[["oncogene_gain"]]
+pcg_report_cna[["disp"]][["tsgene_loss"]] <-
+  onco_ts_sets[["tsgene_loss"]]
+pcg_report_cna[["disp"]][["other_target"]] <-
+  onco_ts_sets[["other_target"]]
+
+
+pcg_report_cna <-
+  pcgrr::assign_tier1_tier2_acmg_cna(pcg_report_cna)
+
+return(pcg_report_cna)
+}\if{html}{\out{</div>}}
+
+}
+
+Function that annotates CNV segment files
+
+param yaml_fname PCGR yaml file
+param ref_data PCGR/CPSR reference data object
+
+export
+Function that generates dense and tiered annotated variant datasets
+
+export
+generate_report_data_cna <-
+function(cna_segments_tsv,
+pcgr_data,
+sample_name,
+pcgr_config,
+oncotree,
+transcript_overlap_pct = 100) {
+
+\if{html}{\out{<div class="sourceCode">}}\preformatted{invisible(
+  assertthat::assert_that(
+    file.exists(cna_segments_tsv),
+    msg = paste0("File 'cna_segments_tsv' (",
+                 cna_segments_tsv, ") does not exist")))
+pcg_report_cna <- pcgrr::init_report(config = pcgr_config,
+                                     class = "cna")
+log_r_homdel <- pcgr_config[["cna"]][["log_r_homdel"]]
+log_r_gain <- pcgr_config[["cna"]][["log_r_gain"]]
+tumor_type <- pcgr_config[["t_props"]][["tumor_type"]]
+MEGABASE <- 1000000
+
+pcgrr::log4r_info("------")
+pcgrr::log4r_info(paste0("Generating report data for copy number segment file ",
+                  cna_segments_tsv))
+
+## READ INPUT FILE, VALIDATE INPUT CHROMOSOMES AND SEGMENTS, ADD CYTOBAND INFO
+cna_df <- utils::read.table(file = cna_segments_tsv, header = T,
+                     stringsAsFactors = F, sep = "\\t",
+                     comment.char = "", quote = "") |>
+  dplyr::rename(chromosome = Chromosome,
+                LogR = Segment_Mean,
+                segment_start = Start,
+                segment_end = End) |>
+  dplyr::distinct() |>
+  dplyr::select(
+    c("chromosome","LogR",
+      "segment_start","segment_end")) |>
+  dplyr::mutate(
+    chromosome = stringr::str_replace(
+      .data$chromosome, "^chr", "")) |>
+  pcgrr::get_valid_chromosomes(
+    chromosome_column = "chromosome",
+    bsg = pcgr_data[["assembly"]][["bsg"]]) |>
+  pcgrr::get_valid_chromosome_segments(
+    genome_assembly = pcgr_data[["assembly"]][["grch_name"]],
+    bsg = pcgr_data[["assembly"]][["bsg"]]) |>
+  dplyr::filter(!is.na(.data$LogR)) |>
+  dplyr::mutate(LogR = round(as.numeric(.data$LogR), digits = 3)) |>
+  dplyr::mutate(SEGMENT_ID = paste0(.data$chromosome, ":",
+                                    .data$segment_start, "-",
+                                    .data$segment_end)) |>
+  pcgrr::get_cna_cytoband(pcgr_data = pcgr_data) |>
+  dplyr::mutate(SAMPLE_ID = sample_name) |>
+  pcgrr::append_ucsc_segment_link(
+    hgname = pcgr_data[["assembly"]][["hg_name"]],
+    chrom = "chromosome",
+    start = "segment_start",
+    end = "segment_end") |>
+  dplyr::mutate(
+    SEGMENT_LENGTH_MB =
+      round((as.numeric((.data$segment_end - .data$segment_start) /
+                          MEGABASE)),
+            digits = 5)) |>
+  dplyr::rename(SEGMENT = .data$SEGMENT_LINK, LOG_R = .data$LogR)
+
+## MAKE SIMPLE SEGMENTS DATA FRAME FOR FILTERING IN REPORT
+cna_segments <- cna_df |>
+  dplyr::select(.data$SEGMENT,
+                .data$SEGMENT_LENGTH_MB,
+                .data$CYTOBAND,
+                .data$LOG_R,
+                .data$EVENT_TYPE) |>
+  dplyr::distinct()
+
+#### FIND AND APPEND GENCODE TRANSCRIPTS THAT OVERLAP
+cna_transcript_df <-
+  pcgrr::get_cna_overlapping_transcripts(
+     cna_df, pcgr_data = pcgr_data)
+#get_cna_overlapping_transcripts(
+#  cna_df, pcgr_data = pcgr_data)
+
+#### GENERATE DATAFRAME OF UNIQUE TRANSCRIPT-CNA SEGMENTS FOR OUTPUT TSV
+cna_transcript_df_print <- cna_transcript_df |>
+  dplyr::select(.data$chrom,
+                .data$segment_start,
+                .data$segment_end,
+                .data$SEGMENT_ID,
+                .data$SEGMENT_LENGTH_MB,
+                .data$EVENT_TYPE,
+                .data$CYTOBAND,
+                .data$LOG_R,
+                .data$SAMPLE_ID,
+                .data$ensembl_gene_id,
+                .data$symbol,
+                .data$ensembl_transcript_id,
+                .data$transcript_start,
+                .data$transcript_end,
+                .data$transcript_overlap_percent,
+                .data$name,
+                .data$biotype,
+                .data$tumor_suppressor,
+                .data$oncogene,
+                .data$intogen_driver,
+                .data$chembl_compound_id,
+                .data$gencode_tag,
+                .data$gencode_release) |>
+  magrittr::set_colnames(tolower(names(.)))
+
+avg_transcript_overlap <- as.data.frame(
+  cna_transcript_df |>
+    dplyr::filter(.data$biotype == "protein_coding") |>
+    dplyr::group_by(.data$SEGMENT_ID, .data$symbol) |>
+    dplyr::summarise(
+      MEAN_TRANSCRIPT_CNA_OVERLAP = mean(
+        .data$transcript_overlap_percent),
+      TRANSCRIPTS = paste0(.data$ensembl_transcript_id, collapse = ", "),
+      .groups = "drop") |>
+    dplyr::rename(SYMBOL = .data$symbol) |>
+    dplyr::mutate(
+      MEAN_TRANSCRIPT_CNA_OVERLAP =
+        round(.data$MEAN_TRANSCRIPT_CNA_OVERLAP, digits = 2))
+)
+
+cna_transcript_df <-
+  dplyr::select(cna_transcript_df, -.data$ensembl_transcript_id) |>
+  dplyr::filter(.data$biotype == "protein_coding") |>
+  dplyr::distinct() |>
+  dplyr::mutate(VAR_ID = as.character(rep(1:nrow(.)))) |>
+  magrittr::set_colnames(toupper(names(.))) |>
+  pcgrr::append_otargets_pheno_link(
+    pcgr_data = pcgr_data,
+    oncotree = oncotree) |>
+  dplyr::rename(OPENTARGETS_ASSOCIATIONS =
+                  .data$OT_DISEASE_LINK) |>
+  dplyr::select(.data$VAR_ID,
+                .data$SEGMENT_ID,
+                .data$SYMBOL,
+                .data$ONCOGENE,
+                .data$ONCOGENE_EVIDENCE,
+                .data$TUMOR_SUPPRESSOR,
+                .data$TUMOR_SUPPRESSOR_EVIDENCE,
+                .data$CANCERGENE_SUPPORT,
+                .data$OPENTARGETS_ASSOCIATIONS,
+                .data$OPENTARGETS_RANK,
+                .data$ENTREZ_ID,
+                .data$CHROM,
+                .data$NAME,
+                .data$EVENT_TYPE,
+                .data$SEGMENT_LENGTH_MB,
+                .data$SEGMENT,
+                .data$TRANSCRIPT_OVERLAP_PERCENT,
+                .data$LOG_R) |>
+  dplyr::mutate(ENTREZ_ID = as.character(.data$ENTREZ_ID)) |>
+  dplyr::rename(GENENAME = .data$NAME,
+                TRANSCRIPT_OVERLAP = .data$TRANSCRIPT_OVERLAP_PERCENT,
+                CHROMOSOME = .data$CHROM) |>
+  dplyr::left_join(pcgr_data[["kegg"]][["pathway_links"]],
+                   by = c("ENTREZ_ID" = "gene_id")) |>
+  dplyr::rename(KEGG_PATHWAY = .data$kegg_pathway_urls)
+
+## Get gene annotation links
+entrezgene_annotation_links <-
+  pcgrr::generate_annotation_link(
+    cna_transcript_df,
+    vardb = "GENE_NAME",
+    group_by_var = "VAR_ID",
+    link_key_var = "ENTREZ_ID",
+    link_display_var = "GENENAME",
+    url_prefix = "http://www.ncbi.nlm.nih.gov/gene/")
+
+cna_transcript_df <- cna_transcript_df |>
+  dplyr::left_join(
+    dplyr::rename(entrezgene_annotation_links,
+                  GENE_NAME = .data$link),
+    by = c("VAR_ID")) |>
+  dplyr::select(.data$SEGMENT_ID,
+                .data$CHROMOSOME,
+                .data$SYMBOL,
+                .data$GENE_NAME,
+                .data$KEGG_PATHWAY,
+                .data$TUMOR_SUPPRESSOR,
+                .data$TUMOR_SUPPRESSOR_EVIDENCE,
+                .data$ONCOGENE,
+                .data$ONCOGENE_EVIDENCE,
+                .data$CANCERGENE_SUPPORT,
+                .data$OPENTARGETS_ASSOCIATIONS,
+                .data$OPENTARGETS_RANK,
+                .data$SEGMENT_LENGTH_MB,
+                .data$SEGMENT,
+                .data$EVENT_TYPE,
+                .data$LOG_R) |>
+  dplyr::distinct() |>
+  dplyr::left_join(avg_transcript_overlap,
+                   by = c("SEGMENT_ID", "SYMBOL"))
+
+
+n_cna_loss <-
+  dplyr::filter(cna_segments, .data$LOG_R <= log_r_homdel) |>
+  nrow()
+n_cna_gain <-
+  dplyr::filter(cna_segments, .data$LOG_R >= log_r_gain) |>
+  nrow()
+cna_segments_filtered <- cna_segments |>
+  dplyr::filter(.data$LOG_R >= log_r_gain | .data$LOG_R <= log_r_homdel) |>
+  dplyr::arrange(dplyr::desc(.data$LOG_R))
+pcgrr::log4r_info(
+  paste0("Detected ", nrow(cna_segments_filtered),
+         " segments subject to amplification/deletion (",
+         n_cna_loss, " deletions, ", n_cna_gain,
+         " gains according to user-defined log(2) ratio thresholds)"))
+
+
+## Get aberration sets related to tumor suppressor genes
+## /oncogenes/drug targets
+onco_ts_sets <-
+  get_oncogene_tsgene_target_sets(
+    cna_transcript_df,
+    transcript_overlap_pct = transcript_overlap_pct,
+    log_r_homdel = log_r_homdel,
+    log_r_gain = log_r_gain,
+    tumor_type = tumor_type,
+    pcgr_data = pcgr_data)
+
+## load all clinical evidence items ()
+eitems_any_tt <- pcgrr::load_eitems(
+  eitems_raw = pcgr_data$biomarkers,
+  alteration_types = "CNA",
+  ontology =
+    pcgr_data$phenotype$oncotree,
+  origin = "Somatic",
+  tumor_type_specificity = "any")
+
+
+
+## Get all clinical evidence items that are related to
+## tumor suppressor genes/oncogenes/drug targets (NOT tumor-type specific)
+biomarker_hits_cna_any <-
+  pcgrr::get_clin_assocs_cna(
+    onco_ts_sets,
+    annotation_tags = pcgr_data$annotation_tags,
+    eitems = eitems_any_tt)
+
+pcg_report_cna[["clin_eitem"]][["any_ttype"]] <-
+  biomarker_hits_cna_any[["clin_eitem"]]
+pcg_report_cna[["variant_set"]][["tier2"]] <-
+  biomarker_hits_cna_any$variant_set
+
+## Get all clinical evidence items that
+## overlap query set (if tumor type is specified)
+if (tumor_type != "Cancer, NOS") \{
+
+  ## load tumor-type specific evidence items ()
+  eitems_specific_tt <- pcgrr::load_eitems(
+    eitems_raw = pcgr_data$biomarkers,
+    alteration_types = "CNA",
+    ontology =
+      pcgr_data$phenotype$oncotree,
+    origin = "Somatic",
+    tumor_type_specificity = "specific",
+    tumor_type = tumor_type)
+
+  biomarker_hits_cna_specific <-
+    pcgrr::get_clin_assocs_cna(
+      onco_ts_sets,
+      annotation_tags = pcgr_data$annotation_tags,
+      eitems = eitems_specific_tt)
+
+  ## Assign putative TIER 1 variant set
+  pcg_report_cna[["clin_eitem"]][["query_ttype"]] <-
     biomarker_hits_cna_specific$clin_eitem
   pcg_report_cna[["variant_set"]][["tier1"]] <-
     biomarker_hits_cna_specific$variant_set
diff --git a/pcgrr/man/get_population_tag.Rd b/pcgrr/man/get_population_tag.Rd
deleted file mode 100644
index 161947c6..00000000
--- a/pcgrr/man/get_population_tag.Rd
+++ /dev/null
@@ -1,23 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/germline.R
-\name{get_population_tag}
-\alias{get_population_tag}
-\title{Function that retrieves name of VCF INFO tag and
-population description for gnomad/1000G population}
-\usage{
-get_population_tag(population_code, db = "1KG", subset = NA)
-}
-\arguments{
-\item{population_code}{three-letter code}
-
-\item{db}{1KG or GNOMAD}
-
-\item{subset}{NA or "non_cancer" (for GNOMAD)}
-}
-\value{
-pop_tag_info
-}
-\description{
-Function that retrieves name of VCF INFO tag and
-population description for gnomad/1000G population
-}
diff --git a/pcgrr/man/get_prevalent_site_signatures.Rd b/pcgrr/man/get_prevalent_site_signatures.Rd
index 4d67dfa8..489e52aa 100644
--- a/pcgrr/man/get_prevalent_site_signatures.Rd
+++ b/pcgrr/man/get_prevalent_site_signatures.Rd
@@ -3,12 +3,12 @@
 \name{get_prevalent_site_signatures}
 \alias{get_prevalent_site_signatures}
 \title{Function that retrieves prevalent signatures for a given tumor type/primary site
-Data is collected from COSMIC v3.2.}
+Data is collected from COSMIC v3.4.}
 \usage{
 get_prevalent_site_signatures(
   site = "Any",
   custom_collection = NULL,
-  pcgr_data = NULL,
+  ref_data = NULL,
   min_prevalence_pct = 5,
   incl_poss_artifacts = T
 )
@@ -18,7 +18,7 @@ get_prevalent_site_signatures(
 
 \item{custom_collection}{Custom collection of signatures from COSMIC}
 
-\item{pcgr_data}{PCGR data object}
+\item{ref_data}{PCGR reference data object}
 
 \item{min_prevalence_pct}{Minimum prevalence (pct) of signature in
 cohorts associated with primary site -
@@ -29,5 +29,5 @@ are to be included}
 }
 \description{
 Function that retrieves prevalent signatures for a given tumor type/primary site
-Data is collected from COSMIC v3.2.
+Data is collected from COSMIC v3.4.
 }
diff --git a/pcgrr/man/get_proper_maf_alleles.Rd b/pcgrr/man/get_proper_maf_alleles.Rd
deleted file mode 100644
index 80b2ef37..00000000
--- a/pcgrr/man/get_proper_maf_alleles.Rd
+++ /dev/null
@@ -1,23 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/mutation.R
-\name{get_proper_maf_alleles}
-\alias{get_proper_maf_alleles}
-\title{Function that transforms a tier-structured variant data frame
-into a MAF-like data frame (for input to 2020plus, MutSigCV)}
-\usage{
-get_proper_maf_alleles(maf_df, genome_seq, seqinfo)
-}
-\arguments{
-\item{maf_df}{data frame with somatic mutations}
-
-\item{genome_seq}{BSgenome object}
-
-\item{seqinfo}{seqinfo object}
-}
-\value{
-maf_all
-}
-\description{
-Function that transforms a tier-structured variant data frame
-into a MAF-like data frame (for input to 2020plus, MutSigCV)
-}
diff --git a/pcgrr/man/init_kataegis_content.Rd b/pcgrr/man/init_kataegis_content.Rd
new file mode 100644
index 00000000..0d39335c
--- /dev/null
+++ b/pcgrr/man/init_kataegis_content.Rd
@@ -0,0 +1,11 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/report.R
+\name{init_kataegis_content}
+\alias{init_kataegis_content}
+\title{Function that initiates report element with kataegis information}
+\usage{
+init_kataegis_content()
+}
+\description{
+Function that initiates report element with kataegis information
+}
diff --git a/pcgrr/man/init_msi_content.Rd b/pcgrr/man/init_msi_content.Rd
new file mode 100644
index 00000000..da404e1a
--- /dev/null
+++ b/pcgrr/man/init_msi_content.Rd
@@ -0,0 +1,11 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/report.R
+\name{init_msi_content}
+\alias{init_msi_content}
+\title{Function that initiates report element with MSI classification}
+\usage{
+init_msi_content()
+}
+\description{
+Function that initiates report element with MSI classification
+}
diff --git a/pcgrr/man/load_dna_variants.Rd b/pcgrr/man/load_dna_variants.Rd
index c01b72af..ac17e3b9 100644
--- a/pcgrr/man/load_dna_variants.Rd
+++ b/pcgrr/man/load_dna_variants.Rd
@@ -3,28 +3,34 @@
 \name{load_dna_variants}
 \alias{load_dna_variants}
 \title{Function that reads and validates CNA or SNV/InDel TSV files
-file from PCGR/CPSR pre-report pipeline}
+file from PCGR/CPSR pre-report (Python) pipeline}
 \usage{
 load_dna_variants(
   fname = NA,
   cols = NULL,
   ref_data = NULL,
+  vartype = "snv_indel",
+  primary_site = "Any",
   retained_info_tags = "None",
   variant_origin = "Somatic"
 )
 }
 \arguments{
-\item{fname}{Path to raw file with DNA aberrations (PCGR/CPSR)}
+\item{fname}{Path to raw input file with DNA aberrations (PCGR/CPSR)}
 
-\item{cols}{column type definitions of input}
+\item{cols}{column type definitions of raw input file}
 
 \item{ref_data}{reference data object}
 
+\item{vartype}{type of DNA aberrations ('snv_indel','cna')}
+
+\item{primary_site}{primary site of tumor}
+
 \item{retained_info_tags}{VCF INFO tags to be retained in output (SNVs/InDels)}
 
 \item{variant_origin}{Germline/Somatic}
 }
 \description{
 Function that reads and validates CNA or SNV/InDel TSV files
-file from PCGR/CPSR pre-report pipeline
+file from PCGR/CPSR pre-report (Python) pipeline
 }
diff --git a/pcgrr/man/load_somatic_cna.Rd b/pcgrr/man/load_somatic_cna.Rd
index 6c37394f..65db2c4f 100644
--- a/pcgrr/man/load_somatic_cna.Rd
+++ b/pcgrr/man/load_somatic_cna.Rd
@@ -5,12 +5,14 @@
 \title{Function that reads and validates a fully annotated CNA file from PCGR
 pre-report pipeline}
 \usage{
-load_somatic_cna(fname, ref_data = NULL)
+load_somatic_cna(fname, ref_data = NULL, settings = NULL)
 }
 \arguments{
 \item{fname}{Path to file name}
 
 \item{ref_data}{Object with reference data}
+
+\item{settings}{Object with PCGR report configuration}
 }
 \description{
 Function that reads and validates a fully annotated CNA file from PCGR
diff --git a/pcgrr/man/make_upset_plot_data.Rd b/pcgrr/man/make_upset_plot_data.Rd
index 97927100..d3c0a0ee 100644
--- a/pcgrr/man/make_upset_plot_data.Rd
+++ b/pcgrr/man/make_upset_plot_data.Rd
@@ -2,9 +2,8 @@
 % Please edit documentation in R/germline.R
 \name{make_upset_plot_data}
 \alias{make_upset_plot_data}
-\title{Function that makes input data for an UpSet plot
-(filtering/intersection results) for the somatic-germline
-classification procedure}
+\title{Function that makes input data for an UpSet plot (filtering/intersection
+results) for the somatic-germline classification procedure}
 \usage{
 make_upset_plot_data(calls, config)
 }
diff --git a/pcgrr/man/predict_msi_status.Rd b/pcgrr/man/predict_msi_status.Rd
index 8dc47f0c..1b172054 100644
--- a/pcgrr/man/predict_msi_status.Rd
+++ b/pcgrr/man/predict_msi_status.Rd
@@ -5,8 +5,8 @@
 \title{Function that predicts MSI status based on fraction of indels among calls}
 \usage{
 predict_msi_status(
-  vcf_data_df,
-  pcgr_data,
+  variant_set,
+  ref_data,
   msi_prediction_model,
   msi_prediction_dataset,
   target_size_mb,
@@ -14,9 +14,9 @@ predict_msi_status(
 )
 }
 \arguments{
-\item{vcf_data_df}{data frame with somatic mutations/indels}
+\item{variant_set}{data frame with somatic mutations/indels}
 
-\item{pcgr_data}{object with PCGR datasets}
+\item{ref_data}{PCGR reference data object}
 
 \item{msi_prediction_model}{statistical model for MSI prediction}
 
diff --git a/pcgrr/man/write_report_output.Rd b/pcgrr/man/write_report_output.Rd
index 3a628bb3..82633a4a 100644
--- a/pcgrr/man/write_report_output.Rd
+++ b/pcgrr/man/write_report_output.Rd
@@ -1,10 +1,17 @@
 % Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/main.R
+% Please edit documentation in R/main.R, R/main2.R
 \name{write_report_output}
 \alias{write_report_output}
 \title{Function that writes contents of PCGR object to various output formats
 (Rmarkdown/flexdashboard HTML reports, JSON, tab-separated etc)}
 \usage{
+write_report_output(
+  report,
+  tier_model = "pcgr_acmg",
+  output_format = "html",
+  flexdb = FALSE
+)
+
 write_report_output(
   report,
   tier_model = "pcgr_acmg",
@@ -23,6 +30,9 @@ write_report_output(
 \item{flexdb}{logical indicating if HTML output should be dashboard}
 }
 \description{
+Function that writes contents of PCGR object to various output formats
+(Rmarkdown/flexdashboard HTML reports, JSON, tab-separated etc)
+
 Function that writes contents of PCGR object to various output formats
 (Rmarkdown/flexdashboard HTML reports, JSON, tab-separated etc)
 }
diff --git a/scripts/pcgrr.R b/scripts/pcgrr.R
index 0253b08e..155e8b14 100755
--- a/scripts/pcgrr.R
+++ b/scripts/pcgrr.R
@@ -1,28 +1,36 @@
 #!/usr/bin/env Rscript
 
+options(warn=-1)
 .libPaths(R.home("library")) # use conda R pkgs, not e.g. user's local installation
 
-suppressWarnings(suppressPackageStartupMessages(library(argparse)))
 suppressWarnings(suppressPackageStartupMessages(library(pcgrr)))
-suppressWarnings(suppressPackageStartupMessages(library(stringr)))
+suppressWarnings(suppressPackageStartupMessages(library(log4r)))
+suppressWarnings(suppressPackageStartupMessages(library(argparse)))
 
+args <- commandArgs(trailingOnly=TRUE)
 
-# my_log4r_layout <- function(level, ...) {
-#   paste0(format(Sys.time()), " - pcgr-report-generation - ",
-#          level, " - ", ..., "\n", collapse = "")
-# }
+yaml_fname <- as.character(args[1])
+
+my_log4r_layout <- function(level, ...) { # log line: time - tag - level - msg
+  prefix <- paste0(format(Sys.time()), " - pcgr-report-generation - ")
+  paste0(prefix, level, " - ", ..., "\n", collapse = "")
+}
+
+log4r_logger <-
+  log4r::logger(
+    threshold = "INFO", appenders = log4r::console_appender(my_log4r_layout))
 
-# log4r_logger <- log4r::logger(threshold = "INFO",
-#                               appenders = log4r::console_appender(my_log4r_layout))
+# this gets passed on to all the log4r_* functions inside the pkg
+options("PCGRR_LOG4R_LOGGER" = log4r_logger)
 
-# # this gets passed on to all the log4r_* functions inside the pkg
-# options("PCGRR_LOG4R_LOGGER" = log4r_logger)
+stopifnot("YAML config file not found" = file.exists(yaml_fname)) # from args[1]
 
 
-# pcg_report <- NULL
+## Generate report content
+pcg_report <- pcgrr::generate_pcgr_report2(
+  yaml_fname = yaml_fname
+)
 
-# defaultW <- getOption("warn")
-# options(warn = -1)
 
 # # ## Generate report object
 # pcg_report <-