# Making DBS input for SigProfiler

In [1]:
import io
import os
from tqdm import tqdm
import pysam
import numpy as np
import pandas as pd
import gzip as gz
import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib.ticker
from matplotlib_venn import venn2, venn3
import sigProfilerPlotting as sigPlt
from SigProfilerAssignment import Analyzer as Analyze

# Notebook-wide plotting defaults.
# seaborn "ticks" style, Arial font, fonts scaled by 1.15.
sns.set_theme(font="Arial", font_scale=1.15, style='ticks') 
# Figure resolution used by matplotlib for every figure created afterwards.
matplotlib.rcParams['figure.dpi'] = 150
# Explicitly enable the top and right axes spines.
plt.rc("axes.spines", top=True, right=True)

def read_vcf(path):
    """Load a VCF file into a pandas DataFrame.

    Lines starting with ``##`` (meta-information) are dropped; the remaining
    text, beginning with the ``#CHROM`` header line, is parsed as a
    tab-separated table. Paths whose last three characters are ``.gz`` are
    read through gzip and decoded as UTF-8; anything else is opened as plain
    text.

    Parameters
    ----------
    path : str
        Location of the (optionally gzip-compressed) VCF.

    Returns
    -------
    pandas.DataFrame
        One row per record. The fixed VCF columns are typed explicitly
        (``POS`` as int, the rest as str) and ``#CHROM`` is renamed ``CHROM``.
    """
    fixed_column_types = {
        '#CHROM': str, 'POS': int, 'ID': str, 'REF': str, 'ALT': str,
        'QUAL': str, 'FILTER': str, 'INFO': str,
    }

    # Only the way lines are obtained differs between the two cases.
    if path[-3:] == ".gz":
        with gz.open(path, 'rb') as handle:
            kept = [raw.decode('utf-8') for raw in handle if not raw.startswith(b'##')]
    else:
        with open(path, 'r') as handle:
            kept = [text for text in handle if not text.startswith('##')]

    table = pd.read_csv(io.StringIO(''.join(kept)), dtype=fixed_column_types, sep='\t')
    return table.rename(columns={'#CHROM': 'CHROM'})

def reverse_complement(string):
    """Return the reverse complement of an upper-case DNA sequence.

    Only the characters A, T, G and C are accepted; anything else
    (including lower-case bases or N) raises ValueError.
    """
    partner = {'A': 'T', 'T': 'A', 'G': 'C', 'C': 'G'}
    try:
        # Walk the sequence back to front and complement each base on the way.
        return ''.join(partner[base] for base in string[::-1])
    except KeyError:
        raise ValueError("Invalid character other than A,T,G and C")

def dbs_context_change(string):
    """Rewrite a doublet substitution 'NN>NN' as seen from the opposite strand.

    The reference and the alternate dinucleotide are each reverse-complemented,
    e.g. GC>CT becomes GC>AG:

        5'-GC-3' > 5'-CT-3'
        3'-CG-5' > 3'-GA-5'  ==>  5'-GC-3' > 5'-AG-3'

    Raises ValueError when the input is not five characters long with '>' in
    the middle, or when reverse_complement() rejects a base.
    """
    well_formed = len(string) == 5 and string[2] == '>'
    if not well_formed:
        raise ValueError("Input string must be in the format 'NN>NN'")

    # Only the first two '>'-separated pieces are used: reference, then alternate.
    sides = string.split('>')[:2]
    return '>'.join(reverse_complement(side) for side in sides)

# Project root and assembly FASTA (absolute paths on the compute cluster).
# NOTE(review): `dir` shadows the Python builtin of the same name; later cells
# read this variable, so renaming it must be done across the whole notebook.
dir="/mmfs1/gscratch/stergachislab/mhsohny/SMaHT/Improving_SomaticVariantCalling_through_DSA/Fiber-seq"
DSA="/mmfs1/gscratch/stergachislab/mhsohny/SMaHT/DSA/DSA_COLO829BL_v3.0.0.fasta"

# FASTA handle kept at module level; dbs78sig_maker() reads reference bases from it.
DSA_pysam = pysam.FastaFile(f"{DSA}")

# SNV call sets for the two tumor passages (B and A), loaded with read_vcf().
colotb_snvs_pass_annot_FlaggerHap_glfilt_pileupfilt = read_vcf(f"{dir}/VariantCalls_DeepVariant_1.6.1/Mutational_Spectrum/COLO829T_PassageB_DSA.deepvariant.split.snv.modified.final.vcf.gz")
colota_snvs_pass_annot_FlaggerHap_glfilt_pileupfilt = read_vcf(f"{dir}/VariantCalls_DeepVariant_1.6.1/Mutational_Spectrum/COLO829T_PassageA_DSA.deepvariant.split.snv.modified.final.vcf.gz")

# Per-record key "CHROM_POS_REF_ALT", used to compare the two call sets.
colotb_snvs_pass_annot_FlaggerHap_glfilt_pileupfilt['SNVid'] = colotb_snvs_pass_annot_FlaggerHap_glfilt_pileupfilt[['CHROM', 'POS', 'REF', 'ALT']].astype(str).apply('_'.join, axis=1)
colota_snvs_pass_annot_FlaggerHap_glfilt_pileupfilt['SNVid'] = colota_snvs_pass_annot_FlaggerHap_glfilt_pileupfilt[['CHROM', 'POS', 'REF', 'ALT']].astype(str).apply('_'.join, axis=1)

# Key sets per passage.
colotb_snvs_pass_annot_FlaggerHap_glfilt_pileupfilt_set = set(colotb_snvs_pass_annot_FlaggerHap_glfilt_pileupfilt['SNVid'].values)
colota_snvs_pass_annot_FlaggerHap_glfilt_pileupfilt_set = set(colota_snvs_pass_annot_FlaggerHap_glfilt_pileupfilt['SNVid'].values)

# Shared keys (intersection) and the keys unique to passage B / passage A (set differences).
tba_FlaggerHap_glfilt_pileupfilt_tba_set = colotb_snvs_pass_annot_FlaggerHap_glfilt_pileupfilt_set.intersection(colota_snvs_pass_annot_FlaggerHap_glfilt_pileupfilt_set)
tb_FlaggerHap_glfilt_pileupfilt_onlytb_set = colotb_snvs_pass_annot_FlaggerHap_glfilt_pileupfilt_set.difference(colota_snvs_pass_annot_FlaggerHap_glfilt_pileupfilt_set)
ta_FlaggerHap_glfilt_pileupfilt_onlyta_set = colota_snvs_pass_annot_FlaggerHap_glfilt_pileupfilt_set.difference(colotb_snvs_pass_annot_FlaggerHap_glfilt_pileupfilt_set)

print(f"The number of Shared SNVs between Passage B and A: {len(tba_FlaggerHap_glfilt_pileupfilt_tba_set):,}")
print(f"The number of B specific SNVs: {len(tb_FlaggerHap_glfilt_pileupfilt_onlytb_set):,}")
print(f"The number of A specific SNVs: {len(ta_FlaggerHap_glfilt_pileupfilt_onlyta_set):,}")

# Adjacent-SNV (candidate DBS) counts. Within each CHROM, `diff() == 1` marks a row
# whose POS is exactly one more than the previous row (second member of a pair);
# `diff().shift(-1) == 1` marks the row just before such a row (first member).
# Both prints carry the same label although they count different pair members.
# NOTE(review): this presumes rows are ordered by position within each CHROM — confirm.
print(f"The number of DBS variants for Passage B: {colotb_snvs_pass_annot_FlaggerHap_glfilt_pileupfilt[colotb_snvs_pass_annot_FlaggerHap_glfilt_pileupfilt.groupby('CHROM')['POS'].diff() == 1].shape[0]:,}")
print(f"The number of DBS variants for Passage B: {colotb_snvs_pass_annot_FlaggerHap_glfilt_pileupfilt[colotb_snvs_pass_annot_FlaggerHap_glfilt_pileupfilt.groupby('CHROM')['POS'].diff().shift(-1) == 1].shape[0]:,}")

# Same two counts for passage A.
print(f"The number of DBS variants for Passage A: {colota_snvs_pass_annot_FlaggerHap_glfilt_pileupfilt[colota_snvs_pass_annot_FlaggerHap_glfilt_pileupfilt.groupby('CHROM')['POS'].diff() == 1].shape[0]:,}")
print(f"The number of DBS variants for Passage A: {colota_snvs_pass_annot_FlaggerHap_glfilt_pileupfilt[colota_snvs_pass_annot_FlaggerHap_glfilt_pileupfilt.groupby('CHROM')['POS'].diff().shift(-1) == 1].shape[0]:,}")

# NOTE(review): the next two statements are character-for-character repeats of the
# Passage B prints above; they look like a copy-paste leftover.
print(f"The number of DBS variants for Passage B: {colotb_snvs_pass_annot_FlaggerHap_glfilt_pileupfilt[colotb_snvs_pass_annot_FlaggerHap_glfilt_pileupfilt.groupby('CHROM')['POS'].diff() == 1].shape[0]:,}")
print(f"The number of DBS variants for Passage B: {colotb_snvs_pass_annot_FlaggerHap_glfilt_pileupfilt[colotb_snvs_pass_annot_FlaggerHap_glfilt_pileupfilt.groupby('CHROM')['POS'].diff().shift(-1) == 1].shape[0]:,}")


The number of Shared SNVs between Passage B and A: 51,678
The number of B specific SNVs: 6,577
The number of A specific SNVs: 3,781
The number of DBS variants for Passage B: 1,175
The number of DBS variants for Passage B: 1,175
The number of DBS variants for Passage A: 1,097
The number of DBS variants for Passage A: 1,097
The number of DBS variants for Passage B: 1,175
The number of DBS variants for Passage B: 1,175


In [2]:
def dbs78sig_maker(df: pd.core.frame.DataFrame, cram: pysam.libcalignmentfile.AlignmentFile, outdir: str, prefix: str) -> pd.core.frame.DataFrame:
    """
    Tally doublet-base substitutions (DBS78 channels) and write a SigProfiler matrix.

    Rows of ``df`` whose next row, within the same CHROM, lies exactly 1 bp
    downstream are taken as the first base of a candidate doublet. For each such
    site the reference dinucleotide is read from the module-level ``DSA_pysam``
    FASTA handle, and every read returned by ``cram.fetch`` is inspected: the read
    bases aligned to the two positions form the alternate dinucleotide, and the
    resulting ``REF>ALT`` (or its opposite-strand form) is counted if it is one
    of the 78 channels.

    Parameters
    ----------
    df : pandas.core.frame.DataFrame
        vcf read through read_vcf(); the column at index 9 names the sample
    cram : pysam.libcalignmentfile.AlignmentFile
        cram file read using pysam
    outdir : str
        directory to write output (created if it does not exist)
    prefix : str
        file name prefix; the table is written to ``{outdir}/{prefix}.DBS78.all``

    Returns
    -------
    pandas.core.frame.DataFrame
        The 78-row count table that was written (index ``MutationType``, one
        column named after the sample).

    Notes
    -----
    * Counts are incremented once per supporting read, not once per site.
      NOTE(review): SigProfiler matrices normally hold mutation counts - confirm
      that read-level counting is intended.
    * Reads that do not align a base at the first doublet position or at the
      position right after the doublet are skipped, as are reads whose slice
      between those anchors is not exactly two bases long.

    Example
    ----------
    MutationType    COLO829T_PassageB_DSA
    AC>CA   1
    AC>CG   0
    AC>CT   0
    """

    sampleid = df.columns[9]

    dbs78_sigprofiler = (
        'AC>CA', 'AC>CG', 'AC>CT', 'AC>GA', 'AC>GG', 'AC>GT', 'AC>TA', 'AC>TG', 'AC>TT',
        'AT>CA', 'AT>CC', 'AT>CG', 'AT>GA', 'AT>GC', 'AT>TA',
        'CC>AA', 'CC>AG', 'CC>AT', 'CC>GA', 'CC>GG', 'CC>GT', 'CC>TA', 'CC>TG', 'CC>TT',
        'CG>AT', 'CG>GC', 'CG>GT', 'CG>TA', 'CG>TC', 'CG>TT',
        'CT>AA', 'CT>AC', 'CT>AG', 'CT>GA', 'CT>GC', 'CT>GG', 'CT>TA', 'CT>TC', 'CT>TG',
        'GC>AA', 'GC>AG', 'GC>AT', 'GC>CA', 'GC>CG', 'GC>TA',
        'TA>AT', 'TA>CG', 'TA>CT', 'TA>GC', 'TA>GG', 'TA>GT',
        'TC>AA', 'TC>AG', 'TC>AT', 'TC>CA', 'TC>CG', 'TC>CT', 'TC>GA', 'TC>GG', 'TC>GT',
        'TG>AA', 'TG>AC', 'TG>AT', 'TG>CA', 'TG>CC', 'TG>CT', 'TG>GA', 'TG>GC', 'TG>GT',
        'TT>AA', 'TT>AC', 'TT>AG', 'TT>CA', 'TT>CC', 'TT>CG', 'TT>GA', 'TT>GC', 'TT>GG',
    )
    dbs78_sigprofiler = dict.fromkeys(dbs78_sigprofiler, 0)

    df = df[df.groupby("CHROM")['POS'].diff().shift(-1) == 1] # Preceding one of the two DBS SNVs

    for _, row in tqdm(df.iterrows(), total=df.shape[0], desc="Processing Dinucleotides"):
        contig, start, end = row['CHROM'], row['POS'], row['POS']+1

        # Upper-case so soft-masked (lower-case) reference bases are classified
        # instead of being rejected by reverse_complement() and silently dropped.
        ref_dinucleotide = DSA_pysam.fetch(contig, start-1, end).upper()
        if len(ref_dinucleotide) != 2 or not set(ref_dinucleotide) <= set("ACGT"):
            # Truncated or ambiguous reference: no read at this site can match a channel.
            continue

        for read in cram.fetch(contig, start, end):
            query = read.query_sequence
            if query is None:
                # Record stored without a sequence; nothing to compare.
                continue

            ref_positions = read.get_reference_positions(full_length=True)
            try:
                # Query offsets of the first doublet base and of the base right
                # after the doublet; list.index raises if either is not aligned.
                first_offset = ref_positions.index(start-1)
                after_offset = ref_positions.index(end)
            except ValueError:
                # -> deletion (or the read does not span both anchors)
                continue

            read_dinucleotide = query[first_offset:after_offset]
            if len(read_dinucleotide) != 2: # len(read_dinucleotide) != 2 -> insertion
                continue

            dbs = f"{ref_dinucleotide}>{read_dinucleotide}"
            if dbs not in dbs78_sigprofiler:
                try:
                    dbs = dbs_context_change(dbs)
                except ValueError:
                    # Read carries a base other than A/C/G/T at the doublet.
                    continue

            if dbs in dbs78_sigprofiler:
                dbs78_sigprofiler[dbs] += 1

    new_df = pd.DataFrame(dbs78_sigprofiler, index=[0]).T
    new_df.columns = [sampleid]
    new_df.index.name = "MutationType"

    if outdir:
        os.makedirs(outdir, exist_ok=True)
    new_df.to_csv(f"{outdir}/{prefix}.DBS78.all", sep="\t")

    return new_df

In [10]:
# For each passage, keep only the SNVs whose SNVid is in the shared set, then
# print how many rows are immediately followed (same CHROM, POS + 1) by another row.
shared_snv_ids = tba_FlaggerHap_glfilt_pileupfilt_tba_set

df1 = colotb_snvs_pass_annot_FlaggerHap_glfilt_pileupfilt[
    colotb_snvs_pass_annot_FlaggerHap_glfilt_pileupfilt['SNVid'].isin(shared_snv_ids)
]
df1_pair_start = df1.groupby("CHROM")['POS'].diff().shift(-1) == 1
print(df1[df1_pair_start].shape[0])

df2 = colota_snvs_pass_annot_FlaggerHap_glfilt_pileupfilt[
    colota_snvs_pass_annot_FlaggerHap_glfilt_pileupfilt['SNVid'].isin(shared_snv_ids)
]
df2_pair_start = df2.groupby("CHROM")['POS'].diff().shift(-1) == 1
print(df2[df2_pair_start].shape[0])

51678
1013
51678
1013


In [None]:
# Passage B, all SNVs: open the Passage B CRAM and write its DBS78 table.
colotb_cram = pysam.AlignmentFile(f"{dir}/COLO829T_PassageB/COLO829T_PassageB_DSA.cram", "rc")

dbs78sig_maker(
    colotb_snvs_pass_annot_FlaggerHap_glfilt_pileupfilt,
    colotb_cram,
    f"{dir}/VariantCalls_DeepVariant_1.6.1/Mutational_Spectrum/02.DBS",
    "COLO829T_PassageB_DSA_All",
)

Processing Dinucleotides: 100%|██████████| 1175/1175 [02:28<00:00,  7.90it/s]


In [None]:
# Passage A, all SNVs: open the Passage A CRAM and write its DBS78 table.
colota_cram = pysam.AlignmentFile(f"{dir}/COLO829T_PassageA/COLO829T_PassageA_DSA.cram", "rc")

dbs78sig_maker(
    colota_snvs_pass_annot_FlaggerHap_glfilt_pileupfilt,
    colota_cram,
    f"{dir}/VariantCalls_DeepVariant_1.6.1/Mutational_Spectrum/02.DBS",
    "COLO829T_PassageA_DSA_All",
)

Processing Dinucleotides: 100%|██████████| 1097/1097 [01:48<00:00, 10.11it/s]


In [None]:
# DBS78 plots for the "All" tables: passage B first, then passage A, each drawn
# once as percentages and once as raw counts.
# NOTE(review): output_path has no trailing separator; check how the installed
# sigProfilerPlotting version joins it with the file name.
for all_matrix_tag in ("COLO829T_PassageB_DSA_All", "COLO829T_PassageA_DSA_All"):
    for all_as_percentage in (True, False):
        sigPlt.plotDBS(
            matrix_path=f"{dir}/VariantCalls_DeepVariant_1.6.1/Mutational_Spectrum/02.DBS/{all_matrix_tag}.DBS78.all",
            output_path=f"{dir}/VariantCalls_DeepVariant_1.6.1/Mutational_Spectrum/02.DBS/All",
            project=f"{all_matrix_tag}.percentage" if all_as_percentage else all_matrix_tag,
            plot_type="78",
            savefig_format="pdf",
            percentage=all_as_percentage,
        )

In [None]:
# Fit COSMIC v3.4 doublet signatures to the "All" DBS78 tables, passage B (TB)
# first, then passage A (TA). Each fit gets its own output directory.
for all_fit_subdir, all_fit_matrix in (
    ("TB", "COLO829T_PassageB_DSA_All"),
    ("TA", "COLO829T_PassageA_DSA_All"),
):
    all_fit_outdir = f"{dir}/VariantCalls_DeepVariant_1.6.1/Mutational_Spectrum/02.DBS/All/{all_fit_subdir}"
    # Create the directory in-process rather than via a shell `mkdir -p`: no
    # quoting issues with the interpolated path, and failures raise instead of
    # being reported only through an ignored exit status.
    os.makedirs(all_fit_outdir, exist_ok=True)
    Analyze.cosmic_fit(
        f"{dir}/VariantCalls_DeepVariant_1.6.1/Mutational_Spectrum/02.DBS/{all_fit_matrix}.DBS78.all",
        all_fit_outdir,
        input_type="matrix",
        context_type="DINUC",
        collapse_to_SBS96=False,
        cosmic_version=3.4,
        exome=False,
        genome_build="GRCh38",
        signature_database=None,
        exclude_signature_subgroups=None,
        export_probabilities=True,
        export_probabilities_per_mutation=False,
        make_plots=True,
        sample_reconstruction_plots="pdf",
        verbose=False)

Assigning COSMIC sigs or Signature Database ...... 
|████████████████████████████████████████| 1/1 [100%] in 0.0s (38.59/s) 


 
Your Job Is Successfully Completed! Thank You For Using SigProfilerAssignment.
 
Assigning COSMIC sigs or Signature Database ...... 
|████████████████████████████████████████| 1/1 [100%] in 0.0s (39.84/s) 


 
Your Job Is Successfully Completed! Thank You For Using SigProfilerAssignment.
 


In [None]:
# Split each passage's call set by SNVid membership: shared by both passages,
# or present in only one of them.
tb_calls = colotb_snvs_pass_annot_FlaggerHap_glfilt_pileupfilt
ta_calls = colota_snvs_pass_annot_FlaggerHap_glfilt_pileupfilt

sharedb = tb_calls[tb_calls['SNVid'].isin(tba_FlaggerHap_glfilt_pileupfilt_tba_set)]
shareda = ta_calls[ta_calls['SNVid'].isin(tba_FlaggerHap_glfilt_pileupfilt_tba_set)]
bspecific = tb_calls[tb_calls['SNVid'].isin(tb_FlaggerHap_glfilt_pileupfilt_onlytb_set)]
aspecific = ta_calls[ta_calls['SNVid'].isin(ta_FlaggerHap_glfilt_pileupfilt_onlyta_set)]

colotb_cram = pysam.AlignmentFile(f"{dir}/COLO829T_PassageB/COLO829T_PassageB_DSA.cram", "rc")
colota_cram = pysam.AlignmentFile(f"{dir}/COLO829T_PassageA/COLO829T_PassageA_DSA.cram", "rc")

# One DBS78 table per subset, each read against the CRAM of its own passage.
for subset_calls, subset_cram, subset_prefix in (
    (sharedb, colotb_cram, "COLO829T_PassageB_DSA_Shared"),
    (shareda, colota_cram, "COLO829T_PassageA_DSA_Shared"),
    (bspecific, colotb_cram, "COLO829T_PassageB_DSA_B-Specific"),
    (aspecific, colota_cram, "COLO829T_PassageA_DSA_A-Specific"),
):
    dbs78sig_maker(
        subset_calls,
        subset_cram,
        f"{dir}/VariantCalls_DeepVariant_1.6.1/Mutational_Spectrum/02.DBS",
        subset_prefix,
    )

Processing Dinucleotides: 100%|██████████| 1013/1013 [02:19<00:00,  7.27it/s]
Processing Dinucleotides: 100%|██████████| 1013/1013 [01:45<00:00,  9.64it/s]
Processing Dinucleotides: 100%|██████████| 158/158 [00:13<00:00, 11.64it/s]
Processing Dinucleotides: 100%|██████████| 75/75 [00:06<00:00, 11.17it/s]


In [7]:
# DBS78 plots for the shared and passage-specific tables. Each table is drawn
# twice: as percentages (project name suffixed ".percentage") and as raw counts.
# NOTE(review): output_path has no trailing separator; check how the installed
# sigProfilerPlotting version joins it with the file name.
for subset_matrix_tag, subset_plot_dir in (
    ("COLO829T_PassageB_DSA_Shared", "Shared"),
    ("COLO829T_PassageB_DSA_B-Specific", "B-Specific"),
    ("COLO829T_PassageA_DSA_Shared", "Shared"),
    ("COLO829T_PassageA_DSA_A-Specific", "A-Specific"),
):
    for subset_as_percentage in (True, False):
        sigPlt.plotDBS(
            matrix_path=f"{dir}/VariantCalls_DeepVariant_1.6.1/Mutational_Spectrum/02.DBS/{subset_matrix_tag}.DBS78.all",
            output_path=f"{dir}/VariantCalls_DeepVariant_1.6.1/Mutational_Spectrum/02.DBS/{subset_plot_dir}",
            project=f"{subset_matrix_tag}.percentage" if subset_as_percentage else subset_matrix_tag,
            plot_type="78",
            savefig_format="pdf",
            percentage=subset_as_percentage,
        )

In [8]:
# Fit COSMIC v3.4 doublet signatures to the shared and passage-specific DBS78
# tables, in this order: Shared (Passage B), B-Specific, Shared (Passage A),
# A-Specific. Each fit writes into its own "<subset>/<TB|TA>" directory.
for subset_fit_dir, subset_fit_matrix in (
    ("Shared/TB", "COLO829T_PassageB_DSA_Shared"),
    ("B-Specific/TB", "COLO829T_PassageB_DSA_B-Specific"),
    ("Shared/TA", "COLO829T_PassageA_DSA_Shared"),
    ("A-Specific/TA", "COLO829T_PassageA_DSA_A-Specific"),
):
    subset_fit_outdir = f"{dir}/VariantCalls_DeepVariant_1.6.1/Mutational_Spectrum/02.DBS/{subset_fit_dir}"
    # Create the directory in-process rather than via a shell `mkdir -p`: no
    # quoting issues with the interpolated path, and failures raise instead of
    # being reported only through an ignored exit status.
    os.makedirs(subset_fit_outdir, exist_ok=True)
    Analyze.cosmic_fit(
        f"{dir}/VariantCalls_DeepVariant_1.6.1/Mutational_Spectrum/02.DBS/{subset_fit_matrix}.DBS78.all",
        subset_fit_outdir,
        input_type="matrix",
        context_type="DINUC",
        collapse_to_SBS96=False,
        cosmic_version=3.4,
        exome=False,
        genome_build="GRCh38",
        signature_database=None,
        exclude_signature_subgroups=None,
        export_probabilities=True,
        export_probabilities_per_mutation=False,
        make_plots=True,
        sample_reconstruction_plots="pdf",
        verbose=False)


Assigning COSMIC sigs or Signature Database ...... 
|████████████████████████████████████████| 1/1 [100%] in 0.2s (4.93/s) 


 
Your Job Is Successfully Completed! Thank You For Using SigProfilerAssignment.
 
Assigning COSMIC sigs or Signature Database ...... 
|████████████████████████████████████████| 1/1 [100%] in 0.0s (20.25/s) 


 
Your Job Is Successfully Completed! Thank You For Using SigProfilerAssignment.
 
Assigning COSMIC sigs or Signature Database ...... 
|████████████████████████████████████████| 1/1 [100%] in 0.0s (45.01/s) 


 
Your Job Is Successfully Completed! Thank You For Using SigProfilerAssignment.
 
Assigning COSMIC sigs or Signature Database ...... 
|████████████████████████████████████████| 1/1 [100%] in 0.1s (18.89/s) 


 
Your Job Is Successfully Completed! Thank You For Using SigProfilerAssignment.
 
