In [1]:
from SigProfilerExtractor import sigpro as sig
import pandas as pd
from Bio.Seq import Seq

#from SigProfilerMatrixGenerator import install as genInstall
#genInstall.install("GRCh38")
# Fetch the example "matrix" input shipped with SigProfilerExtractor.
# Both names are bound to the same value; "data" can be used as a parameter of
# the "project" argument of the sigProfilerExtractor function.
path_to_example_table = data = sig.importdata("matrix")

In [3]:
from os import rename


# Translation table used to complement DNA k-mers with the standard library only.
# Covers upper/lower-case bases and the IUPAC ambiguity codes.
_DNA_COMPLEMENT = str.maketrans(
    "ACGTMRWSYKVHDBNacgtmrwsykvhdbn",
    "TGCAKYWSRMBDHVNtgcakywsrmbdhvn",
)


def convert_mutyper_mutation(mutation):
    """Convert a MuTyper mutation to a SigProfiler mutation.

    Parameters
    ----------
    mutation : str
        A 7-character string of the form ``"XYZ>XWZ"``: characters 0-2 are the
        ancestral 3-mer, character 3 is the separator and characters 4-6 are
        the derived 3-mer (e.g. ``"ACA>AAA"``).

    Returns
    -------
    str
        The mutation in ``"X[Y>W]Z"`` notation (e.g. ``"A[C>A]A"``).

    Notes
    -----
    SigProfiler labels substitutions by the pyrimidine (C or T) of the mutated
    base pair. When the central ancestral base is a purine (A or G), both
    3-mers are reverse-complemented first so the result is always C- or
    T-centred. Inputs that are already C- or T-centred pass through unchanged.
    """
    if mutation[1] in ("A", "G"):
        # Reverse complement = complement every base, then reverse the k-mer.
        ancestral = mutation[0:3].translate(_DNA_COMPLEMENT)[::-1]
        derived = mutation[4:7].translate(_DNA_COMPLEMENT)[::-1]
        mutation = f"{ancestral}>{derived}"
    ref_base = mutation[1]  # central base of the ancestral 3-mer
    alt_base = mutation[5]  # central base of the derived 3-mer
    return f"{mutation[0]}[{ref_base}>{alt_base}]{mutation[2]}"

def read_mutyper_file(mutyper_path, tag = None, write = True):
    """Reshape a MuTyper spectra table into a SigProfiler-style matrix file.

    The tab-separated file at ``mutyper_path`` is read, indexed by its
    ``"sample"`` column and transposed, so each former column becomes a row and
    each sample becomes a column. A ``"Mutation Types"`` column, produced by
    applying ``convert_mutyper_mutation`` to the row labels, is inserted as the
    first column; rows are then sorted by it and the old row labels dropped.

    Parameters
    ----------
    mutyper_path : str
        Path of the tab-separated input file.
    tag : str, optional
        When given, every sample column is renamed to ``"<sample>_<tag>"``.
    write : bool, default True
        When true, the result is written tab-separated (no index) to
        ``"<mutyper_path>.cosmic.txt"``.

    Returns
    -------
    str
        ``"<mutyper_path>.cosmic.txt"``. The path is returned even when
        ``write`` is false, in which case this call does not create the file.
    """
    cosmic_path = f"{mutyper_path}.cosmic.txt"

    # Samples are rows on disk; flip them into columns.
    spectra = pd.read_csv(mutyper_path, sep="\t").set_index("sample").transpose()
    if tag is not None:
        spectra.columns = [f"{sample}_{tag}" for sample in spectra.columns]

    mutation_types = spectra.index.to_series().apply(convert_mutyper_mutation)
    spectra.insert(0, "Mutation Types", mutation_types)
    spectra = spectra.sort_values("Mutation Types").reset_index(drop=True)

    if write:
        spectra.to_csv(cosmic_path, sep="\t", index=False)
    return cosmic_path

# (input spectra file, tag appended to every sample column) for each stratum.
spectra_inputs = (
    ("../data/mutyper-results-no-igc/spectra/stratify/Unique_spectra.txt", "Unique"),
    ("../data/mutyper-results-no-igc/spectra/stratify/SD_spectra.txt", "SD"),
    ("../data/mutyper-results-no-igc/spectra/stratify/SDnoIGC_spectra.txt", "SD_no_IGC"),
    ("../data/mutyper-results-no-igc/spectra/stratify/SDIGC_spectra.txt", "IGC"),
)

# Convert each file in turn, collecting the returned output paths in order.
cosmic_paths = []
for mutyper_path, tag in spectra_inputs:
    cosmic_paths.append(read_mutyper_file(mutyper_path, tag=tag))

unique, sd, sd_no_igc, igc = cosmic_paths


In [4]:
# Re-read the converted "Unique" matrix and show its first rows as a sanity check.
unique_matrix = pd.read_csv(unique, sep="\t")
unique_matrix.head()

Unnamed: 0,Mutation Types,HG002_Unique,HG00438_Unique,HG005_Unique,HG00621_Unique,HG00673_Unique,HG00733_Unique,HG00735_Unique,HG00741_Unique,HG01071_Unique,...,HG03579_Unique,NA18906_Unique,NA19240_Unique,NA20129_Unique,NA21309_Unique,CHM1_Unique,GRCh38_Unique,HG00514_Unique,HG03125_Unique,NA12878_Unique
0,A[C>A]A,55223,55398,55011,55061,55085,55391,55270,54941,55112,...,55201,55875,55264,55396,55256,55484,55108,55071,55294,55040
1,A[C>A]C,53046,53193,53041,53028,53041,53215,53172,52846,52971,...,52786,53350,53010,53178,52737,53522,53132,52645,52422,52874
2,A[C>A]G,13496,13451,13424,13405,13373,13490,13533,13392,13611,...,13323,13488,13543,13490,13307,13574,13474,13101,13274,13566
3,A[C>A]T,27085,27103,27113,27013,26940,27040,27231,27001,27074,...,27185,27213,26931,27141,26836,27258,27218,26910,26582,27173
4,A[C>G]A,44060,43993,43978,44210,43896,43853,44023,44219,43855,...,43793,43969,43947,43925,43779,44518,44210,43602,43293,43830


In [8]:

# (output sub-directory name, converted matrix path) for every stratum.
extraction_inputs = (
    ("Unique", unique),
    ("SD", sd),
    ("SD-no-IGC", sd_no_igc),
    ("IGC", igc),
)

# Settings shared by every extraction run.
extraction_settings = dict(
    reference_genome="GRCh38",
    minimum_signatures=2,
    maximum_signatures=6,
    nmf_replicates=100,
    cpu=80,
)

# Run one signature extraction per stratum, each into its own results folder.
for model_name, model in extraction_inputs:
    print(model_name)
    sig.sigProfilerExtractor(
        "matrix",
        f"sigprof_results/{model_name}",
        model,
        **extraction_settings,
    )


************** Reported Current Memory Use: 0.42 GB *****************

Extracting signature 2 for mutation type 96
The matrix normalizing cutoff is 5830769


process 3 continues please wait... 
execution time: 49 seconds 

process 3 continues please wait... 
execution time: 50 seconds 

process 3 continues please wait... 
execution time: 50 seconds 

process 3 continues please wait... 
execution time: 52 seconds 

process 3 continues please wait... 
execution time: 53 seconds 

process 3 continues please wait... 
execution time: 54 seconds 

process 3 continues please wait... 
execution time: 54 seconds 

process 3 continues please wait... 
execution time: 60 seconds 

process 3 continues please wait... 
execution time: 60 seconds 

process 3 continues please wait... 
execution time: 34 seconds 

process 3 continues please wait... 
execution time: 67 seconds 

process 3 continues please wait... 
execution time: 68 seconds 

process 3 continues please wait... 
execution time: 20 second

KeyboardInterrupt: 