In [None]:
import pickle
import gzip
import sys
sys.path.append('./src')

import numpy as np
import pandas as pd

In [None]:
# Enable IPython's autoreload extension in mode 2: modules are re-imported
# before each cell executes, so edits to the helper modules imported in the
# cells below take effect without restarting the kernel.
%load_ext autoreload
%autoreload 2

## preprocessing

In [None]:
from load_preprocess import load_preprocess_sa, load_preprocess_sp

# SignatureAnalyzer: preprocessed signatures, activities and signature dict

(signatures_SA,
 activities_SA,
 signatures_SA_dict) = load_preprocess_sa()

# SigProfiler: preprocessed signatures and signature dict

(signatures_SP,
 signatures_SP_dict) = load_preprocess_sp()

# pool both catalogues column-wise: keep every SignatureAnalyzer column and
# append only the SigProfiler columns whose label is not already present

sp_only_labels = [label for label in signatures_SP.columns if label not in signatures_SA.columns]
signatures = pd.concat(
    [signatures_SA, signatures_SP.loc[:, sp_only_labels]],
    axis=1,
    sort=False,
)

# tumor types: the sorted, de-duplicated prefixes (text before the first '__')
# of the activity column names

tumor_types = np.unique([column.split('__')[0] for column in activities_SA.columns])

In [None]:
# persist the pooled signature table as a gzip-compressed pickle
with gzip.open('./data/signatures.pickle.gz', 'wb') as handle:
    pickle.dump(signatures, handle)

## get tables of activities reconstructed by SignatureAnalyzer

In [None]:
from load_preprocess import retrieve_activities

In [None]:
# breast-lung-colon mixed cohort activities
#
# The cohort subset gets its own name: reusing `tumor_types` here would
# overwrite the full array of tumor types computed in the preprocessing cell
# with a three-element list, so the value of `tumor_types` would depend on
# which cells had been run.

blc_tumor_types = ['Breast_AdenoCA', 'Lung_AdenoCA', 'ColoRect_AdenoCA']
breastlungcolon = retrieve_activities(blc_tumor_types, activities_SA, signatures)

In [None]:
# activities for the breast cohort on its own

breast_only = ['Breast_AdenoCA']
breast = retrieve_activities(breast_only, activities_SA, signatures)

## plot injected profiles

In [None]:
from plots import plot_profiles

In [None]:
# Example: SBS9, looked up in the SignatureAnalyzer dict
# To plot another signature, change the label assigned below

sa_example_label = 'SBS9'
plot_profiles(sa_example_label, signatures_SA_dict, ymax=0.2)

In [None]:
# Example: SBS31, looked up in the SigProfiler dict

sp_example_label = 'SBS31'
plot_profiles(sp_example_label, signatures_SP_dict, ymax=0.2)

## plot burden distributions

In [None]:
from plots import plot_burden_distribution

In [None]:
# inputs: the breast-only and the mixed breast-lung-colon activity tables
# returned by retrieve_activities
plot_burden_distribution(breast, breastlungcolon)

## build synthetic datasets

In [None]:
from synthetic import Synthetic, generate_synthetic_data

In [None]:
%%capture
# %%capture must stay on the first line of the cell; it swallows whatever the
# two constructors below print or display.
# NOTE(review): if Synthetic draws random numbers, set a seed before this cell
# so the datasets are reproducible — confirm in the synthetic module.
synthetic_blc = Synthetic(breastlungcolon, signatures)
synthetic_b   = Synthetic(breast, signatures)

In [None]:
# store each Synthetic object in its own gzip-compressed pickle
for synthetic_obj, pickle_path in (
    (synthetic_b, './data/synthetic_b.pickle.gz'),
    (synthetic_blc, './data/synthetic_blc.pickle.gz'),
):
    with gzip.open(pickle_path, 'wb') as handle:
        pickle.dump(synthetic_obj, handle)

In [None]:
# breast cohort
# (presumably written under the path below — confirm in the synthetic module)

breast_output_dir = './data/breast/'
generate_synthetic_data(synthetic_b, breast_output_dir)

In [None]:
# breast-lung-colon mixed cohort
# (presumably written under the path below — confirm in the synthetic module)

blc_output_dir = './data/breastlungcolon/'
generate_synthetic_data(synthetic_blc, blc_output_dir)