# ExploSig Connect Demo
## Example: Association of ERCC2 and Signature 5* (Kim et al., Nature Genetics 2016)

In [1]:
import sys, os, numpy as np, pandas as pd
from case_study_ARDNMF_helpers import extract_signatures_Kim2016

### Step 0. Import `explosig_connect`
- API documentation: https://lrgr.io/explosig-connect/

In [2]:
from explosig_connect import connect

### Step 1. Start a session and connect

In [3]:
# Open the connection object that every later cell talks through. No arguments
# are passed, so whatever defaults `connect` defines are used.
conn = connect()

### Step 2. Receive data

In [4]:
# Inputs for every query below: the project(s) to pull and the genes of interest.
projects = ['TCGA-BLCA_BLCA_mc3.v0.2.8.WXS']
genes = ['ERCC2', 'TP53', 'RB1', 'CDKN1A', 'KDM6A', 'ARID1A']

# Mutation count data
counts_df = conn.get_mutation_type_counts(projects)
sbs_counts_df = conn.get_mutation_category_counts('SBS', projects)
# Gene-level data
gene_mut_df = conn.get_gene_mutation_data(genes, projects)
gene_exp_df = conn.get_gene_expression_data(genes, projects)
gene_cna_df = conn.get_copy_number_data(genes, projects)
# Sample metadata
# Built locally rather than fetched: one row per index label of sbs_counts_df,
# with a single 'Study' column. The label is read from `projects` instead of
# being retyped, so the two cannot drift apart. This demo queries exactly one
# project, hence projects[0] for every row.
# The index is passed as a plain list so only the labels are carried over.
samples_df = pd.DataFrame(
    {'Study': projects[0]},
    index=sbs_counts_df.index.tolist(),
)
clinical_df = conn.get_clinical_data(projects)

### Step 3. Extract signatures

In [15]:
# Run the case-study helper on the SBS count table. It returns a pair, unpacked
# here as the signatures table (sigs_df) and the exposures table (exps_df).
# NOTE(review): the later rename cell uses index= on sigs_df and columns= on
# exps_df, which suggests signature names label sigs_df's rows and exps_df's
# columns — confirm against the helper.
sigs_df, exps_df = extract_signatures_Kim2016(sbs_counts_df)

### Step 4. Send data

In [16]:
# Push the locally held tables back through the connection, one call per table.
# NOTE(review): gene_exp_df, gene_cna_df and clinical_df are retrieved in Step 2
# but are not sent here — confirm that is intentional.
conn.send_sample_metadata(samples_df)
conn.send_mutation_type_counts(counts_df)
# Signatures and exposures are both tagged with the 'SBS' category.
conn.send_signatures('SBS', sigs_df)
conn.send_exposures('SBS', exps_df)
conn.send_gene_mutation_data(gene_mut_df)

In [17]:
# Give the extracted signature its published name. The same old -> new mapping
# is applied to the row labels of sigs_df and the column labels of exps_df so
# the two tables stay in agreement.
signature_labels = {'Extracted Signature 4': 'Kim2016 5* (TCGA-130)'}
sigs_df = sigs_df.rename(index=signature_labels)
exps_df = exps_df.rename(columns=signature_labels)

# Re-send both tables so the renamed signature is what the session receives.
conn.send_signatures('SBS', sigs_df)
conn.send_exposures('SBS', exps_df)