In [1]:
# Directory that contains the CONET executable.
# Later cells also pass this same value to DataConverter.create_CoNET_input_files
# and as CONETParameters' data_dir, so keep the trailing slash unless you have
# confirmed that those accept a bare directory name.
bin_dir = "./"

In [2]:
import os
import sys

# Extend the module search path before the `conet` imports below
# (presumably so the local `conet` package two directories up is importable).
sys.path.append('../..')

import matplotlib.pyplot as plt
import networkx as nx
import pandas as pd
import seaborn as sns
from networkx.drawing.nx_pydot import graphviz_layout

import conet 
import conet.src.conet as c
import conet.src.conet_parameters as cp
import conet.src.data_converter.data_converter as dc
import conet.src.inference_result as ir

In [3]:
# Build the DataConverter that turns the corrected counts matrix (a comma-separated
# CSV) into the CONET-specific input files. The files themselves are written by a
# later call to create_CoNET_input_files.
# NOTE(review): event_length_normalizer looks like a total genome length in base
# pairs -- confirm against the DataConverter documentation.
data_converter = dc.DataConverter(
    "data/SA501X3F_filtered_corrected_counts.csv",
    delimiter=',',
    default_bin_length=150000,
    event_length_normalizer=3095677412,
    add_chromosome_ends=True,
    neutral_cn=2.0,
)

In [4]:
# DataConverter expects a list of potential breakpoint candidates; here we load a
# precalculated set. The file is space-separated with no header row, and the
# candidates are taken from the column labelled 1.
# Every value is shifted down by one (presumably converting 1-based indices to
# 0-based ones -- confirm against whatever produced indices.csv).
breakpoint_candidates_indices = [
    candidate - 1
    for candidate in pd.read_csv('data/indices.csv', header=None, sep=' ')[1].tolist()
]

In [5]:
# Convert the corrected counts matrix into CONET input files, passing bin_dir as
# the second argument. The `chromosomes` parameter can be set to restrict
# inference to a subset of chromosomes.
data_converter.create_CoNET_input_files(
    breakpoint_candidates_indices,
    bin_dir,
    chromosomes=[17, 18, 20, 23],
    add_chr_ends_to_indices=True,
)

In [8]:
# Run CONET tree inference. This may take up to 10 minutes.
#
# The executable path is built with os.path.join rather than string concatenation
# so that it is correct whether or not bin_dir ends with a path separator
# (for bin_dir = './' the result is still './CONET').
#
# NOTE: this assignment rebinds the name `conet`, which until this cell referred
# to the imported `conet` package; from here on it is the CONET wrapper object.
conet = c.CONET(os.path.join(bin_dir, "CONET"))
params = cp.CONETParameters(
    data_size_prior_c=0.5,
    data_dir=bin_dir,
    counts_penalty_c=200000,
    param_inf_iters=30000,
    seed=21567,
    mixture_size=2,
    pt_inf_iters=200000,
)
conet.infer_tree(params)

./CONET ./ 30000 200000 200000 1.0 0.5 1 21567 2 5 4 10 10 10000 1
b'Input files have been loaded succesfully\n'
b'Starting parameter estimation\n'
b'Finished parameter estimation\n'
b'Estimated breakpoint distribution:\n'
b'(weight: 1 mean: 0.274938sd: 1.03384)\n'
b'\n'
b'Estimated no-breakpoint distribution:\n'
b'mean: 0 sd: 0.457687\n'
b'\n'
b'State after 0 iterations:\n'
b'Tree size: 3\n'
b'Log-likelihood: -5345.6\n'
b'State after 10000 iterations:\n'
b'Tree size: 9\n'
b'Log-likelihood: 1121.7\n'
b'State after 20000 iterations:\n'
b'Tree size: 9\n'
b'Log-likelihood: -147.855\n'
b'State after 30000 iterations:\n'
b'Tree size: 12\n'
b'Log-likelihood: 325.829\n'
b'State after 40000 iterations:\n'
b'Tree size: 12\n'
b'Log-likelihood: 534.698\n'
b'State after 50000 iterations:\n'
b'Tree size: 17\n'
b'Log-likelihood: 513.625\n'
b'State after 60000 iterations:\n'
b'Tree size: 16\n'
b'Log-likelihood: 529.454\n'
b'State after 70000 iterations:\n'
b'Tree size: 18\n'
b'Log-likelihood: 572.212