In [1]:
%%javascript
var rate = 0;
// apply setting to  all current CodeMirror instances
IPython.notebook.get_cells().map(
    function(c) {  return c.code_mirror.options.cursorBlinkRate=rate;  }
);

// make sure new CodeMirror instance also use this setting
CodeMirror.defaults.cursorBlinkRate=rate;

<IPython.core.display.Javascript object>

In [2]:
with open("TumourSupressors.bed", 'w') as ts_file:
    ts_file.write('''1\t500\t1000\tTS1
1\t1500\t2000\tTS1
2\t500\t1000\tTS2
2\t1500\t2000\tTS2
''')

with open("AR.bed", 'w') as ar_file:
    ar_file.write('''X\t500\t1000\tTS1
X\t1500\t2000\tTS1
X\t2500\t3000\tTS2
X\t3500\t4000\tTS2
''')

with open("FusionCandidates.bed", 'w') as fusion_file:
    fusion_file.write('''21\t1000000\t1100000\tERG
21\t2000000\t2100000\tTMPRSS2
''')

with open("SVs.bed", 'w') as svs_file:
    svs_file.write('''21\t1500000\t1500100\tDEL\t0\t+
21\t2500000\t2500100\tDEL\t0\t+
''')

In [16]:
from enum import Enum
from pybedtools import BedTool
import pandas as pd
from svcaller.calling.events import SvType


class SvEffect(Enum):
    NO_OVERLAP = "NO_OVERLAP"
    OVERLAP_WITH_EFFECT = "OVERLAP_WITH_EFFECT"
    OVERLAP_UNKNOWN_EFFECT = "OVERLAP_UNKNOWN_EFFECT"
    GENE_FUSION = "GENE_FUSION"


class GeneClass(Enum):
    TUMOUR_SUPRESSOR = "TUMOUR_SUPRESSOR"
    AR = "AR"
    FUSION_CANDIDATE = "FUSION_CANDIDATE"

In [4]:
def extract_groups(bed_filename):
    df = pd.read_table(bed_filename, header=None, sep="\t")
    return {name: table for name, table in df.groupby(df[3])}

In [5]:
list(extract_groups('SVs.bed').values())[0]

Unnamed: 0,0,1,2,3,4,5
0,21,1500000,1500100,DEL,0,+
1,21,2500000,2500100,DEL,0,+


In [6]:
def predict_effects(svs_filename, ts_filename, ar_filename, fusion_filename):
    """
    Predict the consequence of the specified structural variants on the specified
    tumour suppressors, androgen receptor, and gene fusion candidate.

    :param svs_filename: Location of bed file specifying the structural variant coordinates.
    :param ts_filename: Location of bed file specifying the tumour supressor gene region coords.
    :param ar_filename: Location of bed file specifying Androgren Receptor gene region coords.
    :param fusion_filename: Location of bed file specifying two broad gene fusion region.

    :return: A dictionary with gene class as key and results dictionary as value
    """

    gene_classes = [enum.value for enum in list(GeneClass)]

    svs_bed = extract_groups(svs_filename)

    gene_to_bed_tables = [extract_groups(filename) for filename in
                         [ts_filename, ar_filename, fusion_filename]]

    gene_class_to_gene_region_bed = dict(zip(gene_classes, gene_to_bed_tables))

    gene_class_to_results = {}
    for gene_class, gene_region_bed in gene_class_to_gene_region_bed.items():
        gene_class_to_results[gene_class] = \
            predict_effects_for_class(svs_bed, gene_class, gene_region_bed)
    
    return gene_class_to_results

In [7]:
def predict_del_effect(sv, functional_regions):
    return None

def predict_inv_effect_tumour_suppressor(sv, functional_regions):
    return None

def predict_dup_effect_tumour_suppressor(sv, functional_regions):
    return None

def predict_tra_effect_tumour_suppressor(sv, functional_regions):
    return None

def predict_del_effect(sv, functional_regions):
    return None

def predict_inv_effect_ar(sv, functional_regions):
    return None

def predict_dup_effect_ar(sv, functional_regions):
    return None

def predict_tra_effect_ar(sv, functional_regions):
    return None

In [17]:
def predict_effects_for_class(svs_table, gene_class, gene_to_table):
    sv_and_scenario_to_function = {
        (SvType.DEL, GeneClass.TUMOUR_SUPRESSOR): predict_del_effect,
        (SvType.INV, GeneClass.TUMOUR_SUPRESSOR): predict_inv_effect_tumour_suppressor,
        (SvType.DUP, GeneClass.TUMOUR_SUPRESSOR): predict_dup_effect_tumour_suppressor,
        (SvType.TRA, GeneClass.TUMOUR_SUPRESSOR): predict_tra_effect_tumour_suppressor,
        (SvType.DEL, GeneClass.AR): predict_del_effect,
        (SvType.INV, GeneClass.AR): predict_inv_effect_ar,
        (SvType.DUP, GeneClass.AR): predict_dup_effect_ar,
        (SvType.TRA, GeneClass.AR): predict_tra_effect_ar,
    }

    for sv_type in SvType
    for gene in gene_to_table:
        # Generate a BedTool from the current gene's bed-formatted data:
        gene_bed = BedTool(gene_to_table[gene].to_string(), from_string=True)

        # Convert it
        print(gene_bed)

    return {}

In [20]:
svs_tmp = extract_groups('SVs.bed')
ts_tmp = extract_groups('TumourSupressors.bed')

In [27]:
predict_effects_for_class(svs_tmp, GeneClass.TUMOUR_SUPRESSOR, ts_tmp)

0	1	2	3
0	1	500	1000	TS1
1	1	1500	2000	TS1

0	1	2	3
2	2	500	1000	TS2
3	2	1500	2000	TS2



{}

In [11]:
from pybedtools import BedTool
import pandas as pd

In [19]:
with open("Test.bed", 'w') as f:
    f.write('''
1\t500\t1000\tGene1
1\t1500\t2000\tGene1
2\t500\t1000\tGene2
2\t1500\t2000\tGene2
''')

In [4]:
x = BedTool("Test.bed")

In [15]:
??pd.read_table

In [22]:
df = pd.read_table("Test.bed", header=None, sep="\t", names=["chrom", "start", "end", "gene"])

In [23]:
df

Unnamed: 0,chrom,start,end,gene
0,1,500,1000,Gene1
1,1,1500,2000,Gene1
2,2,500,1000,Gene2
3,2,1500,2000,Gene2


In [28]:
unique_genes = sorted(list(set(df["gene"])))

In [39]:
gene_groups = list(df.groupby(df['gene']))

In [41]:
gene_groups[0][1]

Unnamed: 0,chrom,start,end,gene
0,1,500,1000,Gene1
1,1,1500,2000,Gene1
