In [68]:
# Enable IPython's autoreload extension in mode 2, so imported modules are
# reloaded before each cell runs and local edits are picked up without a restart.
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [92]:
import higlass as hg

In [69]:
import os
import resgen as rg

# Credentials are read from the environment rather than hard-coded; a missing
# RESGEN_USER / RESGEN_PASSWORD variable raises KeyError here.
rgc = rg.connect(os.environ['RESGEN_USER'], os.environ['RESGEN_PASSWORD'])

In [70]:
# Project that the datasets and view configs in the cells below are synced into.
project = rgc.find_or_create_project('Nott et al. 2019')

In [73]:
import posixpath
import re

import requests

def parse_ucsc(hub_string):
    """Parse UCSC track-hub text (hub.txt, genomes.txt, trackDb.txt) into stanzas.

    The text is split into stanzas on blank lines. Inside a stanza every line
    is read as ``<setting> <value>``: the first whitespace-delimited token is
    the key and the remainder of the line is the value, so values that contain
    spaces (``longLabel My long label``, ``type bigWig 0 100``) are kept whole.
    Lines that are empty or start with ``#`` are ignored.

    Args:
        hub_string: Full text of a track-hub settings file.

    Returns:
        A list with one dict (setting name -> value string) per non-empty
        stanza, in file order. A setting without a value maps to ``''``; if a
        setting is repeated inside a stanza the last occurrence wins.
    """
    stanzas = []

    # A separator line may still carry spaces or a carriage return, so match
    # "newline, optional whitespace, newline" instead of consecutive newlines.
    for section in re.split(r'\n\s*\n', hub_string.strip()):
        settings = {}

        for raw_line in section.splitlines():
            line = raw_line.strip()
            if not line or line.startswith('#'):
                continue

            # Split on the first run of whitespace only; the value may
            # legitimately contain further spaces.
            parts = line.split(None, 1)
            settings[parts[0]] = parts[1] if len(parts) > 1 else ''

        if settings:
            stanzas.append(settings)

    return stanzas

def sync_track_hub(base_url):
    """Sync every genome listed by the UCSC track hub located at ``base_url``.

    Downloads ``{base_url}/hub.txt``, reads the ``genomesFile`` setting of its
    first stanza, downloads that file and calls ``sync_genome`` once per
    genome stanza found in it.

    Args:
        base_url: URL of the directory that contains ``hub.txt``. It is
            joined with ``/`` as given, so a trailing slash produces a
            doubled slash in the requested URLs.

    Raises:
        requests.HTTPError: If the hub or genomes file request returns an
            error status.
    """
    hub_url = f"{base_url}/hub.txt"

    hub_response = requests.get(hub_url)
    # Fail with the HTTP status instead of feeding an error page to the parser.
    hub_response.raise_for_status()

    hub_info = parse_ucsc(hub_response.content.decode('utf8'))[0]

    genomes_url = f'{base_url}/{hub_info["genomesFile"]}'
    genomes_response = requests.get(genomes_url)
    genomes_response.raise_for_status()

    genome_infos = parse_ucsc(genomes_response.content.decode('utf8'))

    for genome_info in genome_infos:
        sync_genome(base_url, genome_info)
    
def sync_genome(base_url, genome_info):
    """Sync every bigWig track of one hub genome into the module-level ``project``.

    Downloads the genome's trackDb file, parses it with ``parse_ucsc`` and
    calls ``project.sync_dataset`` for each stanza whose type is bigWig and
    that has a ``bigDataUrl``. Stanzas without a data URL (for example
    container tracks) are skipped.

    Args:
        base_url: URL of the hub directory (the one containing ``hub.txt``).
        genome_info: Genome stanza as returned by ``parse_ucsc``; its
            ``trackDb`` (path relative to ``base_url``) and ``genome``
            settings are read.

    Raises:
        requests.HTTPError: If the trackDb request returns an error status.
    """
    track_db_path = genome_info['trackDb']
    track_db_url = f"{base_url}/{track_db_path}"

    response = requests.get(track_db_url)
    # Fail with the HTTP status instead of parsing an error page as a trackDb.
    response.raise_for_status()
    content = response.content.decode('utf8')

    # URL paths always use '/', so take the directory with posixpath.
    genome_info_path = posixpath.dirname(track_db_path)

    for track in parse_ucsc(content):
        # The type setting may carry extra fields ("bigWig 0 100"); only the
        # first token names the file format. A stanza may also lack a type.
        track_format = track.get('type', '').split()[:1]
        big_data_url = track.get('bigDataUrl')

        if track_format != ['bigWig'] or not big_data_url:
            continue

        if '://' in big_data_url:
            # Already an absolute URL: use it unchanged.
            big_data_path = big_data_url
        else:
            # Relative URLs are resolved against the trackDb file's directory.
            big_data_path = f"{base_url}/{genome_info_path}/{big_data_url}"

        project.sync_dataset(
            big_data_path,
            datatype='vector',
            filetype='bigwig',
            assembly=genome_info['genome'],
            name=track.get('shortLabel'),
            description=track.get('longLabel')
        )
                
# Sync the bigWig tracks of the pooled nuclei ATAC hub (hg19) into the project.
sync_track_hub('http://homer.ucsd.edu/hubs//nuclei_atac_hg19_pooled/')

 100.00% Complete

In [74]:
# Sync the bigWig tracks of the pooled nuclei H3K27ac hub (hg19) into the project.
sync_track_hub('http://homer.ucsd.edu/hubs//nuclei_h3k27ac_hg19_pooled/')

 100.00% Complete

In [75]:
# Sync the bigWig tracks of the pooled nuclei H3K4me3 hub (hg19) into the project.
# Unlike the two hub URLs above, this one has no trailing slash.
sync_track_hub('http://homer.ucsd.edu/hubs//nuclei_h3k4me3_hg19_pooled')

 100.00% Complete

In [76]:
# Register the microglia PLAC-seq interactions bigBed (hg19) as a 'bedlike'
# dataset; it is looked up later by its name, 'Microglia PLAC-seq'.
project.sync_dataset(
    'http://homer.ucsd.edu/iholtman/Nuclei_project/interaction/MAPS/hg19/Microglia.5k_interactions_ucsc_genome_browser.inter.bb',
    datatype='bedlike',
    filetype='bigbed',
    name='Microglia PLAC-seq',
    assembly='hg19'
)

 100.00% Complete

R4e7-rVf: Microglia PLAC-seq

In [77]:
# Register the NeuN (neuron) PLAC-seq interactions bigBed (hg19) as a 'bedlike'
# dataset; it is looked up later by its name, 'Neuron PLAC-seq'.
project.sync_dataset(
    'http://homer.ucsd.edu/iholtman/Nuclei_project/interaction/MAPS/hg19/NeuN.5k_interactions_ucsc_genome_browser.inter.bb',
    datatype='bedlike',
    filetype='bigbed',
    name='Neuron PLAC-seq',
    assembly='hg19'
)

 100.00% Complete

JzccFAJU: Neuron PLAC-seq

In [None]:
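# NOTE: the source URL for this dataset was accidentally deleted from this cell, so the
# whole call is commented out. A later cell still looks up 'Oligodendrocyte PLAC-seq'
# by name, so that dataset must already exist in the project.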

# project.sync_dataset(
#     datatype='bedlike',
#     filetype='bigbed',
#     name='Oligodendrocyte PLAC-seq',
#     assembly='hg19'
# )

In [56]:
# Scratch check of parse_ucsc on a sample genomes file: show its first stanza.
# NOTE(review): in the visible notebook `genomes_string` is only assigned inside
# the commented-out cell at the end, so this cell raises NameError on a fresh kernel.
genome_info = parse_ucsc(genomes_string)[0]
genome_info

{'genome': 'hg19', 'trackDb': 'hg19/trackDb.txt'}

In [20]:
# Take the first hg19 chromosome-sizes dataset returned by the search and load
# its chromosome info (used below to turn genes/regions into coordinate ranges).
chromsizes_matches = rgc.find_datasets(datatype='chromsizes', assembly='hg19')
chromsizes = chromsizes_matches[0]
chrominfo = rgc.get_chrominfo(chromsizes)

In [27]:
# Take the first hg19 gene-annotations dataset returned by the search.
gene_annotation_matches = rgc.find_datasets(datatype='gene-annotations', assembly='hg19')
gene_annotations = gene_annotation_matches[0]

In [15]:
# Index the project's datasets by name so later cells can look them up directly,
# then list the available names one per line.
ds_dict = {dataset.name: dataset for dataset in project.list_datasets()}
print("\n".join(ds_dict))

pooled_astrocytes_atac
exvivo_atac_pooled
exvivo_atac_pooled_tbp
neurons_atac_pooled
oligodendrocytes_atac_pooled
peripheral_atac_pooled
microglia_atac_pooled
pooled_astrocytes_H3K27ac
exvivo_H3K27ac_pooled
exvivo_H3K27ac_pooled_tbp
neurons_H3K27ac_pooled
oligodendrocytes_H3K27ac_pooled
peripheral_H3K27ac
microglia_H3K27ac_pooled
pooled_astrocytes_H3K4me3
neurons_H3K4me3_pooled
oligodendrocytes_H3K4me3_pooled
microglia_H3K4me3_pooled
Microglia PLAC-seq
Neuron PLAC-seq
Oligodendrocyte PLAC-seq


In [87]:
# Dataset names that mention H3K4me3 anywhere. A substring test is used because
# `str.find(...) > 0` would silently drop a name that *starts* with the marker.
[name for name in ds_dict if 'H3K4me3' in name]

['pooled_astrocytes_H3K4me3',
 'neurons_H3K4me3_pooled',
 'oligodendrocytes_H3K4me3_pooled',
 'microglia_H3K4me3_pooled']

In [38]:
# Map each marker gene to the coordinate range used as a view's initial x domain.
gene_locs = {}

# After this loop the module-level name `gene` is left bound to the last symbol (a str).
for gene in ['cx3cr1', 'nefl', 'mog', 'gja1']:
    annotation = rgc.get_gene(gene_annotations, gene)

    # padding=1.5 presumably widens the range relative to the gene length — TODO confirm.
    gene_locs[gene] = chrominfo.to_gene_range(annotation, padding=1.5)

In [88]:
from higlass import Track, View
import higlass

# Bar fill colour per cell type, keyed by the short names that `tracks_options`
# indexes with below.
_bar_fill_colors = (
    ('microglia', 'rgb(201, 99, 95)'),
    ('neurons', 'rgb(55, 177, 144)'),
    ('od', 'rgb(54, 144, 192)'),
    ('ac', 'rgb(234, 182, 64)'),
)

options = {
    cell_type: {'barFillColor': fill_color}
    for cell_type, fill_color in _bar_fill_colors
}

def view_at_loc(ds_names, tracks_options, location, x, xtra_track_opts=None):
    """Build one higlass ``View``: a gene-annotation track plus one track per dataset.

    Args:
        ds_names: Dataset names to look up in the module-level ``ds_dict``.
        tracks_options: Per-track option dicts, paired positionally with
            ``ds_names`` (pairing stops at the shorter of the two).
        location: Passed to ``View`` as ``initialXDomain``.
        x: Passed to ``View`` as ``x``.
        xtra_track_opts: Optional dict of options merged over each track's
            own options; on a key clash these values win. ``None`` (the
            default) adds nothing.

    Returns:
        The ``View`` (created with ``width=3``) holding the module-level
        ``gene_annotations`` track followed by the dataset tracks.
    """
    # None instead of a mutable `{}` default; behaves the same for callers.
    shared_opts = xtra_track_opts or {}

    # `track_opts` rather than `options`, to avoid shadowing the module-level dict.
    tracks = [
        ds_dict[ds_name].hg_track(**{**track_opts, **shared_opts})
        for ds_name, track_opts in zip(ds_names, tracks_options)
    ]

    return View([gene_annotations.hg_track()] + tracks,
        initialXDomain=location, width=3, x=x)

# Dataset names per assay, each in the order microglia, neurons, oligodendrocytes,
# astrocytes, which matches the order of `tracks_options` below.
# NOTE: these three names are reassigned (to three-entry lists) in the SALL1 cell further down.
dsets_atac = ['microglia_atac_pooled',
             'neurons_atac_pooled',
             'oligodendrocytes_atac_pooled',
             'pooled_astrocytes_atac']
dsets_h3k27ac = ['microglia_H3K27ac_pooled',
             'neurons_H3K27ac_pooled',
             'oligodendrocytes_H3K27ac_pooled',
             'pooled_astrocytes_H3K27ac']
dsets_h3k4me3 = ['microglia_H3K4me3_pooled',
             'neurons_H3K4me3_pooled',
             'oligodendrocytes_H3K4me3_pooled',
             'pooled_astrocytes_H3K4me3']

# Option dicts in the same order as the entries of the dataset-name lists.
tracks_options = [
    options[cell_type]
    for cell_type in ('microglia', 'neurons', 'od', 'ac')
]

# One view per marker gene, placed at x = 0, 3, 6, 9. Only the first view keeps
# its track labels; the others are built with labelPosition 'hidden'.
_gene_columns = (('cx3cr1', 0), ('nefl', 3), ('mog', 6), ('gja1', 9))


def _views_per_gene(dsets):
    """Return the four per-gene views for the given list of dataset names."""
    views = []
    for gene_name, x_offset in _gene_columns:
        if x_offset == 0:
            view = view_at_loc(dsets, tracks_options, gene_locs[gene_name], x_offset)
        else:
            view = view_at_loc(dsets, tracks_options, gene_locs[gene_name], x_offset,
                               { 'labelPosition': 'hidden' })
        views.append(view)
    return views


views_atac = _views_per_gene(dsets_atac)

views_h3k27ac = _views_per_gene(dsets_h3k27ac)

views_h3k4me3 = _views_per_gene(dsets_h3k4me3)

# Render all twelve views in one display. The resgen JWT is passed along,
# presumably so the viewer can fetch this project's datasets — TODO confirm.
# `v` (third element of the result) is what gets saved by project.sync_viewconf.
# NOTE(review): the widget repr shown as this cell's output includes the
# auth_token value; clear the output before sharing or committing the notebook.
(d,s,v) = higlass.display(
    views_atac + views_h3k27ac + views_h3k4me3,
    auth_token=f'JWT {rgc.get_token()}'
)
d

HiGlassDisplay(auth_token='JWT <redacted>…

In [89]:
# Save the view config returned by the display call above to the project under the name "Figure 1A".
project.sync_viewconf(v, "Figure 1A")

In [94]:
%%javascript

// Load the higlass-arcs script (version pinned to 0.2.0) from unpkg via `require`.
// The callback body is empty; presumably loading the script is all that is needed
// for the "1d-arcs" track type used in a later cell — TODO confirm.
require(["https://unpkg.com/higlass-arcs@0.2.0/dist/higlass-arcs.min.js"],
    function(hglib) {
});

<IPython.core.display.Javascript object>

In [116]:
# Scratch inspection of the chromosome of `gene`.
# NOTE(review): executed out of order. `gene` is only assigned from rgc.get_gene in the
# next cell; on a top-to-bottom run it is still the loop variable (a str) left over from
# the gene_locs cell, so `.chrom` would raise AttributeError.
gene.chrom

'chrX'

In [152]:
# Region shown in the arcs figure.
gene = rgc.get_gene(gene_annotations, 'SALL1')
# NOTE(review): this gene-derived range is overwritten by the fixed chr16 window
# on the next line, so only the hard-coded window takes effect.
domain=chrominfo.to_gene_range(gene, padding=0.5)
domain=chrominfo.to_abs_range('chr16', 50850000, 51850000)

# Base options shared by the three PLAC-seq arc tracks; each track adds its own strokeColor.
arcs_options = {
    "track_type": "1d-arcs",
    "position": "top",
    "height": 50,
    "labelPosition": "topLeft",
}

# Reassigns the dataset-name lists from the Figure 1A cell, this time without the
# astrocyte entries. Zipped against the four-entry `tracks_options`, pairing stops
# after three, so the last option dict ('ac') is unused here.
dsets_atac = ['microglia_atac_pooled',
             'neurons_atac_pooled',
             'oligodendrocytes_atac_pooled']
dsets_h3k27ac = ['microglia_H3K27ac_pooled',
             'neurons_H3K27ac_pooled',
             'oligodendrocytes_H3K27ac_pooled']
dsets_h3k4me3 = ['microglia_H3K4me3_pooled',
             'neurons_H3K4me3_pooled',
             'oligodendrocytes_H3K4me3_pooled']

def with_options(**kwargs):
    """Return a new dict: a copy of ``arcs_options`` with ``kwargs`` applied on top."""
    merged = dict(arcs_options)
    merged.update(kwargs)
    return merged

# Single view over `domain`: axis, chromosome labels and gene annotations first,
# then the ATAC / H3K27ac / H3K4me3 signal tracks, then the PLAC-seq arc tracks.
sall1_tracks = [
    Track('top-axis'),
    chromsizes.hg_track(tickPositions="ends"),
    gene_annotations.hg_track(),
]

for assay_dsets in (dsets_atac, dsets_h3k27ac, dsets_h3k4me3):
    for dset_name, track_opts in zip(assay_dsets, tracks_options):
        sall1_tracks.append(ds_dict[dset_name].hg_track(**track_opts))

for arcs_dset, stroke_color in (
    ('Microglia PLAC-seq', 'red'),
    ('Neuron PLAC-seq', 'green'),
    ('Oligodendrocyte PLAC-seq', 'blue'),
):
    sall1_tracks.append(
        ds_dict[arcs_dset].hg_track(**with_options(strokeColor=stroke_color))
    )

(d,s,v) = hg.display([View(sall1_tracks, initialXDomain=domain)])
d

HiGlassDisplay(viewconf={'editable': True, 'views': [{'uid': 'bq8XqbKnSByCds_LNA0KPw', 'tracks': {'top': [{'ty…

In [147]:
## hi

In [123]:
# Scratch inspection: per-chromosome cumulative start offsets (chr1 starts at 0).
chrominfo.cum_chrom_lengths

{'chr1': 0,
 'chr2': 249250621,
 'chr3': 492449994,
 'chr4': 690472424,
 'chr5': 881626700,
 'chr6': 1062541960,
 'chr7': 1233657027,
 'chr8': 1392795690,
 'chr9': 1539159712,
 'chr10': 1680373143,
 'chr11': 1815907890,
 'chr12': 1950914406,
 'chr13': 2084766301,
 'chr14': 2199936179,
 'chr15': 2307285719,
 'chr16': 2409817111,
 'chr17': 2500171864,
 'chr18': 2581367074,
 'chr19': 2659444322,
 'chr20': 2718573305,
 'chr21': 2781598825,
 'chr22': 2829728720,
 'chrX': 2881033286,
 'chrY': 3036303846,
 'chrM': 3095677412,
 'chr6_ssto_hap7': 3095693983,
 'chr6_mcf_hap5': 3100622550,
 'chr6_cox_hap2': 3105455948,
 'chr6_mann_hap4': 3110251319,
 'chr6_apd_hap1': 3114934582,
 'chr6_qbl_hap6': 3119556872,
 'chr6_dbb_hap3': 3124168856,
 'chr17_ctg5_hap1': 3128779252,
 'chr4_ctg9_hap1': 3130460080,
 'chr1_gl000192_random': 3131050506,
 'chrUn_gl000225': 3131598002,
 'chr4_gl000194_random': 3131809175,
 'chr4_gl000193_random': 3132000644,
 'chr9_gl000200_random': 3132190433,
 'chrUn_gl000222': 31

In [104]:
# hi

In [None]:
# hub_string = """
# hub nuclei_atac_hg19_pooled
# shortLabel nuclei_atac_hg19_pooled
# longLabel nuclei_atac_hg19_pooled
# genomesFile genomes.txt
# email alnott@ucsd.edu
# """

# genomes_string = """
# genome hg19
# trackDb hg19/trackDb.txt

# genome mm9
# trackDb mm9/trackDb.txt
# """

# track_db_string = """
# track nuclei_atac_hg19_pooled
# container multiWig
# noInherit on
# shortLabel nuclei_atac_hg19_pooled
# longLabel nuclei_atac_hg19_pooled
# type bigWig
# configurable on
# visibility full
# aggregate transparentOverlay
# showSubtrackColorOnUi on
# autoScale on
# windowingFunction maximum
# priority 1.4
# alwaysZero on
# yLineMark 0
# yLineOnOff on
# maxHeightPixels 100:75:11

# track human_LHX2nuclei_atac_epilepsy_pooled_hg19
# bigDataUrl human_LHX2nuclei_atac_epilepsy_pooled_hg19.ucsc.bigWig
# shortLabel pooled_astrocytes_atac
# longLabel human_LHX2nuclei_atac_epilepsy_pooled_hg19
# type bigWig
# parent nuclei_atac_hg19_pooled
# color 255,127,0

# track human_microglia_atac_exvivo_pooled_hg19
# bigDataUrl human_microglia_atac_exvivo_pooled_hg19.ucsc.bigWig
# shortLabel exvivo_atac_pooled
# longLabel human_microglia_atac_exvivo_pooled_hg19
# type bigWig
# parent nuclei_atac_hg19_pooled
# color 152,78,163

# track human_microglia_atac_exvivo_pooled_hg19_tbp
# bigDataUrl human_microglia_atac_exvivo_pooled_hg19_tbp.ucsc.bigWig
# shortLabel exvivo_atac_pooled_tbp
# longLabel human_microglia_atac_exvivo_pooled_hg19_tbp
# type bigWig
# parent nuclei_atac_hg19_pooled
# color 152,78,163

# track human_NEUNnuclei_atac_epilepsy_pooled_hg19
# bigDataUrl human_NEUNnuclei_atac_epilepsy_pooled_hg19.ucsc.bigWig
# shortLabel neurons_atac_pooled
# longLabel human_NEUNnuclei_atac_epilepsy_pooled_hg19
# type bigWig
# parent nuclei_atac_hg19_pooled
# color 77,175,74

# track human_OLIG2nuclei_atac_epilepsy_pooled_hg19
# bigDataUrl human_OLIG2nuclei_atac_epilepsy_pooled_hg19.ucsc.bigWig
# shortLabel oligodendrocytes_atac_pooled
# longLabel human_OLIG2nuclei_atac_epilepsy_pooled_hg19
# type bigWig
# parent nuclei_atac_hg19_pooled
# color 55,126,184

# track human_peripheralPU1nuclei_atac_hg19
# bigDataUrl human_peripheralPU1nuclei_atac_hg19.ucsc.bigWig
# shortLabel peripheral_atac_pooled
# longLabel human_peripheralPU1nuclei_atac_hg19
# type bigWig
# parent nuclei_atac_hg19_pooled
# color 177,89,40

# track human_PU1nuclei_atac_epilepsy_pooled_hg19
# bigDataUrl human_PU1nuclei_atac_epilepsy_pooled_hg19.ucsc.bigWig
# shortLabel microglia_atac_pooled
# longLabel human_PU1nuclei_atac_epilepsy_pooled_hg19
# type bigWig
# parent nuclei_atac_hg19_pooled
# color 228,26,28
# """