---
# Dataloader description for the FactorNet model.
# Kind of dataloader, and the `<file>::<object>` reference that presumably
# locates its implementation.
type: Dataset
defined_as: 'dataloader.py::SeqDataset'
# Arguments accepted by the dataloader. Each argument carries a `doc` string,
# optionally an `example` (download `url` plus its `md5` digest), and
# `optional: true` when it may be left out.
args:
  intervals_file:
    # NOTE(review): the text says "bed3" but lists six columns - confirm.
    doc: bed3 file with `chrom start end id score strand`
    example:
      md5: 'a060800e9c7ecccc2fc7d212ea6e6a61'
      url: https://zenodo.org/record/1466084/files/CEBPB-meta_Unique35_DGF-example_files-intervals.bed?download=1
  fasta_file:
    doc: Reference genome sequence
    example:
      md5: 'b0f5cdd4f75186f8a4d2e23378c57b5b'
      url: https://zenodo.org/record/1466084/files/CEBPB-meta_Unique35_DGF-example_files-hg38_chr22.fa?download=1
  dnase_file:
    doc: DNase bigwig file
    example:
      md5: '9e7d6fbd169867dded34840f7f3be15c'
      url: https://zenodo.org/record/1466084/files/CEBPB-meta_Unique35_DGF-example_files-dnase_synth.chr22.bw?download=1
  cell_line:
    doc: Cell type as a string.
    example: PC-3
    optional: true
  RNAseq_PC_file:
    doc: >-
      file path to a RNAseq PC file computed by
      https://github.com/davidaknowles/tf_net/blob/master/gene_expression_pca.R.
      See https://github.com/uci-cbcl/FactorNet/blob/master/data/README.md.
    optional: true
  mappability_file:
    # NOTE(review): the doc URL points at an hg19 track while the fasta example
    # above is named hg38_chr22 - confirm the assemblies are meant to differ.
    doc: >-
      UCSC mappability track -
      http://hgdownload.cse.ucsc.edu/goldenpath/hg19/encodeDCC/wgEncodeMapability/wgEncodeDukeMapabilityUniqueness35bp.bigWig.
      by default, provide this file with the dataloader, download in background
    example:
      md5: '912e1bb77fb67c1a792ac4a05d9416f9'
      url: https://github.com/kipoi/models/blob/92b23ed62956d56eb457488b431fc27550ddbf32/FactorNet/template/dataloader_files/wgEncodeDukeMapabilityUniqueness35bp.chr22.bigWig?raw=true
    optional: true
  use_linecache:
    doc: if True, use linecache https://docs.python.org/3/library/linecache.html to access bed file rows
    optional: true
# Authorship and a one-line description of this dataloader.
info:
  authors:
    # `github` is the handle of the author named on the line above it.
    - name: Ziga Avsec
      github: avsecz
  doc: Dataloader for the FactorNet model.
# Packages the dataloader needs, given as conda specs. Entries written as
# `channel::package` name the channel explicitly; bare names do not.
dependencies:
  conda:
    - bioconda::bedtools
    - bioconda::pybedtools
    - bioconda::genomelake
    - numpy
    - cython
# Description of what the dataloader emits: the model inputs, the targets, and
# the metadata entries that inputs refer to through `associated_metadata`.
output_schema:
  inputs:
    # Shapes are kept as quoted strings so they are clearly text, not YAML
    # flow syntax.
    - name: seq
      shape: '(1002, 6)'
      doc: DNA sequence and other big-wig channels (mappability and DNase-seq)
      associated_metadata: ranges
    - name: seq_rc
      shape: '(1002, 6)'
      doc: Reverse-complemented DNA sequence and reversed other bigwig channels (mappability and DNase-seq)
      associated_metadata: ranges_rc
    - name: meta_features
      shape: '(8,)'
      doc: First 8 RNAseq principal components for the tissue.
  targets:
    name: is_binding_site
    shape: '(1,)'
    doc: TF binding class
    column_labels: tasks.txt
  metadata:
    # Keys here match the `associated_metadata` names used by the inputs above.
    ranges:
      type: GenomicRanges
      doc: Ranges describing inputs.seq
    ranges_rc:
      type: GenomicRanges
      doc: Ranges describing inputs.seq_rc