# `kipoi` Python SDK

In [1]:
# Workaround: keras apparently has to be imported before kipoi; without this
# import the session segfaults (root cause unknown).
import keras

Using TensorFlow backend.


In [2]:
import warnings
# Ignore every warning for the rest of the session (presumably to keep the
# cell outputs readable).
warnings.filterwarnings('ignore')

In [3]:
import kipoi

## Load the model

In [4]:
# Relative path to the example model directory (model.json and weights.h5 are
# loaded from here, see the log output of the next cell).
model_dir = "../examples/extended_coda/"

In [5]:
# Load the model (architecture + weights) from the files found in model_dir.
model = kipoi.load_model(model_dir)

INFO:2017-09-01 14:01:59,449:kipoi] successfully loaded model architecture from ../examples/extended_coda/model.json
INFO:2017-09-01 14:01:59,470:kipoi] successfully loaded model weights from ../examples/extended_coda/weights.h5


In [6]:
# The docstring lists the available methods and echoes the model.yaml description.
print(model.__doc__)

Model instance

        # Methods
          predict_on_batch(x)

        # model.yaml

            author: Johnny Israeli
        name: extended CODA
        version: 0.1
        description: Single bp resolution ChIP-seq denoising
        model:
            type: keras
            args:
                weights: weights.h5
                arch: model.json
            inputs:
                H3K27ac_subsampled:
                    type: bigwig
            targets:
                H3K27ac:
                    type: bigwig

        


In [7]:
# The underlying Keras model object is reachable through the `.model` attribute.
model.model

<keras.engine.training.Model at 0x7f5832aa4c88>

## Load the extractor

In [8]:
# load_extractor returns the extractor *class* (CodaDataset here, per the log
# output); it is instantiated with its arguments further below.
Extractor = kipoi.load_extractor(model_dir)

INFO:2017-09-01 14:02:01,167:kipoi] successfully imported CodaDataset from extractor.py


In [9]:
# The docstring lists the dataset methods and echoes the extractor.yaml description.
print(Extractor.__doc__)

Model instance

        # Methods
            - .__getitem__(idx) - Get items via subsetting obj.[idx]
            - .__len__() - Get the length - len(obj)

        # extractor.yaml

            author: Johnny Israeli
        name: extended CODA
        version: 0.1
        description: Preprocessor for single bp resolution ChIP-seq denoising
        extractor:
            type: Dataset
            defined_as: CodaDataset
            arguments:
                intervals_file: "string; tsv file with `chrom start end`"
                input_data_sources: "dict; {data_name: <path to genomelake directory>}"
                target_data_sources: "dict, optional; {data_name: <path to genomelake directory>}"
            output:
                H3K27ac_subsampled:
                    type: bigwig
                    provide_ranges: True
                metadata:
                    type: dict
                    ranges:
                        chrom:
                            type:str
       

## Run the pipeline for some test examples

In [10]:
cd $model_dir/test_files

/data/nasif12/home_if12/avsec/projects-work/kipoi/examples/extended_coda/test_files


In [11]:
# example arguments
import yaml

# test.json is read with the YAML parser (JSON is, for practical purposes, a
# subset of YAML). safe_load replaces the bare yaml.load(f): it never builds
# arbitrary Python objects, and yaml.load without an explicit Loader is
# deprecated since PyYAML 5.1 and raises a TypeError from PyYAML 6 on.
with open("test.json", "r") as f:
    test_kwargs = yaml.safe_load(f)

In [12]:
# Inspect the keyword arguments that will be passed to the extractor.
test_kwargs

{'input_data_sources': {'H3K27AC_subsampled': 'H3K27AC_subsampled.bw'},
 'intervals_file': 'intervals.tsv'}

In [13]:
# Instantiate the dataset with the example arguments loaded from test.json.
ext = Extractor(**test_kwargs)

In [14]:
# A single example is a dict with an 'inputs' entry (numpy array per input
# name) and a 'metadata' entry (chrom/start/end/id of the interval).
ext[0]

{'inputs': {'H3K27AC_subsampled': array([[ 4.1371],
         [ 4.1371],
         [ 4.1371],
         ..., 
         [ 0.3026],
         [ 0.3026],
         [ 0.3026]], dtype=float32)},
 'metadata': {'chrom': 'chr22', 'end': 29492188, 'id': '1', 'start': 29467163}}

In [15]:
# Number of examples in the dataset.
len(ext)

127

In [16]:
# Manual batching: wrap the dataset in a torch DataLoader. The cells below
# only pull the first batch and predict on it; the whole dataset is processed
# in the "Run all at once" section.
from torch.utils.data import DataLoader
from kipoi.data import numpy_collate

In [17]:
# Batches of 32 examples; numpy_collate is used as the collate function
# (presumably so that batches stay numpy arrays instead of torch tensors).
dloader = DataLoader(ext, batch_size=32, collate_fn=numpy_collate)

In [18]:
# Create an iterator over the batches.
it = iter(dloader)

In [19]:
# Fetch the first batch only.
batch = next(it)

In [20]:
# A batch has the same dict layout as a single example, with the arrays
# stacked along a leading batch axis.
batch

{'inputs': {'H3K27AC_subsampled': array([[[  4.1371],
          [  4.1371],
          [  4.1371],
          ..., 
          [  0.3026],
          [  0.3026],
          [  0.3026]],
  
         [[  4.2852],
          [  4.2852],
          [  4.2852],
          ..., 
          [  0.3282],
          [  0.3282],
          [  0.3282]],
  
         [[  4.2451],
          [  4.2451],
          [  4.2451],
          ..., 
          [  0.3706],
          [  0.3706],
          [  0.3706]],
  
         ..., 
         [[  0.0021],
          [  0.0021],
          [  0.0021],
          ..., 
          [ 28.3524],
          [ 28.3524],
          [ 28.3524]],
  
         [[  0.0312],
          [  0.0312],
          [  0.0312],
          ..., 
          [ 27.8853],
          [ 27.8853],
          [ 27.8853]],
  
         [[  0.0276],
          [  0.0276],
          [  0.0276],
          ..., 
          [ 27.1714],
          [ 27.1714],
          [ 27.1714]]], dtype=float32)},
 'metadata': {'chrom': ['c

In [21]:
# Predict on the inputs of the first batch.
model.predict_on_batch(batch["inputs"])

array([[[  5.9986],
        [  9.1787],
        [ 10.9781],
        ..., 
        [  0.4445],
        [  0.4149],
        [  0.414 ]],

       [[  6.0444],
        [  9.3397],
        [ 11.2766],
        ..., 
        [  0.5161],
        [  0.4716],
        [  0.4944]],

       [[  6.1058],
        [  9.3714],
        [ 11.2273],
        ..., 
        [  0.5636],
        [  0.5195],
        [  0.5388]],

       ..., 
       [[  0.0788],
        [  0.0973],
        [  0.1109],
        ..., 
        [ 43.1456],
        [ 39.2124],
        [ 40.7182]],

       [[  0.0847],
        [  0.1074],
        [  0.1317],
        ..., 
        [ 42.4621],
        [ 38.581 ],
        [ 40.2093]],

       [[  0.1034],
        [  0.1377],
        [  0.1754],
        ..., 
        [ 41.3534],
        [ 37.5824],
        [ 39.0459]]], dtype=float32)

## Run all at once

In [22]:
from kipoi.pipeline import ModelExtractor

In [23]:
# NOTE: "../" is relative to the current working directory, which an earlier
# cell changed to <model_dir>/test_files -- so this points at the model
# directory itself. This cell breaks if that `cd` cell has not been run.
me = ModelExtractor(model_dir="../")

INFO:2017-09-01 14:02:15,029:kipoi] successfully loaded model architecture from ../model.json
INFO:2017-09-01 14:02:15,048:kipoi] successfully loaded model weights from ../weights.h5
INFO:2017-09-01 14:02:15,052:kipoi] successfully imported CodaDataset from extractor.py


In [24]:
# Run extraction and prediction over the whole dataset, batch by batch
# (see the progress log below).
pred = me.predict(test_kwargs)

INFO:2017-09-01 14:02:15,891:kipoi] Initialized data generator. Running batches...
4it [00:08,  2.16s/it]


In [25]:
# Peek at a 5 x 5 corner of the predictions (presumably the first 5 positions
# of the first 5 examples).
pred[:5,:5]

array([[[  5.9986],
        [  9.1787],
        [ 10.9781],
        [ 10.0724],
        [ 10.3762]],

       [[  6.0444],
        [  9.3397],
        [ 11.2766],
        [ 10.273 ],
        [ 10.5634]],

       [[  6.1058],
        [  9.3714],
        [ 11.2273],
        [ 10.2716],
        [ 10.6129]],

       [[  6.4618],
        [  9.8299],
        [ 11.622 ],
        [ 10.6543],
        [ 11.15  ]],

       [[  7.1046],
        [ 10.7682],
        [ 12.6426],
        [ 11.6129],
        [ 12.1911]]], dtype=float32)

In [26]:
# The first axis matches len(ext) (127 examples); the second presumably is
# the interval length in bp (end - start of the example shown above is 25025).
pred.shape

(127, 25025, 1)