# Prédictions à partir du meilleur modèle d'Akita

In [1]:
import json
import numpy as np
import pandas as pd

In [2]:
import tensorflow as tf

In [3]:
from basenji import dataset
from basenji import seqnn
from basenji import trainer

In [4]:
# Locations of the Akita model artefacts: the model directory, the
# `model_best.h5` file inside it, and the JSON parameter file.
model_dir = "/home/bureau/projects/def-bureau/bureau/basenji/manuscripts/akita/"
model_file = f"{model_dir}model_best.h5"
# NOTE(review): unlike model_dir, this path has no "bureau/" component right
# after "def-bureau/" — confirm that this is the intended location.
params_file = "/home/bureau/projects/def-bureau/basenji/manuscripts/akita/params.json"

In [5]:
# JSON file holding the dataset statistics (parsed further down into `data_stats`).
data_stats_file = (
    "/home/bureau/projects/def-bureau/bureau/distiller/iPSC/data/1m"
    "/statistics.json"
)

In [6]:
# Glob patterns matching the training ("train-*") and validation ("valid-*")
# TFRecord files of the 1m dataset.
tfr_eval_full = (
    "/home/bureau/projects/def-bureau/bureau/distiller/iPSC/data/1m/tfrecords/"
    "valid-*.tfr"
)
tfr_train_full = (
    "/home/bureau/projects/def-bureau/bureau/distiller/iPSC/data/1m/tfrecords/"
    "train-*.tfr"
)


## Chargement des paramètres du modèle

In [7]:
# Read the JSON parameter file and parse its whole content in one go.
with open(params_file) as params_open:
    params = json.loads(params_open.read())

# Split the parameters into their two sections: 'model' is what the network
# is built from later in the notebook, 'train' holds the training settings.
params_model = params['model']
params_train = params['train']

# Disabled overrides of the model parameters, kept for reference:
#   params_model['batch_norm'] = False
#   params_model['head_hic'][-1]['units'] = 1

## Chargement des statistiques des données

In [8]:
# Read and parse the dataset statistics JSON file.
with open(data_stats_file) as data_stats_open:
    data_stats = json.loads(data_stats_open.read())

# Values taken directly from the statistics file.
seq_length = data_stats['seq_length']
target_length = data_stats['target_length']
hic_diags = data_stats['diagonal_offset']

# Values derived by expressing base-pair quantities in pooled bins
# (integer division by 'pool_width').
target_length1 = seq_length // data_stats['pool_width']          # bins per input sequence
target_crop = data_stats['crop_bp'] // data_stats['pool_width']  # 'crop_bp' in bins
# NOTE(review): presumably target_length is the flattened upper triangle of the
# cropped map, i.e. m * (m + 1) / 2 with m = target_length1 - 2 * target_crop - hic_diags
# — TODO confirm against the data-generation code.

## Chargement des données

In [9]:
# mode=EVAL is specified here so that the dataset is not built in TRAIN mode
# (there is no TEST mode). split_label="test" presumably selects the test
# TFRecords under this directory — TODO confirm.
tfr_pattern_path = "/home/bureau/projects/def-bureau/bureau/distiller/iPSC/data/1m"
test_data = dataset.SeqDataset(
    tfr_pattern_path,
    split_label="test",
    batch_size=8,
    mode=tf.estimator.ModeKeys.EVAL,
)

In [10]:
# Display the parsed dataset statistics as this cell's output.
data_stats

{'num_targets': 1,
 'train_seqs': 7617,
 'valid_seqs': 6676,
 'test_seqs': 6667,
 'seq_length': 1048576,
 'pool_width': 2048,
 'crop_bp': 65536,
 'diagonal_offset': 2,
 'target_length': 99681}

## Initialisation du modèle

In [11]:
# Instantiate the SeqNN network from the 'model' section of the parameter file.
seqnn_model = seqnn.SeqNN(params_model)

Model: "model_1"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
sequence (InputLayer)           [(None, 1048576, 4)] 0                                            
__________________________________________________________________________________________________
stochastic_reverse_complement ( ((None, 1048576, 4), 0           sequence[0][0]                   
__________________________________________________________________________________________________
stochastic_shift (StochasticShi (None, 1048576, 4)   0           stochastic_reverse_complement[0][
__________________________________________________________________________________________________
re_lu (ReLU)                    (None, 1048576, 4)   0           stochastic_shift[0][0]           
____________________________________________________________________________________________

In [12]:
# Restore the saved model from model_file (the model_best.h5 file in model_dir).
seqnn_model.restore(model_file)

In [None]:
# Extract the test inputs only (no targets) as NumPy data, then run the
# restored network's underlying model on them to get the predictions.
# NOTE(review): numpy() presumably materializes every test sequence in memory
# at once, which may be very large for sequences of this length — consider
# predicting from the dataset batch by batch instead. TODO confirm.
test_inputs = test_data.numpy(return_inputs=True, return_outputs=False)
test_pred = seqnn_model.model.predict(test_inputs)

In [11]:
# Show the NumPy version in use, for reproducibility.
np.__version__

'1.18.4'

In [12]:
# Show the pandas version in use, for reproducibility.
pd.__version__

'1.0.3'