# Examen d'un modèle Basenji

In [1]:
import json
import numpy as np
import pandas as pd

In [2]:
import tensorflow as tf

In [3]:
from basenji import dataset
from basenji import seqnn
from basenji import trainer

In [4]:
params_file = "/home/bureau/projects/def-bureau/basenji/manuscripts/akita/params.json"

In [5]:
#data_stats_file = "/home/bureau/projects/def-bureau/bureau/distiller/results_Neu10000/data/1m/statistics.json"
data_stats_file = "/home/bureau/projects/def-bureau/bureau/distiller/iPSC/data/1m/statistics.json"

In [6]:
#tfr_train_full = "/home/bureau/projects/def-bureau/bureau/distiller/results_Neu10000/data/1m/tfrecords/train-*.tfr"
#tfr_eval_full = "/home/bureau/projects/def-bureau/bureau/distiller/results_Neu10000/data/1m/tfrecords/valid-*.tfr"
tfr_train_full = "/home/bureau/projects/def-bureau/bureau/distiller/iPSC/data/1m/tfrecords/train-*.tfr"
tfr_eval_full = "/home/bureau/projects/def-bureau/bureau/distiller/iPSC/data/1m/tfrecords/valid-*.tfr"

## Chargement des paramètres du modèle

In [8]:
with open(params_file) as params_open:
    params = json.load(params_open)
params_model = params['model']
params_train = params['train']
#params_model['batch_norm'] = False
params_model['head_hic'][-1]['units'] =1

## Chargement des stats du modèle

In [8]:
with open(data_stats_file) as data_stats_open:
    data_stats = json.load(data_stats_open)
data_stats

{'num_targets': 1,
 'train_seqs': 7617,
 'valid_seqs': 6676,
 'test_seqs': 6667,
 'seq_length': 1048576,
 'pool_width': 2048,
 'crop_bp': 65536,
 'diagonal_offset': 2,
 'target_length': 99681}

## Chargement des données

In [9]:
train_data = dataset.SeqDataset(tfr_train_full,
    params_train['batch_size'],
    data_stats['seq_length'],
    data_stats['target_length'],
    tf.estimator.ModeKeys.TRAIN)
eval_data = dataset.SeqDataset(tfr_eval_full,
    params_train['batch_size'],
    data_stats['seq_length'],
    data_stats['target_length'],
    tf.estimator.ModeKeys.EVAL)


/home/bureau/projects/def-bureau/bureau/distiller/iPSC/data/1m/tfrecords/train-*.tfr has 7617 sequences with 1/1 targets
/home/bureau/projects/def-bureau/bureau/distiller/iPSC/data/1m/tfrecords/valid-*.tfr has 6676 sequences with 1/1 targets


## Initialisation du modèle

In [9]:
seqnn_model = seqnn.SeqNN(params_model)

Model: "model_1"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
sequence (InputLayer)           [(None, 1048576, 4)] 0                                            
__________________________________________________________________________________________________
stochastic_reverse_complement ( ((None, 1048576, 4), 0           sequence[0][0]                   
__________________________________________________________________________________________________
stochastic_shift (StochasticShi (None, 1048576, 4)   0           stochastic_reverse_complement[0][
__________________________________________________________________________________________________
re_lu (ReLU)                    (None, 1048576, 4)   0           stochastic_shift[0][0]           
____________________________________________________________________________________________

In [31]:
seqnn_model.models[0].layers[len(seqnn_model.models[0].layers)-2].__dict__

{'_self_setattr_tracking': True,
 '_instrumented_keras_api': True,
 '_instrumented_keras_layer_class': True,
 '_instrumented_keras_model_class': False,
 '_trainable': True,
 '_stateful': False,
 '_build_input_shape': TensorShape([None, 99681, 48]),
 '_saved_model_inputs_spec': None,
 '_input_spec': InputSpec(min_ndim=2, axes={-1: 48}),
 '_name': 'dense',
 '_activity_regularizer': None,
 '_trainable_weights': [<tf.Variable 'dense/kernel:0' shape=(48, 1) dtype=float32, numpy=
  array([[-0.26429203],
         [-0.04040996],
         [ 0.02718536],
         [ 0.04537422],
         [ 0.36597288],
         [ 0.02942787],
         [ 0.02919805],
         [ 0.02968781],
         [ 0.4407891 ],
         [ 0.31129742],
         [-0.01744878],
         [ 0.1604862 ],
         [-0.05110191],
         [ 0.1339272 ],
         [-0.14477791],
         [ 0.06518918],
         [ 0.43345943],
         [-0.06201223],
         [-0.06375764],
         [-0.23724872],
         [-0.19133489],
         [ 0.1527

In [33]:
params['model'] = params_model
display(params['model'])

{'seq_length': 1048576,
 'target_length': 512,
 'target_crop': 32,
 'diagonal_offset': 2,
 'augment_rc': True,
 'augment_shift': 11,
 'activation': 'relu',
 'batch_norm': True,
 'bn_momentum': 0.9265,
 'trunk': [{'filters': 96, 'kernel_size': 11, 'pool_size': 2},
  {'filters_init': 96,
   'filters_mult': 1.0,
   'kernel_size': 5,
   'pool_size': 2,
   'repeat': 10},
  {'filters': 48, 'rate_mult': 1.75, 'repeat': 8, 'dropout': 0.4},
  {'filters': 64, 'kernel_size': 5}],
 'head_hic': [{'operation': 'mean'},
  {},
  {'filters': 48, 'kernel_size': 3},
  {},
  {'filters': 24,
   'kernel_size': 3,
   'rate_mult': 1.75,
   'repeat': 6,
   'dropout': 0.1},
  {'cropping': 32},
  {'diagonal_offset': 2},
  {'units': 1, 'activation': 'linear'}]}

In [34]:
# Écriture du fichier de paramètres
with open('/home/bureau/projects/def-bureau/bureau/distiller/iPSC/data/1m/params_tutorial.json','w') as params_tutorial_file:
    json.dump(params,params_tutorial_file) 

In [37]:
# Tentative d'estimation
!akita_train.py -o /home/bureau/projects/def-bureau/bureau/distiller/iPSC/data/1m/train_out/  /home/bureau/projects/def-bureau/bureau/distiller/iPSC/data/1m/params_tutorial.json /home/bureau/projects/def-bureau/bureau/distiller/iPSC/data/1m/

2021-02-19 11:30:33.533302: I tensorflow/compiler/jit/xla_cpu_device.cc:41] Not creating XLA devices, tf_xla_enable_xla_devices not set
2021-02-19 11:30:33.801390: I tensorflow/core/platform/cpu_feature_guard.cc:142] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 AVX512F FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
> /lustre03/project/6000443/basenji/basenji/seqnn.py(55)build_block()
-> block_name = block_params['name']
(Pdb) 
--KeyboardInterrupt--
(Pdb) 

In [11]:
np.__version__

'1.18.4'

In [12]:
pd.__version__

'1.0.3'