In [None]:
!pip install -q biopython

%load_ext autoreload
%autoreload 2

from google.colab import drive
drive.mount('/content/drive')

[K     |████████████████████████████████| 2.3MB 8.0MB/s 
[?25h

In [None]:
# Locations on the mounted Google Drive. Both end with a trailing slash
# because later cells build file paths by plain string concatenation.
# DATA_DIR: folder holding the input FASTA file (H3.fasta).
DATA_DIR = '/content/drive/MyDrive/data/histone/'
# LOG_DIR: folder receiving the training CSV log and the saved model.
LOG_DIR = '/content/drive/MyDrive/dna-nn/results/'

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
# Copy the dna_nn package from Drive into the current working directory so
# the `from dna_nn... import ...` statements below can resolve it.
!cp -r '/content/drive/MyDrive/dna-nn/dna_nn' .
# List the copied modules as a quick sanity check.
!ls dna_nn

dataset.py  download.py  load.py  model.py


In [None]:
import gc
import os

import matplotlib.pyplot as plt
import pandas as pd
from tensorflow import keras

from dna_nn.dataset import h3
from dna_nn.model import models, evaluate

In [None]:
# Load the H3 FASTA file; h3() hands back the input shape plus the
# train / validation / test datasets consumed by the cells below.
x_shape, train_ds, validation_data, test_ds = h3(f'{DATA_DIR}H3.fasta')

14963 samples loaded


In [None]:
# Show the registry of available architectures (name -> builder function);
# one of these names is selected as `key` in the next cell.
models

{'cnn_deepdbp': <function dna_nn.model.cnn_deepdbp>,
 'cnn_nguyen_2_conv2d': <function dna_nn.model.cnn_nguyen_2_conv2d>,
 'cnn_nguyen_conv1d_2_conv2d': <function dna_nn.model.cnn_nguyen_conv1d_2_conv2d>,
 'cnn_zeng_2_conv2d': <function dna_nn.model.cnn_zeng_2_conv2d>,
 'cnn_zeng_3_conv2d': <function dna_nn.model.cnn_zeng_3_conv2d>,
 'cnn_zeng_4_conv2d': <function dna_nn.model.cnn_zeng_4_conv2d>}

In [None]:
# Architecture to train; must be one of the names in the `models` registry.
# `key` is also used later to name the log and checkpoint files.
key = 'cnn_zeng_4_conv2d'

# Drop any Keras state left over from a previous run of this cell so the
# model is rebuilt from scratch.
keras.backend.clear_session()
model = models[key](x_shape)
model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    # Keras documents `metrics` as a list; pass one rather than a bare string.
    metrics=['accuracy'],
)
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d (Conv2D)              (None, 496, 62, 16)       160       
_________________________________________________________________
batch_normalization (BatchNo (None, 496, 62, 16)       64        
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 248, 31, 16)       0         
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 246, 29, 32)       4640      
_________________________________________________________________
batch_normalization_1 (Batch (None, 246, 29, 32)       128       
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 123, 14, 32)       0         
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 121, 12, 64)       1

In [None]:
# Training artefacts are written under LOG_DIR and named after the model key.
csv_path = LOG_DIR + f'{key}-histone-dynamics.csv'
model_path = LOG_DIR + f'{key}-histone.h5'

# CSVLogger and ModelCheckpoint both write files under LOG_DIR; create the
# directory up front so a missing results folder does not abort training.
os.makedirs(LOG_DIR, exist_ok=True)

callbacks = [
    # Append the per-epoch metrics to a CSV file.
    keras.callbacks.CSVLogger(csv_path),
    # Run a garbage-collection pass at the end of every epoch.
    keras.callbacks.LambdaCallback(
        on_epoch_end=lambda epoch, logs: gc.collect(),
    ),
    # Overwrite the checkpoint only when the monitored value improves.
    # 'val_loss' is the Keras default; it is spelled out here for clarity.
    keras.callbacks.ModelCheckpoint(model_path, monitor='val_loss',
                                    save_best_only=True),
]

history = model.fit(train_ds, epochs=50, validation_data=validation_data,
                    callbacks=callbacks, verbose=2)

Epoch 1/50
328/328 - 10s - loss: 0.6489 - accuracy: 0.6534 - val_loss: 2.0164 - val_accuracy: 0.4878
Epoch 2/50
328/328 - 9s - loss: 0.5744 - accuracy: 0.6892 - val_loss: 1.0048 - val_accuracy: 0.4895
Epoch 3/50
328/328 - 9s - loss: 0.5485 - accuracy: 0.7215 - val_loss: 0.4924 - val_accuracy: 0.7751
Epoch 4/50
328/328 - 9s - loss: 0.5406 - accuracy: 0.7261 - val_loss: 0.5164 - val_accuracy: 0.7590
Epoch 5/50
328/328 - 9s - loss: 0.5278 - accuracy: 0.7365 - val_loss: 0.4947 - val_accuracy: 0.7661
Epoch 6/50
328/328 - 9s - loss: 0.5141 - accuracy: 0.7393 - val_loss: 0.4711 - val_accuracy: 0.7826
Epoch 7/50
328/328 - 9s - loss: 0.4884 - accuracy: 0.7637 - val_loss: 0.5016 - val_accuracy: 0.7688
Epoch 8/50
328/328 - 9s - loss: 0.4777 - accuracy: 0.7644 - val_loss: 0.5004 - val_accuracy: 0.7528
Epoch 9/50
328/328 - 9s - loss: 0.4515 - accuracy: 0.7813 - val_loss: 0.4695 - val_accuracy: 0.7906
Epoch 10/50
328/328 - 9s - loss: 0.4377 - accuracy: 0.7879 - val_loss: 0.6022 - val_accuracy: 0.723

In [None]:
# Swap the in-memory model for the checkpoint written during training, then
# score it on the held-out test split.
model = keras.models.load_model(model_path)
test_loss, test_accuracy = model.evaluate(test_ds, verbose=3)

# Predicted scores and ground-truth labels for the same test split.
# NOTE(review): this assumes test_ds yields samples in the same order on
# every pass (predict() and unbatch() iterate it separately) - confirm that
# h3() does not reshuffle the test dataset between iterations.
y_score = model.predict(test_ds)
y_true = [int(label) for _, label in test_ds.unbatch()]

# Hand everything to the project's evaluate() helper.
evaluate(model, history, test_accuracy, y_score, y_true, LOG_DIR, key, 'histone')