In [190]:
%load_ext autoreload
%autoreload 2

import os 
import sys

ROOT_PATH = os.path.dirname(os.getcwd())
sys.path.append(ROOT_PATH)

from src.evals.run_model import run_model, build_model

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


### Custom Config

In [191]:
DATA_PATH = os.path.join(ROOT_PATH,
                         "data/210728_scrambles_for_unstructure_model.csv")

In [194]:
from tensorflow.keras import layers
import tensorflow as tf


class ProtConvNet2D(tf.keras.Model):

    def __init__(
            self,
            num_char,
            seq_length,
            num_conv_layers: int = 2,
            kernel_size: int = 3,
            strides: int = 1,
            target_names=None,
            **kwargs,
    ):
        super(ProtConvNet2D, self).__init__(name="ProtConvNet2D")

        if target_names is None:
            target_names = ("Target_1", "Target_2")

        filter_sizes = [256, 128, 64, 32, 16]
        assert num_conv_layers < len(filter_sizes) - 1

        self.one_hot_layer = layers.Lambda(tf.one_hot,
                                           arguments={'depth': num_char},
                                           output_shape=num_char)
        self.expand_dim_layer = layers.Lambda(tf.expand_dims,
                                              arguments={"axis": 3})
        self.conv_layers = [
            layers.Conv2D(filters=filter_sizes[i],
                          kernel_size=(kernel_size, kernel_size),
                          input_shape=(seq_length, num_char, 1),
                          strides=strides,
                          activation="relu")
            for i in range(num_conv_layers)
        ]
        self.maxpool_layer = layers.MaxPooling2D((2, 2))
        self.flatten = layers.Flatten()
        self.dense1 = layers.Dense(256, activation='relu')
        self.dense2 = layers.Dense(64, activation='relu')
        self.output_layer1 = layers.Dense(1, name=target_names[0])
        self.output_layer2 = layers.Dense(1, name=target_names[1])

    def call(self, inputs, training=None, mask=None):

        x = self.one_hot_layer(inputs)
        x = self.expand_dim_layer(x)

        for layer in self.conv_layers:
            x = layer(x)
            x = self.maxpool_layer(x)

        x = self.flatten(x)
        x = self.dense1(x)
        x = self.dense2(x)

        output1 = self.output_layer1(x)
        output2 = self.output_layer2(x)

        return [output1, output2]

In [197]:
model, history = run_model(data_path = DATA_PATH, model_type = "convnet_2d")

{'num_char': 19, 'seq_length': 72, 'target_names': ['Trypsin', 'Chemotrypsin'], 'strides': 1, 'padding': 'causal', 'num_filters': 256, 'num_conv_layers': 1, 'kernel_size': 12, 'embdedding_output_dim': 64}
Epoch 1/7
 77/446 [====>.........................] - ETA: 2:03 - loss: 1.9494 - output_1_loss: 0.9407 - output_2_loss: 1.0088 - output_1_mse: 0.9407 - output_2_mse: 1.0088

KeyboardInterrupt: 

________________________________

________________________________

________________________________

________________________________

________________________________

### Manuall inspect the model

In [127]:
from src.evals.data_processing import get_and_process_data
from src.models.resnet1d import ProtResNet_1D
from tensorflow.keras import optimizers

#### Prepare data

In [None]:
X, y1, y2 = get_and_process_data(DATA_PATH)
X.shape

#### Define the model

In [187]:
from tensorflow.keras import layers
import tensorflow as tf


class ProtConvNet_2D(tf.keras.Model):

    def __init__(
            self,
            num_char,
            seq_length,
            num_conv_layers: int = 2,
            kernel_size: int = 3,
            strides: int = 1,
            target_names=None,
            **kwargs,
    ):
        super(ProtConvNet_2D, self).__init__(name="ProtConvNet_2D")
        
        if target_names is None:
            target_names = ("Target_1", "Target_2")

        filter_sizes = [256, 128, 64, 32, 16]
        assert num_conv_layers < len(filter_sizes) - 1
        
        self.one_hot_layer = layers.Lambda(tf.one_hot, arguments={'depth': num_char}, output_shape=num_char)
        self.expand_dim_layer = layers.Lambda(tf.expand_dims, arguments= {"axis": 3})
        
        self.conv_layers = [
                        layers.Conv2D(filters=filter_sizes[i], kernel_size=(kernel_size, kernel_size),
                                    input_shape=(seq_length, num_char, 1),
                                    strides=strides, activation="relu")
            for i in range(num_conv_layers)
        ]
        
        self.maxpool_layer = layers.MaxPooling2D((2, 2))
        
        self.flatten = layers.Flatten()
        
        self.dense1 = layers.Dense(256, activation='relu')
        self.dense2 = layers.Dense(64, activation='relu')
        
        self.output_layer1 = layers.Dense(1, name=target_names[0])
        self.output_layer2 = layers.Dense(1, name=target_names[1])

        
    def call(self, inputs, training=None, mask=None):
        print("Input shape:", inputs.shape)
        
        x = self.one_hot_layer(inputs)
        print("One Hot shape:",x.shape)
        
        x = self.expand_dim_layer(x)
        print("Expand Dim shape:",x.shape, )
        
        for layer in self.conv_layers:
            x = layer(x)
            print("Conv Layer shape:",x.shape)
            x = self.maxpool_layer(x)
            print("Max Pool shape:",x.shape)
        
        
        x = self.flatten(x)
        print("Dense 1 shape:",x.shape)
        x = self.dense1(x)
        print("Dense 2 shape:",x.shape)
        x = self.dense2(x)

        output1 = self.output_layer1(x)
        print("Output shape:",x.shape)
        output2 = self.output_layer2(x)

        return [output1,output2 ]

In [188]:
model = ProtConvNet_2D(19, 72)
model.compile(optimizer=optimizers.Adam(learning_rate=0.001),loss='mse',metrics=['mse'])
model.fit(X[:64], [y1[:64], y1[:64]])

Input shape: (32, 72)
One Hot shape: (32, 72, 19)
Expand Dim shape: (32, 72, 19, 1)
Conv Layer shape: (32, 70, 17, 256)
Max Pool shape: (32, 35, 8, 256)
Conv Layer shape: (32, 33, 6, 128)
Max Pool shape: (32, 16, 3, 128)
Dense 1 shape: (32, 6144)
Dense 2 shape: (32, 256)
Output shape: (32, 64)
Input shape: (32, 72)
One Hot shape: (32, 72, 19)
Expand Dim shape: (32, 72, 19, 1)
Conv Layer shape: (32, 70, 17, 256)
Max Pool shape: (32, 35, 8, 256)
Conv Layer shape: (32, 33, 6, 128)
Max Pool shape: (32, 16, 3, 128)
Dense 1 shape: (32, 6144)
Dense 2 shape: (32, 256)
Output shape: (32, 64)


<tensorflow.python.keras.callbacks.History at 0x7fde718b8a10>

In [186]:
model.summary()

Model: "ProtConvNet_2D"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lambda_67 (Lambda)           multiple                  0         
_________________________________________________________________
lambda_68 (Lambda)           multiple                  0         
_________________________________________________________________
conv2d_34 (Conv2D)           multiple                  2560      
_________________________________________________________________
conv2d_35 (Conv2D)           multiple                  590080    
_________________________________________________________________
max_pooling2d_12 (MaxPooling multiple                  0         
_________________________________________________________________
flatten_10 (Flatten)         multiple                  0         
_________________________________________________________________
dense_41 (Dense)             multiple               