# **Next: creating and building the neural network (NN) model**

### Importing the necessary libraries

In [None]:
!pip install pysoundfile
!pip install ketos==2.0.0b4

import ketos.data_handling.database_interface as dbi
from ketos.neural_networks.resnet import ResNetInterface
from ketos.data_handling.data_feeding import BatchGenerator



### Opening the database from which we want to read the data

In [None]:
# Open the training database file in read-only mode ('r'); the resulting
# handle `db` is what the table lookup further down reads from.
db = dbi.open_file("/train_database.h5", 'r')

In [None]:
# Bare expression: the notebook shows the file's groups and tables as cell output.
db

File(filename=/train_database.h5, title='', mode='r', root_uep='/', filters=Filters(complevel=0, shuffle=False, bitshuffle=False, fletcher32=False, least_significant_digit=None))
/ (RootGroup) ''
/val3 (Group) ''
/val3/data (Table(42,), fletcher32, shuffle, zlib(1)) ''
  description := {
  "data": Float32Col(shape=(94, 2433), dflt=0.0, pos=0),
  "filename": StringCol(itemsize=100, shape=(), dflt=b'', pos=1),
  "id": UInt32Col(shape=(), dflt=0, pos=2),
  "label": UInt8Col(shape=(), dflt=0, pos=3),
  "offset": Float64Col(shape=(), dflt=0.0, pos=4)}
  byteorder := 'little'
  chunkshape := (1,)
/wav (Group) ''
/wav/data (Table(1664,), fletcher32, shuffle, zlib(1)) ''
  description := {
  "data": Float32Col(shape=(94, 2433), dflt=0.0, pos=0),
  "filename": StringCol(itemsize=100, shape=(), dflt=b'', pos=1),
  "id": UInt32Col(shape=(), dflt=0, pos=2),
  "label": UInt8Col(shape=(), dflt=0, pos=3),
  "offset": Float64Col(shape=(), dflt=0.0, pos=4)}
  byteorder := 'little'
  chunkshape := (1,)

### Transform the data into the required dimensions so that we can use it for training

In [None]:
def transform_batch(X, Y):
  """Reshape a batch of 2-D samples and one-hot encode its labels.

  Implemented with NumPy only, so the function works as soon as it is
  defined; it no longer depends on `tf`, which this notebook imports only
  in a later cell.

  Args:
    X: array of shape (batch, height, width).
    Y: mapping or structured array whose 'label' field holds integer class
      ids, with the batch along the first axis.

  Returns:
    A tuple (x, y):
      x: X with a trailing axis of length 1 appended -> (batch, height, width, 1).
      y: float32 one-hot array with 2 classes along axis 1; a label outside
         {0, 1} yields an all-zero row.
  """
  import numpy as np  # local import keeps this cell self-contained

  x = X.reshape(X.shape[0], X.shape[1], X.shape[2], 1)

  labels = np.asarray(Y['label'])
  # Lay the class ids out along axis 1 so the comparison below broadcasts to
  # (batch, 2, ...), i.e. the class axis is inserted at position 1.
  class_ids = np.arange(2).reshape((2,) + (1,) * (labels.ndim - 1))
  y = (np.expand_dims(labels, axis=1) == class_ids).astype(np.float32)
  return x, y

In [None]:
# Look up the table stored at /wav/data inside the opened database.
train_data = dbi.open_table(db, "/wav/data")

In [None]:
# Bare expression: the notebook shows the table's description as cell output.
train_data

/wav/data (Table(1664,), fletcher32, shuffle, zlib(1)) ''
  description := {
  "data": Float32Col(shape=(94, 2433), dflt=0.0, pos=0),
  "filename": StringCol(itemsize=100, shape=(), dflt=b'', pos=1),
  "id": UInt32Col(shape=(), dflt=0, pos=2),
  "label": UInt8Col(shape=(), dflt=0, pos=3),
  "offset": Float64Col(shape=(), dflt=0.0, pos=4)}
  byteorder := 'little'
  chunkshape := (1,)

### Extract the data in batches for training

In [None]:
# Batch generator over the /wav/data table: 128 samples per batch, with
# shuffling and refresh-on-epoch-end both enabled. Every batch is passed
# through ResNetInterface.transform_batch before it is handed out.
# NOTE(review): the training cell further down rebinds `train_generator`
# with batch_size=10, so this instance is only used for inspection.
train_generator = BatchGenerator(
    data_table=train_data,
    batch_size=128,
    shuffle=True,
    refresh_on_epoch_end=True,
    output_transform_func=ResNetInterface.transform_batch,
)

In [None]:
# Bare expression: the notebook shows the generator object's repr as cell output.
train_generator

<ketos.data_handling.data_feeding.BatchGenerator at 0x7efe90239748>

#### The recipe file contains the ResNet model that we are going to use

In [None]:
# Build a ResNet interface from the recipe file.
# NOTE(review): `resnet` is not referenced again in the cells visible here; the
# training cell below builds its own model (`srkw`) from the same recipe file.
resnet = ResNetInterface.build_from_recipe_file("/recipe.json")


#### Start training the model 

In [None]:
# Open the database (read-only) and fetch the training and validation tables.
db2 = dbi.open_file("/train_database.h5", 'r')
train_dataset, val_dataset = [
    dbi.open_table(db2, table_path)
    for table_path in ("/wav/data", "/val3/data")
]

# Settings shared by both generators: batches of 10, shuffled, each batch
# passed through ResNetInterface.transform_batch.
shared_generator_args = dict(
    batch_size=10,
    output_transform_func=ResNetInterface.transform_batch,
    shuffle=True,
)

# The two generators differ only in their table and in refresh_on_epoch_end.
train_generator = BatchGenerator(
    data_table=train_dataset,
    refresh_on_epoch_end=True,
    **shared_generator_args
)
val_generator = BatchGenerator(
    data_table=val_dataset,
    refresh_on_epoch_end=False,
    **shared_generator_args
)

# Build the model from the recipe file, then attach the generators and the
# directories used for checkpoints and logs.
srkw = ResNetInterface.build_from_recipe_file("/recipe.json")

srkw.train_generator = train_generator
srkw.val_generator = val_generator
srkw.checkpoint_dir = "my_checkpoints"
srkw.log_dir = "my_logs"

# Train for 50 epochs with validation and CSV logging switched on.
srkw.train_loop(50, validate=True, log_csv=True)


Epoch: 1 
train_loss: 0.4287393093109131
train_CategoricalAccuracy: 0.579 train_Precision: 0.600 train_Recall: 0.474 
val_loss: 0.49949806928634644
val_CategoricalAccuracy: 0.500 val_Precision: 0.500 val_Recall: 1.000 


Epoch: 2 
train_loss: 0.36158618330955505
train_CategoricalAccuracy: 0.670 train_Precision: 0.689 train_Recall: 0.620 
val_loss: 0.46444422006607056
val_CategoricalAccuracy: 0.548 val_Precision: 1.000 val_Recall: 0.095 


Epoch: 3 
train_loss: 0.3429550230503082
train_CategoricalAccuracy: 0.684 train_Precision: 0.686 train_Recall: 0.681 
val_loss: 0.45973527431488037
val_CategoricalAccuracy: 0.548 val_Precision: 1.000 val_Recall: 0.095 


Epoch: 4 
train_loss: 0.33020666241645813
train_CategoricalAccuracy: 0.706 train_Precision: 0.711 train_Recall: 0.694 
val_loss: 0.457150399684906
val_CategoricalAccuracy: 0.548 val_Precision: 1.000 val_Recall: 0.095 


Epoch: 5 
train_loss: 0.3271583616733551
train_CategoricalAccuracy: 0.709 train_Precision: 0.724 train_Recall: 0.67

#### Use different models to perform predictions

In [None]:
from ketos.neural_networks.cnn import CNNInterface
from ketos.neural_networks.dev_utils.nn_interface import RecipeCompat
import tensorflow as tf
!pip install pysoundfile
!pip install ketos==2.0.0b4

import ketos.data_handling.database_interface as dbi
from ketos.neural_networks.resnet import ResNetInterface
from ketos.data_handling.data_feeding import BatchGenerator
from ketos.neural_networks.cnn import CNNInterface






In [None]:
# Recipe dictionary made of 16 identical convolutional layer specs followed by
# 3 identical dense layer specs, plus optimizer / loss / metric entries wrapped
# in RecipeCompat. Each layer spec is a separate dict object.
# NOTE(review): no later cell visible in this notebook references
# `vgg_like_recipe`.
vgg_like_recipe = {
    'convolutional_layers': [
        {'n_filters': 32, "filter_shape": (3, 3), 'strides': 1, 'padding': 'valid',
         'activation': 'relu', 'max_pool': None, 'batch_normalization': True}
        for _ in range(16)
    ],
    'dense_layers': [
        {'n_hidden': 4096, 'activation': 'relu', 'batch_normalization': True, 'dropout': 0.5}
        for _ in range(3)
    ],
    'n_classes': 2,
    'optimizer': RecipeCompat('Adam', tf.keras.optimizers.Adam, learning_rate=0.005),
    'loss_function': RecipeCompat('BinaryCrossentropy', tf.keras.losses.BinaryCrossentropy),
    'metrics': [RecipeCompat('BinaryAccuracy', tf.keras.metrics.BinaryAccuracy)],
}




In [None]:
# Open the database (read-only) and fetch the training and validation tables.
db2 = dbi.open_file("/train_database.h5", 'r')
train_dataset, val_dataset = [
    dbi.open_table(db2, table_path)
    for table_path in ("/wav/data", "/val3/data")
]
# Last expression of the cell: the notebook shows the training table's description.
train_dataset

/wav/data (Table(1664,), fletcher32, shuffle, zlib(1)) ''
  description := {
  "data": Float32Col(shape=(94, 2433), dflt=0.0, pos=0),
  "filename": StringCol(itemsize=100, shape=(), dflt=b'', pos=1),
  "id": UInt32Col(shape=(), dflt=0, pos=2),
  "label": UInt8Col(shape=(), dflt=0, pos=3),
  "offset": Float64Col(shape=(), dflt=0.0, pos=4)}
  byteorder := 'little'
  chunkshape := (1,)

In [None]:
# Bare expression: the notebook shows the validation table's description as cell output.
val_dataset

/val3/data (Table(42,), fletcher32, shuffle, zlib(1)) ''
  description := {
  "data": Float32Col(shape=(94, 2433), dflt=0.0, pos=0),
  "filename": StringCol(itemsize=100, shape=(), dflt=b'', pos=1),
  "id": UInt32Col(shape=(), dflt=0, pos=2),
  "label": UInt8Col(shape=(), dflt=0, pos=3),
  "offset": Float64Col(shape=(), dflt=0.0, pos=4)}
  byteorder := 'little'
  chunkshape := (1,)

In [None]:

# Generators for the CNN experiment: batches of 3, shuffled, each batch passed
# through CNNInterface.transform_batch.
train_generator = BatchGenerator(
    data_table=train_dataset,
    batch_size=3,
    shuffle=True,
    refresh_on_epoch_end=True,
    output_transform_func=CNNInterface.transform_batch,
)

# Same settings for the validation table, except refresh_on_epoch_end is off.
val_generator = BatchGenerator(
    data_table=val_dataset,
    batch_size=3,
    shuffle=True,
    refresh_on_epoch_end=False,
    output_transform_func=CNNInterface.transform_batch,
)

In [None]:
# Recipe written with plain names and parameter dicts (strings and numbers
# only, no RecipeCompat objects).
# NOTE(review): no later cell visible in this notebook references `recipex`.
recipex = dict(
    conv_set=[[64, False], [128, True], [256, True]],
    dense_set=[512],
    n_classes=2,
    optimizer=dict(name='Adam', parameters=dict(learning_rate=0.005)),
    loss_function=dict(name='FScoreLoss', parameters={}),
    metrics=[dict(name='CategoricalAccuracy', parameters={})],
)


In [None]:
# `default_cnn_recipe` has to be in scope before it is used. Importing it here
# keeps the cell runnable on a fresh kernel (Restart & Run All) instead of
# relying on an import that only appears in a later cell.
from ketos.neural_networks.cnn import default_cnn_recipe

# Build the CNN from ketos' default recipe. recipe_compat=True is passed
# through unchanged.
# NOTE(review): `_build_from_recipe` is underscore-prefixed, i.e. not part of
# the public interface; confirm there is no public builder that should be used.
srkw = CNNInterface._build_from_recipe(default_cnn_recipe, recipe_compat=True)

# Attach the generators and the directories used for checkpoints and logs.
srkw.train_generator = train_generator
srkw.val_generator = val_generator
srkw.checkpoint_dir = "iimy_checkpoints"
srkw.log_dir = "iimy_logs"

# Train for 5 epochs with validation and CSV logging switched on.
# NOTE(review): the saved output of this cell records a ValueError whose
# message and traceback are not preserved in the notebook; re-run to capture
# the full error before relying on this cell.
srkw.train_loop(5, validate=True, log_csv=True)

ValueError: ignored

In [None]:
from ketos.neural_networks.cnn import default_cnn_recipe

In [None]:
# Bare expression: the notebook shows the contents of ketos' default CNN recipe as cell output.
default_cnn_recipe

{'convolutional_layers': [{'activation': 'relu',
   'batch_normalization': True,
   'filter_shape': (8, 8),
   'max_pool': {'pool_size': (3, 3), 'strides': (2, 2)},
   'n_filters': 32,
   'padding': 'valid',
   'strides': 4},
  {'activation': 'relu',
   'batch_normalization': True,
   'filter_shape': (3, 3),
   'max_pool': {'pool_size': (3, 3), 'strides': (2, 2)},
   'n_filters': 64,
   'padding': 'valid',
   'strides': 1}],
 'dense_layers': [{'activation': 'relu',
   'batch_normalization': True,
   'dropout': 0.5,
   'n_hidden': 512},
  {'activation': 'relu',
   'batch_normalization': True,
   'dropout': 0.5,
   'n_hidden': 128}],
 'loss_function': BinaryCrossentropy ketos recipe,
 'metrics': [BinaryAccuracy ketos recipe],
 'n_classes': 2,
 'optimizer': Adam ketos recipe}