This setup expects the audio samples to be in the `data_training` directory, organized in the following structure:

```
data_training >
    _background_noise_ >
        random_wave_file.wav
    testing >
        unknown >
            negative_test_1.wav
            negative_test_2.wav
        wakeword >
            positive_test_1.wav 
            positive_test_2.wav
    training >
        unknown >
            negative_train_1.wav
            negative_train_2.wav
        wakeword >
            positive_train_1.wav 
            positive_train_2.wav
    validation >
        unknown >
            negative_validation_1.wav
            negative_validation_2.wav
        wakeword >
            positive_validation_1.wav 
            positive_validation_2.wav
```

In [None]:
# ---------------------------------------------------------------------------
# Shared configuration for the training cells in this notebook.
# The cells interpolate these names into the command line via {NAME}, so this
# cell has to be executed before any of them.
# ---------------------------------------------------------------------------

# --- Locations ---
# Directory given to the trainer as --train_dir.
TRAIN_DIR = "trained_models/bc_resnet_075"
# Root of the dataset tree described at the top of the notebook (--data_dir).
DATASET_DIR = "data_training/"
# Left empty here; forwarded as --data_url.
DATA_URL = ""

# --- Labels and class balance ---
# Comma-separated labels; they match the sub-directory names of the dataset
# tree (wakeword, unknown).
WANTED_WORDS = "wakeword,unknown"
# Forwarded as --silence_percentage / --unknown_percentage.
SILENT_PERCENTAGE = 0
UNKNOWN_PERCENTAGE = 0

# --- Audio framing and features ---
# Sample rate of the clips (presumably in Hz -- confirm against the dataset).
SAMPLE_RATE = 16000
# Clip length in milliseconds (--clip_duration_ms).
CLIP_DURATION_MS = 2000
# Analysis window length in milliseconds (--window_size_ms).
WINDOW_SIZE_MS = 30
# Hop between windows; forwarded as --window_stride_ms, so also milliseconds.
WINDOW_STRIDE = 20
# Number of feature bins per frame.
FEATURE_BIN_COUNT = 40
# Preprocessing mode forwarded as --preprocess.
PREPROCESS = "micro"

# --- Augmentation ---
# Forwarded as --background_frequency / --background_volume.
BACKGROUND_FREQUENCY = 0
BACKGROUND_VOLUME_RANGE = 0
# Time shift in milliseconds, going by the name.
TIME_SHIFT_MS = 100.0

Use the following to train the streaming DS-CNN model

In [None]:
# Train the DS-CNN model with kws_streaming.
#
# Every {NAME} placeholder is expanded by IPython from the configuration cell,
# so that cell must be run first. Flags placed before the `ds_cnn` sub-command
# are the general data/feature/training options; flags after it are specific
# to the DS-CNN model.
#
# SAMPLE_RATE, TIME_SHIFT_MS and FEATURE_BIN_COUNT are passed explicitly so that
# editing the configuration cell actually changes the run instead of being
# silently ignored in favour of hard-coded values or tool defaults.
#
# NOTE(review): this cell and the BC-ResNet training cell both pass {TRAIN_DIR},
# so both write to the same directory. Point TRAIN_DIR at a separate location
# before training a second model.
!python -m kws_streaming.train.model_train_eval \
--data_url={DATA_URL} \
--data_dir={DATASET_DIR} \
--train_dir={TRAIN_DIR} \
--split_data 0 \
--sample_rate={SAMPLE_RATE} \
--mel_upper_edge_hertz 7500.0 \
--mel_lower_edge_hertz 125.0 \
--silence_percentage={SILENT_PERCENTAGE} \
--unknown_percentage={UNKNOWN_PERCENTAGE} \
--background_frequency={BACKGROUND_FREQUENCY} \
--background_volume={BACKGROUND_VOLUME_RANGE} \
--time_shift_ms={TIME_SHIFT_MS} \
--how_many_training_steps 20000,20000,20000,20000 \
--learning_rate 0.001,0.0005,0.0001,0.00002 \
--window_size_ms={WINDOW_SIZE_MS} \
--window_stride_ms={WINDOW_STRIDE} \
--clip_duration_ms={CLIP_DURATION_MS} \
--mel_num_bins={FEATURE_BIN_COUNT} \
--dct_num_features={FEATURE_BIN_COUNT} \
--preprocess={PREPROCESS} \
--feature_type='raw' \
--micro_enable_pcan 1 \
--micro_min_signal_remaining 0.05 \
--micro_out_scale 1 \
--micro_features_scale 0.0390625 \
--resample 0.1 \
--alsologtostderr \
--train 1 \
--wanted_words={WANTED_WORDS} \
--use_spec_augment 0 \
--time_masks_number 2 \
--time_mask_max_size 20 \
--frequency_masks_number 2 \
--frequency_mask_max_size 3 \
--pick_deterministically 1 \
--return_softmax 1 \
ds_cnn \
--cnn1_kernel_size "(10,4)" \
--cnn1_dilation_rate "(1,1)" \
--cnn1_strides "(1,2)" \
--cnn1_padding "valid" \
--cnn1_filters 16 \
--cnn1_act 'relu' \
--bn_momentum 0.98 \
--bn_center 1 \
--bn_scale 0 \
--bn_renorm 0 \
--dw2_kernel_size '(3,3),(3,3),(3,3)' \
--dw2_dilation_rate '(1,1),(2,2),(2,2)' \
--dw2_strides '(1,1),(1,1),(1,1)' \
--dw2_padding "valid" \
--dw2_act "'relu','relu','relu'" \
--cnn2_filters '16,16,16' \
--cnn2_act "'relu','relu','relu'" \
--dropout1 0.2

Use the following to train the non-streaming BC-ResNet model:

In [None]:
# Train the BC-ResNet model with kws_streaming.
#
# Every {NAME} placeholder is expanded by IPython from the configuration cell,
# so that cell must be run first. Flags placed before the `bc_resnet`
# sub-command are the general data/feature/training options; flags after it
# are specific to the BC-ResNet model.
#
# SAMPLE_RATE, TIME_SHIFT_MS and FEATURE_BIN_COUNT are passed explicitly so that
# editing the configuration cell actually changes the run instead of being
# silently ignored in favour of hard-coded values or tool defaults.
#
# NOTE(review): this cell and the DS-CNN training cell both pass {TRAIN_DIR},
# so both write to the same directory. Point TRAIN_DIR at a separate location
# before training a second model.
!python -m kws_streaming.train.model_train_eval \
--data_url={DATA_URL} \
--data_dir={DATASET_DIR} \
--train_dir={TRAIN_DIR} \
--split_data 0 \
--sample_rate={SAMPLE_RATE} \
--mel_upper_edge_hertz 7500.0 \
--mel_lower_edge_hertz 125.0 \
--silence_percentage={SILENT_PERCENTAGE} \
--unknown_percentage={UNKNOWN_PERCENTAGE} \
--background_frequency={BACKGROUND_FREQUENCY} \
--background_volume={BACKGROUND_VOLUME_RANGE} \
--time_shift_ms={TIME_SHIFT_MS} \
--how_many_training_steps 20000,20000,20000,20000 \
--learning_rate 0.001,0.0005,0.0001,0.00002 \
--window_size_ms={WINDOW_SIZE_MS} \
--window_stride_ms={WINDOW_STRIDE} \
--clip_duration_ms={CLIP_DURATION_MS} \
--mel_num_bins={FEATURE_BIN_COUNT} \
--dct_num_features={FEATURE_BIN_COUNT} \
--preprocess={PREPROCESS} \
--feature_type='raw' \
--micro_enable_pcan 1 \
--micro_min_signal_remaining 0.05 \
--micro_out_scale 1 \
--micro_features_scale 0.0390625 \
--resample 0.1 \
--alsologtostderr \
--train 1 \
--wanted_words={WANTED_WORDS} \
--use_spec_augment 0 \
--time_masks_number 2 \
--time_mask_max_size 20 \
--frequency_masks_number 2 \
--frequency_mask_max_size 3 \
--pick_deterministically 1 \
--return_softmax 1 \
bc_resnet \
--sub_groups 5 \
--last_filters 32 \
--first_filters 16 \
--paddings 'causal' \
--dilations '(1,1),(2,1),(4,1),(8,1)' \
--strides '(1,1),(1,2),(1,2),(1,1)' \
--blocks_n '2, 2, 4, 4' \
--filters '6, 9, 12, 15' \
--dropouts '0.1, 0.1, 0.1, 0.1' \
--pools '1, 1, 1, 1' \
--max_pool 0

Use the following to convert a `.tflite` file to a `.cc` file that can be used with TFLite Micro:

In [None]:
# Dump input_model.tflite with `xxd -i` (C include-file style output) and
# redirect the result into model_data.cc, overwriting that file if it exists.
# NOTE(review): input_model.tflite is presumably a placeholder name -- replace
# it with the path of the .tflite model you want to embed.
!xxd -i input_model.tflite > model_data.cc