# MobileNetV3-Small RGB - with dropout + L2 regularization


In [1]:
# Work from the repository root so the relative paths used in later cells resolve.
%cd ~/africa_poverty_clean/
# autoreload mode 2: re-import edited modules before each cell runs.
%load_ext autoreload
%autoreload 2
# Render matplotlib figures inline in the notebook output.
%matplotlib inline

/home/jupyter/africa_poverty_clean


In [2]:
import tensorflow as tf
import pandas as pd
import numpy as np
import os
from glob import glob
from batchers import dataset_constants, tfrecord_paths_utils
from models import processing
from models.loss import r2
from models.checkpoint import CustomModelCheckpoint
import datetime

2023-03-08 02:18:15.366809: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 AVX512F FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2023-03-08 02:18:15.538959: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /usr/local/cuda/lib64:/usr/local/nccl2/lib:/usr/local/cuda/extras/CUPTI/lib64
2023-03-08 02:18:15.538990: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.
2023-03-08 02:18:15.578405: E tensorflow/stream_executor/cuda/cuda_blas.cc:2981] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS w

In [3]:
# Sanity check: show the current working directory.
!pwd

/home/jupyter/africa_poverty_clean


### Constants

In [4]:
# Path to an .h5 checkpoint from an earlier run; it is passed to
# mobilenet_instantiate(), which loads it via model.load_weights().
LAST_BEST_WEIGHT = 'models/checkpoints/MN_RGB_20230307-134630/model_epoch32.h5'

In [5]:
# --- Paths (relative to the working directory) ---
# DHS_TFRECORDS_PATH_ROOT = 'data/dhs_tfrecords/' #local
DHS_TFRECORDS_PATH_ROOT = 'gcs/dhs_tfrecords/' #VM
CSV_PATH = 'data/dhs_clusters.csv' 
CHECKPOINT_PATH = 'models/checkpoints/' 

# --- Data / normalization constants ---
# NOTE(review): CLUSTERS_DF, MEANS and STDS are not referenced by the cells visible here;
# presumably they are used by helper modules or later cells - confirm.
CLUSTERS_DF = pd.read_csv(CSV_PATH, float_precision='high', index_col=False)
MEANS = dataset_constants._MEANS_DHS
STDS = dataset_constants._STD_DEVS_DHS

# --- Input pipeline and training schedule ---
BATCH_SIZE = 8
DATASET = 'DHS_OOC_A'  # fold name handed to tfrecord_paths_utils.dhs_ooc()
SHUFFLE = 16           # shuffle buffer size, in examples (shuffle is applied before batching)
PREFETCH = 2           # number of batches to prefetch (prefetch is applied after batching)
EPOCHS = 100 # CHANGE
STEPS_PER_EPOCH = 1474 # 11,797 train examples / BATCH_SIZE 8 -> 1474 full batches
VALIDATION_STEPS = 488 # has to be <= (size of val_ds / batch_size); 3,909 / 8 -> 488 full batches

### NOTE: process_datasets() batches train_ds and then repeats it EPOCHS times, so the
### pipeline yields about (train size / BATCH_SIZE) * EPOCHS batches in total. That must be
### >= STEPS_PER_EPOCH * EPOCHS, i.e. STEPS_PER_EPOCH must be <= train size / BATCH_SIZE.
### DHS_OOC_A's train, val, test =  11,797, 3,909, 3,963

In [6]:
def process_datasets(dataset, shuffle, batch_size, prefetch, epochs):
    '''Build the train, validation and test input pipelines for one fold.

    Args:
        dataset: fold name forwarded to ``tfrecord_paths_utils.dhs_ooc``.
        shuffle: shuffle buffer size used for the train and validation pipelines.
        batch_size: number of examples per batch, for all three pipelines.
        prefetch: number of batches each pipeline prefetches.
        epochs: how many times the batched training pipeline is repeated.

    Returns:
        Tuple ``(train_ds, val_ds, test_ds)``. Only ``train_ds`` is repeated, and
        ``test_ds`` is never shuffled.
    '''
    split_names = ("train", "val", "test")

    # Resolve every split's GZIP TFRecord file list first so the three counts
    # can be reported on a single line.
    # (For a quick smoke test, slice these lists down to a handful of files.)
    paths_by_split = {
        name: tfrecord_paths_utils.dhs_ooc(dataset, split=name)
        for name in split_names
    }
    print(*(len(paths_by_split[name]) for name in split_names))

    # Common prefix of all three pipelines: read -> per-record processing -> cache.
    # The original comment describes process_tfrecords_rgb as "normalize and resize";
    # its implementation is not visible here.
    cached = {}
    for name in split_names:
        records = tf.data.TFRecordDataset(paths_by_split[name], compression_type="GZIP")
        cached[name] = records.map(processing.process_tfrecords_rgb).cache()

    # Training: shuffle examples, batch, then repeat the batched stream `epochs` times.
    train_ds = (
        cached["train"]
        .shuffle(shuffle)
        .batch(batch_size)
        .repeat(epochs)
        .prefetch(prefetch)
    )

    # Validation: shuffled and batched, single pass.
    val_ds = cached["val"].shuffle(shuffle).batch(batch_size).prefetch(prefetch)

    # Test: batched in file order, no shuffling.
    test_ds = cached["test"].batch(batch_size).prefetch(prefetch)

    return train_ds, val_ds, test_ds

In [30]:
# def simple_model_instantiate():
#     input_tensor = tf.keras.Input(shape=(224, 224, 3), name = 'images')
#     x = tf.keras.layers.Conv2D(32, (3,3), padding='same', activation='relu', name='conv_layer_1')(input_tensor)
#     x = tf.keras.layers.MaxPooling2D(pool_size=(2,2), name='maxpool_1')(x)
#     x = tf.keras.layers.Conv2D(64, (3,3), padding='same', activation='relu', name='conv_layer_2')(x)
#     x = tf.keras.layers.MaxPooling2D(pool_size=(2,2), name='maxpool_2')(x)
#     x = tf.keras.layers.Conv2D(128, (3,3), padding='same', activation='relu', name='conv_layer_3')(x)
#     x = tf.keras.layers.MaxPooling2D(pool_size=(2,2), name='maxpool_3')(x)
#     x = tf.keras.layers.Flatten()(x)
#     x = tf.keras.layers.Dense(64, activation='relu')(x)
#     predictions = tf.keras.layers.Dense(1, activation='linear')(x)
#     model = tf.keras.Model(inputs=input_tensor, outputs=predictions)
#     adam = tf.keras.optimizers.Adam(
#         learning_rate=0.001,
#     )
#     model.compile(loss='mse', optimizer=adam, metrics=['mse', 'mae', r2])
    
#     print(model.summary())
    
#     return model

In [31]:
# def EN_instantiate():
#     ''' initiates EfficientNetB0 model and prints model summary '''

#     input_tensor = tf.keras.Input(shape=(224, 224, 3), name = 'images')
#     EN_model = tf.keras.applications.efficientnet.EfficientNetB0(include_top = False, input_tensor = input_tensor)    
#     x = EN_model.output
#     x = tf.keras.layers.Flatten()(x)
#     x = tf.keras.layers.Dense(256, activation='relu')(x)
#     predictions = tf.keras.layers.Dense(1, activation='linear')(x)
#     model = tf.keras.Model(inputs=EN_model.input, outputs=predictions)
    
#     adam = tf.keras.optimizers.Adam(
#         learning_rate=0.0001,
#     )
#     model.compile(loss='mse', optimizer=adam, metrics=['mse', 'mae', r2])
#     print(model.summary())
    
#     return model

In [7]:
def mobilenet_instantiate(last_best_weight = None, dropout_rate=None, l2_factor=.01):
    '''Build and compile a MobileNetV3-Small regressor, then print its summary.

    The network is a MobileNetV3Small backbone (no classification top) on
    224x224x3 inputs, followed by Flatten -> Dense(256, relu) ->
    optional Dropout -> Dense(1, linear).

    Args:
        last_best_weight: optional path to a weights/checkpoint file; when given,
            it is loaded into the assembled model before compiling.
        dropout_rate: optional dropout fraction applied between the two Dense
            layers of the regression head. ``None`` or ``0`` disables dropout.
        l2_factor: L2 penalty factor applied to the kernel of every ``Conv2D``
            layer in the model (defaults to the previously hard-coded 0.01).

    Returns:
        The compiled ``tf.keras.Model`` (loss ``'mse'``, Adam with lr 1e-4,
        metrics mse / mae / r2).
    '''

    input_tensor = tf.keras.Input(shape=(224, 224, 3), name = 'images')
    # dropout_rate is deliberately NOT forwarded to MobileNetV3Small: Keras only
    # uses that argument inside the classification head, which include_top=False
    # removes, so forwarding it silently did nothing.
    mn_model = tf.keras.applications.MobileNetV3Small(
        include_top = False,
        input_tensor = input_tensor
    )
    x = mn_model.output
    x = tf.keras.layers.Flatten()(x)
    x = tf.keras.layers.Dense(256, activation='relu')(x)
    if dropout_rate:
        # Dropout has no weights, so it should not affect loading an .h5
        # checkpoint saved from the dropout-free head - TODO confirm for the
        # checkpoint format in use.
        x = tf.keras.layers.Dropout(dropout_rate)(x)
    predictions = tf.keras.layers.Dense(1, activation='linear')(x)
    model = tf.keras.Model(inputs=mn_model.input, outputs=predictions)

    # Attach an L2 penalty to already-built conv layers through add_loss.
    # `layer=layer` binds the current layer as a default argument so each lambda
    # keeps its own layer instead of the loop's last one.
    l2 = tf.keras.regularizers.l2(l2_factor)
    for layer in model.layers:
        if isinstance(layer, tf.keras.layers.Conv2D):
            model.add_loss(lambda layer=layer: l2(layer.kernel))

    if last_best_weight:
        model.load_weights(last_best_weight)

    adam = tf.keras.optimizers.Adam(
        learning_rate=0.0001
    )
    model.compile(loss='mse', optimizer=adam, metrics=['mse', 'mae', r2])
    # summary() prints the table itself and returns None; wrapping it in print()
    # added a stray "None" line.
    model.summary()

    return model

In [13]:
def train_val(model, train_ds, val_ds, checkpoint_path, epochs, steps_per_epoch, validation_steps):
    '''Fit ``model`` and write checkpoints plus a per-epoch CSV log to a new run folder.

    A folder named ``MN-reg_RGB_<YYYYmmdd-HHMMSS>`` is created under
    ``checkpoint_path``; it is passed to ``CustomModelCheckpoint`` and also
    holds ``model_history_log.csv``.

    Args:
        model: compiled Keras model to train (it is trained in place).
        train_ds: training dataset passed to ``model.fit``.
        val_ds: validation dataset passed as ``validation_data``.
        checkpoint_path: parent directory for the timestamped run folder.
        epochs: number of epochs to train.
        steps_per_epoch: training batches drawn per epoch.
        validation_steps: validation batches drawn per epoch.

    Returns:
        Tuple ``(model, history, params)``: the trained model, and the
        ``history`` and ``params`` attributes of the object returned by ``fit``.
    '''

    now = datetime.datetime.now()
    date_time = now.strftime("%Y%m%d-%H%M%S")
    checkpoint_path = os.path.join(checkpoint_path, f"MN-reg_RGB_{date_time}")

    # creates a folder inside the checkpoint root for checkpoints and csv to be saved
    os.makedirs(checkpoint_path, exist_ok=True)

    # Checkpoint callback; with save_best_only=True it presumably keeps only
    # improving epochs - CustomModelCheckpoint is project code not visible here.
    print(f"checkpoint_path: {checkpoint_path}")
    cp_callback = CustomModelCheckpoint(
        filepath=checkpoint_path,
        save_best_only=True,
        verbose=1
    )

    # logs the outcome of every epoch in a csv file. CSVLogger opens the file
    # itself, so no surrounding open() is needed.
    csvpath = os.path.join(checkpoint_path, 'model_history_log.csv')
    print(f"csvpath: {csvpath}")
    csv_logger = tf.keras.callbacks.CSVLogger(
        csvpath,
        separator=',',
        append=True
    )

    hist = model.fit(
        train_ds, epochs=epochs, validation_data=val_ds,
        callbacks=[cp_callback, csv_logger], verbose=1,
        steps_per_epoch=steps_per_epoch, validation_steps=validation_steps
    )

    # fit() trains `model` in place; the old `trained_model` name was never
    # defined and raised NameError only after training had finished.
    return model, hist.history, hist.params

In [9]:
# Build the three tf.data pipelines for the configured fold; the printed numbers
# are the TFRecord file counts for train / val / test.
train_ds, val_ds, test_ds = process_datasets(DATASET, SHUFFLE, BATCH_SIZE, PREFETCH, EPOCHS)

11797 3909 3963
Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: module 'gast' has no attribute 'Constant'
Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: module 'gast' has no attribute 'Constant'


2023-03-08 02:20:41.547906: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcuda.so.1'; dlerror: libcuda.so.1: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /usr/local/cuda/lib64:/usr/local/nccl2/lib:/usr/local/cuda/extras/CUPTI/lib64
2023-03-08 02:20:41.547979: W tensorflow/stream_executor/cuda/cuda_driver.cc:263] failed call to cuInit: UNKNOWN ERROR (303)
2023-03-08 02:20:41.548003: I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:156] kernel driver does not appear to be running on this host (pov-map-1677631110): /proc/driver/nvidia/version does not exist
2023-03-08 02:20:41.548596: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 AVX512F FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [35]:
# for i in train_ds.take(1):
#     print(i)

In [14]:
# Build the model, loading the weights stored at LAST_BEST_WEIGHT.
model = mobilenet_instantiate(last_best_weight=LAST_BEST_WEIGHT, dropout_rate=.15)

Model: "model_1"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 images (InputLayer)            [(None, 224, 224, 3  0           []                               
                                )]                                                                
                                                                                                  
 rescaling_1 (Rescaling)        (None, 224, 224, 3)  0           ['images[0][0]']                 
                                                                                                  
 Conv (Conv2D)                  (None, 112, 112, 16  432         ['rescaling_1[0][0]']            
                                )                                                                 
                                                                                            

In [15]:
# Bare expression: displays the model object's repr as the cell output.
model

<keras.engine.functional.Functional at 0x7fbfd906f610>

In [None]:
# Launch training; unpacks the three values returned by train_val().
trained_model, history, params = train_val(model, train_ds, val_ds, CHECKPOINT_PATH, EPOCHS, STEPS_PER_EPOCH, VALIDATION_STEPS)

checkpoint_path: models/checkpoints/MN-reg_RGB_20230308-022119
csvpath: models/checkpoints/MN-reg_RGB_20230308-022119/model_history_log.csv
Epoch 1/100
Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: 'arguments' object has no attribute 'posonlyargs'
Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: 'arguments' object has no attribute 'posonlyargs'
  23/1474 [..............................] - ETA: 1:18:46 - loss: 51.0388 - mse: 0.0155 - mae: 0.0942 - r2: 0.9731

---