## Import list

In [1]:
import os
import re
import pandas as pd
import numpy as np
import gc

import tensorflow as tf
from tensorflow.keras import optimizers, callbacks,models,layers
from keras.applications.efficientnet import EfficientNetB0
import matplotlib.pyplot as plt
import tensorflow_addons as tfa


## Global Parameters

In [2]:
damage_path = '../data/damage_csv'
images_path = '../data/ASONAM17_Damage_Image_Dataset/'
IMG_SIZE = 224

## Not needeed

In [3]:
import wandb
from wandb.keras import WandbCallback
wandb.init(project="earthquake_class_project", entity="gparaison")

[34m[1mwandb[0m: W&B API key is configured (use `wandb login --relogin` to force relogin)
  warn("The `IPython.html` package has been deprecated since IPython 4.0. "


## GPU Configuration

In [4]:
# Uncomment those lines if you have gpu.
gpus = tf.config.experimental.list_physical_devices('GPU')
tf.config.experimental.set_memory_growth(gpus[0], True)

2022-02-04 17:05:55.921148: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:939] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-02-04 17:05:55.990477: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:939] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-02-04 17:05:55.990634: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:939] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero


## Function to create the csv files that will be used to create the dataset
**Will create train.csv, dev.csv, test.csv files inside the event folder**
1. A folder will be created using the event name
2. In that folder the following files will be created.
3. The even_list_dir arguments should contains the event folders you want to include in this study


In [5]:
def create_relevance_dataset(damage_csv_path, event, event_list_dir):
    #Create output directory
    try:
        os.mkdir(os.path.join(damage_csv_path, event))
    except FileExistsError:
        print(f"{os.path.join(damage_csv_path, event)} is already there")    

    # Output directory
    output_direct = os.path.join(damage_csv_path, event)

    pd_dev = pd.DataFrame()
    pd_train = pd.DataFrame()
    pd_test = pd.DataFrame()
    
    #Concatenate csv files from different events in the event_list_directory
    for folder in event_list_dir:
        current_dir = os.path.join(damage_csv_path, folder)
        for file in os.listdir(current_dir):        
            if file.endswith('.csv'):
                file_path = os.path.join(current_dir, file)
                temp_df = pd.read_csv(file_path)
                temp_df.columns = ['path','label']
                if file.startswith('dev.'):                 
                    if pd_dev.empty:
                        pd_dev = temp_df
                    else:
                        pd_dev = pd_dev.append(temp_df, ignore_index=True)
                elif file.startswith('train.'):
                    if pd_train.empty: 
                        pd_train = temp_df
                    else:
                        pd_train = pd_train.append(temp_df, ignore_index=True)
                elif file.startswith('test.'):
                    if pd_test.empty:
                        pd_test = temp_df
                    else:
                        pd_test = pd_test.append(temp_df, ignore_index=True)
                else:
                    pass
    #Creating target label regarding if an image is relevant to an eartquake or not
    pd_dev['label'] = pd_dev['label'].apply(lambda x: 1 if x > 0 else 0)
    pd_train['label'] = pd_train['label'].apply(lambda x: 1 if x > 0 else 0)
    pd_test['label'] = pd_test['label'].apply(lambda x: 1 if x > 0 else 0)

    #writting the csv files to local disk
    dev_filename = os.path.join(output_direct, "dev.csv")
    if not os.access(dev_filename, os.F_OK) and len(pd_dev)>0:
        pd_dev.to_csv(dev_filename, index=False, header=False)
        print(f'{dev_filename} created')
    else:
        print(f'{dev_filename} already created or no data found')
        
    train_filename = os.path.join(output_direct, "train.csv")
    if not os.access(train_filename, os.F_OK) and len(pd_train) > 0:
        pd_train.to_csv(train_filename, index=False, header=False)
        print(f'{train_filename} created')
    else:
        print(f'{train_filename} already created or no data found')

    test_filename = os.path.join(output_direct, "test.csv")
    if not os.access(test_filename, os.F_OK) and len(pd_test) > 0:
        pd_test.to_csv(test_filename, index=False, header=False)
        print(f'{test_filename} created')
    else:
        print(f'{test_filename} already created or no data found')

## Function which will create the dataset

In [6]:
IMG_SIZE = 224
damage_path = '../data/damage_csv'
images_path = '../data/ASONAM17_Damage_Image_Dataset/'
from tensorflow.keras.preprocessing.image import ImageDataGenerator

data_augmentation_layer = tf.keras.Sequential([
                                  layers.RandomFlip("horizontal_and_vertical"),
                                  layers.RandomRotation(0.2),
                                  layers.RandomCrop(IMG_SIZE,IMG_SIZE),
                                  layers.RandomContrast(factor=0.8)
])


def create_dataset(damage_path,event,is_augment=False,batch_size=32,buffer_size=100):

    label_path = os.path.join(damage_path,event)
    
    img_gen = ImageDataGenerator(rescale=1/255.0,)
        
    train_df = pd.read_csv(os.path.join(label_path,'train.csv'),header=None)
    train_df.columns = ['path','label']

    train_gen = img_gen.flow_from_dataframe(dataframe = train_df,
                                directory=images_path,
                                x_col='path',
                                y_col='label',
                                class_mode='raw',          
                                batch_size=batch_size,
                                target_size= (IMG_SIZE,IMG_SIZE))



    valid_df = pd.read_csv(os.path.join(label_path,'dev.csv'),header=None)
    valid_df.columns = ['path','label']

    valid_gen = img_gen.flow_from_dataframe(dataframe = valid_df,
                                directory=images_path,
                                x_col='path',
                                y_col='label',
                                class_mode='raw',         
                                batch_size=batch_size,
                                target_size= (IMG_SIZE,IMG_SIZE))
    
    test_df = pd.read_csv(os.path.join(label_path,'test.csv'),header=None)
    test_df.columns = ['path','label']

    test_gen = img_gen.flow_from_dataframe(dataframe = test_df,
                                directory=images_path,
                                x_col='path',
                                y_col='label',
                                class_mode='raw',         
                                batch_size=batch_size,
                                target_size= (IMG_SIZE,IMG_SIZE)) 
      
    # Now we're converting our ImageDataGenerator to Dataset

    train_dataset = tf.data.Dataset.from_generator(
            lambda: train_gen ,  # Our generator 
            output_types = (tf.float32 , tf.float32) , # How we're expecting our output dtype
            output_shapes = ([None , IMG_SIZE , IMG_SIZE , 3] , [None , ]) # How we're expecting our output shape
        )

    valid_dataset = tf.data.Dataset.from_generator(
            lambda: valid_gen , 
            output_types = (tf.float32 , tf.float32), 
            output_shapes = ([None , IMG_SIZE , IMG_SIZE , 3] , [None , ])
        )
    
    test_dataset = tf.data.Dataset.from_generator(
            lambda: test_gen , 
            output_types = (tf.float32 , tf.float32), 
            output_shapes = ([None , IMG_SIZE , IMG_SIZE , 3] , [None , ])
        )

    if is_augment:
        train_dataset = train_dataset.map(lambda x,y: (data_augmentation_layer(x,training=True),y),
                                num_parallel_calls=tf.data.AUTOTUNE)
            
    print(f"steps_per_epochs: {len(train_df)// batch_size}")
    print(f"validations_steps: {len(valid_df)// batch_size}")
        
        
    steps_per_epoch = len(train_df)// batch_size
    validation_steps = len(valid_df)// batch_size


    train_dataset = train_dataset.prefetch(buffer_size=10)
    valid_dataset = valid_dataset.prefetch(buffer_size=10)

    return train_dataset,valid_dataset,test_dataset,steps_per_epoch,validation_steps

2022-02-04 17:06:03.493746: I tensorflow/core/platform/cpu_feature_guard.cc:151] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2022-02-04 17:06:03.494551: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:939] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-02-04 17:06:03.494758: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:939] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-02-04 17:06:03.494885: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:939] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zer

## Here we decided to use those events in our study. The data from those events will be combined
- The event name is cross_event_relevance

In [7]:
event_list_dir = ['ecuador', 'nepal', 'gg']
event = 'cross_event_relevance'


## Actual creation of the dataset calling our previously defined functions

In [8]:
create_relevance_dataset(damage_path, event, event_list_dir)

train_ds, valid_ds,test_ds,steps_per_epoch,validation_steps, = create_dataset( damage_path,event,
                                    is_augment=False,batch_size=32,buffer_size=100)

../data/damage_csv/cross_event_relevance is already there
../data/damage_csv/cross_event_relevance/dev.csv already created or no data found
../data/damage_csv/cross_event_relevance/train.csv already created or no data found
../data/damage_csv/cross_event_relevance/test.csv already created or no data found
Found 14632 validated image filenames.
Found 4876 validated image filenames.
Found 4874 validated image filenames.
steps_per_epochs: 457
validations_steps: 152


## Function to plot history

In [9]:
def subplot_learning_curve_d(history):
    #plt.clf()
    plt.figure(figsize=(10,5))
    for i,metric in enumerate(['acc','loss']):
        plt.subplot(1,2,i+1)
        plt.plot(history.history[metric])
        plt.plot(history.history['val_' + metric])
        plt.xlabel('Epochs')
        plt.ylabel(metric)
        plt.legend((metric, 'val_' + metric))
        plt.title(": Learning curve " + metric + " vs " + 'val_' + metric)
    plt.show()

## Effiecient Net model feature extraction

In [10]:
from keras.applications.efficientnet import EfficientNetB0
def get_efficient_model(lr=0.001):
    tf.keras.backend.clear_session()
    print(f"lr in model = {lr}")
    pre_trained_model = EfficientNetB0(include_top=False,
                                       weights='imagenet',
                                       input_shape=(IMG_SIZE,IMG_SIZE,3))

    pre_trained_model.trainable = False

    inputs = layers.Input(shape=(IMG_SIZE,IMG_SIZE,3))
    x = tf.keras.applications.efficientnet.preprocess_input(inputs * 255.0)
    x = pre_trained_model(x,training= False)
    x = layers.GlobalAveragePooling2D()(x)
    x = layers.Dropout(0.2)(x)
    x = layers.Dense(256,activation='relu')(x)
    outputs = layers.Dense(1,activation='sigmoid')(x)

    efficient_model = models.Model(inputs,outputs)
    efficient_model.compile(optimizer = optimizers.RMSprop(learning_rate=lr),
                            loss='binary_crossentropy',
                            metrics=['acc'])

    efficient_model.summary()
    
    return efficient_model

## Running the model only Feature Extraction

In [None]:
EfficientNetB0_h = get_efficient_model()
n_epochs = 20
train_ds, valid_ds,test_ds,steps_per_epoch,validation_steps, = create_dataset(damage_path,event,
                                        is_augment=False,batch_size=20,buffer_size=100)

history = EfficientNetB0_h.fit(train_ds,
                        initial_epoch=n_epochs,
                        epochs=2*n_epochs,
                        steps_per_epoch = steps_per_epoch,
                        validation_data=valid_ds,
                        validation_steps = validation_steps)


subplot_learning_curve_d(history)

lr in model = 0.001
Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_2 (InputLayer)        [(None, 224, 224, 3)]     0         
                                                                 
 tf.math.multiply (TFOpLambd  (None, 224, 224, 3)      0         
 a)                                                              
                                                                 
 efficientnetb0 (Functional)  (None, 7, 7, 1280)       4049571   
                                                                 
 global_average_pooling2d (G  (None, 1280)             0         
 lobalAveragePooling2D)                                          
                                                                 
 dropout (Dropout)           (None, 1280)              0         
                                                                 
 dense (Dense)               (None, 256) 

2022-02-04 17:06:47.530007: I tensorflow/stream_executor/cuda/cuda_dnn.cc:366] Loaded cuDNN version 8302

You may not need to update to CUDA 11.1; cherry-picking the ptxas binary is often sufficient.
2022-02-04 17:06:48.967404: W tensorflow/stream_executor/gpu/asm_compiler.cc:230] Falling back to the CUDA driver for PTX compilation; ptxas does not support CC 8.6
2022-02-04 17:06:48.967413: W tensorflow/stream_executor/gpu/asm_compiler.cc:233] Used ptxas at ptxas
2022-02-04 17:06:48.967922: W tensorflow/stream_executor/gpu/redzone_allocator.cc:314] UNIMPLEMENTED: ptxas ptxas too old. Falling back to the driver to compile.
Relying on driver to perform ptx compilation. 
Modify $PATH to customize ptxas location.
This message will be only logged once.


  8/731 [..............................] - ETA: 18s - loss: 0.5663 - acc: 0.7312

2022-02-04 17:06:49.276519: I tensorflow/stream_executor/cuda/cuda_blas.cc:1774] TensorFloat-32 will be used for the matrix multiplication. This will only be logged once.


Epoch 22/40
Epoch 23/40
Epoch 24/40
Epoch 25/40
Epoch 26/40
Epoch 27/40