# Creating a CNN to analyse Facies data

Based on the research in the following article: *(TODO: the citation is missing — add the article reference here)*

In [263]:
# from __future__ import absolute_import, division, print_function, unicode_literals

import tensorflow as tf

from tensorflow import feature_column
from tensorflow.keras import layers, models
from sklearn.model_selection import train_test_split
from sklearn.impute import SimpleImputer

from statistics import mean
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation, Flatten, normalization, Convolution1D
from keras.callbacks import History
from keras.utils import np_utils
from keras.callbacks import History
from sklearn import metrics
from classification_utilities import display_cm

### Load data and replace missing values

In [264]:
# Read the training logs and the held-out logs from the local datasets folder
facies_data = pd.read_csv('./datasets/facies_vectors.csv')
test_data = pd.read_csv('./datasets/validation_data_with_facies_new.csv')

# Separate the held-out frame into its label column and its remaining columns.
# 'Unnamed: 0' is removed as well (presumably a saved index column - TODO confirm).
y_test = test_data['Facies']
X_test = test_data.drop(['Facies', 'Unnamed: 0'], axis=1)

# Column groups and display constants
non_feature_columns = ['Formation', 'Well Name', 'Depth']
feature_names = [
    'Facies', 'Formation', 'Well Name', 'Depth',
    'GR', 'ILD_log10', 'DeltaPHI', 'PHIND', 'PE', 'NM_M', 'RELPOS',
]
facies_names = ['SS', 'CSiS', 'FSiS', 'SiSh', 'MS', 'WS', 'D', 'PS', 'BS']
facies_colors = [
    '#F4D03F', '#F5B041', '#DC7633', '#6E2C00', '#1B4F72',
    '#2E86C1', '#AED6F1', '#A569BD', '#196F3D',
]

# Replace missing 'PE' readings in the training frame with the column mean
# (only the training frame is imputed here; test_data is left as loaded)
if facies_data['PE'].isna().any():
    facies_data['PE'] = facies_data['PE'].fillna(facies_data['PE'].mean())

# Well labels and depths of the training rows
depth = facies_data['Depth']
well = facies_data['Well Name']

# `train` / `test` are aliases of the full frames (no columns are dropped)
train = facies_data
test = test_data


#train = pd.DataFrame(imp.transform(train), columns=feature_names)
#validate = pd.DataFrame(imp.transform(validate), columns=feature_names)
#test = pd.DataFrame(imp.transform(test), columns=feature_names)


In [265]:
# Sanity check: shapes of the held-out inputs, held-out labels and training frame
tuple(frame.shape for frame in (X_test, y_test, train))

((830, 10), (830,), (4149, 11))

In [None]:

# One-hot encode the facies labels; the labels are shifted down by one so that
# the first class maps to index 0
num_classes = len(train['Facies'].unique())
y_train = np_utils.to_categorical(train['Facies'].values - 1, num_classes)

# Width of the window around each central sample, and the seven features in use
window_width = 15
feature_list = ['GR', 'ILD_log10', 'DeltaPHI', 'PHIND', 'PE', 'NM_M', 'RELPOS']

# NOTE(review): prepare_feature_vectors is neither defined nor imported in the
# visible part of this notebook - confirm it exists before this cell runs.
X_train, X_test = (
    prepare_feature_vectors(frame, feature_list, window_width)
    for frame in (train, test)
)

# Sample counts are the length of the first axis of each prepared array
num_train_samples, num_test_samples = np.shape(X_train)[0], np.shape(X_test)[0]

print('Training Samples=', num_train_samples, '   Test Samples=', num_test_samples)

In [None]:
# define neural network to classify facies
num_filters = 12
dropout_prob = 0.6

# Convolutional front end: two kernel-size-1 convolutions followed by one
# kernel-size-3 convolution, each with a ReLU activation.
# `padding` is the Keras 2 name for the Keras 1 `border_mode` argument; the rest
# of this notebook already uses the Keras 2 API (`epochs=`, 'accuracy' history key).
convnet = Sequential()
convnet.add(Convolution1D(num_filters, 1, padding='valid',
                          input_shape=(window_width, len(feature_list))))
convnet.add(Activation('relu'))
convnet.add(Convolution1D(7, 1, padding='valid'))
convnet.add(Activation('relu'))
convnet.add(Convolution1D(num_filters, 3, padding='valid'))
convnet.add(Activation('relu'))
convnet.add(Dropout(dropout_prob / 2))  # half the dropout rate used after the dense layer

# Dense head: flatten, one hidden layer with batch normalisation and a sigmoid
# activation, then full-rate dropout
convnet.add(Flatten())
convnet.add(Dense(4 * num_filters))
convnet.add(normalization.BatchNormalization())
convnet.add(Activation('sigmoid'))
convnet.add(Dropout(dropout_prob))

# Softmax output with one unit per facies class
convnet.add(Dense(num_classes, activation='softmax'))
convnet.compile(loss='categorical_crossentropy', optimizer='adadelta', metrics=['accuracy'])
convnet.summary()

# save initial weights so every cross-validation fold can restart from the same state
initial_weights = convnet.get_weights()

In [None]:


# Cross-validation settings: number of folds, epochs trained per fold, and the
# number of samples the training data is rolled by between folds
num_fold = 6
epochs_per_fold = 1000
roll_stride = np.ceil(num_train_samples / num_fold).astype(int)

# Keras callback that records the per-epoch metrics of each fit() call
convnet_hist = History()

# Storage for results:
#   hist       - 4 metric curves per fold (filled by the training loop in the order
#                accuracy, val_accuracy, loss, val_loss)
#   f1scores   - one F1 score per fold
#   y_test_ohv - per-fold class probabilities predicted for every test sample
hist = np.zeros(shape=(4, num_fold, epochs_per_fold))
f1scores = np.zeros(shape=num_fold)
y_test_ohv = np.zeros(shape=(num_test_samples, num_fold, num_classes))


# shuffle input data
#rand_perm = np.random.permutation(num_train_samples)
#X_train = X_train[rand_perm]
#Y_train = Y_train[rand_perm]


In [None]:
# use 6-fold cross validation and train 6 neural networks, ending up with 6 sets of predictions
for i in np.arange(num_fold):
    # Restart every fold from the same initial weights
    convnet.set_weights(initial_weights)

    # Roll features and labels together by i strides, starting from the ORIGINAL
    # arrays each time. Keras takes the validation split from the end of the data,
    # so each fold holds out a different slice. Rolling into fold-local variables
    # (instead of overwriting X_train) keeps the shifts from accumulating across
    # iterations and keeps this cell safe to re-run.
    shift = i * roll_stride
    X_fold = np.roll(X_train, shift, axis=0)
    y_fold = np.roll(y_train, shift, axis=0)

    convnet.fit(X_fold, y_fold, batch_size=200, epochs=epochs_per_fold, verbose=0,
                validation_split=1.0/num_fold, callbacks=[convnet_hist])

    # Per-epoch curves for this fold: accuracy, val_accuracy, loss, val_loss
    hist[:, i, :] = [convnet_hist.history['accuracy'], convnet_hist.history['val_accuracy'],
                     convnet_hist.history['loss'], convnet_hist.history['val_loss']]

    # F1 over the full training set (this includes the samples the fold was
    # trained on, so it is an optimistic score); classes are shifted back to 1-based
    Y_predict = 1 + np.argmax(convnet.predict(X_train), axis=1)
    f1scores[i] = metrics.f1_score(1 + np.argmax(y_train, axis=1), Y_predict, average='micro')
    print('F1 Score =', f1scores[i])

    # Store this fold's class probabilities for the test samples
    y_test_ohv[:, i, :] = convnet.predict(X_test)

print('Average F1 Score =', np.mean(f1scores))

In [None]:
# Plot callbacks: join the per-fold curves end to end so that each of the four
# metrics becomes a single line of num_fold * epochs_per_fold points
hist = hist.reshape(4, num_fold * epochs_per_fold)

# Row 0 holds the training accuracy, row 1 the validation accuracy
plt.plot(hist[0])
plt.plot(hist[1])
plt.legend(['train', 'val'], loc='lower left')