# Import necessary Python libraries

In [None]:
import segyio
from segysak.segy import segy_header_scan
from IPython.display import display

In [None]:
import numpy as np
import pandas as pd
import matplotlib
from matplotlib import colors
import matplotlib.pyplot as plt
import seaborn as sns
import plotly as px
import plotly.graph_objects as go
import plotly.express as px
from pylab import figure, cm

In [None]:
from sklearn import metrics
from sklearn import neighbors
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.model_selection import cross_val_score
from sklearn.metrics import precision_recall_fscore_support as score
from sklearn.metrics import confusion_matrix, classification_report, accuracy_score
from sklearn.metrics._plot.confusion_matrix import ConfusionMatrixDisplay
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import RandomizedSearchCV
from sklearn.linear_model import LogisticRegression
from itertools import product

In [None]:
import warnings
warnings.filterwarnings('ignore')

import scipy as sp
import re
from IPython.display import display
from mpl_toolkits.axes_grid1 import make_axes_locatable


# Data Import

The following 3D volumes are present in the dataset:
1. Facies
2. Seismic volume
3. Relative Acoustic Impedance (RelAI)
4. Instantaneous Frequency
5. Envelope
6. Seismic Inversion

In [None]:
# Read every SEG-Y volume in full with segyio.tools.cube.
# NOTE(review): the facies file name contains a space ("Facies_Cube _case_4");
# confirm that it matches the file on disk.
facies = segyio.tools.cube('/data/Facies_Cube _case_4.segy')
seis = segyio.tools.cube('/data/Seismic_Cube_Ormsby_10_60_case_4.segy')
relai = segyio.tools.cube('/data/RelAcImpedance_10_60_Cube_case_4.segy')
inst_freq = segyio.tools.cube('/data/InstFs_10_60_Cube_case_4.segy')
envel = segyio.tools.cube('/data/Envelope_10_60_Cube_case_4.segy')
seis_inv = segyio.tools.cube('/data/Impedance_10_60_Cube_case_4.segy')


# Exploratory Data Analysis & Visualization

## Cleaning the dataset

Samples outside the zone of interest are flagged with the value -1. We replace them with NaN so that they are ignored in the statistics and dropped before model training.

In [None]:
# Replace the -1 flag with NaN in every volume. np.where builds a new array,
# so each name is rebound to the masked copy.
# NOTE(review): a genuine attribute sample that is exactly -1 is masked too.
facies = np.where(facies==-1, np.nan, facies)
seis = np.where(seis==-1, np.nan, seis)
relai= np.where(relai==-1, np.nan, relai)
inst_freq= np.where(inst_freq==-1, np.nan, inst_freq)
envel = np.where(envel==-1, np.nan, envel)
seis_inv = np.where((seis_inv==-1), np.nan, seis_inv)

Let us inspect the shapes of the volumes to ensure they are consistent.

In [None]:
facies.shape

In [None]:
seis_inv.shape

The seismic inversion volume has one additional sample along its last axis, so we drop the final sample to match the other volumes.

In [None]:
# Drop the last index along the last axis; the result is a new array and
# seis_inv itself is left unchanged.
seis_inv_1 = np.delete(seis_inv, -1, axis=-1)
seis_inv_1.shape

## Plotting 3D cubes

In [None]:
def plot_3D_cube(data, color_map='facies', number_of_facies=5, number_of_wells=7):
    """Scatter-plot every sample of a 3D volume, coloured by its value.

    Args:
        data (ndarray): 3D array to display; its three axes become x, y and z.
        color_map (str): 'facies' uses a discrete facies palette; any other
            value (e.g. 'relai') uses the continuous 'jet' colormap.
        number_of_facies (int): Palette selector for ``color_map='facies'``.
            5 covers the codes -1..4 (the first colour, white, is the -1
            flag); 4 covers the codes 0..4. For any other value no samples
            are drawn.
        number_of_wells (int or None): How many of the predefined well paths
            to draw (at most 7). ``None`` or 0 draws none.

    Returns:
        None. The figure is displayed with ``plt.show()``.
    """
    x, y, z = np.indices(data.shape)

    fig = plt.figure(figsize=(10, 6))
    ax3D = fig.add_subplot(projection='3d')
    ax3D.set_xlabel('x')
    ax3D.set_ylabel('y')
    ax3D.set_zlabel('z')

    # Seven vertical training wells, chosen so that they do not cross fault zones.
    well_x = [10, 60, 30, 25, 65, 50, 21]
    well_y = [15, 15, 20, 58, 60, 50, 35]
    well_z = [700, 0]

    wells_to_draw = min(number_of_wells or 0, len(well_x))
    for well in range(wells_to_draw):
        ax3D.plot([well_x[well], well_x[well]],
                  [well_y[well], well_y[well]],
                  zs=well_z, color='black', linewidth=1)

    # Discrete palettes: (colours, class boundaries) per supported facies count.
    facies_palettes = {
        5: (['#FFFFFF', '#E69076', '#FFFF00', '#FFCC00', '#A6A6A6', '#8080FF'],
            [-1.5, -0.5, 0.5, 1.5, 2.5, 3.5, 4.5]),
        4: (['#E69076', '#FFFF00', '#FFCC00', '#A6A6A6', '#8080FF'],
            [-0.5, 0.5, 1.5, 2.5, 3.5, 4.5]),
    }

    if color_map == 'facies':
        palette = facies_palettes.get(number_of_facies)
        if palette is not None:
            facies_color, bounds = palette
            cmap = matplotlib.colors.ListedColormap(facies_color)
            norm = matplotlib.colors.BoundaryNorm(bounds, cmap.N)
            # The norm must be handed to scatter; otherwise the colours are
            # stretched linearly over the data range and a facies code can
            # change colour from one plot to the next.
            p3d = ax3D.scatter(x.ravel(), y.ravel(), z.ravel(),
                               c=np.ravel(data), cmap=cmap, norm=norm)
            # One tick per class, placed at the centre of each colour band.
            class_ticks = [(low + high) / 2 for low, high in zip(bounds[:-1], bounds[1:])]
            fig.colorbar(p3d, ticks=class_ticks)
    else:
        p3d = ax3D.scatter(x.ravel(), y.ravel(), z.ravel(),
                           c=np.ravel(data), cmap='jet')
        fig.colorbar(p3d)

    plt.show()

In [None]:
plot_3D_cube(data=facies, color_map='facies', number_of_facies=5, number_of_wells=None)

## Statistical Analysis

In [None]:
# Histograms of every volume. Each sample is weighted by 100 / (total number of
# samples, NaN included), so bar heights are percentages of the whole cube and
# match the 'Proportion, %' axis label.
volumes_to_plot = [
    ('Seismic', seis),
    ('Seismic Inversion', seis_inv_1),
    ('Relative AI', relai),
    ('Inst. Frequency', inst_freq),
    ('Envelope', envel),
    ('Facies', facies),
]

fig, ax = plt.subplots(2, 3, figsize=(10, 6))

for panel, (panel_title, volume) in zip(ax.flatten(), volumes_to_plot):
    values = volume.flatten()
    panel.hist(values, weights=np.full(len(values), 100.0 / len(values)))
    panel.set_title(panel_title)
    panel.set_ylabel('Proportion, %')

fig.subplots_adjust(left=0.1, right=0.99, bottom=0.05, top=0.9,
                    hspace=0.5, wspace=0.5)

plt.show()

# Feature Scaling & Standardization

In [None]:
def standartization_3D(feature_3D_array):
    """Z-score a volume with its global, NaN-aware mean and standard deviation.

    Args:
        feature_3D_array (ndarray): Volume to standardize; NaN entries are
            ignored when the statistics are computed and stay NaN in the result.

    Returns:
        ndarray: ``(feature_3D_array - mean) / std`` with the input's shape.
    """
    # Statistics are taken over all samples at once, not per trace.
    all_samples = feature_3D_array.reshape(-1)
    global_mean = np.nanmean(all_samples)
    global_std = np.nanstd(all_samples)

    centred = feature_3D_array - global_mean
    return centred / global_std

In [None]:
relai_std = standartization_3D(relai)
relai_std.shape

In [None]:
seis_inv_std = standartization_3D(seis_inv_1)
seis_inv_std.shape

In [None]:
inst_freq_std = standartization_3D(inst_freq)
inst_freq_std.shape

In [None]:
envel_std = standartization_3D(envel)
envel_std.shape

In [None]:
seis_std = standartization_3D(seis)
seis_std.shape

Select 7 wells for training the models, such that the wells don't cross over fault zones.

In [None]:
start_x = [10, 60, 30, 25, 65, 50, 21]
start_y = [15, 15, 20, 58, 60, 50, 35]
start_z = [660, 660, 660, 660, 660, 660, 660]

end_x = [10, 60, 30, 25, 65, 50, 21]
end_y = [15, 15, 20, 58, 60, 50, 35]
end_z  =[0, 0, 0, 0, 0, 0, 0]

Using the well path coordinates, extract the properties for those wells. 

In [None]:
def extract_wells_with_data_3D(data_3D, x_coord_wells, y_coord_wells):
    """Extract the vertical trace of a volume at each well location.

    Well ``i`` is located at ``(x_coord_wells[i], y_coord_wells[i])``; the two
    sequences are paired element by element.

    Args:
        data_3D (ndarray): Volume indexed as ``[x, y, z]``.
        x_coord_wells (sequence of int): First-axis index of every well.
        y_coord_wells (sequence of int): Second-axis index of every well.

    Returns:
        ndarray: Array of shape ``(number of wells, data_3D.shape[2])`` with
        one row per well, in the order the wells were given.

    Raises:
        ValueError: If the coordinate sequences are not one-dimensional or
            differ in length.
    """
    well_x = np.asarray(x_coord_wells, dtype=int)
    well_y = np.asarray(y_coord_wells, dtype=int)
    if well_x.ndim != 1 or well_x.shape != well_y.shape:
        raise ValueError("x_coord_wells and y_coord_wells must be 1D sequences of equal length")

    # Paired fancy indexing picks all wells in one step; no loop is required.
    return np.asarray(data_3D)[well_x, well_y, :]

In [None]:
seis_inv_std_wells = extract_wells_with_data_3D(seis_inv_std, start_x, start_y)
seis_inv_std_wells
df_seis_inv_std_wells = pd.DataFrame(seis_inv_std_wells.T)



In [None]:
relai_std_wells = extract_wells_with_data_3D(relai_std, start_x, start_y)
relai_std_wells
df_relai_std_wells = pd.DataFrame(relai_std_wells.T)


In [None]:
seis_std_wells = extract_wells_with_data_3D(seis_std, start_x, start_y)
seis_std_wells
df_seis_std_wells = pd.DataFrame(seis_std_wells.T)


In [None]:
inst_freq_std_wells = extract_wells_with_data_3D(inst_freq_std, start_x, start_y)
inst_freq_std_wells
df_inst_freq_std_wells = pd.DataFrame(inst_freq_std_wells.T)


In [None]:
envel_std_wells = extract_wells_with_data_3D(envel_std, start_x, start_y)
envel_std_wells
df_envel_std_wells = pd.DataFrame(envel_std_wells.T)


In [None]:
facies_wells = extract_wells_with_data_3D(facies, start_x, start_y)
facies_wells
df_facies_wells = pd.DataFrame(facies_wells.T)


In [None]:
np.unique(facies_wells)

In [None]:
print(relai_std_wells.shape)
print(seis_inv_std_wells.shape)
print(seis_std_wells.shape)
print(envel_std_wells.shape)
print(inst_freq_std_wells.shape)

Create a list with the required properties

In [None]:
feature_list_RelAI_Seis_Envel_InstFreq_SeisInv =    [df_relai_std_wells,
                                                    df_seis_std_wells,
                                                    df_envel_std_wells,
                                                    df_inst_freq_std_wells,
                                                    df_seis_inv_std_wells
                                                    ]
len(feature_list_RelAI_Seis_Envel_InstFreq_SeisInv)

In [None]:
def difference_map_3D(df_facies_comparison, facies_predicted, number_of_wells=7):
    """Plot where predicted facies differ from the reference facies in 3D.

    Samples whose facies difference is at least 1 in absolute value are shown
    in red (value 1), matching samples in green (value 0).

    Args:
        df_facies_comparison (ndarray): 3D array of reference facies.
        facies_predicted (ndarray): 3D array of predicted facies, same shape.
        number_of_wells (int or None): How many of the predefined well paths
            to draw (at most 7). ``None`` or 0 draws none.

    Returns:
        None. The figure is displayed with ``plt.show()``.
    """
    facies_difference = np.subtract(df_facies_comparison, facies_predicted)
    # Collapse every mismatch to 1; zeros and NaN pass through unchanged.
    facies_difference_map = np.where((facies_difference >= 1) | (facies_difference <= -1),
                                     1, facies_difference)

    x, y, z = np.indices(facies_difference_map.shape)

    fig = plt.figure(figsize=(8, 8))
    ax3D = fig.add_subplot(projection='3d')
    ax3D.set_xlabel('x')
    ax3D.set_ylabel('y')
    ax3D.set_zlabel('z')

    # Seven vertical training wells.
    well_x = [10, 60, 30, 25, 65, 50, 21]
    well_y = [15, 15, 20, 58, 60, 50, 35]
    well_z = [700, 0]

    # Clamp so that a request for more wells than are defined cannot index
    # past the end of the coordinate lists.
    wells_to_draw = min(number_of_wells or 0, len(well_x))
    for well in range(wells_to_draw):
        ax3D.plot([well_x[well], well_x[well]],
                  [well_y[well], well_y[well]],
                  zs=well_z, color='black', linewidth=1)

    cmap = matplotlib.colors.ListedColormap(['green', 'red'])
    norm = matplotlib.colors.BoundaryNorm([-0.5, 0.5, 1.5], cmap.N)
    # Passing the norm pins 0 -> green and 1 -> red even when only one of the
    # two values is present in the map.
    p3d = ax3D.scatter(x.ravel(), y.ravel(), z.ravel(),
                       c=np.ravel(facies_difference_map), cmap=cmap, norm=norm)
    fig.colorbar(p3d, ticks=[0, 1])
    plt.show()

# Supervised ML models

## Data Preparation

In [None]:
def concat_features_RelAI_Seis_Envel_InstFreq_SeisInv(df_facies_wells, feature_list_RelAI_Seis_Envel_InstFreq_SeisInv,
                                                      feature_names=('relai', 'seis', 'envel', 'inst_freq', 'seis_inv')):
    """Stack the wells of the facies and feature tables into one long table.

    Every input frame holds one column per well. The columns of each frame are
    appended one below the other (well 0, then well 1, ...), the stacked series
    are placed side by side, and rows containing any NaN are removed.

    Args:
        df_facies_wells (DataFrame): Facies, one column per well.
        feature_list_RelAI_Seis_Envel_InstFreq_SeisInv (list of DataFrame):
            Feature frames laid out like ``df_facies_wells``, in the order
            given by ``feature_names``.
        feature_names (sequence of str): Output column name of each feature.
            Defaults to relative AI, seismic, envelope, instantaneous
            frequency and seismic inversion.

    Returns:
        DataFrame: Columns ``'facies'`` followed by ``feature_names``. The
        index keeps the original sample positions, so it repeats per well.

    Raises:
        ValueError: If the number of feature frames and names differ.
    """
    feature_frames = list(feature_list_RelAI_Seis_Envel_InstFreq_SeisInv)
    column_names = list(feature_names)
    if len(feature_frames) != len(column_names):
        raise ValueError(
            f"got {len(feature_frames)} feature frames but {len(column_names)} feature names"
        )

    number_of_wells = len(df_facies_wells.columns)

    def _stack_wells(frame):
        # Append the well columns below each other, keeping the sample index.
        return pd.concat([frame.iloc[:, well] for well in range(number_of_wells)],
                         axis=0, ignore_index=False)

    # keys= names the columns explicitly instead of relying on positional
    # 0..4 labels, which are wrong when only one well is present.
    features = pd.concat([_stack_wells(frame) for frame in feature_frames],
                         axis=1, keys=column_names)
    facies = _stack_wells(df_facies_wells).to_frame(name='facies')

    facies_and_features = pd.concat([facies, features], axis=1)
    return facies_and_features.dropna(axis=0)

In [None]:
facies_features_3 = concat_features_RelAI_Seis_Envel_InstFreq_SeisInv(df_facies_wells, feature_list_RelAI_Seis_Envel_InstFreq_SeisInv)
facies_features_3

facies_and_features = facies_features_3.copy()
facies_and_features


In [None]:
facies_and_features['facies'].value_counts()

In [None]:
x_train, x_test, y_train, y_test = train_test_split(facies_and_features[['relai','seis', 'envel', 'inst_freq', 'seis_inv']],
                                                    facies_and_features[['facies']],
                                                    train_size=0.8,
                                                    random_state=123)

In [None]:
y_train.describe()

## Logistic Regression

In [None]:
model_LR = LogisticRegression()
model_LR.fit(x_train, y_train)

In [None]:
test_predict = model_LR.predict(x_test)

print(pd.crosstab(y_test.values.flatten(), test_predict))
print(classification_report(y_test.values.flatten(), test_predict))

In [None]:
def accuracy_score_cv(estimator, X, y, cv=10):
    """Print and return the mean cross-validation score of an estimator.

    Args:
        estimator: Estimator object accepted by ``cross_val_score``.
        X: Feature table.
        y: Target values.
        cv (int): Number of folds (10 by default).

    Returns:
        float: Mean of the per-fold scores.
    """
    from sklearn.model_selection import cross_val_score

    # One score per fold; no explicit scorer is passed to cross_val_score.
    fold_scores = cross_val_score(estimator=estimator, X=X, y=y, cv=cv)
    mean_score = np.mean(fold_scores)
    print("accuracy: ", mean_score)

    return mean_score
accuracy_score_LR = accuracy_score_cv(estimator=model_LR, X=x_train, y=y_train, cv=10)
accuracy_score_LR

In [None]:
def feature_importance_plot(model, x_train, y_train, random_state):
    """Plot the permutation importance of every feature as horizontal bars.

    Args:
        model: Fitted estimator to inspect.
        x_train (DataFrame): Features; the column names label the bars.
        y_train: Target values matching ``x_train``.
        random_state (int): Seed for the permutations.

    Returns:
        None. The figure is displayed with ``plt.show()``.
    """
    from sklearn.inspection import permutation_importance

    res = permutation_importance(model, x_train, y_train, scoring='accuracy',
                                 random_state=random_state)
    importance_res = pd.Series(res.importances_mean,
                               index=x_train.columns).sort_values(ascending=True)

    fig, ax = plt.subplots(figsize=(8, 4))
    # Draw on the axes created above instead of whatever axes is current.
    importance_res.plot.barh(ax=ax)
    ax.set_title('Permutation importance', fontsize=14)
    # Horizontal bars: the features sit on the y axis and the score (mean drop
    # in accuracy when the feature is shuffled) on the x axis.
    ax.set_xlabel('Mean decrease in accuracy', fontsize=12)
    ax.set_ylabel('Feature', fontsize=14)
    ax.tick_params(axis='both', labelsize=12)
    ax.grid()
    plt.show()
feature_importance_plot(model_LR, x_train, y_train, random_state=50)

In [None]:
def predict_facies_3D(facies, relai_std, seis_std, envel_std, inst_freq_std, seis_inv_std, model):
    """Predict facies for every sample of the cube with a fitted classifier.

    A sample is predicted only when all five features are present (not NaN).
    All such samples are passed to ``model.predict`` in a single call, so
    traces without any valid sample are simply skipped.

    Args:
        facies (ndarray): Reference facies cube (NaN where undefined).
        relai_std, seis_std, envel_std, inst_freq_std, seis_inv_std (ndarray):
            Feature cubes with the same shape as ``facies``. They are fed to
            the model in this order.
        model: Fitted estimator whose ``predict`` returns one label per row.

    Returns:
        tuple: ``(predicted, reference)``, both shaped like ``facies``.
        ``predicted`` holds the predicted label where the features and the
        reference facies are both present, NaN elsewhere. ``reference`` holds
        the reference facies where the features are present, NaN elsewhere.

    Raises:
        ValueError: If a feature cube does not have the shape of ``facies``.
    """
    facies = np.asarray(facies, dtype=float)
    cube_shape = facies.shape

    feature_cubes = [np.asarray(cube, dtype=float)
                     for cube in (relai_std, seis_std, envel_std, inst_freq_std, seis_inv_std)]
    for cube in feature_cubes:
        if cube.shape != cube_shape:
            raise ValueError(f"feature cube shape {cube.shape} does not match facies shape {cube_shape}")

    # One row per sample, one column per feature.
    feature_table = np.stack([cube.reshape(-1) for cube in feature_cubes], axis=1)
    facies_flat = facies.reshape(-1)

    has_features = ~np.isnan(feature_table).any(axis=1)
    has_facies = ~np.isnan(facies_flat)

    predicted_flat = np.full(facies_flat.shape, np.nan)
    reference_flat = np.full(facies_flat.shape, np.nan)
    reference_flat[has_features] = facies_flat[has_features]

    if has_features.any():
        samples = feature_table[has_features]
        # Re-attach the training column names when the model recorded them, so
        # scikit-learn does not warn about missing feature names.
        fitted_names = getattr(model, 'feature_names_in_', None)
        if fitted_names is not None and len(fitted_names) == samples.shape[1]:
            samples = pd.DataFrame(samples, columns=list(fitted_names))

        labels = np.full(facies_flat.shape, np.nan)
        labels[has_features] = np.asarray(model.predict(samples), dtype=float).reshape(-1)

        # Keep predictions only where a reference facies exists.
        keep = has_features & has_facies
        predicted_flat[keep] = labels[keep]

    return predicted_flat.reshape(cube_shape), reference_flat.reshape(cube_shape)

In [None]:
facies_pred, df_facies_comparison = predict_facies_3D(facies=facies,

                                                    relai_std=relai_std,
                                                    seis_std=seis_std,
                                                    envel_std=envel_std,
                                                    inst_freq_std=inst_freq_std,
                                                    seis_inv_std=seis_inv_std,

                                                    model=model_LR)


In [None]:
plot_3D_cube(data=facies_pred, color_map='facies', number_of_facies=4, number_of_wells=7)

In [None]:
def confusion_matrix_3D(facies_pred, df_facies_comparison, model):
    """Score a predicted facies cube against the reference cube.

    Only samples that are present (not NaN) in both cubes are compared, so the
    two label vectors always stay aligned. The classification report is
    printed and the confusion matrix is plotted.

    Args:
        facies_pred (ndarray): Predicted facies cube.
        df_facies_comparison (ndarray): Reference facies cube, same shape.
        model: Kept for backward compatibility; the class labels are taken
            from the data, so the model need not expose ``classes_``.

    Returns:
        tuple: ``(report, f1_score_per_class, count_facies, accuracy)`` where
        ``report`` is the text of the classification report,
        ``f1_score_per_class`` the per-class F1 scores, ``count_facies`` the
        number of reference samples per class and ``accuracy`` the overall
        accuracy.
    """
    from sklearn.metrics import (ConfusionMatrixDisplay, accuracy_score,
                                 classification_report, confusion_matrix, f1_score)

    actual = np.asarray(df_facies_comparison, dtype=float)
    predicted = np.asarray(facies_pred, dtype=float)

    # A single joint mask; masking each cube on its own could pair up
    # different samples if their NaN patterns differ.
    both_present = ~np.isnan(actual) & ~np.isnan(predicted)
    actual_f = actual[both_present]
    predicted_f = predicted[both_present]

    class_labels = np.unique(np.concatenate([actual_f, predicted_f]))
    conf_matrix = confusion_matrix(actual_f, predicted_f, labels=class_labels)

    f1_score_per_class = f1_score(actual_f, predicted_f, average=None)
    accuracy_estimation = accuracy_score(actual_f, predicted_f)

    report_print = classification_report(actual_f, predicted_f)
    print(report_print)

    # Number of reference samples in each class.
    count_facies = np.unique(actual_f, return_counts=True)[1]

    display_conf_matrix = ConfusionMatrixDisplay(confusion_matrix=conf_matrix,
                                                 display_labels=class_labels)
    display_conf_matrix.plot()
    plt.show()

    return report_print, f1_score_per_class, count_facies, accuracy_estimation
report_print_LR, f1_score_per_class_LR, count_facies, accuracy_test_LR = confusion_matrix_3D(facies_pred,
                                                                                                df_facies_comparison,
                                                                                                model_LR)

In [None]:
accuracy_test_LR

In [None]:
difference_map_3D(df_facies_comparison, facies_pred, number_of_wells=7)

## Plot 2d section of the 3D cube

In [None]:
facies_pred_RF = facies_pred
facies_pred_RF.shape

In [None]:
df_facies_comparison_RF = df_facies_comparison
df_facies_comparison_RF.shape

In [None]:
facies_pred_RF[1].shape

In [None]:
facies_pred_RF_2D = facies_pred_RF[35, :, :]
facies_pred_RF_2D.shape

In [None]:
df_facies_comparison_RF_2D = df_facies_comparison_RF[35, :, :]
df_facies_comparison_RF_2D.shape

In [None]:
extent_plot = [0, 65, 0, 700]

In [None]:
def difference_map(df_facies_comparison, facies_predicted, extent, list_of_wells=None):
    """Plot a 2D map of where predicted facies differ from the ground truth.

    Matching samples are drawn in green (0) and mismatches in red (1).
    Samples that are NaN in either input stay NaN in the map.

    Args:
        df_facies_comparison (2D array): Ground-truth facies.
        facies_predicted (2D array): Facies predicted by a model, same shape.
        extent (list): ``[left, right, bottom, top]`` axis limits passed to
            ``imshow``.
        list_of_wells (sequence, optional): x positions at which a vertical
            black line is drawn. Defaults to None (no lines).

    Returns:
        None. The figure is displayed with ``plt.show()``.
    """
    facies_difference = np.asarray(np.subtract(df_facies_comparison, facies_predicted), dtype=float)
    # |sign(d)| is 0 for a match, 1 for any mismatch, and NaN where d is NaN.
    facies_difference_result = np.abs(np.sign(facies_difference))

    fig, ax = plt.subplots(figsize=(10, 4))

    cmap = matplotlib.colors.ListedColormap(['green', 'red'])
    im = ax.imshow(facies_difference_result, cmap=cmap, vmin=0, vmax=1,
                   aspect='auto', extent=extent)

    # "is not None" also works when the wells are given as a NumPy array.
    if list_of_wells is not None:
        for well in list_of_wells:
            ax.axvline(x=well, linewidth=1, color='black')

    ax.set_xlabel('trace number', fontsize=14)
    ax.set_ylabel('TWT [ms]', fontsize=14)
    ax.set_title('Difference map', fontsize=16)
    ax.tick_params(labelsize=12)
    fig.colorbar(im, ax=ax, ticks=[0, 1])
    plt.show()

In [None]:
difference_map(df_facies_comparison_RF_2D.T, facies_pred_RF_2D.T, extent_plot, list_of_wells=None)

In [None]:
facies_pred_RF_2D_1 = facies_pred_RF[5, :, :]
facies_pred_RF_2D_1.shape

In [None]:
df_facies_comparison_RF_2D_1 = df_facies_comparison_RF[5, :, :]
df_facies_comparison_RF_2D_1.shape

In [None]:
extent_plot = [0, 65, 0, 700]

In [None]:
difference_map(df_facies_comparison_RF_2D_1.T, facies_pred_RF_2D_1.T, extent_plot, list_of_wells=None)

# Deep Learning model

In [None]:
import tensorflow as tf
from tensorflow import keras
from sklearn.model_selection import train_test_split

In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Activation, Dense

In [None]:
from tensorflow.keras.layers import BatchNormalization, Dropout
from tensorflow.keras import optimizers

In [None]:
from tensorflow.keras.layers import InputLayer

In [None]:
facies_and_features_NN = facies_and_features[['facies', 'relai', 'seis', 'envel', 'inst_freq', 'seis_inv']]
facies_and_features_NN

In [None]:
tf.random.set_seed(123)
tf.debugging.set_log_device_placement(False)

## Split the data in train, test and validation sets

In [None]:
# Shuffle the rows; the fixed seed keeps the order identical on every run.
facies_and_features_mixed = facies_and_features_NN.sample(frac=1, random_state=123)
facies_and_features_mixed.head(5)

In [None]:
# 60 % of the rows for training; the remaining 40 % are split again below.
# The fixed seed makes the split reproducible.
train_set, test_val_set = train_test_split(facies_and_features_mixed, test_size=0.4, random_state=123)

print(train_set.shape)
print(test_val_set.shape)

Split `test_val_set` into test and validation sets

In [None]:
# Halve the held-out rows into a test set and a validation set (seeded for reproducibility).
test_set, val_set = train_test_split(test_val_set, test_size=0.5, random_state=123)
print(test_set.shape)
print(val_set.shape)

Separate the facies column (the labels) from the feature columns

In [None]:
train_labels1 = train_set.pop('facies')
test_labels1 = test_set.pop('facies')
valid_labels1 = val_set.pop('facies')

In [None]:
train_labels2 = train_labels1.astype(np.uint8)
train_labels2

valid_labels2 = valid_labels1.astype(np.uint8)
test_labels2 = test_labels1.astype(np.uint8)

One-hot encode the labels

In [None]:
# One-hot encode the facies labels of each split; columns are named 'Label_<value>'.
# NOTE(review): each split is encoded on its own, so a facies class that is
# absent from one split would give that split fewer columns — confirm that
# every split contains all classes.
train_labels = pd.get_dummies(train_labels1, prefix='Label')
val_labels = pd.get_dummies(valid_labels1, prefix='Label')
test_labels = pd.get_dummies(test_labels1, prefix='Label')

In [None]:
train_labels.info()

The features are already standardized, so no further scaling is needed.

## Define the NN architecture

In [None]:
from keras.models import Sequential
from keras.layers import Dense, Dropout, BatchNormalization, LeakyReLU, Activation
from keras import optimizers, regularizers
from keras.callbacks import LearningRateScheduler
import math



def NN_model_advanced(train_set_shape, learning_rate=0.001, n_classes=5):
    """Build and compile the fully connected facies classifier.

    Four Dense blocks (128, 256, 128 and 64 units) are each followed by batch
    normalization, a non-linearity and 50 % dropout; a softmax layer produces
    the class probabilities. All hidden Dense layers use L2 regularization.

    Args:
        train_set_shape (int): Number of input features.
        learning_rate (float): Learning rate of the Adamax optimizer. It must
            be positive: no learning-rate scheduler is attached during
            training, so a rate of 0 would leave the weights unchanged.
        n_classes (int): Number of output classes (width of the softmax layer).

    Returns:
        The compiled Sequential model (categorical cross-entropy loss,
        accuracy metric).
    """
    model = Sequential()

    # Input block
    model.add(Dense(128, input_shape=(train_set_shape,), kernel_regularizer=regularizers.l2(0.01)))
    model.add(BatchNormalization())
    model.add(LeakyReLU())
    model.add(Dropout(0.5))

    # Hidden blocks 1 and 2
    for units in (256, 128):
        model.add(Dense(units, kernel_regularizer=regularizers.l2(0.01)))
        model.add(BatchNormalization())
        model.add(LeakyReLU())
        model.add(Dropout(0.5))

    # Hidden block 3 (ELU activation instead of LeakyReLU)
    model.add(Dense(64, kernel_regularizer=regularizers.l2(0.01)))
    model.add(BatchNormalization())
    model.add(Activation('elu'))
    model.add(Dropout(0.5))

    # Output layer
    model.add(Dense(n_classes, activation='softmax'))

    # `learning_rate` is the supported keyword; the older `lr` alias is
    # deprecated.
    optimizer = optimizers.Adamax(learning_rate=learning_rate)
    model.compile(loss='categorical_crossentropy',
                  optimizer=optimizer,
                  metrics=['accuracy'])

    return model





# Training configuration.
Epochs = 500
Batch_size = 150

# The network takes one input per feature column of the training table.
model = NN_model_advanced(train_set.shape[1])
print('Here is a summary of this model: ')
model.summary()

# Training is pinned to the CPU device.
with tf.device('/CPU:0'):
    history = model.fit(
        train_set,
        train_labels,
        batch_size=Batch_size,
        epochs=Epochs,
        verbose=1,
        shuffle=True,
        # NOTE(review): the step count is rounded down, so presumably a trailing
        # partial batch is not used within an epoch — confirm this is intended.
        steps_per_epoch = int(train_set.shape[0]/Batch_size),
        validation_data = (val_set, val_labels))



In [None]:
print('summary of the results after each epoch: ')
hist_res = pd.DataFrame(history.history)
hist_res['epoch'] = history.epoch
hist_res.tail(10)

## Inspect the accuracy on the training and validation sets

In [None]:
plt.plot(history.history['accuracy'])
plt.plot(history.history['val_accuracy'])
plt.title('model accuracy')
plt.ylabel('accuracy')
plt.xlabel('epoch')
plt.legend(['Train', 'Cross-Validation'], loc='lower right')
plt.show()

In [None]:
plt.plot(history.history['loss'])
plt.plot(history.history['val_loss'])
plt.title('Model loss')
plt.ylabel('Loss')
plt.xlabel('epoch')
plt.legend(['Train', 'Cross-Validation'], loc='upper right')
plt.show()

In [None]:
nn_test_prob = model.predict(test_set)
nn_test_prob.shape

In [None]:
nn_test_prob

In [None]:
test_nn2 = np.array(pd.DataFrame(nn_test_prob).idxmax(axis=1))
test_nn2.shape

In [None]:
dfg = test_nn2.reshape(test_nn2.shape[0], -1)

In [None]:
dfg.shape

In [None]:
print(confusion_matrix(test_labels1, test_nn2))
print(accuracy_score(test_labels1, test_nn2))
print(classification_report(test_labels1, test_nn2))

## Predict the facies section

In [None]:
def predict_facies_3D_NN(facies, relai_std, seis_std, envel_std, inst_freq_std, seis_inv_std, model):
    """Predict facies for every sample of the cube with a neural network.

    A sample is predicted only when all five features are present (not NaN).
    All such samples are passed to ``model.predict`` in a single call and the
    class with the highest probability is taken as the prediction. Because no
    per-trace buffer is reused, a prediction can never leak from one trace
    into another.

    Args:
        facies (ndarray): Reference facies cube (NaN where undefined).
        relai_std, seis_std, envel_std, inst_freq_std, seis_inv_std (ndarray):
            Feature cubes with the same shape as ``facies``. They are fed to
            the model in this order.
        model: Model whose ``predict`` returns one row of class probabilities
            per input row.

    Returns:
        tuple: ``(predicted, reference)``, both shaped like ``facies``.
        ``predicted`` holds the index of the most probable class where the
        features and the reference facies are both present, NaN elsewhere.
        ``reference`` holds the reference facies where the features are
        present, NaN elsewhere.

    Raises:
        ValueError: If a feature cube does not have the shape of ``facies``.
    """
    facies = np.asarray(facies, dtype=float)
    cube_shape = facies.shape

    feature_cubes = [np.asarray(cube, dtype=float)
                     for cube in (relai_std, seis_std, envel_std, inst_freq_std, seis_inv_std)]
    for cube in feature_cubes:
        if cube.shape != cube_shape:
            raise ValueError(f"feature cube shape {cube.shape} does not match facies shape {cube_shape}")

    # One row per sample, one column per feature.
    feature_table = np.stack([cube.reshape(-1) for cube in feature_cubes], axis=1)
    facies_flat = facies.reshape(-1)

    has_features = ~np.isnan(feature_table).any(axis=1)
    has_facies = ~np.isnan(facies_flat)

    predicted_flat = np.full(facies_flat.shape, np.nan)
    reference_flat = np.full(facies_flat.shape, np.nan)
    reference_flat[has_features] = facies_flat[has_features]

    if has_features.any():
        probabilities = np.asarray(model.predict(feature_table[has_features]))

        class_index = np.full(facies_flat.shape, np.nan)
        # argmax returns the first column with the highest probability.
        class_index[has_features] = np.argmax(probabilities, axis=1)

        # Keep predictions only where a reference facies exists.
        keep = has_features & has_facies
        predicted_flat[keep] = class_index[keep]

    return predicted_flat.reshape(cube_shape), reference_flat.reshape(cube_shape)

In [None]:
facies_pred, df_facies_comparison = predict_facies_3D_NN(facies, relai_std, seis_std, envel_std, inst_freq_std, seis_inv_std, model)

In [None]:
plot_3D_cube(data=facies_pred, color_map='facies', number_of_facies=4, number_of_wells=7)

In [None]:
difference_map_3D(df_facies_comparison, facies_pred, number_of_wells=7)