# Machine Learning to Predict Brittleness from other Geophysical Logs

## Data: 4 wells from the Appalachian Basin

In [None]:
import os
import pandas as pd

import seaborn as sns
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.colors as colors

from sklearn import metrics
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from sklearn.model_selection import GridSearchCV, train_test_split
from sklearn.pipeline import Pipeline
from sklearn.ensemble import GradientBoostingRegressor as gbR, GradientBoostingClassifier as gbC, IsolationForest
from sklearn.svm import SVC, SVR
from sklearn.neural_network import MLPClassifier, MLPRegressor
from sklearn.feature_selection import mutual_info_regression

pd.set_option('display.max_columns', None)   #to display all the column information
pd.options.display.max_seq_items = 2000

### Load data

In [None]:
# Folder that holds one merged CSV export per well (relative to the notebook).
file_directory = r"../Thesis work/Thesis work/Well_Data_CSV_Merged"    #for macbook google drive

file_name1 = "Poseidon.csv"
file_name2 = "Boggess.csv"
file_name3 = "Mip3h.csv"
file_name4 = "Whipkey.csv"

file_name = [file_name1, file_name2, file_name3, file_name4]

# One DataFrame per well, in the same order as `file_name`.
data = [pd.read_csv(os.path.join(file_directory, csv_name)) for csv_name in file_name]

In [None]:
# Give each loaded well its own name; the positions follow the order of `file_name`.
data_poseidon, data_boggess, data_mip3h, data_whipkey = data[0], data[1], data[2], data[3]

In [None]:
# ## Marcellus Shale interval
# data_poseidon = data_poseidon.loc[(data_poseidon['DEPT'] > 7880) & (data_poseidon['DEPT'] < 8040)]
# data_boggess = data_boggess.loc[(data_boggess['DEPT'] > 7880) & (data_boggess['DEPT'] < 7970)]
# data_mip3h = data_mip3h.loc[(data_mip3h['DEPT'] > 7450) & (data_mip3h['DEPT'] < 7560)]
# data_whipkey = data_whipkey.loc[(data_whipkey['DEPT'] > 7730) & (data_whipkey['DEPT'] < 7840)]

In [None]:
# Report how many rows were read for each well.
print("The Poseidon data has {} rows".format(len(data_poseidon)))
print("The Boggess data has {} rows".format(len(data_boggess)))
print("The Mip3h data has {} rows".format(len(data_mip3h)))
print("The Whipkey data has {} rows".format(len(data_whipkey)))

# Input and Output of the Model

### Data for Regression task

In [None]:
target = 'Brittleness_new'   #name of the output feature

# Columns kept from every well: depth, the candidate input logs and, last, the target itself.
features = ['DEPT', 'GR', 'NPHI', 'RHOZ', 'HCAL', 'DTCO', 'PEFZ', target]
# features =  ['DEPT', 'GR','RHOZ', 'HCAL', 'NPHI','DTCO', 'Brittleness_new']   #list of the features names to select

In [None]:
# Stack the three modelling wells (Mip3h is not included here) and keep only the selected columns.
data = pd.concat([data_whipkey, data_boggess, data_poseidon], ignore_index=True)
data = data[features]

In [None]:
fig, ax = plt.subplots(1, 2, figsize = (15,6))

# Left panel is coloured by the `Brittleness` column, right panel by `Brittleness_new`;
# axis labels, guide lines and region captions are the same on both.
mappables = []
for panel, colour_column in zip(ax, ('Brittleness', 'Brittleness_new')):
    mappables.append(panel.scatter(data_poseidon.PR_DYN, data_poseidon.YME_DYN, c = data_poseidon[colour_column]))
    panel.set_xlabel("Poisson's ratio", fontsize =15)
    panel.set_ylabel("Young's modulus", fontsize =15)
    panel.axhline(y=6, color='r', linestyle='--')
    panel.axvline(x=0.2, color='r', linestyle='--')
    # captions are positioned in axes-fraction coordinates (transAxes)
    for x_frac, y_frac, caption in ((0.23, 0.6, 'Brittle Region'), (0.75, 0.08, 'Ductile Region')):
        panel.text(x_frac, y_frac, caption, fontsize=15, horizontalalignment='center',
                   verticalalignment='center', transform=panel.transAxes, c='r')

m, l = mappables
fig.colorbar(l, ax = ax[1])
fig.colorbar(m, ax = ax[0])

# fig.savefig(r'./Images/{}.png'.format('YME-PR plot'), dpi=300)

In [None]:
# Blank out every negative reading in any column, then discard rows with a missing value.
# NOTE(review): plot_logs2 draws NPHI on an axis that starts at -0.1, so slightly
# negative NPHI readings may be legitimate - confirm that dropping them is intended.
data[data < 0] = np.nan
data = data.dropna()

In [None]:
# (rows, columns) of the combined training frame at this point in the notebook.
data.shape

In [None]:
# Summary statistics for every selected column.
data.describe()

In [None]:
# Spearman rank correlation between all selected columns.
# TODO: add a correlation plot.
data.corr(method = 'spearman')

In [None]:
def StatRelat(data, target):
    """
    Rank the predictors by mutual information with the target and plot the
    ranking next to a correlation heatmap.

    Rows containing NaN are dropped first. 'DEPT' and the target are excluded
    from the predictors; 'DEPT' is also excluded from the heatmap. The
    correlation is pandas' default for ``DataFrame.corr()``.

    Parameters
    ----------
    data : DataFrame
        The data; must contain a 'DEPT' column and the target column.
    target: Str
        The column name of the target feature

    Returns
    -------
    None
        The ranking is printed, and a two-panel figure (bar chart of relative
        mutual information, lower-triangle correlation heatmap) is saved to
        ./Images/feature_selection.png.
    """
    df2 = data.copy().dropna()
    # separate DataFrames for predictor and response features
    X = df2.drop(['DEPT', target], axis=1)._get_numeric_data()
    y = df2.loc[:, [target]]._get_numeric_data()

    mi = mutual_info_regression(X, np.ravel(y), random_state=20)  # calculate mutual information
    peak = np.max(mi)
    if peak > 0:
        # relative mutual information; skipped when every score is 0 to avoid 0/0
        mi = mi / peak

    order = np.argsort(mi)[::-1]              # indices for descending order
    ranked_names = X.columns[order]
    ranked_scores = mi[order]

    print("Feature ranking:")                 # write out the feature importances
    for rank, (name, score) in enumerate(zip(ranked_names, ranked_scores), start=1):
        print("%d. feature %s = %f" % (rank, name, score))

    fig, ax = plt.subplots(nrows=1, ncols=2, figsize=(15, 7))

    n_predictors = X.shape[1]
    ax[0].bar(range(n_predictors), ranked_scores, color="g", align="center")
    ax[0].set_title("Mutual Information")
    ax[0].set_xticks(range(n_predictors))
    ax[0].set_xticklabels(ranked_names, rotation=90)
    ax[0].set_xlim([-1, n_predictors])

    cmap = sns.diverging_palette(250, 10, as_cmap=True)
    corr = df2.drop(['DEPT'], axis=1).corr()  # computed once, reused for mask and heatmap
    mask = np.zeros_like(corr.values)
    mask[np.triu_indices_from(mask)] = True   # hide the diagonal and the upper triangle
    with sns.axes_style("white"):
        sns.heatmap(corr, mask=mask, cmap=cmap, vmax=.3, ax=ax[1], square=True, annot=True)
        ax[1].set_yticklabels(ax[1].get_yticklabels(), rotation=45)

    # The output folder may not exist yet when this runs; create it so savefig cannot fail on it.
    os.makedirs(r'./Images', exist_ok=True)
    fig.savefig(r'./Images/{}.png'.format('feature_selection'), dpi=300)

In [None]:
# Print the mutual-information ranking and draw/save the feature-selection figure.
StatRelat(data, target)

In [None]:
# Descriptive statistics per log (depth excluded), one row per column, rounded to 2 decimals.
data_summary = data.drop(columns=['DEPT']).describe().transpose().round(2)
# data_summary.to_excel(r'./Images/{}.xlsx'.format('data_summary_before_stand'))

In [None]:
# Display the summary table built in the previous cell.
data_summary

In [None]:
# Range (max - min) of every column in the summary table.
data_summary['max'] - data_summary['min']

In [None]:
# Standard deviation of every column of `data` (DEPT included).
data.std()

In [None]:
# Summary statistics after min-max scaling (depth excluded). For inspection only:
# `scaler` and `data_norm` are overwritten by the standard-scaling cell.
scaler = MinMaxScaler()
data_norm = data.drop(columns=['DEPT'])
data_norm = pd.DataFrame(scaler.fit_transform(data_norm), columns=data_norm.columns)
data_norm_summary = data_norm.describe().transpose().round(2)
data_norm_summary
# data_norm_summary.to_excel(r'./Images/{}.xlsx'.format('data_summary_after_minmax'))

In [None]:
# Summary statistics after standardisation (depth excluded); reuses the names
# `scaler`, `data_norm` and `data_norm_summary` from the min-max cell.
scaler = StandardScaler()
data_norm = data.drop(columns=['DEPT'])
data_norm = pd.DataFrame(scaler.fit_transform(data_norm), columns=data_norm.columns)
data_norm_summary = data_norm.describe().transpose().round(2)
data_norm_summary
# data_norm_summary.to_excel(r'./Images/{}.xlsx'.format('data_summary_after_standard'))

In [None]:
# Predictors: every selected column except depth, RHOZ and the target.
X = data.drop(columns=['DEPT', 'RHOZ', target])
# Keep the target as a one-column DataFrame (later cells call .values.ravel() on it).
y = data[[target]]

# 80/20 split with a fixed seed.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=1)

In [None]:
# Size of the training predictors before outlier removal.
X_train.shape

In [None]:
def box_plot(X_train, save_file_name):
    """
    Draw one box plot per column of ``X_train``, side by side.

    Parameters
    ----------
    X_train : DataFrame
        The features to plot; each column gets its own axis.
    save_file_name : str
        File name (without extension) for the saved figure. Currently unused
        because the savefig call at the end of the function is commented out.

    Returns
    -------
    None
    """
    # squeeze=False keeps the axes in a 2-D array even when there is a single
    # column, so indexing below works for any number of features.
    fig, axes = plt.subplots(1, len(X_train.columns), figsize=(15, 8), squeeze=False)

    for axis, feature in zip(axes[0], X_train.columns):
        axis.boxplot(X_train[feature])
        axis.set_ylabel(feature, fontsize=20)

        # keep only the left spine (the value axis)
        for side in ("right", "top", "bottom"):
            axis.spines[side].set_visible(False)
        axis.get_xaxis().set_visible(False)
    #     fig.savefig(r'./Images/{}.png'.format(save_file_name), dpi=300)

In [None]:
# Feature distributions of the training set before outlier removal.
box_plot(X_train, "before_outlier_removal")

In [None]:
# identify outliers in the training dataset
# random_state pins the forest so the same rows are removed on every run; without it
# the training set (and every score computed from it) changes between runs.
iso = IsolationForest(contamination=0.1, random_state=1)
yhat = iso.fit_predict(X_train)
# select all rows that are not outliers (fit_predict marks outliers with -1)
mask = yhat != -1
X_train, y_train = X_train[mask], y_train[mask]
# summarize the shape of the updated training dataset
print(X_train.shape, y_train.shape)

In [None]:
# Feature distributions of the training set after the IsolationForest filter.
box_plot(X_train, "after_outlier_removal")

In [None]:
# Blind well: Mip3h is not part of the training frame, so it is only used for evaluation.
# NOTE(review): the training frame also had negative readings removed; that filter is
# not applied here - confirm whether the blind well should be cleaned the same way.
df = data_mip3h[features].dropna()
X_blind = df.drop(columns=['DEPT', 'RHOZ', target])
y_blind = df[[target]]

# X_blind = data_boggess.loc[: ,features].drop([target], axis=1)
# y_blind = data_boggess.loc[:,[target]]


In [None]:
# Size of the held-out test predictors.
X_test.shape

In [None]:
# Size of the blind-well predictors.
X_blind.shape

## Model Building

In [None]:
def _build_estimator(algorithm, classification):
    """Return the unfitted estimator for an already-validated algorithm name and task."""
    if algorithm == 'svm':
        return SVC(random_state=83) if classification else SVR()
    if algorithm == 'neural':
        return MLPClassifier(random_state=677) if classification else MLPRegressor(random_state=134)
    return gbC(random_state=10) if classification else gbR(random_state=824)


def modelfit(X_train, X_test, X_blind, y_train, y_test, y_blind, algorithm, hyper_parameters, scaler, classification,printFeatureImportance=True, cv_folds=3):
    """
    Tune a scaler + model pipeline with a cross-validated grid search, print a
    train/test/blind report and return the best pipeline.

    Parameters
    ----------
    X_train : DataFrame
        The input features for the training set
    X_test : DataFrame
        The input features for the testing set
    X_blind : DataFrame
        The input features for the blind set
    y_train : DataFrame
        The output feature for the training set
    y_test : DataFrame
        The output feature for the testing set
    y_blind : DataFrame
        The output feature for the blind set
    algorithm : {'neural','svm','gradientboosting'}
        The Machine Learning model
    hyper_parameters : dict
        Grid of hyperparameters to tune. The estimator is the 'model' step of
        the pipeline, so keys must be prefixed with "model__".
    scaler : {'standard','minmax'}
        Scaling technique to employ.
    classification : bool
        Flag to specify the modeling technique. True for classification
        (scored with accuracy) and False for regression (scored with R2).
    printFeatureImportance : bool
        Flag to specify if to display the feature importance bar chart.
        Only has an effect for algorithm='gradientboosting'.
    cv_folds : int
        Number of cross-validation folds. default is 3.

    Returns
    -------
    model : the ``best_estimator_`` of the grid search (the scaler + model
        pipeline), which can be deployed or saved

    Raises
    ------
    ValueError
        If `scaler` or `algorithm` is not one of the supported names.
    """
    # Validate the string options up front so a typo fails here, with a clear
    # message, instead of as an unrelated error further down.
    if scaler not in ('standard', 'minmax'):
        raise ValueError("invalid scaler: use 'standard' or 'minmax'")
    if algorithm not in ('svm', 'neural', 'gradientboosting'):
        raise ValueError("invalid algorithm: use 'svm' or 'neural' or 'gradientboosting'")

    classification = bool(classification)
    scaling_step = StandardScaler() if scaler == 'standard' else MinMaxScaler()

    # The two tasks differ only in the estimator, the CV scoring and the report metric.
    if classification:
        scoring, score_fn, score_name = 'accuracy', metrics.accuracy_score, 'accuracy'
    else:
        scoring, score_fn, score_name = 'r2', metrics.r2_score, 'R2 score'

    pipe = Pipeline(steps=[('scaler', scaling_step),
                           ('model', _build_estimator(algorithm, classification))])
    model = GridSearchCV(estimator=pipe,
                         param_grid=hyper_parameters,
                         scoring=scoring, n_jobs=-1,
                         cv=cv_folds, verbose=1)

    #Fit the model on the data
    model.fit(X_train.values, y_train.values.ravel())

    # Predict on plain arrays as well: the pipeline was fitted without feature
    # names, and scikit-learn warns when it is then handed a DataFrame.
    y_train_pred = model.predict(X_train.values)
    y_test_pred = model.predict(X_test.values)
    y_blind_pred = model.predict(X_blind.values)

    #Print model report:
    print("Model Report")
    print("-------------------------------")
    print("The training {0} : {1:.4g}".format(score_name, score_fn(y_train.values, y_train_pred)))
    print("The testing {0} is : {1:.4g}".format(score_name, score_fn(y_test.values, y_test_pred)))
    print("The blind well {0} is : {1:.4g}".format(score_name, score_fn(y_blind.values, y_blind_pred)))
    print("CV best score : {0:.4g}".format(model.best_score_))
    print("CV best parameter combinations : {}".format(model.best_params_))

    #Print Feature Importance (only the gradient boosting estimators expose it here)
    if algorithm == 'gradientboosting' and printFeatureImportance:
        importances = model.best_estimator_.named_steps.model.feature_importances_
        feat_imp = pd.Series(importances, X_train.columns).sort_values(ascending=False)
        feat_imp.plot(kind='barh', title='Feature Importances')
        plt.xlabel('Feature Importance Score')

    return model.best_estimator_


In [None]:
# Candidate search space for the gradient boosting model. This cell used to hold the
# bare dictionary entries (a syntax error); they are kept here under their own name.
# It is not used unless assigned to `hyper_parameters` before calling modelfit.
gb_candidate_grid = {
    "model__min_samples_split": [2, 3, 4, 5],
    "model__min_samples_leaf": [1, 2, 3, 4, 5],
    "model__max_depth": range(4, 8, 1),
    "model__n_estimators": range(100, 301, 50),
}

## Training the Gradient Boosting

In [None]:
# Grid for the gradient boosting run: see the GradientBoostingRegressor documentation
# for the available parameters. Every key needs the "model__" prefix because the
# estimator is the 'model' step of the pipeline built in modelfit.
# The grid is empty here, so the search only evaluates the estimator's default settings.
hyper_parameters =  {
                    }

In [None]:
# Gradient boosting regressor with min-max scaling and 3-fold cross-validation.
model_gb = modelfit(
    X_train, X_test, X_blind,
    y_train, y_test, y_blind,
    algorithm='gradientboosting',
    hyper_parameters=hyper_parameters,
    scaler='minmax',
    classification=False,
    printFeatureImportance=True,
    cv_folds=3,
)

In [None]:
#sample size vs score
# Refit a fixed gradient boosting pipeline on growing prefixes of the training set
# and record the R2 on that prefix, on the test set and on the blind well.

# random_state matches the regressor built in modelfit so the curve is repeatable.
m = Pipeline(steps=[('scaler', StandardScaler()),
                    ('model', gbR(max_depth=7, min_samples_leaf=1, min_samples_split=3, random_state=824))])

size = np.arange(500, X_train.shape[0], 500)

train_scores = []
test_scores = []
blind_scores = []

for i in size:
    X_subset = X_train.iloc[:i, :].values
    y_subset = y_train.iloc[:i, :].values
    m.fit(X_subset, y_subset.ravel())
    train_scores.append(metrics.r2_score(y_subset, m.predict(X_subset)))
    # predict on arrays too: the pipeline is fitted without feature names
    test_scores.append(metrics.r2_score(y_test.values, m.predict(X_test.values)))
    blind_scores.append(metrics.r2_score(y_blind.values, m.predict(X_blind.values)))


In [None]:
# R2 versus number of training samples for the three evaluation sets.
for curve_label, curve_scores in (('train', train_scores), ('test', test_scores), ('blind', blind_scores)):
    plt.plot(size, curve_scores, label = curve_label)

plt.legend()

In [None]:
# Horizontal bar chart of the tuned gradient boosting model's feature importances.
feat_imp = pd.Series(
    model_gb.named_steps['model'].feature_importances_,
    index=X_train.columns,
).sort_values(ascending=False)
feat_imp.plot(kind='barh', title='Feature Importances')
plt.xlabel('Feature Importance Score')
# plt.savefig(r'./Images/{}.png'.format('gb_feature_importance'), dpi=300)

In [None]:
# model2 = modelfit(X_train2, X_test2, X_blind2, y_train2, y_test2, y_blind2, algorithm='gradientboosting', 
#              hyper_parameters=hyper_parameters, scaler='standard', 
#              classification=True,printFeatureImportance=True, cv_folds=3)

## Training the SVM

In [None]:
# Further candidate settings for the SVM search. This cell used to hold the bare
# dictionary entries (a syntax error); they are kept here under their own name.
# It is not used unless merged into `hyper_parameters` before calling modelfit.
svm_candidate_grid = {
    'model__kernel': ['linear', 'poly', 'rbf', 'sigmoid'],
    'model__gamma': ['scale', 'auto'],
    'model__C': [1, 10, 100],
}

In [None]:
# Grid for the SVR run: see the SVR() documentation for the available parameters.
# Keys need the "model__" prefix because the estimator is the 'model' pipeline step.
hyper_parameters = {
    'model__epsilon': np.arange(0.01, 0.1, 0.01),
}

In [None]:
# Support vector regressor with standard scaling and 3-fold cross-validation.
model_svm = modelfit(
    X_train, X_test, X_blind,
    y_train, y_test, y_blind,
    algorithm='svm',
    hyper_parameters=hyper_parameters,
    scaler='standard',
    classification=False,
    printFeatureImportance=True,
    cv_folds=3,
)

In [None]:
# Shape of the support-vector array of the tuned SVR (first entry = number of support vectors).
model_svm.named_steps.model.support_vectors_.shape

In [None]:
#sample size vs score
# Refit a fixed SVR pipeline on growing prefixes of the training set and record
# the R2 on that prefix, on the test set and on the blind well.

m = Pipeline(steps=[('scaler', StandardScaler()), ('model', SVR(epsilon=0.02))])

size = np.arange(500, X_train.shape[0], 500)

train_scores = []
test_scores = []
blind_scores = []

for i in size:
    X_subset = X_train.iloc[:i, :].values
    y_subset = y_train.iloc[:i, :].values
    m.fit(X_subset, y_subset.ravel())
    train_scores.append(metrics.r2_score(y_subset, m.predict(X_subset)))
    # predict on arrays too: the pipeline is fitted without feature names
    test_scores.append(metrics.r2_score(y_test.values, m.predict(X_test.values)))
    blind_scores.append(metrics.r2_score(y_blind.values, m.predict(X_blind.values)))

In [None]:
# R2 versus number of training samples for the three evaluation sets.
for curve_label, curve_scores in (('train', train_scores), ('test', test_scores), ('blind', blind_scores)):
    plt.plot(size, curve_scores, label = curve_label)

plt.legend()

## Training the Neural Network

In [None]:
# Grid for the neural-network run. The run below is a regression (classification=False),
# so see the MLPRegressor() documentation for the available parameters.
# Keys need the "model__" prefix because the estimator is the 'model' pipeline step.
hyper_parameters = {
    'model__hidden_layer_sizes': [(10, 10), (19, 19), (20,), (20, 20)],
    'model__tol': [0.0001, 0.00001, 0.001],
    'model__solver': ['lbfgs'],
    'model__max_iter': [1000],
}

In [None]:
# Multi-layer perceptron regressor with min-max scaling and 3-fold cross-validation.
model_nn = modelfit(
    X_train, X_test, X_blind,
    y_train, y_test, y_blind,
    algorithm='neural',
    hyper_parameters=hyper_parameters,
    scaler='minmax',
    classification=False,
    printFeatureImportance=True,
    cv_folds=3,
)

In [None]:
#sample size vs score
# Refit a fixed MLP pipeline on growing prefixes of the training set and record
# the R2 on that prefix, on the test set and on the blind well.

# random_state matches the regressor built in modelfit so the weight initialisation,
# and therefore the curve, is repeatable.
m = Pipeline(steps=[('scaler', StandardScaler()),
                    ('model', MLPRegressor(hidden_layer_sizes=(19, 19), max_iter=1000,
                                           solver='lbfgs', tol=1e-05, random_state=134))])

size = np.arange(500, X_train.shape[0], 500)

train_scores = []
test_scores = []
blind_scores = []

for i in size:
    X_subset = X_train.iloc[:i, :].values
    y_subset = y_train.iloc[:i, :].values
    m.fit(X_subset, y_subset.ravel())
    train_scores.append(metrics.r2_score(y_subset, m.predict(X_subset)))
    # predict on arrays too: the pipeline is fitted without feature names
    test_scores.append(metrics.r2_score(y_test.values, m.predict(X_test.values)))
    blind_scores.append(metrics.r2_score(y_blind.values, m.predict(X_blind.values)))

In [None]:
# R2 versus number of training samples for the three evaluation sets.
for curve_label, curve_scores in (('train', train_scores), ('test', test_scores), ('blind', blind_scores)):
    plt.plot(size, curve_scores, label = curve_label)

plt.legend()

## Visualizing the Result

In [None]:
#create folder to save images (no error if it already exists)
os.makedirs(r'./Images', exist_ok=True)

In [None]:
def _plot_prediction_track(track, measured, measured_depth, predictors, predicted_depth,
                           model, prediction_label, colour):
    """Draw the measured brittleness and one model's prediction on a single track."""
    ax_measured = track.twiny()
    ax_measured.grid(True)
    ax_measured.set_xlim(0, 1)
    ax_measured.spines['top'].set_position(('outward', 0))
    ax_measured.plot(measured, measured_depth, label='BRITTLENESS ESTIMATE', color='black')
    ax_measured.set_xlabel('BRITTLENESS ESTIMATE', color='black')
    ax_measured.tick_params(axis='x', colors='black')

    ##Ploting the predicted data
    # The prediction is drawn on the measured-curve axis so both share its 0-1 scale;
    # the second twin axis only carries the coloured label and spine.
    ax_pred = track.twiny()
    # plain array: the models returned by modelfit are fitted without feature names
    ax_measured.plot(model.predict(predictors.values), predicted_depth, color=colour, linestyle='--')

    ax_pred.spines['top'].set_position(('outward', 40))
    ax_pred.set_xlabel(prediction_label, color=colour)
    ax_pred.tick_params(axis='x', colors=colour)
    ax_pred.spines["top"].set_edgecolor(colour)

    track.get_xaxis().set_visible(False)
    track.yaxis.grid(True)
    track.axis('off')
    ax_measured.grid(True, alpha=0.5)


def plot_logs2(data, well_name, model_gb, model_svm, model_nn, formation, top_depth=7600):
    """
    Plot the input logs of one well and, in three further tracks, the measured
    brittleness against the prediction of each trained model.

    Tracks, left to right: GR (colour-filled) with PEFZ, NPHI with RHOZ, DTCO,
    then brittleness vs. the gradient boosting, neural network and SVM
    predictions. The function reads the notebook-level `features` and `target`
    names to build the model inputs, dropping 'DEPT', 'RHOZ' and the target.

    Parameters
    ----------
    data : DataFrame
        The well data to be plotted; needs the DEPT, GR, RHOZ, NPHI, DTCO, PEFZ
        and Brittleness_new columns plus every column listed in `features`.
    well_name : str
        The name of the well being plotted. Currently unused: the savefig call
        that followed the function body is commented out.
    model_gb, model_svm, model_nn :
        The trained gradient boosting, SVM and neural network models used for
        the predictions (anything exposing ``predict``).
    formation : dict
        The formation tops ( names as keys and depth interval as the item in a
        list). Only the base of 'Lower Marcellus' is used, as the deepest
        displayed depth.
    top_depth : float, optional
        Shallowest displayed depth. Default is 7600.

    Returns
    -------
    None
        The figure with the six tracks is drawn as a side effect.
    """
    #assigning the logs to variable names to make the code cleaner and easier to read
    MD = data.DEPT
    GR = data.GR
    RHOB = data.RHOZ
    NPHI = data.NPHI
    DT = data.DTCO
    PEFZ = data.PEFZ
    BA = data.Brittleness_new

    #creating the figure
    fig, ax = plt.subplots(nrows=1, ncols=6, figsize=(15,10), sharey=True, gridspec_kw={'width_ratios': [3,3,3,3,3,3]})

#     fig.suptitle("O  {}".format(well_name), fontsize=25)
    fig.subplots_adjust(top=0.85, wspace=0.2)

#     ax[0].set_ylim(formation['Upper Marcellus'][0],formation['Lower Marcellus'][1])  #display only a depth range
    ax[0].set_ylim(top_depth, formation['Lower Marcellus'][1])  #display only a depth range
    ax[0].invert_yaxis()   # depth increases downwards
    ax[0].set_ylabel('MD (M)',fontsize=20)
    ax[0].yaxis.grid(True)
    ax[0].get_xaxis().set_visible(False) #removing the x-axis label at the bottom of the fig

    ##Track 1
    ##Gamma_ray and PEF
    ax_GR = ax[0].twiny()  #share the depth axis
    ax_GR.set_xlim(0,270)
    ax_GR.plot(GR,MD, color='black')
    ax_GR.set_xlabel('GR (API)',color='black')
    ax_GR.tick_params('x',colors='black')  ##change the color of the x-axis tick label
    ax_GR.grid(True,alpha=0.5)

    #variable colorfill: 20 bands, each painted from 0 up to the GR curve wherever
    #GR reaches the band's threshold, so higher readings end up with later colours
    GR_range = abs(GR.min() - GR.max())
    cmap = plt.get_cmap('nipy_spectral')   #color map
    if GR_range > 0:   # a constant (or empty) GR log would give np.arange a zero step
        for index in np.arange(GR.min(), GR.max(), GR_range / 20):
            color = cmap((index - GR.min()) / GR_range)  #colour for this threshold
            ax_GR.fill_betweenx(MD, 0 , GR, where = GR >= index,  color = color)

    ax_PEFZ = ax[0].twiny()
    ax_PEFZ.plot(PEFZ,MD, color='red')
    ax_PEFZ.set_xlabel('PEFZ',color='red')
    ax_PEFZ.tick_params('x',colors='red')  ##change the color of the x-axis tick label
    ax_PEFZ.spines['top'].set_position(('outward',40)) ##move the x-axis up
    ax_PEFZ.spines["top"].set_edgecolor("red")

    #Track 2
    ##NPHI and RHOB
    ax_NPHI = ax[1].twiny()
    ax_NPHI.set_xlim(-0.1,0.4)
    ax_NPHI.invert_xaxis()
    ax_NPHI.plot(NPHI, MD, label='NPHI[%]', color='green')
    ax_NPHI.spines['top'].set_position(('outward',0))
    ax_NPHI.set_xlabel('NPHI[%]', color='green')
    ax_NPHI.tick_params(axis='x', colors='green')
    ax_NPHI.spines["top"].set_edgecolor("green")

    ax_RHOB = ax[1].twiny()
    ax_RHOB.set_xlim(1.95,2.95)
    ax_RHOB.invert_xaxis()
    ax_RHOB.plot(RHOB, MD,label='RHOB[g/cc]', color='red')
    ax_RHOB.spines['top'].set_position(('outward',40))
    ax_RHOB.set_xlabel('RHOB[g/cc]',color='red')
    ax_RHOB.tick_params(axis='x', colors='red')
    ax_RHOB.spines["top"].set_edgecolor('red')

    ax[1].get_xaxis().set_visible(False)
    ax[1].yaxis.grid(True)
    ax_RHOB.grid(True,alpha=0.5)
    ax[1].axis('off')

    # #color fill
    # x = np.array(ax_RHOB.get_xlim())
    # z = np.array(ax_NPHI.get_xlim())

    # nz=((NPHI-np.max(z))/(np.min(z)-np.max(z)))*(np.max(x)-np.min(x))+np.min(x)

    # ax_RHOB.fill_betweenx(MD, RHOB, nz, where=RHOB>=nz, interpolate=True, color='green')
    # ax_RHOB.fill_betweenx(MD, RHOB, nz, where=RHOB<=nz, interpolate=True, color='yellow')

    #Track 3
    ##Sonic
    ax_DT = ax[2].twiny()
    ax_DT.grid(True)
    ax_DT.set_xlim(100,50)
    ax_DT.spines['top'].set_position(('outward',0))
    ax_DT.plot(DT, MD, label='DT[us/ft]', color='blue')
    ax_DT.set_xlabel('DT[us/ft]', color='blue')
    ax_DT.tick_params(axis='x', colors='blue')
    ax_DT.spines["top"].set_edgecolor("blue")

    ax[2].get_xaxis().set_visible(False)
    ax[2].yaxis.grid(True)
    ax_DT.grid(True,alpha=0.5)
    ax[2].axis('off')

    #Tracks 4-6
    ##Brittleness: measured curve against the prediction of one model per track
    ###work on this for generalization
    # Model inputs are built once: complete rows only, without depth, RHOZ and the target.
    df = data.loc[: , features].dropna()
    predictors = df.drop(['DEPT','RHOZ',target], axis=1)

    prediction_tracks = (
        (ax[3], model_gb, 'BRITTLENESS (GB)', 'red'),
        (ax[4], model_nn, 'BRITTLENESS (NN)', 'blue'),
        (ax[5], model_svm, 'BRITTLENESS (SVM)', 'purple'),
    )
    for track, model, prediction_label, colour in prediction_tracks:
        _plot_prediction_track(track, BA, MD, predictors, df.DEPT, model, prediction_label, colour)

    
#     #formation top
#     ax_top = ax[-1]
#     ax[-1].axis('off')

#     formation_midpoints = []
#     for key, value in formation.items():
#         #Calculate mid point of the formation
#         formation_midpoints.append(value[0] + (value[1]-value[0])/2)

#     zone_colours = ["red", "blue", "green"]

#     for ax in [ax_GR, ax_NPHI, ax_BA1, ax_BA2, ax_BA3, ax_top]:
#         # loop through the formations dictionary and zone colours
#         for depth, colour in zip(formation.values(), zone_colours):
#             # use the depths and colours to shade across the subplots
#             ax.axhspan(depth[0], depth[1], color=colour, alpha=0.1)

#     for label, formation_mid in zip(formation.keys(), 
#                                         formation_midpoints):
#         ax_top.text(0.5, formation_mid, label, rotation=90,
#                     verticalalignment='center', horizontalalignment='center', fontweight='bold',
#                     fontsize='large')
#     fig.savefig(r'./Images/{}.png'.format(well_name), dpi=600)

In [None]:
# Column names (and order) of the predictors the models were trained on.
X_train.columns

In [None]:
# formation = {'Tully': [7195,7310],
#              'Mahantango': [7310,7455],
#             'Marcellus': [7455,7560]}

# Marcellus sub-unit depth intervals [top, base] passed to plot_logs2 for the Mip3h well.
formation = {
    'Upper Marcellus': [7453, 7476],
    'Middle Marcellus': [7476, 7517],
    'Lower Marcellus': [7517, 7555],
}

In [None]:
# Log plot for the blind well (Mip3h) with the predictions of all three models.
plot_logs2(data_mip3h, "MIP3H2", model_gb, model_svm, model_nn, formation)

In [None]:
# formation2 = {'Tully': [7604,7670],
#              'Mahantango': [7670,7882],
#             'Marcellus': [7882,8052]}
# Marcellus sub-unit depth intervals [top, base] passed to plot_logs2 for the Poseidon well.
formation_pos = {
    'Upper Marcellus': [7883, 7961],
    'Middle Marcellus': [7961, 8015],
    'Lower Marcellus': [8015, 8052],
}

In [None]:
# Log plot for Poseidon (one of the wells in the training frame) with all three predictions.
plot_logs2(data_poseidon, "Poseidon", model_gb, model_svm, model_nn, formation_pos)

In [None]:
# Marcellus sub-unit depth intervals [top, base] for Boggess, followed by its log plot.
formation_bog = {
    'Upper Marcellus': [7877, 7905],
    'Middle Marcellus': [7905, 7951],
    'Lower Marcellus': [7951, 7974],
}
plot_logs2(data_boggess, "Boggess", model_gb, model_svm, model_nn, formation_bog)

In [None]:
# Marcellus sub-unit depth intervals [top, base] for Whipkey, followed by its log plot.
formation_whip = {
    'Upper Marcellus': [7736, 7785],
    'Middle Marcellus': [7785, 7811],
    'Lower Marcellus': [7811, 7835],
}
plot_logs2(data_whipkey, "Whipkey", model_gb, model_svm, model_nn, formation_whip)