In [1]:
import openpyxl
from openpyxl import load_workbook
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
%matplotlib notebook
import warnings
warnings.filterwarnings('ignore')

# Code for section 4.4

In [2]:
# sheet_name=None loads every worksheet, so `df` is a mapping of
# sheet name -> DataFrame rather than a single frame.
df = pd.read_excel(
    "dataset.xlsx",
    sheet_name=None,
    header=0,
)

In [3]:
# Shallow copy of the sheet-name -> DataFrame mapping. The original built this
# with an enumerate loop whose index was unused and which rebound the global
# `data` to the last sheet as a side effect.
all_df = dict(df)
# Stack every sheet's rows into one frame. Each sheet keeps its own row index,
# so index labels in `all_data` may repeat across sheets.
all_data = pd.concat(df.values())

In [4]:
# Keep the six columns later used as model inputs plus the two label columns,
# then discard every row that has a missing value in any of them.
selected_columns = ["DEPTH", "DT", "GR", "LLD", "NPHI", "RHOB", "Log_Facies", "Fluid"]
all_data = all_data[selected_columns].dropna(axis=0, how='any')
# `data` is another name for the same object as `all_data` (no copy is made).
data = all_data

In [5]:
# Drop rows whose DEPTH is missing. The preceding dropna(how='any') already
# covers DEPTH, so this is a safety net; it is written with notna() instead of
# the `x == x` NaN trick so the intent is readable.
# .copy() makes `data` an independent frame: the following cells add and
# overwrite columns on it, which on a filtered slice raises
# SettingWithCopyWarning (currently hidden by the global warnings filter).
data = data[data["DEPTH"].notna()].copy()

In [6]:
# Pass every Log_Facies value through int().
data['Log_Facies'] = data['Log_Facies'].map(int)

In [7]:
# Every (Fluid, Log_Facies) pair in a fixed order; a pair's position in this
# list is its combined class label.
comb = [(fluid, facies) for fluid in (1, 2) for facies in (1, 2, 3)]


def label_facies(row):
    """Return the combined label for one row.

    The label is the position of the row's (Fluid, Log_Facies) pair in `comb`.

    Parameters
    ----------
    row : mapping-like
        Must support ``row['Fluid']`` and ``row['Log_Facies']``.

    Raises
    ------
    ValueError
        If the pair does not appear in `comb`.
    """
    pair = (row['Fluid'], row['Log_Facies'])
    return comb.index(pair)
# Row-wise application: each row is handed to label_facies and the returned
# index becomes that row's MixLabel.
mix_label = data.apply(label_facies, axis="columns")
data['MixLabel'] = mix_label

In [8]:
# Display (rows, columns) of the cleaned frame: the 8 selected columns plus MixLabel.
data.shape

(4695, 9)

In [9]:
from sklearn.model_selection import StratifiedShuffleSplit
from sklearn.preprocessing import StandardScaler

In [10]:
# Standardise the six input columns; fit_transform returns a NumPy array, so
# `X` is indexed positionally from here on.
# NOTE(review): the scaler is fitted on every row before the train/validation
# splits are drawn, so validation rows contribute to the scaling statistics.
# Consider fitting the scaler on each training fold only.
feature_columns = ["DEPTH", "DT", "GR", "LLD", "NPHI", "RHOB"]
ss = StandardScaler()
X = ss.fit_transform(data[feature_columns])

In [11]:
# Names of the three target columns, in the same order as Y1, Y2, Y3.
Y_Label = ['Log_Facies', 'Fluid', 'MixLabel']
# One array of target values per name above.
Y1, Y2, Y3 = (data[column].values for column in Y_Label)

In [12]:
# Number of stratified train/validation splits drawn for each target.
num_of_shuffler = 10
# Fixed seed so that Restart & Run All reproduces the same folds; without it
# every run draws different splits and the reported numbers drift.
SPLIT_SEED = 0


def _stratified_splits(y):
    """Return a generator of (train_idx, validation_idx) pairs stratified on `y`.

    Each split holds out 10% of the rows of `X` for validation.
    """
    splitter = StratifiedShuffleSplit(
        n_splits=num_of_shuffler,
        test_size=0.1,
        random_state=SPLIT_SEED,
    )
    return splitter.split(X, y)


shuffler1 = _stratified_splits(Y1)
shuffler2 = _stratified_splits(Y2)
shuffler3 = _stratified_splits(Y3)

In [13]:
# The split generators can only be consumed once; materialise them into lists
# so the folds can be revisited.
s1 = list(shuffler1)
s2 = list(shuffler2)
s3 = list(shuffler3)

In [14]:
# For each target, a list of (train_idx, validation_idx) tuples, one per split.
indices1, indices2, indices3 = (
    [(train_idx, validation_idx) for train_idx, validation_idx in folds]
    for folds in (s1, s2, s3)
)

In [15]:
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC

In [16]:
# Classifier class (not an instance); a fresh model is created with CLF() for every split.
# NOTE(review): SVC is imported alongside it, presumably as an alternative to swap in here.
CLF = LogisticRegression


In [18]:
# Confidence-thresholded evaluation.
# For every probability threshold `p` and every target, fit a fresh classifier
# on each split and score only the validation samples whose highest predicted
# class probability exceeds `p`. Counts are averaged over the splits and one
# summary line is printed per (threshold, target) pair.
count = []   # not used by this cell; kept in case cells outside this view read it
shape = 1    # overwritten below with the size of the most recent validation fold
ALL_Y = [Y1, Y2, Y3]
all_indices = [indices1, indices2, indices3]
seq = 0
for p in [0, 0.5, 0.6, 0.7, 0.8, 0.9, 0.95]:
    for (j, indices) in enumerate(all_indices):
        Y = ALL_Y[j]  # same target for every split, so look it up once
        n_splits = len(indices)
        all_total, all_tested, all_correct = 0, 0, 0
        for train_idx, validation_idx in indices:
            X_train, Y_train = X[train_idx], Y[train_idx]
            X_test, Y_test = X[validation_idx], Y[validation_idx]
            shape = Y_test.shape[0]
            model = CLF()
            model.fit(X_train, Y_train)
            pred = model.predict(X_test)
            pred_p = model.predict_proba(X_test)

            # Mask of validation samples the model is confident about;
            # computed once and applied to both predictions and truth.
            confident = pred_p.max(axis=1) > p
            y_pred_filter = pred[confident]
            y_test_filter = Y_test[confident]

            total = pred.shape[0]
            tested = y_pred_filter.shape[0]
            correct = int((y_pred_filter == y_test_filter).sum())
            all_total += total
            all_correct += correct
            all_tested += tested
        # Average over the number of splits actually evaluated instead of a
        # hard-coded 10, so the figures stay right if the split count changes.
        all_total /= n_splits
        all_correct /= n_splits
        all_tested /= n_splits
        # With a high threshold no sample may qualify; report NaN rather than
        # crashing with ZeroDivisionError.
        tested_ratio = all_tested / all_total if all_total else float("nan")
        accuracy = all_correct / all_tested if all_tested else float("nan")
        seq += 1
        print("seq: {} label: {} prob: {} total: {} tested: {} tested percent: {:.2f} correct: {} correct percent {:.3f}".format(seq, Y_Label[j], p, all_total, all_tested, tested_ratio, all_correct, accuracy))

seq: 1 label: Log_Facies prob: 0 total: 470.0 tested: 470.0 tested percent: 1.00 correct: 378.5 correct percent 0.805
seq: 2 label: Fluid prob: 0 total: 470.0 tested: 470.0 tested percent: 1.00 correct: 445.6 correct percent 0.948
seq: 3 label: MixLabel prob: 0 total: 470.0 tested: 470.0 tested percent: 1.00 correct: 370.9 correct percent 0.789
seq: 4 label: Log_Facies prob: 0.5 total: 470.0 tested: 461.6 tested percent: 0.98 correct: 375.5 correct percent 0.813
seq: 5 label: Fluid prob: 0.5 total: 470.0 tested: 470.0 tested percent: 1.00 correct: 445.6 correct percent 0.948
seq: 6 label: MixLabel prob: 0.5 total: 470.0 tested: 407.6 tested percent: 0.87 correct: 339.6 correct percent 0.833
seq: 7 label: Log_Facies prob: 0.6 total: 470.0 tested: 407.3 tested percent: 0.87 correct: 346.4 correct percent 0.850
seq: 8 label: Fluid prob: 0.6 total: 470.0 tested: 455.7 tested percent: 0.97 correct: 439.7 correct percent 0.965
seq: 9 label: MixLabel prob: 0.6 total: 470.0 tested: 327.9 teste