In [1]:
###### Importing all used packages
%matplotlib inline

import warnings
warnings.filterwarnings('ignore')

import pandas as pd
import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt
import matplotlib.colors as colors
from mpl_toolkits.axes_grid1 import make_axes_locatable
import seaborn as sns

from pandas import set_option
# set_option("display.max_rows", 10)
pd.options.mode.chained_assignment = None

###### Import packages needed for the make_vars functions
import Feature_Engineering as FE

##### import stuff from scikit learn
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import KFold, cross_val_score,LeavePGroupsOut, LeaveOneGroupOut, cross_val_predict
from sklearn.metrics import confusion_matrix, make_scorer, f1_score, accuracy_score, recall_score, precision_score

# Read the labelled training wells from a CSV located one directory above the
# notebook's working directory.
filename = '../facies_vectors.csv'
training_data = pd.read_csv(filename)
# Show the first rows as the cell output.
training_data.head()

Unnamed: 0,Facies,Formation,Well Name,Depth,GR,ILD_log10,DeltaPHI,PHIND,PE,NM_M,RELPOS
0,3,A1 SH,SHRIMPLIN,2793.0,77.45,0.664,9.9,11.915,4.6,1,1.0
1,3,A1 SH,SHRIMPLIN,2793.5,78.26,0.661,14.2,12.565,4.1,1,0.979
2,3,A1 SH,SHRIMPLIN,2794.0,79.05,0.658,14.8,13.05,3.6,1,0.957
3,3,A1 SH,SHRIMPLIN,2794.5,86.1,0.655,13.9,13.115,3.5,1,0.936
4,3,A1 SH,SHRIMPLIN,2795.0,74.58,0.647,13.5,13.3,3.4,1,0.915


In [3]:
# First-stage feature engineering on the training wells. Every helper comes from
# the Feature_Engineering module (imported as FE) and is called on the same five
# log columns. What each helper computes is inferred from its name and arguments
# only -- TODO confirm against the FE source.

##### cD from wavelet db1, levels 1-4 (presumably DWT detail coefficients)
dwt_db1_cD_df = FE.make_dwt_vars_cD(wells_df=training_data, logs=['GR', 'ILD_log10', 'DeltaPHI', 'PE', 'PHIND'],
                       levels=[1, 2, 3, 4], wavelet='db1')

##### cA from wavelet db1, levels 1-4 (presumably DWT approximation coefficients)
dwt_db1_cA_df = FE.make_dwt_vars_cA(wells_df=training_data, logs=['GR', 'ILD_log10', 'DeltaPHI', 'PE', 'PHIND'],
                       levels=[1, 2, 3, 4], wavelet='db1')

##### cD from wavelet db3, levels 1-4
dwt_db3_cD_df = FE.make_dwt_vars_cD(wells_df=training_data, logs=['GR', 'ILD_log10', 'DeltaPHI', 'PE', 'PHIND'],
                       levels=[1, 2, 3, 4], wavelet='db3')

##### cA from wavelet db3, levels 1-4
dwt_db3_cA_df = FE.make_dwt_vars_cA(wells_df=training_data, logs=['GR', 'ILD_log10', 'DeltaPHI', 'PE', 'PHIND'],
                       levels=[1, 2, 3, 4], wavelet='db3')

##### Entropy features, one set per l_foots value
entropy_df = FE.make_entropy_vars(wells_df=training_data, logs=['GR', 'ILD_log10', 'DeltaPHI', 'PE', 'PHIND'],
                               l_foots=[2, 3, 4, 5, 7, 10])

###### Gradient features, one set per dx value
gradient_df = FE.make_gradient_vars(wells_df=training_data, logs=['GR', 'ILD_log10', 'DeltaPHI', 'PE', 'PHIND'],
                                 dx_list=[2, 3, 4, 5, 6, 10, 20])

##### Rolling average, one set per window
moving_av_df = FE.make_moving_av_vars(wells_df=training_data, logs=['GR', 'ILD_log10', 'DeltaPHI', 'PE', 'PHIND'],
                                   windows=[1, 2, 5, 10, 20])

##### Rolling standard deviation, one set per window
moving_std_df = FE.make_moving_std_vars(wells_df=training_data, logs=['GR', 'ILD_log10', 'DeltaPHI', 'PE', 'PHIND'],
                                     windows=[3 , 4, 5, 7, 10, 15, 20])

##### Rolling max, one set per window
moving_max_df = FE.make_moving_max_vars(wells_df=training_data, logs=['GR', 'ILD_log10', 'DeltaPHI', 'PE', 'PHIND'],
                                     windows=[3, 4, 5, 7, 10, 15, 20])

##### Rolling min, one set per window
moving_min_df = FE.make_moving_min_vars(wells_df=training_data, logs=['GR', 'ILD_log10', 'DeltaPHI', 'PE', 'PHIND'],
                                     windows=[3 , 4, 5, 7, 10, 15, 20])

###### Rolling NM/M (non-marine / marine) ratio, one column per window
rolling_marine_ratio_df = FE.make_rolling_marine_ratio_vars(wells_df=training_data, windows=[5, 10, 15, 20, 30, 50, 75, 100, 200])

###### Distance to NM and M, up and down (direction presumably along depth -- TODO confirm)
dist_M_up_df = FE.make_distance_to_M_up_vars(wells_df=training_data)
dist_M_down_df = FE.make_distance_to_M_down_vars(wells_df=training_data)
dist_NM_up_df = FE.make_distance_to_NM_up_vars(wells_df=training_data)
dist_NM_down_df = FE.make_distance_to_NM_down_vars(wells_df=training_data)

In [4]:
# Assemble the first-stage training table: the raw well data followed,
# column-wise, by every engineered feature frame (order matters because the
# feature matrix is later sliced by column position).
list_df_var = [dwt_db1_cD_df, dwt_db1_cA_df, dwt_db3_cD_df, dwt_db3_cA_df,
               entropy_df, gradient_df, moving_av_df, moving_std_df, moving_max_df, moving_min_df,
               rolling_marine_ratio_df, dist_M_up_df, dist_M_down_df, dist_NM_up_df, dist_NM_down_df]
# A single concat avoids re-copying the growing frame once per feature block.
combined_df = pd.concat([training_data] + list_df_var, axis=1)
# Missing values become the numeric sentinel -1. The previous string '-1' could
# leave affected numeric columns as object dtype (floats mixed with strings).
combined_df = combined_df.fillna(-1)

(4149, 299)


Unnamed: 0,Facies,Formation,Well Name,Depth,GR,ILD_log10,DeltaPHI,PHIND,PE,NM_M,...,Marine_ratio_20_centered,Marine_ratio_30_centered,Marine_ratio_50_centered,Marine_ratio_75_centered,Marine_ratio_100_centered,Marine_ratio_200_centered,dist_M_up,dist_M_down,dist_NM_up,dist_NM_down
0,3,A1 SH,SHRIMPLIN,2793.0,77.45,0.664,9.9,11.915,4.6,1,...,1.0,1.0,1.0,1.0,1.14,1.51,-1.0,21.5,0.0,0.0
1,3,A1 SH,SHRIMPLIN,2793.5,78.26,0.661,14.2,12.565,4.1,1,...,1.0,1.0,1.0,1.0,1.156863,1.50495,-1.0,21.0,0.0,0.0
2,3,A1 SH,SHRIMPLIN,2794.0,79.05,0.658,14.8,13.05,3.6,1,...,1.0,1.0,1.0,1.0,1.173077,1.5,-1.0,20.5,0.0,0.0
3,3,A1 SH,SHRIMPLIN,2794.5,86.1,0.655,13.9,13.115,3.5,1,...,1.0,1.0,1.0,1.0,1.188679,1.495146,-1.0,20.0,0.0,0.0
4,3,A1 SH,SHRIMPLIN,2795.0,74.58,0.647,13.5,13.3,3.4,1,...,1.0,1.0,1.0,1.0,1.203704,1.490385,-1.0,19.5,0.0,0.0


In [5]:
# Feature matrix: every column from position 4 onwards. In the table preview the
# first four columns are Facies, Formation, Well Name and Depth.
X = combined_df.iloc[:, 4:]
# Target labels.
y = combined_df['Facies']
# Well names, used as the grouping key by the group-based cross-validation splitters.
groups = combined_df['Well Name']

In [6]:
############# NOT NECESSARY
# Optional diagnostic: score the first-stage forest with leave-two-wells-out
# cross-validation, once per metric.
scoring_param = ['accuracy', 'recall_weighted', 'precision_weighted', 'f1_weighted']

Cl = RandomForestClassifier(class_weight='balanced', max_features=0.1, min_samples_leaf=25,
                            min_samples_split=50, n_estimators=100, n_jobs=-1, random_state=42)

lpgo = LeavePGroupsOut(n_groups=2)

# split() returns a one-shot generator, so a fresh one is created for each metric.
scores = [
    cross_val_score(Cl, X, y, scoring=metric, cv=lpgo.split(X, y, groups), n_jobs=-1)
    for metric in scoring_param
]

# Rows of the stacked array are metrics; transpose so each metric is a column.
scores = pd.DataFrame(data=np.array(scores).T, columns=scoring_param)

KeyboardInterrupt: 

In [None]:
########## NOT NECESSARY
# Optional diagnostic: one box per scoring metric, drawn on an explicit Axes.
sns.set_style('white')
fig, ax = plt.subplots(figsize=(8, 6))
sns.boxplot(data=scores, ax=ax)
ax.set_xlabel('scoring parameters')
ax.set_ylabel('score')
ax.set_title('Classification scores for tuned parameters');

In [6]:
###### First-stage prediction: per-class probabilities for every training row,
###### each predicted by a forest fitted with that row's well held out.
Cl = RandomForestClassifier(class_weight='balanced', max_features=0.1, min_samples_leaf=25,
                            min_samples_split=50, n_estimators=100, n_jobs=-1, random_state=42)
# One fold per well name.
cv = LeaveOneGroupOut().split(X, y, groups)
proba = cross_val_predict(Cl, X, y, cv=cv, method='predict_proba')

(4149, 9)

In [7]:
###### Turn the probability array into named columns and put the first four
###### columns of the training table (Facies, Formation, Well Name, Depth) in front.
# Names are assigned by position, so they must follow the column order of the
# probability array.
list_facies = ['SS', 'CSiS', 'FSiS', 'SiSh', 'MS', 'WS', 'D', 'PS', 'BS']
proba = pd.concat(
    [combined_df.iloc[:, :4], pd.DataFrame(proba, columns=list_facies)],
    axis=1,
)

Unnamed: 0,Facies,Formation,Well Name,Depth,SS,CSiS,FSiS,SiSh,MS,WS,D,PS,BS
0,3,A1 SH,SHRIMPLIN,2793.0,0.041213,0.49733,0.43424,0.007693,0.005071,0.004132,0.0,0.002495,0.007825
1,3,A1 SH,SHRIMPLIN,2793.5,0.051621,0.460541,0.453946,0.011721,0.006854,0.004132,0.0,0.003361,0.007825
2,3,A1 SH,SHRIMPLIN,2794.0,0.049199,0.492634,0.428767,0.016295,0.006505,0.00534,0.0,0.001259,0.0
3,3,A1 SH,SHRIMPLIN,2794.5,0.046428,0.495186,0.4325,0.016295,0.002992,0.00534,0.0,0.001259,0.0
4,3,A1 SH,SHRIMPLIN,2795.0,0.060449,0.492805,0.418167,0.011273,0.006073,0.005814,0.0,0.005418,0.0


In [8]:
# Second-stage feature engineering (training): the same FE helpers are applied to
# the nine first-stage probability columns instead of the raw logs.

###### From gradient
gradient_df = FE.make_gradient_vars(wells_df=proba, logs=list_facies, dx_list=[2, 3, 4, 5, 6, 10, 20])

##### From rolling average
moving_av_df = FE.make_moving_av_vars(wells_df=proba, logs=list_facies, windows=[1, 2, 5, 10, 20])

##### From rolling standard deviation
moving_std_df = FE.make_moving_std_vars(wells_df=proba, logs=list_facies, windows=[3, 4, 5, 7, 10, 15, 20])

##### From rolling max
moving_max_df = FE.make_moving_max_vars(wells_df=proba, logs=list_facies, windows=[3, 4, 5, 7, 10, 15, 20])

##### From rolling min
moving_min_df = FE.make_moving_min_vars(wells_df=proba, logs=list_facies, windows=[3, 4, 5, 7, 10, 15, 20])

list_df_var = [gradient_df, moving_av_df, moving_std_df, moving_max_df, moving_min_df]
# NOTE: combined_df is rebound here to the second-stage table; X, y and groups
# were already taken from the first-stage table in an earlier cell.
# A single concat replaces the per-frame loop, and NaNs are filled with the
# numeric sentinel -1 (the previous string '-1' could leave numeric columns as
# object dtype).
combined_df = pd.concat([proba] + list_df_var, axis=1).fillna(-1)

(4149, 310)


Unnamed: 0,Facies,Formation,Well Name,Depth,SS,CSiS,FSiS,SiSh,MS,WS,...,PS_moving_min_10ft,PS_moving_min_15ft,PS_moving_min_20ft,BS_moving_min_3ft,BS_moving_min_4ft,BS_moving_min_5ft,BS_moving_min_7ft,BS_moving_min_10ft,BS_moving_min_15ft,BS_moving_min_20ft
0,3,A1 SH,SHRIMPLIN,2793.0,0.041213,0.49733,0.43424,0.007693,0.005071,0.004132,...,0.001259,0.001259,0.001259,0.007825,0.007825,0.0,0.0,0.0,0.0,0.0
1,3,A1 SH,SHRIMPLIN,2793.5,0.051621,0.460541,0.453946,0.011721,0.006854,0.004132,...,0.001259,0.001259,0.001259,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,3,A1 SH,SHRIMPLIN,2794.0,0.049199,0.492634,0.428767,0.016295,0.006505,0.00534,...,0.001259,0.001259,0.001259,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,3,A1 SH,SHRIMPLIN,2794.5,0.046428,0.495186,0.4325,0.016295,0.002992,0.00534,...,0.001259,0.001259,0.001259,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,3,A1 SH,SHRIMPLIN,2795.0,0.060449,0.492805,0.418167,0.011273,0.006073,0.005814,...,0.001259,0.001259,0.001259,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [9]:
# Second-stage training features: every column from position 4 onwards of the
# table built in the previous cell (the probability columns and the features derived from them).
X2 = combined_df.iloc[:, 4:]

In [None]:
########## NOT NECESSARY
# Optional diagnostic: score the second-stage forest with leave-two-wells-out
# cross-validation, once per metric.
scoring_param = ['accuracy', 'recall_weighted', 'precision_weighted', 'f1_weighted']

Cl = RandomForestClassifier(class_weight='balanced', max_features=0.7, min_samples_leaf=0.01,
                            min_samples_split=100, n_estimators=100, n_jobs=-1, random_state=42)

lpgo = LeavePGroupsOut(n_groups=2)

# split() returns a one-shot generator, so a fresh one is created for each metric.
scores = [
    cross_val_score(Cl, X2, y, scoring=metric, cv=lpgo.split(X2, y, groups), n_jobs=-1)
    for metric in scoring_param
]

# Rows of the stacked array are metrics; transpose so each metric is a column.
scores = pd.DataFrame(data=np.array(scores).T, columns=scoring_param)

In [None]:
########## NOT NECESSARY
# Optional diagnostic: one box per scoring metric, drawn on an explicit Axes.
sns.set_style('white')
fig, ax = plt.subplots(figsize=(8, 6))
sns.boxplot(data=scores, ax=ax)
ax.set_xlabel('scoring parameters')
ax.set_ylabel('score')
ax.set_title('Classification scores for tuned parameters');

In [10]:
# Read the validation wells from a CSV one directory above the notebook's
# working directory. The file name and the preview indicate there is no Facies
# column. This rebinds `filename`.
filename = '../validation_data_nofacies.csv'
test_data = pd.read_csv(filename)
# Show the first five rows as the cell output.
test_data.head(5)

Unnamed: 0,Formation,Well Name,Depth,GR,ILD_log10,DeltaPHI,PHIND,PE,NM_M,RELPOS
0,A1 SH,STUART,2808.0,66.276,0.63,3.3,10.65,3.591,1,1.0
1,A1 SH,STUART,2808.5,77.252,0.585,6.5,11.95,3.341,1,0.978
2,A1 SH,STUART,2809.0,82.899,0.566,9.4,13.6,3.064,1,0.956
3,A1 SH,STUART,2809.5,80.671,0.593,9.5,13.25,2.977,1,0.933
4,A1 SH,STUART,2810.0,75.971,0.638,8.7,12.35,3.02,1,0.911


In [11]:
# First-stage feature engineering on the validation wells. These are the same FE
# helper calls, with the same arguments, as the training cell, so the feature
# columns should line up with the training feature matrix. This rebinds the
# *_df names that previously held training features. What each helper computes
# is inferred from its name and arguments only -- TODO confirm against the FE source.

##### cD from wavelet db1, levels 1-4 (presumably DWT detail coefficients)
dwt_db1_cD_df = FE.make_dwt_vars_cD(wells_df=test_data, logs=['GR', 'ILD_log10', 'DeltaPHI', 'PE', 'PHIND'],
                                    levels=[1, 2, 3, 4], wavelet='db1')

##### cA from wavelet db1, levels 1-4 (presumably DWT approximation coefficients)
dwt_db1_cA_df = FE.make_dwt_vars_cA(wells_df=test_data, logs=['GR', 'ILD_log10', 'DeltaPHI', 'PE', 'PHIND'],
                       levels=[1, 2, 3, 4], wavelet='db1')

##### cD from wavelet db3, levels 1-4
dwt_db3_cD_df = FE.make_dwt_vars_cD(wells_df=test_data, logs=['GR', 'ILD_log10', 'DeltaPHI', 'PE', 'PHIND'],
                       levels=[1, 2, 3, 4], wavelet='db3')

##### cA from wavelet db3, levels 1-4
dwt_db3_cA_df = FE.make_dwt_vars_cA(wells_df=test_data, logs=['GR', 'ILD_log10', 'DeltaPHI', 'PE', 'PHIND'],
                       levels=[1, 2, 3, 4], wavelet='db3')

##### Entropy features, one set per l_foots value
entropy_df = FE.make_entropy_vars(wells_df=test_data, logs=['GR', 'ILD_log10', 'DeltaPHI', 'PE', 'PHIND'],
                               l_foots=[2, 3, 4, 5, 7, 10])

###### Gradient features, one set per dx value
gradient_df = FE.make_gradient_vars(wells_df=test_data, logs=['GR', 'ILD_log10', 'DeltaPHI', 'PE', 'PHIND'],
                                 dx_list=[2, 3, 4, 5, 6, 10, 20])

##### Rolling average, one set per window
moving_av_df = FE.make_moving_av_vars(wells_df=test_data, logs=['GR', 'ILD_log10', 'DeltaPHI', 'PE', 'PHIND'],
                                   windows=[1, 2, 5, 10, 20])

##### Rolling standard deviation, one set per window
moving_std_df = FE.make_moving_std_vars(wells_df=test_data, logs=['GR', 'ILD_log10', 'DeltaPHI', 'PE', 'PHIND'],
                                     windows=[3 , 4, 5, 7, 10, 15, 20])

##### Rolling max, one set per window
moving_max_df = FE.make_moving_max_vars(wells_df=test_data, logs=['GR', 'ILD_log10', 'DeltaPHI', 'PE', 'PHIND'],
                                     windows=[3, 4, 5, 7, 10, 15, 20])

##### Rolling min, one set per window
moving_min_df = FE.make_moving_min_vars(wells_df=test_data, logs=['GR', 'ILD_log10', 'DeltaPHI', 'PE', 'PHIND'],
                                     windows=[3 , 4, 5, 7, 10, 15, 20])

###### Rolling NM/M (non-marine / marine) ratio, one column per window
rolling_marine_ratio_df = FE.make_rolling_marine_ratio_vars(wells_df=test_data, windows=[5, 10, 15, 20, 30, 50, 75, 100, 200])

###### Distance to NM and M, up and down (direction presumably along depth -- TODO confirm)
dist_M_up_df = FE.make_distance_to_M_up_vars(wells_df=test_data)
dist_M_down_df = FE.make_distance_to_M_down_vars(wells_df=test_data)
dist_NM_up_df = FE.make_distance_to_NM_up_vars(wells_df=test_data)
dist_NM_down_df = FE.make_distance_to_NM_down_vars(wells_df=test_data)

In [12]:
# Assemble the first-stage validation table: the raw validation data followed,
# column-wise, by every engineered feature frame, in the same order used for
# the training table.
list_df_var = [dwt_db1_cD_df, dwt_db1_cA_df, dwt_db3_cD_df, dwt_db3_cA_df,
               entropy_df, gradient_df, moving_av_df, moving_std_df, moving_max_df, moving_min_df,
               rolling_marine_ratio_df, dist_M_up_df, dist_M_down_df, dist_NM_up_df, dist_NM_down_df]
# A single concat replaces the per-frame loop, and NaNs are filled with the
# numeric sentinel -1 (the previous string '-1' could leave numeric columns as
# object dtype).
combined_test_df = pd.concat([test_data] + list_df_var, axis=1).fillna(-1)

# Validation data has no Facies column, so the features start at position 3
# (after Formation, Well Name and Depth) rather than 4 as in the training table.
X_test = combined_test_df.iloc[:, 3:]

(830, 298)


Unnamed: 0,Formation,Well Name,Depth,GR,ILD_log10,DeltaPHI,PHIND,PE,NM_M,RELPOS,...,Marine_ratio_20_centered,Marine_ratio_30_centered,Marine_ratio_50_centered,Marine_ratio_75_centered,Marine_ratio_100_centered,Marine_ratio_200_centered,dist_M_up,dist_M_down,dist_NM_up,dist_NM_down
0,A1 SH,STUART,2808.0,66.276,0.63,3.3,10.65,3.591,1,1.0,...,1.0,1.0,1.0,1.0,1.14,1.57,-1.0,21.5,0.0,0.0
1,A1 SH,STUART,2808.5,77.252,0.585,6.5,11.95,3.341,1,0.978,...,1.0,1.0,1.0,1.0,1.156863,1.574257,-1.0,21.0,0.0,0.0
2,A1 SH,STUART,2809.0,82.899,0.566,9.4,13.6,3.064,1,0.956,...,1.0,1.0,1.0,1.0,1.173077,1.578431,-1.0,20.5,0.0,0.0
3,A1 SH,STUART,2809.5,80.671,0.593,9.5,13.25,2.977,1,0.933,...,1.0,1.0,1.0,1.0,1.188679,1.582524,-1.0,20.0,0.0,0.0
4,A1 SH,STUART,2810.0,75.971,0.638,8.7,12.35,3.02,1,0.911,...,1.0,1.0,1.0,1.0,1.203704,1.586538,-1.0,19.5,0.0,0.0


In [13]:
# Refit the first-stage forest on all training rows, then produce per-class
# probabilities for the validation rows.
Cl = RandomForestClassifier(class_weight='balanced', max_features=0.1, min_samples_leaf=25,
                            min_samples_split=50, n_estimators=100, n_jobs=-1, random_state=42)
# fit() returns the estimator itself, so the two calls can be chained.
proba_test = Cl.fit(X, y).predict_proba(X_test)

In [14]:
###### Turn the validation probabilities into named columns and put the first
###### three columns of the validation table (Formation, Well Name, Depth) in front.
# Names are assigned by position, so they must follow the column order of the
# probability array.
list_facies = ['SS', 'CSiS', 'FSiS', 'SiSh', 'MS', 'WS', 'D', 'PS', 'BS']
proba_test = pd.concat(
    [combined_test_df.iloc[:, :3], pd.DataFrame(proba_test, columns=list_facies)],
    axis=1,
)

Unnamed: 0,Formation,Well Name,Depth,SS,CSiS,FSiS,SiSh,MS,WS,D,PS,BS
0,A1 SH,STUART,2808.0,0.251377,0.3537,0.369888,0.003871,0.008333,0.005021,0.0,0.005959,0.00185
1,A1 SH,STUART,2808.5,0.206566,0.315778,0.452498,0.008647,0.006523,0.003267,0.0,0.00487,0.00185
2,A1 SH,STUART,2809.0,0.166643,0.322894,0.485708,0.010734,0.006871,0.002019,0.0,0.003282,0.00185
3,A1 SH,STUART,2809.5,0.158835,0.328975,0.487113,0.014454,0.00533,0.000269,0.0,0.003175,0.00185
4,A1 SH,STUART,2810.0,0.154692,0.328109,0.4964,0.012106,0.005728,0.000269,0.0,0.000846,0.00185


In [15]:
# Second-stage feature engineering (validation): the same FE helpers and
# arguments as the training second stage, applied to the validation probability
# columns.

###### From gradient
gradient_df = FE.make_gradient_vars(wells_df=proba_test, logs=list_facies, dx_list=[2, 3, 4, 5, 6, 10, 20])

##### From rolling average
moving_av_df = FE.make_moving_av_vars(wells_df=proba_test, logs=list_facies, windows=[1, 2, 5, 10, 20])

##### From rolling standard deviation
moving_std_df = FE.make_moving_std_vars(wells_df=proba_test, logs=list_facies, windows=[3, 4, 5, 7, 10, 15, 20])

##### From rolling max
moving_max_df = FE.make_moving_max_vars(wells_df=proba_test, logs=list_facies, windows=[3, 4, 5, 7, 10, 15, 20])

##### From rolling min
moving_min_df = FE.make_moving_min_vars(wells_df=proba_test, logs=list_facies, windows=[3, 4, 5, 7, 10, 15, 20])

list_df_var = [gradient_df, moving_av_df, moving_std_df, moving_max_df, moving_min_df]
# NOTE: combined_df is rebound here to the validation second-stage table; the
# training matrix X2 was already taken from the training version in an earlier cell.
# A single concat replaces the per-frame loop, and NaNs are filled with the
# numeric sentinel -1 (the previous string '-1' could leave numeric columns as
# object dtype).
combined_df = pd.concat([proba_test] + list_df_var, axis=1).fillna(-1)

(830, 309)


Unnamed: 0,Formation,Well Name,Depth,SS,CSiS,FSiS,SiSh,MS,WS,D,...,PS_moving_min_10ft,PS_moving_min_15ft,PS_moving_min_20ft,BS_moving_min_3ft,BS_moving_min_4ft,BS_moving_min_5ft,BS_moving_min_7ft,BS_moving_min_10ft,BS_moving_min_15ft,BS_moving_min_20ft
0,A1 SH,STUART,2808.0,0.251377,0.3537,0.369888,0.003871,0.008333,0.005021,0.0,...,0.000846,0.000846,0.000846,0.00185,0.00185,0.00185,0.00185,0.00185,0.00185,0.00185
1,A1 SH,STUART,2808.5,0.206566,0.315778,0.452498,0.008647,0.006523,0.003267,0.0,...,0.000846,0.000846,0.000846,0.00185,0.00185,0.00185,0.00185,0.00185,0.00185,0.00185
2,A1 SH,STUART,2809.0,0.166643,0.322894,0.485708,0.010734,0.006871,0.002019,0.0,...,0.000846,0.000846,0.000846,0.00185,0.00185,0.00185,0.00185,0.00185,0.00185,0.00185
3,A1 SH,STUART,2809.5,0.158835,0.328975,0.487113,0.014454,0.00533,0.000269,0.0,...,0.000846,0.000846,0.000846,0.00185,0.00185,0.00185,0.00185,0.00185,0.00185,0.00185
4,A1 SH,STUART,2810.0,0.154692,0.328109,0.4964,0.012106,0.005728,0.000269,0.0,...,0.000846,0.000846,0.000846,0.00185,0.00185,0.00185,0.00185,0.00185,0.00185,0.00185


In [17]:
# Second-stage model: train on the training second-stage features (X2) and
# predict a facies label for every validation row.
# Validation features start at position 3, after Formation, Well Name and Depth.
X_test2 = combined_df.iloc[:, 3:]
Cl2 = RandomForestClassifier(class_weight='balanced', max_features=0.7, min_samples_leaf=0.01,
                             min_samples_split=100, n_estimators=100, n_jobs=-1, random_state=42)
Cl2.fit(X2, y)
y_test = pd.DataFrame(Cl2.predict(X_test2), columns=['Predicted Facies'])
# Attach the well name and depth so each prediction can be located.
test_pred_df = pd.concat([combined_test_df[['Well Name', 'Depth']], y_test], axis=1)
print(test_pred_df.shape)
test_pred_df.head()

(830, 298)


Unnamed: 0,Well Name,Depth,Predicted Facies
0,STUART,2808.0,3
1,STUART,2808.5,3
2,STUART,2809.0,3
3,STUART,2809.5,3
4,STUART,2810.0,3


In [None]:
# Save the prediction table (Well Name, Depth, Predicted Facies) as a pickle
# file in the notebook's working directory.
test_pred_df.to_pickle('Prediction_submission3_TwoSteps.pkl')

(830, 3)
