In [1]:
%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

In [2]:
# Load the raw dataset from a CSV file in the working directory.
df = pd.read_csv(filepath_or_buffer="facies_vectors.csv")

In [3]:
# Preview the first five rows (five is the default for `head`).
df.head()

Unnamed: 0,Facies,Formation,Well Name,Depth,GR,ILD_log10,DeltaPHI,PHIND,PE,NM_M,RELPOS
0,3,A1 SH,SHRIMPLIN,2793.0,77.45,0.664,9.9,11.915,4.6,1,1.0
1,3,A1 SH,SHRIMPLIN,2793.5,78.26,0.661,14.2,12.565,4.1,1,0.979
2,3,A1 SH,SHRIMPLIN,2794.0,79.05,0.658,14.8,13.05,3.6,1,0.957
3,3,A1 SH,SHRIMPLIN,2794.5,86.1,0.655,13.9,13.115,3.5,1,0.936
4,3,A1 SH,SHRIMPLIN,2795.0,74.58,0.647,13.5,13.3,3.4,1,0.915


In [4]:
# Remove the columns that are not used as model inputs.
# Rebinding `df` with errors="ignore" (instead of an in-place drop) keeps this
# cell re-runnable: a second execution no longer raises KeyError because the
# columns were already removed by the first run.
df = df.drop(columns=["Depth", "Formation", "Well Name", "PE"], errors="ignore")

In [5]:
# Display the frame after the column drop; pandas elides the middle rows in the
# rendered output.
df

Unnamed: 0,Facies,GR,ILD_log10,DeltaPHI,PHIND,NM_M,RELPOS
0,3,77.450,0.664,9.900,11.915,1,1.000
1,3,78.260,0.661,14.200,12.565,1,0.979
2,3,79.050,0.658,14.800,13.050,1,0.957
3,3,86.100,0.655,13.900,13.115,1,0.936
4,3,74.580,0.647,13.500,13.300,1,0.915
...,...,...,...,...,...,...,...
4144,5,46.719,0.947,1.828,7.254,2,0.685
4145,5,44.563,0.953,2.241,8.013,2,0.677
4146,5,49.719,0.964,2.925,8.013,2,0.669
4147,5,51.469,0.965,3.083,7.708,2,0.661


In [6]:
# Predictor columns fed to the classifiers; "Facies" is the target label.
feature_names = [
    "GR",
    "ILD_log10",
    "DeltaPHI",
    "PHIND",
    "NM_M",
    "RELPOS",
]

# Feature matrix and label vector taken from the same frame, so they share
# one row index.
X = df[feature_names]
y = df["Facies"]

In [7]:
from sklearn.model_selection import train_test_split

# Shuffled 80/20 split, stratified on the label so both partitions keep the
# class proportions of `y`; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    stratify=y,
    shuffle=True,
    random_state=2,
)

In [8]:
# Sanity check: (rows, columns) of the train and test feature matrices.
(X_train.shape, X_test.shape)

((3319, 6), (830, 6))

In [9]:
from sklearn.ensemble import RandomForestClassifier

# Random forest with a minimum leaf size of 5 and a fixed seed so the fit is
# repeatable.
rf = RandomForestClassifier(
    random_state=0,
    min_samples_leaf=5,
)

# `fit` returns the estimator, which the notebook echoes as the cell output.
rf.fit(X_train, y_train)

RandomForestClassifier(min_samples_leaf=5, random_state=0)

In [10]:
# Mean accuracy of the random forest on the training and held-out sets.
(
    rf.score(X_train, y_train),
    rf.score(X_test, y_test),
)

(0.85899367279301, 0.6457831325301204)

In [11]:
from sklearn.metrics import f1_score

# Predict the held-out labels with the random forest.
y_pred = rf.predict(X_test)

# Weighted F1: per-class F1 averaged with weights equal to each class's support.
f1_score(y_true=y_test, y_pred=y_pred, average="weighted")

0.639060855862022

In [12]:
from sklearn.ensemble import GradientBoostingClassifier

# Gradient boosting with the same leaf-size constraint and seed as the random
# forest above.
gb = GradientBoostingClassifier(
    random_state=0,
    min_samples_leaf=5,
)

# `fit` returns the estimator, which the notebook echoes as the cell output.
gb.fit(X_train, y_train)

GradientBoostingClassifier(min_samples_leaf=5, random_state=0)

In [13]:
# Mean accuracy of the gradient-boosting model on the training and held-out sets.
(
    gb.score(X_train, y_train),
    gb.score(X_test, y_test),
)

(0.830973184694185, 0.653012048192771)

In [14]:
# Predict the held-out labels with the gradient-boosting model.
# Note: this rebinds `y_pred`, replacing the random-forest predictions.
y_pred = gb.predict(X_test)

# Weighted F1, computed the same way as for the random forest.
f1_score(y_true=y_test, y_pred=y_pred, average="weighted")

0.6459941908741249

In [15]:
# Recombine the training features and labels into a single frame for export.
# `X_train` is already a DataFrame, and `pd.DataFrame(<DataFrame>, ...)` does
# not copy its input by default, so adding a column to the re-wrapped object
# may operate on data shared with `X_train`. Selecting the feature columns and
# using `assign` builds a new frame and leaves `X_train` untouched; the label
# is aligned on the shared row index and appended as the last column.
df_train = X_train.loc[:, feature_names].assign(Facies=y_train)

In [16]:
# Recombine the held-out features and labels into a single frame for export.
# `X_test` is already a DataFrame, and `pd.DataFrame(<DataFrame>, ...)` does
# not copy its input by default, so adding a column to the re-wrapped object
# may operate on data shared with `X_test`. Selecting the feature columns and
# using `assign` builds a new frame and leaves `X_test` untouched; the label
# is aligned on the shared row index and appended as the last column.
df_test = X_test.loc[:, feature_names].assign(Facies=y_test)

In [17]:
# Sanity check: each frame should have the six features plus the label column.
(df_train.shape, df_test.shape)

((3319, 7), (830, 7))

In [18]:
# Replace the shuffled row labels left over from the split with a fresh
# 0..n-1 index; drop=True discards the old labels instead of keeping them as
# a column.
df_train = df_train.reset_index(drop=True)
df_train

Unnamed: 0,GR,ILD_log10,DeltaPHI,PHIND,NM_M,RELPOS,Facies
0,88.002,0.60206,4.800,10.5000,2,0.500,8
1,60.563,0.81800,3.299,9.9830,1,0.625,2
2,85.400,0.52900,11.100,13.0500,1,0.700,2
3,83.450,0.82300,-1.900,4.3500,2,0.756,8
4,61.404,0.59000,8.033,10.9355,1,0.378,3
...,...,...,...,...,...,...,...
3314,74.000,0.76800,0.243,9.6700,2,0.200,9
3315,78.970,0.52100,4.400,11.5000,1,0.143,3
3316,57.971,0.25479,-14.600,33.6000,2,1.000,7
3317,65.590,0.92000,5.700,5.2650,2,0.074,6


In [19]:
# Replace the shuffled row labels left over from the split with a fresh
# 0..n-1 index; drop=True discards the old labels instead of keeping them as
# a column.
df_test = df_test.reset_index(drop=True)
df_test

Unnamed: 0,GR,ILD_log10,DeltaPHI,PHIND,NM_M,RELPOS,Facies
0,68.770,0.725000,3.800,8.1000,2,0.049,5
1,78.860,0.744000,0.700,10.3200,2,0.417,8
2,13.839,1.075000,3.081,8.0195,2,0.919,8
3,77.446,0.498586,7.000,13.6000,1,0.400,2
4,66.480,0.421000,6.500,12.3500,1,0.245,2
...,...,...,...,...,...,...,...
825,74.900,0.782000,9.300,11.7500,1,1.000,3
826,82.063,0.625000,6.823,12.3670,2,0.652,6
827,82.907,0.613842,11.300,13.0500,1,0.803,2
828,65.540,0.723000,8.100,8.6500,1,0.651,2


In [20]:
# Preview the first five training rows (five is the default for `head`).
df_train.head()

Unnamed: 0,GR,ILD_log10,DeltaPHI,PHIND,NM_M,RELPOS,Facies
0,88.002,0.60206,4.8,10.5,2,0.5,8
1,60.563,0.818,3.299,9.983,1,0.625,2
2,85.4,0.529,11.1,13.05,1,0.7,2
3,83.45,0.823,-1.9,4.35,2,0.756,8
4,61.404,0.59,8.033,10.9355,1,0.378,3


In [21]:
# Preview the first five held-out rows (five is the default for `head`).
df_test.head()

Unnamed: 0,GR,ILD_log10,DeltaPHI,PHIND,NM_M,RELPOS,Facies
0,68.77,0.725,3.8,8.1,2,0.049,5
1,78.86,0.744,0.7,10.32,2,0.417,8
2,13.839,1.075,3.081,8.0195,2,0.919,8
3,77.446,0.498586,7.0,13.6,1,0.4,2
4,66.48,0.421,6.5,12.35,1,0.245,2


In [22]:
# Write both partitions to CSV in the working directory; index=False leaves
# the row index out of the files.
df_train.to_csv(path_or_buf="HW_training_data.csv", index=False)
df_test.to_csv(path_or_buf="HW_test_data.csv", index=False)