#### Reading Data

In [1]:
import numpy as np # for array operations and for mean absolute relative error
import pandas as pd # for working with DataFrames

In [2]:
# Read the dataset from a CSV located relative to the current working directory.
df = pd.read_csv('porocal.csv')
# Preview the first five rows.
df.head()

Unnamed: 0,GR,NPHI,LLD,CAL,PE,DEPTH,PHIF
0,100.584839,9.71782,10.088133,8.214933,2.641247,1388.0,16.72
1,90.719633,10.742601,8.850653,8.214862,2.608124,1388.1,9.976
2,85.966671,11.569975,8.250872,8.214865,2.602324,1388.2,10.52
3,84.310091,12.136198,8.087051,8.214863,2.601919,1388.3,10.73
4,84.286258,12.447312,8.162846,8.2149,2.605344,1388.4,10.66


In [3]:
# (rows, columns) of the loaded frame.
df.shape

(921, 7)

In [4]:
# Feature matrix: every column of the frame except DEPTH and the target PHIF.
X = df.drop(columns=['DEPTH', 'PHIF'])
# Target vector: the PHIF column.
y = df.loc[:, 'PHIF']

#### Extra Trees Regression Model

In [5]:
from sklearn.inspection import permutation_importance
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import GridSearchCV, train_test_split
from sklearn.ensemble import ExtraTreesRegressor
import time
from sklearn.metrics import r2_score
from sklearn import metrics 
from sklearn.model_selection import cross_val_score, KFold

Split the data into training (80%) and test (20%) sets

In [6]:
# Hold out 20% of the rows as a test set; random_state=0 fixes the shuffle so the
# same split is produced on every run.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)

In [7]:
# Sanity check: (rows, columns) of the training features.
X_train.shape

(736, 5)

In [8]:
# Training target: should have one value per training row.
y_train.shape

(736,)

In [9]:
# (rows, columns) of the held-out test features.
X_test.shape

(185, 5)

In [10]:
# Test target: should have one value per test row.
y_test.shape

(185,)

In [11]:
# Extra Trees (extremely randomized trees) regressor with default hyperparameters.
# Note: this is ExtraTreesRegressor, not RandomForestRegressor. random_state=0 fixes the seed.
etr=ExtraTreesRegressor(random_state=0) #extra trees regression model

In [12]:
# 10-fold cross-validation splitter; rows are shuffled with a fixed seed before
# being divided into folds, so the folds are reproducible.
kfold = KFold(n_splits=10, shuffle=True, random_state=0)

In [13]:
cross_val_score = cross_val_score(etr, X_train, y_train, cv=kfold)

In [14]:
cross_val_score

array([0.91366494, 0.9654063 , 0.97223052, 0.92831132, 0.92330876,
       0.97312795, 0.93615168, 0.9515975 , 0.97502774, 0.97640422])

In [15]:
r2_val=cross_val_score.mean()

In [16]:
# Time only the fit call. perf_counter() is a monotonic, high-resolution clock
# intended for measuring intervals, whereas time.time() follows the wall clock
# and can jump if the system time is adjusted during the fit.
start = time.perf_counter() #start the timer
etr.fit(X_train, y_train)
end = time.perf_counter() #end the timer

In [17]:
# Elapsed fit time: difference between the two timer readings taken around etr.fit.
run_time=(end - start)
print("Total Computaional Time (S): ", run_time) #runtime in seconds

Total Computaional Time (S):  0.2863802909851074


In [18]:
# Predictions for the rows the model was fitted on (used for the train metrics).
y_train_pred = etr.predict(X_train)

In [19]:
# Predictions for the held-out test rows (used for the test metrics).
y_test_pred = etr.predict(X_test)

#### Model Evaluation

##### Train Model

In [20]:
# Coefficient of determination on the rows the model was trained on.
# NOTE(review): a score this close to 1 presumably reflects fully grown trees
# memorising the training rows (sklearn defaults) - treat it as a sanity check,
# not as a performance estimate; confirm against the validation/test scores.
r2trn = metrics.r2_score(y_true=y_train, y_pred=y_train_pred)
print("R2 of the Train Model =", r2trn)

R2 of the Train Model = 0.9999999999408555


In [21]:
# Mean squared error between the training targets and the training predictions.
msetrn = mean_squared_error(y_true=y_train, y_pred=y_train_pred)
print("Mean Square Error of Train Model =", msetrn)

Mean Square Error of Train Model = 2.445652173800276e-10


In [22]:
# Root mean squared error: square root of the train MSE, in the same units as the target.
rmsetrn = msetrn**0.5
print("RMSE of the Train Model =", rmsetrn)

RMSE of the Train Model = 1.5638581053920065e-05


In [23]:
# Mean absolute error between the training targets and the training predictions.
maetrn = metrics.mean_absolute_error(y_true=y_train, y_pred=y_train_pred)
print("Mean Absolute Error of Train Model =", maetrn)

Mean Absolute Error of Train Model = 8.152174041412118e-07


##### Validation Model

In [24]:
# Report the mean of the cross-validation fold scores computed earlier (r2_val).
print("R2 of the Validation Model =", r2_val) 

R2 of the Validation Model = 0.9515230915100072


##### Test Model

In [25]:
# Coefficient of determination on the held-out test rows.
r2tst = metrics.r2_score(y_true=y_test, y_pred=y_test_pred)
print("R2 of the Test Model =", r2tst)

R2 of the Test Model = 0.9593804405841084


In [26]:
# Mean squared error between the test targets and the test predictions.
msetst = mean_squared_error(y_true=y_test, y_pred=y_test_pred)
print("Mean Square Error of Test Model =", msetst)

Mean Square Error of Test Model = 0.160743026324865


In [27]:
# Root mean squared error: square root of the test MSE, in the same units as the target.
rmsetst = msetst**0.5
print("RMSE of the Test Model =", rmsetst)

RMSE of the Test Model = 0.4009277071054893


In [28]:
# Mean absolute error between the test targets and the test predictions.
maetst = metrics.mean_absolute_error(y_true=y_test, y_pred=y_test_pred)
print("Mean Absolute Error of Test Model =", maetst)

Mean Absolute Error of Test Model = 0.22932383783783758
