# Testing XGBoost (XGB) on well-log datasets

In [1]:
import numpy as np
from coremdlr.datasets import WellLoader, FaciesDataset
from coremdlr.models import FeaturePredictor, LambdaModel

Using TensorFlow backend.


### Picking your training and testing wells

In [2]:
# The first argument lists the wells used for training (per the section
# heading); well 204-19-6 is held out as the test well.
fdset = FaciesDataset(
    ["205-21b-3", "204-20-6a", "204-24a-6", "204-20-1Z"],
    test_wells=["204-19-6"],
    features=["logs"],
    label_resolution=2 * 32,  # 32 pixels ~ .5cm
    logs_args={"scaler_kind": "standard"},
)

In [3]:
# Populate the dataset for every configured well (each well is reported by a
# "Loading Well" line in the cell output).
fdset.load_or_generate_data()

Loading Well:  205-21b-3  from  /home/administrator/Dropbox/core_data/facies/train_data


Header section Parameter regexp=~P was not found.


Adding NaN log:  SP
Adding NaN log:  DTS
Feature shapes:  [('depth', (1920,)), ('top', (1920,)), ('base', (1920,)), ('logs', (1920, 11))]
Loading Well:  204-20-6a  from  /home/administrator/Dropbox/core_data/facies/train_data


Header section Parameter regexp=~P was not found.


Adding NaN log:  SP
Adding NaN log:  DTS1
Adding NaN log:  DTS2


ValueError: b'os' is not in list

In [4]:
import hyperopt
from hyperopt import hp
from hyperopt.pyll.base import scope
from sklearn.metrics import f1_score, log_loss
from sklearn.utils.class_weight import compute_sample_weight

# Per-sample weights derived from the test labels with the 'balanced' scheme;
# they are passed to log_loss so the loss is class-balanced.
sample_weights = compute_sample_weight(class_weight='balanced', y=fdset.y_test)

# Per-feature model configuration handed to FeaturePredictor: the 'logs'
# feature is handled by a LambdaModel configured with feature='logs'.
fmodel_args = dict(
    logs=dict(
        model='LambdaModel',
        model_args=dict(feature='logs'),
    ),
)

# Hyperopt search space for the XGB model configuration.
# Entries built with `hp.*` are sampled anew on every trial; plain values are
# fixed and passed through unchanged.
XGB_SEARCH_SPACE = {
    'model_type' : 'XGB',
    # quniform draws floats on a step-1 grid; scope.int casts the draw to int
    # when the space is evaluated (fmin still reports the raw float).
    'max_depth' : scope.int(hp.quniform('max_depth', 3, 10, 1)),
    'learning_rate' : hp.uniform('learning_rate', 0.01, 0.2),
    'n_estimators' : scope.int(hp.quniform('n_estimators', 10, 1000, 1)),
    'objective' : 'multi:softprob',
    'n_jobs' : 2,
    'gamma' : hp.uniform('gamma', 0, 0.5),
    'subsample' : hp.uniform('subsample', 0.3, 1),
    'colsample_bytree' : hp.uniform('colsample_bytree', 0.3, 1.0),
    'colsample_bylevel' : 1,
    'reg_alpha' : 0,                                    # L1 penalty
    'reg_lambda' : hp.uniform('reg_lambda', 0.1, 10),   # L2 penalty
    # NOTE(review): 'gpu_exact' needs a GPU-enabled XGBoost build and appears to
    # have been dropped from newer XGBoost releases -- confirm against the
    # installed version before re-running.
    'tree_method' : 'gpu_exact',
}

def train_xgb_model(model_config):
    """Hyperopt objective: fit one XGB configuration and return its test loss.

    Builds a ``FeaturePredictor`` on the module-level ``fdset`` with the given
    model configuration, fits it, prints the macro-averaged F1 score on the
    test split, and returns the sample-weighted log loss on the same split.

    Parameters
    ----------
    model_config : dict
        One evaluated point of ``XGB_SEARCH_SPACE``; forwarded as
        ``model_args`` to ``FeaturePredictor``.

    Returns
    -------
    float
        Log loss on ``fdset.X_test`` / ``fdset.y_test`` weighted by the
        module-level ``sample_weights`` (lower is better; this is the value
        ``hyperopt.fmin`` minimises).

    Notes
    -----
    NOTE(review): the loss is computed on the test wells, so hyperparameters
    are being selected against the test set and the reported scores are
    optimistic. Consider scoring on a separate validation split.
    """
    xgb_predictor = FeaturePredictor(fdset, model_args=model_config, feature_model_args=fmodel_args)
    # The value returned by fit() is not used by the objective, so it is not kept.
    xgb_predictor.fit(fdset, verbose=False)

    # Hard predictions are only used for the informational F1 printout.
    y_pred = xgb_predictor.predict(fdset.X_test)
    print('F1 score:', f1_score(fdset.y_test, y_pred, average='macro'))

    # Class probabilities drive the actual optimisation target.
    y_proba = xgb_predictor.predict_proba(fdset.X_test)
    return log_loss(fdset.y_test, y_proba, sample_weight=sample_weights)

In [5]:
# Random search (rand.suggest) over XGB_SEARCH_SPACE: fmin calls
# train_xgb_model for 50 trials and returns the assignment with the lowest loss.
# NOTE(review): no random state is passed, so repeated runs will presumably
# sample different configurations.
best_params = hyperopt.fmin(
    train_xgb_model,
    XGB_SEARCH_SPACE,
    algo=hyperopt.rand.suggest,
    max_evals=50,
)

Training model for feature:  logs
F1 score: 0.19325485051610644
Training model for feature:  logs
F1 score: 0.12080616546360314
Training model for feature:  logs
F1 score: 0.17436836501990682
Training model for feature:  logs
F1 score: 0.2046141582605648
Training model for feature:  logs
F1 score: 0.16248938083347025
Training model for feature:  logs
F1 score: 0.18545532128887673
Training model for feature:  logs
F1 score: 0.17430471683617624
Training model for feature:  logs
F1 score: 0.14285795937913512
Training model for feature:  logs
F1 score: 0.16321311334130664
Training model for feature:  logs
F1 score: 0.18431736325676976
Training model for feature:  logs
F1 score: 0.1938633743030063
Training model for feature:  logs
F1 score: 0.18791964694097285
Training model for feature:  logs
F1 score: 0.15098987028066574
Training model for feature:  logs
F1 score: 0.16143279831795954
Training model for feature:  logs
F1 score: 0.16752969978154503
Training model for feature:  logs
F1 score

In [6]:
# Show the best assignment found by the search. Only the sampled (hp.*)
# parameters appear, and the quniform ones are reported as floats (e.g. 6.0).
best_params

{'colsample_bytree': 0.7656135429860518,
 'gamma': 0.22210123019269762,
 'learning_rate': 0.08047373959576883,
 'max_depth': 6.0,
 'n_estimators': 13.0,
 'reg_lambda': 2.3383880017595646,
 'subsample': 0.8965617242269515}

In [7]:
# Rebuild the complete model configuration from the search result.
# space_eval substitutes the sampled values into XGB_SEARCH_SPACE and re-applies
# the scope.int casts, so max_depth / n_estimators become integers without
# hard-coding the values of one particular search run.
params = hyperopt.space_eval(XGB_SEARCH_SPACE, best_params)
xgb_predictor = FeaturePredictor(fdset, model_args=params, feature_model_args=fmodel_args)
xgb_predictor.fit(fdset, verbose=True)

# Pair each log name with its feature importance and print the pairs from most
# to least important. A plain loop is used so the cell does not emit a
# meaningless list of None values as its output.
imps = list(zip(fdset.wells[0].logs_args['which_logs'], xgb_predictor.model.feature_importances_))
imps.sort(key=lambda p: p[1])
for pair in reversed(imps):
    print(pair)

Training model for feature:  logs
                      precision    recall  f1-score   support

           sandstone       0.51      0.08      0.14       467
clay-prone sandstone       0.20      0.54      0.29       156
      sandy mudstone       0.13      0.31      0.18       143
            mudstone       0.30      0.14      0.19       178

           micro avg       0.21      0.21      0.21       944
           macro avg       0.28      0.27      0.20       944
        weighted avg       0.36      0.21      0.18       944

Total accuracy Score :  0.2055084745762712
Confusion Matrix: 
 [[ 39 189 191  48]
 [  4  85  65   2]
 [ 23  66  45   9]
 [ 10  95  48  25]]
('RDEP', 0.125)
('NEUT', 0.12402724)
('DENS', 0.12013619)
('GR', 0.11527237)
('DTC', 0.11478599)
('RSHAL', 0.1118677)
('PEF', 0.10651751)
('DTS', 0.09581712)
('DTS1', 0.050583657)
('DTS2', 0.027237354)
('SP', 0.008754863)


[None, None, None, None, None, None, None, None, None, None, None]

In [25]:
# Prediction table for test well 204-19-6, including that well's log columns.
# The save_csv argument presumably also writes the table to the named CSV file.
xgb_predictor.preds_dataframe(
    '204-19-6',
    logs=fdset.test_wells[0].logs_args['which_logs'],
    save_csv='preds_204-19-6.csv',
)

Unnamed: 0,base,confidence,depth,proba_0,proba_1,proba_2,proba_3,regression,top,y_pred,...,SP,DENS,NEUT,PEF,RDEP,RSHAL,DTC,DTS,DTS1,DTS2
0,2208.085920,0.422048,2208.080467,0.060476,0.204947,0.422048,0.312529,1.983288,2208.075180,2,...,-0.149169,-1.692359,-1.072813,-1.721089,1.344717,1.232942,1.414411,0.0,-0.155858,-0.487929
1,2208.096661,0.432120,2208.091207,0.061919,0.185974,0.432120,0.319987,2.012720,2208.085920,2,...,-0.175051,-1.713029,-1.111747,-1.744785,1.349537,1.232334,1.421290,0.0,-0.131260,-0.471586
2,2208.107401,0.390059,2208.101947,0.055892,0.167872,0.390059,0.386177,2.133152,2208.096661,2,...,-0.192787,-1.733596,-1.143298,-1.761094,1.352661,1.231798,1.428199,0.0,-0.110155,-0.454461
3,2208.118141,0.390059,2208.112687,0.055892,0.167872,0.390059,0.386177,2.133152,2208.107401,2,...,-0.186814,-1.753859,-1.153364,-1.755902,1.350852,1.231476,1.435192,0.0,-0.099211,-0.435058
4,2208.128881,0.390059,2208.123427,0.055892,0.167872,0.390059,0.386177,2.133152,2208.118141,2,...,-0.180841,-1.774122,-1.163429,-1.750710,1.349043,1.231154,1.442185,0.0,-0.088268,-0.415655
5,2208.139621,0.390059,2208.134167,0.055892,0.167872,0.390059,0.386177,2.133152,2208.128881,2,...,-0.174868,-1.794385,-1.173495,-1.745517,1.347233,1.230832,1.449178,0.0,-0.077325,-0.396251
6,2208.150361,0.390142,2208.144907,0.055904,0.167695,0.390142,0.386259,2.133447,2208.139621,2,...,-0.168895,-1.814648,-1.183561,-1.740325,1.345424,1.230509,1.456172,0.0,-0.066382,-0.376848
7,2208.161101,0.395792,2208.155647,0.056713,0.155642,0.395792,0.391853,2.153480,2208.150361,2,...,-0.162922,-1.834911,-1.193627,-1.735133,1.343615,1.230187,1.463165,0.0,-0.055438,-0.357445
8,2208.171841,0.395792,2208.166387,0.056713,0.155642,0.395792,0.391853,2.153480,2208.161101,2,...,-0.156950,-1.855174,-1.203693,-1.729941,1.341805,1.229865,1.470158,0.0,-0.044495,-0.338042
9,2208.182581,0.379869,2208.177127,0.054432,0.189610,0.379869,0.376089,2.097019,2208.171841,2,...,-0.150977,-1.875436,-1.213758,-1.724748,1.339996,1.229543,1.477151,0.0,-0.033552,-0.318639
