# Testing XGB on Image datasets 

In [1]:
import numpy as np
from coremdlr.datasets import WellLoader, FaciesDataset
from coremdlr.models import FeaturePredictor, LambdaModel

Using TensorFlow backend.


### Picking your training and testing wells

In [2]:
# Wells passed positionally are the training wells; `test_wells` names the
# well(s) kept aside for evaluation. Only the "image" feature is requested.
TRAIN_WELLS = ["205-21b-3", "204-19-6", "204-24a-6"]
TEST_WELLS = ["204-20-6a"]

fdset = FaciesDataset(TRAIN_WELLS, test_wells=TEST_WELLS, features=["image"])

/home/administrator/Dropbox/core_data/facies/train_data 205-21b-3
/home/administrator/Dropbox/core_data/facies/train_data 204-19-6
/home/administrator/Dropbox/core_data/facies/train_data 204-24a-6
/home/administrator/Dropbox/core_data/facies/train_data 204-20-6a


In [3]:
# Populate the dataset for all wells declared above (going by the method name,
# this loads cached data or generates it — see coremdlr for the details).
# NOTE(review): the recorded run of this cell stopped while loading the first
# well with "AttributeError: 'WellLoader' object has no attribute 'row_labels'",
# so every later cell that reads fdset.X_test / fdset.y_test depends on this
# being resolved first.
fdset.load_or_generate_data()

Loading Well:  205-21b-3


AttributeError: 'WellLoader' object has no attribute 'row_labels'

In [6]:
import hyperopt
from hyperopt import hp
from hyperopt.pyll.base import scope
from sklearn.metrics import f1_score, log_loss
from sklearn.utils.class_weight import compute_sample_weight

# Per-sample weights that balance the classes found in the test labels.
# They are passed as `sample_weight` to log_loss inside train_xgb_model, so the
# optimised loss is a class-balanced log loss.
sample_weights = compute_sample_weight('balanced', fdset.y_test) 

# Per-feature model configuration, handed to FeaturePredictor as
# `feature_model_args`: the 'image' feature is handled by a 'LambdaModel'.
fmodel_args = {
    'image': {
        'model': 'LambdaModel',
        'model_args': {'feature': 'image' }
    }
}

# hyperopt search space for the XGB model. Plain values are fixed settings;
# hp.* entries are sampled on every trial. hp.quniform yields floats, so the
# integer-valued parameters are wrapped in scope.int to cast them.
# NOTE(review): do not reorder the entries casually — the order of the hp nodes
# may influence the sequence of random draws; confirm before restructuring.
XGB_SEARCH_SPACE = {
    'model_type' : 'XGB',    # presumably selects the model class inside FeaturePredictor — TODO confirm
    'max_depth' : scope.int(hp.quniform('max_depth', 3, 10, 1)),              # integer in 3..10
    'learning_rate' : hp.uniform('learning_rate', 0.01, 0.2),
    'n_estimators' : scope.int(hp.quniform('n_estimators', 10, 1000, 1)),     # integer in 10..1000
    'objective' : 'multi:softprob',
    'n_jobs' : 2,
    'gamma' : hp.uniform('gamma', 0, 0.5),
    'subsample' : hp.uniform('subsample', 0.3, 1),
    'colsample_bytree' : hp.uniform('colsample_bytree', 0.3, 1.0),
    'colsample_bylevel' : 1,
    'reg_alpha' : 0,                                    # L1 penalty
    'reg_lambda' : hp.uniform('reg_lambda', 0.1, 10),   # L2 penalty
    # NOTE(review): 'gpu_exact' needs a GPU-enabled XGBoost build and is, to my
    # knowledge, deprecated/removed in newer XGBoost releases in favour of
    # 'gpu_hist' — confirm against the installed version.
    'tree_method' : 'gpu_exact',
}

def train_xgb_model(model_config):
    """Hyperopt objective: fit one XGB configuration and score it on the test set.

    Builds a FeaturePredictor on the notebook-level `fdset` using
    `model_config` as the model arguments and the notebook-level `fmodel_args`
    as the per-feature model arguments, fits it, and prints the macro F1 score
    on `fdset.X_test` / `fdset.y_test` for monitoring.

    Parameters
    ----------
    model_config : dict
        One concrete sample drawn from the search space.

    Returns
    -------
    float
        log_loss on the test set, weighted by the notebook-level
        `sample_weights`. This is the value hyperopt minimises.
    """
    predictor = FeaturePredictor(
        fdset,
        model_args=model_config,
        feature_model_args=fmodel_args,
    )
    # The value returned by fit() is not used by the objective.
    predictor.fit(fdset, verbose=False)

    test_predictions = predictor.predict(fdset.X_test)
    macro_f1 = f1_score(fdset.y_test, test_predictions, average='macro')
    print('F1 score:', macro_f1)

    return log_loss(
        fdset.y_test,
        predictor.predict_proba(fdset.X_test),
        sample_weight=sample_weights,
    )

In [7]:
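## This is where it falls down: the `hyperopt.fmin` call in the next cell fails with `ValueError: Input numpy.ndarray must be 2 dimensional`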

In [8]:
# Random search (hyperopt.rand.suggest) over XGB_SEARCH_SPACE, minimising the
# value returned by train_xgb_model. Only 5 configurations are evaluated.
# NOTE(review): the recorded run of this cell failed with
# "ValueError: Input numpy.ndarray must be 2 dimensional" — presumably the image
# features reach XGBoost without being flattened to 2-D; TODO confirm.
# NOTE(review): no random state is passed, so the sampled configurations are
# not reproducible between runs.
best_params = hyperopt.fmin(
    fn=train_xgb_model,
    space=XGB_SEARCH_SPACE,
    algo=hyperopt.rand.suggest,
    max_evals=5
)

Training model for feature:  image


ValueError: Input numpy.ndarray must be 2 dimensional

In [6]:
# Show the best sampled values returned by fmin, keyed by hp label.
# NOTE(review): in the recorded output the integer-valued parameters appear as
# floats (max_depth 9.0, n_estimators 12.0), i.e. these are raw samples rather
# than values passed through scope.int. The execution count of this cell
# (In [6]) also repeats an earlier one, so the output likely comes from a
# different session than the failing fmin call above.
best_params

{'colsample_bytree': 0.5741619668699196,
 'gamma': 0.019014187472295607,
 'learning_rate': 0.013618311000111526,
 'max_depth': 9.0,
 'n_estimators': 12.0,
 'reg_lambda': 7.490824693434014,
 'subsample': 0.8779575326570961}

In [7]:
# Start from the fixed settings in the search space, overlay the tuned values
# from hyperopt, then pin max_depth / n_estimators by hand (this also replaces
# the float-typed values hyperopt reports for those two keys).
params = dict(XGB_SEARCH_SPACE)
params.update(best_params)
params.update(max_depth=5, n_estimators=705)

xgb_predictor = FeaturePredictor(fdset, model_args=params, feature_model_args=fmodel_args)
xgb_predictor.fit(fdset, verbose=True)

# Pair each log name of the first well with the fitted model's importance.
# NOTE(review): the recorded output lists well-log names (GR, SP, ...) and
# "Training model for feature:  logs", so it appears to come from a run that
# used log features rather than the "image" feature configured above.
list(zip(fdset.wells[0].which_logs, xgb_predictor.model.feature_importances_))

Training model for feature:  logs
                      precision    recall  f1-score   support

           sandstone       0.00      0.00      0.00       603
clay-prone sandstone       0.16      0.62      0.26       595
      sandy mudstone       0.47      0.47      0.47      1622
            mudstone       0.00      0.00      0.00      1051

         avg / total       0.22      0.29      0.24      3871

Total accuracy Score :  0.2913975716869026


  if diff:
  'precision', 'predicted', average, warn_for)


[('GR', 0.11285536),
 ('SP', 0.12004061),
 ('DENS', 0.11412449),
 ('NEUT', 0.09059669),
 ('PEF', 0.12652296),
 ('RDEP', 0.11398782),
 ('RSHAL', 0.099539205),
 ('DTS', 0.0),
 ('DTS1', 0.13579741),
 ('DTS2', 0.08653546)]

In [8]:
# Macro-averaged F1 of the fitted predictor on fdset's test split
# (every class counts equally, regardless of its support).
f1_score(fdset.y_test, xgb_predictor.predict(fdset.X_test), average='macro')

  if diff:
  'precision', 'predicted', average, warn_for)


0.18172616799966956

In [16]:
from abc import ABC, abstractmethod

class Parent(ABC):
    """Base class for experimenting with constructor / save() dispatch.

    Derives from ABC but declares no abstract methods, so it can be
    instantiated directly.
    """

    def __init__(self, args):
        """Either report the 'from_file' entry or store `args` and save.

        Parameters
        ----------
        args : dict
            If it has a 'from_file' key, that value is printed and nothing
            else happens (no attribute is set, save() is not called).
            Otherwise `args` is stored as `self.args` and `self.save` is
            called with `args['path']`.

        Raises
        ------
        KeyError
            If `args` has neither a 'from_file' nor a 'path' key.
        """
        restoring = 'from_file' in args.keys()
        if not restoring:
            self.args = args
            # Dispatches to the subclass override of save(), if there is one.
            self.save(args['path'])
            return
        print(args['from_file'])

    def save(self, path):
        """Print a marker line showing that the base-class save() ran."""
        print('PARENT.save: ', path)
        
        
class Child(Parent):
    """Subclass used to check how Parent's constructor and save() interact."""

    def __init__(self, args=None):
        """Run Parent's initialiser, then print a marker line.

        The marker 'not returned' is printed even when Parent.__init__ takes
        its early-return branch: a `return` there only ends the parent's
        initialiser, not this one.

        Parameters
        ----------
        args : dict, optional
            Forwarded to Parent.__init__. Defaults to a fresh empty dict.

        Raises
        ------
        KeyError
            Propagated from Parent.__init__ when `args` has neither a
            'from_file' nor a 'path' key (this includes the default).
        """
        # A literal `{}` default is created once and shared by every call, and
        # Parent.__init__ stores it on the instance; build a new dict per call.
        if args is None:
            args = {}
        super().__init__(args)
        print('not returned')

    def save(self, path):
        """Run Parent.save, then print a marker line for the override."""
        super().save(path)
        print('CHILD.save: ', path)
        

# No 'from_file' key, so Parent.__init__ stores the args and calls save();
# that resolves to Child.save, which runs Parent.save first. Expected print
# order: PARENT.save, CHILD.save, then 'not returned' from Child.__init__.
c = Child({'path': 'some_filename'})
    

PARENT.save:  some_filename
CHILD.save:  some_filename
not returned


In [9]:
# Inspect the attributes currently set on the instance.
# NOTE(review): this cell is numbered In [9] while the class-definition cell
# above is In [16], so the recorded `{}` may come from an earlier version of
# the classes (the current Parent.__init__ would have set `args`).
c.__dict__

{}

In [10]:
# Assigning a new attribute adds an entry to the instance's __dict__.
c.something = 5
c.__dict__

{'something': 5}

In [11]:
# Deleting the attribute removes that entry from the instance's __dict__ again.
del c.something
c.__dict__

{}