In [1]:
import gc
from random import sample

import numpy as np
import pandas as pd

import hyperopt
from hyperopt import hp, Trials
from hyperopt.pyll.base import scope

from sklearn.metrics import accuracy_score, confusion_matrix, f1_score, log_loss
from sklearn.utils.class_weight import compute_sample_weight

from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping

from coremdlr.models import NetworkModel
from coremdlr.config import defaults

from coremdlr.datasets import WellLoader, FaciesDataset
from coremdlr.datasets.utils import infer_test_wells

import matplotlib.pyplot as plt
%matplotlib inline

  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])
Using TensorFlow backend.


In [2]:
# Hyperopt search space for the WaveNet model.  Only the block count, the two
# filter widths and the two loss weights are sampled; every other entry is a
# fixed setting that is passed through unchanged.
WAVENET_SEARCH_SPACE = {
    'feature': 'pseudoGR',
    'network': 'wavenet',
    'summary': False,
    'sequence_size': 128,
    'network_args': {
        # quniform draws are quantised to the step given as the last argument;
        # scope.int turns the sampled float into an int.
        'num_blocks': scope.int(hp.quniform('num_blocks', 2, 20, 2)),
        'block_filters': scope.int(hp.quniform('block_filters', 16, 256, 16)),
        'residual_filters': scope.int(hp.quniform('residual_filters', 8, 128, 8)),
        'output_resolution': 32,
        'dropout_rate': 0.5,
    },
    'optimizer_args': {'optimizer': 'Adam'},
    # Each loss term gets its own sampled weight.
    'loss': {
        'categorical_crossentropy': hp.uniform('cat_loss', 0.1, 1.0),
        'ordinal_squared_error': hp.uniform('ord_loss', 0.1, 1.0),
    },
}


# Keyword arguments forwarded to `model.fit` by the training functions below.
fit_args = {'batch_size': 16, 'epochs': 20, 'class_weighted': True}

# File the ModelCheckpoint callback writes the best weights to.
CHKPT_FNAME = 'temp_best_weights.h5'


def make_params_usable(params):
    """
    Convert params returned by hyperopt to usable `model_args`.

    Parameters
    ----------
    params : dict
        Sampled configuration laid out like `WAVENET_SEARCH_SPACE`.  It must
        contain a 'network_args' dict with 'num_blocks', 'block_filters' and
        'residual_filters' entries.

    Returns
    -------
    dict
        `WAVENET_SEARCH_SPACE` overlaid with `params`, where the three sampled
        'network_args' sizes have been cast to `int`.  The input `params` is
        left unmodified.

    Raises
    ------
    KeyError
        If 'network_args' or one of the three size entries is missing.
    """
    p = {**WAVENET_SEARCH_SPACE, **params}
    # The merge above is shallow: p['network_args'] would be the very same
    # dict object as params['network_args'].  Cast on a copy so the caller's
    # dict is not silently rewritten.
    network_args = dict(params['network_args'])
    for key in ('num_blocks', 'block_filters', 'residual_filters'):
        network_args[key] = int(network_args[key])
    p['network_args'] = network_args
    return p

In [None]:
def run_experiment(num_train, max_evals=15, search_space=WAVENET_SEARCH_SPACE):
    """
    Random-search model hyperparameters on one of the predefined splits.

    Parameters
    ----------
    num_train : int
        Selects the split family `defaults.TRAIN_SETS_<num_train>`; must be
        3, 6 or 9.
    max_evals : int, optional
        Number of configurations hyperopt samples and evaluates.
    search_space : dict, optional
        Hyperopt search space passed to `hyperopt.fmin`.  Defaults to
        `WAVENET_SEARCH_SPACE`.

    Returns
    -------
    trials : hyperopt.Trials
        Record of every evaluated configuration and its result dict.
    best_params
        Whatever `hyperopt.fmin` returns for the search.

    Raises
    ------
    AssertionError
        If `num_train` is not one of the predefined split sizes.
    """
    assert num_train in [3, 6, 9], 'Must use one of predefined train/test splits'

    train_sets = getattr(defaults, f'TRAIN_SETS_{num_train}')

    # NOTE(review): only the first two splits are used -- presumably to bound
    # the run time of the search; confirm this is intentional.
    datasets = [FaciesDataset(s, infer_test_wells(s), features=["pseudoGR"],
                              pseudoGR_args={'features': ['mean', 'var'],
                                             'per_channel': True})
                for s in train_sets[:2]]

    # Load the data once up front; every trial reuses the same datasets.
    for dset in datasets:
        dset.load_or_generate_data()

    def train_model(model_config):
        """Hyperopt objective: train on each dataset, score on its test wells."""
        acc_scores, f1_scores, log_losses = [], [], []

        for dset in datasets:

            model = NetworkModel(dset, model_args=model_config)

            chkpt_callback = ModelCheckpoint(CHKPT_FNAME, monitor='val_loss', verbose=1,
                                             save_best_only=True,
                                             save_weights_only=True)
            earlystop_callback = EarlyStopping(monitor='val_loss', patience=3)
            # Build per-run kwargs instead of writing 'callbacks' into the
            # shared module-level `fit_args` dict.
            run_fit_args = {**fit_args,
                            'callbacks': [chkpt_callback, earlystop_callback]}

            print(f'Training with config: {model_config}')

            model.fit(dset, **run_fit_args)

            # Score the checkpointed (lowest val_loss) weights, not the
            # weights from the last epoch.
            model.network.load_weights(CHKPT_FNAME)

            y_proba = model.predict_proba(dset.X_test)
            y_pred = np.argmax(y_proba, -1)

            acc_scores.append(accuracy_score(dset.y_test, y_pred))

            f1_scores.append(f1_score(dset.y_test, y_pred, average='macro'))

            # Class-balanced log loss, so frequent classes do not dominate.
            log_losses.append(log_loss(dset.y_test, y_proba,
                                       sample_weight=compute_sample_weight('balanced', dset.y_test)))

        # hyperopt minimises 'loss'; the per-dataset lists ride along so they
        # can be inspected later via `trials.results`.
        return {'loss': np.mean(log_losses),
                'acc_scores': acc_scores,
                'f1_scores': f1_scores,
                'log_losses': log_losses,
                'status': hyperopt.STATUS_OK}

    trials = Trials()

    best_params = hyperopt.fmin(
        fn=train_model,
        # Honour the caller-supplied space; previously this was hard-wired to
        # WAVENET_SEARCH_SPACE and the `search_space` argument was ignored.
        space=search_space,
        algo=hyperopt.rand.suggest,
        max_evals=max_evals,
        trials=trials
    )

    return trials, best_params

In [None]:
# Run the hyperparameter search on the 3-well split.
# NOTE: `trails3` is a misspelling of "trials3"; the name is kept because the
# following cells refer to it.
trails3, best_params3 = run_experiment(3)

In [None]:
# Result dicts returned by the objective, one per evaluated configuration.
trails3.results

In [None]:
# Best hyperparameters as returned by `hyperopt.fmin` via `run_experiment`.
best_params3

In [None]:
# Full hyperopt record of the best trial (presumably the lowest-loss one --
# see the hyperopt `Trials` documentation).
trails3.best_trial

In [3]:
# Identity mapping over the label strings '0'..'7'; passed to FaciesDataset
# as `lithology_classes`.
labels_dict = {label: label for label in map(str, range(8))}

# Wells with `_labelsII` labels; each one is held out in turn as the test well.
labeled_wells = ['204-19-3A', '204-19-6', '204-20-3', '204-20-6a', '204-24a-6']

# Module-level collection of prediction frames appended to by pred_results_df.
pred_dfs = list()

def pred_results_df(model_config):
    """
    Leave-one-well-out evaluation of `model_config` over `labeled_wells`.

    For every well in `labeled_wells` a fresh `NetworkModel` is trained on the
    remaining wells, the checkpointed best-`val_acc` weights are restored, and
    the model's prediction dataframe for the held-out well is collected.

    Parameters
    ----------
    model_config : dict
        Passed to `NetworkModel` as `model_args`.

    Returns
    -------
    pandas.DataFrame
        Concatenation of the per-well prediction frames produced by THIS call,
        each with two added columns: 'well' (test well name) and 'set_number'
        (index of the held-out well in `labeled_wells`).
    """
    # Collect this call's frames locally.  Previously they were appended only
    # to the module-level `pred_dfs`, so a second call also returned (and
    # mixed in) the predictions of every earlier call.
    fold_dfs = []

    for i, test_well in enumerate(labeled_wells):

        # Reclaim memory before building the next fold; on later iterations
        # this runs right after the previous dataset/model were deleted.
        gc.collect()

        dset = FaciesDataset([w for w in labeled_wells if w != test_well], [test_well],
                             labels_ext='_labelsII.npy',
                             lithology_classes=labels_dict,
                             features=['pseudoGR'],
                             pseudoGR_args={'features': ['mean', 'var'], 'per_channel': True})

        dset.load_or_generate_data()

        model = NetworkModel(dset, model_args=model_config)

        chkpt_callback = ModelCheckpoint(CHKPT_FNAME, monitor='val_acc', verbose=1,
                                         save_best_only=True,
                                         save_weights_only=True)
        earlystop_callback = EarlyStopping(monitor='val_acc', patience=6)
        # Per-fold kwargs; do not write 'callbacks' into the shared
        # module-level `fit_args` dict.
        run_fit_args = {**fit_args,
                        'callbacks': [chkpt_callback, earlystop_callback]}

        model.fit(dset, verbose=False, **run_fit_args)

        # Predict with the checkpointed best weights, not the last epoch's.
        model.network.load_weights(CHKPT_FNAME)

        for test_well_name in dset.test_well_names:
            df = model.preds_dataframe(test_well_name)
            df['well'] = test_well_name
            df['set_number'] = i
            fold_dfs.append(df)

        del dset
        del model

    # Keep the module-level list populated for any code that inspects it,
    # but return only the frames produced by this call.
    pred_dfs.extend(fold_dfs)

    return pd.concat(fold_dfs)

In [4]:
# Fixed (non-sampled) WaveNet configuration evaluated with leave-one-well-out.
params = {
    'feature': 'pseudoGR',
    'network': 'wavenet',
    'summary': False,
    'sequence_size': 128,
    'network_args': {
        'num_blocks': 18,
        'block_filters': 128,
        'residual_filters': 32,
        'output_resolution': 32,
        'dropout_rate': 0.5,
    },
    'optimizer_args': {'optimizer': 'Adam'},
    'loss': 'categorical_crossentropy',
}

# Train and predict once per held-out well, then persist the combined
# predictions to CSV.
preds_df_3 = pred_results_df(params)
preds_df_3.to_csv('pgr_wavenet_results_labelsII.csv')

[b'0', b'1', b'2', b'3', b'4', b'5', b'6', b'7']
Loading Well:  204-19-6  from  /home/ross/Dropbox/core_data/facies/train_data


  output_features.append(np.nanmean(img, axis=1))
  output_features.append(np.nanvar(img, axis=1))


Extracted pGR features:  ['Umean', 'Rmean', 'Gmean', 'Bmean', 'Uvar', 'Rvar', 'Gvar', 'Bvar']
Feature shapes:  [('depth', (1879,)), ('top', (1879,)), ('base', (1879,)), ('pseudoGR', (1879, 32, 8))]
Loading Well:  204-20-3  from  /home/ross/Dropbox/core_data/facies/train_data
Extracted pGR features:  ['Umean', 'Rmean', 'Gmean', 'Bmean', 'Uvar', 'Rvar', 'Gvar', 'Bvar']
Feature shapes:  [('depth', (11392,)), ('top', (11392,)), ('base', (11392,)), ('pseudoGR', (11392, 32, 8))]
Loading Well:  204-20-6a  from  /home/ross/Dropbox/core_data/facies/train_data
Extracted pGR features:  ['Umean', 'Rmean', 'Gmean', 'Bmean', 'Uvar', 'Rvar', 'Gvar', 'Bvar']
Feature shapes:  [('depth', (3536,)), ('top', (3536,)), ('base', (3536,)), ('pseudoGR', (3536, 32, 8))]
Loading Well:  204-24a-6  from  /home/ross/Dropbox/core_data/facies/train_data
Extracted pGR features:  ['Umean', 'Rmean', 'Gmean', 'Bmean', 'Uvar', 'Rvar', 'Gvar', 'Bvar']
Feature shapes:  [('depth', (12152,)), ('top', (12152,)), ('base', (1215

  output_features.append(np.nanmean(img, axis=1))
  output_features.append(np.nanvar(img, axis=1))


Extracted pGR features:  ['Umean', 'Rmean', 'Gmean', 'Bmean', 'Uvar', 'Rvar', 'Gvar', 'Bvar']
Feature shapes:  [('depth', (8946,)), ('top', (8946,)), ('base', (8946,)), ('pseudoGR', (8946, 32, 8))]
Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor
Shapes of `(batch_X, batch_y)`: (16, 4096, 8), (16, 128, 7)
Epoch 1/20
Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where
Epoch 00001: val_acc improved from -inf to 0.25062, saving model to temp_best_weights.h5
Epoch 2/20
Epoch 00002: val_acc did not improve from 0.25062
Epoch 3/20
Epoch 00003: val_acc did not improve from 0.25062
Epoch 4/20
Epoch 00004: val_acc did not improve from 0.25062
Epoch 5/20
Epoch 00005: val_acc did not improve from 0.25062
Epoch 6/20
Epoch 00006: val_acc did not improve from 0.25062
Epoch 7/20
Epoch 00007: val_acc did not improve from 0.25062
[b'0', b'1', b'2', b'3', b'4', b'5', b'6', b'7']
Loading Well:  

  output_features.append(np.nanmean(img, axis=1))
  output_features.append(np.nanvar(img, axis=1))


Extracted pGR features:  ['Umean', 'Rmean', 'Gmean', 'Bmean', 'Uvar', 'Rvar', 'Gvar', 'Bvar']
Feature shapes:  [('depth', (8946,)), ('top', (8946,)), ('base', (8946,)), ('pseudoGR', (8946, 32, 8))]
Loading Well:  204-20-3  from  /home/ross/Dropbox/core_data/facies/train_data
Extracted pGR features:  ['Umean', 'Rmean', 'Gmean', 'Bmean', 'Uvar', 'Rvar', 'Gvar', 'Bvar']
Feature shapes:  [('depth', (11392,)), ('top', (11392,)), ('base', (11392,)), ('pseudoGR', (11392, 32, 8))]
Loading Well:  204-20-6a  from  /home/ross/Dropbox/core_data/facies/train_data
Extracted pGR features:  ['Umean', 'Rmean', 'Gmean', 'Bmean', 'Uvar', 'Rvar', 'Gvar', 'Bvar']
Feature shapes:  [('depth', (3536,)), ('top', (3536,)), ('base', (3536,)), ('pseudoGR', (3536, 32, 8))]
Loading Well:  204-24a-6  from  /home/ross/Dropbox/core_data/facies/train_data
Extracted pGR features:  ['Umean', 'Rmean', 'Gmean', 'Bmean', 'Uvar', 'Rvar', 'Gvar', 'Bvar']
Feature shapes:  [('depth', (12152,)), ('top', (12152,)), ('base', (1215

  output_features.append(np.nanmean(img, axis=1))
  output_features.append(np.nanvar(img, axis=1))


Extracted pGR features:  ['Umean', 'Rmean', 'Gmean', 'Bmean', 'Uvar', 'Rvar', 'Gvar', 'Bvar']
Feature shapes:  [('depth', (1879,)), ('top', (1879,)), ('base', (1879,)), ('pseudoGR', (1879, 32, 8))]
Shapes of `(batch_X, batch_y)`: (16, 4096, 8), (16, 128, 7)
Epoch 1/20
Epoch 00001: val_acc improved from -inf to 0.52344, saving model to temp_best_weights.h5
Epoch 2/20
Epoch 00002: val_acc did not improve from 0.52344
Epoch 3/20
Epoch 00003: val_acc did not improve from 0.52344
Epoch 4/20
Epoch 00004: val_acc did not improve from 0.52344
Epoch 5/20
Epoch 00005: val_acc did not improve from 0.52344
Epoch 6/20
Epoch 00006: val_acc did not improve from 0.52344
Epoch 7/20
Epoch 00007: val_acc did not improve from 0.52344
[b'0', b'1', b'2', b'3', b'4', b'5', b'6', b'7']
Loading Well:  204-19-3A  from  /home/ross/Dropbox/core_data/facies/train_data


  output_features.append(np.nanmean(img, axis=1))
  output_features.append(np.nanvar(img, axis=1))


Extracted pGR features:  ['Umean', 'Rmean', 'Gmean', 'Bmean', 'Uvar', 'Rvar', 'Gvar', 'Bvar']
Feature shapes:  [('depth', (8946,)), ('top', (8946,)), ('base', (8946,)), ('pseudoGR', (8946, 32, 8))]
Loading Well:  204-19-6  from  /home/ross/Dropbox/core_data/facies/train_data
Extracted pGR features:  ['Umean', 'Rmean', 'Gmean', 'Bmean', 'Uvar', 'Rvar', 'Gvar', 'Bvar']
Feature shapes:  [('depth', (1879,)), ('top', (1879,)), ('base', (1879,)), ('pseudoGR', (1879, 32, 8))]
Loading Well:  204-20-6a  from  /home/ross/Dropbox/core_data/facies/train_data
Extracted pGR features:  ['Umean', 'Rmean', 'Gmean', 'Bmean', 'Uvar', 'Rvar', 'Gvar', 'Bvar']
Feature shapes:  [('depth', (3536,)), ('top', (3536,)), ('base', (3536,)), ('pseudoGR', (3536, 32, 8))]
Loading Well:  204-24a-6  from  /home/ross/Dropbox/core_data/facies/train_data
Extracted pGR features:  ['Umean', 'Rmean', 'Gmean', 'Bmean', 'Uvar', 'Rvar', 'Gvar', 'Bvar']
Feature shapes:  [('depth', (12152,)), ('top', (12152,)), ('base', (12152,))

  output_features.append(np.nanmean(img, axis=1))
  output_features.append(np.nanvar(img, axis=1))


Extracted pGR features:  ['Umean', 'Rmean', 'Gmean', 'Bmean', 'Uvar', 'Rvar', 'Gvar', 'Bvar']
Feature shapes:  [('depth', (11392,)), ('top', (11392,)), ('base', (11392,)), ('pseudoGR', (11392, 32, 8))]
Shapes of `(batch_X, batch_y)`: (16, 4096, 8), (16, 128, 7)
Epoch 1/20
Epoch 00001: val_acc improved from -inf to 0.11865, saving model to temp_best_weights.h5
Epoch 2/20
Epoch 00002: val_acc improved from 0.11865 to 0.14160, saving model to temp_best_weights.h5
Epoch 3/20
Epoch 00003: val_acc improved from 0.14160 to 0.42765, saving model to temp_best_weights.h5
Epoch 4/20
Epoch 00004: val_acc improved from 0.42765 to 0.43768, saving model to temp_best_weights.h5
Epoch 5/20
Epoch 00005: val_acc did not improve from 0.43768
Epoch 6/20
Epoch 00006: val_acc did not improve from 0.43768
Epoch 7/20
Epoch 00007: val_acc improved from 0.43768 to 0.45761, saving model to temp_best_weights.h5
Epoch 8/20
Epoch 00008: val_acc did not improve from 0.45761
Epoch 9/20
Epoch 00009: val_acc improved fr

  output_features.append(np.nanmean(img, axis=1))
  output_features.append(np.nanvar(img, axis=1))


Extracted pGR features:  ['Umean', 'Rmean', 'Gmean', 'Bmean', 'Uvar', 'Rvar', 'Gvar', 'Bvar']
Feature shapes:  [('depth', (8946,)), ('top', (8946,)), ('base', (8946,)), ('pseudoGR', (8946, 32, 8))]
Loading Well:  204-19-6  from  /home/ross/Dropbox/core_data/facies/train_data
Extracted pGR features:  ['Umean', 'Rmean', 'Gmean', 'Bmean', 'Uvar', 'Rvar', 'Gvar', 'Bvar']
Feature shapes:  [('depth', (1879,)), ('top', (1879,)), ('base', (1879,)), ('pseudoGR', (1879, 32, 8))]
Loading Well:  204-20-3  from  /home/ross/Dropbox/core_data/facies/train_data
Extracted pGR features:  ['Umean', 'Rmean', 'Gmean', 'Bmean', 'Uvar', 'Rvar', 'Gvar', 'Bvar']
Feature shapes:  [('depth', (11392,)), ('top', (11392,)), ('base', (11392,)), ('pseudoGR', (11392, 32, 8))]
Loading Well:  204-24a-6  from  /home/ross/Dropbox/core_data/facies/train_data
Extracted pGR features:  ['Umean', 'Rmean', 'Gmean', 'Bmean', 'Uvar', 'Rvar', 'Gvar', 'Bvar']
Feature shapes:  [('depth', (12152,)), ('top', (12152,)), ('base', (12152

  output_features.append(np.nanmean(img, axis=1))
  output_features.append(np.nanvar(img, axis=1))


Extracted pGR features:  ['Umean', 'Rmean', 'Gmean', 'Bmean', 'Uvar', 'Rvar', 'Gvar', 'Bvar']
Feature shapes:  [('depth', (3536,)), ('top', (3536,)), ('base', (3536,)), ('pseudoGR', (3536, 32, 8))]
Shapes of `(batch_X, batch_y)`: (16, 4096, 8), (16, 128, 7)
Epoch 1/20
Epoch 00001: val_acc improved from -inf to 0.27474, saving model to temp_best_weights.h5
Epoch 2/20
Epoch 00002: val_acc did not improve from 0.27474
Epoch 3/20
Epoch 00003: val_acc improved from 0.27474 to 0.42810, saving model to temp_best_weights.h5
Epoch 4/20
Epoch 00004: val_acc did not improve from 0.42810
Epoch 5/20
Epoch 00005: val_acc did not improve from 0.42810
Epoch 6/20
Epoch 00006: val_acc did not improve from 0.42810
Epoch 7/20
Epoch 00007: val_acc did not improve from 0.42810
Epoch 8/20
Epoch 00008: val_acc improved from 0.42810 to 0.48539, saving model to temp_best_weights.h5
Epoch 9/20
Epoch 00009: val_acc did not improve from 0.48539
Epoch 10/20
Epoch 00010: val_acc did not improve from 0.48539
Epoch 11

  output_features.append(np.nanmean(img, axis=1))
  output_features.append(np.nanvar(img, axis=1))


Extracted pGR features:  ['Umean', 'Rmean', 'Gmean', 'Bmean', 'Uvar', 'Rvar', 'Gvar', 'Bvar']
Feature shapes:  [('depth', (8946,)), ('top', (8946,)), ('base', (8946,)), ('pseudoGR', (8946, 32, 8))]
Loading Well:  204-19-6  from  /home/ross/Dropbox/core_data/facies/train_data
Extracted pGR features:  ['Umean', 'Rmean', 'Gmean', 'Bmean', 'Uvar', 'Rvar', 'Gvar', 'Bvar']
Feature shapes:  [('depth', (1879,)), ('top', (1879,)), ('base', (1879,)), ('pseudoGR', (1879, 32, 8))]
Loading Well:  204-20-3  from  /home/ross/Dropbox/core_data/facies/train_data
Extracted pGR features:  ['Umean', 'Rmean', 'Gmean', 'Bmean', 'Uvar', 'Rvar', 'Gvar', 'Bvar']
Feature shapes:  [('depth', (11392,)), ('top', (11392,)), ('base', (11392,)), ('pseudoGR', (11392, 32, 8))]
Loading Well:  204-20-6a  from  /home/ross/Dropbox/core_data/facies/train_data
Extracted pGR features:  ['Umean', 'Rmean', 'Gmean', 'Bmean', 'Uvar', 'Rvar', 'Gvar', 'Bvar']
Feature shapes:  [('depth', (3536,)), ('top', (3536,)), ('base', (3536,))

  output_features.append(np.nanmean(img, axis=1))
  output_features.append(np.nanvar(img, axis=1))


Extracted pGR features:  ['Umean', 'Rmean', 'Gmean', 'Bmean', 'Uvar', 'Rvar', 'Gvar', 'Bvar']
Feature shapes:  [('depth', (12152,)), ('top', (12152,)), ('base', (12152,)), ('pseudoGR', (12152, 32, 8))]
Shapes of `(batch_X, batch_y)`: (16, 4096, 8), (16, 128, 7)
Epoch 1/20
Epoch 00001: val_acc improved from -inf to 0.27635, saving model to temp_best_weights.h5
Epoch 2/20
Epoch 00002: val_acc improved from 0.27635 to 0.34109, saving model to temp_best_weights.h5
Epoch 3/20
Epoch 00003: val_acc did not improve from 0.34109
Epoch 4/20
Epoch 00004: val_acc did not improve from 0.34109
Epoch 5/20
Epoch 00005: val_acc improved from 0.34109 to 0.59346, saving model to temp_best_weights.h5
Epoch 6/20
Epoch 00006: val_acc did not improve from 0.59346
Epoch 7/20
Epoch 00007: val_acc did not improve from 0.59346
Epoch 8/20
Epoch 00008: val_acc did not improve from 0.59346
Epoch 9/20
Epoch 00009: val_acc did not improve from 0.59346
Epoch 10/20
Epoch 00010: val_acc did not improve from 0.59346
Epoc

In [5]:
# Overall accuracy over all held-out wells.  Arguments are given in sklearn's
# (y_true, y_pred) order, matching the confusion-matrix cell; the value is
# unchanged because accuracy only counts matching entries.
accuracy_score(preds_df_3.y_true.values, preds_df_3.y_pred.values)

0.5701079501401562

In [7]:
# Confusion matrix over all held-out wells: rows are true classes, columns
# are predicted classes.
confusion_matrix(preds_df_3.y_true.values, preds_df_3.y_pred.values)

array([[8770, 1926,    0,    0,    0,  144,   85],
       [2453, 4379,    0,    0,    0,  407,  195],
       [  80,  147,    0,    0,    0,   96,   92],
       [ 702,  382,    0,    0,    0,  109,  220],
       [ 491,  184,    0,    0,    0,   57,  102],
       [1644, 1091,    0,    0,    0,  239, 1870],
       [ 364, 1518,    0,    0,    0,   57, 5730]])

In [18]:
# Every `*labelsII.npy` file in the default training directory.
labels_paths = list(defaults.DEFAULT_TRAIN_PATH.glob('*labelsII.npy'))

# Fold the b'nc' label into class b'0'.
# NOTE: this edits the label files in place; once run, the original b'nc'
# entries can no longer be told apart from b'0'.
for p in labels_paths:
    data = np.load(p)
    nc_mask = data == b'nc'
    if nc_mask.any():
        data[nc_mask] = b'0'
        # Write back only when something changed, so untouched label files
        # are not needlessly rewritten.
        np.save(p, data)

In [19]:
# Print the distinct label values found in each labels file, to check which
# classes each well contains.
for p in labels_paths:
    data = np.load(p)
    print(f'{p} : {np.unique(data)}')

/home/ross/Dropbox/core_data/facies/train_data/204-19-6_labelsII.npy : [b'0' b'1' b'3' b'4' b'5' b'6' b'7']
/home/ross/Dropbox/core_data/facies/train_data/204-24a-6_labelsII.npy : [b'0' b'1' b'3' b'4' b'5' b'6' b'7']
/home/ross/Dropbox/core_data/facies/train_data/204-20-3_labelsII.npy : [b'0' b'1' b'2' b'3' b'4' b'5' b'6' b'7']
/home/ross/Dropbox/core_data/facies/train_data/204-20-6a_labelsII.npy : [b'0' b'1' b'2' b'3' b'4' b'5' b'6' b'7']
/home/ross/Dropbox/core_data/facies/train_data/204-19-3A_labelsII.npy : [b'0' b'1' b'2' b'3' b'4' b'5' b'6' b'7']
