In [1]:
import numpy as np
import pandas as pd
from random import sample

import hyperopt
from hyperopt import hp, Trials
from hyperopt.pyll.base import scope

from sklearn.metrics import f1_score, log_loss, accuracy_score
from sklearn.utils.class_weight import compute_sample_weight

from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping

from coremdlr.models import NetworkModel
from coremdlr.config import defaults

from coremdlr.datasets import WellLoader, FaciesDataset
from coremdlr.datasets.utils import infer_test_wells

import matplotlib.pyplot as plt
%matplotlib inline

Using TensorFlow backend.


In [2]:
# Model configuration handed to hyperopt. Only `conv1x1` is an actual search
# dimension; every other entry is a fixed value.
BCNN_SEARCH_SPACE = dict(
    feature='image',
    network='bilinear_cnn',
    summary=False,
    sequence_size=32,
    network_args=dict(
        backbone_cnn='vgg16',
        # Quantized-uniform draw between 16 and 32 in steps of 4, cast to int.
        conv1x1=scope.int(hp.quniform('conv1x1', 16, 32, 4)),
        apply_rowwise=True,
        lstm_features=None,
        dropout_rate=0.5,
    ),
    optimizer_args=dict(optimizer='Adam'),
)

# Passed as `image_args` when building each FaciesDataset.
IMAGE_ARGS = dict(image_width=200, crop_method='center')


# Keyword arguments forwarded to `model.fit`.
fit_args = dict(batch_size=4, epochs=20, class_weighted=True)

# File that ModelCheckpoint writes the best weights to; reloaded after each fit.
CHKPT_FNAME = 'temp_best_weights.h5'

In [3]:
def run_experiment(num_train, max_evals=15, search_space=BCNN_SEARCH_SPACE):
    """Random-search model hyperparameters over a predefined group of train/test splits.

    Each hyperopt evaluation trains one model per split in
    `defaults.TRAIN_SETS_{num_train}`, restores the best-`val_loss` checkpoint,
    and scores it on that split's test wells. The objective minimised by
    hyperopt is the mean class-balanced log loss across splits.

    Parameters
    ----------
    num_train : int
        One of 3, 6 or 9; selects which `defaults.TRAIN_SETS_*` group to use.
    max_evals : int, optional
        Number of hyperopt evaluations to run.
    search_space : dict, optional
        hyperopt search space; each sampled point is passed to `NetworkModel`
        as `model_args`.

    Returns
    -------
    trials : hyperopt.Trials
        Record of every evaluation, including per-split accuracy, macro-F1
        and log-loss lists.
    best_params : dict
        Best parameter assignment as returned by `hyperopt.fmin`.

    Raises
    ------
    AssertionError
        If `num_train` is not one of the predefined split sizes.
    """
    assert num_train in [3, 6, 9], 'Must use one of predefined train/test splits'

    train_sets = getattr(defaults, f'TRAIN_SETS_{num_train}')

    def train_model(model_config):
        """hyperopt objective: train and score `model_config` on every split."""
        acc_scores, f1_scores, log_losses = [], [], []

        for train_set in train_sets:
            dset = FaciesDataset(train_set, infer_test_wells(train_set), features=['image'],
                                 image_args=IMAGE_ARGS)
            dset.load_or_generate_data()

            model = NetworkModel(dset, model_args=model_config)

            chkpt_callback = ModelCheckpoint(CHKPT_FNAME, monitor='val_loss', verbose=1,
                                             save_best_only=True,
                                             save_weights_only=True)
            earlystop_callback = EarlyStopping(monitor='val_loss', patience=3)

            # Build a per-fit copy instead of writing 'callbacks' into the shared
            # module-level `fit_args`; mutating the global leaks these callbacks
            # into every later `model.fit(..., **fit_args)` call in the notebook.
            trial_fit_args = {**fit_args,
                              'callbacks': [chkpt_callback, earlystop_callback]}

            print(f'Training with config: {model_config}')

            model.fit(dset, **trial_fit_args)

            # Score the best-val_loss weights rather than the last epoch's.
            model.network.load_weights(CHKPT_FNAME)

            y_proba = model.predict_proba(dset.X_test)
            y_pred = np.argmax(y_proba, -1)

            acc_scores.append(accuracy_score(dset.y_test, y_pred))

            f1_scores.append(f1_score(dset.y_test, y_pred, average='macro'))

            # Pass the label set explicitly: when a test split lacks one of the
            # classes, log_loss cannot match the classes it infers from y_test
            # against the columns of y_proba and raises.
            log_losses.append(log_loss(dset.y_test, y_proba,
                                       sample_weight=compute_sample_weight('balanced', dset.y_test),
                                       labels=np.arange(y_proba.shape[-1])))

        return {'loss' : np.mean(log_losses),
                'acc_scores' : acc_scores,
                'f1_scores' : f1_scores,
                'log_losses' : log_losses,
                'status' : hyperopt.STATUS_OK}

    trials = Trials()

    best_params = hyperopt.fmin(
        fn=train_model,
        space=search_space,  # honour the caller's space (was hard-wired to BCNN_SEARCH_SPACE)
        algo=hyperopt.rand.suggest,
        max_evals=max_evals,
        trials=trials
    )

    return trials, best_params

In [None]:
# Run a short search (3 evaluations) on the `TRAIN_SETS_3` splits.
# NOTE(review): `trails3` looks like a typo for `trials3`; the name is kept
# because the inspection cells that follow read it.
trails3, best_params3 = run_experiment(3, max_evals=3)

Loading Well:                                      
204-24a-7                                          
 from                                              
/home/ross/Dropbox/core_data/facies/train_data     
Feature shapes:                                    
[('depth', (19294,)), ('top', (19294,)), ('base', (19294,)), ('image', (19294, 32, 200, 3))]
Loading Well:                                      
204-24a-6                                          
 from                                              
/home/ross/Dropbox/core_data/facies/train_data     
Feature shapes:                                    
[('depth', (13006,)), ('top', (13006,)), ('base', (13006,)), ('image', (13006, 32, 200, 3))]
Loading Well:                                      
204-20-3                                           
 from                                              
/home/ross/Dropbox/core_data/facies/train_data     
Feature shapes:                                    
[('depth', (10906,)), ('top', (109




Loading Well:                                      
204-19-7                                           
 from                                              
/home/ross/Dropbox/core_data/facies/train_data     
Feature shapes:                                    
[('depth', (9807,)), ('top', (9807,)), ('base', (9807,)), ('image', (9807, 32, 200, 3))]
Loading Well:                                      
205-21b-3                                          
 from                                              
/home/ross/Dropbox/core_data/facies/train_data     
Feature shapes:                                    
[('depth', (3842,)), ('top', (3842,)), ('base', (3842,)), ('image', (3842, 32, 200, 3))]
Loading Well:                                      
204-20a-7                                          
 from                                              
/home/ross/Dropbox/core_data/facies/train_data     
Feature shapes:                                    
[('depth', (3105,)), ('top', (3105,)), ('b



W0718 15:21:31.241253 140291958511424 deprecation.py:506] From /home/ross/anaconda3/envs/core-dev/lib/python3.7/site-packages/tensorflow/python/ops/init_ops.py:1251: calling VarianceScaling.__init__ (from tensorflow.python.ops.init_ops) with dtype is deprecated and will be removed in a future version.
Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor



Training with config: {'feature': 'image', 'network': 'bilinear_cnn', 'network_args': {'apply_rowwise': True, 'backbone_cnn': 'vgg16', 'conv1x1': 20, 'dropout_rate': 0.5, 'lstm_features': None}, 'optimizer_args': {}, 'sequence_size': 32, 'summary': False}
Shapes of `(batch_X, batch_y)`: (4, 1024, 200, 3), (4, 32, 5)
Epoch 1/20                                         
  0%|          | 0/3 [01:44<?, ?it/s, best loss: ?]

In [None]:
# Result dict of each evaluation (loss, per-split score lists, status).
trails3.results

In [None]:
# Best parameter assignment returned by hyperopt.fmin for the run above.
best_params3

In [None]:
# Full hyperopt record of the lowest-loss evaluation.
trails3.best_trial

In [None]:
def pred_results_df(num_train, model_config):
    """Train `model_config` on each predefined split and collect test-well predictions.

    For every split in `defaults.TRAIN_SETS_{num_train}` a pseudoGR dataset is
    built, a model is trained with best-`val_loss` checkpointing, the best
    weights are restored, and `model.preds_dataframe` is collected for each
    test well.

    Parameters
    ----------
    num_train : int
        One of 3, 6 or 9; selects which `defaults.TRAIN_SETS_*` group to use.
    model_config : dict
        Passed to `NetworkModel` as `model_args`.

    Returns
    -------
    pandas.DataFrame
        Concatenation of the per-well prediction frames, with added `well`
        (test well name) and `set_number` (index of the split) columns.

    Raises
    ------
    AssertionError
        If `num_train` is not one of the predefined split sizes.
    """
    assert num_train in [3, 6, 9], 'Must use one of predefined train/test splits'

    train_sets = getattr(defaults, f'TRAIN_SETS_{num_train}')

    pred_dfs = []

    for i, train_set in enumerate(train_sets):
        dset = FaciesDataset(train_set, infer_test_wells(train_set), features=['pseudoGR'],
                             pseudoGR_args={'features' : ['mean', 'var'], 'per_channel' : True})

        dset.load_or_generate_data()

        model = NetworkModel(dset, model_args=model_config)

        chkpt_callback = ModelCheckpoint(CHKPT_FNAME, monitor='val_loss', verbose=1,
                                         save_best_only=True,
                                         save_weights_only=True)

        # Merge into one kwargs dict so this function's settings win. Passing
        # `callbacks=` / `verbose=` next to `**fit_args` raises TypeError
        # ("multiple values for keyword argument") as soon as the shared
        # `fit_args` carries either key, e.g. after `run_experiment` has run.
        fit_kwargs = {**fit_args, 'callbacks': [chkpt_callback], 'verbose': False}
        model.fit(dset, **fit_kwargs)

        # Predict with the best-val_loss weights rather than the last epoch's.
        model.network.load_weights(CHKPT_FNAME)

        for test_well_name in dset.test_well_names:
            df = model.preds_dataframe(test_well_name)
            df['well'] = test_well_name
            df['set_number'] = i
            pred_dfs.append(df)

    return pd.concat(pred_dfs)

In [None]:
# Fixed wavenet configuration for the pseudoGR feature (no search dimensions).
params = dict(
    feature='pseudoGR',
    network='wavenet',
    summary=False,
    sequence_size=128,
    network_args=dict(
        num_blocks=8,
        block_filters=208,
        residual_filters=40,
        output_resolution=32,
        dropout_rate=0.5,
    ),
    optimizer_args=dict(optimizer='Adam'),
)

# Train on the `TRAIN_SETS_3` splits and persist the per-well predictions.
preds_df_3 = pred_results_df(3, params)
preds_df_3.to_csv('pgr_wavenet_results_train_sets3.csv')

In [None]:
# Accuracy pooled over every row of the prediction frame.
# sklearn's signature is (y_true, y_pred); accuracy is symmetric, but keeping
# the documented order avoids a wrong result if the metric is later swapped
# for an asymmetric one.
accuracy_score(preds_df_3.y_true.values, preds_df_3.y_pred.values)