In [1]:
import sys
sys.path.insert(0, '../')
from gw_tools import data_prep
from gw_tools.gw_cnn import gw_cnn

import numpy as np
import pandas as pd
import pickle

from sklearn.linear_model import LinearRegression
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler

In [2]:
## Output folders for the pickled predictions and scores (both point at the same folder)
prediction_path = '../data/prediction_pickles/'
score_path = '../data/prediction_pickles/'


def _cnn_params(c1, c2, c3):
    """Build the parameter dict for one well's CNN.

    Each argument is a ``(filters, kernel)`` pair for convolution layers 1, 2
    and 3 respectively. Every other setting is identical across wells, so it
    is written once here. Keys keep the same order for every well.
    """
    (c1_filters, c1_kernel), (c2_filters, c2_kernel), (c3_filters, c3_kernel) = c1, c2, c3
    return {
        'model__BATCH_SIZE': 32,
        'model__C1_FILTERS': c1_filters,
        'model__C1_KERNEL': c1_kernel,
        'model__C1_LAYER': True,
        'model__C2_FILTERS': c2_filters,
        'model__C2_KERNEL': c2_kernel,
        'model__C2_LAYER': True,
        'model__C3_FILTERS': c3_filters,
        'model__C3_KERNEL': c3_kernel,
        'model__C3_LAYER': True,
        'model__D_MAX_LAYERS': 8,
        'model__D_MIN_UNITS': 3,
        'model__D_TOP_UNITS': 150,
        'model__D_UNIT_SCALE': 0.75,
        'model__EPOCHS': 100,
        'model__LEARNING_RATE': 0.001,
        'model__STOP_DELTA': 0.1,
    }


## Per-well parameters; the RMSE noted above each entry is the value recorded
## alongside that parameter set (presumably from tuning -- TODO confirm).

## RMSE: 2.3402760903147106
params_AEK201 = _cnn_params(c1=(64, 16), c2=(64, 12), c3=(64, 36))

## RMSE: 2.45326918726607
params_AFL259 = _cnn_params(c1=(32, 8), c2=(128, 24), c3=(64, 18))

## RMSE: 1.9353950671555313
params_APK309 = _cnn_params(c1=(16, 8), c2=(32, 24), c3=(64, 18))

## RMSE: 2.416546971184388
params_APK310 = _cnn_params(c1=(32, 8), c2=(128, 24), c3=(64, 18))

## Well identifiers and their parameter sets, kept in matching order
wells = ['AEK201', 'AFL259', 'APK309', 'APK310']
well_params = [params_AEK201, params_AFL259, params_APK309, params_APK310]

## Score table template: one row per model, scores start at zero
_model_labels = ['Baseline', 'Linear Regression', 'CNN', 'RNN (fake)']
_zero_scores = [0] * len(_model_labels)
score_summary = pd.DataFrame({'Model': _model_labels,
                              'RMSE': _zero_scores,
                              'MAE': _zero_scores})

In [3]:
## Seed for the synthetic noise behind the placeholder 'RNN (fake)' column, so that
## re-running this cell reproduces the same fake predictions and scores.
## NOTE: this does not make the CNN training itself deterministic.
NOISE_SEED = 42
rng = np.random.default_rng(NOISE_SEED)

## Number of trailing rows (up to each well's end date) that are scored
HOLDOUT_ROWS = 365


def _rmse(actual, predicted):
    """Root-mean-squared error between the actual values and the predictions."""
    return np.sqrt(np.mean((actual - predicted)**2))


def _mae(actual, predicted):
    """Mean absolute error between the actual values and the predictions."""
    return np.mean(np.abs(actual - predicted))


## Define a scaler (shared object; each pipeline fit below refits it on that well's data)
scaler =  StandardScaler(copy=True)

for well, params in zip(wells, well_params):
    ## Prepare the data for training
    df = data_prep.load_data(well)
    df = data_prep.select_features(df)
    df = data_prep.add_toy_signal(df)
    X_train, X_holdout, y_train, y_holdout, dt_train, dt_holdout = data_prep.prep_data_for_training(df)

    ## Get average for baseline prediction
    train_mean = y_train.mean()

    ## Train the linear regression model
    model_lr = LinearRegression(copy_X=True)
    model_lr.fit(X_train, y_train)

    ## Train the CNN with this well's parameters
    model_cnn = gw_cnn()
    pipe_cnn = Pipeline([('scaler', scaler), ('model', model_cnn)])
    pipe_cnn.set_params(**params)
    pipe_cnn.fit(X_train, y_train)

    ## Prepare data for predictions and scoring
    well_data = data_prep.load_data(well)
    features = data_prep.load_data('FEATS')
    features = data_prep.select_features(features, no_target=True)
    features = features.loc[features.date >= data_prep.get_start_date(well)].copy()
    features = data_prep.add_toy_signal(features).dropna().copy()

    ## Feature matrix for the full date range (everything except the date column)
    X_full = features.drop('date', axis=1).values

    ## Keep track of the length of predictions
    len_pred = len(y_holdout)
    len_pred_full = features.shape[0]

    ## Make baseline predictions
    ## (the holdout-only predictions in this cell are not used for scoring below;
    ## they are kept so the notebook namespace is unchanged)
    pred_avg = np.ones(len_pred)*train_mean
    pred_avg_full = np.ones(len_pred_full)*train_mean

    ## Make linear regression predictions
    pred_lin = model_lr.predict(X_holdout)
    pred_lin_full = model_lr.predict(X_full)

    ## Make CNN predictions
    pred_cnn = pipe_cnn.predict(X_holdout)
    pred_cnn_full = pipe_cnn.predict(X_full)

    ## Gather the predictions and actual data into a single dataframe.
    ## The frame is anchored on the feature dates (left merge) so it has exactly one
    ## row per prediction, in the same order as the prediction arrays assigned below.
    full_predict = features[['date']].merge(well_data[['date', 'avg_well_depth']], on='date', how='left')
    full_predict['pred_baseline'] = pred_avg_full
    full_predict['pred_lin_reg'] = pred_lin_full
    full_predict['pred_cnn'] = pred_cnn_full

    ## Add a placeholder column for RNN predictions: the CNN predictions plus seeded noise
    noise = rng.normal(0, 2, len_pred_full)
    full_predict['pred_rnn_fake'] = full_predict['pred_cnn'] + noise

    ## Isolate the prediction on the holdout set for scoring
    in_range = full_predict['date'] <= data_prep.get_end_date(well)
    test_predict = full_predict.loc[in_range].iloc[-HOLDOUT_ROWS:].copy()

    ## Compute scores on the holdout set
    actual = test_predict.avg_well_depth.values

    RMSE_baseline = _rmse(actual, test_predict.pred_baseline)
    MAE_baseline = _mae(actual, test_predict.pred_baseline)

    RMSE_lin_reg = _rmse(actual, test_predict.pred_lin_reg)
    MAE_lin_reg = _mae(actual, test_predict.pred_lin_reg)

    RMSE_cnn = _rmse(actual, test_predict.pred_cnn)
    MAE_cnn = _mae(actual, test_predict.pred_cnn)

    RMSE_rnn_fake = _rmse(actual, test_predict.pred_rnn_fake)
    MAE_rnn_fake = _mae(actual, test_predict.pred_rnn_fake)

    ## Update the scores in the dataframe (overwritten for every well, then pickled)
    score_summary.RMSE = [RMSE_baseline, RMSE_lin_reg, RMSE_cnn, RMSE_rnn_fake]
    score_summary.MAE = [MAE_baseline, MAE_lin_reg, MAE_cnn, MAE_rnn_fake]

    ## Save the results for this well in a dataframe
    prediction_filename = 'model_predictions_'+well+'.pkl'
    score_filename = 'model_scores_'+well+'.pkl'

    full_predict.to_pickle(prediction_path+prediction_filename)
    score_summary.to_pickle(score_path+score_filename)

2023-11-21 11:46:43.034801: I metal_plugin/src/device/metal_device.cc:1154] Metal device set to: Apple M2 Max
2023-11-21 11:46:43.034818: I metal_plugin/src/device/metal_device.cc:296] systemMemory: 64.00 GB
2023-11-21 11:46:43.034822: I metal_plugin/src/device/metal_device.cc:313] maxCacheSize: 24.00 GB
2023-11-21 11:46:43.034862: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:303] Could not identify NUMA node of platform GPU ID 0, defaulting to 0. Your kernel may not have been built with NUMA support.
2023-11-21 11:46:43.034875: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:269] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 0 MB memory) -> physical PluggableDevice (device: 0, name: METAL, pci bus id: <undefined>)


Epoch 1/100


2023-11-21 11:46:43.494084: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.


Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
  1/206 [..............................] - ETA: 1s

2023-11-21 11:46:57.103138: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.


Epoch 1/100
  1/124 [..............................] - ETA: 46s - loss: 8335.2021

2023-11-21 11:46:58.079921: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.


Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100


2023-11-21 11:47:09.841479: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.


Epoch 1/100
  1/115 [..............................] - ETA: 42s - loss: 2230.7771

2023-11-21 11:47:10.831200: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.


Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
  1/197 [..............................] - ETA: 3s

2023-11-21 11:47:23.616292: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.


Epoch 1/100
  1/113 [..............................] - ETA: 39s - loss: 12991.8379

2023-11-21 11:47:24.534306: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.


Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
  1/197 [..............................] - ETA: 1s

2023-11-21 11:47:34.168098: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.




In [4]:
## Display the saved scores for well AEK201, read from the shared score_path folder
pd.read_pickle(score_path + 'model_scores_AEK201.pkl')

Unnamed: 0,Model,RMSE,MAE
0,Baseline,5.149668,4.179381
1,Linear Regression,7.535651,6.660118
2,CNN,7.290208,6.464932
3,RNN (fake),7.661599,6.728764


In [5]:
## Display the saved scores for well AFL259, read from the shared score_path folder
pd.read_pickle(score_path + 'model_scores_AFL259.pkl')

Unnamed: 0,Model,RMSE,MAE
0,Baseline,7.941349,6.351202
1,Linear Regression,8.405366,6.231486
2,CNN,7.144537,5.755452
3,RNN (fake),7.543401,6.120652


In [6]:
## Display the saved scores for well APK309, read from the shared score_path folder
pd.read_pickle(score_path + 'model_scores_APK309.pkl')

Unnamed: 0,Model,RMSE,MAE
0,Baseline,3.535709,2.666648
1,Linear Regression,3.738059,2.991859
2,CNN,3.790616,3.160527
3,RNN (fake),4.433306,3.542046


In [7]:
## Display the saved scores for well APK310, read from the shared score_path folder
pd.read_pickle(score_path + 'model_scores_APK310.pkl')

Unnamed: 0,Model,RMSE,MAE
0,Baseline,5.060397,3.689585
1,Linear Regression,2.324117,1.571088
2,CNN,2.278128,1.747107
3,RNN (fake),3.06487,2.453561
