In [1]:
import keras

Using TensorFlow backend.
  return f(*args, **kwds)


In [2]:
from keras.models import Model
from keras.layers import Input, LSTM, Dense
from keras.callbacks import ModelCheckpoint
from keras.utils import to_categorical
import numpy as np

In [3]:
import pandas as pd
# Load the training table; later cells read its `idWlb`, `FM` and `S` columns.
# NOTE(review): unpickling can execute arbitrary code - only load a
# 'training_set.pkl' that comes from a trusted source.
df = pd.read_pickle('training_set.pkl')
# Preview the first rows as the cell output.
df.head()

Unnamed: 0,idWlb,FM,S
0,82.0,82.0,95
1,82.0,82.0,95
2,82.0,82.0,95
3,82.0,82.0,95
4,82.0,82.0,95


In [4]:

# Split the wells into a fixed blind set and a training set, then turn every
# training well into a pair of (n_samples, 1) integer arrays:
# the `S` column as model input and the `FM` column (re-coded) as target.
wells = df.idWlb.drop_duplicates().tolist()
formations = [int(f) for f in df.FM.drop_duplicates()]
print(formations)

# Wells excluded from training; every other well id found in `df` is trained on.
blind_wells = [1486, 107, 1140, 6678, 874, 82, 1411, 3558, 5916, 4244]
training_wells = np.setdiff1d(wells, blind_wells)

print(len(blind_wells))
print(len(training_wells))

# One two-column frame (`S`, `FM`) per training well.
list_of_wells = [df.loc[df.idWlb == well, ['S', 'FM']] for well in training_wells]

# Element-wise map from a formation code to its position in `formations`,
# so targets become small consecutive integers starting at 0.
to_formation_id = np.vectorize(lambda x: formations.index(x))

# `np.int` was only an alias of the builtin `int` and has been removed from
# NumPy (1.24+); the builtin gives the same dtype and works on every version.
input_sequences = [
    np.expand_dims(well_df['S'].values.astype(int), axis=1)
    for well_df in list_of_wells
]
output_sequences = [
    np.expand_dims(to_formation_id(well_df['FM'].values.astype(int)), axis=1)
    for well_df in list_of_wells
]

[82, 63, 159, 118, 174, 46, 150, 83, 47]
10
43


In [11]:
from keras.preprocessing.sequence import TimeseriesGenerator

# Hyper-parameters shared by the generator, model and inference cells.
batch_size = 1
time_step = 5
latent_dim=256

# One sliding-window generator per training well: each sample is a window of
# `time_step` consecutive inputs paired with a single target value.
training_set = [
    TimeseriesGenerator(well_symbols, well_targets,
                        length=time_step,
                        sampling_rate=1,
                        batch_size=batch_size)
    for well_symbols, well_targets in zip(input_sequences, output_sequences)
]
print(len(training_set))

43


In [8]:
# Network graph: a window of `time_step` scalar inputs -> one LSTM layer with
# `latent_dim` units -> a single linear output unit.
window_shape = (batch_size, time_step, 1)

input = Input(batch_shape=window_shape)
lstm = LSTM(latent_dim, batch_input_shape=window_shape)(input)
output = Dense(1)(lstm)

In [9]:
# Wrap the graph into a trainable model. It is fitted as a regression
# (mean squared error) with RMSprop; no extra metrics are tracked.
model = Model(inputs=input, outputs=output)
model.compile(loss='mse', optimizer='rmsprop')
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         (1, 5, 1)                 0         
_________________________________________________________________
lstm_1 (LSTM)                (1, 256)                  264192    
_________________________________________________________________
dense_1 (Dense)              (1, 1)                    257       
Total params: 264,449
Trainable params: 264,449
Non-trainable params: 0
_________________________________________________________________


In [11]:
# Keep the best weights seen so far on disk.
# The model in this notebook is compiled without `metrics`, so the training
# logs only contain 'loss'. Monitoring 'acc' would make the callback skip
# every save (the monitored value is never available); monitor the loss that
# is actually logged and minimise it instead.
checkpoint = ModelCheckpoint('./1xLSTM_intervals.acc.hdf5', monitor='loss', verbose=1, save_best_only=True, mode='min')

# Number of full passes over all training wells.
num_iter = 1
# One Keras History object per (iteration, well) fit call.
history = []

for i in range(num_iter):
    print("iter", i)
    # Wells have different lengths, so each one has its own generator and is
    # fitted for a single epoch in turn.
    for w, well in enumerate(training_set):
        print("well", w)
        history.append(model.fit_generator(well,
                                           epochs=1,
                                           callbacks=[checkpoint]))

iter 0
well 0
Epoch 1/1
well 1
Epoch 1/1
   18/10756 [..............................] - ETA: 1:08 - loss: 10.9391



well 2
Epoch 1/1
well 3
Epoch 1/1
well 4
Epoch 1/1
well 5
Epoch 1/1
well 6
Epoch 1/1
well 7
Epoch 1/1
well 8
Epoch 1/1
well 9
Epoch 1/1
well 10
Epoch 1/1
well 11
Epoch 1/1
well 12
Epoch 1/1
well 13
Epoch 1/1
well 14
Epoch 1/1
well 15
Epoch 1/1
well 16
Epoch 1/1
well 17
Epoch 1/1
well 18
Epoch 1/1
well 19
Epoch 1/1
well 20
Epoch 1/1
well 21
Epoch 1/1
well 22
Epoch 1/1
well 23
Epoch 1/1
well 24
Epoch 1/1
well 25
Epoch 1/1
well 26
Epoch 1/1
well 27
Epoch 1/1
well 28
Epoch 1/1
well 29
Epoch 1/1
well 30
Epoch 1/1
well 31
Epoch 1/1
well 32
Epoch 1/1
well 33
Epoch 1/1
well 34
Epoch 1/1
well 35
Epoch 1/1
well 36
Epoch 1/1
well 37
Epoch 1/1
well 38
Epoch 1/1
well 39
Epoch 1/1
well 40
Epoch 1/1
well 41
Epoch 1/1
well 42
Epoch 1/1


In [7]:
import os

# Reuse a previously saved model when one exists; otherwise persist the model
# trained above so that later runs can skip training. This replaces toggling
# a commented-out `model.save(...)` by hand, which made the cell fail on a
# fresh run when the file was missing.
model_path = 'lstm_1iter_step5.hdf5'
if os.path.exists(model_path):
    model = keras.models.load_model(model_path)
else:
    model.save(model_path)

In [8]:
# Build the inference data the same way as the training data: one
# (`S`, `FM`) frame per blind well, then (n_samples, 1) integer arrays.
list_of_blind_wells = [df.loc[df.idWlb == well, ['S', 'FM']] for well in blind_wells]

# `np.int` was only an alias of the builtin `int` and has been removed from
# NumPy (1.24+); the builtin gives the same dtype and works on every version.
predict_input = [
    np.expand_dims(well_df['S'].values.astype(int), axis=1)
    for well_df in list_of_blind_wells
]
# Targets are re-coded to positions in `formations` via `to_formation_id`.
predict_target = [
    np.expand_dims(to_formation_id(well_df['FM'].values.astype(int)), axis=1)
    for well_df in list_of_blind_wells
]

In [12]:
from keras.preprocessing.sequence import TimeseriesGenerator

# One sliding-window generator per blind well, configured exactly like the
# training generators (window of `time_step` samples, stride 1).
inference_set = [
    TimeseriesGenerator(well_symbols, well_targets,
                        length=time_step,
                        sampling_rate=1,
                        batch_size=batch_size)
    for well_symbols, well_targets in zip(predict_input, predict_target)
]

In [29]:
import math

# Largest valid position in `formations`.
last_formation_idx = len(formations) - 1

# results[k] is the list of predicted formation codes for the k-th blind well,
# one entry per window produced by that well's generator.
results = []
for w, well_data in enumerate(inference_set):
    print(w, len(well_data))
    result = []
    for n in range(len(well_data)):
        symbols, _ = well_data[n]
        # The network emits one unbounded real number per window.
        r = model.predict(symbols)[0][0]
        # Round half up to the nearest class position, then clamp it into the
        # valid range: a negative value would otherwise silently index from
        # the end of `formations`, and a too-large one would raise IndexError.
        idx = min(max(math.floor(r + 0.5), 0), last_formation_idx)
        result.append(formations[idx])
    results.append(result)
    
# print(results)

0 6995
1 15572
2 5380
3 7050
4 10643
5 3483
6 11273
7 4265
8 13279
9 2934


In [30]:
# Show the blind-well ids; `results` holds one prediction list per id, in this order.
print(blind_wells)

[1486, 107, 1140, 6678, 874, 82, 1411, 3558, 5916, 4244]


In [33]:
import gzip
import cloudpickle as pickle

# Write two consecutive pickles into one gzip stream: the predictions first,
# then the blind-well ids. A reader has to call load() twice, in that order.
with gzip.GzipFile('lstm_step5.gzip', 'w') as fs:
    for payload in (results, blind_wells):
        pickle.dump(payload, fs)
