# IDB_drilling_monitoring_evaluation

## Import packages

In [None]:
import gc
import IPython
import librosa
import matplotlib.pyplot as plt
import numpy as np
import os
import pandas as pd
import scipy.io as scio
import soundfile
import time
from keras.layers import Activation, BatchNormalization, Concatenate, Conv1D, Conv2D, Dense, Dropout, Flatten
from keras.layers import GlobalAveragePooling1D, GlobalAveragePooling2D, Input, MaxPooling1D, MaxPooling2D
from keras.models import load_model, Model, Sequential
from keras.utils import to_categorical
from pyts.image import RecurrencePlot, MarkovTransitionField, GramianAngularField
from sklearn.metrics import confusion_matrix, mean_squared_error, mean_absolute_error, r2_score
from sklearn.model_selection import train_test_split
from tensorflow import keras
from tqdm import tqdm

!nvidia-smi
# Root directory that every project path below is built from.
HOST_path = "/root/autodl-tmp"

# SETS_path is the dataset root; SAVE_path is the evaluation directory that is
# created right below.
SETS_path = os.path.join(HOST_path, "IDB_drilling_monitoring/datasets")
SAVE_path = os.path.join(HOST_path, "IDB_drilling_monitoring/evaluation")

# exist_ok=True creates the directory only when it is missing, without a
# separate exists() check that another process could race against.
os.makedirs(SAVE_path, exist_ok=True)

## Import datasets

In [None]:
# Load X_set, X_set_cnn and Y_set from DATA_path (in that order).
# NOTE(review): DATA_path is only assigned in a later cell of this notebook,
# so that cell has to be run first — confirm the intended cell order.
X_set, X_set_cnn, Y_set = (
    np.load(os.path.join(DATA_path, _file))
    for _file in ("X_set.npy", "X_set_cnn.npy", "Y_set.npy")
)

# Print every shape as a quick sanity check of what was loaded.
for _loaded in (X_set, X_set_cnn, Y_set):
    print(_loaded.shape)

In [None]:
# Load every candidate target array from DATA_path; the variable names match
# the file stems so a target can later be selected by name.
# NOTE(review): DATA_path is only assigned in a later cell of this notebook —
# confirm the intended cell order.
ALL7, ALL6, HEIGHT, RPM, TORQUE, PUMPR, PUMPP, ROP, WOB = (
    np.load(os.path.join(DATA_path, _file))
    for _file in (
        "ALL7.npy",
        "ALL6.npy",
        "HEIGHT.npy",
        "RPM.npy",
        "TORQUE.npy",
        "PUMPR.npy",
        "PUMPP.npy",
        "ROP.npy",
        "WOB.npy",
    )
)

# Print the shapes in the same order the arrays were loaded.
for _loaded in (ALL7, ALL6, HEIGHT, RPM, TORQUE, PUMPR, PUMPP, ROP, WOB):
    print(_loaded.shape)

### Evaluation

#### Group

In [None]:
# Name of the target array to evaluate. Later cells use it to build the
# dataset sub-directory (DATA_path) and the "outputs_<DATA>" model directory,
# and resolve it to an array with eval(DATA).
# NOTE(review): no variable called ALL is loaded above (only ALL7, ALL6,
# HEIGHT, RPM, ...), so eval("ALL") would raise NameError — confirm the
# intended value.
DATA = "ALL"

In [None]:
# Working directory for the selected target: <SETS_path>/<DATA>.
DATA_path = os.path.join(SETS_path, DATA)

# exist_ok=True creates the directory only when it is missing, without a
# separate exists() check that another process could race against.
os.makedirs(DATA_path, exist_ok=True)

In [None]:
# Model input: the X_set_cnn array.
INPUT = X_set_cnn

# Resolve the target array from the name stored in DATA. A direct namespace
# lookup is used instead of eval(), so DATA can only select an existing
# variable and is never executed as an arbitrary expression.
if DATA not in globals():
    raise NameError(
        "name {!r} is not defined; DATA must name one of the loaded target arrays".format(DATA)
    )
OUTPUT = globals()[DATA]

In [None]:
# A 1-D target array counts as a single target; otherwise the size of the
# second axis is the number of targets.
num_targets = 1 if OUTPUT.ndim == 1 else OUTPUT.shape[1]

# 75/25 train/test split; the fixed random_state keeps the split reproducible.
x_train, x_test, y_train, y_test = train_test_split(
    INPUT,
    OUTPUT,
    test_size=0.25,
    random_state=42,
)

# Report the split shapes followed by the target count.
for _shown in (x_train.shape, y_train.shape, x_test.shape, y_test.shape, num_targets):
    print(_shown)

In [None]:
# Write the target splits into DATA_path.
for _fname, _split in (("y_train.npy", y_train), ("y_test.npy", y_test)):
    np.save(os.path.join(DATA_path, _fname), _split)

# Saving the input splits is currently disabled:
# np.save(os.path.join(DATA_path, "x_train.npy"), x_train)
# np.save(os.path.join(DATA_path, "x_test.npy"), x_test)

In [None]:
# Load best_model.hdf5 from the "outputs_<DATA>" directory of the selected target.
_model_rel = "IDB_drilling_monitoring/outputs_" + DATA + "/best_model.hdf5"
model = load_model(os.path.join(HOST_path, _model_rel))

In [None]:
# Run the model on both splits: training data first, then test data.
y_train_pred, y_test_pred = (model.predict(_x) for _x in (x_train, x_test))

# Show the prediction shapes.
for _pred in (y_train_pred, y_test_pred):
    print(_pred.shape)

In [None]:
# Write the predictions for both splits into DATA_path.
for _fname, _pred in (("y_train_pred.npy", y_train_pred), ("y_test_pred.npy", y_test_pred)):
    np.save(os.path.join(DATA_path, _fname), _pred)

In [None]:
# Overall regression metrics on the test split.
print("MAE =", mean_absolute_error(y_test, y_test_pred))  # absolute value of the residuals
_mse_all = mean_squared_error(y_test, y_test_pred)
print("MSE =", _mse_all)  # squaring amplifies the residuals
print("RMSE=", np.sqrt(_mse_all))  # the square root undoes that amplification
print("R^2 =", r2_score(y_test, y_test_pred))  # R^2 is a relative (ratio) value

In [None]:
# Per-target metrics, one column of y_test / y_test_pred at a time.
# Skipped entirely when there is only a single target.
if num_targets != 1:
    for target_idx in range(num_targets):
        _true = y_test[:, target_idx]
        _pred = y_test_pred[:, target_idx]
        print("index: ", str(target_idx))
        print("MAE =", mean_absolute_error(_true, _pred))
        _mse = mean_squared_error(_true, _pred)
        print("MSE =", _mse)
        print("RMSE=", np.sqrt(_mse))
        print("R^2 =", r2_score(_true, _pred))

#### Show and Save

In [None]:
# Load the training history stored in history.npy; .item() unwraps the single
# stored object (indexed below by 'loss' and 'val_loss').
# NOTE: allow_pickle=True unpickles arbitrary objects, so only load history
# files that come from a trusted source.
_history_file = os.path.join(HOST_path, "IDB_drilling_monitoring/outputs_" + DATA + "/history.npy")
hist = np.load(_history_file, allow_pickle=True).item()

In [None]:
# Plot the 'loss' and 'val_loss' curves from the history, save the figure as
# loss.png in DATA_path, then display it.
for _curve in ('loss', 'val_loss'):
    plt.plot(hist[_curve])

plt.title('Model Loss')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend(['Train set', 'Test set'], loc='upper right')

_loss_png = os.path.join(DATA_path, "loss.png")
plt.savefig(_loss_png, dpi=300, bbox_inches='tight', transparent=True)
plt.show()

In [None]:
def plot_train1(i):
    """Scatter true vs. predicted training values for target column ``i``.

    Both axes get the same limits (the union of the automatic x and y ranges),
    and the figure is saved in ``DATA_path`` as ``train_true_pred_<i>``.

    Args:
        i: Column index into ``y_train`` and ``y_train_pred``.
    """
    plt.figure(figsize=(5, 5))
    plt.scatter(
        y_train[:, i],
        y_train_pred[:, i],
        s=20,
        edgecolor="black",
        c="darkorange",
        label="data",
    )

    # Take the widest range covered by either axis and apply it to both.
    (x_lo, x_hi), (y_lo, y_hi) = plt.xlim(), plt.ylim()
    lower = min(x_lo, y_lo)
    upper = max(x_hi, y_hi)
    plt.xlim(lower, upper)
    plt.ylim(lower, upper)

    out_file = os.path.join(DATA_path, "train_true_pred_" + str(i))
    plt.savefig(out_file, dpi=300, bbox_inches='tight', transparent=True)
    
def plot_train0():
    """Scatter true vs. predicted training values for the single-target case.

    Both axes get the same limits (the union of the automatic x and y ranges),
    and the figure is saved in ``DATA_path`` as ``train_true_pred``.
    """
    plt.figure(figsize=(5, 5))
    plt.scatter(
        y_train,
        y_train_pred,
        s=20,
        edgecolor="black",
        c="darkorange",
        label="data",
    )

    # Take the widest range covered by either axis and apply it to both.
    (x_lo, x_hi), (y_lo, y_hi) = plt.xlim(), plt.ylim()
    lower = min(x_lo, y_lo)
    upper = max(x_hi, y_hi)
    plt.xlim(lower, upper)
    plt.ylim(lower, upper)

    out_file = os.path.join(DATA_path, "train_true_pred")
    plt.savefig(out_file, dpi=300, bbox_inches='tight', transparent=True)

In [None]:
# One figure for a single target, otherwise one figure per target column.
if num_targets == 1:
    plot_train0()
else:
    for _target in range(num_targets):
        plot_train1(_target)

In [None]:
def plot_test1(i):
    """Scatter true vs. predicted test values for target column ``i``.

    Both axes get the same limits (the union of the automatic x and y ranges),
    and the figure is saved in ``DATA_path`` as ``test_true_pred_<i>``.

    Args:
        i: Column index into ``y_test`` and ``y_test_pred``.
    """
    plt.figure(figsize=(5, 5))
    plt.scatter(
        y_test[:, i],
        y_test_pred[:, i],
        s=20,
        edgecolor="black",
        c="darkorange",
        label="data",
    )

    # Take the widest range covered by either axis and apply it to both.
    (x_lo, x_hi), (y_lo, y_hi) = plt.xlim(), plt.ylim()
    lower = min(x_lo, y_lo)
    upper = max(x_hi, y_hi)
    plt.xlim(lower, upper)
    plt.ylim(lower, upper)

    out_file = os.path.join(DATA_path, "test_true_pred_" + str(i))
    plt.savefig(out_file, dpi=300, bbox_inches='tight', transparent=True)
    
def plot_test0():
    """Scatter true vs. predicted test values for the single-target case.

    Both axes get the same limits (the union of the automatic x and y ranges),
    and the figure is saved in ``DATA_path`` as ``test_true_pred``.
    """
    plt.figure(figsize=(5, 5))
    plt.scatter(
        y_test,
        y_test_pred,
        s=20,
        edgecolor="black",
        c="darkorange",
        label="data",
    )

    # Take the widest range covered by either axis and apply it to both.
    (x_lo, x_hi), (y_lo, y_hi) = plt.xlim(), plt.ylim()
    lower = min(x_lo, y_lo)
    upper = max(x_hi, y_hi)
    plt.xlim(lower, upper)
    plt.ylim(lower, upper)

    out_file = os.path.join(DATA_path, "test_true_pred")
    plt.savefig(out_file, dpi=300, bbox_inches='tight', transparent=True)

In [None]:
# One figure for a single target, otherwise one figure per target column.
if num_targets == 1:
    plot_test0()
else:
    for _target in range(num_targets):
        plot_test1(_target)

In [None]:
def save_data(array):
    """Export a module-level array to ``<DATA_path>/<array>.csv``.

    Values are written with ``np.savetxt`` using ``', '`` as the delimiter and
    the ``'%f'`` format.

    Args:
        array: Name (a string) of the global variable holding the data to
            export. The same string is used as the CSV file stem.

    Raises:
        NameError: If no global variable with that name exists.
    """
    # Look the variable up by name instead of eval()-ing the string: eval would
    # execute any expression passed in, while only a plain variable name makes
    # sense here because the string also becomes the file name.
    try:
        arr = globals()[array]
    except KeyError:
        raise NameError("name {!r} is not defined".format(array)) from None
    np.savetxt(os.path.join(DATA_path, array + '.csv'), arr, delimiter=', ', fmt='%f')

In [None]:
# Export the true and predicted values of both splits as CSV files.
for _array_name in ("y_train", "y_train_pred", "y_test", "y_test_pred"):
    save_data(_array_name)