In [2]:
import pandas as pd
import numpy as np
from sklearn.model_selection import KFold
from sklearn.preprocessing import KBinsDiscretizer, StandardScaler
from sklearn.pipeline import Pipeline
from tensorflow.keras import Sequential
from tensorflow.keras import layers, regularizers
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping
import joblib
from transformers import Discretizer  # Import the Discretizer class

# Load datasets
experimental_data = pd.read_csv("EXPERIMENTAL_DATA_RENAMED.csv")
rashidi_data = pd.read_csv("Rashidi.csv")

# Define features and target
features = [
    "Solution Gas Oil Ratio, (SCF/STB)", 
    "Temperature, (F)", 
    "API, (-)", 
    "Bubble Point Pressure, (Psi)"
]
target = "Oil Formation Volume Factor, (bbl/STB)"

# Prepare datasets
experimental_data = experimental_data[features + ["Bo"]].rename(columns={"Bo": target}).dropna()
rashidi_data = rashidi_data[features + [target]].dropna()

# Combine datasets
combined_data = pd.concat([experimental_data, rashidi_data], ignore_index=True)

# Split features and target
X = combined_data[features].values
y = combined_data[target].values

# Define the pipeline
pipeline = Pipeline([
    ('discretizer', Discretizer(feature_indices=[0, 1, 3])),
    ('scaler', StandardScaler()),
])

# K-Fold cross-validation
kf = KFold(n_splits=5, shuffle=True, random_state=42)

# Prepare a function to create the model
def create_model():
    model = Sequential([
        layers.Input(shape=(X.shape[1],)),
        layers.Dense(128, activation='relu', kernel_regularizer=regularizers.l2(1e-6)),
        layers.Dense(137, activation='relu', kernel_regularizer=regularizers.l2(1e-6)),
        layers.Dense(163, activation='relu', kernel_regularizer=regularizers.l2(1e-6)),
        layers.Dense(1)
    ])
    model.compile(optimizer=Adam(learning_rate=0.001), loss='mean_squared_error', metrics=['mae'])
    return model

# Train the model using K-Fold
for train_index, val_index in kf.split(X):
    X_train, X_val = X[train_index], X[val_index]
    y_train, y_val = y[train_index], y[val_index]

    # Preprocess the data
    X_train_transformed = pipeline.fit_transform(X_train, y_train)
    X_val_transformed = pipeline.transform(X_val)

    # Create and train the model
    model = create_model()
    early_stopping = EarlyStopping(monitor='val_loss', patience=5, verbose=1)

    history = model.fit(
        X_train_transformed, y_train,
        validation_data=(X_val_transformed, y_val),
        epochs=1000,
        batch_size=32,
        callbacks=[early_stopping],
        verbose=1
    )

# Save the preprocessing pipeline
joblib.dump(pipeline, 'preprocessing_pipeline.pkl')

# Save the trained Keras model
model.save('trained_model.h5')

print("Pipeline and model saved successfully!")

Epoch 1/1000




[1m21/21[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 18ms/step - loss: 1.3446 - mae: 1.0381 - val_loss: 0.2001 - val_mae: 0.3634
Epoch 2/1000
[1m21/21[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - loss: 0.1953 - mae: 0.3565 - val_loss: 0.1904 - val_mae: 0.3402
Epoch 3/1000
[1m21/21[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - loss: 0.1464 - mae: 0.2986 - val_loss: 0.1081 - val_mae: 0.2638
Epoch 4/1000
[1m21/21[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - loss: 0.0914 - mae: 0.2398 - val_loss: 0.0679 - val_mae: 0.2092
Epoch 5/1000
[1m21/21[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 0.0536 - mae: 0.1806 - val_loss: 0.0417 - val_mae: 0.1577
Epoch 6/1000
[1m21/21[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - loss: 0.0338 - mae: 0.1342 - val_loss: 0.0231 - val_mae: 0.1117
Epoch 7/1000
[1m21/21[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 0.0168



[1m21/21[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 19ms/step - loss: 1.0770 - mae: 0.9188 - val_loss: 0.1889 - val_mae: 0.3507
Epoch 2/1000
[1m21/21[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - loss: 0.1982 - mae: 0.3629 - val_loss: 0.1214 - val_mae: 0.2772
Epoch 3/1000
[1m21/21[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - loss: 0.1116 - mae: 0.2680 - val_loss: 0.0868 - val_mae: 0.2345
Epoch 4/1000
[1m21/21[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - loss: 0.0795 - mae: 0.2226 - val_loss: 0.0527 - val_mae: 0.1670
Epoch 5/1000
[1m21/21[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - loss: 0.0412 - mae: 0.1534 - val_loss: 0.0333 - val_mae: 0.1239
Epoch 6/1000
[1m21/21[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 0.0270 - mae: 0.1076 - val_loss: 0.0244 - val_mae: 0.0796
Epoch 7/1000
[1m21/21[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - loss: 0.0186



[1m21/21[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 20ms/step - loss: 1.0340 - mae: 0.8881 - val_loss: 0.1898 - val_mae: 0.3592
Epoch 2/1000
[1m21/21[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 0.1594 - mae: 0.3207 - val_loss: 0.1362 - val_mae: 0.2945
Epoch 3/1000
[1m21/21[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 0.0982 - mae: 0.2500 - val_loss: 0.0976 - val_mae: 0.2375
Epoch 4/1000
[1m21/21[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - loss: 0.0536 - mae: 0.1838 - val_loss: 0.0667 - val_mae: 0.1791
Epoch 5/1000
[1m21/21[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - loss: 0.0398 - mae: 0.1441 - val_loss: 0.0428 - val_mae: 0.1222
Epoch 6/1000
[1m21/21[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - loss: 0.0202 - mae: 0.0974 - val_loss: 0.0306 - val_mae: 0.0993
Epoch 7/1000
[1m21/21[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 0.0156



[1m21/21[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 21ms/step - loss: 1.0838 - mae: 0.9135 - val_loss: 0.1761 - val_mae: 0.3385
Epoch 2/1000
[1m21/21[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 0.1862 - mae: 0.3472 - val_loss: 0.1607 - val_mae: 0.3264
Epoch 3/1000
[1m21/21[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 0.1148 - mae: 0.2749 - val_loss: 0.1060 - val_mae: 0.2595
Epoch 4/1000
[1m21/21[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - loss: 0.0771 - mae: 0.2188 - val_loss: 0.0763 - val_mae: 0.2077
Epoch 5/1000
[1m21/21[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - loss: 0.0584 - mae: 0.1830 - val_loss: 0.0567 - val_mae: 0.1742
Epoch 6/1000
[1m21/21[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - loss: 0.0407 - mae: 0.1462 - val_loss: 0.0411 - val_mae: 0.1365
Epoch 7/1000
[1m21/21[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 0.0299



[1m21/21[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 19ms/step - loss: 1.1654 - mae: 0.9539 - val_loss: 0.1784 - val_mae: 0.3543
Epoch 2/1000
[1m21/21[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - loss: 0.2096 - mae: 0.3813 - val_loss: 0.1183 - val_mae: 0.2786
Epoch 3/1000
[1m21/21[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 0.1181 - mae: 0.2779 - val_loss: 0.0682 - val_mae: 0.2146
Epoch 4/1000
[1m21/21[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 0.0850 - mae: 0.2364 - val_loss: 0.0390 - val_mae: 0.1632
Epoch 5/1000
[1m21/21[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 0.0479 - mae: 0.1720 - val_loss: 0.0166 - val_mae: 0.1031
Epoch 6/1000
[1m21/21[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 0.0300 - mae: 0.1221 - val_loss: 0.0089 - val_mae: 0.0721
Epoch 7/1000
[1m21/21[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 0.0217



Pipeline and model saved successfully!
