In [None]:
#@title ##Install Packages and Files
#@markdown Clone the `welltest` repository, then install its requirements and `gradio`.
# NOTE(review): IPython normally only recognises a cell magic on the first line of a cell - confirm that `%%capture` / `%%bash` still run with the form comments above them.
%%capture
%%bash
git clone https://github.com/thanktua-spp/welltest.git

cd /content/welltest/ # work inside the cloned welltest checkout
pip install -r requirements.txt # dependencies listed by the repository
pip install gradio # UI library used by the final cell; NOTE(review): version is not pinned

In [None]:
#@title ##Input the data
#@markdown Upload the well test workbook and point `file_name` at it.
import pandas as pd
from welltest.utility import data_processing, standardize_column_names
from google.colab import files

# Opens the Colab upload dialog; the return value is kept but not used below.
uploaded = files.upload()

import io
# NOTE(review): assumes the uploaded workbook is stored under exactly this
# path - confirm it matches the name of the file that was uploaded.
file_name = '/content/Well 4SS x Inhouse Data.xlsx'

def get_data(data_path):
    """Read the Excel workbook at ``data_path`` into a DataFrame.

    The sheet is parsed with ``header=1``, so row index 1 of the sheet
    supplies the column labels.

    Args:
        data_path: Location of the workbook, passed straight to
            ``pd.read_excel``.

    Returns:
        Whatever ``pd.read_excel`` returns for that workbook.
    """
    workbook_frame = pd.read_excel(data_path, header=1)
    return workbook_frame

# Load the workbook at `file_name` and display the raw frame.
welltest_df = get_data(file_name) # author's note: 'welltest/6LS Well Inhouse Data.xlsx' was marked "ok" - presumably another workbook that loads
welltest_df

def standardize_column_names(df):
    """Return a new frame holding exactly the preferred well test columns.

    Columns are matched to the preferred names by label, never by position,
    so extra or re-ordered columns in ``df`` cannot be mislabelled. A label
    that differs from a preferred name only by surrounding whitespace or
    letter case is renamed to the preferred spelling, unless a column with
    the exact preferred name is already present. Columns that match no
    preferred name are left out of the result.

    Args:
        df: Raw well test DataFrame. It is not modified.

    Returns:
        A DataFrame with the preferred columns, in the preferred order.

    Raises:
        KeyError: If any preferred column cannot be found in ``df``.
    """
    preferred_columns = [
    'Data Note', 'Well Name', 'Date', 'Time', 'Choke', 'FTHP', 'FTHT', 'FLP', 'Tsep', 'Psep',
    'Pmani', 'Meter Totalizer(Bbls)', 'Meter Factor', 'LiqRate', '%Water', '%Sediment',
    'BS&W', 'OilRate', 'DOF Plate size(inch)', 'GasDP(InchH20)', 'GasRate', 'GOR',
    'Sand(pptb)', 'Oil gravity (API)'
    ]

    # Lookup from a normalised key to the preferred spelling.
    canonical_by_key = {name.casefold(): name for name in preferred_columns}
    already_present = set(df.columns)

    column_mapping = {}
    for existing_column in df.columns:
        target = canonical_by_key.get(str(existing_column).strip().casefold())
        # Skip unknown labels, labels that are already correct, and labels whose
        # preferred name is already taken (renaming would create duplicates).
        if target is None or target in already_present or target in column_mapping.values():
            continue
        column_mapping[existing_column] = target

    # rename() without inplace returns a new frame, leaving the caller's intact.
    renamed = df.rename(columns=column_mapping)

    missing = [name for name in preferred_columns if name not in renamed.columns]
    if missing:
        raise KeyError(f"Missing expected well test columns: {missing}")

    return renamed[preferred_columns]

# Reduce the raw frame to the preferred columns and display it. This calls the
# function defined just above, which shadows the one imported from welltest.utility.
welltest_df = standardize_column_names(welltest_df)
welltest_df

In [None]:
#@title ##Perform Data Preprocessing on the Input data
#@markdown Standardise the columns, drop missing or non-numeric entries, and keep only the model columns `'ds','Choke', 'FTHP', 'FLP','BS&W', 'OilRate'`

# Columns handed to the models: the timestamp, four inputs and the target.
model_columns = ['ds','Choke', 'FTHP', 'FLP','BS&W', 'OilRate']
def data_processing(welltest_df):
    """Reduce a raw well test frame to clean, numeric model columns.

    Steps:
      1. Standardise the columns with ``standardize_column_names``.
      2. Drop every column that contains a missing value.
      3. Coerce the remaining measurement columns to numeric and drop the
         rows where coercion produced NaN.
      4. Attach the matching ``Date`` values as column ``ds``.

    Args:
        welltest_df: Raw well test DataFrame.

    Returns:
        A new DataFrame (a copy, safe to assign into) with the columns
        listed in ``model_columns``.

    Raises:
        KeyError: Propagated from pandas when a required column is absent,
            e.g. because it was dropped in step 2.
        AssertionError: If missing values remain after cleaning.
    """
    welltest_df = standardize_column_names(welltest_df)
    welltest_drop_nan = welltest_df.dropna(axis=1).dropna(axis=0)
    X_temp = welltest_drop_nan.drop(['Well Name', 'Date', 'Time'], axis=1)
    X_temp = X_temp.apply(pd.to_numeric, errors='coerce')
    X_temp = X_temp.dropna(axis=0)
    # Take the dates by index label so they always line up with surviving rows.
    date = welltest_df.loc[X_temp.index, 'Date'].to_frame(name='ds')
    X = pd.concat([date, X_temp], axis=1)
    # Only measurement columns may be cast to float; 'ds' holds dates and is
    # excluded, and the cast is done in place of the column rather than by
    # appending a second copy of it.
    object_columns = X.select_dtypes(include=['object']).columns.drop('ds', errors='ignore')
    if len(object_columns) > 0:
        X[object_columns] = X[object_columns].astype(float)
    assert X.isna().sum().sum() == 0
    return X[model_columns].copy()

# Clean the frame, then parse the 'ds' column into pandas datetimes and display the result.
welltest_new = data_processing(welltest_df)
welltest_new['ds'] = pd.to_datetime(welltest_new['ds'])
welltest_new

In [None]:
#@title ##Visualise the Input data
#@markdown Plot of the different input features `'ds','Choke', 'FTHP', 'FLP','BS&W'`

import matplotlib.pyplot as plt
import seaborn as sns

# Plot the columns of welltest_new against 'ds', one subplot per plotted column.
welltest_new.plot(x='ds', subplots=True, figsize=(15,10))
plt.show()

# import seaborn as sns
# import pandas as pd
# import matplotlib.pyplot as plt

# features = ['Choke', 'FTHP', 'FTHT', 'FLP','BS&W']

# def visualize_data(df):
#     # Assuming your DataFrame is named 'df'
#     # Convert 'ds' column to datetime if it's not already in datetime format
#     df['ds'] = pd.to_datetime(df['ds'])

#     # Melt the DataFrame to long format to use the 'col' parameter
#     df_melted = df.melt(id_vars='ds', var_name='column', value_name='value')

#     # Use relplot with 'col' parameter to create a grid of time series plots
#     g = sns.relplot(data=df_melted, x='ds', y='value', hue='column',
#                     kind='line', col='column', col_wrap=3, height=3, aspect=2)

#     # Set the x-axis label for all subplots (optional)
#     for ax in g.axes.flat:
#         ax.set_xlabel('Date')

#     # Show the plots
#     plt.show()

# visualize_data(welltest_new)

In [None]:
# Pairwise plots of the welltest_new columns, with KDE curves on the diagonal.
sns.pairplot(welltest_new, diag_kind='kde')
plt.show()

In [None]:
#@title #Split data and perform model development
#@markdown We use the following as model input `'Choke', 'FTHP', 'FLP', 'BS&W'` and predict `'OilRate'` (renamed to `'y'`)
from sklearn.tree import DecisionTreeRegressor
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from xgboost import XGBRegressor
from sklearn.metrics import r2_score, mean_squared_error
from prophet import Prophet

# Columns removed from a frame to obtain the feature matrix: the timestamp and the target.
drop_col = ['ds', 'y']
# Name of the target column ('OilRate' is renamed to 'y' before training).
target='y'

# Prepare the data for Prophet model
def prepare_prophet_data(data):
    """Return a renamed copy of ``data`` whose 'OilRate' column is called 'y'.

    The input frame is left untouched; frames without an 'OilRate' column
    come back with their labels unchanged.
    """
    column_renames = {'OilRate': 'y'}
    return data.rename(columns=column_renames)

# Train the models
def train_models(data):
    """Fit the three regressors and the Prophet model on ``data``.

    The regressors are trained on every column of ``data`` except those in
    ``drop_col``, with ``data[target]`` as the label. Prophet is fitted on
    the full frame.

    Args:
        data: Training DataFrame containing the ``drop_col`` columns.

    Returns:
        Tuple of fitted models in the order
        (decision tree, random forest, XGBoost, Prophet).
    """
    features = data.drop(drop_col, axis=1)
    labels = data[target]

    # Same seed for every regressor, fitted in the order they are returned.
    fitted_regressors = []
    for estimator_cls in (DecisionTreeRegressor, RandomForestRegressor, XGBRegressor):
        regressor = estimator_cls(random_state=42)
        regressor.fit(features, labels)
        fitted_regressors.append(regressor)

    forecaster = Prophet()
    forecaster.fit(data)

    return (*fitted_regressors, forecaster)

# Make predictions using each model
def make_predictions(models, data):
    """Predict with every model in ``models`` for the rows of ``data``.

    Args:
        models: 4-tuple as returned by ``train_models``
            (decision tree, random forest, XGBoost, Prophet).
        data: DataFrame containing the ``drop_col`` columns; the regressors
            see it without those columns, Prophet sees it whole.

    Returns:
        Tuple of predictions in the same order as ``models``; the Prophet
        entry is the 'yhat' column of its forecast.
    """
    tree_model, forest_model, boosted_model, forecaster = models
    features = data.drop(drop_col, axis=1)

    regressor_preds = [
        regressor.predict(features)
        for regressor in (tree_model, forest_model, boosted_model)
    ]
    forecast = forecaster.predict(data)

    return (*regressor_preds, forecast['yhat'])

# Calculate RMSE and R2 for each model
def calculate_metrics(y_true, y_pred):
    """Compute RMSE and R2 for a set of predictions.

    Args:
        y_true: Observed target values.
        y_pred: Predicted values, aligned with ``y_true``.

    Returns:
        Tuple ``(rmse, r2)``.
    """
    # Imported here because the notebook only imports numpy in a later cell;
    # without this the function depends on that cell having run first.
    import numpy as np

    rmse = np.sqrt(mean_squared_error(y_true, y_pred))
    r2 = r2_score(y_true, y_pred)
    return rmse, r2

# Rename the target column 'OilRate' to 'y' (the column the models are trained on).
data = prepare_prophet_data(welltest_new)

print(data.head())

# Split the data into training and test sets. The split is seeded with the
# same value as the models so that a re-run reproduces the same metrics.
train_data, test_data = train_test_split(data, test_size=0.2, shuffle=True, random_state=42)

# Train the models
models = train_models(train_data)

In [None]:
# Display the held-out test split.
test_data

In [None]:
#@title #Show train Model Results

import pandas as pd
import numpy as np

# Sort both splits by 'ds' in place (the random split above left them shuffled)
train_data.sort_values(by='ds', inplace=True)
test_data.sort_values(by='ds', inplace=True)

# Make predictions using each model on the train set
decision_tree_train_preds, rf_train_preds, xgb_train_preds, prophet_train_preds = make_predictions(models, train_data)

# Make predictions using each model on the test set
decision_tree_test_preds, rf_test_preds, xgb_test_preds, prophet_test_preds = make_predictions(models, test_data)

# Calculate RMSE and R2 for train and test data (these names are reused by the later result and plot cells)
decision_tree_rmse_train, decision_tree_r2_train = calculate_metrics(train_data['y'], decision_tree_train_preds)
decision_tree_rmse_test, decision_tree_r2_test = calculate_metrics(test_data['y'], decision_tree_test_preds)

rf_rmse_train, rf_r2_train = calculate_metrics(train_data['y'], rf_train_preds)
rf_rmse_test, rf_r2_test = calculate_metrics(test_data['y'], rf_test_preds)

xgb_rmse_train, xgb_r2_train = calculate_metrics(train_data['y'], xgb_train_preds)
xgb_rmse_test, xgb_r2_test = calculate_metrics(test_data['y'], xgb_test_preds)

# Prophet metrics are currently disabled; its predictions above are computed but not scored.
# prophet_rmse_train, prophet_r2_train = calculate_metrics(train_data['y'], prophet_train_preds)
# prophet_rmse_test, prophet_r2_test = calculate_metrics(test_data['y'], prophet_test_preds)


# Results for train data: one row per model
train_results = {
    'Model': ['Decision Tree', 'Random Forest', 'XGBoost', #'Prophet'
    ],
    'Train RMSE': [decision_tree_rmse_train, rf_rmse_train, xgb_rmse_train,
                   #prophet_rmse_train
                   ],
    'Train R2': [decision_tree_r2_train, rf_r2_train, xgb_r2_train,
                 #prophet_r2_train
                 ]
}
# Create the summary DataFrame and display it
train_results_df = pd.DataFrame(train_results)

train_results_df

In [None]:
#@title #Show test Model Results

# Results for test data: one row per model, using the metrics computed in the previous cell
test_results = {
    'Model': ['Decision Tree', 'Random Forest', 'XGBoost',
              #'Prophet'
              ],
    'Test RMSE': [decision_tree_rmse_test, rf_rmse_test, xgb_rmse_test,
                  #prophet_rmse_test
                  ],
    'Test R2': [decision_tree_r2_test, rf_r2_test, xgb_r2_test,
                #prophet_r2_test
                ]
}

test_results_df = pd.DataFrame(test_results)

# The frame is displayed as the cell's last expression, below the printed heading.
print("\nTest Data Results:")
test_results_df

In [None]:
# Display the training split.
train_data

In [None]:
# Display the held-out test split.
test_data

In [None]:
# @title #Visualize the prediction result in Line Plots
import matplotlib.pyplot as plt

# Metrics for each model, keyed by display name
models_metrics = {
    'Decision Tree': {'Train RMSE': decision_tree_rmse_train, 'Train R2': decision_tree_r2_train,
                      'Test RMSE': decision_tree_rmse_test, 'Test R2': decision_tree_r2_test},
    'Random Forest': {'Train RMSE': rf_rmse_train, 'Train R2': rf_r2_train,
                      'Test RMSE': rf_rmse_test, 'Test R2': rf_r2_test},
    'XGBoost': {'Train RMSE': xgb_rmse_train, 'Train R2': xgb_r2_train,
                'Test RMSE': xgb_rmse_test, 'Test R2': xgb_r2_test},
}

# Predictions for the same models, as (train, test) pairs. To plot another
# model, add an entry here and in models_metrics; the grid grows with it.
models_predictions = {
    'Decision Tree': (decision_tree_train_preds, decision_tree_test_preds),
    'Random Forest': (rf_train_preds, rf_test_preds),
    'XGBoost': (xgb_train_preds, xgb_test_preds),
}

# One column per model; row 0 shows the train split, row 1 the test split.
# squeeze=False keeps `axes` two-dimensional even for a single model.
n_models = len(models_metrics)
fig, axes = plt.subplots(2, n_models, figsize=(6 * n_models, 12), squeeze=False)
fig.suptitle('Model Predictions and Metrics', fontsize=16)

# (label, frame, line style) for each row of the grid
splits = [('Train', train_data, '-'), ('Test', test_data, '--')]

for col_idx, model_name in enumerate(models_metrics):
    for row_idx, (split_name, split_data, line_style) in enumerate(splits):
        ax = axes[row_idx, col_idx]
        split_preds = models_predictions[model_name][row_idx]

        # Actual values and the model's predictions over time
        ax.plot(split_data['ds'], split_data['y'], label=f'{split_name} Actual',
                color='blue', linestyle=line_style)
        ax.plot(split_data['ds'], split_preds, label=f'{model_name} {split_name}',
                color='brown', linestyle=line_style)

        rmse = models_metrics[model_name][f'{split_name} RMSE']
        r2 = models_metrics[model_name][f'{split_name} R2']

        ax.set_xlabel('Date')
        ax.set_ylabel('Target Value')
        ax.set_title(f'{model_name} {split_name} Predictions vs. Actual\n{split_name} RMSE: {rmse:.2f}, {split_name} R2: {r2:.2f}')
        ax.legend()

plt.tight_layout()
plt.show()


In [None]:
# @title #Visualize the prediction result in Linear Plots
import matplotlib.pyplot as plt

# A single row of three panels, one per model
fig, axes = plt.subplots(nrows=1, ncols=3, figsize=(18, 6))
fig.suptitle('True Values vs Predictions', fontsize=16)

# (train predictions, legend label) for each panel
subplots_data = [
    (decision_tree_train_preds, 'Decision Tree'),
    (rf_train_preds, 'Random Forest'),
    (xgb_train_preds, 'XGBoost'),
]

# Shared range for both axes so the diagonal marks a perfect prediction
lims = [0, 2500]

for ax, (predictions, model_name) in zip(axes, subplots_data):
    ax.scatter(train_data['y'], predictions, label=model_name)
    ax.plot(lims, lims, '--', color='gray', label='Perfect Prediction')
    ax.set(xlabel='True Values', ylabel='Predictions', xlim=lims, ylim=lims)
    ax.set_aspect('equal')
    ax.legend(loc='upper left')

plt.tight_layout()
plt.show()


In [None]:
# @title #Visualize Decision tree model

from sklearn.tree import export_graphviz
# Unpack the fitted models; only the decision tree is used in this cell.
decision_tree_model, rf_train_model, xgb_train_model, prophet_model = models

# Export the decision tree as DOT source. out_file=None is passed, so no file is
# written here; the DOT text is kept in dot_data (it can also be pasted into webgraphviz.com).
# NOTE(review): feature_names presumably has to match the order of the training columns - confirm against train_models.
dot_data = export_graphviz(decision_tree_model, out_file =None,
               feature_names =['Choke', 'FTHP', 'FLP', 'BS&W'], filled=True)

import graphviz
# Wrap the DOT source; leaving `graph` as the last expression displays it in the notebook.
graph = graphviz.Source(dot_data, format='png')
graph

In [None]:
#@title #Deploy and Test Models in Production

import warnings
warnings.filterwarnings("ignore")


import gradio as gr
import random
def predict_liquid_rate(*input):
    """Predict the oil rate for one set of inputs with the selected models.

    Args:
        *input: The feature values in training-column order, followed by the
            model selection as the last positional argument. The selection
            may be a list of model names, a single name, or None. (The
            parameter keeps its original name, which shadows the builtin
            ``input`` inside this function.)

    Returns:
        Tuple ``(xgboost_text, decision_tree_text, random_forest_text)``.
        Each entry is a formatted prediction, or an empty string when that
        model was not selected.
    """
    *feature_values, model_selection = input
    # A single row with one column per feature.
    inp_arr = np.array(feature_values, dtype=float).reshape(1, -1)
    decision_tree_model, rf_train_model, xgb_train_model, _prophet_model = models

    # Normalise the selection: a bare string would otherwise be iterated
    # character by character, and None is not iterable at all.
    if model_selection is None:
        model_selection = []
    elif isinstance(model_selection, str):
        model_selection = [model_selection]

    fitted_models = {
        'XGBoost': xgb_train_model,
        'Decision Tree': decision_tree_model,
        'Random Forest': rf_train_model,
    }
    outputs = {name: '' for name in fitted_models}

    for choice in model_selection:
        model = fitted_models.get(choice)
        if model is None:
            # Unknown names are ignored.
            continue
        prediction = model.predict(inp_arr)[0]
        outputs[choice] = f"{choice} Oil Rate: {prediction:.2f} Bbls/day"

    return outputs['XGBoost'], outputs['Decision Tree'], outputs['Random Forest']

# Build the Gradio app: a reference table, four input sliders, a model
# selector and one output label per model.
with gr.Blocks() as demo:
    gr.Markdown(
    """
    # Oil Rate Prediction
    Use this table as Reference for Last Well test data.
    """)
    with gr.Column():
        # gr.Group is used instead of gr.Box: Box was removed in Gradio 4 and
        # the install cell does not pin a Gradio version.
        with gr.Group():
            frame_output = gr.Dataframe(
                value=[['2022-12-23', 32, 1000, 280, 0.45,  775.12]],
                headers=['Date', 'Choke', 'FTHP', 'FLP', 'BS&W', 'OilRate'],
                datatype=["str", "number", "number", "number",  "number", "number"],
                )

        gr.Markdown(
        """    Use the different input slider to select new welltest information
        """)
        with gr.Group():
            choke = gr.Slider(minimum=0, maximum=100, value=32, step=2, label="Choke Size (1/64\")", interactive=True)
            fthp = gr.Slider(minimum=500, maximum=5000, step=1, value=1000, label="Tubing Head Pressure (FTHP)(psi)", interactive=True)
            flp = gr.Slider(minimum=0, maximum=5000, step=1, value=280,label="Flow Line Pressure (FLP)(psi)", interactive=True)
            bsw = gr.Slider(minimum=0, maximum=100, value=0.45, label="Basic Sediment and Water (BS&W)(%)", interactive=True)

        gr.Markdown(
        """    Use the different trained models to perform Oil rate prediction
        """)
        # Output Controls
        with gr.Column():
            # The default is given as a list because a checkbox group holds a list of selected names.
            select_model = gr.CheckboxGroup(choices=["Random Forest", "XGBoost", "Decision Tree"], value=['XGBoost'], label="Select Model", info="Select Model to make prediction", interactive=True)
            btn_predict = gr.Button("Test Prediction")
            xg_output = gr.Label(label="XGBoost model")
            dt_output = gr.Label(label="Decision Tree")
            rf_output = gr.Label(label="Random Forest")


    # Slider order matches the feature order expected by predict_liquid_rate;
    # the model selection goes last.
    input_items = [choke, fthp, flp, bsw, select_model]
    btn_predict.click(fn=predict_liquid_rate, inputs=input_items, outputs=[xg_output,dt_output,rf_output])
demo.launch(debug=True, share=True)