In [3]:
#Import libs 
import pandas as pd
import numpy as np
import warnings
warnings.filterwarnings('ignore')
import matplotlib.pyplot as plt
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import plotly.express as px
from tqdm import tqdm
import textwrap
from statistics import mean
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score as r2, mean_absolute_error as mae, mean_squared_error as mse, accuracy_score
from sklearn.metrics.pairwise import euclidean_distances
import random
import mlflow
import mlflow.sklearn
# Notebook-wide pandas display settings: three decimals for floats, at most 18 columns shown.
for _option_name, _option_value in (
    ("display.precision", 3),
    ('display.float_format', lambda x: '%.3f' % x),
    ('display.max_columns', 18),
):
    pd.set_option(_option_name, _option_value)

## Loading all essential data

In [5]:
# Load the wells metadata and clean it: wellhead coordinate column names, Status labels,
# two hard-coded coordinate corrections and field names.
metadata_init = pd.read_csv('ACG_wells_metadata.csv', sep=',')
# rename() returns a new frame, so metadata_init keeps the data exactly as read from disk.
metadata = metadata_init.rename(columns={'X':'X_wellhead', 'Y':'Y_wellhead'})

# Spelling variants of each well status -> canonical label. The lookup is applied after
# trimming surrounding whitespace and lower-casing; labels not listed here are left unchanged.
status_variants = {
    'oil': 'production oil',
    'oil producer': 'production oil',
    'production': 'production oil',
    'produiction oil': 'production oil',
    'production_oil': 'production oil',
    'abandoned production oil': 'abandoned oil',
    'abandoned  oil': 'abandoned oil',
    'abandoned oi': 'abandoned oil',
    'injector  - water': 'injector - water',
    'injector water': 'injector - water',
    'injetor  - water': 'injector - water',
    'abandoned injector - water per b': 'abandoned injector - water',
    'plugged and abandoned': 'p&a',
}
metadata['Status'] = metadata['Status'].str.strip().str.lower().replace(status_variants)

# Coordinate corrections: rows holding these exact stored values get a new wellhead coordinate,
# and well C39 is then assigned the same coordinate pair by name.
metadata.loc[metadata['X_wellhead'] == 118.270, 'X_wellhead'] = 526258.84
metadata.loc[metadata['Y_wellhead'] == 526261.510, 'Y_wellhead'] = 4435802.01
metadata.loc[metadata['well'] == 'C39', 'X_wellhead'] = 526258.840
metadata.loc[metadata['well'] == 'C39', 'Y_wellhead'] = 4435802.010

# Field names: first normalise two labels, then override the field of individual wells.
metadata['field'] = metadata['field'].replace({'West Azeri': 'WEST AZERI', 'COP': 'WEST CHIRAG'})
field_by_well = {
    'AZERI2': 'WEST AZERI',
    'AZERI3': 'WEST AZERI',
    'B31': 'CENTRAL AZERI',
    'J28_bpQIP': 'WEST CHIRAG',
}
for _well_name, _field_name in field_by_well.items():
    metadata.loc[metadata['well'] == _well_name, 'field'] = _field_name

In [None]:
# Read the joint well-log dataset from parquet and attach the wells metadata by well name.
df_prq = pd.read_parquet('ACG_wells_JOINT_BEST_v6.parquet.gzip')
df_prq = df_prq.rename(columns={'wellName':'well'})
df_prq = df_prq.set_index('well').join(metadata.set_index('well')).reset_index()
# gross_pay flag: 1 where Fluidcode_mod > 0, 0 where Fluidcode_mod <= 0.
df_prq.loc[df_prq.Fluidcode_mod > 0, 'gross_pay'] = 1
df_prq.loc[df_prq.Fluidcode_mod <= 0, 'gross_pay'] = 0
# A missing Fluidcode_mod matches neither comparison, so those rows are still NaN here and
# a direct integer cast would raise. Count them as non-pay (0) before casting.
df_prq['gross_pay'] = df_prq['gross_pay'].fillna(0).astype('int')

In [None]:
# Working subset of the log columns used in the Phit/Vsh analysis.
df_prq_gr = df_prq[['well','FORMATION','field','gross_pay','GR_N', 'PHIT','VSH','TST', 'TVD_SCS']]
# Balakhany VIII sand samples flagged gross_pay == 1, restricted to the open intervals
# 0 < GR_N < 150, 0.13 < PHIT < 0.3 and 0 < VSH < 0.5.
bal8_pay_rows = df_prq_gr['FORMATION'].eq('Balakhany VIII sand') & df_prq_gr['gross_pay'].eq(1)
bal8_log_window = (
    df_prq_gr['GR_N'].gt(0) & df_prq_gr['GR_N'].lt(150)
    & df_prq_gr['PHIT'].gt(0.13) & df_prq_gr['PHIT'].lt(0.3)
    & df_prq_gr['VSH'].gt(0) & df_prq_gr['VSH'].lt(0.5)
)
df_prq_gr8 = df_prq_gr[bal8_pay_rows & bal8_log_window]
# PHIT against VSH, one colour per field; the closing expression renders the figure in the notebook.
fig = px.scatter(df_prq_gr8, x='VSH', y='PHIT', color='field').update_traces(opacity=0.5)
fig.update_layout(title='Phit=f(Vsh) Balakhany VIII sand net pay intervals',
                  margin={'l': 0, 'r': 0, 't': 40, 'b': 0})


In [None]:
# Single-well view: PHIT against VSH for the gross_pay == 1 samples of well G07Z in the
# Balakhany VIII sand, limited to 0 < GR_N < 150, 0.13 <= PHIT < 0.3 and 0 < VSH <= 0.5.
g07z_bal8_pay_rows = (
    df_prq_gr['FORMATION'].eq('Balakhany VIII sand')
    & df_prq_gr['well'].eq('G07Z')
    & df_prq_gr['gross_pay'].eq(1)
)
g07z_log_window = (
    df_prq_gr['GR_N'].gt(0) & df_prq_gr['GR_N'].lt(150)
    & df_prq_gr['PHIT'].ge(0.13) & df_prq_gr['PHIT'].lt(0.3)
    & df_prq_gr['VSH'].gt(0) & df_prq_gr['VSH'].le(0.5)
)
df_prq_gr8_well = df_prq_gr[g07z_bal8_pay_rows & g07z_log_window]
# Points coloured by TVD_SCS, with an OLS trendline drawn by plotly express.
fig = px.scatter(df_prq_gr8_well, x='VSH', y='PHIT', color='TVD_SCS',
                 trendline="ols", width=700, height=400)
fig.update_layout(title='G07Z Balakhany VIII sand', margin={'l': 0, 'r': 0, 't': 40, 'b': 0})
fig.show()

In [None]:
# Samples flagged gross_pay == 1 inside the window 0.13 <= PHIT < 0.3 and 0 < VSH <= 0.5;
# the same window is applied to both formations.
phit_vsh_window = (
    df_prq_gr['gross_pay'].eq(1)
    & df_prq_gr['PHIT'].ge(0.13) & df_prq_gr['PHIT'].lt(0.3)
    & df_prq_gr['VSH'].gt(0) & df_prq_gr['VSH'].le(0.5)
)
df_phit_vsh8 = df_prq_gr[df_prq_gr['FORMATION'].eq('Balakhany VIII sand') & phit_vsh_window]
df_phit_vsh10 = df_prq_gr[df_prq_gr['FORMATION'].eq('Balakhany X sand') & phit_vsh_window]
# Show the Balakhany X sand selection as the cell output.
df_phit_vsh10

In [None]:
from sklearn.linear_model import LinearRegression
# Fit PHIT as a linear function of VSH on the single-well subset, then draw the samples
# together with the fitted line.
data = df_prq_gr8_well
# Selecting with a one-item column list yields 2-D arrays of shape (n_samples, 1).
X = data[['VSH']].to_numpy()
Y = data[['PHIT']].to_numpy()
linear_regressor = LinearRegression().fit(X, Y)
Y_pred = linear_regressor.predict(X)
plt.scatter(X, Y)
plt.plot(X, Y_pred, color='red')
plt.show()

In [209]:
# One PHIT = f(VSH) linear regression per well over the Balakhany VIII sand selection.
# Each record holds: well, slope, intercept, R2 on the fitted samples, and sample count.
data_list8 = []
for wellname in df_phit_vsh8['well'].unique():
    data = df_phit_vsh8.loc[df_phit_vsh8['well'] == wellname]
    # One-item column lists give 2-D arrays of shape (n_samples, 1).
    X = data[['VSH']].to_numpy()
    Y = data[['PHIT']].to_numpy()
    linear_regressor = LinearRegression().fit(X, Y)
    slope = round(linear_regressor.coef_[0][0], 3)
    offset = round(linear_regressor.intercept_[0], 3)
    r_squared = round(linear_regressor.score(X, Y), 2)
    data_list8.append((wellname, slope, offset, r_squared, len(X)))
df_linreg_phit_vsh8 = pd.DataFrame(data_list8, columns=['well','coef','intercept','score','shape'])

In [210]:
# One PHIT = f(VSH) linear regression per well over the Balakhany X sand selection.
# Each record holds: well, slope, intercept, R2 on the fitted samples, and sample count.
data_list10 = []
for wellname in df_phit_vsh10['well'].unique():
    data = df_phit_vsh10.loc[df_phit_vsh10['well'] == wellname]
    # One-item column lists give 2-D arrays of shape (n_samples, 1).
    X = data[['VSH']].to_numpy()
    Y = data[['PHIT']].to_numpy()
    linear_regressor = LinearRegression().fit(X, Y)
    slope = round(linear_regressor.coef_[0][0], 3)
    offset = round(linear_regressor.intercept_[0], 3)
    r_squared = round(linear_regressor.score(X, Y), 2)
    data_list10.append((wellname, slope, offset, r_squared, len(X)))
df_linreg_phit_vsh10 = pd.DataFrame(data_list10, columns=['well','coef','intercept','score','shape'])

In [None]:
# Preview the first three rows of the per-well regression results for the Balakhany X sand.
df_linreg_phit_vsh10.head(3)

In [212]:
# Spatial lookup: one row per (well, FORMATION) holding the first X, Y and field recorded for it.
df_xy = df_prq[['well', 'FORMATION', 'X', 'Y', 'field']]
df_xy_fm = (
    df_xy.groupby(['well','FORMATION'])[['X', 'Y', 'field']]
    .apply(lambda x: x.iloc[0])
    .reset_index()
)
# Numeric code for each field name; fields not listed get NaN, so the column is kept as float.
field_codes = {
    'DDGG': 1,
    'DWG': 2,
    'WEST CHIRAG': 3,
    'CHIRAG': 4,
    'WEST AZERI': 5,
    'CENTRAL AZERI': 6,
    'EAST AZERI': 7,
}
df_xy_fm['field_num'] = df_xy_fm['field'].map(field_codes).astype('float64')
# Split the lookup by formation.
df_xy_fm_bal8 = df_xy_fm[df_xy_fm['FORMATION'] == 'Balakhany VIII sand']
df_xy_fm_bal10 = df_xy_fm[df_xy_fm['FORMATION'] == 'Balakhany X sand']
# Attach the spatial columns to the per-well regression results, matching on well name.
df_linreg_phit_vsh8_xy = (
    df_linreg_phit_vsh8.set_index('well')
    .join(df_xy_fm_bal8.set_index('well'))
    .reset_index()
)
df_linreg_phit_vsh10_xy = (
    df_linreg_phit_vsh10.set_index('well')
    .join(df_xy_fm_bal10.set_index('well'))
    .reset_index()
)

In [None]:
# Overlaid histograms of the per-well R2 for the Balakhany VIII sand:
# wells with field_num <= 4 are labelled 'Chirag', wells with field_num > 4 'Azeri'.
r2_bins = dict(start=0, end=1, size=0.05)
chirag_rows = df_linreg_phit_vsh8_xy.field_num <= 4
azeri_rows = df_linreg_phit_vsh8_xy.field_num > 4
fig = go.Figure(data=[
    go.Histogram(x=df_linreg_phit_vsh8_xy.loc[chirag_rows, 'score'],
                 xbins=r2_bins, marker_color='#0043fa', name='Chirag'),
    go.Histogram(x=df_linreg_phit_vsh8_xy.loc[azeri_rows, 'score'],
                 xbins=r2_bins, marker_color='#c9bc02', name='Azeri'),
])
fig.update_traces(opacity=0.75)
fig.update_layout(
    title_text='Linear Regression Phit=f(Vsh) R2 Balakhany VIII sand',
    xaxis_title_text='R2',
    yaxis_title_text='Count',
    autosize=True,
    width=1000,
    height=300,
    margin=dict(l=10, r=10, b=10, t=40),
    barmode='overlay',
)
# Closing expression returns the figure, so the notebook renders it.
fig.update_xaxes(nticks=40, showgrid=True)

In [None]:
# Overlaid histograms of the per-well R2 for the Balakhany X sand:
# wells with field_num <= 4 are labelled 'Chirag', wells with field_num > 4 'Azeri'.
# The row masks are built from the Balakhany X table itself; a mask taken from the Balakhany VIII
# table covers a different set of wells, so it either fails to align or selects the wrong rows.
fig = go.Figure()
fig.add_trace(go.Histogram(x=df_linreg_phit_vsh10_xy[df_linreg_phit_vsh10_xy.field_num <=4].score, 
                           xbins=dict(start=0, end=1, size=0.05), marker_color='#00c2a5', name='Chirag'))
fig.add_trace(go.Histogram(x=df_linreg_phit_vsh10_xy[df_linreg_phit_vsh10_xy.field_num > 4].score, 
                           xbins=dict(start=0, end=1, size=0.05), marker_color='#fa8e00', name='Azeri'))
fig.update_traces(opacity=0.75)
fig.update_layout(title_text='Linear Regression Phit=f(Vsh) R2 Balakhany X sand',xaxis_title_text='R2', yaxis_title_text='Count',
                  autosize=True, width=1000, height=300, margin=dict(l=10,r=10,b=10,t=40))
fig.update_layout(barmode='overlay')
# Closing expression returns the figure, so the notebook renders it.
fig.update_xaxes(nticks=40, showgrid=True)

In [None]:
# Balakhany VIII sand: map of the per-well R2 of the Phit=f(Vsh) regression.
# Mean wellhead position per field, drawn as labelled squares.
field_avg_coord = metadata.groupby('field')[['X_wellhead','Y_wellhead']].mean().reset_index()
# Wells whose regression R2 is at least 0.6 get an extra red outline.
df_linreg_phit_vsh8_xy_high = df_linreg_phit_vsh8_xy[df_linreg_phit_vsh8_xy.score>=0.6]
well_hover = "well:%{customdata[0]}, score:%{customdata[1]}<extra></extra>"

all_wells_trace = go.Scatter(
    x=df_linreg_phit_vsh8_xy.X,
    y=df_linreg_phit_vsh8_xy.Y,
    customdata=df_linreg_phit_vsh8_xy[['well', 'score','field']],
    marker=dict(color=df_linreg_phit_vsh8_xy.score, size=12,
                line=dict(color='rgb(47, 57, 61)', width=0.5),
                colorscale='Viridis', showscale=True),
    mode='markers',
    name='R2 linreg Phit=f(Vsh)',
    hovertemplate=well_hover,
)
high_r2_trace = go.Scatter(
    x=df_linreg_phit_vsh8_xy_high.X,
    y=df_linreg_phit_vsh8_xy_high.Y,
    customdata=df_linreg_phit_vsh8_xy_high[['well', 'score']],
    marker=dict(color='rgba(147,112,219,0.1)', size=12,
                line=dict(color='#fa0000', width=1.5)),
    mode='markers',
    name='R2>=0.6 linreg Phit=f(Vsh)',
    hovertemplate=well_hover,
)
platforms_trace = go.Scatter(
    x=field_avg_coord.X_wellhead,
    y=field_avg_coord.Y_wellhead,
    customdata=field_avg_coord[['field']],
    text=field_avg_coord['field'],
    textposition="middle right",
    marker=dict(color='rgb(0, 0,0)', size=12),
    mode='markers+text',
    name='Platforms',
    marker_symbol='square',
    hovertemplate="%{customdata[0]}<extra></extra>",
)

# Trace order sets the drawing order: all wells, then the high-R2 outlines, then the platforms.
fig = go.Figure(data=[all_wells_trace, high_r2_trace, platforms_trace])
fig.update_layout(
    title_text='Phit=f(Vsh) R2 Balakhany VIII sand, color=f(R2)',
    autosize=True, width=1200, height=700, margin=dict(l=10, r=10, b=10, t=40),
    legend=dict(yanchor="top", y=1, xanchor="right", x=1,
                bgcolor='rgba(255,255,255,1)', bordercolor='Black', borderwidth=1),
)
fig.show()

In [None]:
# Balakhany X sand: map of the per-well R2 of the Phit=f(Vsh) regression.
fig = go.Figure()
# Mean wellhead position per field, drawn as labelled squares.
field_avg_coord = metadata.groupby('field')[['X_wellhead','Y_wellhead']].mean().reset_index()
# Wells whose regression R2 is at least 0.6 get an extra red outline.
df_linreg_phit_vsh10_xy_high = df_linreg_phit_vsh10_xy[df_linreg_phit_vsh10_xy.score>=0.6]
well_hover = "well:%{customdata[0]}, score:%{customdata[1]}<extra></extra>"
# All wells, coloured by R2.
fig.add_trace(go.Scatter(x=df_linreg_phit_vsh10_xy.X, y=df_linreg_phit_vsh10_xy.Y,
                         customdata=df_linreg_phit_vsh10_xy[['well', 'score','field']],
                         marker=dict(color=df_linreg_phit_vsh10_xy.score, size=12,
                                     line=dict(color='rgb(47, 57, 61)', width=0.5),
                                     colorscale='Viridis', showscale=True),
                         mode='markers', name='R2 linreg Phit=f(Vsh)',
                         hovertemplate=well_hover))
# High-R2 outlines. customdata must come from the same filtered frame as x and y: plotly pairs
# customdata with points by position, so using the unfiltered table would label the outlined
# points with the well and score of unrelated rows.
fig.add_trace(go.Scatter(x=df_linreg_phit_vsh10_xy_high.X,
                         y=df_linreg_phit_vsh10_xy_high.Y,
                         customdata=df_linreg_phit_vsh10_xy_high[['well', 'score']],
                         marker=dict(color='rgba(147,112,219,0.1)', size=12,
                                     line=dict(color='#fa0000', width=1.5)),
                         mode='markers', name='R2>=0.6 linreg Phit=f(Vsh)',
                         hovertemplate=well_hover))
# Platform markers at the mean wellhead position of each field.
fig.add_trace(go.Scatter(x=field_avg_coord.X_wellhead, y=field_avg_coord.Y_wellhead,
                         customdata=field_avg_coord[['field']],
                         text=field_avg_coord['field'], textposition="middle right",
                         marker=dict(color='rgb(0, 0,0)', size=12),
                         mode='markers+text', name='Platforms',
                         marker_symbol='square', hovertemplate="%{customdata[0]}<extra></extra>"))
fig.update_traces(opacity=0.9)
fig.update_layout(title_text='Phit=f(Vsh) R2 Balakhany X sand, color=f(R2)',
                  autosize=True, width=1200, height=700, margin=dict(l=10,r=10,b=10,t=40))
fig.update_layout(legend=dict( yanchor="top", y=1, xanchor="right", x=1, bgcolor='rgba(255,255,255,1)', bordercolor='Black',borderwidth=1))
fig.show()