# Libs

In [1]:
import warnings
warnings.filterwarnings('ignore')

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import random
from scipy.stats.distributions import norm, uniform, chi2
from pyDOE import lhs
from sklearn.metrics import r2_score
import plotly.graph_objects as go
import plotly.io as pio
from tqdm import tqdm
from sklearn.metrics import mean_absolute_error
import psutil

# Display options have to be set by *calling* pd.set_option; assigning
# attributes on the function object (the previous form) changes nothing.
pd.set_option('display.precision', 3)
pd.set_option('display.float_format', lambda x: '%.5f' % x)
pd.set_option('display.max_columns', 15)
pd.set_option('display.max_rows', 6)

In [None]:
# Report the system memory currently available, converted from bytes to MB.
vm = psutil.virtual_memory()
free_memory = vm.available
free_memory_mb = free_memory / 2 ** 20
print(f"Free memory: {free_memory_mb} MB")

In [3]:
def metadata_parquet_loading(path='C:\\jupyter\\SPP\\input\\'):
    """Load well metadata and joint logs, and return the Balakhany VIII / X samples.

    Reads ``ACG_wells_metadata.csv`` and ``ACG_wells_JOINT_BEST_v10.parquet.gzip``
    from ``path``, normalises the metadata (status spelling, wellhead
    coordinates, field names), joins it onto the log samples by well, drops
    the wells in ``bad_well_list`` and keeps only samples whose FORMATION
    contains 'Balakhany VIII' or 'Balakhany X'.

    Parameters
    ----------
    path : str, optional
        Folder prefix (with trailing separator) that holds both input files.
        Defaults to the location that was previously hard-coded.

    Returns
    -------
    pandas.DataFrame
        One row per log sample with the joined metadata columns, a binary
        ``FLUIDS_int`` flag, ``FORMATION_up`` ('Balakhany VIII' / 'Balakhany X'),
        the sample coordinates renamed to ``X_traj`` / ``Y_traj`` and the
        per-(well, formation) mean coordinates ``X_mean`` / ``Y_mean``.
    """
    # Raw (already stripped + lower-cased) status spelling -> canonical status.
    status_fixes = {
        'oil': 'production oil',
        'oil producer': 'production oil',
        'production': 'production oil',
        'produiction oil': 'production oil',
        'production_oil': 'production oil',
        'abandoned production oil': 'abandoned oil',
        'abandoned  oil': 'abandoned oil',
        'abandoned oi': 'abandoned oil',
        'injector  - water': 'injector - water',
        'injector water': 'injector - water',
        'injetor  - water': 'injector - water',
        'abandoned injector - water per b': 'abandoned injector - water',
        'plugged and abandoned': 'p&a',
    }
    # (selector column, selector value, corrected field name), applied in order.
    field_fixes = [
        ('field', 'West Azeri', 'WEST AZERI'),
        ('field', 'COP', 'WEST CHIRAG'),
        ('well', 'AZERI2', 'WEST AZERI'),
        ('well', 'AZERI3', 'WEST AZERI'),
        ('well', 'B31', 'CENTRAL AZERI'),
        ('well', 'J28_bpQIP', 'WEST CHIRAG'),
    ]

    metadata = pd.read_csv(path + 'ACG_wells_metadata.csv', sep=',')
    metadata = metadata.rename(columns={'X': 'X_wellhead', 'Y': 'Y_wellhead'})
    metadata['Status'] = metadata['Status'].str.strip().str.lower()
    for raw_status, clean_status in status_fixes.items():
        metadata.loc[metadata.Status == raw_status, 'Status'] = clean_status

    # Hard-coded repairs of specific wellhead coordinates.
    metadata.loc[metadata.X_wellhead == 118.270, 'X_wellhead'] = 526258.84
    metadata.loc[metadata.Y_wellhead == 526261.510, 'Y_wellhead'] = 4435802.01
    metadata.loc[metadata.well == 'C39', 'X_wellhead'] = 526258.840
    metadata.loc[metadata.well == 'C39', 'Y_wellhead'] = 4435802.010

    for selector, value, field_name in field_fixes:
        metadata.loc[metadata[selector] == value, 'field'] = field_name

    # Read data from parquet and attach the metadata by well name.
    df_prq = pd.read_parquet(path + 'ACG_wells_JOINT_BEST_v10.parquet.gzip')
    df_prq = df_prq.rename(columns={'wellName': 'well'})
    df_prq = df_prq.set_index('well').join(metadata.set_index('well')).reset_index()

    # Filter data with bad_well_list
    bad_well_list = ['E10Z', 'Predrill_J01Z', 'Predrill_J08', 'J28_bpQIP', 'A01W_2']
    df_prq = df_prq[~df_prq.well.isin(bad_well_list)]

    # FLUIDS_int is 1 where FLUIDS > 0 and 0 otherwise. Missing FLUIDS now map
    # to 0; previously they stayed NaN and the integer cast raised.
    df_prq = df_prq.assign(FLUIDS_int=(df_prq.FLUIDS > 0).astype('int'))

    # Keep Balakhany samples only; na=False so rows without a FORMATION are
    # dropped instead of breaking the boolean mask. The copy makes the
    # following in-place edits operate on an independent frame.
    df_bal = df_prq[df_prq.FORMATION.str.contains('Balakhany', na=False)].copy()
    for unit in ('Balakhany VIII', 'Balakhany X'):
        df_bal.loc[df_bal.FORMATION.str.contains(unit), 'FORMATION_up'] = unit
    df_bal = df_bal[df_bal.FORMATION_up.notna()]

    # Mean X/Y of the samples per (well, formation); non-positive means are discarded.
    xy_coord_mean = (
        df_bal[['well', 'FORMATION_up', 'X', 'Y']]
        .groupby(['well', 'FORMATION_up'])
        .agg({'X': 'mean', 'Y': 'mean'})
        .reset_index()
        .rename(columns={'X': 'X_mean', 'Y': 'Y_mean'})
    )
    xy_coord_mean = xy_coord_mean[
        xy_coord_mean.FORMATION_up.str.contains('Balakhany')
        & (xy_coord_mean.X_mean > 0)
        & (xy_coord_mean.Y_mean > 0)
    ]

    df_bal = df_bal.rename(columns={'X': 'X_traj', 'Y': 'Y_traj'})
    df_bal = (
        df_bal.set_index(['well', 'FORMATION_up'])
        .join(xy_coord_mean.set_index(['well', 'FORMATION_up']))
        .reset_index()
    )
    return df_bal
# df_bal = metadata_parquet_loading()

In [3]:
# Load both per-formation log tables, keep only the rows where phit_flag
# equals 1, then stack them into one frame.
logs8 = pd.read_csv(r'C:\jupyter\SPP\inputoutput\general_logs\df_bal8_azr_v4.csv')
logs8 = logs8.loc[logs8['phit_flag'] == 1]
logs10 = pd.read_csv(r'C:\jupyter\SPP\inputoutput\general_logs\df_bal10_vshclp2_v4.csv')
logs10 = logs10.loc[logs10['phit_flag'] == 1]
logs = pd.concat([logs8, logs10])

# Uncertainty calc vsh & phit Bal8

In a normal distribution, if you know the values of P10 (the 10th percentile) and P90 (the 90th percentile), you can estimate the mean $\mu$ and standard deviation $\sigma$ of the distribution. 

For a normal distribution, approximately 68% of the data falls within one standard deviation of the mean, 95% within two standard deviations, and 99.7% within three standard deviations.

1. Calculate the z-scores corresponding to the percentiles P10 and P90. For a normal distribution, these z-scores are approximately -1.28 and 1.28 respectively.

2. Use the z-scores to calculate the standard deviation $\sigma$:

$\sigma = \frac{{P90 - P10}}{2.56}$

3. Because the normal distribution is symmetric, the mean ($\mu$) is the midpoint of P10 and P90 (equivalently, $\mu = P10 + 1.28\sigma = P90 - 1.28\sigma$):

$\mu = \frac{{P10 + P90}}{2}$

Once you have calculated $\mu$ and $\sigma$, you have fully defined the normal distribution. The P50 value (the median) will be equal to the mean $\mu$.

This method assumes a symmetric normal distribution, which is often a reasonable assumption but may not hold in all cases.

In [None]:
def vsh_uncertainty(ds_logs, vsh_cutoff_p10, vsh_cutoff_p90, iteration, seed=None):
    """Replicate the log samples once per sampled VSH cutoff and flag net rock.

    The cutoff is treated as normally distributed with mean (P10 + P90) / 2 and
    sigma |P90 - P10| / 2.56; ``iteration`` cutoffs are drawn by Latin hypercube
    sampling and mapped through the normal inverse CDF.

    NOTE(review): this function is defined several times in the notebook with
    the same body; the last executed definition wins.

    Parameters
    ----------
    ds_logs : pandas.DataFrame
        Needs the columns 'well', 'VSH', 'PHIT' and 'NET'. It is not modified.
    vsh_cutoff_p10, vsh_cutoff_p90 : float
        P10 and P90 of the VSH cutoff. If they are equal, every realization
        uses that single cutoff (previously this produced NaN cutoffs and an
        all-zero net flag).
    iteration : int
        Number of cutoff realizations.
    seed : int, optional
        When given, NumPy's global RNG is seeded before sampling so the draw
        is repeatable (``lhs`` takes no seed argument of its own). The default
        ``None`` keeps the previous unseeded behaviour.

    Returns
    -------
    pandas.DataFrame
        ``len(ds_logs) * iteration`` rows ordered by iteration, with columns
        'well', 'VSH', 'PHIT', 'NET', 'vsh_cutoff_rnd', 'iteration_vsh' and
        'NET_VSH_rnd' (1 where VSH <= cutoff, else 0).
    """
    vsh_cutoff_mean = (vsh_cutoff_p10 + vsh_cutoff_p90) / 2
    vsh_cutoff_sigma = abs((vsh_cutoff_p90 - vsh_cutoff_p10) / 2.56)
    if vsh_cutoff_sigma > 0:
        if seed is not None:
            np.random.seed(seed)
        lhs_quantiles = lhs(1, samples=iteration)
        vsh_cutoff_random = norm(loc=vsh_cutoff_mean, scale=vsh_cutoff_sigma).ppf(lhs_quantiles).flatten()
    else:
        # Degenerate distribution: a zero scale would make ppf return NaN.
        vsh_cutoff_random = np.full(iteration, vsh_cutoff_mean, dtype='float64')

    logs_uncert = ds_logs[['well', 'VSH', 'PHIT', 'NET']].astype(
        {'well': 'string', 'VSH': 'float32', 'PHIT': 'float32', 'NET': 'int32'})

    # Repeat the whole table once per realization in a single step instead of
    # copying it inside a Python loop and concatenating the copies.
    n_rows = len(logs_uncert)
    data_vsh = logs_uncert.iloc[np.tile(np.arange(n_rows), iteration)].reset_index(drop=True)
    data_vsh['vsh_cutoff_rnd'] = np.repeat(vsh_cutoff_random, n_rows)
    data_vsh['iteration_vsh'] = np.repeat(np.arange(iteration), n_rows)
    data_vsh['NET_VSH_rnd'] = (data_vsh['VSH'] <= data_vsh['vsh_cutoff_rnd']).astype('int32')
    return data_vsh.astype({'vsh_cutoff_rnd': 'float32', 'iteration_vsh': 'int32', 'NET_VSH_rnd': 'int32'})

# 40 VSH-cutoff realizations (P10 = 0.45, P90 = 0.61) applied to logs8.
data_vsh8 = vsh_uncertainty(ds_logs=logs8, vsh_cutoff_p10=0.45, vsh_cutoff_p90=0.61, iteration=40)

# Histogram of the distinct sampled cutoffs.
sns.histplot(data=data_vsh8['vsh_cutoff_rnd'].unique(), bins=15, kde=True)
plt.xlabel('VSH_cutoff');

In [None]:
def phit_uncertainty(data_vsh, phit_cutoff_p10, phit_cutoff_p90, iteration, seed=None):
    """Replicate the VSH realizations once per sampled PHIT cutoff and flag net rock.

    The cutoff is treated as normally distributed with mean (P10 + P90) / 2 and
    sigma |P90 - P10| / 2.56; ``iteration`` cutoffs are drawn by Latin hypercube
    sampling and mapped through the normal inverse CDF.

    NOTE(review): this function is defined several times in the notebook with
    the same body; the last executed definition wins.

    Parameters
    ----------
    data_vsh : pandas.DataFrame
        Needs at least the columns 'PHIT' and 'NET_VSH_rnd' (the frame returned
        by ``vsh_uncertainty`` has them). It is not modified.
    phit_cutoff_p10, phit_cutoff_p90 : float
        P10 and P90 of the PHIT cutoff. If they are equal, every realization
        uses that single cutoff (previously this produced NaN cutoffs and an
        all-zero net flag).
    iteration : int
        Number of cutoff realizations.
    seed : int, optional
        When given, NumPy's global RNG is seeded before sampling so the draw
        is repeatable (``lhs`` takes no seed argument of its own). The default
        ``None`` keeps the previous unseeded behaviour.

    Returns
    -------
    pandas.DataFrame
        ``len(data_vsh) * iteration`` rows ordered by iteration: the input
        columns plus 'phit_cutoff_rnd', 'iteration_phit' and 'NET_rnd'
        (1 where NET_VSH_rnd == 1 and PHIT >= cutoff, else 0).
    """
    phit_cutoff_mean = (phit_cutoff_p10 + phit_cutoff_p90) / 2
    phit_cutoff_sigma = abs((phit_cutoff_p90 - phit_cutoff_p10) / 2.56)
    if phit_cutoff_sigma > 0:
        if seed is not None:
            np.random.seed(seed)
        lhs_quantiles = lhs(1, samples=iteration)
        phit_cutoff_random = norm(loc=phit_cutoff_mean, scale=phit_cutoff_sigma).ppf(lhs_quantiles).flatten()
    else:
        # Degenerate distribution: a zero scale would make ppf return NaN.
        phit_cutoff_random = np.full(iteration, phit_cutoff_mean, dtype='float64')

    # Repeat the whole table once per realization in a single step instead of
    # copying it inside a Python loop and concatenating the copies.
    n_rows = len(data_vsh)
    data_phit = data_vsh.iloc[np.tile(np.arange(n_rows), iteration)].reset_index(drop=True)
    data_phit['phit_cutoff_rnd'] = np.repeat(phit_cutoff_random, n_rows)
    data_phit['iteration_phit'] = np.repeat(np.arange(iteration), n_rows)
    passes_both = (data_phit['NET_VSH_rnd'] == 1) & (data_phit['PHIT'] >= data_phit['phit_cutoff_rnd'])
    data_phit['NET_rnd'] = passes_both.astype('int32')
    return data_phit.astype({'phit_cutoff_rnd': 'float32', 'iteration_phit': 'int32', 'NET_rnd': 'int32'})
# 40 PHIT-cutoff realizations (P10 = 0.12, P90 = 0.18) on top of every VSH realization.
data_phit8 = phit_uncertainty(data_vsh=data_vsh8, phit_cutoff_p10=0.12, phit_cutoff_p90=0.18, iteration=40)

# Histogram of the distinct sampled cutoffs.
sns.histplot(data=data_phit8['phit_cutoff_rnd'].unique(), bins=15, kde=True)
plt.xlabel('PHIT_cutoff');

In [None]:
# Sum the simulated net flag per (VSH iteration, PHIT iteration, well) realization.
# NOTE(review): the copy duplicates the largest frame in memory; it is kept in
# case later cells modify data_phit_v2.
data_phit_v2 = data_phit8.copy()
uncert_final = (
    data_phit_v2
    .groupby(['iteration_vsh', 'iteration_phit', 'well'])['NET_rnd']
    .sum()
    .reset_index()
    .sort_values(by='well')
)
# uncert_final = uncert_final[uncert_final.NET_rnd > 100]

# Deterministic NET per well for Balakhany VIII, and P10/P50/P90 of the simulated sums.
logs8 = logs[logs.FORMATION_up == 'Balakhany VIII']
logs8_gb = logs8.groupby('well')['NET'].sum().reset_index()
uncert_final_q = uncert_final.groupby('well')['NET_rnd'].quantile([0.1, 0.5, 0.9]).reset_index()

uncert_final_q_well = uncert_final_q.set_index('well').join(logs8_gb.set_index('well'), how='inner').reset_index()
# presumably converts sample counts to thickness at a 0.1 sampling step - TODO confirm
uncert_final_q_well['NET'] = uncert_final_q_well['NET']*0.1
uncert_final_q_well['NET_rnd'] = uncert_final_q_well['NET_rnd']*0.1
# .copy() so the column edits below act on an independent frame, not on a slice.
uncert_final_q_well = uncert_final_q_well[uncert_final_q_well.NET_rnd != 0].copy()

# Label each quantile level q as 'p<100 * (1 - q)>', e.g. the 0.1 quantile becomes 'p90'.
uncert_final_q_well['level_2'] = 1 - uncert_final_q_well['level_1']
uncert_final_q_well = uncert_final_q_well.rename(columns={'level_2':'quantile'}).drop('level_1', axis=1)
uncert_final_q_well['quantile'] = (uncert_final_q_well['quantile'].round(2)*100).astype('int')
uncert_final_q_well['quantile'] = 'p' + uncert_final_q_well['quantile'].astype('string')
uncert_final_q_well = uncert_final_q_well[~uncert_final_q_well.well.isin(['C27Z', 'D01Z'])]

# Simulated vs deterministic net, with 1:1 (black) and +/-10% (red) reference lines.
sns.scatterplot(data=uncert_final_q_well, x='NET', y='NET_rnd', hue='quantile', s=30, palette='bright', alpha=0.5, ec='black')
sns.lineplot(x=[0, 100], y=[0, 100], color='black', linestyle='dashed')
sns.lineplot(x=[0, 100], y=[0, 110], color='red', linestyle='dashed')
plt.text(80, 100, '+10%', c='red')
plt.text(85, 65, '-10%', c='red')
sns.lineplot(x=[0, 100], y=[0, 90], color='red', linestyle='dashed')
plt.grid(True)
# Realization count of the first well; .iloc[0] replaces the positional
# `.iloc[0][1]` lookup on a label-indexed row, which pandas has deprecated.
print('NET.sum() realizations per 1 well: ' + str(uncert_final.groupby('well')['NET_rnd'].count().iloc[0]) + 'pcs')

In [None]:
# Absolute deviation, in whole percent, of the simulated net from the deterministic NET.
uncert_final_q_well['net_rnd/net%'] = (
    (1 - uncert_final_q_well['NET_rnd'] / uncert_final_q_well['NET'])
    .mul(100)
    .round(0)
    .astype('int')
    .abs()
)
# Bucket the deviation: <5 -> '5%', <10 -> '10%', <20 -> '20%', otherwise 'inf'.
uncert_final_q_well['quality'] = np.select(
    [
        uncert_final_q_well['net_rnd/net%'] < 5,
        uncert_final_q_well['net_rnd/net%'] < 10,
        uncert_final_q_well['net_rnd/net%'] < 20,
    ],
    ['5%', '10%', '20%'],
    default='inf',
)

sns.scatterplot(
    data=uncert_final_q_well, x='NET', y='NET_rnd',
    hue='quality', style='quantile',
    s=30, palette='bright', alpha=0.5, ec='black',
)
sns.lineplot(x=[20, 85], y=[20, 85], color='black', linestyle='dashed')
plt.grid(True)

In [None]:
# Spread (max - min) of the quantile net values per well, rounded to one decimal.
uncert_range = (
    uncert_final_q_well.groupby('well')['NET_rnd']
    .agg(lambda net: net.max() - net.min())
    .rename('NET_rnd_range')
    .reset_index()
    .round(1)
)
# Attach the range and the first 'field' value recorded for each well in logs.
uncert_final_q_well_v2 = (
    uncert_final_q_well
    .merge(uncert_range, on='well')
    .set_index('well')
    .join(logs.groupby('well')['field'].first())
    .reset_index()
)
sns.histplot(data=uncert_final_q_well_v2, x='NET_rnd_range', bins=20, kde=True,
             color='green', hue='field', alpha=0.5, multiple='stack')
plt.grid(True)

In [9]:
# uncert_final_q_well_v2.to_csv(r'C:\jupyter\SPP_uncertainty\report\uncert_final_plt.csv', index=False)

# Uncertainty calc vsh & phit Bal10

In [None]:
def vsh_uncertainty(ds_logs, vsh_cutoff_p10, vsh_cutoff_p90, iteration, seed=None):
    """Replicate the log samples once per sampled VSH cutoff and flag net rock.

    The cutoff is treated as normally distributed with mean (P10 + P90) / 2 and
    sigma |P90 - P10| / 2.56; ``iteration`` cutoffs are drawn by Latin hypercube
    sampling and mapped through the normal inverse CDF.

    NOTE(review): this function is defined several times in the notebook with
    the same body; the last executed definition wins.

    Parameters
    ----------
    ds_logs : pandas.DataFrame
        Needs the columns 'well', 'VSH', 'PHIT' and 'NET'. It is not modified.
    vsh_cutoff_p10, vsh_cutoff_p90 : float
        P10 and P90 of the VSH cutoff. If they are equal, every realization
        uses that single cutoff (previously this produced NaN cutoffs and an
        all-zero net flag).
    iteration : int
        Number of cutoff realizations.
    seed : int, optional
        When given, NumPy's global RNG is seeded before sampling so the draw
        is repeatable (``lhs`` takes no seed argument of its own). The default
        ``None`` keeps the previous unseeded behaviour.

    Returns
    -------
    pandas.DataFrame
        ``len(ds_logs) * iteration`` rows ordered by iteration, with columns
        'well', 'VSH', 'PHIT', 'NET', 'vsh_cutoff_rnd', 'iteration_vsh' and
        'NET_VSH_rnd' (1 where VSH <= cutoff, else 0).
    """
    vsh_cutoff_mean = (vsh_cutoff_p10 + vsh_cutoff_p90) / 2
    vsh_cutoff_sigma = abs((vsh_cutoff_p90 - vsh_cutoff_p10) / 2.56)
    if vsh_cutoff_sigma > 0:
        if seed is not None:
            np.random.seed(seed)
        lhs_quantiles = lhs(1, samples=iteration)
        vsh_cutoff_random = norm(loc=vsh_cutoff_mean, scale=vsh_cutoff_sigma).ppf(lhs_quantiles).flatten()
    else:
        # Degenerate distribution: a zero scale would make ppf return NaN.
        vsh_cutoff_random = np.full(iteration, vsh_cutoff_mean, dtype='float64')

    logs_uncert = ds_logs[['well', 'VSH', 'PHIT', 'NET']].astype(
        {'well': 'string', 'VSH': 'float32', 'PHIT': 'float32', 'NET': 'int32'})

    # Repeat the whole table once per realization in a single step instead of
    # copying it inside a Python loop and concatenating the copies.
    n_rows = len(logs_uncert)
    data_vsh = logs_uncert.iloc[np.tile(np.arange(n_rows), iteration)].reset_index(drop=True)
    data_vsh['vsh_cutoff_rnd'] = np.repeat(vsh_cutoff_random, n_rows)
    data_vsh['iteration_vsh'] = np.repeat(np.arange(iteration), n_rows)
    data_vsh['NET_VSH_rnd'] = (data_vsh['VSH'] <= data_vsh['vsh_cutoff_rnd']).astype('int32')
    return data_vsh.astype({'vsh_cutoff_rnd': 'float32', 'iteration_vsh': 'int32', 'NET_VSH_rnd': 'int32'})

# 40 VSH-cutoff realizations (P10 = 0.45, P90 = 0.61) applied to logs10.
data_vsh10 = vsh_uncertainty(ds_logs=logs10, vsh_cutoff_p10=0.45, vsh_cutoff_p90=0.61, iteration=40)

# Histogram of the distinct sampled cutoffs.
sns.histplot(data=data_vsh10['vsh_cutoff_rnd'].unique(), bins=15, kde=True)
plt.xlabel('VSH_cutoff');

In [None]:
def phit_uncertainty(data_vsh, phit_cutoff_p10, phit_cutoff_p90, iteration, seed=None):
    """Replicate the VSH realizations once per sampled PHIT cutoff and flag net rock.

    The cutoff is treated as normally distributed with mean (P10 + P90) / 2 and
    sigma |P90 - P10| / 2.56; ``iteration`` cutoffs are drawn by Latin hypercube
    sampling and mapped through the normal inverse CDF.

    NOTE(review): this function is defined several times in the notebook with
    the same body; the last executed definition wins.

    Parameters
    ----------
    data_vsh : pandas.DataFrame
        Needs at least the columns 'PHIT' and 'NET_VSH_rnd' (the frame returned
        by ``vsh_uncertainty`` has them). It is not modified.
    phit_cutoff_p10, phit_cutoff_p90 : float
        P10 and P90 of the PHIT cutoff. If they are equal, every realization
        uses that single cutoff (previously this produced NaN cutoffs and an
        all-zero net flag).
    iteration : int
        Number of cutoff realizations.
    seed : int, optional
        When given, NumPy's global RNG is seeded before sampling so the draw
        is repeatable (``lhs`` takes no seed argument of its own). The default
        ``None`` keeps the previous unseeded behaviour.

    Returns
    -------
    pandas.DataFrame
        ``len(data_vsh) * iteration`` rows ordered by iteration: the input
        columns plus 'phit_cutoff_rnd', 'iteration_phit' and 'NET_rnd'
        (1 where NET_VSH_rnd == 1 and PHIT >= cutoff, else 0).
    """
    phit_cutoff_mean = (phit_cutoff_p10 + phit_cutoff_p90) / 2
    phit_cutoff_sigma = abs((phit_cutoff_p90 - phit_cutoff_p10) / 2.56)
    if phit_cutoff_sigma > 0:
        if seed is not None:
            np.random.seed(seed)
        lhs_quantiles = lhs(1, samples=iteration)
        phit_cutoff_random = norm(loc=phit_cutoff_mean, scale=phit_cutoff_sigma).ppf(lhs_quantiles).flatten()
    else:
        # Degenerate distribution: a zero scale would make ppf return NaN.
        phit_cutoff_random = np.full(iteration, phit_cutoff_mean, dtype='float64')

    # Repeat the whole table once per realization in a single step instead of
    # copying it inside a Python loop and concatenating the copies.
    n_rows = len(data_vsh)
    data_phit = data_vsh.iloc[np.tile(np.arange(n_rows), iteration)].reset_index(drop=True)
    data_phit['phit_cutoff_rnd'] = np.repeat(phit_cutoff_random, n_rows)
    data_phit['iteration_phit'] = np.repeat(np.arange(iteration), n_rows)
    passes_both = (data_phit['NET_VSH_rnd'] == 1) & (data_phit['PHIT'] >= data_phit['phit_cutoff_rnd'])
    data_phit['NET_rnd'] = passes_both.astype('int32')
    return data_phit.astype({'phit_cutoff_rnd': 'float32', 'iteration_phit': 'int32', 'NET_rnd': 'int32'})
# 40 PHIT-cutoff realizations (P10 = 0.11, P90 = 0.18) on top of every VSH realization.
data_phit10 = phit_uncertainty(data_vsh=data_vsh10, phit_cutoff_p10=0.11, phit_cutoff_p90=0.18, iteration=40)

# Histogram of the distinct sampled cutoffs.
sns.histplot(data=data_phit10['phit_cutoff_rnd'].unique(), bins=15, kde=True)
plt.xlabel('PHIT_cutoff');

In [None]:
# Sum the simulated net flag per (VSH iteration, PHIT iteration, well) realization.
# NOTE(review): the copy duplicates the largest frame in memory; it is kept in
# case later cells modify data_phit_v2.
data_phit_v2 = data_phit10.copy()
uncert_final = (
    data_phit_v2
    .groupby(['iteration_vsh', 'iteration_phit', 'well'])['NET_rnd']
    .sum()
    .reset_index()
    .sort_values(by='well')
)
# uncert_final = uncert_final[uncert_final.NET_rnd > 100]

# Deterministic NET per well for Balakhany X, and P10/P50/P90 of the simulated sums.
logs10 = logs[logs.FORMATION_up == 'Balakhany X']
logs10_gb = logs10.groupby('well')['NET'].sum().reset_index()
uncert_final_q = uncert_final.groupby('well')['NET_rnd'].quantile([0.1, 0.5, 0.9]).reset_index()

uncert_final_q_well = uncert_final_q.set_index('well').join(logs10_gb.set_index('well'), how='inner').reset_index()
# presumably converts sample counts to thickness at a 0.1 sampling step - TODO confirm
uncert_final_q_well['NET'] = uncert_final_q_well['NET']*0.1
uncert_final_q_well['NET_rnd'] = uncert_final_q_well['NET_rnd']*0.1
# .copy() so the column edits below act on an independent frame, not on a slice.
uncert_final_q_well = uncert_final_q_well[uncert_final_q_well.NET_rnd != 0].copy()

# Label each quantile level q as 'p<100 * (1 - q)>', e.g. the 0.1 quantile becomes 'p90'.
uncert_final_q_well['level_2'] = 1 - uncert_final_q_well['level_1']
uncert_final_q_well = uncert_final_q_well.rename(columns={'level_2':'quantile'}).drop('level_1', axis=1)
uncert_final_q_well['quantile'] = (uncert_final_q_well['quantile'].round(2)*100).astype('int')
uncert_final_q_well['quantile'] = 'p' + uncert_final_q_well['quantile'].astype('string')
uncert_final_q_well = uncert_final_q_well[uncert_final_q_well.NET < 100]
# uncert_final_q_well = uncert_final_q_well[~uncert_final_q_well.well.isin(['C27Z', 'D01Z'])]

# Simulated vs deterministic net, with 1:1 (black) and +/-10% (red) reference lines.
sns.scatterplot(data=uncert_final_q_well, x='NET', y='NET_rnd', hue='quantile', s=30, palette='bright', alpha=0.5, ec='black')
sns.lineplot(x=[0, 100], y=[0, 100], color='black', linestyle='dashed')
sns.lineplot(x=[0, 100], y=[0, 110], color='red', linestyle='dashed')
plt.text(80, 100, '+10%', c='red')
plt.text(85, 65, '-10%', c='red')
sns.lineplot(x=[0, 100], y=[0, 90], color='red', linestyle='dashed')
plt.grid(True)
# Realization count of the first well; .iloc[0] replaces the positional
# `.iloc[0][1]` lookup on a label-indexed row, which pandas has deprecated.
print('NET.sum() realizations per 1 well: ' + str(uncert_final.groupby('well')['NET_rnd'].count().iloc[0]) + 'pcs')

In [None]:
# Absolute deviation, in whole percent, of the simulated net from the deterministic NET.
uncert_final_q_well['net_rnd/net%'] = (
    (1 - uncert_final_q_well['NET_rnd'] / uncert_final_q_well['NET'])
    .mul(100)
    .round(0)
    .astype('int')
    .abs()
)
# Bucket the deviation: <5 -> '5%', <10 -> '10%', <20 -> '20%', otherwise 'inf'.
uncert_final_q_well['quality'] = np.select(
    [
        uncert_final_q_well['net_rnd/net%'] < 5,
        uncert_final_q_well['net_rnd/net%'] < 10,
        uncert_final_q_well['net_rnd/net%'] < 20,
    ],
    ['5%', '10%', '20%'],
    default='inf',
)

sns.scatterplot(
    data=uncert_final_q_well, x='NET', y='NET_rnd',
    hue='quality', style='quantile',
    s=30, palette='bright', alpha=0.5, ec='black',
)
sns.lineplot(x=[20, 85], y=[20, 85], color='black', linestyle='dashed')
plt.grid(True)

In [None]:
# Spread (max - min) of the quantile net values per well, rounded to one decimal.
uncert_range = (
    uncert_final_q_well.groupby('well')['NET_rnd']
    .agg(lambda net: net.max() - net.min())
    .rename('NET_rnd_range')
    .reset_index()
    .round(1)
)
# Attach the range and the first 'field' value recorded for each well in logs.
uncert_final_q_well_v2 = (
    uncert_final_q_well
    .merge(uncert_range, on='well')
    .set_index('well')
    .join(logs.groupby('well')['field'].first())
    .reset_index()
)
sns.histplot(data=uncert_final_q_well_v2, x='NET_rnd_range', bins=20, kde=True,
             color='green', hue='field', alpha=0.5, multiple='stack')
plt.grid(True)

In [13]:
# Write the current uncert_final_q_well_v2 table to the report folder as CSV,
# without the index column.
uncert_final_q_well_v2.to_csv(r'C:\jupyter\SPP_uncertainty\report\uncert_final_plt_bal10.csv', index=False)

# Uncertainty calc vsh & phit v2 Bal8

In [None]:
def vsh_uncertainty(ds_logs, vsh_cutoff_p10, vsh_cutoff_p90, iteration, seed=None):
    """Replicate the log samples once per sampled VSH cutoff and flag net rock.

    The cutoff is treated as normally distributed with mean (P10 + P90) / 2 and
    sigma |P90 - P10| / 2.56; ``iteration`` cutoffs are drawn by Latin hypercube
    sampling and mapped through the normal inverse CDF.

    NOTE(review): this function is defined several times in the notebook with
    the same body; the last executed definition wins.

    Parameters
    ----------
    ds_logs : pandas.DataFrame
        Needs the columns 'well', 'VSH', 'PHIT' and 'NET'. It is not modified.
    vsh_cutoff_p10, vsh_cutoff_p90 : float
        P10 and P90 of the VSH cutoff. If they are equal, every realization
        uses that single cutoff (previously this produced NaN cutoffs and an
        all-zero net flag).
    iteration : int
        Number of cutoff realizations.
    seed : int, optional
        When given, NumPy's global RNG is seeded before sampling so the draw
        is repeatable (``lhs`` takes no seed argument of its own). The default
        ``None`` keeps the previous unseeded behaviour.

    Returns
    -------
    pandas.DataFrame
        ``len(ds_logs) * iteration`` rows ordered by iteration, with columns
        'well', 'VSH', 'PHIT', 'NET', 'vsh_cutoff_rnd', 'iteration_vsh' and
        'NET_VSH_rnd' (1 where VSH <= cutoff, else 0).
    """
    vsh_cutoff_mean = (vsh_cutoff_p10 + vsh_cutoff_p90) / 2
    vsh_cutoff_sigma = abs((vsh_cutoff_p90 - vsh_cutoff_p10) / 2.56)
    if vsh_cutoff_sigma > 0:
        if seed is not None:
            np.random.seed(seed)
        lhs_quantiles = lhs(1, samples=iteration)
        vsh_cutoff_random = norm(loc=vsh_cutoff_mean, scale=vsh_cutoff_sigma).ppf(lhs_quantiles).flatten()
    else:
        # Degenerate distribution: a zero scale would make ppf return NaN.
        vsh_cutoff_random = np.full(iteration, vsh_cutoff_mean, dtype='float64')

    logs_uncert = ds_logs[['well', 'VSH', 'PHIT', 'NET']].astype(
        {'well': 'string', 'VSH': 'float32', 'PHIT': 'float32', 'NET': 'int32'})

    # Repeat the whole table once per realization in a single step instead of
    # copying it inside a Python loop and concatenating the copies.
    n_rows = len(logs_uncert)
    data_vsh = logs_uncert.iloc[np.tile(np.arange(n_rows), iteration)].reset_index(drop=True)
    data_vsh['vsh_cutoff_rnd'] = np.repeat(vsh_cutoff_random, n_rows)
    data_vsh['iteration_vsh'] = np.repeat(np.arange(iteration), n_rows)
    data_vsh['NET_VSH_rnd'] = (data_vsh['VSH'] <= data_vsh['vsh_cutoff_rnd']).astype('int32')
    return data_vsh.astype({'vsh_cutoff_rnd': 'float32', 'iteration_vsh': 'int32', 'NET_VSH_rnd': 'int32'})

# 40 VSH-cutoff realizations (P10 = 0.45, P90 = 0.61) applied to logs8.
data_vsh8 = vsh_uncertainty(ds_logs=logs8, vsh_cutoff_p10=0.45, vsh_cutoff_p90=0.61, iteration=40)

# Histogram of the distinct sampled cutoffs.
sns.histplot(data=data_vsh8['vsh_cutoff_rnd'].unique(), bins=15, kde=True)
plt.xlabel('VSH_cutoff');

In [None]:
def phit_uncertainty(data_vsh, phit_cutoff_p10, phit_cutoff_p90, iteration, seed=None):
    """Replicate the VSH realizations once per sampled PHIT cutoff and flag net rock.

    The cutoff is treated as normally distributed with mean (P10 + P90) / 2 and
    sigma |P90 - P10| / 2.56; ``iteration`` cutoffs are drawn by Latin hypercube
    sampling and mapped through the normal inverse CDF.

    NOTE(review): this function is defined several times in the notebook with
    the same body; the last executed definition wins.

    Parameters
    ----------
    data_vsh : pandas.DataFrame
        Needs at least the columns 'PHIT' and 'NET_VSH_rnd' (the frame returned
        by ``vsh_uncertainty`` has them). It is not modified.
    phit_cutoff_p10, phit_cutoff_p90 : float
        P10 and P90 of the PHIT cutoff. If they are equal, every realization
        uses that single cutoff (previously this produced NaN cutoffs and an
        all-zero net flag).
    iteration : int
        Number of cutoff realizations.
    seed : int, optional
        When given, NumPy's global RNG is seeded before sampling so the draw
        is repeatable (``lhs`` takes no seed argument of its own). The default
        ``None`` keeps the previous unseeded behaviour.

    Returns
    -------
    pandas.DataFrame
        ``len(data_vsh) * iteration`` rows ordered by iteration: the input
        columns plus 'phit_cutoff_rnd', 'iteration_phit' and 'NET_rnd'
        (1 where NET_VSH_rnd == 1 and PHIT >= cutoff, else 0).
    """
    phit_cutoff_mean = (phit_cutoff_p10 + phit_cutoff_p90) / 2
    phit_cutoff_sigma = abs((phit_cutoff_p90 - phit_cutoff_p10) / 2.56)
    if phit_cutoff_sigma > 0:
        if seed is not None:
            np.random.seed(seed)
        lhs_quantiles = lhs(1, samples=iteration)
        phit_cutoff_random = norm(loc=phit_cutoff_mean, scale=phit_cutoff_sigma).ppf(lhs_quantiles).flatten()
    else:
        # Degenerate distribution: a zero scale would make ppf return NaN.
        phit_cutoff_random = np.full(iteration, phit_cutoff_mean, dtype='float64')

    # Repeat the whole table once per realization in a single step instead of
    # copying it inside a Python loop and concatenating the copies.
    n_rows = len(data_vsh)
    data_phit = data_vsh.iloc[np.tile(np.arange(n_rows), iteration)].reset_index(drop=True)
    data_phit['phit_cutoff_rnd'] = np.repeat(phit_cutoff_random, n_rows)
    data_phit['iteration_phit'] = np.repeat(np.arange(iteration), n_rows)
    passes_both = (data_phit['NET_VSH_rnd'] == 1) & (data_phit['PHIT'] >= data_phit['phit_cutoff_rnd'])
    data_phit['NET_rnd'] = passes_both.astype('int32')
    return data_phit.astype({'phit_cutoff_rnd': 'float32', 'iteration_phit': 'int32', 'NET_rnd': 'int32'})
# 40 PHIT-cutoff realizations (P10 = 0.125, P90 = 0.1455) on top of every VSH realization.
data_phit8 = phit_uncertainty(data_vsh=data_vsh8, phit_cutoff_p10=0.125, phit_cutoff_p90=0.1455, iteration=40)

# Histogram of the distinct sampled cutoffs.
sns.histplot(data=data_phit8['phit_cutoff_rnd'].unique(), bins=15, kde=True)
plt.xlabel('PHIT_cutoff');

In [None]:
# Sum the simulated net flag per (VSH iteration, PHIT iteration, well) realization.
# NOTE(review): the copy duplicates the largest frame in memory; it is kept in
# case later cells modify data_phit_v2.
data_phit_v2 = data_phit8.copy()
uncert_final = (
    data_phit_v2
    .groupby(['iteration_vsh', 'iteration_phit', 'well'])['NET_rnd']
    .sum()
    .reset_index()
    .sort_values(by='well')
)
# uncert_final = uncert_final[uncert_final.NET_rnd > 100]

# Deterministic NET per well for Balakhany VIII, and P10/P50/P90 of the simulated sums.
logs8 = logs[logs.FORMATION_up == 'Balakhany VIII']
logs8_gb = logs8.groupby('well')['NET'].sum().reset_index()
uncert_final_q = uncert_final.groupby('well')['NET_rnd'].quantile([0.1, 0.5, 0.9]).reset_index()

uncert_final_q_well = uncert_final_q.set_index('well').join(logs8_gb.set_index('well'), how='inner').reset_index()
# presumably converts sample counts to thickness at a 0.1 sampling step - TODO confirm
uncert_final_q_well['NET'] = uncert_final_q_well['NET']*0.1
uncert_final_q_well['NET_rnd'] = uncert_final_q_well['NET_rnd']*0.1
# .copy() so the column edits below act on an independent frame, not on a slice.
uncert_final_q_well = uncert_final_q_well[uncert_final_q_well.NET_rnd != 0].copy()

# Label each quantile level q as 'p<100 * (1 - q)>', e.g. the 0.1 quantile becomes 'p90'.
uncert_final_q_well['level_2'] = 1 - uncert_final_q_well['level_1']
uncert_final_q_well = uncert_final_q_well.rename(columns={'level_2':'quantile'}).drop('level_1', axis=1)
uncert_final_q_well['quantile'] = (uncert_final_q_well['quantile'].round(2)*100).astype('int')
uncert_final_q_well['quantile'] = 'p' + uncert_final_q_well['quantile'].astype('string')
uncert_final_q_well = uncert_final_q_well[~uncert_final_q_well.well.isin(['C27Z', 'D01Z'])]

# Simulated vs deterministic net, with 1:1 (black) and +/-10% (red) reference lines.
sns.scatterplot(data=uncert_final_q_well, x='NET', y='NET_rnd', hue='quantile', s=30, palette='bright', alpha=0.5, ec='black')
sns.lineplot(x=[0, 100], y=[0, 100], color='black', linestyle='dashed')
sns.lineplot(x=[0, 100], y=[0, 110], color='red', linestyle='dashed')
plt.text(80, 100, '+10%', c='red')
plt.text(85, 65, '-10%', c='red')
sns.lineplot(x=[0, 100], y=[0, 90], color='red', linestyle='dashed')
plt.grid(True)
# Realization count of the first well; .iloc[0] replaces the positional
# `.iloc[0][1]` lookup on a label-indexed row, which pandas has deprecated.
print('NET.sum() realizations per 1 well: ' + str(uncert_final.groupby('well')['NET_rnd'].count().iloc[0]) + 'pcs')

In [None]:
# Absolute deviation, in whole percent, of the simulated net from the deterministic NET.
uncert_final_q_well['net_rnd/net%'] = (
    (1 - uncert_final_q_well['NET_rnd'] / uncert_final_q_well['NET'])
    .mul(100)
    .round(0)
    .astype('int')
    .abs()
)
# Bucket the deviation: <5 -> '5%', <10 -> '10%', <20 -> '20%', otherwise 'inf'.
uncert_final_q_well['quality'] = np.select(
    [
        uncert_final_q_well['net_rnd/net%'] < 5,
        uncert_final_q_well['net_rnd/net%'] < 10,
        uncert_final_q_well['net_rnd/net%'] < 20,
    ],
    ['5%', '10%', '20%'],
    default='inf',
)

sns.scatterplot(
    data=uncert_final_q_well, x='NET', y='NET_rnd',
    hue='quality', style='quantile',
    s=30, palette='bright', alpha=0.5, ec='black',
)
sns.lineplot(x=[20, 85], y=[20, 85], color='black', linestyle='dashed')
plt.grid(True)

In [None]:
# Spread (max - min) of the quantile net values per well, rounded to one decimal.
uncert_range = (
    uncert_final_q_well.groupby('well')['NET_rnd']
    .agg(lambda net: net.max() - net.min())
    .rename('NET_rnd_range')
    .reset_index()
    .round(1)
)
# Attach the range and the first 'field' value recorded for each well in logs.
uncert_final_q_well_v2 = (
    uncert_final_q_well
    .merge(uncert_range, on='well')
    .set_index('well')
    .join(logs.groupby('well')['field'].first())
    .reset_index()
)
sns.histplot(data=uncert_final_q_well_v2, x='NET_rnd_range', bins=20, kde=True,
             color='green', hue='field', alpha=0.5, multiple='stack')
plt.grid(True)

In [11]:
# uncert_final_q_well_v2.to_csv(r'C:\jupyter\SPP_uncertainty\report\uncert_final_phit.csv', index=False)

# Uncertainty calc vsh & phit v2 Bal10

In [None]:
def vsh_uncertainty(ds_logs, vsh_cutoff_p10, vsh_cutoff_p90, iteration, seed=None):
    """Replicate the log samples once per sampled VSH cutoff and flag net rock.

    The cutoff is treated as normally distributed with mean (P10 + P90) / 2 and
    sigma |P90 - P10| / 2.56; ``iteration`` cutoffs are drawn by Latin hypercube
    sampling and mapped through the normal inverse CDF.

    NOTE(review): this function is defined several times in the notebook with
    the same body; the last executed definition wins.

    Parameters
    ----------
    ds_logs : pandas.DataFrame
        Needs the columns 'well', 'VSH', 'PHIT' and 'NET'. It is not modified.
    vsh_cutoff_p10, vsh_cutoff_p90 : float
        P10 and P90 of the VSH cutoff. If they are equal, every realization
        uses that single cutoff (previously this produced NaN cutoffs and an
        all-zero net flag).
    iteration : int
        Number of cutoff realizations.
    seed : int, optional
        When given, NumPy's global RNG is seeded before sampling so the draw
        is repeatable (``lhs`` takes no seed argument of its own). The default
        ``None`` keeps the previous unseeded behaviour.

    Returns
    -------
    pandas.DataFrame
        ``len(ds_logs) * iteration`` rows ordered by iteration, with columns
        'well', 'VSH', 'PHIT', 'NET', 'vsh_cutoff_rnd', 'iteration_vsh' and
        'NET_VSH_rnd' (1 where VSH <= cutoff, else 0).
    """
    vsh_cutoff_mean = (vsh_cutoff_p10 + vsh_cutoff_p90) / 2
    vsh_cutoff_sigma = abs((vsh_cutoff_p90 - vsh_cutoff_p10) / 2.56)
    if vsh_cutoff_sigma > 0:
        if seed is not None:
            np.random.seed(seed)
        lhs_quantiles = lhs(1, samples=iteration)
        vsh_cutoff_random = norm(loc=vsh_cutoff_mean, scale=vsh_cutoff_sigma).ppf(lhs_quantiles).flatten()
    else:
        # Degenerate distribution: a zero scale would make ppf return NaN.
        vsh_cutoff_random = np.full(iteration, vsh_cutoff_mean, dtype='float64')

    logs_uncert = ds_logs[['well', 'VSH', 'PHIT', 'NET']].astype(
        {'well': 'string', 'VSH': 'float32', 'PHIT': 'float32', 'NET': 'int32'})

    # Repeat the whole table once per realization in a single step instead of
    # copying it inside a Python loop and concatenating the copies.
    n_rows = len(logs_uncert)
    data_vsh = logs_uncert.iloc[np.tile(np.arange(n_rows), iteration)].reset_index(drop=True)
    data_vsh['vsh_cutoff_rnd'] = np.repeat(vsh_cutoff_random, n_rows)
    data_vsh['iteration_vsh'] = np.repeat(np.arange(iteration), n_rows)
    data_vsh['NET_VSH_rnd'] = (data_vsh['VSH'] <= data_vsh['vsh_cutoff_rnd']).astype('int32')
    return data_vsh.astype({'vsh_cutoff_rnd': 'float32', 'iteration_vsh': 'int32', 'NET_VSH_rnd': 'int32'})

# 40 VSH-cutoff realizations (P10 = 0.45, P90 = 0.61) applied to logs10.
data_vsh10 = vsh_uncertainty(ds_logs=logs10, vsh_cutoff_p10=0.45, vsh_cutoff_p90=0.61, iteration=40)

# Histogram of the distinct sampled cutoffs.
sns.histplot(data=data_vsh10['vsh_cutoff_rnd'].unique(), bins=15, kde=True)
plt.xlabel('VSH_cutoff');

In [None]:
def phit_uncertainty(data_vsh, phit_cutoff_p10, phit_cutoff_p90, iteration, seed=None):
    """Replicate the VSH realizations once per sampled PHIT cutoff and flag net rock.

    The cutoff is treated as normally distributed with mean (P10 + P90) / 2 and
    sigma |P90 - P10| / 2.56; ``iteration`` cutoffs are drawn by Latin hypercube
    sampling and mapped through the normal inverse CDF.

    NOTE(review): this function is defined several times in the notebook with
    the same body; the last executed definition wins.

    Parameters
    ----------
    data_vsh : pandas.DataFrame
        Needs at least the columns 'PHIT' and 'NET_VSH_rnd' (the frame returned
        by ``vsh_uncertainty`` has them). It is not modified.
    phit_cutoff_p10, phit_cutoff_p90 : float
        P10 and P90 of the PHIT cutoff. If they are equal, every realization
        uses that single cutoff (previously this produced NaN cutoffs and an
        all-zero net flag).
    iteration : int
        Number of cutoff realizations.
    seed : int, optional
        When given, NumPy's global RNG is seeded before sampling so the draw
        is repeatable (``lhs`` takes no seed argument of its own). The default
        ``None`` keeps the previous unseeded behaviour.

    Returns
    -------
    pandas.DataFrame
        ``len(data_vsh) * iteration`` rows ordered by iteration: the input
        columns plus 'phit_cutoff_rnd', 'iteration_phit' and 'NET_rnd'
        (1 where NET_VSH_rnd == 1 and PHIT >= cutoff, else 0).
    """
    phit_cutoff_mean = (phit_cutoff_p10 + phit_cutoff_p90) / 2
    phit_cutoff_sigma = abs((phit_cutoff_p90 - phit_cutoff_p10) / 2.56)
    if phit_cutoff_sigma > 0:
        if seed is not None:
            np.random.seed(seed)
        lhs_quantiles = lhs(1, samples=iteration)
        phit_cutoff_random = norm(loc=phit_cutoff_mean, scale=phit_cutoff_sigma).ppf(lhs_quantiles).flatten()
    else:
        # Degenerate distribution: a zero scale would make ppf return NaN.
        phit_cutoff_random = np.full(iteration, phit_cutoff_mean, dtype='float64')

    # Repeat the whole table once per realization in a single step instead of
    # copying it inside a Python loop and concatenating the copies.
    n_rows = len(data_vsh)
    data_phit = data_vsh.iloc[np.tile(np.arange(n_rows), iteration)].reset_index(drop=True)
    data_phit['phit_cutoff_rnd'] = np.repeat(phit_cutoff_random, n_rows)
    data_phit['iteration_phit'] = np.repeat(np.arange(iteration), n_rows)
    passes_both = (data_phit['NET_VSH_rnd'] == 1) & (data_phit['PHIT'] >= data_phit['phit_cutoff_rnd'])
    data_phit['NET_rnd'] = passes_both.astype('int32')
    return data_phit.astype({'phit_cutoff_rnd': 'float32', 'iteration_phit': 'int32', 'NET_rnd': 'int32'})
# 40 PHIT-cutoff realizations (P10 = 0.124, P90 = 0.14525) on top of every VSH realization.
data_phit10 = phit_uncertainty(data_vsh=data_vsh10, phit_cutoff_p10=0.124, phit_cutoff_p90=0.14525, iteration=40)

# Histogram of the distinct sampled cutoffs.
sns.histplot(data=data_phit10['phit_cutoff_rnd'].unique(), bins=15, kde=True)
plt.xlabel('PHIT_cutoff');

In [None]:
# Sum the simulated net flag per (VSH iteration, PHIT iteration, well) realization.
# NOTE(review): the copy duplicates the largest frame in memory; it is kept in
# case later cells modify data_phit_v2.
data_phit_v2 = data_phit10.copy()
uncert_final = (
    data_phit_v2
    .groupby(['iteration_vsh', 'iteration_phit', 'well'])['NET_rnd']
    .sum()
    .reset_index()
    .sort_values(by='well')
)
# uncert_final = uncert_final[uncert_final.NET_rnd > 100]

# Deterministic NET per well for Balakhany X, and P10/P50/P90 of the simulated sums.
logs10 = logs[logs.FORMATION_up == 'Balakhany X']
logs10_gb = logs10.groupby('well')['NET'].sum().reset_index()
uncert_final_q = uncert_final.groupby('well')['NET_rnd'].quantile([0.1, 0.5, 0.9]).reset_index()

uncert_final_q_well = uncert_final_q.set_index('well').join(logs10_gb.set_index('well'), how='inner').reset_index()
# presumably converts sample counts to thickness at a 0.1 sampling step - TODO confirm
uncert_final_q_well['NET'] = uncert_final_q_well['NET']*0.1
uncert_final_q_well['NET_rnd'] = uncert_final_q_well['NET_rnd']*0.1
# .copy() so the column edits below act on an independent frame, not on a slice.
uncert_final_q_well = uncert_final_q_well[uncert_final_q_well.NET_rnd != 0].copy()

# Label each quantile level q as 'p<100 * (1 - q)>', e.g. the 0.1 quantile becomes 'p90'.
uncert_final_q_well['level_2'] = 1 - uncert_final_q_well['level_1']
uncert_final_q_well = uncert_final_q_well.rename(columns={'level_2':'quantile'}).drop('level_1', axis=1)
uncert_final_q_well['quantile'] = (uncert_final_q_well['quantile'].round(2)*100).astype('int')
uncert_final_q_well['quantile'] = 'p' + uncert_final_q_well['quantile'].astype('string')
uncert_final_q_well = uncert_final_q_well[uncert_final_q_well.NET < 100]
# uncert_final_q_well = uncert_final_q_well[~uncert_final_q_well.well.isin(['C27Z', 'D01Z'])]

# Simulated vs deterministic net, with 1:1 (black) and +/-10% (red) reference lines.
sns.scatterplot(data=uncert_final_q_well, x='NET', y='NET_rnd', hue='quantile', s=30, palette='bright', alpha=0.5, ec='black')
sns.lineplot(x=[0, 100], y=[0, 100], color='black', linestyle='dashed')
sns.lineplot(x=[0, 100], y=[0, 110], color='red', linestyle='dashed')
plt.text(80, 100, '+10%', c='red')
plt.text(85, 65, '-10%', c='red')
sns.lineplot(x=[0, 100], y=[0, 90], color='red', linestyle='dashed')
plt.grid(True)
# Realization count of the first well; .iloc[0] replaces the positional
# `.iloc[0][1]` lookup on a label-indexed row, which pandas has deprecated.
print('NET.sum() realizations per 1 well: ' + str(uncert_final.groupby('well')['NET_rnd'].count().iloc[0]) + 'pcs')

In [None]:
# Absolute deviation, in whole percent, of the simulated net from the deterministic NET.
uncert_final_q_well['net_rnd/net%'] = (
    (1 - uncert_final_q_well['NET_rnd'] / uncert_final_q_well['NET'])
    .mul(100)
    .round(0)
    .astype('int')
    .abs()
)
# Bucket the deviation: <5 -> '5%', <10 -> '10%', <20 -> '20%', otherwise 'inf'.
uncert_final_q_well['quality'] = np.select(
    [
        uncert_final_q_well['net_rnd/net%'] < 5,
        uncert_final_q_well['net_rnd/net%'] < 10,
        uncert_final_q_well['net_rnd/net%'] < 20,
    ],
    ['5%', '10%', '20%'],
    default='inf',
)

sns.scatterplot(
    data=uncert_final_q_well, x='NET', y='NET_rnd',
    hue='quality', style='quantile',
    s=30, palette='bright', alpha=0.5, ec='black',
)
sns.lineplot(x=[20, 85], y=[20, 85], color='black', linestyle='dashed')
plt.grid(True)

In [None]:
# Spread (max - min) of the quantile net values per well, rounded to one decimal.
uncert_range = (
    uncert_final_q_well.groupby('well')['NET_rnd']
    .agg(lambda net: net.max() - net.min())
    .rename('NET_rnd_range')
    .reset_index()
    .round(1)
)
# Attach the range and the first 'field' value recorded for each well in logs.
uncert_final_q_well_v2 = (
    uncert_final_q_well
    .merge(uncert_range, on='well')
    .set_index('well')
    .join(logs.groupby('well')['field'].first())
    .reset_index()
)
sns.histplot(data=uncert_final_q_well_v2, x='NET_rnd_range', bins=20, kde=True,
             color='green', hue='field', alpha=0.5, multiple='stack')
plt.grid(True)

In [12]:
# Write the current uncert_final_q_well_v2 table to the report folder as CSV,
# without the index column.
uncert_final_q_well_v2.to_csv(r'C:\jupyter\SPP_uncertainty\report\uncert_final_phit_bal10.csv', index=False)

# Porosity replication Bal8

In [10]:
# Techlog script is here
# rhoma = 2.67
# a = 2.665
# b = 0.074
# if vsh != MissingValue:
#     if vsh <= 0.4:
#         rhoma = 2.67
#     elif vsh > 0.4 and vsh <= 0.5:
#         rhoma = 2.7
#     else:
#         rhoma = 2.74           #new rhoma = 2.737 from shale compaction study done by eiLink using XRD and grain density analysis previously (rhoma = 2.7)
#     RHOMA.append(rhoma)
# else:
#     RHOMA.append(MissingValue)

# phit_d = MissingValue
# phit_dnsst = MissingValue
# phit_dn = MissingValue

# neusstvshcorr = nphi[i] - vsh*0.20
# if rhob[i] != MissingValue and vsh != MissingValue: # this line was modified to ignore porosity calculation when vsh is absent
#     phit_d = limitValue((rhoma - rhob[i])/(rhoma-RHOFLUID[i]),0,0.5)   # currently this rhof is ither 0.83 or 0.935 ie it assumes no gas seen !
#     if nphi[i] !=MissingValue:
#         phit_dnsst = limitValue(((phit_d**2+nphi[i]**2)/2)**0.5,0,0.34)	  # so we combine a neutron sst which underestimates neu porosity and a density which over estimates den porosity
#         phit_dn = limitValue(((phit_d**2+neusstvshcorr**2)/2)**0.5,0,0.34) # same here - but we correct neu for vsh 
#     else:
#         phit_dnsst = MissingValue
#         phit_dn = MissingValue
# else:
#     pass
# PHIT_D.append(phit_d)
# PHIT_DNSST.append(phit_dnsst)
# PHIT_DN.append(phit_dn)

# #THAT'S WHERE ADDITION GOES TO 	
# if (phit_d-nphi[i])>float(diff_treshold) and phit_d!=-9999.0 and nphi[i]!=-9999.0:
#     fluid_pp[i] = 1

# # this makes phit use the neutron density porosity where fluid flag == 1 (gas)
# phit99 = phit_d
# if fluid_pp[i] == 1 and vsh <= 0.5:
#     phit99 = phit_dn
# if badporlog[i] == 1: # and removes bad data
#     phit99 = MissingValue
# if rhob == MissingValue:
#     phit99 = MissingValue
# PHIT.append(phit99)	

In [42]:
# Reload both log tables from disk and keep only the rows where phit_flag equals 1.
logs8 = pd.read_csv(r'C:\jupyter\SPP\inputoutput\general_logs\df_bal8_azr_v4.csv')
logs8 = logs8.loc[logs8['phit_flag'] == 1]
logs10 = pd.read_csv(r'C:\jupyter\SPP\inputoutput\general_logs\df_bal10_vshclp2_v4.csv')
logs10 = logs10.loc[logs10['phit_flag'] == 1]

# phit8 = logs8[logs8.PHIT>0]
# Typed copies used by the porosity replication below.
phit8 = logs8.astype({
    'well': 'string', 'FORMATION_up': 'string', 'FORMATION': 'string', 'field': 'string',
    'RHOB': 'float32', 'RHOMA': 'float32', 'RHOF': 'float32', 'NET': 'int32',
})
phit10 = logs10.astype({
    'well': 'string', 'FORMATION_up': 'string', 'FORMATION': 'string', 'field': 'string',
    'RHOB': 'float32', 'RHOMA': 'float32', 'RHOF': 'float32', 'NET': 'int32',
})

In [4]:
def phit8_set1_func(phit8):
    """Compute density-only porosity ``phit_dn`` for the first well group of ``phit8``.

    Parameters
    ----------
    phit8 : pandas.DataFrame
        Log samples. The columns ``well``, ``VSH`` and ``RHOB`` are read.
        The frame itself is not modified.

    Returns
    -------
    phit8_set1 : pandas.DataFrame
        Independent copy of the rows whose ``well`` is in the hard-coded list,
        with ``RHOMA`` overwritten and ``phit_dn`` added.
    well_list_set1 : list of str
        The hard-coded well names that define this group.
    """
    well_list_set1 = [  'B02', 'B02Z', 'B07', 'B20', 'B22', 'B27Z', 'B29', 'B34', 'B38Z', 'B39', 'B40',
                        'B42Z', 'B43', 'C01AZ', 'C02', 'C08Z', 'C09', 'C10Z', 'C12', 'C13Z', 'C14Z',
                        'C16', 'C21Z', 'C31', 'C34', 'C36', 'C39', 'C40', 'D01', 'D01Z', 'D02Y',
                        'D02Z', 'D04Z', 'D09Z', 'D11', 'D12Z', 'D13', 'D15', 'D23X', 'D24', 'D27', 'D34']
    # Explicit copy: the column assignments below must target an independent
    # frame, not a slice of the caller's data (avoids SettingWithCopyWarning).
    phit8_set1 = phit8[phit8.well.isin(well_list_set1)].copy()

    low_vsh = phit8_set1.VSH <= 0.4
    mid_vsh = (phit8_set1.VSH > 0.4) & (phit8_set1.VSH <= 0.5)

    # Matrix density by VSH band: 2.67 up to 0.4, 2.7 everywhere else
    # (the 0.4-0.5 band repeats the default value).
    phit8_set1['RHOMA'] = 2.7
    phit8_set1.loc[low_vsh, 'RHOMA'] = 2.67
    phit8_set1.loc[mid_vsh, 'RHOMA'] = 2.7

    # Density porosity with a fixed fluid density of 0.83.
    phit8_set1['phit_dn'] = (phit8_set1['RHOMA'] - phit8_set1['RHOB'])/(phit8_set1['RHOMA'] - 0.83)
    return phit8_set1, well_list_set1
# Build the set-1 frame; both returned names are reused further down in this cell.
phit8_set1, well_list_set1 = phit8_set1_func(phit8)

def phit8_set2_func(phit8, well_list_set1):
    """Compute neutron-density porosity ``phit_dn`` for the second well group of ``phit8``.

    For every selected sample the density porosity (fluid density 0.83) is
    combined with the VSH-corrected neutron reading as
    ``sqrt((phit_d**2 + neusstvshcorr**2) / 2)``.

    Parameters
    ----------
    phit8 : pandas.DataFrame
        Log samples. The columns ``well``, ``VSH``, ``RHOB`` and ``NPSS`` are
        read. The frame itself is not modified.
    well_list_set1 : list of str
        Wells already handled by set 1; only used to derive ``well_list_set2``.

    Returns
    -------
    phit8_set2 : pandas.DataFrame
        Independent copy of the rows whose ``well`` is in ``well_list_set2_sel``,
        with ``RHOMA`` overwritten and ``phit_dn`` / ``neusstvshcorr`` added.
    well_list_set2 : list
        Sorted wells present in ``phit8`` that are not in ``well_list_set1``.
    well_list_set2_sel : list of str
        The hard-coded wells actually processed by this function.
    """
    well_list_set2 = np.setdiff1d(np.array(phit8.well.unique()), np.array(well_list_set1)).tolist()
    well_list_set2_sel = ['B01Y', 'B12', 'B13ST2', 'B14Z', 'B19', 'B21', 'B26', 'B37', 'C32']
    # Explicit copy: the column assignments below must target an independent
    # frame, not a slice of the caller's data (avoids SettingWithCopyWarning).
    phit8_set2 = phit8[phit8.well.isin(well_list_set2_sel)].copy()

    low_vsh = phit8_set2.VSH <= 0.4
    mid_vsh = (phit8_set2.VSH > 0.4) & (phit8_set2.VSH <= 0.5)

    # Default for every sample: RHOMA 2.7, density porosity, then the
    # neutron-density combination.
    phit8_set2['RHOMA'] = 2.7
    phit8_set2['phit_dn'] = (phit8_set2['RHOMA'] - phit8_set2['RHOB'])/(phit8_set2['RHOMA'] - 0.83)
    phit8_set2['neusstvshcorr'] = phit8_set2['NPSS'] - phit8_set2['VSH']*0.20  # neutron corrected for VSH
    phit8_set2['phit_dn'] = ((phit8_set2['phit_dn']**2 + phit8_set2['neusstvshcorr']**2)/2)**0.5

    # VSH <= 0.4: switch RHOMA to 2.67 and redo both steps for those rows only.
    # The right-hand sides span the whole frame; .loc keeps just the masked rows.
    phit8_set2.loc[low_vsh, 'RHOMA'] = 2.67
    phit8_set2.loc[low_vsh, 'phit_dn'] = (phit8_set2['RHOMA'] - phit8_set2['RHOB'])/(phit8_set2['RHOMA'] - 0.83)
    phit8_set2.loc[low_vsh, 'phit_dn'] = ((phit8_set2['phit_dn']**2 + phit8_set2['neusstvshcorr']**2)/2)**0.5

    # 0.4 < VSH <= 0.5: same two steps with RHOMA 2.7 (equals the default result).
    phit8_set2.loc[mid_vsh, 'RHOMA'] = 2.7
    phit8_set2.loc[mid_vsh, 'phit_dn'] = (phit8_set2['RHOMA'] - phit8_set2['RHOB'])/(phit8_set2['RHOMA'] - 0.83)
    phit8_set2.loc[mid_vsh, 'phit_dn'] = ((phit8_set2['phit_dn']**2 + phit8_set2['neusstvshcorr']**2)/2)**0.5
    return phit8_set2, well_list_set2, well_list_set2_sel
# Build the set-2 frame; the two returned well lists feed phit8_set3_func below.
phit8_set2, well_list_set2, well_list_set2_sel = phit8_set2_func(phit8, well_list_set1)

def phit8_set3_func(phit8, well_list_set2, well_list_set2_sel):
    """Compute ``phit_dn`` for the third well group of ``phit8``.

    Result per VSH band:

    * ``VSH <= 0.4``: density porosity with RHOMA 2.67.
    * ``0.4 < VSH <= 0.5``: ``sqrt((phit_d**2 + neusstvshcorr**2) / 2)``, where
      ``phit_d`` is the density porosity computed with RHOMA 2.74.
    * otherwise: density porosity with RHOMA 2.74.

    Parameters
    ----------
    phit8 : pandas.DataFrame
        Log samples. The columns ``well``, ``VSH``, ``RHOB`` and ``NPSS`` are
        read. The frame itself is not modified.
    well_list_set2, well_list_set2_sel : list
        Candidate and already-processed wells from set 2; only used to derive
        ``well_list_set3``.

    Returns
    -------
    phit8_set3 : pandas.DataFrame
        Independent copy of the rows whose ``well`` is in ``well_list_set3_sel``,
        with ``RHOMA`` overwritten and ``phit_dn`` / ``neusstvshcorr`` added.
    well_list_set3 : list
        ``well_list_set2`` minus ``well_list_set2_sel`` (sorted).
    well_list_set3_sel : list of str
        The hard-coded wells actually processed by this function.
    """
    well_list_set3 = np.setdiff1d(np.array(well_list_set2), np.array(well_list_set2_sel)).tolist()
    well_list_set3_sel = ['B34Z', 'C01A', 'C14', 'C27Y', 'C42', 'C43', 'D09Y', 'D13Y', 'B08Z', 'C01', 'D19Z', 'D29']
    # Explicit copy: the column assignments below must target an independent
    # frame, not a slice of the caller's data (avoids SettingWithCopyWarning).
    phit8_set3 = phit8[phit8.well.isin(well_list_set3_sel)].copy()

    low_vsh = phit8_set3.VSH <= 0.4
    mid_vsh = (phit8_set3.VSH > 0.4) & (phit8_set3.VSH <= 0.5)

    # Default for every sample: RHOMA 2.74 and density-only porosity.
    phit8_set3['RHOMA'] = 2.74
    phit8_set3['phit_dn'] = (phit8_set3['RHOMA'] - phit8_set3['RHOB'])/(phit8_set3['RHOMA'] - 0.83)
    phit8_set3['neusstvshcorr'] = phit8_set3['NPSS'] - phit8_set3['VSH']*0.20  # neutron corrected for VSH

    # VSH <= 0.4: density-only porosity recomputed with RHOMA 2.67.
    phit8_set3.loc[low_vsh, 'RHOMA'] = 2.67
    phit8_set3.loc[low_vsh, 'phit_dn'] = (phit8_set3['RHOMA'] - phit8_set3['RHOB'])/(phit8_set3['RHOMA'] - 0.83)

    # 0.4 < VSH <= 0.5: neutron-density combination.
    # NOTE(review): RHOMA is set to 2.70 here, but the density term in phit_dn
    # is still the default one computed with 2.74 (it is not recomputed, unlike
    # in phit8_set2_func). Kept as-is; confirm whether this is intended.
    phit8_set3.loc[mid_vsh, 'RHOMA'] = 2.70
    phit8_set3.loc[mid_vsh, 'phit_dn'] = ((phit8_set3['phit_dn']**2 + phit8_set3['neusstvshcorr']**2)/2)**0.5
    return phit8_set3, well_list_set3, well_list_set3_sel
# Build the set-3 frame; the two returned well lists feed phit8_set4_func below.
phit8_set3, well_list_set3, well_list_set3_sel = phit8_set3_func(phit8, well_list_set2, well_list_set2_sel)

def phit8_set4_func(phit8, well_list_set3, well_list_set3_sel):
    """Compute ``phit_dn`` for the fourth well group of ``phit8``.

    Result per VSH band:

    * ``VSH <= 0.4``: density porosity with RHOMA 2.67.
    * ``0.4 < VSH <= 0.5``: ``sqrt((phit_d**2 + neusstvshcorr**2) / 2)`` with
      ``phit_d`` the density porosity for RHOMA 2.7.
    * otherwise: density porosity with RHOMA 2.7.

    Parameters
    ----------
    phit8 : pandas.DataFrame
        Log samples. The columns ``well``, ``VSH``, ``RHOB`` and ``NPSS`` are
        read. The frame itself is not modified.
    well_list_set3, well_list_set3_sel : list
        Candidate and already-processed wells from set 3; only used to derive
        ``well_list_set4``.

    Returns
    -------
    phit8_set4 : pandas.DataFrame
        Independent copy of the rows whose ``well`` is in ``well_list_set4_sel``,
        with ``RHOMA`` overwritten and ``phit_dn`` / ``neusstvshcorr`` added.
    well_list_set4 : list
        ``well_list_set3`` minus ``well_list_set3_sel`` (sorted).
    well_list_set4_sel : list of str
        The hard-coded wells actually processed by this function.
    """
    well_list_set4 = np.setdiff1d(np.array(well_list_set3), np.array(well_list_set3_sel)).tolist()
    well_list_set4_sel = ['B31', 'C01AY', 'C06', 'C07', 'C18', 'C20Z', 'C21', 'C41', 'D03', 'D04', 'D05ST1', 'D06', 'D14', 'D25']
    # Explicit copy: the column assignments below must target an independent
    # frame, not a slice of the caller's data (avoids SettingWithCopyWarning).
    phit8_set4 = phit8[phit8.well.isin(well_list_set4_sel)].copy()

    low_vsh = phit8_set4.VSH <= 0.4
    mid_vsh = (phit8_set4.VSH > 0.4) & (phit8_set4.VSH <= 0.5)

    # Default for every sample: RHOMA 2.7 and density-only porosity.
    phit8_set4['RHOMA'] = 2.7
    phit8_set4['phit_dn'] = (phit8_set4['RHOMA'] - phit8_set4['RHOB'])/(phit8_set4['RHOMA'] - 0.83)
    phit8_set4['neusstvshcorr'] = phit8_set4['NPSS'] - phit8_set4['VSH']*0.20  # neutron corrected for VSH

    # VSH <= 0.4: density-only porosity recomputed with RHOMA 2.67.
    phit8_set4.loc[low_vsh, 'RHOMA'] = 2.67
    phit8_set4.loc[low_vsh, 'phit_dn'] = (phit8_set4['RHOMA'] - phit8_set4['RHOB'])/(phit8_set4['RHOMA'] - 0.83)

    # 0.4 < VSH <= 0.5: combine the default density porosity (RHOMA 2.7) with
    # the corrected neutron reading.
    phit8_set4.loc[mid_vsh, 'RHOMA'] = 2.70
    phit8_set4.loc[mid_vsh, 'phit_dn'] = ((phit8_set4['phit_dn']**2 + phit8_set4['neusstvshcorr']**2)/2)**0.5
    return phit8_set4, well_list_set4, well_list_set4_sel
# Build the set-4 frame; the two returned well lists feed phit8_set5_func below.
phit8_set4, well_list_set4, well_list_set4_sel = phit8_set4_func(phit8, well_list_set3, well_list_set3_sel)

def phit8_set5_func(phit8, well_list_set4, well_list_set4_sel):
    """Compute density porosity ``phit_dn`` for the fifth well group of ``phit8``.

    Result per sample:

    * ``VSH <= 0.4`` and ``phitd_npss >= 0.01``: RHOMA 2.67, fluid density 0.5.
    * other ``VSH <= 0.4`` samples: RHOMA 2.67, fluid density 0.83.
    * everything else: RHOMA 2.7, fluid density 0.83.

    ``neusstvshcorr`` is added to the output but does not enter ``phit_dn`` here.

    Parameters
    ----------
    phit8 : pandas.DataFrame
        Log samples. The columns ``well``, ``VSH``, ``RHOB``, ``NPSS`` and
        ``phitd_npss`` are read. The frame itself is not modified.
    well_list_set4, well_list_set4_sel : list
        Candidate and already-processed wells from set 4; only used to derive
        ``well_list_set5``.

    Returns
    -------
    phit8_set5 : pandas.DataFrame
        Independent copy of the rows whose ``well`` is in ``well_list_set5_sel``,
        with ``RHOMA`` overwritten and ``phit_dn`` / ``neusstvshcorr`` added.
    well_list_set5 : list
        ``well_list_set4`` minus ``well_list_set4_sel`` (sorted).
    well_list_set5_sel : list of str
        The hard-coded wells actually processed by this function.
    """
    well_list_set5 = np.setdiff1d(np.array(well_list_set4), np.array(well_list_set4_sel)).tolist()
    well_list_set5_sel = ['C04', 'D07', 'D09', 'D37']
    # Explicit copy: the column assignments below must target an independent
    # frame, not a slice of the caller's data (avoids SettingWithCopyWarning).
    phit8_set5 = phit8[phit8.well.isin(well_list_set5_sel)].copy()

    low_vsh = phit8_set5.VSH <= 0.4
    low_vsh_separated = low_vsh & (phit8_set5.phitd_npss >= 0.01)
    mid_vsh = (phit8_set5.VSH > 0.4) & (phit8_set5.VSH <= 0.5)

    # Default for every sample: RHOMA 2.7 and density-only porosity.
    phit8_set5['RHOMA'] = 2.7
    phit8_set5['phit_dn'] = (phit8_set5['RHOMA'] - phit8_set5['RHOB'])/(phit8_set5['RHOMA'] - 0.83)
    phit8_set5['neusstvshcorr'] = phit8_set5['NPSS'] - phit8_set5['VSH']*0.20  # neutron corrected for VSH

    # VSH <= 0.4: RHOMA 2.67; where phitd_npss >= 0.01 the fluid density drops
    # to 0.5 (presumably a lighter, gas-bearing fluid -- TODO confirm).
    phit8_set5.loc[low_vsh, 'RHOMA'] = 2.67
    phit8_set5.loc[low_vsh, 'phit_dn'] = (phit8_set5['RHOMA'] - phit8_set5['RHOB'])/(phit8_set5['RHOMA'] - 0.83)
    phit8_set5.loc[low_vsh_separated, 'phit_dn'] = (phit8_set5['RHOMA'] - phit8_set5['RHOB'])/(phit8_set5['RHOMA'] - 0.5)

    # 0.4 < VSH <= 0.5: same as the default (RHOMA 2.7, fluid density 0.83).
    phit8_set5.loc[mid_vsh, 'RHOMA'] = 2.7
    phit8_set5.loc[mid_vsh, 'phit_dn'] = (phit8_set5['RHOMA'] - phit8_set5['RHOB'])/(phit8_set5['RHOMA'] - 0.83)
    return phit8_set5, well_list_set5, well_list_set5_sel
# Build the set-5 frame, the last of the five frames concatenated below.
phit8_set5, well_list_set5, well_list_set5_sel = phit8_set5_func(phit8, well_list_set4, well_list_set4_sel)

# Stack the five per-group frames under a fresh 0..n-1 index and save the result.
phit8_set_full = pd.concat(
    [phit8_set1, phit8_set2, phit8_set3, phit8_set4, phit8_set5],
    ignore_index=True,
)
phit8_set_full.to_csv(r'C:\jupyter\SPP_uncertainty\io\phit8_set_full.csv')

# Porosity replication Bal10

In [49]:
# NOTE(review): this rebinds the notebook-global `well_list_set1` that the
# phit8 cell also assigns; re-running that cell's later steps after this one
# would pick up this list instead.
well_list_set1 = [  'A01Y', 'A03Z', 'A04', 'A05', 'A05Z', 'A06Z', 'A07', 'A07Z',
                    'A09Z', 'A10', 'A10X', 'A12', 'A12V', 'A12W', 'A12X', 'A12YST1',
                    'A13W', 'A13X', 'A13Z', 'A13ZST1', 'A14', 'A14V', 'A20Z', 'B02',
                    'B02Z', 'B06', 'B07', 'B14Z', 'B16Y', 'B18Y', 'B22', 'B32', 'B34',
                    'B42Z', 'C01AZ', 'C02', 'C03ST1', 'C04', 'C06', 'C07', 'C10',
                    'C13Z', 'C14Z', 'C16', 'C18', 'C20Z', 'C21', 'C27Z', 'C33', 'D01',
                    'D01Z', 'D02Z', 'D03', 'D04', 'D07', 'D09Z', 'D10', 'D11', 'D12Z',
                    'D13', 'D15', 'D23X', 'D27', 'E01', 'E01X', 'E01Y', 'E01Z', 'E04',
                    'E05', 'E07', 'E11', 'E16Y', 'E25', 'E26', 'E30Z', 'E31', 'E31Z',
                    'E36', 'E38', 'G01Z', 'G05', 'G06', 'G07Z', 'H01', 'H01Y', 'H01Z',
                    'J01', 'J01Z', 'J02', 'J11Z', 'J15', 'J15Z', 'J16', 'J25', 'J31']
# Filter first, then copy. Copying all of phit10 up front was wasted work (the
# copy was overwritten on the next line), and the filtered slice still needs
# to be an independent frame before columns are assigned on it.
phit10_set1 = phit10[phit10.well.isin(well_list_set1)].copy()

# Matrix density by VSH band: 2.67 up to 0.4, 2.7 everywhere else
# (the 0.4-0.5 band repeats the default value).
phit10_set1['RHOMA'] = 2.7
phit10_set1.loc[phit10_set1.VSH <= 0.4, 'RHOMA'] = 2.67
phit10_set1.loc[(phit10_set1.VSH > 0.4) & (phit10_set1.VSH <= 0.5), 'RHOMA'] = 2.7
# Density porosity with a fixed fluid density of 0.83.
phit10_set1['phit_dn'] = (phit10_set1['RHOMA'] - phit10_set1['RHOB'])/(phit10_set1['RHOMA'] - 0.83)

phit10_set1.to_csv(r'C:\jupyter\SPP_uncertainty\io\phit10_set_1.csv')

In [48]:
def phit_recalc_matrixplot(phit_matrixplot, phit_target, mae_cutoff):
    """Draw one PHIT-vs-recomputed-porosity cross-plot per well and classify wells by MAE.

    Each well gets a panel in a 5-column grid showing ``PHIT`` (x) against
    ``phit_target`` (y) with a dashed 1:1 line; the panel title is the well
    name followed by its mean absolute error multiplied by 100 and rounded to
    one decimal. Wells are then listed as "good" or "problems" and both lists
    are printed.

    Parameters
    ----------
    phit_matrixplot : pandas.DataFrame
        Must contain the columns ``well``, ``PHIT`` and ``phit_target``.
    phit_target : str
        Name of the column holding the recomputed porosity.
    mae_cutoff : float
        Threshold compared against the MAE *after* the x100 scaling; wells
        with ``mae <= mae_cutoff`` are reported as good.

    Returns
    -------
    None
    """
    col = 5
    wells = phit_matrixplot.well.unique()  # computed once instead of on every iteration
    n_wells = len(wells)
    phit_wells_norm = []
    phit_wells_problem = []

    if n_wells == 0:
        # plt.subplots cannot build a grid with zero rows; report empty lists.
        print('good phit:', len(phit_wells_norm), np.array(phit_wells_norm))
        print('problems phit:', len(phit_wells_problem), np.array(phit_wells_problem))
        return

    rows = -(-n_wells // col)  # ceiling division
    # squeeze=False keeps `ax` two-dimensional even when only one row is needed.
    fig, ax = plt.subplots(rows, col, figsize=(col*3, rows*2), squeeze=False)

    for k, panel in enumerate(ax.flat):  # row-major order, one panel per well
        if k >= n_wells:
            panel.axis('off')  # hide unused panels in the last row
            continue
        wellname = wells[k]
        data = phit_matrixplot[phit_matrixplot.well == wellname]

        # Built-in round() works whether the metric is a numpy or a Python float.
        mae = round(float(mean_absolute_error(data['PHIT'], data[phit_target])) * 100, 1)

        panel.scatter(data=data, x='PHIT', y=phit_target, alpha=0.5)
        panel.plot([0.0, 0.5], [0.0, 0.5], c='red', linestyle='dashed')
        panel.set_xlim(0.0, 0.5)
        panel.set_ylim(0.0, 0.5)
        if mae <= mae_cutoff:
            phit_wells_norm.append(wellname)
        else:
            phit_wells_problem.append(wellname)
        panel.set_title(wellname + ' ' + str(mae))

    plt.tight_layout()
    print('good phit:', len(phit_wells_norm), np.array(phit_wells_norm))
    print('problems phit:', len(phit_wells_problem), np.array(phit_wells_problem))
# phit_recalc_matrixplot(phit10_set1, 'phit_dn', 0.2)

In [47]:
def phit_recalc_display(phit_diagram, wellname):
    """Plot a three-track depth display for one well.

    Only rows of ``phit_diagram`` with ``well == wellname`` and ``PHIT > 0``
    are shown, all against ``TST``:

    * track 1: ``PHIT`` and ``phit_dn`` (x axis reversed), with ``VSH`` on a
      twin axis and dashed markers at VSH 0.4 and 0.5;
    * track 2: ``RHOB`` with ``NPSS`` and ``VSH`` on twin axes;
    * track 3: ``phitd_npss`` with a dashed zero line.
    """
    well_rows = phit_diagram[(phit_diagram.well == wellname) & (phit_diagram.PHIT >0)]
    tst = well_rows.TST
    por_stored = well_rows.PHIT
    density = well_rows.RHOB
    neutron = well_rows.NPSS
    shale = well_rows.VSH
    por_recalc = well_rows.phit_dn
    por_sep = well_rows.phitd_npss

    fig, tracks = plt.subplots(1,3,figsize=(8, 12))
    por_track, log_track, sep_track = tracks

    # Track 1: stored vs recomputed porosity.
    por_track.plot(por_stored, tst, label='PHIT', color='green', ls='-', lw=2)
    por_track.plot(por_recalc, tst, label='phit_dn', color='red', ls='-', lw=1)
    por_track.set_title(wellname)
    por_track.legend()
    por_track.invert_yaxis()
    por_track.invert_xaxis()
    por_track.tick_params(axis='x', colors='green')

    # VSH overlay on track 1, with the two band boundaries marked.
    por_vsh_axis = por_track.twiny()
    por_vsh_axis.plot(shale, tst, label='VSH', color='gray', ls='-', lw=2, alpha=0.75)
    for vsh_limit in (0.4, 0.5):
        por_vsh_axis.vlines(vsh_limit, tst.min(), tst.max(), color='black', ls='--', lw=1)
    por_vsh_axis.set_xlim(0,1)
    por_vsh_axis.xaxis.set_major_locator(plt.MaxNLocator(4))
    por_vsh_axis.tick_params(axis='x', colors='gray')

    # Track 2: bulk density, plus neutron and VSH on their own top axes.
    log_track.plot(density, tst, label='RHOB', color='red', ls='-', lw=1)
    log_track.set_xlim(1.65, 2.65)
    log_track.invert_yaxis()
    log_track.tick_params(axis='x', colors='red')

    neutron_axis = log_track.twiny()
    neutron_axis.plot(neutron, tst, label='NPSS', color='blue', ls='-', lw=1)
    neutron_axis.set_xlim(0.6, 0)
    neutron_axis.tick_params(axis='x', colors='blue')

    log_vsh_axis = log_track.twiny()
    log_vsh_axis.plot(shale, tst, label='VSH', color='gray', ls='-', lw=2, alpha=0.75)
    log_vsh_axis.set_xlim(0,1)
    log_vsh_axis.xaxis.set_major_locator(plt.MaxNLocator(4))
    log_vsh_axis.tick_params(axis='x', colors='gray')
    # Push the second top axis outward so it does not overlap the NPSS axis.
    log_vsh_axis.spines['top'].set_position(('outward', 20))

    # Track 3: phitd_npss around a zero reference line.
    sep_track.plot(por_sep, tst, label='phitd_npss', color='black', ls='-', lw=1)
    sep_track.invert_yaxis()
    sep_track.set_xlim(-0.5,0.5)
    sep_track.vlines(0, tst.min(), tst.max(), color='gray', ls='--', lw=1)
# phit_recalc_display(phit10_set1, 'H01Y')