# Libs

In [1]:
import warnings
warnings.filterwarnings('ignore')

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
import seaborn as sns
from sklearn.metrics.pairwise import euclidean_distances
from tqdm import tqdm
import statistics
import math
from sklearn.cluster import DBSCAN
from sklearn.preprocessing import StandardScaler

from scipy.linalg import solve
from itertools import product
from sklearn.metrics import mean_squared_error as MSE

# Display options applied to every DataFrame preview in the notebook.
pd.options.display.precision = 3
# NOTE(review): an explicit float_format presumably overrides `precision` for floats — confirm which of the two is intended.
pd.options.display.float_format = lambda x: '%.5f' % x
pd.options.display.max_columns = 15
pd.options.display.max_rows = 6

# Data uploading

In [2]:
# --- Balakhany VIII logs ---------------------------------------------------
# Raw string literals: the Windows paths contain backslashes that must not be
# interpreted as escape sequences.
df_bal8_v4 = pd.read_csv(r'C:\jupyter\SPP\inputoutput\general_logs\df_bal8_azr_v4.csv')
df_bal8_v4.columns = df_bal8_v4.columns.str.lower()
# Short formation codes; the numeric prefix makes them sort in a fixed order.
bal8_formation_codes = {
    'Balakhany VIII sand': '1_bal8_sand',
    'Balakhany VIII 25': '2_bal8_25',
    'Balakhany VIII 20': '3_bal8_20',
    'Balakhany VIII 15': '4_bal8_15',
    'Balakhany VIII 10': '5_bal8_10',
    'Balakhany VIII 5': '6_bal8_5',
}
df_bal8_v4['formation'] = df_bal8_v4['formation'].replace(bal8_formation_codes)
# Wells that have at least one sample with phit_flag == 1; keep all samples of those wells.
well_phit_flag8 = df_bal8_v4[df_bal8_v4.phit_flag==1].groupby('well')['phit_flag'].apply(lambda x: x.iloc[0]).reset_index().well.unique()
df_bal8_v4_flag = df_bal8_v4[df_bal8_v4.well.isin(well_phit_flag8)]

# --- Balakhany X logs ------------------------------------------------------
df_bal10_v4 = pd.read_csv(r'C:\jupyter\SPP\inputoutput\general_logs\df_bal10_vshclp2_v4.csv')
df_bal10_v4.columns = df_bal10_v4.columns.str.lower()
# NOTE(review): 'Balakhany X 50' and 'Balakhany X 40' both map to '2_bal10_40'
# — presumably a deliberate merge of the two units; confirm.
bal10_formation_codes = {
    'Balakhany X sand': '1_bal10_sand',
    'Balakhany X 50': '2_bal10_40',
    'Balakhany X 40': '2_bal10_40',
    'Balakhany X 20': '3_bal10_20',
}
df_bal10_v4['formation'] = df_bal10_v4['formation'].replace(bal10_formation_codes)
well_phit_flag10 = df_bal10_v4[df_bal10_v4.phit_flag==1].groupby('well')['phit_flag'].apply(lambda x: x.iloc[0]).reset_index().well.unique()
df_bal10_v4_flag = df_bal10_v4[df_bal10_v4.well.isin(well_phit_flag10)]

In [3]:
# Layer tables for Balakhany VIII and X: drop the unnamed column left over
# from the CSV export, then lower-case the remaining column names.
ntd_top_phi_bot8_bp_v4 = (
    pd.read_csv(r'C:\jupyter\SPP\inputoutput\layers\ntd_top_phi_bot8_bp_v4.csv')
    .drop(columns=['Unnamed: 0'])
    .rename(columns=str.lower)
)
ntd_top_phi_bot10_bp_v4 = (
    pd.read_csv(r'C:\jupyter\SPP\inputoutput\layers\ntd_top_phi_bot10_bp_v4.csv')
    .drop(columns=['Unnamed: 0'])
    .rename(columns=str.lower)
)

In [4]:
def well_dist_calc(dataset, fm):
    """Build a long table of pairwise Euclidean distances between wells.

    One (xmean, ymean) point is taken per well (the first non-null value of
    each column); wells with a missing coordinate are dropped.

    Parameters
    ----------
    dataset : pandas.DataFrame
        Must contain the columns ``well``, ``xmean`` and ``ymean``.
    fm : str
        Label written to the ``formation_up`` column of every row.

    Returns
    -------
    pandas.DataFrame
        Columns ``well``, ``well_offset``, ``dist``, ``formation_up``, sorted
        by ``well`` and then ``dist``. Rows with a distance of exactly 0 are
        removed, which drops each well paired with itself (and any pair of
        wells sharing identical coordinates).
    """
    coords = dataset.groupby('well')[['xmean', 'ymean']].first().reset_index().dropna()
    well_names = list(coords.well)
    # Square distance matrix: one column per well, rows in the same order.
    dist_matrix = pd.DataFrame(euclidean_distances(coords[['xmean', 'ymean']]), columns=well_names)
    dist_matrix.insert(0, 'well_offset', well_names)
    dist_melt = dist_matrix.melt(id_vars='well_offset', var_name='well', value_name='dist')
    dist_melt = dist_melt[['well', 'well_offset', 'dist']]
    dist_melt = dist_melt[dist_melt.dist != 0].sort_values(by=['well', 'dist'])
    # Lower-case name: the downstream helpers in this notebook select 'formation_up'.
    dist_melt['formation_up'] = fm
    return dist_melt

# Pairwise well-distance tables, one per formation, built from the wells kept in the *_flag frames.
dist_bal8 = well_dist_calc(df_bal8_v4_flag, 'Balakhany VIII')
dist_bal10 = well_dist_calc(df_bal10_v4_flag, 'Balakhany X')

# Dashboard

In [None]:
# Preview the Balakhany VIII distance table (row count limited by display.max_rows).
dist_bal8

In [6]:
def well_dist_calc(dataset, fm):
    """Return every ordered well pair with the Euclidean distance between them.

    Each well contributes one (xmean, ymean) point: the first non-null value
    per column; wells with a missing coordinate are left out. Pairs whose
    distance is exactly 0 (a well with itself, or coincident wells) are
    removed. The result has columns ``well``, ``well_offset``, ``dist`` and
    ``formation_up`` (set to ``fm``) and is sorted by ``well`` then ``dist``.
    """
    well_xy = dataset.groupby('well')[['xmean', 'ymean']].first().reset_index().dropna()
    names = well_xy['well'].tolist()
    # Distance matrix with one column per well; the row order matches `names`.
    matrix = pd.DataFrame(euclidean_distances(well_xy[['xmean', 'ymean']]), columns=names)
    matrix.insert(0, 'well_offset', names)
    long_form = matrix.melt(id_vars='well_offset', var_name='well', value_name='dist')
    long_form = long_form.loc[long_form['dist'] != 0, ['well', 'well_offset', 'dist']]
    long_form = long_form.sort_values(by=['well', 'dist'])
    long_form['formation_up'] = fm
    return long_form

def well_list_selection(df, well_n):
    """Pick the wells with the largest KHTST.

    Takes the first ``khtst`` and ``field`` value of every well, discards
    wells whose ``khtst`` is 0, sorts by ``khtst`` then ``field`` in
    descending order and keeps the leading ``well_n - 1`` rows.

    NOTE(review): ``well_n - 1`` rows are returned, not ``well_n`` — confirm
    whether the off-by-one is intended.
    """
    per_well = df.groupby('well')[['khtst', 'field']].first().reset_index()
    nonzero = per_well.loc[per_well['khtst'] != 0]
    ranked = nonzero.sort_values(by=['khtst', 'field'], ascending=False)
    return ranked.iloc[:well_n - 1].reset_index(drop=True)

def well_offset_comparison_dashboard(dataset_wells, dataset_layers, dist_df,  well_target, offset_qty, fm_name, print_flag):
    """Draw a five-panel comparison of a target well and its nearest offsets.

    Panels: PHIT histogram, layer ``htst`` vs ``perm_avg`` (log y axis),
    ``khtst`` against a reversed per-well sample index, per-formation
    difference between the first and last ``khtst`` sample, and a location map.

    Parameters
    ----------
    dataset_wells : pandas.DataFrame
        Log samples; the columns read are ``well``, ``phit``, ``phit_flag``,
        ``net``, ``khtst``, ``formation``, ``xmean`` and ``ymean``.
    dataset_layers : pandas.DataFrame
        Layer table; the columns read are ``well``, ``htst`` and ``perm_avg``.
    dist_df : pandas.DataFrame
        Pair table with ``well`` and ``well_offset`` columns and the distance
        in the third column. The first ``offset_qty`` rows of the target well
        are used, so the table is expected to be ordered by distance per well.
    well_target
        Name of the target well (drawn in red).
    offset_qty : int
        Number of offsets to draw, 1 to 3. Fewer are drawn when ``dist_df``
        holds fewer rows for the target.
    fm_name : str
        Used only in the name of the saved file.
    print_flag : str
        The figure is saved to disk only when this equals ``'print'``.

    Raises
    ------
    ValueError
        If the target has no offsets in ``dist_df`` or more than three offsets
        are selected (only three offset colours are defined).
    """
    nearest = dist_df[dist_df.well == well_target].iloc[:offset_qty]
    if nearest.empty:
        raise ValueError(f"no offset wells found for target well {well_target}")
    offset_well_list = nearest['well_offset'].values.tolist()
    offset_dists = nearest.iloc[:, 2].tolist()  # third column holds the distance

    # (plot colour, colour name used in the title), in order of the offsets.
    offset_styles = [('orange', 'orange'), ('green', 'green'), ('#0797eb', 'blue')]
    if len(offset_well_list) > len(offset_styles):
        raise ValueError(f"at most {len(offset_styles)} offset wells are supported, got {len(offset_well_list)}")
    offset_styles = offset_styles[:len(offset_well_list)]

    well_list = [well_target] + offset_well_list
    data_logs = dataset_wells[(dataset_wells.well.isin(well_list)) & (dataset_wells.phit != 0)]
    data_layers = dataset_layers[   (dataset_layers.well.isin(well_list)) &
                                    (dataset_layers.htst > 1)]
    # Per well and formation: first khtst sample minus the last one.
    khtst_logs = data_logs.groupby(['well','formation'])[['khtst']].apply(lambda x: x.iloc[0] - x.iloc[-1]).reset_index()

    # For every well that has at least one net sample, number all of its
    # samples in reverse order (last sample -> 0).
    per_well = []
    for wellname in data_logs[data_logs.net == 1].well.unique():
        well_data = data_logs[data_logs.well == wellname].copy()
        well_data['tst_index_rev'] = np.arange(len(well_data))[::-1]
        per_well.append(well_data)
    data_logs_khtst = pd.concat(per_well)

    title = f"target well {well_target} : offsets " + " ".join(
        f"{name} - {int(round(dist))}m {label};"
        for name, dist, (_, label) in zip(offset_well_list, offset_dists, offset_styles)
    )

    fig = plt.figure(figsize=(22, 10))
    gs = gridspec.GridSpec(2, 4, figure=fig)
    ax1 = fig.add_subplot(gs[0, 0])
    ax2 = fig.add_subplot(gs[0, 1])
    ax3 = fig.add_subplot(gs[0, 2])
    ax4 = fig.add_subplot(gs[0, 3])
    ax5 = fig.add_subplot(gs[1, :3])

    custom_palette = {well_target: 'red'}
    custom_palette.update({name: color for name, (color, _) in zip(offset_well_list, offset_styles)})
    sns.histplot(data=data_logs, x='phit', hue='well', bins=50, kde=True, ax=ax1, palette=custom_palette)
    sns.scatterplot(data=data_layers, x='htst', y='perm_avg', hue='well', s=75, ax=ax2, alpha=0.5, ec='black', palette=custom_palette)
    sns.lineplot(data=data_logs_khtst, x='tst_index_rev', y='khtst', hue='well', ax=ax3, palette=custom_palette)
    sns.barplot(data = khtst_logs, x='formation', y='khtst', hue='well', ax=ax4, palette=custom_palette)
    ax1.set_yticklabels(ax1.get_yticklabels(), rotation=90, va='center')
    ax2.set_yscale('log')
    ax2.grid(True, which='both', linestyle='--', linewidth=0.5)
    ax2.set_yticklabels(ax2.get_yticklabels(), rotation=90, va='center')
    ax3.grid(True, which='both', linestyle='--', linewidth=0.5)
    ax3.set_yticklabels(ax3.get_yticklabels(), rotation=90, va='center')

    fms = khtst_logs.formation.unique()
    ax4.set_xticks(np.arange(len(fms)), fms, rotation=45, fontsize=6)
    ax4.set_yticklabels(ax4.get_yticklabels(), rotation=90, va='center')

    # Location map: all flagged samples in gray, then the target and its offsets.
    flagged = dataset_wells[dataset_wells.phit_flag == 1]
    ax5.scatter(flagged['xmean'], flagged['ymean'], color='gray', s=10)
    target_rows = dataset_wells[dataset_wells.well == well_target]
    ax5.scatter(target_rows['xmean'].iloc[0], target_rows['ymean'].iloc[0], color='red', s=50, ec='black')
    for name, (color, _) in zip(offset_well_list, offset_styles):
        offset_rows = dataset_wells[dataset_wells.well == name]
        ax5.scatter(offset_rows['xmean'].iloc[0], offset_rows['ymean'].iloc[0], color=color)

    plt.suptitle(title, fontsize=16, y=0.92, x=0.32)
    if print_flag == 'print':
        plt.savefig(f'C:/jupyter/SPP/plots/offset_dashboard/{fm_name}_{well_target}_offset_dashboard.png')

In [None]:
# Dashboards for the Balakhany VIII wells with the highest KHTST.
df_bal8_v4_flag = df_bal8_v4_flag[~df_bal8_v4_flag.well.isin(['E31Z', 'D01Z'])]
dist_bal8 = well_dist_calc(df_bal8_v4_flag, 'Balakhany VIII').round(0)
# Rank wells from the same filtered frame the distances were built from, so
# every selected target has offsets in dist_bal8.
df_bal8_khtst = well_list_selection(df_bal8_v4_flag, 15).round(0)
for wellname in df_bal8_khtst.well:
    try:
        well_offset_comparison_dashboard(df_bal8_v4_flag, ntd_top_phi_bot8_bp_v4, dist_bal8, wellname, 3, 'bal8','dontprint')
    except Exception as exc:
        # Keep going with the remaining wells, but report why this one failed.
        print(f"error in {wellname}: {exc!r}")

In [None]:
# Dashboards for the Balakhany X wells with the highest KHTST.
df_bal10_v4_flag = df_bal10_v4_flag[~df_bal10_v4_flag.well.isin(['E31Z', 'D01Z'])]
dist_bal10 = well_dist_calc(df_bal10_v4_flag, 'Balakhany X').round(0)
df_bal10_khtst = well_list_selection(df_bal10_v4_flag, 15).round(0)
for wellname in df_bal10_khtst.well:
    try:
        well_offset_comparison_dashboard(df_bal10_v4_flag, ntd_top_phi_bot10_bp_v4, dist_bal10, wellname, 3, 'bal10','dontprint')
    except Exception as exc:
        # Keep going with the remaining wells, but report why this one failed.
        print(f"error in {wellname}: {exc!r}")

# Best quantity of offset wells

## Bal8

In [9]:
# Reload the Balakhany VIII logs for the offset-quantity study.
# Raw string literal: the Windows path contains backslashes that must not be
# interpreted as escape sequences.
df_bal8_v4 = pd.read_csv(r'C:\jupyter\SPP\inputoutput\general_logs\df_bal8_azr_v4.csv')
df_bal8_v4.columns = df_bal8_v4.columns.str.lower()
df_bal8_v4['formation'] = df_bal8_v4['formation'].replace({
    'Balakhany VIII sand': '1_bal8_sand',
    'Balakhany VIII 25': '2_bal8_25',
    'Balakhany VIII 20': '3_bal8_20',
    'Balakhany VIII 15': '4_bal8_15',
    'Balakhany VIII 10': '5_bal8_10',
    'Balakhany VIII 5': '6_bal8_5',
})
# Keep only the samples flagged with phit_flag == 1 and leave out well D01Z.
df_bal8_v4 = df_bal8_v4[df_bal8_v4.phit_flag == 1]
df_bal8_v4 = df_bal8_v4[~df_bal8_v4.well.isin(['D01Z'])]
dist_bal8 = well_dist_calc(df_bal8_v4, 'Balakhany VIII').round(0)

In [10]:
def add_khtst_dist_df(df_logs, df_dist):
    """Attach the target and offset KHTST values to a well-pair table.

    The KHTST of a well is the ``khtst`` of its first row in ``df_logs``. It
    is looked up for every distinct ``well_offset`` of ``df_dist`` and merged
    in twice: once on the offset well and once on the target well.

    Returns a frame with the columns ``well``, ``khtst_target``,
    ``well_offset``, ``dist_offset``, ``khtst_offset`` and ``formation_up``.
    """
    offset_rows = [
        df_logs[df_logs.well == name][['well', 'khtst']].iloc[0].values
        for name in df_dist.well_offset.unique()
    ]
    khtst_lookup = pd.DataFrame(offset_rows, columns=['well', 'khtst'])
    # First merge: offset KHTST (suffix _x after the second merge).
    with_offset = df_dist.merge(khtst_lookup, left_on='well_offset', right_on='well', how='left')
    # Second merge: target KHTST (suffix _y).
    with_both = with_offset.merge(khtst_lookup, left_on='well_x', right_on='well', how='left')
    selected = with_both[['well_x', 'khtst_y', 'well_offset', 'dist', 'khtst_x', 'formation_up']]
    return selected.rename(columns={
        'well_x': 'well',
        'khtst_y': 'khtst_target',
        'khtst_x': 'khtst_offset',
        'dist': 'dist_offset',
    })

def selection_quantity_well(df_dist_v2, q):
    """Keep the first ``q`` rows of every target well.

    Wells are processed in order of first appearance in ``df_dist_v2``; the
    pieces are concatenated and re-indexed from 0.
    """
    leading_rows = [
        df_dist_v2[df_dist_v2.well == name].iloc[:q]
        for name in df_dist_v2.well.unique()
    ]
    return pd.concat(leading_rows).reset_index(drop=True)

def assesment_kh_by_offset_avg(df_dist_v3, quality_range):
    """Predict each target well's KHTST from its offsets and grade the result.

    For every target well the ``khtst_offset`` values are averaged with
    ``dist_offset`` as weights. The prediction is graded against the band
    ``khtst_target * (1 - quality_range)`` .. ``khtst_target * (1 + quality_range)``:
    ``'under_estimated'`` at or below the lower limit, ``'over_estimated'`` at
    or above the upper limit (this wins if both hold), otherwise ``'in_range'``.

    NOTE(review): the weights are the raw distances, so farther offsets count
    more — confirm this is intended (inverse-distance weighting would do the
    opposite).

    Returns
    -------
    tuple of (pandas.DataFrame, pandas.DataFrame)
        The pair table with the prediction, band limits and grade, and a table
        with the normalised share of each grade over the distinct
        (well, grade) combinations, sorted by grade label.
    """
    scored_parts = []
    for name in df_dist_v3.well.unique():
        subset = df_dist_v3[df_dist_v3.well == name]
        prediction = np.average(subset.khtst_offset.values, weights=subset.dist_offset.values)
        scored_parts.append(subset.assign(weighted_average=prediction))
    scored = pd.concat(scored_parts).reset_index(drop=True)

    upper = scored.khtst_target * (1 + quality_range)
    lower = scored.khtst_target * (1 - quality_range)
    scored['khtst_target_high'] = upper
    scored['khtst_target_low'] = lower
    too_low = scored.weighted_average <= lower
    too_high = scored.weighted_average >= upper
    scored['khtst_pred_qc'] = np.where(too_high, 'over_estimated',
                                       np.where(too_low, 'under_estimated', 'in_range'))

    scored = scored[['well', 'khtst_target', 'well_offset', 'dist_offset', 'khtst_offset', 'formation_up',
                     'khtst_target_high', 'weighted_average', 'khtst_target_low', 'khtst_pred_qc']]
    shares = scored[['well', 'khtst_pred_qc']].drop_duplicates().value_counts('khtst_pred_qc', normalize=True)
    result = pd.DataFrame(shares).reset_index()
    result = result.sort_values(by='khtst_pred_qc', ascending=True).reset_index(drop=True)
    return scored, result

In [None]:
# Share of in-range / over- / under-estimated wells versus the number of
# nearest offsets used (Balakhany VIII, tolerance 0.30).
# The KHTST merge does not depend on q, so it is done once outside the loop.
dist_bal8_v2 = add_khtst_dist_df(df_bal8_v4, dist_bal8)
df_range_lst = []
for q in tqdm(range(1,16)):
    dist_bal8_v3 = selection_quantity_well(dist_bal8_v2, q)
    dist_bal8_v4, result_bal8 = assesment_kh_by_offset_avg(dist_bal8_v3, 0.3)
    result_bal8['offset_qty'] = q
    df_range_lst.append(result_bal8)

df_range = pd.concat(df_range_lst).reset_index(drop=True)
sns.lineplot(data=df_range, x='offset_qty', y='proportion', hue='khtst_pred_qc')
plt.grid()
plt.title('Prediction by weighted avg offset 0.30');

In [None]:
# Share of in-range / over- / under-estimated wells versus the number of
# nearest offsets used (Balakhany VIII, tolerance 0.25).
# The KHTST merge does not depend on q, so it is done once outside the loop.
dist_bal8_v2 = add_khtst_dist_df(df_bal8_v4, dist_bal8)
df_range_lst = []
for q in tqdm(range(1,16)):
    dist_bal8_v3 = selection_quantity_well(dist_bal8_v2, q)
    dist_bal8_v4, result_bal8 = assesment_kh_by_offset_avg(dist_bal8_v3, 0.25)
    result_bal8['offset_qty'] = q
    df_range_lst.append(result_bal8)

df_range = pd.concat(df_range_lst).reset_index(drop=True)
sns.lineplot(data=df_range, x='offset_qty', y='proportion', hue='khtst_pred_qc')
plt.grid()
plt.title('Prediction by weighted avg offset 0.25');

In [None]:
# Share of in-range / over- / under-estimated wells versus the number of
# nearest offsets used (Balakhany VIII, tolerance 0.10; up to 20 offsets).
# The KHTST merge does not depend on q, so it is done once outside the loop.
dist_bal8_v2 = add_khtst_dist_df(df_bal8_v4, dist_bal8)
df_range_lst = []
for q in tqdm(range(1,21)):
    dist_bal8_v3 = selection_quantity_well(dist_bal8_v2, q)
    dist_bal8_v4, result_bal8 = assesment_kh_by_offset_avg(dist_bal8_v3, 0.1)
    result_bal8['offset_qty'] = q
    df_range_lst.append(result_bal8)

df_range = pd.concat(df_range_lst).reset_index(drop=True)
sns.lineplot(data=df_range, x='offset_qty', y='proportion', hue='khtst_pred_qc')
plt.grid()
plt.title('Prediction by weighted avg offset 0.10');

## Bal 10

In [14]:
# Reload the Balakhany X logs for the offset-quantity study.
# Raw string literal: the Windows path contains backslashes that must not be
# interpreted as escape sequences.
df_bal10_v4 = pd.read_csv(r'C:\jupyter\SPP\inputoutput\general_logs\df_bal10_vshclp2_v4.csv')
df_bal10_v4.columns = df_bal10_v4.columns.str.lower()
# NOTE(review): 'Balakhany X 50' and 'Balakhany X 40' both map to '2_bal10_40'
# — presumably a deliberate merge of the two units; confirm.
df_bal10_v4['formation'] = df_bal10_v4['formation'].replace({
    'Balakhany X sand': '1_bal10_sand',
    'Balakhany X 50': '2_bal10_40',
    'Balakhany X 40': '2_bal10_40',
    'Balakhany X 20': '3_bal10_20',
})
# Wells that have at least one sample with phit_flag == 1; keep all samples of those wells.
well_phit_flag10 = df_bal10_v4[df_bal10_v4.phit_flag==1].groupby('well')['phit_flag'].apply(lambda x: x.iloc[0]).reset_index().well.unique()
df_bal10_v4_flag = df_bal10_v4[df_bal10_v4.well.isin(well_phit_flag10)]
dist_bal10 = well_dist_calc(df_bal10_v4_flag, 'Balakhany X').round(0)

In [None]:
# Share of in-range / over- / under-estimated wells versus the number of
# nearest offsets used (Balakhany X, tolerance 0.30).
# The KHTST merge does not depend on q, so it is done once outside the loop.
dist_bal10_v2 = add_khtst_dist_df(df_bal10_v4, dist_bal10)
df_range_lst = []
for q in tqdm(range(1,16)):
    dist_bal10_v3 = selection_quantity_well(dist_bal10_v2, q)
    dist_bal10_v4, result_bal10 = assesment_kh_by_offset_avg(dist_bal10_v3, 0.3)
    result_bal10['offset_qty'] = q
    df_range_lst.append(result_bal10)

df_range = pd.concat(df_range_lst).reset_index(drop=True)
sns.lineplot(data=df_range, x='offset_qty', y='proportion', hue='khtst_pred_qc')
plt.grid()
plt.title('Prediction by weighted avg offset 0.30');

In [None]:
# Share of in-range / over- / under-estimated wells versus the number of
# nearest offsets used (Balakhany X, tolerance 0.25).
# The KHTST merge does not depend on q, so it is done once outside the loop.
dist_bal10_v2 = add_khtst_dist_df(df_bal10_v4, dist_bal10)
df_range_lst = []
for q in tqdm(range(1,16)):
    dist_bal10_v3 = selection_quantity_well(dist_bal10_v2, q)
    dist_bal10_v4, result_bal10 = assesment_kh_by_offset_avg(dist_bal10_v3, 0.25)
    result_bal10['offset_qty'] = q
    df_range_lst.append(result_bal10)

df_range = pd.concat(df_range_lst).reset_index(drop=True)
sns.lineplot(data=df_range, x='offset_qty', y='proportion', hue='khtst_pred_qc')
plt.grid()
plt.title('Prediction by weighted avg offset 0.25');

In [None]:
# Share of in-range / over- / under-estimated wells versus the number of
# nearest offsets used (Balakhany X, tolerance 0.10).
# The KHTST merge does not depend on q, so it is done once outside the loop.
dist_bal10_v2 = add_khtst_dist_df(df_bal10_v4, dist_bal10)
df_range_lst = []
for q in tqdm(range(1,16)):
    dist_bal10_v3 = selection_quantity_well(dist_bal10_v2, q)
    dist_bal10_v4, result_bal10 = assesment_kh_by_offset_avg(dist_bal10_v3, 0.10)
    result_bal10['offset_qty'] = q
    df_range_lst.append(result_bal10)

df_range = pd.concat(df_range_lst).reset_index(drop=True)
sns.lineplot(data=df_range, x='offset_qty', y='proportion', hue='khtst_pred_qc')
plt.grid()
plt.title('Prediction by weighted avg offset 0.10');

# Variance inside a well set

Variance formula:
$$ s^2 = \frac{\sum_{i=1}^{n} (x_i - \bar{x})^2}{n-1} $$

In [None]:
# Sample variance of KHTST inside each well set (the target well plus its q
# nearest offsets), plotted against the target's KHTST.
q_lst = []
for q in [3]:
    dist_bal8_v2 = add_khtst_dist_df(df_bal8_v4, dist_bal8)
    dist_bal8_v3 = selection_quantity_well(dist_bal8_v2, q)
    dist_bal8_v4, result_bal8 = assesment_kh_by_offset_avg(dist_bal8_v3, 0.25)
    records = []
    for well in dist_bal8_v4.well.unique():
        data = dist_bal8_v4[dist_bal8_v4.well == well]
        # Offsets' KHTST followed by the target's own KHTST.
        khtst_full = np.hstack([data.khtst_offset.values, data.khtst_target.values[0]])
        records.append({
            'well': well,
            'khtst_variance': statistics.variance(khtst_full),
            'khtst_target': data.khtst_target.iloc[0],
        })
    result_variance = pd.DataFrame(records, columns=['well', 'khtst_variance', 'khtst_target'])
    result_variance['offset_qty'] = q
    q_lst.append(result_variance)

result_variance = pd.concat(q_lst).reset_index(drop=True)
sns.scatterplot(data=result_variance, x='khtst_target', y='khtst_variance', hue='offset_qty', palette="bright")
plt.grid()

In [None]:
# Cluster the wells in (khtst_target, khtst_variance) space after standardising both features.
X = result_variance[['khtst_target', 'khtst_variance']]
X_scaled = StandardScaler().fit_transform(X)
# NOTE(review): eps=0.78 and min_samples=10 are hard-coded — presumably tuned by hand for this data set; confirm.
outlier_scan = DBSCAN(eps=0.78, min_samples=10)
clusters = outlier_scan.fit_predict(X_scaled)
# Adds the cluster label as a new column of result_variance (the frame is modified in place).
result_variance['cluster'] = clusters
sns.scatterplot(data=result_variance, x='khtst_target', y='khtst_variance', hue='cluster', palette="viridis")

In [None]:
# Distinct (well, xmean, ymean) rows, joined onto the variance table by well name.
bal8_xy = df_bal8_v4[['well','xmean','ymean']].drop_duplicates().reset_index(drop=True)
result_variance_v2 = result_variance.set_index('well').join(bal8_xy.set_index('well')).reset_index()
# Map view of the wells coloured by cluster label.
plt.subplots(figsize=(15,7))
sns.scatterplot(data=result_variance_v2, x='xmean', y='ymean', hue='cluster', palette="viridis")

In [None]:
# Rows labelled -1 by the clustering step above, and the well names they belong to.
big_variance = result_variance[result_variance.cluster == -1]
big_variance.well.unique()

In [None]:
# Offset dashboards for the wells in big_variance.
for wellname in big_variance.well:
    try:
        well_offset_comparison_dashboard(df_bal8_v4_flag, ntd_top_phi_bot8_bp_v4, dist_bal8, wellname, 3, 'bal8','dontprint')
    except Exception as exc:
        # Keep going with the remaining wells, but report why this one failed.
        print(f"error in {wellname}: {exc!r}")

# Oriented offsets

In [23]:
def add_xykh_dist_df(df_logs, df_dist):
    """Attach KHTST and coordinates of both wells to a well-pair table.

    For every distinct ``well_offset`` in ``df_dist`` the first row of that
    well in ``df_logs`` supplies ``khtst``, ``xmean`` and ``ymean``. The
    lookup is merged in twice: on the offset well and on the target well.

    Returns a frame with the columns ``well``, ``x_target``, ``y_target``,
    ``khtst_target``, ``well_offset``, ``dist_offset``, ``x_offset``,
    ``y_offset``, ``khtst_offset`` and ``formation_up``.
    """
    lookup_cols = ['well', 'khtst', 'xmean', 'ymean']
    first_rows = [
        df_logs[df_logs.well == name][lookup_cols].iloc[0].values
        for name in df_dist.well_offset.unique()
    ]
    well_lookup = pd.DataFrame(first_rows, columns=lookup_cols)
    # After the second merge, suffix _x marks offset values and _y target values.
    with_offset = df_dist.merge(well_lookup, left_on='well_offset', right_on='well', how='left')
    with_both = with_offset.merge(well_lookup, left_on='well_x', right_on='well', how='left')
    selected = with_both[['well_x', 'xmean_y', 'ymean_y', 'khtst_y', 'well_offset', 'dist',
                          'xmean_x', 'ymean_x', 'khtst_x', 'formation_up']]
    return selected.rename(columns={
        'well_x': 'well',
        'xmean_y': 'x_target',
        'ymean_y': 'y_target',
        'khtst_y': 'khtst_target',
        'dist': 'dist_offset',
        'xmean_x': 'x_offset',
        'ymean_x': 'y_offset',
        'khtst_x': 'khtst_offset',
    })
def add_angle_dist_df(df):
    """Add the direction from each target well to its offset as an ``angle`` column.

    The angle is ``atan2(dx, dy)`` in degrees, i.e. measured from the +y axis
    towards the +x axis, shifted into 0..360 and rounded to a whole degree.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain ``x_target``, ``y_target``, ``x_offset`` and ``y_offset``.
        The frame is modified in place.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object, now carrying the ``angle`` column.
    """
    dx = df['x_offset'].to_numpy(dtype=float) - df['x_target'].to_numpy(dtype=float)
    dy = df['y_offset'].to_numpy(dtype=float) - df['y_target'].to_numpy(dtype=float)
    angle_degrees = np.degrees(np.arctan2(dx, dy))
    # Negative angles are wrapped into the 180..360 half of the circle.
    angle_degrees = np.where(angle_degrees < 0, angle_degrees + 360, angle_degrees)
    df['angle'] = np.round(angle_degrees, 0)
    # Return the argument itself rather than a notebook-level variable.
    return df
def selection_quantity_well(df_dist_v2, q):
    """Return the first ``q`` rows of each target well, re-indexed from 0.

    Wells are taken in order of first appearance in ``df_dist_v2``.
    """
    kept = []
    for _, well_rows in df_dist_v2.groupby('well', sort=False):
        kept.append(well_rows.iloc[:q])
    return pd.concat(kept).reset_index(drop=True)
def sector_assignment(bean, xy):
    a = (90 - bean)/2
    sector_1_3 = [0+a, 90-a, 180+a, 270-a]
    sector_2_4 = [90+a, 180-a, 270+a, 360-a]
    xy_sector_1_3 = xy[((xy.angle >=sector_1_3[0]) & (xy.angle <= sector_1_3[1])) | 
                    ((xy.angle >= sector_1_3[2]) & (xy.angle <= sector_1_3[3]))]
    xy_sector_1_3['sector'] = '1_3'
    xy_sector_2_4 = xy[((xy.angle >=sector_2_4[0]) & (xy.angle <= sector_2_4[1])) |
                        ((xy.angle >= sector_2_4[2]) & (xy.angle <= sector_2_4[3]))]
    xy_sector_2_4['sector'] = '2_4'
    return xy_sector_1_3, xy_sector_2_4
def assesment_kh_by_offset_avg(df_dist_v3, quality_range):
    """Grade a distance-weighted KHTST prediction for every target well.

    The prediction of a target well is the average of its ``khtst_offset``
    values weighted by ``dist_offset``. It is ``'under_estimated'`` when at or
    below ``khtst_target * (1 - quality_range)``, ``'over_estimated'`` when at
    or above ``khtst_target * (1 + quality_range)`` (this wins if both hold)
    and ``'in_range'`` otherwise.

    NOTE(review): raw distances are used as weights, so farther offsets count
    more — confirm this is intended.

    Returns the graded pair table and a table with the normalised share of
    each grade over the distinct (well, grade) combinations, sorted by grade.
    """
    graded_parts = []
    for target in df_dist_v3.well.unique():
        rows = df_dist_v3[df_dist_v3.well == target]
        graded_parts.append(
            rows.assign(weighted_average=np.average(rows.khtst_offset.values, weights=rows.dist_offset.values))
        )
    table = pd.concat(graded_parts).reset_index(drop=True)

    table['khtst_target_high'] = table.khtst_target * (1 + quality_range)
    table['khtst_target_low'] = table.khtst_target * (1 - quality_range)
    grade = pd.Series('in_range', index=table.index)
    grade[table.weighted_average <= table.khtst_target_low] = 'under_estimated'
    # Applied last so that it takes precedence when both conditions hold.
    grade[table.weighted_average >= table.khtst_target_high] = 'over_estimated'
    table['khtst_pred_qc'] = grade

    column_order = ['well', 'khtst_target', 'well_offset', 'dist_offset', 'khtst_offset', 'formation_up',
                    'khtst_target_high', 'weighted_average', 'khtst_target_low', 'khtst_pred_qc']
    table = table[column_order]
    per_well_grade = table[['well', 'khtst_pred_qc']].drop_duplicates()
    result = pd.DataFrame(per_well_grade.value_counts('khtst_pred_qc', normalize=True)).reset_index()
    result = result.sort_values(by='khtst_pred_qc', ascending=True).reset_index(drop=True)
    return table, result

In [None]:
# Prediction quality versus number of offsets, evaluated separately for the
# two direction-sector groups (Balakhany VIII, tolerance 0.25).
bean = 60
# The pair table, the angles and the sector split do not depend on q, so they
# are built once instead of on every iteration.
xy = add_xykh_dist_df(df_bal8_v4, dist_bal8).round(0)
xy_v2 = add_angle_dist_df(xy)
xy_sector_1_3, xy_sector_2_4 = sector_assignment(bean, xy_v2)
df_range_lst_13 = []
df_range_lst_24 = []
for q in tqdm(range(1,16)):
    xy_s13_v3 = selection_quantity_well(xy_sector_1_3, q)
    xy_s24_v3 = selection_quantity_well(xy_sector_2_4, q)
    xy_s13_v4, result_bal8_13 = assesment_kh_by_offset_avg(xy_s13_v3, 0.25)
    result_bal8_13['offset_qty'] = q
    df_range_lst_13.append(result_bal8_13)
    xy_s24_v4, result_bal8_24 = assesment_kh_by_offset_avg(xy_s24_v3, 0.25)
    result_bal8_24['offset_qty'] = q
    df_range_lst_24.append(result_bal8_24)

df_range_13 = pd.concat(df_range_lst_13).reset_index(drop=True)
fig, ax = plt.subplots(1,2, figsize=(14,4))
sns.lineplot(data=df_range_13, x='offset_qty', y='proportion', hue='khtst_pred_qc', ax=ax[0])
ax[0].grid()
ax[0].set_title(f'Prediction sector 1-3 bean={bean} by weighted avg offset 0.25')

df_range_24 = pd.concat(df_range_lst_24).reset_index(drop=True)
sns.lineplot(data=df_range_24, x='offset_qty', y='proportion', hue='khtst_pred_qc', ax=ax[1])
ax[1].grid()
ax[1].set_title(f'Prediction sector 2-4 bean={bean} by weighted avg offset 0.25');

In [None]:
# Reference result without the sector split, for comparison with the sector plots.
# q is set explicitly: it used to be whatever value the preceding sweep loop
# left behind (15, the last value of range(1,16)).
q = 15
dist_bal8_v2 = add_khtst_dist_df(df_bal8_v4, dist_bal8)
dist_bal8_v3 = selection_quantity_well(dist_bal8_v2, q)
dist_bal8_v4, result_bal8 = assesment_kh_by_offset_avg(dist_bal8_v3, 0.25)
result_bal8['offset_qty'] = q
result_bal8

In [None]:
def well_offset_sector(wellname, xy_s13_v3, xy_s24_v3):
    """Map one target well together with its offsets from both sector tables.

    All wells of ``df_bal8_v4`` (a notebook-level frame) are drawn in gray,
    the target in red and the offsets coloured by their ``sector`` value;
    offsets and target are labelled with their well names.
    """
    offsets = pd.concat([xy_s13_v3[xy_s13_v3.well == wellname], xy_s24_v3[xy_s24_v3.well == wellname]])
    target_x, target_y = offsets[['x_target','y_target']].iloc[0].values
    all_wells = df_bal8_v4.groupby('well')[['xmean','ymean']].first().reset_index()

    plt.subplots(figsize=(15,8))
    sns.scatterplot(data=all_wells, x='xmean', y='ymean', s=50, color='gray')
    sns.scatterplot(data=offsets.iloc[:1], x='x_target', y='y_target', s=50, color='red')
    sns.scatterplot(data=offsets[['x_offset','y_offset','sector']], x='x_offset', y='y_offset', s=50, hue='sector', color='green')
    for _, pair in offsets.iterrows():
        plt.text(pair['x_offset'], pair['y_offset'], pair['well_offset'], color='black', ha='right', fontsize=6)
    plt.text(target_x, target_y, wellname, color='red', ha='right', fontsize=6)
well_offset_sector('B02', xy_s13_v3, xy_s24_v3)

# Well pairs

In [None]:
# Nearest-neighbour pairs: the closest offset of every well (first row per
# well of dist_bal8).
nearest_per_well = dist_bal8.drop_duplicates(subset='well', keep='first').reset_index(drop=True)
# A mutual pair shows up twice (A-B and B-A). De-duplicate on the unordered
# pair of names rather than on the distance value, so that two different pairs
# that happen to share the same (rounded) distance are both kept.
pair_key = pd.Series(
    [tuple(sorted(pair)) for pair in zip(nearest_per_well['well'], nearest_per_well['well_offset'])],
    index=nearest_per_well.index,
)
well_pairs = nearest_per_well[~pair_key.duplicated()]
# NOTE(review): the original comment reads "235m is the max distance between
# wells selected by elbow plot with distance 500m" — the cut-off applied here is 500.
well_pairs_v2 = well_pairs[well_pairs.dist < 500]
well_pairs_v2.sort_values(by='dist', ascending=True)

In [None]:
# Nearest-neighbour distance per well, ordered from largest to smallest.
fig = plt.figure(figsize=(20, 5))
sns.lineplot(data=well_pairs_v2.sort_values(by='dist', ascending=False), x='well', y='dist')
# Rotate the x tick labels (one per well) so they do not overlap.
plt.xticks(rotation=90)
plt.grid()

In [None]:
# Nearest-neighbour pairs closer than 230 (same construction as above with a
# tighter cut-off).
nearest_per_well = dist_bal8.drop_duplicates(subset='well', keep='first').reset_index(drop=True)
# De-duplicate mirrored pairs (A-B / B-A) on the unordered pair of names, not
# on the distance value, so distinct pairs sharing a distance are both kept.
pair_key = pd.Series(
    [tuple(sorted(pair)) for pair in zip(nearest_per_well['well'], nearest_per_well['well_offset'])],
    index=nearest_per_well.index,
)
well_pairs = nearest_per_well[~pair_key.duplicated()]
# NOTE(review): the original comment reads "235m is the max distance between
# wells selected by elbow plot with distance 500m" — the cut-off applied here is 230.
well_pairs_v3 = well_pairs[well_pairs.dist < 230]
well_pairs_v3 = well_pairs_v3.sort_values(by='dist', ascending=True).reset_index(drop=True)
well_pairs_v3

In [None]:
# Preview the filtered Balakhany VIII log table (row count limited by display.max_rows).
df_bal8_v4_flag

In [None]:
# Well D01: `tst` of its first row minus `tst` of its last row.
df_bal8_v4_flag[df_bal8_v4_flag.well.isin(['D01'])][['well', 'tst']].iloc[0].values[1] - \
df_bal8_v4_flag[df_bal8_v4_flag.well.isin(['D01'])][['well', 'tst']].iloc[-1].values[1]

In [None]:
# Well D09: `tst` of its first row minus `tst` of its last row.
df_bal8_v4_flag[df_bal8_v4_flag.well.isin(['D09'])][['well', 'tst']].iloc[0].values[1] - \
df_bal8_v4_flag[df_bal8_v4_flag.well.isin(['D09'])][['well', 'tst']].iloc[-1].values[1]

In [None]:
# Number of log rows for wells D01 and D09.
print(  len(df_bal8_v4_flag[df_bal8_v4_flag.well == 'D01'][['well', 'tst']]),
        len(df_bal8_v4_flag[df_bal8_v4_flag.well == 'D09'][['well', 'tst']]))

In [None]:
def well_offset_comparison_dashboard_pairs(dataset_wells, dataset_layers, dist_df,  well_target, offset_qty, fm_name, print_flag):
    """Draw the five-panel comparison dashboard for a target well and one offset.

    Panels: PHIT histogram, layer ``htst`` vs ``perm_avg`` (log y axis),
    ``khtst`` against a reversed per-well sample index, per-formation
    difference between the first and last ``khtst`` sample, and a location
    map. The target is drawn in red and the first offset in orange; only the
    first offset receives a colour, a map marker and a place in the title.

    ``dist_df`` supplies the offsets (first ``offset_qty`` rows of the target
    well; the distance is read from its third column). The figure is saved
    only when ``print_flag == 'print'``, with ``fm_name`` in the file name.
    """
    nearest = dist_df[dist_df.well == well_target].iloc[:offset_qty]
    offset_well_list = nearest['well_offset'].values.tolist()
    well_list = [well_target] + offset_well_list

    logs_in_set = dataset_wells.well.isin(well_list)
    data_logs = dataset_wells[logs_in_set & (dataset_wells.phit != 0)]
    layers_in_set = dataset_layers.well.isin(well_list)
    data_layers = dataset_layers[layers_in_set & (dataset_layers.htst > 1)]
    # Per well and formation: first khtst sample minus the last one.
    khtst_logs = data_logs.groupby(['well','formation'])[['khtst']].apply(lambda x: x.iloc[0] - x.iloc[-1]).reset_index()

    # For every well with at least one net sample, number all of its samples
    # in reverse order (last sample -> 0).
    frames = []
    for name in data_logs[data_logs.net == 1].well.unique():
        frame = data_logs[data_logs.well == name]
        frames.append(frame.assign(tst_index_rev=list(range(len(frame['tst'])))[::-1]))
    data_logs_khtst = pd.concat(frames)

    fig = plt.figure(figsize=(22, 10))
    gs = gridspec.GridSpec(2, 4, figure=fig)
    ax1, ax2, ax3, ax4 = (fig.add_subplot(gs[0, col]) for col in range(4))
    ax5 = fig.add_subplot(gs[1, :3])

    custom_palette = {well_target: 'red', offset_well_list[0]: 'orange'}
    sns.histplot(data=data_logs, x='phit', hue='well', bins=50, kde=True, ax=ax1, palette=custom_palette)
    sns.scatterplot(data=data_layers, x='htst', y='perm_avg', hue='well', s=75, ax=ax2, alpha=0.5, ec='black', palette=custom_palette)
    sns.lineplot(data=data_logs_khtst, x='tst_index_rev', y='khtst', hue='well', ax=ax3, palette=custom_palette)
    sns.barplot(data = khtst_logs, x='formation', y='khtst', hue='well', ax=ax4, palette=custom_palette)
    ax1.set_yticklabels(ax1.get_yticklabels(), rotation=90, va='center')
    ax2.set_yscale('log')
    ax2.grid(True, which='both', linestyle='--', linewidth=0.5)
    ax2.set_yticklabels(ax2.get_yticklabels(), rotation=90, va='center')
    ax3.grid(True, which='both', linestyle='--', linewidth=0.5)
    ax3.set_yticklabels(ax3.get_yticklabels(), rotation=90, va='center')

    fms = khtst_logs.formation.unique()
    ax4.set_xticks(np.arange(len(fms)), fms, rotation=45, fontsize=6)
    ax4.set_yticklabels(ax4.get_yticklabels(), rotation=90, va='center')

    # Location map: all flagged samples in gray, then the target and the offset.
    flagged = dataset_wells[dataset_wells.phit_flag == 1]
    target_rows = dataset_wells[dataset_wells.well == well_target]
    x_target = target_rows['xmean'].iloc[0]
    y_target = target_rows['ymean'].iloc[0]
    offset_rows = dataset_wells[dataset_wells.well == offset_well_list[0]]
    x_well1 = offset_rows['xmean'].iloc[0]
    y_well1 = offset_rows['ymean'].iloc[0]
    ax5.scatter(flagged['xmean'], flagged['ymean'], color='gray', s=10)
    ax5.scatter(x_target, y_target, color='red', s=50, ec='black')
    ax5.scatter(x_well1, y_well1, color='orange')

    # Title: target name, offset name and the offset distance as a whole number.
    well = nearest['well'].iloc[0]
    well1 = nearest.iloc[0,1]
    dist1 = nearest.iloc[0,2].round(0).astype(int)
    plt.suptitle(f"target well {well} : offsets {well1} - {dist1}m orange;", fontsize=16, y=0.92, x=0.32)
    if print_flag == 'print':
        plt.savefig(f'C:/jupyter/SPP/plots/offset_dashboard/{fm_name}_{well_target}_offset_dashboard.png')

# Pair dashboards for every close nearest-neighbour pair in well_pairs_v3.
df_bal8_v4_flag = df_bal8_v4_flag[~df_bal8_v4_flag.well.isin(['E31Z', 'D01Z'])]
dist_bal8 = well_dist_calc(df_bal8_v4_flag, 'Balakhany VIII').round(0)
for wellname in well_pairs_v3.well:
    try:
        well_offset_comparison_dashboard_pairs(df_bal8_v4_flag, ntd_top_phi_bot8_bp_v4, well_pairs_v3, wellname, 1, 'bal8','dontprint')
    except Exception as exc:
        # Keep going with the remaining wells, but report why this one failed.
        print(f"error in {wellname}: {exc!r}")

# Well URL

In [35]:
import os

def get_file_paths(directory):
    """Return the full paths of the regular files directly inside ``directory``.

    Sub-directories are skipped and the listing is not recursive. Paths are
    built with ``os.path.join`` and keep the order given by ``os.listdir``.
    """
    candidates = (os.path.join(directory, entry) for entry in os.listdir(directory))
    return [path for path in candidates if os.path.isfile(path)]

# Build a (file_path, well) table from the plot files in the folder, attach the
# well coordinates from df_bal8_v4 and write the result to CSV.
directory_path = 'C:\\jupyter\\SPP\\plots\\logs_vsh_bal8'
file_paths = get_file_paths(directory_path)
df = pd.DataFrame(file_paths, columns=['file_path'])
# Well name = file name (text after the last backslash) up to its first dot.
file_names = df['file_path'].str.split('\\').str[-1]
df['well'] = file_names.str.split('.').str[0]
df_xy = df_bal8_v4[['well','xmean','ymean']].drop_duplicates().reset_index(drop=True)
pd.merge(df, df_xy, on='well').to_csv('C:\\jupyter\\SPP\\inputoutput\\df_xy8.csv', index=False)

# Offset for channels wells

In [None]:
# Map of the wells listed in the channels file (orange) drawn over every well
# present in df_bal8_v4 (green).
df_channel8 = pd.read_csv(r'C:\jupyter\SPP\output\channels\logs8_big_channels.csv')
# One (xmean, ymean) row per well: first non-null value within each well group.
xy = df_bal8_v4.groupby('well')[['xmean','ymean']].first().reset_index()
# Both scatterplots draw on the axes created here (pyplot's current axes).
plt.subplots(figsize=(12,7))
sns.scatterplot(data=xy, x='xmean', y='ymean', s=70, color='green', ec='black', alpha=0.5)
sns.scatterplot(data=df_channel8, x='xmean', y='ymean', c='orange', s=35, ec='black');

In [None]:
# Restrict the flagged Bal VIII logs to the wells listed in df_channel8, then
# rebuild the distance table and the well list for that subset.
df_bal8_v4_channel = df_bal8_v4_flag[df_bal8_v4_flag.well.isin(df_channel8.well.unique())]
dist_bal8_channel = well_dist_calc(df_bal8_v4_channel, 'Balakhany VIII').round(0)
df_bal8_khtst_channel = well_list_selection(df_bal8_v4_channel, 55).round(0)
for wellname in df_bal8_khtst_channel.well:
    try:
        well_offset_comparison_dashboard(df_bal8_v4_channel, ntd_top_phi_bot8_bp_v4, dist_bal8_channel, wellname, 3, 'bal8','dontprint')
    except Exception as exc:
        # Best-effort loop: report which well failed and why, then continue.
        # Catching Exception (not a bare except) keeps kernel interrupts working.
        print(f"error in {wellname}: {type(exc).__name__}: {exc}")

In [38]:
def add_khtst_dist_df(df_logs, df_dist):
    """Attach target-well and offset-well ``khtst`` values to a distance table.

    Parameters
    ----------
    df_logs : DataFrame with at least ``well`` and ``khtst`` columns; the first
        row found for each well supplies its ``khtst``.
    df_dist : DataFrame with ``well``, ``well_offset``, ``dist`` and
        ``formation_up`` columns.

    Returns
    -------
    DataFrame with columns ``well``, ``khtst_target``, ``well_offset``,
    ``dist_offset``, ``khtst_offset``, ``formation_up``.

    Only wells that appear in ``df_dist.well_offset`` get a value; an offset
    well with no rows in ``df_logs`` raises ``IndexError``.
    """
    offset_names = df_dist.well_offset.unique()
    first_rows = [
        df_logs.loc[df_logs.well == name, ['well', 'khtst']].iloc[0].values
        for name in offset_names
    ]
    khtst_lookup = pd.DataFrame(first_rows, columns=['well', 'khtst'])

    # First merge: khtst of the offset well. The clashing 'well' columns become
    # well_x (target well, from df_dist) and well_y (from the lookup).
    with_offset = df_dist.merge(khtst_lookup, left_on='well_offset', right_on='well', how='left')
    # Second merge: khtst of the target well. The clashing 'khtst' columns
    # become khtst_x (offset) and khtst_y (target).
    with_both = with_offset.merge(khtst_lookup, left_on='well_x', right_on='well', how='left')

    kept = ['well_x', 'khtst_y', 'well_offset', 'dist', 'khtst_x', 'formation_up']
    new_names = {'well_x': 'well', 'khtst_y': 'khtst_target',
                 'khtst_x': 'khtst_offset', 'dist': 'dist_offset'}
    return with_both[kept].rename(columns=new_names)

def selection_quantity_well(df_dist_v2, q):
    """Keep at most the first ``q`` rows of every target well.

    Wells are processed in order of first appearance in ``df_dist_v2.well``;
    rows keep their existing order within a well, and the result gets a fresh
    0..n-1 index.
    """
    per_well = [
        df_dist_v2[df_dist_v2.well == name].head(q)
        for name in df_dist_v2.well.unique()
    ]
    return pd.concat(per_well).reset_index(drop=True)

def assesment_kh_by_offset_avg(df_dist_v3, quality_range):
    """Classify each well's offset-based ``khtst`` estimate against its target.

    Parameters
    ----------
    df_dist_v3 : DataFrame with columns ``well``, ``khtst_target``,
        ``well_offset``, ``dist_offset``, ``khtst_offset``, ``formation_up``.
        It is not modified.
    quality_range : float, relative tolerance; the accepted band is
        ``khtst_target * (1 - quality_range)`` to
        ``khtst_target * (1 + quality_range)`` (bounds excluded).

    Returns
    -------
    (dist_v4, result)
        ``dist_v4``: the input rows plus ``weighted_average``,
        ``khtst_target_high``, ``khtst_target_low`` and ``khtst_pred_qc``
        ('in_range', 'under_estimated' or 'over_estimated').
        ``result``: share of unique wells per ``khtst_pred_qc`` label.

    Raises
    ------
    ZeroDivisionError
        From ``np.average`` when a well's ``dist_offset`` weights sum to zero.
    """
    per_well = []
    for wellname in df_dist_v3.well.unique():
        data = df_dist_v3[df_dist_v3.well == wellname]
        # NOTE(review): the weights are the raw distances, so farther offset
        # wells weigh more; confirm whether inverse-distance weighting was intended.
        weighted_average = np.average(data.khtst_offset.values, weights=data.dist_offset.values)
        # assign() returns a new frame instead of writing into a filtered slice.
        per_well.append(data.assign(weighted_average=weighted_average))
    dist_v4 = pd.concat(per_well).reset_index(drop=True)
    dist_v4['khtst_target_high'] = dist_v4.khtst_target*(1 + quality_range)
    dist_v4['khtst_target_low'] = dist_v4.khtst_target*(1 - quality_range)
    # Default label; rows with NaN bounds or averages keep it because NaN comparisons are False.
    dist_v4['khtst_pred_qc'] = 'in_range'
    dist_v4.loc[dist_v4.weighted_average <= dist_v4.khtst_target_low, 'khtst_pred_qc'] = 'under_estimated'
    dist_v4.loc[dist_v4.weighted_average >= dist_v4.khtst_target_high, 'khtst_pred_qc'] = 'over_estimated'
    dist_v4 = dist_v4[[ 'well', 'khtst_target', 'well_offset', 'dist_offset', 'khtst_offset', 'formation_up',
                        'khtst_target_high', 'weighted_average','khtst_target_low', 'khtst_pred_qc']]
    # One row per well before counting, so every well contributes once.
    result = pd.DataFrame(dist_v4[['well','khtst_pred_qc']].drop_duplicates().value_counts('khtst_pred_qc', normalize=True)).reset_index()
    result = result.sort_values(by='khtst_pred_qc', ascending=True).reset_index(drop=True)
    return dist_v4, result

In [None]:
# Sweep the number of offset wells (1..15) and record, for each q, the share of
# wells whose distance-weighted khtst estimate falls inside +/-30 % of the target.
# The khtst-augmented distance table does not depend on q, so it is built once.
dist_bal8_ch_v2 = add_khtst_dist_df(df_bal8_v4_channel, dist_bal8_channel).round(0)
df_range_lst = []
for q in tqdm(range(1,16)):
    dist_bal8_ch_v3 = selection_quantity_well(dist_bal8_ch_v2, q)
    dist_bal8_ch_v4, result_bal8_ch = assesment_kh_by_offset_avg(dist_bal8_ch_v3, 0.3)
    result_bal8_ch['offset_qty'] = q
    df_range_lst.append(result_bal8_ch)

df_range = pd.concat(df_range_lst).reset_index(drop=True)
sns.lineplot(data=df_range, x='offset_qty', y='proportion', hue='khtst_pred_qc')
plt.grid()
plt.title('Prediction by weighted avg offset 0.30');

# Phit+htst vs KHtst

In [40]:
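# Reference only (not executed): piecewise porosity (PHI) -> permeability (PERM) transform.
# The cells below apply these same three branches to the porosity columns.
# if PHI >=0.20:
#     PERM = (7.7925*((PHI*100)**2))-(29881.0*PHI)+2891.8
# elif PHI >= 0.16 and PHI < 0.2:
#     PERM = 0.00000002*(exp(PHI*105.56))      
# elif PHI < 0.16:
#     PERM = 0.0159*(exp(PHI*21.27))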

In [41]:
# Per-well totals of htst (htst_sum) and khtst (khtst_cumsum).
khtst_ntd = ntd_top_phi_bot8_bp_v4.groupby('well')[['htst','khtst']].sum().reset_index()
khtst_ntd = khtst_ntd.rename(columns={'htst':'htst_sum','khtst':'khtst_cumsum'})
# Thickness-weighted porosity per well: sum(htst*phit_avg) / sum(htst).
ntd_top_phi_bot8_bp_v4['htst*phit'] = ntd_top_phi_bot8_bp_v4['htst']*ntd_top_phi_bot8_bp_v4['phit_avg']
ntd8 = ntd_top_phi_bot8_bp_v4.set_index('well').join(khtst_ntd.set_index('well')).reset_index()
ntd8_v2 = ntd8.groupby('well')[['htst*phit','htst_sum','khtst_cumsum']].agg({'htst*phit':'sum','htst_sum':'first','khtst_cumsum':'first'}).reset_index()
ntd8_v2['phit_wavg'] = ntd8_v2['htst*phit']/ntd8_v2['htst_sum']
ntd8_v2 = ntd8_v2.drop(columns=['htst*phit'])
# Piecewise porosity -> permeability transform, evaluated in one pass:
#   phit >= 0.20        : quadratic
#   phit <  0.16        : low-porosity exponential
#   0.16 <= phit < 0.20 : steep exponential
# np.select takes the first matching condition; rows with NaN porosity stay NaN.
# np.exp is used instead of a truncated literal for e.
phit_wavg = ntd8_v2['phit_wavg']
ntd8_v2['perm'] = np.select(
    [phit_wavg >= 0.2, phit_wavg < 0.16, phit_wavg < 0.2],
    [(7.7925*((phit_wavg*100)**2))-(29881.0*phit_wavg)+2891.8,
     0.0159*np.exp(phit_wavg * 21.27),
     0.00000002*np.exp(phit_wavg * 105.56)],
    default=np.nan)
ntd8_v2['perm*htst_sum'] = ntd8_v2['perm']*ntd8_v2['htst_sum']
# Wells ranked from highest to lowest perm*htst_sum; the new 0..n-1 index is the rank.
ntd8_v2_sorted = ntd8_v2.sort_values(by='perm*htst_sum', ascending=False).reset_index(drop=True)

In [None]:
# Display the per-well table (htst_sum, khtst_cumsum, phit_wavg, perm, perm*htst_sum).
ntd8_v2

In [None]:
# Cubic trend of khtst_cumsum against the row position of ntd8_v2_sorted
# (wells ordered by descending perm*htst_sum), plotted with both raw curves.
X = ntd8_v2_sorted.index.values
Y = ntd8_v2_sorted['khtst_cumsum'].values
degree = 3
coefficients = np.polyfit(X, Y, degree)
polynomial_function = np.poly1d(coefficients)
Y_fit = polynomial_function(X)
ntd8_v2_sorted['Y_fit'] = Y_fit

for curve_col in ('perm*htst_sum', 'khtst_cumsum'):
    sns.lineplot(data=ntd8_v2_sorted, x=ntd8_v2_sorted.index, y=curve_col, label=curve_col)
sns.lineplot(data=ntd8_v2_sorted, x=X, y=Y_fit, label='Y_fit')
plt.legend()

In [None]:
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score

# Straight-line fit of khtst_cumsum on perm*htst_sum; prints R-squared and
# overlays the fitted line on the scatter.
X = ntd8_v2_sorted[['perm*htst_sum']]  # double brackets keep X two-dimensional
Y = ntd8_v2_sorted['khtst_cumsum']
model = LinearRegression().fit(X, Y)
Y_pred = model.predict(X)
r2 = model.score(X, Y)
ntd8_v2_sorted['Y_pred'] = Y_pred
print(f'R-squared: {r2:.3f}')

sns.scatterplot(data=ntd8_v2_sorted, x='perm*htst_sum', y='khtst_cumsum')
sns.lineplot(x=X['perm*htst_sum'].to_numpy(), y=Y_pred, color='red')

In [None]:
# Cubic fit of khtst_cumsum on perm*htst_sum, drawn with a dashed 1:1 line.
X = ntd8_v2_sorted['perm*htst_sum']
Y = ntd8_v2_sorted['khtst_cumsum']

degree = 3
coefficients = np.polyfit(X, Y, degree)
polynomial_function = np.poly1d(coefficients)
Y_fit = np.polyval(coefficients, X)
ntd8_v2_sorted['Y_fit_deg3'] = Y_fit

# Data points first, then the fitted curve and the 1:1 line on the same axes.
sns.scatterplot(data=ntd8_v2_sorted, x='perm*htst_sum', y='khtst_cumsum')
plt.plot(X, Y_fit, color='red')
one_to_one = [0,25000]
plt.plot(one_to_one, one_to_one, c='green', ls='--')
plt.grid()
plt.show()

# Offsets to calc phit

In [45]:
def add_phit_dist_df(df_ntd, df_dist):
    """Attach target/offset thickness-weighted porosity and total thickness to a distance table.

    For every well in ``df_dist.well_offset`` it computes
    ``phit_wavg = sum(htst * phit_avg) / sum(htst)`` and ``htst_sum = sum(htst)``
    from ``df_ntd``, then merges those values onto ``df_dist`` twice: once for
    the offset well and once for the target well.

    Parameters
    ----------
    df_ntd : DataFrame with ``well``, ``htst`` and ``phit_avg`` columns
        (not modified).
    df_dist : DataFrame with ``well``, ``well_offset``, ``dist`` and
        ``formation_up`` columns.

    Returns
    -------
    DataFrame with columns ``well``, ``phit_target``, ``well_offset``,
    ``htst_target``, ``dist_offset``, ``phit_offset``, ``htst_offset``,
    ``formation_up``. Target wells that never appear as an offset well get NaN.
    """
    records = []
    for well_offset in df_dist.well_offset.unique():
        layers = df_ntd.loc[df_ntd.well == well_offset, ['htst', 'phit_avg']]
        htst_sum = layers['htst'].sum()
        # Compute the product directly rather than adding a column to a filtered slice.
        phit_wavg = (layers['htst'] * layers['phit_avg']).sum() / htst_sum
        records.append((well_offset, phit_wavg, htst_sum))
    phit_dist = pd.DataFrame(records, columns=['well', 'phit_wavg', 'htst_sum'])
    # First merge: values of the offset well. The clashing 'well' columns become
    # well_x (target, from df_dist) and well_y. Second merge: values of the
    # target well, so the *_x columns are the offset and *_y the target.
    dist_v2 = (df_dist.merge(phit_dist, left_on='well_offset', right_on='well', how='left')).merge(
        phit_dist, left_on='well_x', right_on='well', how='left')
    dist_v2 = dist_v2[['well_x','phit_wavg_y', 'well_offset', 'htst_sum_y','dist','phit_wavg_x','htst_sum_x','formation_up']].rename(
                    columns={'well_x':'well', 'phit_wavg_y':'phit_target', 'htst_sum_y':'htst_target', 
                             'phit_wavg_x':'phit_offset', 'htst_sum_x':'htst_offset', 'dist':'dist_offset'})
    return dist_v2

# NOTE(review): a function with this same name and body is already defined
# earlier in the notebook; this definition rebinds the name.
def selection_quantity_well(df_dist_v2, q):
    """Keep at most the first ``q`` rows of every target well.

    Wells are processed in order of first appearance in ``df_dist_v2.well``;
    rows keep their existing order within a well, and the result gets a fresh
    0..n-1 index.
    """
    per_well = [
        df_dist_v2[df_dist_v2.well == name].head(q)
        for name in df_dist_v2.well.unique()
    ]
    return pd.concat(per_well).reset_index(drop=True)

def _assess_by_offset_avg(df_dist_v3, prefix, quality_range):
    """Shared implementation of the phit / htst offset-average assessments.

    ``prefix`` selects the ``<prefix>_target`` and ``<prefix>_offset`` input
    columns and names the ``<prefix>_target_high``, ``<prefix>_target_low`` and
    ``<prefix>_pred_qc`` output columns. ``quality_range`` is an absolute
    tolerance added to and subtracted from the target.
    """
    target_col = f'{prefix}_target'
    offset_col = f'{prefix}_offset'
    high_col = f'{prefix}_target_high'
    low_col = f'{prefix}_target_low'
    qc_col = f'{prefix}_pred_qc'

    per_well = []
    for wellname in df_dist_v3.well.unique():
        data = df_dist_v3[df_dist_v3.well == wellname]
        # NOTE(review): the weights are the raw distances, so farther offset
        # wells weigh more; confirm whether inverse-distance weighting was intended.
        weighted_average = np.average(data[offset_col].values, weights=data.dist_offset.values)
        # assign() returns a new frame instead of writing into a filtered slice.
        per_well.append(data.assign(weighted_average=weighted_average))
    dist_v4 = pd.concat(per_well).reset_index(drop=True)

    dist_v4[high_col] = dist_v4[target_col] + quality_range
    dist_v4[low_col] = dist_v4[target_col] - quality_range
    # Default label; rows with NaN values keep it because NaN comparisons are False.
    dist_v4[qc_col] = 'in_range'
    dist_v4.loc[dist_v4.weighted_average <= dist_v4[low_col], qc_col] = 'under_estimated'
    dist_v4.loc[dist_v4.weighted_average >= dist_v4[high_col], qc_col] = 'over_estimated'
    dist_v4 = dist_v4[['well', target_col, 'well_offset', 'dist_offset', offset_col, 'formation_up',
                       high_col, 'weighted_average', low_col, qc_col]]
    # One row per well before counting, so every well contributes once.
    result = pd.DataFrame(dist_v4[['well', qc_col]].drop_duplicates().value_counts(qc_col, normalize=True)).reset_index()
    result = result.sort_values(by=qc_col, ascending=True).reset_index(drop=True)
    return dist_v4, result


def assesment_phit_by_offset_avg(df_dist_v3, quality_range):
    """Classify each well's distance-weighted offset porosity against its target.

    Parameters
    ----------
    df_dist_v3 : DataFrame with ``well``, ``phit_target``, ``well_offset``,
        ``dist_offset``, ``phit_offset`` and ``formation_up`` columns
        (not modified).
    quality_range : float, absolute tolerance; the accepted band is
        ``phit_target - quality_range`` to ``phit_target + quality_range``
        (bounds excluded). A relative band is not implemented here.

    Returns
    -------
    (dist_v4, result)
        ``dist_v4``: per-row table with ``weighted_average``,
        ``phit_target_high``, ``phit_target_low`` and ``phit_pred_qc``
        ('in_range', 'under_estimated' or 'over_estimated').
        ``result``: share of unique wells per ``phit_pred_qc`` label.

    Raises
    ------
    ZeroDivisionError
        From ``np.average`` when a well's ``dist_offset`` weights sum to zero.
    """
    return _assess_by_offset_avg(df_dist_v3, 'phit', quality_range)


def assesment_htst_by_offset_avg(df_dist_v3, quality_range):
    """Classify each well's distance-weighted offset thickness against its target.

    Parameters
    ----------
    df_dist_v3 : DataFrame with ``well``, ``htst_target``, ``well_offset``,
        ``dist_offset``, ``htst_offset`` and ``formation_up`` columns
        (not modified).
    quality_range : float, absolute tolerance; the accepted band is
        ``htst_target - quality_range`` to ``htst_target + quality_range``
        (bounds excluded). A relative band is not implemented here.

    Returns
    -------
    (dist_v4, result)
        ``dist_v4``: per-row table with ``weighted_average``,
        ``htst_target_high``, ``htst_target_low`` and ``htst_pred_qc``
        ('in_range', 'under_estimated' or 'over_estimated').
        ``result``: share of unique wells per ``htst_pred_qc`` label.

    Raises
    ------
    ZeroDivisionError
        From ``np.average`` when a well's ``dist_offset`` weights sum to zero.
    """
    return _assess_by_offset_avg(df_dist_v3, 'htst', quality_range)

In [None]:
# Goal: estimate the range of the kh prediction from the uncertainty of htst and phit.
# phit is known to lie within +/- 0.0115 v/v and htst within +/- 4.25 m (see the uncertainty calculations).
df_range_phit_lst = []
df_range_htst_lst = []
# The phit/htst-augmented distance table does not depend on q, so it is built once.
dist_bal8_v2 = add_phit_dist_df(ntd_top_phi_bot8_bp_v4, dist_bal8)
for q in range(1,16):
    dist_bal8_v3 = selection_quantity_well(dist_bal8_v2, q)
    dist_bal8_v4_phit, result_bal8_phit = assesment_phit_by_offset_avg(dist_bal8_v3, 0.0115)
    dist_bal8_v4_htst, result_bal8_htst = assesment_htst_by_offset_avg(dist_bal8_v3, 4.25)
    result_bal8_phit['offset_qty'] = q
    result_bal8_htst['offset_qty'] = q
    df_range_phit_lst.append(result_bal8_phit)
    df_range_htst_lst.append(result_bal8_htst)

df_range_phit = pd.concat(df_range_phit_lst).reset_index(drop=True)
df_range_htst = pd.concat(df_range_htst_lst).reset_index(drop=True)
# Left panel: phit classification shares vs. number of offsets; right panel: htst.
fig, ax = plt.subplots(1,2, figsize=(14,4))
sns.lineplot(data=df_range_phit, x='offset_qty', y='proportion', hue='phit_pred_qc', ax=ax[0])
ax[0].grid()
ax[0].set_title('Prediction by weighted avg offset 0.0115')
sns.lineplot(data=df_range_htst, x='offset_qty', y='proportion', hue='htst_pred_qc', ax=ax[1])
ax[1].grid()
ax[1].set_title('Prediction by weighted avg offset 4.25');

In [None]:
# Row of the sweep with the highest 'in_range' proportion for phit (its offset_qty is the best q).
df_range_phit[df_range_phit.phit_pred_qc == 'in_range'].sort_values(by='proportion', ascending=False).head(1)

In [48]:
# display(dist_bal8_v4_htst.iloc[:3])
# display(dist_bal8_v4_phit.iloc[:3])

In [None]:
# Question: is the htst range related to net, i.e. does NET correlate with the
# P10-P90 range of net_rnd (NET_rnd_range)?
uncert8 = (
    pd.read_csv(r'C:\jupyter\SPP_uncertainty\report\uncert_final_plt_bal8.csv')
    [['well','NET_rnd_range', 'NET', 'field']]
    .drop_duplicates()
)
sns.jointplot(data=uncert8, x='NET', y='NET_rnd_range', hue='field', kind='scatter')
plt.grid()

# Khtst range

In [None]:
# Rebuild the phit and htst assessments with the first q=2 offset rows per well.
# 0.0115 (phit) and 4.25 (htst) are the absolute tolerances added to and
# subtracted from each target inside the assessment functions.
q=2
dist_bal8_v2 = add_phit_dist_df(ntd_top_phi_bot8_bp_v4, dist_bal8)
dist_bal8_v3 = selection_quantity_well(dist_bal8_v2, q)
dist_bal8_v4_phit, result_bal8_phit = assesment_phit_by_offset_avg(dist_bal8_v3, 0.0115)
dist_bal8_v4_htst, result_bal8_htst = assesment_htst_by_offset_avg(dist_bal8_v3, 4.25)

# With the phit and htst uncertainty ranges known, the kh uncertainty can be derived from them.
def _perm_from_phit(phit):
    """Piecewise porosity -> permeability transform, evaluated element-wise.

    phit >= 0.20 uses the quadratic, phit < 0.16 the low-porosity exponential,
    and 0.16 <= phit < 0.20 the steep exponential. NaN porosity gives NaN.
    Returns a NumPy array aligned with ``phit``.
    """
    return np.select(
        # np.select takes the first matching condition, so '< 0.2' only
        # catches the values that are not below 0.16.
        [phit >= 0.2, phit < 0.16, phit < 0.2],
        [7.7925 * ((phit * 100) ** 2) - 29881.0 * phit + 2891.8,
         0.0159 * np.exp(phit * 21.27),
         0.00000002 * np.exp(phit * 105.56)],
        default=np.nan)


def khtst_basedon_phit_htst_offset(dist_bal8_v4_htst, dist_bal8_v4_phit):
    """Derive khtst (perm * htst) targets, bounds and offset estimates from phit and htst tables.

    Parameters
    ----------
    dist_bal8_v4_htst : DataFrame as returned by ``assesment_htst_by_offset_avg``
        (columns ``well``, ``htst_target``, ``well_offset``, ``dist_offset``,
        ``htst_offset``, ``formation_up``, ``htst_target_high``,
        ``weighted_average``, ``htst_target_low``, ``htst_pred_qc``).
    dist_bal8_v4_phit : the matching table from ``assesment_phit_by_offset_avg``.

    Neither input is modified. Rows are paired on (``well``, ``well_offset``)
    with an inner join.

    Returns
    -------
    (dist_bal8_khst, result_bal8_khtst)
        ``dist_bal8_khst``: columns ``well``, ``well_offset``, ``khtst_target``,
        ``khtst_target_high``, ``khtst_wavg``, ``khtst_target_low``,
        ``wavg_htst``, ``wavg_phit``, ``khtst_pred_qc``.
        ``result_bal8_khtst``: normalised counts of ``khtst_pred_qc`` over the
        rows of ``dist_bal8_khst`` (well/offset pairs, not unique wells).
    """
    htst_tbl = dist_bal8_v4_htst.rename(columns={'dist_offset': 'dist_offset_htst',
                                                 'weighted_average': 'wavg_htst'})
    phit_tbl = dist_bal8_v4_phit.rename(columns={'dist_offset': 'dist_offset_phit',
                                                 'weighted_average': 'wavg_phit'})
    keys = ['well', 'well_offset']
    # 'formation_up' exists in both tables; the phit copy gets the suffix and is dropped.
    dist_bal8_htst_phit = htst_tbl.set_index(keys).join(
        phit_tbl.set_index(keys), how='inner', rsuffix='_phit').drop(columns=['formation_up_phit']).reset_index()

    # .copy() so the new columns below are written to an independent frame.
    dist_bal8_htst_phit = dist_bal8_htst_phit[[ 'well', 'phit_target', 'htst_target', 'well_offset',  'dist_offset_htst', 'dist_offset_phit',
                                                'htst_offset', 'phit_offset', 'formation_up',
                                                'htst_target_high', 'wavg_htst', 'htst_target_low','htst_pred_qc',
                                                'phit_target_high', 'wavg_phit', 'phit_target_low', 'phit_pred_qc']].copy()

    # Permeability for the target porosity, the offset average and both bounds.
    for phit_col, perm_col in (('phit_target', 'perm_target'),
                               ('wavg_phit', 'perm_wavg'),
                               ('phit_target_high', 'perm_target_high'),
                               ('phit_target_low', 'perm_target_low')):
        dist_bal8_htst_phit[perm_col] = _perm_from_phit(dist_bal8_htst_phit[phit_col])

    # khtst = thickness * permeability, pairing each thickness with its matching permeability.
    dist_bal8_htst_phit['khtst_target'] = dist_bal8_htst_phit['htst_target']*dist_bal8_htst_phit['perm_target']
    dist_bal8_htst_phit['khtst_wavg'] = dist_bal8_htst_phit['wavg_htst']*dist_bal8_htst_phit['perm_wavg']
    dist_bal8_htst_phit['khtst_target_high'] = dist_bal8_htst_phit['htst_target_high']*dist_bal8_htst_phit['perm_target_high']
    dist_bal8_htst_phit['khtst_target_low'] = dist_bal8_htst_phit['htst_target_low']*dist_bal8_htst_phit['perm_target_low']

    dist_bal8_khst = dist_bal8_htst_phit[[ 'well', 'well_offset',
                                            'khtst_target',
                                            'khtst_target_high', 'khtst_wavg', 'khtst_target_low',
                                            'wavg_htst', 'wavg_phit']].copy()

    dist_bal8_khst['khtst_pred_qc'] = 'in_range'
    dist_bal8_khst.loc[dist_bal8_khst.khtst_wavg <= dist_bal8_khst.khtst_target_low, 'khtst_pred_qc'] = 'under_estimated'
    dist_bal8_khst.loc[dist_bal8_khst.khtst_wavg >= dist_bal8_khst.khtst_target_high, 'khtst_pred_qc'] = 'over_estimated'
    result_bal8_khtst = dist_bal8_khst.value_counts('khtst_pred_qc', normalize=True)
    return dist_bal8_khst, result_bal8_khtst

# Target khtst against the offset-based khtst, htst and phit averages, one panel each.
dist_bal8_khtst, result_bal8_khtst = khtst_basedon_phit_htst_offset(dist_bal8_v4_htst, dist_bal8_v4_phit)
fig, ax = plt.subplots(1,3, figsize=(20,6))
for panel_ax, x_col in zip(ax, ('khtst_wavg', 'wavg_htst', 'wavg_phit')):
    sns.scatterplot(data=dist_bal8_khtst, y='khtst_target', x=x_col, hue='khtst_pred_qc', ax=panel_ax)

In [None]:
# Collapse the pair-level table to distinct rows without well_offset, attach
# well coordinates and map the QC class of each well.
dist_bal8_khst_v2 = (
    dist_bal8_khtst
    .loc[:, ['well','khtst_target','khtst_target_high','khtst_wavg','khtst_target_low',
             'wavg_htst','wavg_phit','khtst_pred_qc']]
    .drop_duplicates()
    .reset_index(drop=True)
)
xy = df_bal8_v4.groupby('well')[['xmean','ymean']].first().reset_index()
dist_bal8_khst_v3 = pd.merge(dist_bal8_khst_v2, xy, on='well')
plt.subplots(figsize=(12,5))
sns.scatterplot(data=dist_bal8_khst_v3, x='xmean', y='ymean', hue='khtst_pred_qc', s=50, ec='black', alpha=0.5)

In [None]:
# Width of the derived khtst band relative to the target, above and below.
dist_bal8_khst_v3['khtst_target_high_%'] = (dist_bal8_khst_v3['khtst_target_high'] - dist_bal8_khst_v3['khtst_target']) / dist_bal8_khst_v3['khtst_target']
dist_bal8_khst_v3['khtst_target_low_%'] = (dist_bal8_khst_v3['khtst_target'] - dist_bal8_khst_v3['khtst_target_low']) / dist_bal8_khst_v3['khtst_target']
# Alternative classification with a fixed +/-25 % band around the target.
dist_bal8_khst_v3['khtst_target_up25'] = dist_bal8_khst_v3['khtst_target'] + dist_bal8_khst_v3['khtst_target']*0.25
dist_bal8_khst_v3['khtst_target_low25'] = dist_bal8_khst_v3['khtst_target'] - dist_bal8_khst_v3['khtst_target']*0.25
dist_bal8_khst_v3['khtst_pred_qc_25'] = 'in_range'
dist_bal8_khst_v3.loc[dist_bal8_khst_v3['khtst_wavg'] <= dist_bal8_khst_v3['khtst_target_low25'], 'khtst_pred_qc_25'] = 'under_estimated'
dist_bal8_khst_v3.loc[dist_bal8_khst_v3['khtst_wavg'] >= dist_bal8_khst_v3['khtst_target_up25'], 'khtst_pred_qc_25'] = 'over_estimated'
# A bare expression in the middle of a cell is discarded, so show the shares explicitly.
display(dist_bal8_khst_v3['khtst_pred_qc_25'].value_counts(normalize=True))
# Label both curves so the two distributions can be told apart.
sns.kdeplot(data=dist_bal8_khst_v3, x='khtst_target_high_%', label='khtst_target_high_%')
sns.kdeplot(data=dist_bal8_khst_v3, x='khtst_target_low_%', label='khtst_target_low_%')
plt.legend()
plt.grid()

In [None]:
# Target khtst against the offset-based khtst estimate, coloured by the +/-25 % classification.
sns.scatterplot(data=dist_bal8_khst_v3, y='khtst_target', x='khtst_wavg', hue='khtst_pred_qc_25')