# Libs

In [26]:
import warnings
warnings.filterwarnings('ignore')

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
import seaborn as sns
import plotly.graph_objects as go
from plotly.subplots import make_subplots
from sklearn.metrics.pairwise import euclidean_distances
from tqdm import tqdm
import os
import lasio

# Notebook-wide pandas display settings.
pd.set_option('display.precision', 3)
# Floats are rendered with five fixed decimals.
pd.set_option('display.float_format', '{:.5f}'.format)
pd.set_option('display.max_columns', 15)
pd.set_option('display.max_rows', 6)

# Data uploading

In [3]:
# Balakhany VIII logs. The path is a raw string so the Windows backslashes are
# taken literally instead of being parsed as (invalid) escape sequences.
df_bal8_v4 = pd.read_csv(r'C:\jupyter\SPP\inputoutput\general_logs\df_bal8_azr_v4.csv')
# Names of the wells that have at least one sample with phit_flag == 1 (sorted).
well_phit_flag8 = df_bal8_v4.loc[df_bal8_v4.phit_flag == 1, 'well'].dropna().sort_values().unique()
# Keep every row (flagged or not) of those wells.
df_bal8_v4_flag = df_bal8_v4[df_bal8_v4.well.isin(well_phit_flag8)]

# Same preparation for the Balakhany X logs.
df_bal10_v4 = pd.read_csv(r'C:\jupyter\SPP\inputoutput\general_logs\df_bal10_vshclp2_v4.csv')
well_phit_flag10 = df_bal10_v4.loc[df_bal10_v4.phit_flag == 1, 'well'].dropna().sort_values().unique()
df_bal10_v4_flag = df_bal10_v4[df_bal10_v4.well.isin(well_phit_flag10)]

In [4]:
# Layer tables for both formations; the CSVs carry a saved index column
# ('Unnamed: 0') which is discarded on load.
ntd_top_phi_bot8_bp_v4 = pd.read_csv(
    r'C:\jupyter\SPP\inputoutput\layers\ntd_top_phi_bot8_bp_v4.csv'
).drop(columns='Unnamed: 0')
ntd_top_phi_bot10_bp_v4 = pd.read_csv(
    r'C:\jupyter\SPP\inputoutput\layers\ntd_top_phi_bot10_bp_v4.csv'
).drop(columns='Unnamed: 0')

In [5]:
def well_dist_calc(dataset, fm):
    """Build a long table of pairwise distances between wells.

    One (Xmean, Ymean) location is taken per well (first value in the group);
    wells with a missing coordinate are dropped. Pairs at zero distance
    (including every well paired with itself) are removed.

    Returns a DataFrame with columns well, well_offset, dist, FORMATION_up,
    sorted by well and then by increasing distance; FORMATION_up is `fm`.
    """
    coords = dataset.groupby('well')[['Xmean', 'Ymean']].first().reset_index().dropna()
    names = list(coords.well)

    # Square distance matrix: one column per well, plus a column naming each row's well.
    matrix = pd.DataFrame(euclidean_distances(coords[['Xmean', 'Ymean']]), columns=names)
    matrix.insert(0, 'well_offset', names)

    # Unpivot: the former column header becomes 'well', the row's well is 'well_offset'.
    pairs = matrix.melt(id_vars='well_offset', var_name='well', value_name='dist')
    pairs = pairs.loc[pairs['dist'] != 0, ['well', 'well_offset', 'dist']]
    pairs = pairs.sort_values(by=['well', 'dist'])
    pairs['FORMATION_up'] = fm
    return pairs

# Pairwise well-distance tables, one per formation.
dist_bal8 = well_dist_calc(dataset=df_bal8_v4_flag, fm='Balakhany VIII')
dist_bal10 = well_dist_calc(dataset=df_bal10_v4_flag, fm='Balakhany X')

# Property EDA

## Dashboard

In [None]:
# Scratch cell: square well-to-well distance matrix for the flagged Balakhany VIII wells.
dataset = df_bal8_v4_flag
# One location per well; wells with a missing coordinate are dropped.
data = dataset.groupby('well')[['Xmean', 'Ymean']].first().reset_index().dropna()
row_name = data[['well']].reset_index(drop=True)
distance_fm = pd.DataFrame(euclidean_distances(data[['Xmean', 'Ymean']]), columns=data.well.tolist())
# Prepend the row's well name so every row of the matrix is labelled.
distance_fm_well = distance_fm.copy()
distance_fm_well.insert(0, 'well', row_name['well'])
distance_fm_well

In [47]:
def well_dist_calc(dataset, fm):
    """Build a long table of pairwise distances between wells.

    One (Xmean, Ymean) location is taken per well (first value in the group);
    wells with a missing coordinate are dropped. Pairs at zero distance
    (including every well paired with itself) are removed.

    Returns a DataFrame with columns well, well_offset, dist, FORMATION_up,
    sorted by well and then by increasing distance; FORMATION_up is `fm`.
    """
    coords = dataset.groupby('well')[['Xmean', 'Ymean']].first().reset_index().dropna()
    names = list(coords.well)

    # Square distance matrix: one column per well, plus a column naming each row's well.
    matrix = pd.DataFrame(euclidean_distances(coords[['Xmean', 'Ymean']]), columns=names)
    matrix.insert(0, 'well_offset', names)

    # Unpivot: the former column header becomes 'well', the row's well is 'well_offset'.
    pairs = matrix.melt(id_vars='well_offset', var_name='well', value_name='dist')
    pairs = pairs.loc[pairs['dist'] != 0, ['well', 'well_offset', 'dist']]
    pairs = pairs.sort_values(by=['well', 'dist'])
    pairs['FORMATION_up'] = fm
    return pairs

def well_list_selection(df, well_n):
    """Return the `well_n` wells with the largest KHtst.

    The first KHtst and field value of each well are used; wells whose KHtst
    equals 0 are excluded. Rows are ordered by KHtst, then field, descending.

    Parameters
    ----------
    df : DataFrame with columns well, KHtst, field.
    well_n : number of wells to keep; values <= 0 give an empty frame.

    Returns
    -------
    DataFrame with columns well, KHtst, field and a fresh 0..k-1 index.
    """
    df_khtst = df.groupby('well')[['KHtst', 'field']].first().reset_index()
    df_khtst = df_khtst[df_khtst.KHtst != 0]
    df_khtst = df_khtst.sort_values(by=['KHtst', 'field'], ascending=False)
    # head(well_n) keeps exactly well_n rows; a `[:well_n-1]` slice would return
    # one well too few (and all-but-last for well_n == 0).
    return df_khtst.head(max(well_n, 0)).reset_index(drop=True)

def well_offset_comparison_dashboard(dataset_wells, dataset_layers, dist_df,  well_target, offset_qty, fm_name, print_flag):
    """Draw a five-panel comparison of a target well against its offset wells.

    Panels: PHIT histogram, layer htst vs perm_avg (log y), KHtst against a
    reversed per-well sample index, KHtst difference (first minus last sample)
    per FORMATION, and a location map of the wells.

    Parameters
    ----------
    dataset_wells : DataFrame with columns well, PHIT, NET, TST, KHtst,
        FORMATION, phit_flag, Xmean, Ymean.
    dataset_layers : DataFrame with columns well, htst, perm_avg.
    dist_df : DataFrame with columns well, well_offset, dist. The first
        `offset_qty` rows stored for the target well are used, so the table is
        expected to be sorted by distance (as `well_dist_calc` returns it).
    well_target : name of the target well (drawn in red).
    offset_qty : number of offset wells to show; fewer are used when `dist_df`
        holds fewer rows for the target.
    fm_name : label used only in the saved file name.
    print_flag : the figure is written to disk only when this equals 'print'.

    Returns
    -------
    None. The figure is left open on the current pyplot state.
    """
    # (plot color, color name used in the title) per offset rank; reused
    # cyclically when more offsets are requested than colors are listed.
    offset_styles = [('orange', 'orange'), ('green', 'green'), ('#0797eb', 'blue'),
                     ('purple', 'purple'), ('brown', 'brown'), ('magenta', 'magenta')]

    nearest = dist_df[dist_df.well == well_target].iloc[:offset_qty]
    offset_well_list = nearest['well_offset'].values.tolist()
    offset_colors = [offset_styles[rank % len(offset_styles)] for rank, _ in enumerate(offset_well_list)]
    well_list = [well_target] + offset_well_list

    data_logs = dataset_wells[(dataset_wells.well.isin(well_list)) & (dataset_wells.PHIT != 0)]
    data_layers = dataset_layers[(dataset_layers.well.isin(well_list)) & (dataset_layers.htst > 1)]
    khtst_logs = data_logs.groupby(['well','FORMATION'])[['KHtst']].apply(lambda x: x.iloc[0] - x.iloc[-1]).reset_index()

    def khtst_layer_calculation(data_logs):
        """Add a reversed 0..n-1 sample counter to each well that has NET == 1 samples."""
        df_lst = []
        for wellname in data_logs.loc[data_logs.NET == 1, 'well'].unique():
            # Explicit copy: the new column must not be written onto a slice of data_logs.
            well_data = data_logs[data_logs.well == wellname].copy()
            well_data['tst_index_rev'] = list(range(len(well_data) - 1, -1, -1))
            df_lst.append(well_data)
        return pd.concat(df_lst)
    data_logs_khtst = khtst_layer_calculation(data_logs)

    def well_dist_title():
        """Compose the figure title listing each offset with its distance and color."""
        parts = [f"{name} - {int(np.round(dist))}m {color_name};"
                 for name, dist, (_, color_name) in zip(offset_well_list, nearest['dist'], offset_colors)]
        return f"target well {well_target} : offsets " + " ".join(parts)

    def rotate_ylabels(ax):
        """Rotate y tick labels without freezing the label texts.

        tick_params keeps the automatic locator/formatter pair intact, unlike
        set_yticklabels() called without fixed tick positions.
        """
        ax.tick_params(axis='y', labelrotation=90)
        plt.setp(ax.get_yticklabels(), va='center')

    fig = plt.figure(figsize=(22, 10))
    gs = gridspec.GridSpec(2, 4, figure=fig)
    ax1 = fig.add_subplot(gs[0, 0])
    ax2 = fig.add_subplot(gs[0, 1])
    ax3 = fig.add_subplot(gs[0, 2])
    ax4 = fig.add_subplot(gs[0, 3])
    ax5 = fig.add_subplot(gs[1, :3])

    custom_palette = {well_target: 'red'}
    custom_palette.update({name: color for name, (color, _) in zip(offset_well_list, offset_colors)})

    sns.histplot(data=data_logs, x='PHIT', hue='well', bins=50, kde=True, ax=ax1, palette=custom_palette)
    sns.scatterplot(data=data_layers, x='htst', y='perm_avg', hue='well', s=75, ax=ax2, alpha=0.5, ec='black', palette=custom_palette)
    sns.lineplot(data=data_logs_khtst, x='tst_index_rev', y='KHtst', hue='well', ax=ax3, palette=custom_palette)
    sns.barplot(data = khtst_logs, x='FORMATION', y='KHtst', hue='well', ax=ax4, palette=custom_palette)
    rotate_ylabels(ax1)
    ax2.set_yscale('log')
    ax2.grid(True, which='both', linestyle='--', linewidth=0.5)
    rotate_ylabels(ax2)
    ax3.grid(True, which='both', linestyle='--', linewidth=0.5)
    rotate_ylabels(ax3)

    fms = khtst_logs.FORMATION.unique()
    ax4.set_xticks(np.arange(len(fms)), fms, rotation=45, fontsize=6)
    rotate_ylabels(ax4)

    def well_xy(name):
        """First stored (Xmean, Ymean) of a well."""
        rows = dataset_wells[dataset_wells.well == name]
        return rows['Xmean'].iloc[0], rows['Ymean'].iloc[0]

    # Location map: all flagged samples in gray, then the target, then each offset.
    flagged = dataset_wells[dataset_wells.phit_flag == 1]
    ax5.scatter(flagged['Xmean'], flagged['Ymean'], color='gray', s=10)
    ax5.scatter(*well_xy(well_target), color='red', s=50, ec='black')
    for name, (color, _) in zip(offset_well_list, offset_colors):
        ax5.scatter(*well_xy(name), color=color)

    plt.suptitle(well_dist_title(), fontsize=16, y=0.92, x=0.32)
    if print_flag == 'print':
        plt.savefig(f'C:/jupyter/SPP/plots/offset_dashboard/{fm_name}_{well_target}_offset_dashboard.png')

In [None]:
# Offset dashboards for the selected Balakhany VIII wells.
dist_bal8 = well_dist_calc(df_bal8_v4_flag, 'Balakhany VIII')
df_bal8_khtst = well_list_selection(df_bal8_v4_flag, 10)
for wellname in df_bal8_khtst.well:
    try:
        well_offset_comparison_dashboard(df_bal8_v4_flag, ntd_top_phi_bot8_bp_v4, dist_bal8, wellname, 3, 'bal8','print')
    except Exception as err:
        # Best-effort batch: report the failing well with its cause and continue.
        # Catching Exception (not a bare except) lets Ctrl-C stop the loop.
        print(f"error in {wellname}: {err!r}")

In [None]:
# Offset dashboards for the selected Balakhany X wells; well 'E31Z' is left out.
df_bal10_v4_flag = df_bal10_v4_flag[~df_bal10_v4_flag.well.isin(['E31Z'])]
dist_bal10 = well_dist_calc(df_bal10_v4_flag, 'Balakhany X')
df_bal10_khtst = well_list_selection(df_bal10_v4_flag, 10)
for wellname in df_bal10_khtst.well:
    try:
        well_offset_comparison_dashboard(df_bal10_v4_flag, ntd_top_phi_bot10_bp_v4, dist_bal10, wellname, 3, 'bal10','print')
    except Exception as err:
        # Best-effort batch: report the failing well with its cause and continue.
        # Catching Exception (not a bare except) lets Ctrl-C stop the loop.
        print(f"error in {wellname}: {err!r}")

## Distance df

# Superfacies

In [None]:
# Per-well summary over NET == 1 samples: mean PHIT, first KHtst value and field.
df_bal8_v4_net = df_bal8_v4.loc[df_bal8_v4['NET'] == 1]
per_well = df_bal8_v4_net.groupby('well')
avg_phit = per_well['PHIT'].mean().rename('PHIT_avg').reset_index()
first_khtst = per_well['KHtst'].first().rename('KHtst_first').reset_index()
field = per_well['field'].first().reset_index()
data_khphit = avg_phit.merge(first_khtst, on='well').merge(field, on='well')
# Only wells whose average PHIT exceeds 0.1 are plotted.
data_khphit = data_khphit.loc[data_khphit['PHIT_avg'] > 0.1]
sns.scatterplot(data=data_khphit, x='PHIT_avg', y='KHtst_first', hue='field',
                s=75, alpha=0.5, ec='black')
plt.yscale('log')
plt.grid(True, which='both', linestyle='--', linewidth=0.5)
plt.title('KHtst vs PHIT Bal8');

In [None]:
# Hand-drawn clusters on the KHtst/PHIT cross-plot. Rules are applied in order,
# so a later rule overrides an earlier one where both match.
kh = data_khphit['KHtst_first']
phi = data_khphit['PHIT_avg']
cluster_rules = [
    ((kh > 18000) & (phi >= 0.2), 1),
    ((kh > 10000) & (phi > 0.245), 1),
    (kh < 2000, 2),
    ((kh > 8000) & (phi < 0.205), 3),
    (phi < 0.14, 3),
]
data_khphit['vis_cluster'] = 0
for rule_mask, cluster_id in cluster_rules:
    data_khphit.loc[rule_mask, 'vis_cluster'] = cluster_id
sns.scatterplot(data=data_khphit, x='PHIT_avg', y='KHtst_first', hue='vis_cluster',
                s=75, alpha=0.5, ec='black', palette='plasma')
plt.yscale('log')
plt.grid(True, which='both', linestyle='--', linewidth=0.5)
plt.title('KHtst vs PHIT Bal8');

In [13]:
# df_bal8_azr_v3 = df_bal8_azr_v3.merge(data_khphit[['well', 'vis_cluster']], on='well')
# df_bal8_azr_v3.to_csv('C:\jupyter\SPP\inputoutput\general_logs\df_bal8_azr_v3.csv')

In [82]:
# from sklearn.preprocessing import StandardScaler
# from sklearn.cluster import KMeans
# from sklearn.cluster import DBSCAN

# data = data_khphit.drop(['well','field'], axis=1)
# scaler = StandardScaler()
# # normalized_data = scaler.fit_transform(data)
# normalized_data = data

# kmeans = KMeans(n_clusters=3, random_state=151)
# kmeans_labels = kmeans.fit_predict(normalized_data)
# kmeans_labels = pd.DataFrame(kmeans_labels, columns=['kmeans'])
# data_khphit_clustered = data_khphit.join(kmeans_labels)
# sns.scatterplot(data=data_khphit_clustered, x='PHIT_avg', y='KHtst_first', hue='kmeans', s=75, alpha=0.5, ec='black', palette='viridis')
# plt.yscale('log')

In [81]:
# data = data_khphit.drop(['well','field'], axis=1)
# normalized_data = data

# dbscan = DBSCAN(eps=250, min_samples=3)
# dbscan_labels = dbscan.fit_predict(normalized_data)
# dbscan_labels = pd.DataFrame(dbscan_labels, columns=['dbscan'])
# data_khphit_clustered = data_khphit.join(dbscan_labels)

# sns.scatterplot(data=data_khphit_clustered, x='PHIT_avg', y='KHtst_first', hue='dbscan', s=75, alpha=0.5, ec='black', palette='viridis')
# # plt.yscale('log')

# Experiments with data

## Ntg vs vsh_avg

In [None]:
# Net-to-gross per well: summed layer htst over the formation TST interval.
net = ntd_top_phi_bot8_bp_v4.groupby('well')['htst'].sum().reset_index()
ntd_top_phi_bot8_bp_v4['tst_interval'] = ntd_top_phi_bot8_bp_v4['fm_bot_tst'] - ntd_top_phi_bot8_bp_v4['fm_top_tst']
gross = ntd_top_phi_bot8_bp_v4.groupby('well')['tst_interval'].first().reset_index()
ntg = net.merge(gross, on='well', how='left')
ntg['ntg'] = ntg['htst'] / ntg['tst_interval']
# Average VSH per well from the logs; wells absent from the logs get NaN (left merge).
vsh_avg = df_bal8_v4.groupby('well')['VSH'].mean().reset_index()
ntg_vsh = ntg.merge(vsh_avg, on='well', how='left')
fig, ax = plt.subplots(figsize=(10, 6))
sns.regplot(data=ntg_vsh, x='VSH', y='ntg', line_kws={'color': 'red'}, 
            scatter_kws={'s': 50})
# Fit on complete pairs only: np.polyfit does not skip NaN, so feeding it the
# raw columns would make the annotated equation NaN whenever a well lacks VSH or ntg.
fit_points = ntg_vsh[['VSH', 'ntg']].dropna()
slope, intercept = np.polyfit(fit_points['VSH'], fit_points['ntg'], 1)
corell = fit_points.corr().iloc[0,1]
plt.text(0.25, 0.6, f'y = {slope:.2f}x + {intercept:.2f} coefR={corell:.2f}')
plt.xlim(0,1)
plt.ylim(0,1)
plt.grid()
# ntg_vsh.to_csv(r'C:\jupyter\SPP\inputoutput\netvsh_net\ntg_vsh.csv', index=False)

# Logs-df enrichment with ntd-df

In [3]:
# Reload logs and layer tables with lower-cased column names; the layer CSVs
# also carry a saved index column ('Unnamed: 0') that is dropped.
logs8 = pd.read_csv(r'C:\jupyter\SPP\inputoutput\general_logs\df_bal8_azr_v4.csv')
logs8 = logs8.rename(columns=str.lower)
ntd8 = pd.read_csv(r'C:\jupyter\SPP\inputoutput\layers\ntd_top_phi_bot8_bp_v4.csv')
ntd8 = ntd8.drop(columns='Unnamed: 0').rename(columns=str.lower)
logs10 = pd.read_csv(r'C:\jupyter\SPP\inputoutput\general_logs\df_bal10_vshclp2_v4.csv')
logs10 = logs10.rename(columns=str.lower)
ntd10 = pd.read_csv(r'C:\jupyter\SPP\inputoutput\layers\ntd_top_phi_bot10_bp_v4.csv')
ntd10 = ntd10.drop(columns='Unnamed: 0').rename(columns=str.lower)

In [4]:
def logs_join_ntd(dataset_ntd, dataset_logs):
    """Tag every log sample with the htst of the layer it falls in.

    Each layer [layer_top_tst, layer_bot_tst) is expanded into samples spaced
    0.1 apart starting at its top; samples and log depths are rounded to one
    decimal and matched on (well, tst). Log samples outside every layer get 0.

    Parameters
    ----------
    dataset_ntd : DataFrame with columns well, layer_top_tst, layer_bot_tst, htst.
    dataset_logs : DataFrame with columns well, tst (plus any log columns).
        It is not modified.

    Returns
    -------
    DataFrame: the log rows (tst rounded to 0.1) with an added `htst_ntd`
    column; all column names are lower-cased.
    """
    step = 0.1
    tops = dataset_ntd['layer_top_tst'].to_numpy(dtype=float)
    bots = dataset_ntd['layer_bot_tst'].to_numpy(dtype=float)

    # Samples per layer for a half-open interval. The small tolerance stops
    # float noise (e.g. 0.30000000000000004 / 0.1) from adding a spurious
    # sample exactly at the layer bottom, which np.arange would do.
    counts = np.ceil((bots - tops) / step - 1e-9)
    counts = np.where(np.isfinite(counts), counts, 0).clip(min=0).astype(int)

    # Position of each expanded sample inside its own layer: 0, 1, 2, ...
    first_pos = np.cumsum(counts) - counts
    within = np.arange(counts.sum()) - np.repeat(first_pos, counts)

    result = pd.DataFrame({
        'well': np.repeat(dataset_ntd['well'].to_numpy(), counts),
        'tst': np.repeat(tops, counts) + within * step,
        'htst': np.repeat(dataset_ntd['htst'].to_numpy(), counts),
    })
    result['tst'] = result['tst'].round(1)
    # Rounding can map samples of two adjacent layers to the same depth; keep
    # the first so the join below never multiplies log rows.
    result = result.drop_duplicates(subset=['well', 'tst'])

    # Work on a copy so the caller's frame keeps its original depths.
    logs = dataset_logs.copy()
    logs['tst'] = logs['tst'].round(1)

    log_ntd = logs.set_index(['well', 'tst']).join(result.set_index(['well', 'tst'])).reset_index()
    log_ntd['htst'] = log_ntd['htst'].fillna(0)
    log_ntd = log_ntd.rename(columns={'htst': 'htst_ntd'})
    log_ntd.columns = log_ntd.columns.str.lower()
    return log_ntd
# Enrich both log sets with layer thickness and persist the results.
logs8_ntd = logs_join_ntd(dataset_ntd=ntd8, dataset_logs=logs8)
logs10_ntd = logs_join_ntd(dataset_ntd=ntd10, dataset_logs=logs10)
logs8_ntd.to_csv(
    r'C:\jupyter\SPP\inputoutput\general_logs\logs8_ntd_v4.csv',
    index=False,
)
logs10_ntd.to_csv(
    r'C:\jupyter\SPP\inputoutput\general_logs\logs10_ntd_v4.csv',
    index=False,
)

In [None]:
# Number of distinct wells in the enriched Balakhany VIII logs.
logs8_ntd['well'].unique().size

In [None]:
# Quartiles (rounded to whole units) of layer thickness over samples with htst_ntd >= 1.
net_htst = logs8_ntd.loc[logs8_ntd.htst_ntd >= 1, 'htst_ntd']
htst_quart = [net_htst.quantile(0.25).round(0), net_htst.quantile(0.75).round(0)]
# Thickness classes: below Q1 (non-zero) -> distal, Q1..Q3 -> proximal, >= Q3 -> axial,
# everything else (htst_ntd == 0) -> nonres.
logs8_ntd.loc[(logs8_ntd.htst_ntd < htst_quart[0]) & (logs8_ntd.htst_ntd != 0), 'htst_type'] = 'distal'
logs8_ntd.loc[(logs8_ntd.htst_ntd >= htst_quart[0]) & (logs8_ntd.htst_ntd < htst_quart[1]), 'htst_type'] = 'proximal'
logs8_ntd.loc[logs8_ntd.htst_ntd >= htst_quart[1], 'htst_type'] = 'axial'
logs8_ntd['htst_type'] = logs8_ntd['htst_type'].fillna('nonres')
print('qunatiles 0.25 & 0.75:', htst_quart)

# class -> (fill color, bar edge color)
htst_type_colors = {'distal': ('green', 'none'), 'proximal': ('orange', 'none'), 'axial': ('yellow', 'gray')}
for field in logs8_ntd.field.unique():
    data = logs8_ntd[(logs8_ntd.field == field) & (logs8_ntd.htst_type != 'nonres')]
    fig, ax = plt.subplots(figsize=(7,5))
    for htst_type, (fill_color, edge_color) in htst_type_colors.items():
        # The label travels with the artist, so the legend stays correct even
        # when a class has no samples in this field (a positional label list
        # would shift onto the wrong histogram).
        sns.histplot(data=data[data.htst_type == htst_type], x='phit', color=fill_color, bins=50,
                     kde=True, edgecolor=edge_color, label=htst_type, ax=ax)
    ax.set_title(f'{field} phit vs htst_type')
    ax.legend()
    ax.set_xlim(0.05,0.3)

# Analysing data by FU khtst

In [None]:
def diplay_fu_khtst_run(dataset, field_letter):
    """Compute KHtst per well and flow unit and draw one bar chart per well.

    KHtst of a unit is the first minus the last `khtst` sample of the
    (well, formation) group, over rows with phit_flag == 1. Formation names
    are replaced by sortable short codes ('1bal8_s' ... '6bal8_05').

    Parameters
    ----------
    dataset : DataFrame with columns well, formation, khtst, phit_flag.
    field_letter : only wells whose name starts with this prefix are shown
        and returned.

    Returns
    -------
    DataFrame with columns well, formation, khtst_fu for the selected wells,
    sorted by well and formation code.
    """
    fu_codes = {
        'Balakhany VIII 10': '5bal8_10',
        'Balakhany VIII 15': '4bal8_15',
        'Balakhany VIII 20': '3bal8_20',
        'Balakhany VIII 25': '2bal8_25',
        'Balakhany VIII 5': '6bal8_05',
        'Balakhany VIII sand': '1bal8_s',
    }
    bar_colors = ['red', 'green', 'blue', 'yellow', 'purple', 'orange']

    def khtst_fu_processing(dataset):
        khtst_fu = dataset[dataset.phit_flag == 1].groupby(['well','formation'])['khtst'].apply(
            lambda x: x.iloc[0] - x.iloc[-1]).reset_index().rename(columns={'khtst':'khtst_fu'})
        # Names without a code are kept unchanged.
        khtst_fu['formation'] = khtst_fu['formation'].map(lambda name: fu_codes.get(name, name))
        return khtst_fu.sort_values(by=['well','formation'], ascending=[True, True])
    khtst_fu = khtst_fu_processing(dataset)

    def diplay_fu_khtst(khtst_fu):
        khtst_fu_v2 = khtst_fu[khtst_fu.well.str.startswith(field_letter)]
        wells = khtst_fu_v2.well.unique()
        if len(wells) == 0:
            # Nothing to draw; a 0-row subplot grid cannot be created.
            return khtst_fu_v2
        col = 5
        rows = -(-len(wells) // col)  # ceiling division
        # squeeze=False keeps `ax` two-dimensional even for a single row.
        fig, ax = plt.subplots(rows, col, figsize=(col*3, rows*2), squeeze=False)
        panels = ax.ravel()
        # One panel per well, including the last one.
        for panel, wellname in zip(panels, wells):
            data = khtst_fu_v2[(khtst_fu_v2.well == wellname)]
            colors = [bar_colors[pos % len(bar_colors)] for pos in range(len(data))]
            panel.barh(data.formation, data.khtst_fu, color=colors)
            panel.set_title(wellname)
            panel.set_xlim(0.0, 12000)
            panel.tick_params(axis='y', labelrotation=45)
            panel.invert_yaxis()
            panel.grid(True, linestyle='--', linewidth=0.5)
        # Hide the unused trailing panels of the grid.
        for panel in panels[len(wells):]:
            panel.set_visible(False)
        fig.tight_layout()
        return khtst_fu_v2
    khtst_fu_v2 = diplay_fu_khtst(khtst_fu)
    return khtst_fu_v2

# Flow-unit KHtst charts for wells whose name starts with 'B'.
khtst_fu_Bv2 = diplay_fu_khtst_run(dataset=logs8_ntd, field_letter='B')
# khtst_fu_Cv2 = diplay_fu_khtst_run(logs8_ntd, 'C')
# khtst_fu_Dv2 = diplay_fu_khtst_run(logs8_ntd, 'D')

def khtst_fu_long_creation(dataset, column_name='khtst_fu'):
    """Pivot per-unit values to one row per well and min-max scale each row.

    Parameters
    ----------
    dataset : DataFrame with columns well, formation (short unit codes such as
        '1bal8_s') and `column_name`.
    column_name : name of the value column to pivot.

    Returns
    -------
    DataFrame with columns well, '1bal8_s', '2bal8_25', '3bal8_20',
    '4bal8_15', '5bal8_10', '6bal8_05'; one row per well in order of first
    appearance. Values are scaled to [0, 1] within each well; a unit missing
    for a well is NaN, and a well whose values are all equal is all NaN.
    """
    col_norm = ['1bal8_s', '2bal8_25', '3bal8_20', '4bal8_15', '5bal8_10', '6bal8_05']
    wells = dataset['well'].dropna().unique()

    # Values are placed by unit code, not by row position, so wells with
    # missing units or unsorted rows cannot be mislabelled (or crash a reshape).
    wide = (dataset.groupby(['well', 'formation'])[column_name].first()
                   .unstack('formation')
                   .reindex(index=wells, columns=col_norm))

    # Row-wise min-max normalisation.
    row_min = wide.min(axis=1)
    row_span = wide.max(axis=1) - row_min
    wide = wide.sub(row_min, axis=0).div(row_span, axis=0)

    wide = wide.rename_axis(index=None, columns=None)
    wide.insert(0, 'well', wide.index.to_numpy())
    return wide.reset_index(drop=True)
# Wide, row-normalised KHtst table for the 'B' wells.
khtst_fuB_long = khtst_fu_long_creation(dataset=khtst_fu_Bv2)
# khtst_fuC_long = khtst_fu_long_creation(khtst_fu_Cv2)
# khtst_fuD_long = khtst_fu_long_creation(khtst_fu_Dv2)

In [36]:
def vsh_fu_processing(log_data):
    """Average vsh per well and flow unit over rows with phit_flag == 1.

    Formation names are replaced by sortable short codes; the result is sorted
    by well and code and carries `vsh_fu` (mean vsh) and `vsh_fu_r` (1 - vsh_fu).
    """
    fu_codes = {
        'Balakhany VIII 10': '5bal8_10',
        'Balakhany VIII 15': '4bal8_15',
        'Balakhany VIII 20': '3bal8_20',
        'Balakhany VIII 25': '2bal8_25',
        'Balakhany VIII 5': '6bal8_05',
        'Balakhany VIII sand': '1bal8_s',
    }
    flagged = log_data.loc[log_data['phit_flag'] == 1]
    vsh_fu = flagged.groupby(['well', 'formation'])['vsh'].mean().rename('vsh_fu').reset_index()
    # Names without a code are left as they are.
    vsh_fu['formation'] = vsh_fu['formation'].map(lambda name: fu_codes.get(name, name))
    vsh_fu = vsh_fu.sort_values(by=['well', 'formation'])
    vsh_fu['vsh_fu_r'] = 1 - vsh_fu['vsh_fu']
    return vsh_fu
# Mean vsh per well and flow unit for the enriched Balakhany VIII logs.
vsh_fu = vsh_fu_processing(log_data=logs8_ntd)

In [None]:
# Wells to highlight in the unit '5bal8_10' cross-plots.
bal8_fu10top = ['B02', 'B02Z', 'B13Z', 'B14Z', 'B16Y', 'B16Z', 'B19','B27Z', 'B34', 'B34Z', 'B37', 'B38Z']
fu_keys = ['well', 'formation']
# vsh and KHtst side by side per (well, unit), restricted to unit '5bal8_10'.
vsh_khtst_fu = vsh_fu.set_index(fu_keys).join(khtst_fu_Bv2.set_index(fu_keys)).reset_index()
vsh_khtst_fu = vsh_khtst_fu.loc[vsh_khtst_fu['formation'] == '5bal8_10']
vsh_khtst_fu['feature'] = np.where(vsh_khtst_fu['well'].isin(bal8_fu10top), 'khtst_top', 'norm')

def plotly_vsh_khtst(vsh_khtst_fu):
    """Show an interactive scatter of vsh_fu_r against khtst_fu.

    Points are blue for feature 'norm' and red for 'khtst_top'; hovering shows
    the well name together with the x and y values.
    """
    feature_colors = {'norm': 'blue', 'khtst_top': 'red'}
    points = go.Scatter(
        x=vsh_khtst_fu['vsh_fu_r'],
        y=vsh_khtst_fu['khtst_fu'],
        mode='markers',
        marker=dict(color=vsh_khtst_fu['feature'].map(feature_colors), size=10),
        text=vsh_khtst_fu['well'],  # hover label
        hoverinfo='text + x + y',
    )
    fig = go.Figure()
    fig.add_trace(points)
    fig.update_layout(
        title='Vsh vs Khtst Bal8_10',
        xaxis_title='vsh_fu_r',
        yaxis_title='khtst_fu',
        hovermode='closest',
        width=500,
        height=500,
        margin=dict(l=0, r=10, t=50, b=0),
        showlegend=False,
    )
    fig.show()
# Interactive vsh vs KHtst cross-plot for unit '5bal8_10'.
plotly_vsh_khtst(vsh_khtst_fu=vsh_khtst_fu)

In [None]:
# NOTE: rebinds logs8_ntd to its phit_flag == 1 rows for every later cell.
logs8_ntd = logs8_ntd.loc[logs8_ntd['phit_flag'] == 1]
in_unit = logs8_ntd['formation'] == 'Balakhany VIII 10'
is_b_well = logs8_ntd['well'].str.startswith('B')
df_bal8_10 = logs8_ntd.loc[in_unit & is_b_well]
# Per well: summed net flag and summed phit over net samples, each scaled by 0.1.
df_bal8_10_htst = df_bal8_10.groupby('well')['net'].sum().reset_index()
df_bal8_10_htst['net'] = df_bal8_10_htst['net'] * 0.1
df_bal8_10_phit = df_bal8_10.loc[df_bal8_10['net'] == 1].groupby('well')['phit'].sum().reset_index()
df_bal8_10_phit['phit'] = df_bal8_10_phit['phit'] * 0.1
df_bal8_10 = df_bal8_10_phit.set_index('well').join(df_bal8_10_htst.set_index('well')).reset_index()
df_bal8_10['feature'] = np.where(df_bal8_10['well'].isin(bal8_fu10top), 'khtst_top', 'norm')

def plotly_phit_net(df_bal8_10):
    """Show an interactive scatter of phit against net.

    Points are blue for feature 'norm' and red for 'khtst_top'; hovering shows
    the well name together with the x and y values.
    """
    feature_colors = {'norm': 'blue', 'khtst_top': 'red'}
    points = go.Scatter(
        x=df_bal8_10['phit'],
        y=df_bal8_10['net'],
        mode='markers',
        marker=dict(color=df_bal8_10['feature'].map(feature_colors), size=10),
        text=df_bal8_10['well'],  # hover label
        hoverinfo='text + x + y',
    )
    fig = go.Figure()
    fig.add_trace(points)
    fig.update_layout(
        title='Net vs phit.sum() Bal8_10',
        xaxis_title='phit',
        yaxis_title='net',
        hovermode='closest',
        width=500,
        height=500,
        margin=dict(l=0, r=10, t=50, b=0),
        showlegend=False,
    )
    fig.show()
# Interactive net vs summed-phit cross-plot for unit 'Balakhany VIII 10'.
plotly_phit_net(df_bal8_10=df_bal8_10)

## upload las from cube

In [None]:
# folder_path = r'C:\Petrel\exports\cube_logs'
# las_files = [f for f in os.listdir(folder_path) if f.endswith('.las')]
# df_lst = []
# for las_file in tqdm(las_files):
#     file_path = os.path.join(folder_path, las_file)
#     las = lasio.read(file_path)
#     df = las.df().reset_index()
#     df['well'] = las_file.split('.')[0]
#     df = df[['well','DEPT','GR_CUBE']]
#     df['DEPT'] = df['DEPT'].round(1)
#     df_lst.append(df)
# gr_cube = pd.concat(df_lst)
# gr_cube.columns = gr_cube.columns.str.lower()

In [20]:
# gr_cube = gr_cube.rename(columns={'dept':'md', 'gr_cube':'vsh_cube'})
# gr_cube_v2 = gr_cube.dropna()
# gr_cube_v2.to_csv(r'C:\Petrel\exports\cube_logs\gr_cube_v2.csv', index=False)

## logs_ntd_v5 & gr_cube

In [7]:
# Cube-extracted log table previously exported to CSV.
gr_cube_v2 = pd.read_csv(
    filepath_or_buffer=r'C:\Petrel\exports\cube_logs\gr_cube_v2.csv',
)

In [8]:
# Attach the cube columns to both log sets on (well, md); join() defaults to a
# left join, so every log row is kept.
gr_cube_by_md = gr_cube_v2.set_index(['well', 'md'])
logs8_ntd_v2 = logs8_ntd.set_index(['well', 'md']).join(gr_cube_by_md).reset_index()
logs10_ntd_v2 = logs10_ntd.set_index(['well', 'md']).join(gr_cube_by_md).reset_index()

In [2]:
# logs8_ntd_v2.columns
# wellname = 'B02'
# data = logs8_ntd_v2[logs8_ntd_v2.well == wellname]
# depth = data.tst 
# vsh_log = data.vsh
# vsh_cube = data.vsh_cube
# fig, ax = plt.subplots(3, 5, figsize=(15,21))
# ax[0,0].plot(vsh_log, depth, color='gray')
# ax[0,0].plot(vsh_cube, depth, color='red')
# ax[0,0].invert_yaxis()
# ax[0,0].grid(True, linestyle='--', linewidth=0.5)
# ax[0,0].set_xlabel('VSH')
# ax[0,0].title.set_text(wellname + ' Bal8')
# ax[0,0].legend(['VSH log', 'VSH cube']);

In [9]:
def form_field_renames(logs8_ntd):
    """Prefix formation and field names with an ordering digit.

    The frame is modified in place and also returned. Values not listed in the
    tables below are left untouched.
    """
    relabel = {
        'formation': {
            'Balakhany VIII 5': '6_Balakhany VIII 5',
            'Balakhany VIII 10': '5_Balakhany VIII 10',
            'Balakhany VIII 15': '4_Balakhany VIII 15',
            'Balakhany VIII 20': '3_Balakhany VIII 20',
            'Balakhany VIII 25': '2_Balakhany VIII 25',
            'Balakhany VIII sand': '1_Balakhany VIII sand',
        },
        'field': {
            'WEST AZERI': '1_WEST AZERI',
            'CENTRAL AZERI': '2_CENTRAL AZERI',
            'EAST AZERI': '3_EAST AZERI',
        },
    }
    for column, names in relabel.items():
        for old_name, new_name in names.items():
            logs8_ntd.loc[logs8_ntd[column] == old_name, column] = new_name
    return logs8_ntd
# Relabel formations and fields with ordering prefixes (the frame is updated in place).
logs8_ntd_v2 = form_field_renames(logs8_ntd=logs8_ntd_v2)

In [12]:
def fu_metrics_calculation(dataset):
    """Summarise each (well, formation) group over rows with phit_flag == 1.

    Returns a DataFrame with columns well, formation, khtst_fu (first minus
    last khtst sample), vsh_fu (mean vsh) and net_fu (sum of net).
    """
    flagged = dataset.loc[dataset['phit_flag'] == 1]
    per_unit = flagged.groupby(['well', 'formation'])
    df_merged = per_unit.agg(
        khtst_fu=('khtst', lambda samples: samples.iloc[0] - samples.iloc[-1]),
        vsh_fu=('vsh', 'mean'),
        net_fu=('net', 'sum'),
    ).reset_index()
    return df_merged
# Broadcast the per-(well, formation) metrics back onto every log row.
df_merged = fu_metrics_calculation(dataset=logs8_ntd_v2)
fu_keys = ['well', 'formation']
logs8_ntd_v3 = logs8_ntd_v2.set_index(fu_keys).join(df_merged.set_index(fu_keys)).reset_index()

In [15]:
# Persist the enriched log tables.
# NOTE(review): the Balakhany X table goes to 'logs10_ntd_v4.csv' while the
# Balakhany VIII table goes to a '_v5' file — confirm the '_v4' target for
# logs10 is intended and not a leftover name.
logs8_ntd_v3.to_csv(
    r'C:\jupyter\SPP\inputoutput\general_logs\logs8_ntd_v5.csv',
    index=False,
)
logs10_ntd_v2.to_csv(
    r'C:\jupyter\SPP\inputoutput\general_logs\logs10_ntd_v4.csv',
    index=False,
)

# Spotfire logs

In [None]:
# Reload the exported Balakhany VIII table and list its columns.
spotfire_logs = pd.read_csv(
    filepath_or_buffer=r'C:\jupyter\SPP\inputoutput\general_logs\logs8_ntd_v5.csv',
)
spotfire_logs.columns

In [None]:
# Keep only the identifier columns and the curves needed for the export.
spotfire_columns = ['well', 'tst', 'gr_n', 'vsh', 'npss', 'rhob', 'rdeep', 'vsh_cube']
spotfire_logs = spotfire_logs.loc[:, spotfire_columns]
spotfire_logs
# spotfire_logs.to_csv(r'C:\jupyter\SPP\inputoutput\spotfire_logs_test.csv', index=False)

In [20]:
import pandas as pd

# Reshape to long format: one row per (well, tst, measurement) with its value.
melted_spotfire_logs = spotfire_logs.melt(
    id_vars=['well', 'tst'],
    var_name='measurement',
    value_name='value',
)

# Write the long table to CSV.
melted_spotfire_logs.to_csv(
    r'C:\jupyter\SPP\inputoutput\spotfire_logs_test.csv',
    index=False,
)

# Fluids data upload

In [112]:
# Balakhany VIII logs without the saved index column. The path is a raw string
# so the Windows backslashes are taken literally instead of being parsed as
# (invalid) escape sequences.
df_bal8_azr_v4 = pd.read_csv(r'C:\jupyter\SPP\inputoutput\general_logs\df_bal8_azr_v4.csv').drop('Unnamed: 0', axis=1)
# fluids = pd.read_csv(r'C:\jupyter\SPP\input\fluidcode_sql_tl.csv', names = ['well', 'MD','fluid_code'])
# fluids['fluid_code'] = fluids['fluid_code'].fillna(0)
# fluids8_v2 = df_bal8_azr_v4[df_bal8_azr_v4.phit_flag==1].set_index(['well','MD']).join(fluids.set_index(['well','MD']))

In [113]:
# Balakhany X logs without the saved index column. The path is a raw string
# so the Windows backslashes are taken literally instead of being parsed as
# (invalid) escape sequences.
df_bal10_vshclp2_v4 = pd.read_csv(r'C:\jupyter\SPP\inputoutput\general_logs\df_bal10_vshclp2_v4.csv').drop('Unnamed: 0', axis=1)
# fluids10_v2 = df_bal10_vshclp2_v4[df_bal10_vshclp2_v4.phit_flag==1].set_index(['well','MD']).join(fluids.set_index(['well','MD']))

In [None]:
# Per well: number of non-null fluid_code values against non-null NET values,
# with a dashed 1:1 reference line.
fluids8_v3 = (
    df_bal8_azr_v4
    .groupby('well')[['fluid_code', 'NET']]
    .count()
    .reset_index()
)
sns.scatterplot(x='NET', y='fluid_code', data=fluids8_v3)
sns.lineplot(x=[0, 4000], y=[0, 4000], color='red', linestyle='--')

In [None]:
# Per well: number of non-null fluid_code values against non-null NET values.
# Wells with a NET count of 6000 or more are left out of the plot.
fluids10_v3 = (
    df_bal10_vshclp2_v4
    .groupby('well')[['fluid_code', 'NET']]
    .count()
    .reset_index()
)
sns.scatterplot(x='NET', y='fluid_code', data=fluids10_v3.loc[fluids10_v3['NET'] < 6000])
sns.lineplot(x=[0, 6000], y=[0, 6000], color='red', linestyle='--')

# Additional plots

In [None]:
# Per-well sums of NET_orig and NET_smooth (scaled by 0.1) over rows where
# NET_smooth is present, compared on a labelled scatter with a 1:1 line.
smooth = df_bal8_azr_v4.loc[df_bal8_azr_v4['NET_smooth'].notna()]
smooth_gb = smooth.groupby('well')[['NET_smooth', 'NET_orig']].sum().reset_index()
for net_column in ('NET_smooth', 'NET_orig'):
    smooth_gb[net_column] = smooth_gb[net_column] * 0.1
fig, ax = plt.subplots(figsize=(7,7))
sns.scatterplot(data=smooth_gb, x='NET_orig', y='NET_smooth', c='green', s=150, alpha=0.5, ec='black')
sns.lineplot(x=[35,85], y=[35,85], color='red', linestyle='--')
# Write each well's name next to its point.
for x_pos, y_pos, well_label in zip(smooth_gb['NET_orig'], smooth_gb['NET_smooth'], smooth_gb['well']):
    ax.text(x_pos, y_pos, well_label,
            horizontalalignment='left', size='medium', color='black')
plt.grid(True, which='both', linestyle='--', linewidth=0.5)
plt.title('NET_orig sum vs NET_smooth sum Bal8');

# Mean difference between the two sums, and the table size.
smooth_gb['diff'] = smooth_gb['NET_orig'] - smooth_gb['NET_smooth']
print(smooth_gb['diff'].mean().round(1))
print(smooth_gb.shape)

In [None]:
def well_gr_smooth_display(dataset, formation, var, well):
    """Show a two-track plotly log display for one well and formation.

    Track 1 (x range 0..1): VSH, VSH_smooth and NET_smooth * 100 as a filled curve.
    Track 2 (x range 0.35..0): PHIT, a constant 0.13 cutoff line and the
    column named by `var` as a filled curve. Depth (TST) increases downwards.
    """
    rows_mask = (dataset.well == well) & (dataset.FORMATION_up == formation)
    well_sel = dataset[rows_mask]
    depth = well_sel['TST']
    net_line_color = 'rgba(245, 190, 20, 0.75)'
    net_fill_color = 'rgba(245, 190, 20, 0.25)'

    fig = make_subplots(rows=1, cols=2, subplot_titles=('VSH', 'PHIT'))

    # Track 1: shale volume curves and the smoothed net flag.
    fig.add_trace(go.Scatter(x=well_sel['VSH'], y=depth, mode='lines', name='VSH',
                             line=dict(color='green')), row=1, col=1)
    fig.add_trace(go.Scatter(x=well_sel['VSH_smooth'], y=depth, mode='lines', name='VSH',
                             line=dict(color='lightgreen')), row=1, col=1)
    fig.add_trace(go.Scatter(x=well_sel['NET_smooth'] * 100, y=depth, mode='lines', name='NET',
                             line=dict(color=net_line_color),
                             fill='tozerox', fillcolor=net_fill_color), row=1, col=1)

    # Track 2: porosity, its cutoff and the requested net flag.
    fig.add_trace(go.Scatter(x=well_sel['PHIT'], y=depth, mode='lines', name='PHIT',
                             line=dict(color='firebrick', width=2)), row=1, col=2)
    fig.add_trace(go.Scatter(x=[0.13] * len(depth), y=depth, mode='lines', name='PHIT',
                             line=dict(color='black', width=2, dash='dash')), row=1, col=2)
    fig.add_trace(go.Scatter(x=well_sel[var], y=depth, mode='lines', name='NET',
                             marker=dict(color=net_line_color),
                             fill='tozerox', fillcolor=net_fill_color), row=1, col=2)

    fig.update_xaxes(range=[0, 1], row=1, col=1)
    fig.update_xaxes(range=[0.35, 0], row=1, col=2)
    fig.update_yaxes(autorange="reversed")
    fig.update_layout(height=900, width=400,
                      title=dict(text=well, font=dict(size=24)),
                      showlegend=False,
                      margin=dict(l=10, r=10, b=10, t=50))
    fig.show()
# Log display for well B20 in Balakhany VIII, using NET_orig on the porosity track.
well_gr_smooth_display(dataset=df_bal8_azr_v4, formation='Balakhany VIII', var='NET_orig', well='B20')