# Libs

In [1]:
import warnings
warnings.filterwarnings('ignore')

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.graph_objects as go
from tqdm import tqdm
import segyio
from scipy import ndimage as ndi
from shutil import copyfile
from scipy.ndimage import gaussian_filter

# Notebook-wide pandas display settings.
# NOTE(review): the float_format callable below presumably takes precedence over
# `precision` when floats are rendered — confirm and drop one of the two.
pd.options.display.precision = 3
pd.options.display.float_format = lambda x: '%.5f' % x  # floats shown with 5 decimals
pd.options.display.max_columns = 15
pd.options.display.max_rows = 6

# Data

In [None]:
# Load the bal8 / bal10 inputs: one table from `general_logs` and one from `layers` each.
# NOTE(review): absolute Windows paths — the notebook only runs where this layout exists.
df8 = pd.read_csv(r'C:\jupyter\SPP\inputoutput\general_logs\logs8_ntd_v5.csv')
# The layer CSVs contain a saved index column ('Unnamed: 0'), dropped on load.
df8_ntd = pd.read_csv(r'C:\jupyter\SPP\inputoutput\layers\ntd_top_phi_bot8_bp_v4.csv').drop(columns=['Unnamed: 0'])
df10 = pd.read_csv(r'C:\jupyter\SPP\inputoutput\general_logs\logs10_ntd_v4.csv')
df10_ntd = pd.read_csv(r'C:\jupyter\SPP\inputoutput\layers\ntd_top_phi_bot10_bp_v4.csv').drop(columns=['Unnamed: 0'])
# Show the column names available in the bal10 log table.
df10.columns

# Processing bal8

In [18]:
# 1. build the set of channel wells to present +
# 2. check their properties +
# 3. apply the 3 types of correlations to the vsh logs +

In [None]:
# Quartiles of htst over the bal8 layers with htst >= 2, shown as a one-row table.
quantiles = df8_ntd.loc[df8_ntd['htst'] >= 2, 'htst'].quantile([0.25, 0.5, 0.75])
quantiles_v2 = quantiles.to_frame().T
display(quantiles_v2)

# Minimum htst for a bal8 layer to be treated as a channel.
channel_cutoff8 = 10
def chanel_data_preproc(df, df_ntd, channel_cutoff, add_wells):
    """Select the wells that contain thick ("channel") layers and flag their samples.

    Parameters
    ----------
    df : pandas.DataFrame
        Log table; the columns read here are ``well``, ``net``, ``vsh`` and ``htst_ntd``.
    df_ntd : pandas.DataFrame
        Layer table; the columns read here are ``well`` and ``htst``.
    channel_cutoff : float
        Minimum ``htst`` (layers) / ``htst_ntd`` (samples) to count as a channel.
    add_wells : list-like of str
        Wells to include even when none of their layers reaches the cutoff.

    Returns
    -------
    df_big : pandas.DataFrame
        Independent copy of the rows of ``df`` belonging to the selected wells, with
        ``vsh_clip`` (``vsh`` where ``net == 1``, else 1.0) and ``net_chanel``
        (1 where ``htst_ntd >= channel_cutoff``, else 0) added.
    df_ntd_big_v2 : pandas.DataFrame
        Layers of ``df_ntd`` with ``htst >= channel_cutoff`` plus ``qnt_layers``,
        the number of such layers in the same well.
    """
    df_ntd_big = df_ntd[df_ntd['htst'] >= channel_cutoff]
    qnt_layers = df_ntd_big.groupby('well')['well'].count()
    df_ntd_big_v2 = (
        df_ntd_big.set_index('well')
        .join(qnt_layers)
        .rename(columns={'well': 'qnt_layers'})
        .reset_index()
    )

    well_big = np.append(df_ntd_big_v2['well'].unique(), add_wells)

    # Explicit copy: the new columns must go onto our own frame, not onto a
    # slice of the caller's ``df`` (chained assignment).
    df_big = df[df['well'].isin(well_big)].copy()
    # Built as float in one step; writing float vsh values into an int column
    # is a deprecated dtype upcast in recent pandas.
    df_big['vsh_clip'] = np.where(df_big['net'] == 1, df_big['vsh'], 1.0)
    df_big['net_chanel'] = (df_big['htst_ntd'] >= channel_cutoff).astype('int64')
    return df_big, df_ntd_big_v2
# Initial bal8 selection: only wells with a layer reaching the cutoff, no manual additions.
df8_big, df8_ntd_big_v2 = chanel_data_preproc(df8, df8_ntd, channel_cutoff8, [])

def field_qnt(df):
    """Display a one-row table with the number of wells per field.

    Each well is attributed to the first ``field`` value found among its rows.
    """
    first_field_per_well = df.groupby('well')['field'].first().reset_index()
    wells_per_field = first_field_per_well.value_counts('field')
    display(wells_per_field.to_frame().T)
# Wells per field in the initial bal8 channel selection.
field_qnt(df8_big)

# Analyse bal8

In [None]:
def box_plot_channels(df, df_big, channel_cutoff, comment):
    """Compare channel and non-channel wells on two side-by-side panels.

    Left panel: box plot of the per-well ``khtst`` value by group.
    Right panel: per-well summed ``net`` (scaled by 0.1) against summed ``phit``.

    Parameters
    ----------
    df : pandas.DataFrame
        Log table; reads ``well``, ``phit_flag``, ``khtst``, ``net`` and ``phit``.
        Only rows with ``phit_flag == 1`` are used. The frame is not modified.
    df_big : pandas.DataFrame
        Frame whose ``well`` values define the 'channel' group.
    channel_cutoff : float
        Cutoff shown in the box-plot title (not used in any computation here).
    comment : str
        Prefix for both panel titles.

    Returns
    -------
    pandas.DataFrame
        One row per well with ``well``, integer ``khtst`` and ``channel_group``,
        sorted by ``channel_group``.
    """
    custom_palette = {'channel': 'orange', 'non-channel': 'green'}
    channel_wells = df_big['well'].unique()
    flagged = df[df.phit_flag == 1]

    # Round the single per-well value rather than writing a rounded column into
    # a filtered slice: no chained assignment, and NaN rows that first() skips
    # no longer make astype(int) fail.
    df_khtst = flagged.groupby('well')['khtst'].first().round(0).astype(int).reset_index()
    df_khtst['channel_group'] = 'non-channel'
    df_khtst.loc[df_khtst['well'].isin(channel_wells), 'channel_group'] = 'channel'
    df_khtst = df_khtst.sort_values('channel_group')

    fig, ax = plt.subplots(1, 2, figsize=(15, 6))
    sns.boxplot(data=df_khtst, x='channel_group', y='khtst', hue='channel_group',
                palette=custom_palette, ax=ax[0])
    ax[0].set_title(comment + 'khtst distribution channel:' + str(channel_cutoff) + 'm')
    ax[0].grid()

    sum_wells = flagged[flagged.net == 1].groupby('well')[['net', 'phit']].sum().reset_index()
    sum_wells = sum_wells.rename(columns={'net': 'net_sum', 'phit': 'phit_sum'})
    # presumably 0.1 is the sample step, turning a sample count into a thickness — TODO confirm
    sum_wells['net_sum'] = sum_wells['net_sum'] * 0.1
    sum_wells = sum_wells[sum_wells['net_sum'] >= 10].copy()
    sum_wells['channel_group'] = 'non-channel'
    sum_wells.loc[sum_wells['well'].isin(channel_wells), 'channel_group'] = 'channel'
    sns.scatterplot(data=sum_wells, x='net_sum', y='phit_sum', hue='channel_group',
                    palette=custom_palette, ax=ax[1])
    ax[1].set_title(comment + 'net_sum vs phit_sum wells basis')
    ax[1].grid()
    return df_khtst
# Draw the bal8 comparison and keep the per-well khtst table.
df8_khtst = box_plot_channels(df8, df8_big, channel_cutoff8, 'bal8 ')
# Non-channel wells whose khtst exceeds 15000, transposed for display.
df8_khtst[(df8_khtst.channel_group == 'non-channel') & (df8_khtst.khtst > 15000)].T

In [None]:
def chanel_data_preproc(df, df_ntd, channel_cutoff, add_wells):
    """Select the wells that contain thick ("channel") layers and flag their samples.

    Parameters
    ----------
    df : pandas.DataFrame
        Log table; the columns read here are ``well``, ``net``, ``vsh`` and ``htst_ntd``.
    df_ntd : pandas.DataFrame
        Layer table; the columns read here are ``well`` and ``htst``.
    channel_cutoff : float
        Minimum ``htst`` (layers) / ``htst_ntd`` (samples) to count as a channel.
    add_wells : list-like of str
        Wells to include even when none of their layers reaches the cutoff.

    Returns
    -------
    df_big : pandas.DataFrame
        Independent copy of the rows of ``df`` belonging to the selected wells, with
        ``vsh_clip`` (``vsh`` where ``net == 1``, else 1.0) and ``net_chanel``
        (1 where ``htst_ntd >= channel_cutoff``, else 0) added.
    df_ntd_big_v2 : pandas.DataFrame
        Layers of ``df_ntd`` with ``htst >= channel_cutoff`` plus ``qnt_layers``,
        the number of such layers in the same well.
    """
    df_ntd_big = df_ntd[df_ntd['htst'] >= channel_cutoff]
    qnt_layers = df_ntd_big.groupby('well')['well'].count()
    df_ntd_big_v2 = (
        df_ntd_big.set_index('well')
        .join(qnt_layers)
        .rename(columns={'well': 'qnt_layers'})
        .reset_index()
    )

    well_big = np.append(df_ntd_big_v2['well'].unique(), add_wells)

    # Explicit copy: the new columns must go onto our own frame, not onto a
    # slice of the caller's ``df`` (chained assignment).
    df_big = df[df['well'].isin(well_big)].copy()
    # Built as float in one step; writing float vsh values into an int column
    # is a deprecated dtype upcast in recent pandas.
    df_big['vsh_clip'] = np.where(df_big['net'] == 1, df_big['vsh'], 1.0)
    df_big['net_chanel'] = (df_big['htst_ntd'] >= channel_cutoff).astype('int64')
    return df_big, df_ntd_big_v2
# Final bal8 selection: cutoff-based wells plus three wells added by hand.
# (An earlier manual list was ['B01Y', 'B26', 'C16'].)
df8_big, df8_ntd_big_v2 = chanel_data_preproc(df8, df8_ntd, channel_cutoff8, ['B01Y', 'B23Z', 'B19'])

def field_qnt(df, df_big, channel_cutoff):
    """Display wells-per-field for the channel subset and print the overall share.

    Parameters
    ----------
    df : pandas.DataFrame
        Full log table; only the number of distinct ``well`` values is used.
    df_big : pandas.DataFrame
        Channel-well subset; reads ``well`` and ``field``. Each well is counted
        under the first ``field`` value found among its rows.
    channel_cutoff : float
        Cutoff quoted in the printed summary.
    """
    fields_qnt = df_big.groupby('well')['field'].first().reset_index()
    wells_per_field = fields_qnt.value_counts('field')
    display(pd.DataFrame(wells_per_field).T)

    # Sum the counts Series directly rather than via a 'count' column: that
    # column label is only produced by pandas >= 2.0.
    n_channel_wells = wells_per_field.sum()
    n_wells = len(df['well'].unique())
    print(f"wells with chanells up to {channel_cutoff} m: {n_channel_wells} out of {n_wells} wells")

# Wells per field for the final bal8 channel selection, with the share of all bal8 wells.
field_qnt(df8, df8_big, channel_cutoff8)

In [6]:
# Mark every bal8 row by whether its well is in the channel-well subset.
# NOTE: this adds a column to df8 in place.
df8['channel_presence'] = 'no'
df8.loc[df8.well.isin(df8_big.well.unique()), 'channel_presence'] = 'yes'
# Attach the channel-only columns to the full table by (well, tst); rows whose
# (well, tst) is absent from df8_big get NaN in vsh_clip / net_chanel.
# NOTE(review): assumes (well, tst) is unique in df8_big, otherwise rows multiply — confirm.
df8_full = df8.set_index(['well','tst']).join(
    df8_big[['well','tst','vsh_clip','net_chanel']].set_index(['well','tst'])).reset_index()
# NOTE(review): written without index=False, so the CSV gets an extra unnamed
# index column — confirm downstream readers expect it.
df8_full.to_csv(r'C:\jupyter\SPP\output\channels\logs8_full.csv')

# Histo & csv bal8

In [27]:
def channels8_fu_creation(df8_big):
    """Summarise channel thickness per well and formation for bal8.

    Parameters
    ----------
    df8_big : pandas.DataFrame
        Channel-well logs; reads ``well``, ``net_chanel``, ``formation``, ``tst``,
        ``xmean`` and ``ymean``. The frame is not modified.

    Returns
    -------
    pandas.DataFrame
        One row per (well, formation) with ``net_chanel == 1`` samples and
        ``htst_channel > 2``. Columns: ``well``, ``channel_fm``, ``net_chanel``,
        ``htst_channel`` (sample count * 0.1), ``field`` (derived from the first
        letter of the well name, NaN for other prefixes), ``xmean``, ``ymean``.
    """
    prefix_to_field = {'B': '2_central', 'C': '1_west', 'D': '3_east'}

    sample_counts = df8_big.groupby(['well', 'net_chanel', 'formation'])[['tst']].count().reset_index()
    channels8_fu = sample_counts[sample_counts.net_chanel == 1].copy()
    channels8_fu['field'] = channels8_fu['well'].str[0].map(prefix_to_field)
    # presumably 0.1 is the sample step, turning a sample count into a thickness — TODO confirm
    channels8_fu['tst'] = channels8_fu['tst'] * 0.1
    channels8_fu = channels8_fu.rename(columns={'tst': 'htst_channel'})

    xy8 = df8_big.groupby(['well', 'formation'])[['xmean', 'ymean']].first()
    channels8_fu = channels8_fu.set_index(['well', 'formation']).join(xy8).reset_index()

    channels8_fu = channels8_fu[channels8_fu.htst_channel > 2]
    # The former trailing if/else only rebound a local name that was never
    # returned, and its else-branch join would raise on the overlapping
    # 'channel_fm' column; it has been removed.
    return channels8_fu.rename(columns={'formation': 'channel_fm'})
# Build the bal8 channel summary and save it (no index column in the file).
channels8_fu = channels8_fu_creation(df8_big)
channels8_fu.to_csv(r'C:\jupyter\SPP\output\channels\logs8_big_channels.csv', index=False)

In [None]:
def bar_chart_channels(channels_fu, field_name):
    """Bar chart of channel thickness per well for one field, coloured by formation.

    Parameters
    ----------
    channels_fu : pandas.DataFrame
        Channel summary; reads ``field``, ``well``, ``channel_fm`` and the
        thickness column (``htst_channel``, or ``tst`` if the frame still
        carries the un-renamed column).
    field_name : str
        Value of ``field`` to plot; also used in the title.
    """
    data = channels_fu[channels_fu.field == field_name].sort_values('channel_fm')
    # The summary frames name the thickness column 'htst_channel'; plotting
    # 'tst' (as before) cannot resolve the column on those frames.
    y_col = 'htst_channel' if 'htst_channel' in data.columns else 'tst'
    plt.figure(figsize=(15, 6))
    sns.barplot(data=data, x='well', y=y_col, hue='channel_fm', width=1)
    plt.xticks(rotation=45)
    plt.title('Channels in ' + field_name)

# One bar chart per bal8 field label assigned in channels8_fu_creation.
bar_chart_channels(channels8_fu, '1_west')
bar_chart_channels(channels8_fu, '2_central')
bar_chart_channels(channels8_fu, '3_east')

# Final plot bal8

In [None]:
def logs_display(df_big, field_sel, gaus_f, channel_cutoff, row=2, col=6):
    """Plot the vsh logs of one field's wells on a ``row`` x ``col`` grid of panels.

    Each well gets one panel (filled row by row) showing ``vsh_clip`` (green),
    its Gaussian-smoothed version (red), ``vsh_cube`` (black, when the column
    exists) and the ``net_chanel`` flag as an orange filled curve, all against
    ``tst`` on an inverted y axis.

    Parameters
    ----------
    df_big : pandas.DataFrame
        Channel-well logs; reads ``field``, ``well``, ``tst``, ``vsh_clip``,
        ``net_chanel`` and optionally ``vsh_cube``.
    field_sel : str
        Value of ``field`` whose wells are plotted.
    gaus_f : float
        ``sigma`` passed to ``gaussian_filter`` for the smoothed curve.
    channel_cutoff : float
        Cutoff quoted in each panel title.
    row, col : int
        Grid shape. At most ``row * col`` wells are drawn; further wells are
        skipped and unused panels stay empty.
    """
    field = df_big[df_big.field == field_sel]
    wells = field.well.unique()
    # squeeze=False always yields a 2-D axes array, so one loop serves every
    # grid shape, including a single row or a single column.
    fig, ax = plt.subplots(row, col, figsize=(col*2, row*3), squeeze=False)

    # zip stops at the shorter sequence. Previously `break` left only the inner
    # loop, so the last well was redrawn in the first panel of each later row,
    # and a field without wells raised IndexError.
    for panel, wellname in zip(ax.flat, wells):
        data = field[field.well == wellname]
        vsh_clip_smoothed = gaussian_filter(data.vsh_clip, sigma=gaus_f)
        panel.plot(data.vsh_clip, data.tst, c='green', alpha=0.5, lw=2)
        # Drawn for every layout now; the single-row layout used to omit it.
        if 'vsh_cube' in data.columns:
            panel.plot(data.vsh_cube, data.tst, c='black', alpha=1, lw=1, zorder=3)
        panel.plot(vsh_clip_smoothed, data.tst, c='red', alpha=1, lw=1)
        panel.plot(data.net_chanel, data.tst, c='orange', alpha=0.5)
        panel.fill_betweenx(data.tst, 0, data.net_chanel, color='orange', alpha=0.5)
        panel.set_title(wellname + ' ch:' + str(channel_cutoff) + 'm')
        panel.invert_yaxis()
        panel.grid()
    plt.tight_layout()

# bal8 log panels per field, smoothing sigma 10; the last two arguments are the
# grid rows and columns, which bound the number of wells drawn (rows * cols).
logs_display(df8_big, '2_CENTRAL AZERI',10, channel_cutoff8, 4, 6)
logs_display(df8_big, '3_EAST AZERI', 10, channel_cutoff8, 3, 6)
logs_display(df8_big, '1_WEST AZERI',10, channel_cutoff8, 2, 6)

In [1]:
# TODO: estimate the contribution of channels to the total KHtst

# Processing bal10

In [None]:
# Quartiles of htst over the bal10 layers with htst >= 2, shown as a one-row table.
quantiles = df10_ntd.loc[df10_ntd['htst'] >= 2, 'htst'].quantile([0.25, 0.5, 0.75])
quantiles_v2 = quantiles.to_frame().T
display(quantiles_v2)

# Minimum htst for a bal10 layer to be treated as a channel.
channel_cutoff10 = 8
def chanel_data_preproc(df, df_ntd, channel_cutoff, add_wells):
    """Select the wells that contain thick ("channel") layers and flag their samples.

    Parameters
    ----------
    df : pandas.DataFrame
        Log table; the columns read here are ``well``, ``net``, ``vsh`` and ``htst_ntd``.
    df_ntd : pandas.DataFrame
        Layer table; the columns read here are ``well`` and ``htst``.
    channel_cutoff : float
        Minimum ``htst`` (layers) / ``htst_ntd`` (samples) to count as a channel.
    add_wells : list-like of str
        Wells to include even when none of their layers reaches the cutoff.

    Returns
    -------
    df_big : pandas.DataFrame
        Independent copy of the rows of ``df`` belonging to the selected wells, with
        ``vsh_clip`` (``vsh`` where ``net == 1``, else 1.0) and ``net_chanel``
        (1 where ``htst_ntd >= channel_cutoff``, else 0) added.
    df_ntd_big_v2 : pandas.DataFrame
        Layers of ``df_ntd`` with ``htst >= channel_cutoff`` plus ``qnt_layers``,
        the number of such layers in the same well.
    """
    df_ntd_big = df_ntd[df_ntd['htst'] >= channel_cutoff]
    qnt_layers = df_ntd_big.groupby('well')['well'].count()
    df_ntd_big_v2 = (
        df_ntd_big.set_index('well')
        .join(qnt_layers)
        .rename(columns={'well': 'qnt_layers'})
        .reset_index()
    )

    well_big = np.append(df_ntd_big_v2['well'].unique(), add_wells)

    # Explicit copy: the new columns must go onto our own frame, not onto a
    # slice of the caller's ``df`` (chained assignment).
    df_big = df[df['well'].isin(well_big)].copy()
    # Built as float in one step; writing float vsh values into an int column
    # is a deprecated dtype upcast in recent pandas.
    df_big['vsh_clip'] = np.where(df_big['net'] == 1, df_big['vsh'], 1.0)
    df_big['net_chanel'] = (df_big['htst_ntd'] >= channel_cutoff).astype('int64')
    return df_big, df_ntd_big_v2
# Initial bal10 selection: only wells with a layer reaching the cutoff, no manual additions.
df10_big, df10_ntd_big_v2 = chanel_data_preproc(df10, df10_ntd, channel_cutoff10, [])

def field_qnt(df):
    """Display a one-row table with the number of wells per field.

    Each well is attributed to the first ``field`` value found among its rows.
    """
    first_field_per_well = df.groupby('well')['field'].first().reset_index()
    wells_per_field = first_field_per_well.value_counts('field')
    display(wells_per_field.to_frame().T)
# Wells per field in the initial bal10 channel selection.
field_qnt(df10_big)

# Analyse bal10

In [None]:
def box_plot_channels(df, df_big, channel_cutoff, comment):
    """Compare channel and non-channel wells on two side-by-side panels.

    Left panel: box plot of the per-well ``khtst`` value by group.
    Right panel: per-well summed ``net`` (scaled by 0.1, ``htst_sum``) against
    summed ``phit``, restricted to wells with ``phit_sum < 100``.

    Parameters
    ----------
    df : pandas.DataFrame
        Log table; reads ``well``, ``phit_flag``, ``khtst``, ``net`` and ``phit``.
        Only rows with ``phit_flag == 1`` are used. The frame is not modified.
    df_big : pandas.DataFrame
        Frame whose ``well`` values define the 'channel' group.
    channel_cutoff : float
        Cutoff shown in the box-plot title (not used in any computation here).
    comment : str
        Prefix for both panel titles.

    Returns
    -------
    pandas.DataFrame
        One row per well with ``well``, integer ``khtst`` and ``channel_group``,
        sorted by ``channel_group``.
    """
    custom_palette = {'channel': 'orange', 'non-channel': 'green'}
    channel_wells = df_big['well'].unique()
    flagged = df[df.phit_flag == 1]

    # Round the single per-well value rather than writing a rounded column into
    # a filtered slice: no chained assignment, and NaN rows that first() skips
    # no longer make astype(int) fail.
    df_khtst = flagged.groupby('well')['khtst'].first().round(0).astype(int).reset_index()
    df_khtst['channel_group'] = 'non-channel'
    df_khtst.loc[df_khtst['well'].isin(channel_wells), 'channel_group'] = 'channel'
    df_khtst = df_khtst.sort_values('channel_group')

    fig, ax = plt.subplots(1, 2, figsize=(15, 6))
    sns.boxplot(data=df_khtst, x='channel_group', y='khtst', hue='channel_group',
                palette=custom_palette, ax=ax[0])
    ax[0].set_title(comment + 'khtst distribution channel:' + str(channel_cutoff) + 'm')
    ax[0].grid()

    sum_wells = flagged[flagged.net == 1].groupby('well')[['net', 'phit']].sum().reset_index()
    sum_wells = sum_wells.rename(columns={'net': 'htst_sum', 'phit': 'phit_sum'})
    # presumably 0.1 is the sample step, turning a sample count into a thickness — TODO confirm
    sum_wells['htst_sum'] = sum_wells['htst_sum'] * 0.1
    sum_wells = sum_wells[sum_wells['htst_sum'] >= 10].copy()
    sum_wells['channel_group'] = 'non-channel'
    sum_wells.loc[sum_wells['well'].isin(channel_wells), 'channel_group'] = 'channel'
    # NOTE(review): the 100 limit looks like an outlier filter for the plot — confirm intent.
    sum_wells = sum_wells[sum_wells['phit_sum'] < 100]
    sns.scatterplot(data=sum_wells, x='htst_sum', y='phit_sum', hue='channel_group',
                    palette=custom_palette, ax=ax[1])
    # Title and grid added so this panel matches the box-plot panel.
    ax[1].set_title(comment + 'htst_sum vs phit_sum wells basis')
    ax[1].grid()
    return df_khtst
# Draw the bal10 comparison and keep the per-well khtst table.
df10_khtst = box_plot_channels(df10, df10_big, channel_cutoff10, 'bal10 ')
# Non-channel wells whose khtst exceeds 6000, transposed for display.
df10_khtst[(df10_khtst.channel_group == 'non-channel') & (df10_khtst.khtst > 6000)].T

In [None]:
def chanel_data_preproc(df, df_ntd, channel_cutoff, add_wells):
    """Select the wells that contain thick ("channel") layers and flag their samples.

    Parameters
    ----------
    df : pandas.DataFrame
        Log table; the columns read here are ``well``, ``net``, ``vsh`` and ``htst_ntd``.
    df_ntd : pandas.DataFrame
        Layer table; the columns read here are ``well`` and ``htst``.
    channel_cutoff : float
        Minimum ``htst`` (layers) / ``htst_ntd`` (samples) to count as a channel.
    add_wells : list-like of str
        Wells to include even when none of their layers reaches the cutoff.

    Returns
    -------
    df_big : pandas.DataFrame
        Independent copy of the rows of ``df`` belonging to the selected wells, with
        ``vsh_clip`` (``vsh`` where ``net == 1``, else 1.0) and ``net_chanel``
        (1 where ``htst_ntd >= channel_cutoff``, else 0) added.
    df_ntd_big_v2 : pandas.DataFrame
        Layers of ``df_ntd`` with ``htst >= channel_cutoff`` plus ``qnt_layers``,
        the number of such layers in the same well.
    """
    df_ntd_big = df_ntd[df_ntd['htst'] >= channel_cutoff]
    qnt_layers = df_ntd_big.groupby('well')['well'].count()
    df_ntd_big_v2 = (
        df_ntd_big.set_index('well')
        .join(qnt_layers)
        .rename(columns={'well': 'qnt_layers'})
        .reset_index()
    )

    well_big = np.append(df_ntd_big_v2['well'].unique(), add_wells)

    # Explicit copy: the new columns must go onto our own frame, not onto a
    # slice of the caller's ``df`` (chained assignment).
    df_big = df[df['well'].isin(well_big)].copy()
    # Built as float in one step; writing float vsh values into an int column
    # is a deprecated dtype upcast in recent pandas.
    df_big['vsh_clip'] = np.where(df_big['net'] == 1, df_big['vsh'], 1.0)
    df_big['net_chanel'] = (df_big['htst_ntd'] >= channel_cutoff).astype('int64')
    return df_big, df_ntd_big_v2
# Final bal10 selection: cutoff-based wells plus two wells added by hand.
df10_big, df10_ntd_big_v2 = chanel_data_preproc(df10, df10_ntd, channel_cutoff10, ['E31', 'C16'])

def field_qnt(df, df_big, channel_cutoff):
    """Display wells-per-field for the channel subset and print the overall share.

    Parameters
    ----------
    df : pandas.DataFrame
        Full log table; only the number of distinct ``well`` values is used.
    df_big : pandas.DataFrame
        Channel-well subset; reads ``well`` and ``field``. Each well is counted
        under the first ``field`` value found among its rows.
    channel_cutoff : float
        Cutoff quoted in the printed summary.
    """
    fields_qnt = df_big.groupby('well')['field'].first().reset_index()
    wells_per_field = fields_qnt.value_counts('field')
    display(pd.DataFrame(wells_per_field).T)

    # Sum the counts Series directly rather than via a 'count' column: that
    # column label is only produced by pandas >= 2.0.
    n_channel_wells = wells_per_field.sum()
    n_wells = len(df['well'].unique())
    print(f"wells with chanells up to {channel_cutoff} m: {n_channel_wells} out of {n_wells} wells")

# Wells per field for the final bal10 channel selection, with the share of all bal10 wells.
field_qnt(df10, df10_big, channel_cutoff10)

In [33]:
# Mark every bal10 row by whether its well is in the channel-well subset.
# NOTE: this adds a column to df10 in place.
df10['channel_presence'] = 'no'
df10.loc[df10.well.isin(df10_big.well.unique()), 'channel_presence'] = 'yes'
# Attach the channel-only columns to the full table by (well, tst); rows whose
# (well, tst) is absent from df10_big get NaN in vsh_clip / net_chanel.
# NOTE(review): assumes (well, tst) is unique in df10_big, otherwise rows multiply — confirm.
df10_full = df10.set_index(['well','tst']).join(
    df10_big[['well','tst','vsh_clip','net_chanel']].set_index(['well','tst'])).reset_index()
# NOTE(review): written without index=False, so the CSV gets an extra unnamed
# index column — confirm downstream readers expect it.
df10_full.to_csv(r'C:\jupyter\SPP\output\channels\logs10_full.csv')

# Histo & csv bal10

In [35]:
def channels10_fu_creation(df10_big):
    """Summarise channel thickness per well and formation for bal10.

    Parameters
    ----------
    df10_big : pandas.DataFrame
        Channel-well logs; reads ``well``, ``net_chanel``, ``formation``, ``tst``,
        ``xmean`` and ``ymean``. The frame is not modified.

    Returns
    -------
    pandas.DataFrame
        One row per (well, formation) with ``net_chanel == 1`` samples and
        ``htst_channel > 2``. Columns: ``well``, ``channel_fm``, ``net_chanel``,
        ``htst_channel`` (sample count * 0.1), ``field`` (derived from the first
        letter of the well name, ``'field_name'`` for other prefixes),
        ``xmean``, ``ymean``.
    """
    prefix_to_field = {
        'A': 'chirag',
        'B': 'central_az',
        'C': 'west_az',
        'D': 'east_az',
        'E': 'dwg',
        'F': 'ddgg',
        'J': 'west_ch',
    }

    sample_counts = df10_big.groupby(['well', 'net_chanel', 'formation'])['tst'].count().reset_index()
    channels10_fu = sample_counts[sample_counts.net_chanel == 1].copy()
    # Wells with an unlisted prefix keep the 'field_name' placeholder.
    channels10_fu['field'] = channels10_fu['well'].str[0].map(prefix_to_field).fillna('field_name')
    # presumably 0.1 is the sample step, turning a sample count into a thickness — TODO confirm
    channels10_fu['tst'] = channels10_fu['tst'] * 0.1
    channels10_fu = channels10_fu.rename(columns={'tst': 'htst_channel'})

    xy10 = df10_big.groupby(['well', 'formation'])[['xmean', 'ymean']].first()
    channels10_fu = channels10_fu.set_index(['well', 'formation']).join(xy10).reset_index()

    channels10_fu = channels10_fu[channels10_fu.htst_channel > 2]
    # The former trailing if/else only rebound a local name that was never
    # returned, and its else-branch join would raise on the overlapping
    # 'channel_fm' column; it has been removed.
    return channels10_fu.rename(columns={'formation': 'channel_fm'})
# Build the bal10 channel summary and save it (no index column in the file).
channels10_fu = channels10_fu_creation(df10_big)
channels10_fu.to_csv(r'C:\jupyter\SPP\output\channels\logs10_big_channels.csv', index=False)

In [None]:
def bar_chart_channels(channels_fu, field_name):
    """Bar chart of channel thickness per well for one field, coloured by formation.

    Parameters
    ----------
    channels_fu : pandas.DataFrame
        Channel summary; reads ``field``, ``well``, ``channel_fm`` and the
        thickness column (``htst_channel``, or ``tst`` if the frame still
        carries the un-renamed column).
    field_name : str
        Value of ``field`` to plot; also used in the title.
    """
    data = channels_fu[channels_fu.field == field_name].sort_values('channel_fm')
    # The summary frames name the thickness column 'htst_channel'; plotting
    # 'tst' (as before) cannot resolve the column on those frames.
    y_col = 'htst_channel' if 'htst_channel' in data.columns else 'tst'
    plt.figure(figsize=(15, 6))
    sns.barplot(data=data, x='well', y=y_col, hue='channel_fm', width=1)
    plt.xticks(rotation=45)
    plt.title('Channels in ' + field_name)

# Bar charts for two of the bal10 field labels assigned in channels10_fu_creation.
bar_chart_channels(channels10_fu, 'west_ch')
bar_chart_channels(channels10_fu, 'chirag')
# (the remaining bal10 fields are not plotted here)

# Final plot bal10

In [None]:
def field_qnt(df):
    """Display a one-row table with the number of wells per field.

    Each well is attributed to the first ``field`` value found among its rows.
    """
    first_field_per_well = df.groupby('well')['field'].first().reset_index()
    wells_per_field = first_field_per_well.value_counts('field')
    display(wells_per_field.to_frame().T)
# Wells per field in the bal10 channel subset.
field_qnt(df10_big)

In [None]:
def logs_display(df_big, field_sel, gaus_f, channel_cutoff, row=2, col=6):
    """Plot the vsh logs of one field's wells on a ``row`` x ``col`` grid of panels.

    Each well gets one panel (filled row by row) showing ``vsh_clip`` (green),
    its Gaussian-smoothed version (red), ``vsh_cube`` (black, when the column
    exists) and the ``net_chanel`` flag as an orange filled curve, all against
    ``tst`` on an inverted y axis.

    Parameters
    ----------
    df_big : pandas.DataFrame
        Channel-well logs; reads ``field``, ``well``, ``tst``, ``vsh_clip``,
        ``net_chanel`` and optionally ``vsh_cube``.
    field_sel : str
        Value of ``field`` whose wells are plotted.
    gaus_f : float
        ``sigma`` passed to ``gaussian_filter`` for the smoothed curve.
    channel_cutoff : float
        Cutoff quoted in each panel title.
    row, col : int
        Grid shape. At most ``row * col`` wells are drawn; further wells are
        skipped and unused panels stay empty.
    """
    field = df_big[df_big.field == field_sel]
    wells = field.well.unique()
    # squeeze=False always yields a 2-D axes array, so one loop serves every
    # grid shape, including a single row or a single column.
    fig, ax = plt.subplots(row, col, figsize=(col*2, row*3), squeeze=False)

    # zip stops at the shorter sequence. Previously `break` left only the inner
    # loop, so the last well was redrawn in the first panel of each later row,
    # and a field without wells raised IndexError.
    for panel, wellname in zip(ax.flat, wells):
        data = field[field.well == wellname]
        vsh_clip_smoothed = gaussian_filter(data.vsh_clip, sigma=gaus_f)
        panel.plot(data.vsh_clip, data.tst, c='green', alpha=0.5, lw=2)
        # Drawn for every layout now; the single-row layout used to omit it.
        if 'vsh_cube' in data.columns:
            panel.plot(data.vsh_cube, data.tst, c='black', alpha=1, lw=1, zorder=3)
        panel.plot(vsh_clip_smoothed, data.tst, c='red', alpha=1, lw=1)
        panel.plot(data.net_chanel, data.tst, c='orange', alpha=0.5)
        panel.fill_betweenx(data.tst, 0, data.net_chanel, color='orange', alpha=0.5)
        panel.set_title(wellname + ' ch:' + str(channel_cutoff) + 'm')
        panel.invert_yaxis()
        panel.grid()
    plt.tight_layout()

# bal10 log panels per field, smoothing sigma 10; the last two arguments are the
# grid rows and columns, which bound the number of wells drawn (rows * cols).
logs_display(df10_big, 'DWG', 10, channel_cutoff10, 3, 6)
logs_display(df10_big, 'DDGG', 10, channel_cutoff10, 2, 6)
logs_display(df10_big, 'WEST CHIRAG', 10, channel_cutoff10, 2, 6)
logs_display(df10_big, 'CHIRAG',10, channel_cutoff10, 2, 6)
logs_display(df10_big, 'EAST AZERI',10, channel_cutoff10, 3, 6)
logs_display(df10_big, 'CENTRAL AZERI',10, channel_cutoff10, 1, 6)
logs_display(df10_big, 'WEST AZERI',10, channel_cutoff10, 2, 6)