# Libs

In [1]:
import warnings
warnings.filterwarnings('ignore')

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.gridspec import GridSpec
import seaborn as sns
from tqdm import tqdm
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler, StandardScaler, Normalizer, RobustScaler

import xarray as xr
import lexcube
import segyio
import pathlib
from segysak.segy import segy_loader, well_known_byte_locs, segy_writer, segy_header_scrape
import lasio

import glob
import os
import tpot
import joblib
import ast
from scipy.interpolate import CubicSpline
from scipy.spatial import cKDTree

import geopandas as gpd
from shapely.geometry import Point
from scipy.interpolate import interp1d

# Notebook-wide pandas display settings.
pd.set_option('display.precision', 3)
pd.set_option('display.float_format', lambda value: '%.5f' % value)  # floats rendered with 5 decimals
pd.set_option('display.max_columns', 15)
pd.set_option('display.max_rows', 6)

# Well data uploading

In [2]:
# Windows paths are written as raw strings: sequences such as "\j", "\S" or "\g"
# are invalid escapes in a normal string literal and only work by accident.
df_bal8_v4 = pd.read_csv(r'C:\jupyter\SPP\inputoutput\general_logs\df_bal8_azr_v4.csv')
df_bal8_v4.columns = df_bal8_v4.columns.str.lower()

# Recode the Balakhany VIII formation names into short, order-prefixed labels.
bal8_formation_codes = {
    'Balakhany VIII sand': '1_bal8_sand',
    'Balakhany VIII 25': '2_bal8_25',
    'Balakhany VIII 20': '3_bal8_20',
    'Balakhany VIII 15': '4_bal8_15',
    'Balakhany VIII 10': '5_bal8_10',
    'Balakhany VIII 5': '6_bal8_5',
}
for formation_name, formation_code in bal8_formation_codes.items():
    df_bal8_v4.loc[df_bal8_v4.formation == formation_name, 'formation'] = formation_code

# Keep only wells that have at least one sample with phit_flag == 1.
well_phit_flag8 = df_bal8_v4[df_bal8_v4.phit_flag==1].groupby('well')['phit_flag'].apply(lambda x: x.iloc[0]).reset_index().well.unique()
df_bal8_v4_flag = df_bal8_v4[df_bal8_v4.well.isin(well_phit_flag8)]

ntd_top_phi_bot8_bp_v4 = pd.read_csv(r'C:\jupyter\SPP\inputoutput\layers\ntd_top_phi_bot8_bp_v4.csv').drop('Unnamed: 0', axis=1)
ntd_top_phi_bot8_bp_v4.columns = ntd_top_phi_bot8_bp_v4.columns.str.lower()

df_bal10_v4 = pd.read_csv(r'C:\jupyter\SPP\inputoutput\general_logs\df_bal10_vshclp2_v4.csv')
df_bal10_v4.columns = df_bal10_v4.columns.str.lower()

# NOTE(review): 'Balakhany X 50' and 'Balakhany X 40' are both recoded to
# '2_bal10_40' -- confirm that merging the two units is intended.
bal10_formation_codes = {
    'Balakhany X sand': '1_bal10_sand',
    'Balakhany X 50': '2_bal10_40',
    'Balakhany X 40': '2_bal10_40',
    'Balakhany X 20': '3_bal10_20',
}
for formation_name, formation_code in bal10_formation_codes.items():
    df_bal10_v4.loc[df_bal10_v4.formation == formation_name, 'formation'] = formation_code

well_phit_flag10 = df_bal10_v4[df_bal10_v4.phit_flag==1].groupby('well')['phit_flag'].apply(lambda x: x.iloc[0]).reset_index().well.unique()
df_bal10_v4_flag = df_bal10_v4[df_bal10_v4.well.isin(well_phit_flag10)]


def _net_phit_per_well(flagged_logs):
    """Return one row per well with its position, field and mean net porosity.

    Only samples with ``net == 1`` are used. ``xmean``, ``ymean`` and ``field``
    take the first value found for the well, ``phit`` is averaged and renamed to
    ``phit_net_mean``, and the coordinates are rounded to 0 decimals.
    """
    per_well = (
        flagged_logs[flagged_logs.net == 1]
        .groupby('well')[['xmean', 'ymean', 'phit', 'field']]
        .agg({'xmean': 'first', 'ymean': 'first', 'field': 'first', 'phit': 'mean'})
        .reset_index()
    )
    per_well = per_well.rename(columns={'phit': 'phit_net_mean'})
    return per_well.round({'xmean': 0, 'ymean': 0})


xy8 = _net_phit_per_well(df_bal8_v4_flag)
xy10 = _net_phit_per_well(df_bal10_v4_flag)

# Well trajectory display

In [None]:
# Map of the well trajectories with a square of side `margin` centred on each
# well's mean position (xmean, ymean).
fig, ax = plt.subplots(figsize=(12, 7))
traj8 = df_bal8_v4_flag[['well','x_traj','y_traj']]
margin = 300
trag8_wellnames = (
    df_bal8_v4_flag.groupby('well')[['xmean','ymean']].first().reset_index()
    .assign(
        bottom_left_x=lambda wells: wells['xmean'] - margin/2,
        bottom_left_y=lambda wells: wells['ymean'] - margin/2,
        margin=margin,
    )
)
ax.scatter(traj8.x_traj, traj8.y_traj, c='black', s=1)
ax.scatter(trag8_wellnames.xmean, trag8_wellnames.ymean, c='red', s=5)
# Label every well at its mean position.
for well_label, well_x, well_y in zip(trag8_wellnames.well, trag8_wellnames.xmean, trag8_wellnames.ymean):
    ax.annotate(well_label, (well_x, well_y), fontsize=6)
# Outline the square window around each well.
for box in trag8_wellnames.itertuples(index=False):
    ax.add_patch(plt.Rectangle((box.bottom_left_x, box.bottom_left_y), box.margin, box.margin,
                               linewidth=1, edgecolor='b', facecolor='none'))

In [None]:
# Rank wells by the straight-line (x, y) distance between the first and the last
# sample of their trajectory.
def _first_to_last_offset(track):
    """Distance in the x/y plane between the first and last row of `track`."""
    delta_x = track.x_traj.iloc[0] - track.x_traj.iloc[-1]
    delta_y = track.y_traj.iloc[0] - track.y_traj.iloc[-1]
    return (delta_x**2 + delta_y**2)**(1/2)

df_lst = [
    pd.DataFrame({'well': [wellname],
                  'traj_lenght': [_first_to_last_offset(traj8[traj8.well == wellname])]})
    for wellname in traj8.well.unique()
]
result = pd.concat(df_lst).sort_values('traj_lenght', ascending=False)
result.head(15).round(0).values

# HDOBN_Bal8 uploading

In [None]:
path = 'attr/HDOBN8/'
def list_files_by_mask(directory, mask, exclude_mask=None):
    """List files in `directory` matching a glob `mask`, as paths relative to it.

    Parameters
    ----------
    directory : str
        Directory that is searched (not recursively).
    mask : str
        Glob pattern of the files to keep, e.g. ``'* 0-80'``.
    exclude_mask : str, optional
        Glob pattern of files to leave out, e.g. ``'*.xml'``.

    Returns
    -------
    list of str
        Matching paths relative to `directory`, sorted alphabetically.
        ``glob.glob`` returns files in arbitrary order; sorting keeps the order
        stable between runs and machines, which matters because later cells
        assign attribute names by position.
    """
    matched = glob.glob(os.path.join(directory, mask))

    if exclude_mask:
        # Set membership instead of scanning a list for every candidate.
        excluded = set(glob.glob(os.path.join(directory, exclude_mask)))
        matched = [file for file in matched if file not in excluded]

    return sorted(os.path.relpath(file, directory) for file in matched)
files = list_files_by_mask(path, '*', '*.xml')
full_files = list_files_by_mask(path, '* 0-80', '*.xml')
full_files, files

In [None]:
# Compare the current (wv) RMS amplitude map with the nadir (ndr) RMS map.
path1, path2 = r'C:\Petrel\seismic_data_wv\attr\HDOBN8\RMS amplitude 0-80', r'C:\Petrel\seismic_data_ndr\input\Bal8_RMS'
def comparison_wv_ndr_attr(path1, path2):
    """Cross-plot and map two attribute grids stored as space-delimited text.

    Both files are read with the first 20 lines skipped and the columns named
    x, y, <attribute>, column, row. Grid nodes present in both files (exact x/y
    match) are cross-plotted, then each grid is drawn as a coloured map titled
    with its file path.

    Parameters
    ----------
    path1 : str
        File of the "wv" attribute grid.
    path2 : str
        File of the "ndr" attribute grid.
    """
    def _read_grid(grid_path, value_column):
        return pd.read_csv(grid_path, delimiter=' ', skiprows=20,
                           names=['x', 'y', value_column, 'column', 'row'])

    wv_grid = _read_grid(path1, 'attr_wv')
    ndr_grid = _read_grid(path2, 'attr_ndr')

    node_keys = ['x', 'y']
    shared_nodes = (
        wv_grid.set_index(node_keys)
        .join(ndr_grid.set_index(node_keys), how='inner', rsuffix='_ndr')
        .reset_index()
    )
    sns.scatterplot(data=shared_nodes, x='attr_wv', y='attr_ndr', alpha=0.5)

    fig, ax = plt.subplots(1,2, figsize=(18, 6))
    panels = [(wv_grid, 'attr_wv', path1), (ndr_grid, 'attr_ndr', path2)]
    for axis, (grid, value_column, panel_title) in zip(ax, panels):
        axis.scatter(grid.x, grid.y, c=grid[value_column], cmap='coolwarm', s=1)
        axis.set_title(panel_title)
comparison_wv_ndr_attr(path1, path2)

In [None]:
# Compare the current (wv) minimum-amplitude map with the nadir (ndr) map.
# `comparison_wv_ndr_attr` is defined once, in the RMS comparison cell above;
# the identical copy that used to live here silently shadowed that definition.
path1, path2 = r'C:\Petrel\seismic_data_wv\attr\HDOBN8\Minimum amplitude 0-80', r'C:\Petrel\seismic_data_ndr\input\Bal8_MA'

comparison_wv_ndr_attr(path1, path2)

In [None]:
margin = 300
df_attr_map = []
df_attr_well = []
df_attr_intersect_map = []


# The three helpers are defined once, before the loop, instead of being
# re-created for every attribute file.
def seism_upload(file, delimiter):
    """Read one attribute grid from a delimited text file.

    The first 20 lines are skipped and the columns are named
    x, y, value, column, row.
    """
    seismic = pd.read_csv(file, delimiter=delimiter, skiprows=20, names=['x', 'y', 'value', 'column', 'row'])
    return seismic


def intersection_maps(map, wells_df, buffer):
    """Keep the grid nodes lying inside the buffered convex hull of the wells.

    Parameters
    ----------
    map : pandas.DataFrame
        Grid with ``x`` and ``y`` columns (the name shadows the builtin only
        inside this function; it is kept for interface compatibility).
    wells_df : pandas.DataFrame
        Frame with ``xmean`` / ``ymean`` columns; may hold many rows per well.
    buffer : float
        Distance by which the convex hull is enlarged.

    Returns
    -------
    geopandas.GeoDataFrame
        Rows of `map` (plus a point geometry column) intersecting the hull.
    """
    gdf_map = gpd.GeoDataFrame(map, geometry=gpd.points_from_xy(map['x'], map['y']))

    # The hull of the unique well positions equals the hull of all rows, so
    # de-duplicate first instead of building one Point per log sample.
    well_xy = wells_df[['xmean', 'ymean']].drop_duplicates()
    gdf_points = gpd.GeoDataFrame(well_xy, geometry=gpd.points_from_xy(well_xy['xmean'], well_xy['ymean']))
    convex_hull = gdf_points.unary_union.convex_hull.buffer(buffer)
    return gdf_map[gdf_map.intersects(convex_hull)]


def seism_well_correl(seism_map, wells_df, margin, file):
    """Summarise the grid values inside a square window around each well.

    Parameters
    ----------
    seism_map : pandas.DataFrame
        Grid with ``x``, ``y`` and ``value`` columns.
    wells_df : pandas.DataFrame
        One row per well with ``well``, ``phit_net_mean``, ``xmean``, ``ymean``
        and ``field``. NOTE: as before, the window bounds (``xmean_min``,
        ``xmean_max``, ``ymean_min``, ``ymean_max``) are written into this
        frame in place.
    margin : float
        Side length of the square window; bounds are exclusive.
    file : str
        Attribute name stored in the ``seism_att`` column of both outputs.

    Returns
    -------
    (pandas.DataFrame, pandas.DataFrame)
        Per-well statistics (mean, p50, p25, p75 and window geometry) and the
        grid nodes that fell inside any window, tagged with the well name.
    """
    half_window = margin/2
    wells_df['xmean_min'] = wells_df['xmean'] - half_window
    wells_df['xmean_max'] = wells_df['xmean'] + half_window
    wells_df['ymean_min'] = wells_df['ymean'] - half_window
    wells_df['ymean_max'] = wells_df['ymean'] + half_window
    seism_map_short = seism_map[['x', 'y', 'value']]

    well_records = []
    zone_frames = []
    for _, row in wells_df.iterrows():
        in_window = (
            (seism_map_short['x'] > row['xmean_min']) &
            (seism_map_short['x'] < row['xmean_max']) &
            (seism_map_short['y'] > row['ymean_min']) &
            (seism_map_short['y'] < row['ymean_max'])
        )
        zone = seism_map_short[in_window]
        zone_values = zone['value']

        # Plain records are collected and turned into a frame once, rather than
        # building a one-row DataFrame per well.
        well_records.append({'well': row['well'],
                             'phit_net_mean': row['phit_net_mean'],
                             'mean': zone_values.mean(),
                             'p50': zone_values.quantile(0.5),
                             'p25': zone_values.quantile(0.25),
                             'p75': zone_values.quantile(0.75),
                             'xmean': row['xmean'],
                             'ymean': row['ymean'],
                             'xmean_min': row['xmean_min'],
                             'xmean_max': row['xmean_max'],
                             'ymean_min': row['ymean_min'],
                             'ymean_max': row['ymean_max'],
                             'margin': margin,
                             'field': row['field'],
                             'seism_att': file})
        zone_frames.append(zone.assign(well=row['well']))

    result = pd.DataFrame(well_records)
    result['bottom_left_x'] = result['xmean'] - half_window
    result['bottom_left_y'] = result['ymean'] - half_window
    seism_map_zone = pd.concat(zone_frames).reset_index(drop=True)
    seism_map_zone['seism_att'] = file
    return result, seism_map_zone


if not full_files:
    # Fail with a clear message instead of an opaque "No objects to concatenate".
    raise FileNotFoundError(f"no attribute files matching '* 0-80' were found in <{path}>")

for file in full_files:
    seismic_map = seism_upload(path + file, ' ')
    seismic_map['path'] = path
    seismic_map['attr'] = file
    print(f"seismic map <{path + file}> is uploaded")

    seismic_map_intersect = intersection_maps(seismic_map, df_bal8_v4_flag, 1500)
    print(f'seismic map <{path + file}> is intersected with wells')
    df_attr_intersect_map.append(seismic_map_intersect)

    # From here on `seismic_map` holds the grid nodes inside the well windows.
    seismic_wells, seismic_map = seism_well_correl(seismic_map_intersect, xy8, margin, file)
    # seismic_map_intersect.loc[seismic_map_intersect.value <= 1.65, 'value'] = np.nan
    print(f"seismic map <{path + file}> to wells dataset is done")
    df_attr_map.append(seismic_map)
    df_attr_well.append(seismic_wells)

df_attr_well_full = pd.concat(df_attr_well).reset_index(drop=True)
df_attr_map_full = pd.concat(df_attr_map).reset_index(drop=True)
df_attr_intersect_map_full = pd.concat(df_attr_intersect_map).reset_index(drop=True)

# Quick look at the LAST attribute processed by the loop above.
fig, ax = plt.subplots(figsize=(15, 7))
cb1 = plt.scatter(seismic_map_intersect['x'], seismic_map_intersect['y'], c=seismic_map_intersect['value'], cmap='coolwarm', s=1)
plt.colorbar(cb1)
plt.scatter(xy8['xmean'], xy8['ymean'], c='black', s=1)
for idx, txt in enumerate(xy8['well']):
    ax.annotate(txt, (xy8['xmean'].iloc[idx], xy8['ymean'].iloc[idx]), fontsize=8)
for idx, row in seismic_wells.iterrows():
    rect = plt.Rectangle((row['bottom_left_x'], row['bottom_left_y']), row['margin'], row['margin'], linewidth=1, edgecolor='b', facecolor='none')
    ax.add_patch(rect)
plt.title(path + file);

In [None]:
# Grid nodes inside the well windows for one attribute, with trajectories,
# well positions and window outlines on top.
# The attribute name is defined once so that the filter and the title cannot
# disagree (the title used to say 'Average negative amplitude 0-80' while the
# RMS attribute was plotted).
attr_name = 'RMS amplitude 0-80'
fig, ax = plt.subplots(figsize=(15, 7))
seism_map_zone = df_attr_map_full[df_attr_map_full.seism_att == attr_name]
cb1 = plt.scatter(seism_map_zone['x'], seism_map_zone['y'], c=seism_map_zone['value'], cmap='coolwarm', s=1, alpha=0.5)
plt.colorbar(cb1)
plt.scatter(traj8.x_traj, traj8.y_traj, c='black',s=1)
plt.scatter(seismic_wells['xmean'], seismic_wells['ymean'], c='red', s=1)
for idx, row in seismic_wells.iterrows():
    rect = plt.Rectangle((row['bottom_left_x'], row['bottom_left_y']), row['margin'], row['margin'], linewidth=1, edgecolor='b', facecolor='none')
    ax.add_patch(rect)
for idx, txt in enumerate(xy8['well']):
    ax.annotate(txt, (xy8['xmean'].iloc[idx], xy8['ymean'].iloc[idx]), fontsize=8, c='black')
plt.title(attr_name);

In [None]:
# One figure per seismic attribute: mean net porosity against each window
# statistic, coloured by field. Panels left to right: mean, p50, p25, p75.
for att in df_attr_well_full.seism_att.unique():
    seismic_wells = df_attr_well_full[df_attr_well_full.seism_att == att]
    fig, ax = plt.subplots(1, 4, figsize=(18, 4))
    for panel, statistic in zip(ax, ('mean', 'p50', 'p25', 'p75')):
        sns.scatterplot(data=seismic_wells, x='phit_net_mean', y=statistic, hue='field', ax=panel)
    plt.suptitle(f'Seismic {att} vs wells')
    plt.tight_layout()
    plt.show()

## HDOBN_Bal8 layers

In [None]:
# margin = 300
# for file in files[:4]:
#     if ' 0-80' not in file:
#         def seism_upload(file, delimiter):
#             seismic = pd.read_csv(file, delimiter=delimiter, skiprows=20, names=['x', 'y', 'value', 'column', 'row'])
#             # seismic = seismic.round({'x':0, 'y':0})
#             return seismic
#         seismic_map = seism_upload(path + file, ' ')
#         seismic_map['path'] = path
#         seismic_map['attr'] = file
#         print(f"seismic map <{path + file}> is uploaded")
        
#         def intersection_maps(map, wells_df, buffer):
#             geometry_map = [Point(xy) for xy in zip(map['x'], map['y'])]
#             gdf_map = gpd.GeoDataFrame(map, geometry=geometry_map)

#             geometry_points = [Point(xy) for xy in zip(wells_df['xmean'], wells_df['ymean'])]
#             gdf_points = gpd.GeoDataFrame(wells_df, geometry=geometry_points)
#             convex_hull = gdf_points.unary_union.convex_hull.buffer(buffer)
#             intersection = gdf_map[gdf_map.intersects(convex_hull)]
#             return intersection
#         seismic_map_intersect = intersection_maps(seismic_map, df_bal8_v4_flag, 1500)
#         print(f'seismic map <{path + file}> is intersected with wells')

#         def seism_well_correl(seism_map, wells_df, margin, file):
#             wells_df['xmean_min'] = wells_df['xmean'] - margin/2
#             wells_df['xmean_max'] = wells_df['xmean'] + margin/2
#             wells_df['ymean_min'] = wells_df['ymean'] - margin/2
#             wells_df['ymean_max'] = wells_df['ymean'] + margin/2
#             seism_map_short = seism_map[['x', 'y', 'value']]

#             df_lst = []
#             df_seism_map_zone_lst = []
#             for idx, row in wells_df.iterrows():
#                 seism_map_zone = seism_map_short[   (seism_map_short['x'] > row['xmean_min']) &
#                                                     (seism_map_short['x'] < row['xmean_max']) & 
#                                                     (seism_map_short['y'] > row['ymean_min']) &
#                                                     (seism_map_short['y'] < row['ymean_max'])]
#                 seism_map_zone['well'] = row['well']
#                 mean = seism_map_zone.value.mean()
#                 p50 = seism_map_zone.value.quantile(0.5)
#                 p25 = seism_map_zone.value.quantile(0.25)
#                 p75 = seism_map_zone.value.quantile(0.75)
#                 df = pd.DataFrame({'well':row['well'], 
#                                     'phit_net_mean':row['phit_net_mean'], 
#                                     'mean': mean, 
#                                     'p50': p50, 
#                                     'p25': p25, 
#                                     'p75': p75,
#                                     'xmean':row['xmean'],
#                                     'ymean':row['ymean'],
#                                     'xmean_min':row['xmean_min'],
#                                     'xmean_max':row['xmean_max'],
#                                     'ymean_min':row['ymean_min'],
#                                     'ymean_max':row['ymean_max'],
#                                     'margin':margin,
#                                     'seism_att':file}, index=[0])
#                 df_seism_map_zone = pd.DataFrame(seism_map_zone)

#                 df_lst.append(df)
#                 df_seism_map_zone_lst.append(df_seism_map_zone)

#             result = pd.concat(df_lst).reset_index(drop=True)
#             result['bottom_left_x'] = result['xmean'] - margin/2
#             result['bottom_left_y'] = result['ymean'] - margin/2
#             seism_map_zone = pd.concat(df_seism_map_zone_lst).reset_index(drop=True)
            
#             return result, seism_map_zone
#         seismic_wells, seism_map_zone = seism_well_correl(seismic_map_intersect, xy8, margin, file)
#         # seismic_map_intersect.loc[seismic_map_intersect.value <= 1.65, 'value'] = np.nan
#         print(f"seismic map <{path + file}> to wells dataset is done")

#         fig, ax = plt.subplots(figsize=(15, 7))
#         cb1 = plt.scatter(seismic_map_intersect['x'], seismic_map_intersect['y'], c=seismic_map_intersect['value'], cmap='coolwarm', s=1)
#         plt.colorbar(cb1)
#         plt.scatter(xy8['xmean'], xy8['ymean'], c='black', s=1)
#         for idx, txt in enumerate(xy8['well']):
#             ax.annotate(txt, (xy8['xmean'].iloc[idx], xy8['ymean'].iloc[idx]), fontsize=8)
#         for idx, row in seismic_wells.iterrows():
#             rect = plt.Rectangle((row['bottom_left_x'], row['bottom_left_y']), row['margin'], row['margin'], linewidth=1, edgecolor='b', facecolor='none')
#             ax.add_patch(rect)
#         plt.title(path + file);

# Experiments with ML

## collecting df

In [None]:
# path = 'attr/HDOBN8/'
# def list_files_by_mask(directory, mask, exclude_mask=None):
#     # Construct the full pattern
#     pattern = os.path.join(directory, mask)
    
#     # Use glob to get the list of files
#     files = glob.glob(pattern)

#     # Filter out files that match the exclude_mask
#     if exclude_mask:
#         exclude_pattern = os.path.join(directory, exclude_mask)
#         exclude_files = glob.glob(exclude_pattern)
#         files = [file for file in files if file not in exclude_files]
    
#     # Extract the relative path of each file
#     relative_paths = [os.path.relpath(file, directory) for file in files]
    
#     return relative_paths
# files = list_files_by_mask(path, '*', '*.xml')
# full_files = list_files_by_mask(path, '* 0-80', '*.xml')

# margin = 300
# df_attr_map = []
# df_attr_well = []
# df_attr_intersect_map = []

# for file in full_files: 

#     def seism_upload(file, delimiter):
#         seismic = pd.read_csv(file, delimiter=delimiter, skiprows=20, names=['x', 'y', 'value', 'column', 'row'])
#         # seismic = seismic.round({'x':0, 'y':0})
#         return seismic
#     seismic_map = seism_upload(path + file, ' ')
#     seismic_map['path'] = path
#     seismic_map['attr'] = file
#     print(f"seismic map <{path + file}> is uploaded")

#     def intersection_maps(map, wells_df, buffer):
#         geometry_map = [Point(xy) for xy in zip(map['x'], map['y'])]
#         gdf_map = gpd.GeoDataFrame(map, geometry=geometry_map)

#         geometry_points = [Point(xy) for xy in zip(wells_df['xmean'], wells_df['ymean'])]
#         gdf_points = gpd.GeoDataFrame(wells_df, geometry=geometry_points)
#         convex_hull = gdf_points.unary_union.convex_hull.buffer(buffer)
#         intersection = gdf_map[gdf_map.intersects(convex_hull)]
#         return intersection
#     seismic_map_intersect = intersection_maps(seismic_map, df_bal8_v4_flag, 1500)
#     print(f'seismic map <{path + file}> is intersected with wells')
#     df_attr_intersect_map.append(seismic_map_intersect)
    
#     def seism_well_correl(seism_map, wells_df, margin, file):
#         wells_df['xmean_min'] = wells_df['xmean'] - margin/2
#         wells_df['xmean_max'] = wells_df['xmean'] + margin/2
#         wells_df['ymean_min'] = wells_df['ymean'] - margin/2
#         wells_df['ymean_max'] = wells_df['ymean'] + margin/2
#         seism_map_short = seism_map[['x', 'y', 'value']]

#         df_lst = []
#         df_seism_map_zone_lst = []
#         for idx, row in wells_df.iterrows():
#             seism_map_zone = seism_map_short[   (seism_map_short['x'] > row['xmean_min']) &
#                                                 (seism_map_short['x'] < row['xmean_max']) & 
#                                                 (seism_map_short['y'] > row['ymean_min']) &
#                                                 (seism_map_short['y'] < row['ymean_max'])]
            
#             mean = seism_map_zone.value.mean()
#             p50 = seism_map_zone.value.quantile(0.5)
#             p25 = seism_map_zone.value.quantile(0.25)
#             p75 = seism_map_zone.value.quantile(0.75)
#             df = pd.DataFrame({ 'well':row['well'], 
#                                 'phit_net_mean':row['phit_net_mean'], 
#                                 'mean': mean, 
#                                 'p50': p50, 
#                                 'p25': p25, 
#                                 'p75': p75,
#                                 'xmean':row['xmean'],
#                                 'ymean':row['ymean'],
#                                 'xmean_min':row['xmean_min'],
#                                 'xmean_max':row['xmean_max'],
#                                 'ymean_min':row['ymean_min'],
#                                 'ymean_max':row['ymean_max'],
#                                 'margin':margin,
#                                 'field':row['field'],
#                                 'seism_att':file}, index=[0])
            
#             df_seism_map_zone = pd.DataFrame(seism_map_zone)
#             df_seism_map_zone['well'] = row['well']
#             df_lst.append(df)
#             df_seism_map_zone_lst.append(df_seism_map_zone)

#         result = pd.concat(df_lst).reset_index(drop=True)
#         result['bottom_left_x'] = result['xmean'] - margin/2
#         result['bottom_left_y'] = result['ymean'] - margin/2
#         seism_map_zone = pd.concat(df_seism_map_zone_lst).reset_index(drop=True)
#         seism_map_zone['seism_att'] = file
#         return result, seism_map_zone
#     seismic_wells, seismic_map = seism_well_correl(seismic_map_intersect, xy8, margin, file)
#     # seismic_map_intersect.loc[seismic_map_intersect.value <= 1.65, 'value'] = np.nan
#     print(f"seismic map <{path + file}> to wells dataset is done")
#     df_attr_map.append(seismic_map)
#     df_attr_well.append(seismic_wells)

# df_attr_well_full = pd.concat(df_attr_well).reset_index(drop=True)
# df_attr_map_full = pd.concat(df_attr_map).reset_index(drop=True)
# df_attr_intersect_map_full = pd.concat(df_attr_intersect_map).reset_index(drop=True)

# df_attr_well_full.to_csv('io/df_attr_well_full.csv', index=False)
# df_attr_map_full.to_csv('io/df_attr_map_full.csv', index=False)
# df_attr_intersect_map_full.to_csv('io/df_attr_intersect_map_full.csv', index=False)

In [11]:
# Reload the three attribute tables from the CSV files written by the
# (currently commented-out) export lines of the previous cell.
df_attr_well_full, df_attr_map_full, df_attr_intersect_map_full = (
    pd.read_csv(csv_path)
    for csv_path in ('io/df_attr_well_full.csv',
                     'io/df_attr_map_full.csv',
                     'io/df_attr_intersect_map_full.csv')
)

def seism_attr_collection(df_attr_intersect_map_full):
    """Put the first three attributes of a long-format table side by side.

    The table is split by the first three distinct values of its ``attr``
    column (in order of appearance) and the pieces are left-joined on (x, y),
    starting from the first attribute.

    Returns a frame with the columns
    x, y, value_1, attr_1, value_2, attr_2, value_3, attr_3.
    """
    node_keys = ['x', 'y']
    attr_list = list(df_attr_intersect_map_full.attr.unique())
    first_attr, second_attr, third_attr = attr_list[0], attr_list[1], attr_list[2]

    def _nodes_of(attr_name):
        # Rows of one attribute, indexed by grid node.
        rows = df_attr_intersect_map_full[df_attr_intersect_map_full.attr == attr_name]
        return rows.set_index(node_keys)

    side_by_side = _nodes_of(first_attr).join(_nodes_of(second_attr), lsuffix='_1', rsuffix='_2')
    # Every column of the first two pieces already carries a suffix, so the
    # third piece joins with its plain column names and is renamed afterwards.
    side_by_side = side_by_side.join(_nodes_of(third_attr), rsuffix='_3').reset_index()
    side_by_side = side_by_side.rename(columns={'column': 'column_3', 'value': 'value_3', 'row': 'row_3',
                                                'path': 'path_3', 'attr': 'attr_3', 'geometry': 'geometry_3'})

    kept_columns = ['x', 'y']
    for position in (1, 2, 3):
        kept_columns += [f'value_{position}', f'attr_{position}']
    return side_by_side[kept_columns]
# Wide table: one row per grid node, with the first three attributes side by side.
seismic_map_v1 = seism_attr_collection(df_attr_intersect_map_full)

def seism_tvdss_dip_collection(seismic_map):
    """Attach top/bottom surface values, their difference and dip to a grid.

    Three space-delimited grids are read from ``./interv`` (first 20 lines
    skipped). The top and bottom values are multiplied by -1, left-joined to
    `seismic_map` on exact (x, y) match, and ``bal8_thick`` is computed as
    bottom minus top. The dip grid is joined last and its ``column`` / ``row``
    columns are dropped.

    Returns `seismic_map` extended with
    top_tvd_scs, bot_tvd_scs, bal8_thick and dip8.
    """
    node_keys = ['x', 'y']

    def _read_surface(surface_path, value_column):
        return pd.read_csv(surface_path, delimiter=' ', skiprows=20,
                           names=['x', 'y', value_column, 'column', 'row'])

    top8 = _read_surface('./interv/Top_Bal8_Depth', 'top_tvd_scs')
    top8['top_tvd_scs'] = top8['top_tvd_scs'] * -1
    bot8 = _read_surface('./interv/Bot_Bal8_Depth', 'bot_tvd_scs')
    bot8['bot_tvd_scs'] = bot8['bot_tvd_scs'] * -1
    dip8 = _read_surface('./interv/Bal8_Dip_Angle', 'dip8')

    grid_index_columns = ['column', 'row']
    with_top = seismic_map.set_index(node_keys).join(
        top8.drop(grid_index_columns, axis=1).set_index(node_keys))
    with_surfaces = with_top.join(
        bot8.drop(grid_index_columns, axis=1).set_index(node_keys)).reset_index()
    with_surfaces['bal8_thick'] = with_surfaces['bot_tvd_scs'] - with_surfaces['top_tvd_scs']

    with_dip = with_surfaces.set_index(node_keys).join(dip8.set_index(node_keys)).reset_index()
    return with_dip.drop(grid_index_columns, axis=1)
seismic_map_v2 = seism_tvdss_dip_collection(seismic_map_v1)
# NOTE(review): the positional names value_1/2/3 are mapped to ana/ma/rms here,
# which assumes the attributes appear in exactly that order -- confirm.
seismic_map_v3 = (
    seismic_map_v2
    .loc[:, ['x', 'y', 'value_1', 'value_2', 'value_3', 'top_tvd_scs', 'bal8_thick', 'dip8']]
    .rename(columns={'value_1':'ana0_80', 'value_2':'ma0_80', 'value_3':'rms0_80'})
)

margin = 300
# Well-level columns only; the per-attribute rows collapse to distinct rows.
well_level_columns = ['well', 'phit_net_mean', 'xmean', 'ymean', 'margin', 'field']
wells_df = df_attr_well_full.loc[:, well_level_columns].drop_duplicates()
def seism_well_preprocessing(seismic_map_v3, df_attr_well_full, margin):
    """Average every map attribute inside a square window around each well.

    The original version scanned the wells six times, computed quantiles and
    per-window frames that were never used, and obtained ``xmean`` / ``ymean``
    only because one intermediate frame was left un-stripped. This version does
    a single pass and returns the same table.

    Parameters
    ----------
    seismic_map_v3 : pandas.DataFrame
        Grid with ``x``, ``y`` and the columns ana0_80, ma0_80, rms0_80,
        top_tvd_scs, bal8_thick, dip8.
    df_attr_well_full : pandas.DataFrame
        One row per well with ``well``, ``phit_net_mean``, ``xmean``, ``ymean``.
        NOTE: as before, the window bounds (``xmean_min``, ``xmean_max``,
        ``ymean_min``, ``ymean_max``) are written into this frame in place.
    margin : float
        Side length of the square window centred on (xmean, ymean); the window
        bounds are exclusive.

    Returns
    -------
    pandas.DataFrame
        Columns well, phit_net_mean, x, y followed by the window mean of each
        attribute (NaN when no grid node falls inside the window). An empty
        wells frame yields an empty frame with these columns.
    """
    attr_columns = ['ana0_80', 'ma0_80', 'rms0_80', 'top_tvd_scs', 'bal8_thick', 'dip8']
    output_columns = ['well', 'phit_net_mean', 'x', 'y'] + attr_columns

    wells_df = df_attr_well_full
    half_window = margin/2
    wells_df['xmean_min'] = wells_df['xmean'] - half_window
    wells_df['xmean_max'] = wells_df['xmean'] + half_window
    wells_df['ymean_min'] = wells_df['ymean'] - half_window
    wells_df['ymean_max'] = wells_df['ymean'] + half_window

    seism_map_short = seismic_map_v3[['x', 'y'] + attr_columns]

    well_records = []
    for _, row in wells_df.iterrows():
        in_window = (
            (seism_map_short['x'] > row['xmean_min']) &
            (seism_map_short['x'] < row['xmean_max']) &
            (seism_map_short['y'] > row['ymean_min']) &
            (seism_map_short['y'] < row['ymean_max'])
        )
        window = seism_map_short[in_window]
        record = {'well': row['well'],
                  'phit_net_mean': row['phit_net_mean'],
                  'x': row['xmean'],
                  'y': row['ymean']}
        for attr_column in attr_columns:
            record[attr_column] = window[attr_column].mean()
        well_records.append(record)

    # `columns=` fixes the column order and keeps the schema for an empty result.
    return pd.DataFrame(well_records, columns=output_columns)
# The de-duplicated `wells_df` is what gets passed as the function's
# `df_attr_well_full` argument.
df_well_attr_final = seism_well_preprocessing(seismic_map_v3, wells_df, margin)

In [None]:
# df_well_attr_final.iloc[:,:5]
# Generation 20 - Current best internal CV score: 0.41814972520472315
# df_well_attr_final.iloc[:,:6]
# Generation 20 - Current best internal CV score: 0.43406389993742367
# df_well_attr_final.iloc[:,:]
# Generation 20 - Current best internal CV score: 0.4697519194296129
# df_well_attr_final.iloc[:,:] + field
# Generation 12 - Current best internal CV score: 0.42649841178519277
# df_well_attr_final.iloc[:,:] + MinMaxScaler
# Generation 20 - Current best internal CV score: 0.50922249695856
# df_well_attr_final.iloc[:,:].drop('rms0_80', axis=1)
# Generation 12 - Current best internal CV score: 0.4549541785965697
# df_well_attr_final.iloc[:,:].drop('rms0_80', axis=1) + MinMaxScaler to X

## Exercise 1
A simple baseline: predict with all features, without MinMax scaling.

In [None]:
def train_test_generation(seismic_map, well_data):
    """Split the well table into training and validation sets for one experiment.

    Wells on the hard-coded outlier and near-neighbour lists are removed first.
    Of the remaining wells, those named on the fixed validation list form the
    validation set and all others form the training set.

    Parameters
    ----------
    seismic_map : pandas.DataFrame
        Attribute map to predict on; a copy is returned as ``X_test``.
    well_data : pandas.DataFrame
        Per-well table with a ``well`` column, the ``phit_net_mean`` target and
        the feature columns.

    Returns
    -------
    dict
        Keys ``X_train``, ``well_train``, ``y_train``, ``X_val``, ``well_val``,
        ``y_val``, ``X_test`` and ``well_data`` (the input table, unchanged).
    """
    map_copy = seismic_map.copy()

    excluded_outliers = ['B39', 'D02Y', 'D34', 'C36', 'B31', 'B20']
    # wells located very close to each other
    excluded_neighbours = ['B01ST1', 'D01', 'C14', 'C01A', 'B06', 'C13Z', 'C06', 'D01Z', 'C07']
    holdout_wells = ["B02Z", "B05", "B08Z", "B12", "B14Z", "B19", "C01", "C03Z",
                     "C10", "C14Z", "C20Z", "D02Z", "D03", "D04Z", "D07"]

    kept = well_data[~well_data.well.isin(excluded_outliers + excluded_neighbours)]
    features = kept.drop('phit_net_mean', axis=1)
    target = kept['phit_net_mean']

    # One boolean mask drives both the feature and the target split.
    is_holdout = features.well.isin(holdout_wells)
    train_part = features[~is_holdout]
    val_part = features[is_holdout]

    return {
        'X_train': train_part.drop('well', axis=1),
        'well_train': train_part['well'],
        'y_train': target[~is_holdout],
        'X_val': val_part.drop('well', axis=1),
        'well_val': val_part['well'],
        'y_val': target[is_holdout],
        'X_test': map_copy,
        'well_data': well_data,
    }
# Build the Exercise 1 split from the full attribute table and unpack the pieces used below.
result = train_test_generation(seismic_map_v3, df_well_attr_final)
X_train8, y_train8, X_val8,y_val8, X_test8 = result['X_train'], result['y_train'], result['X_val'], result['y_val'], result['X_test']

# Quick look at the shape and first rows of the training and map feature tables.
display(f'X_train8 shape: {X_train8.shape}')
display(X_train8.head(3))
display(f'X_test8 shape: {X_test8.shape}') 
display(X_test8.head(3))

# Location QC: map points coloured by ana0_80, training wells in black, validation wells in green.
plt.scatter(X_test8.x, X_test8.y, c=X_test8.ana0_80, cmap='coolwarm', s=1)
plt.scatter(X_train8.x, X_train8.y, c='black',  s=10)
plt.scatter(X_val8.x, X_val8.y, c='green',  s=10)

In [None]:
# Fit a TPOT regressor on the Exercise 1 training wells (R2 scoring, fixed random_state).
tregr8 = tpot.TPOTRegressor(n_jobs=7, verbosity=2, generations=20, random_state=42, scoring='r2', early_stop=5)
tregr8.fit(X_train8, y_train8)
# Persist the fitted pipeline under ./report/models/ so it can be reloaded by name later.
name = 'ex_1_v2'
joblib.dump(tregr8.fitted_pipeline_, f'./report/models/tregr_{name}.pkl')
# Last expression of the cell: shows the fitted pipeline.
tregr8.fitted_pipeline_

In [None]:
def xtest_xval_prediction(X_test, X_val8, y_val8, X_train8, y_train8, model_name):
    """Apply a saved pipeline to the training, validation and map feature tables.

    The pipeline is read from ``./report/models/tregr_{model_name}.pkl``. In every
    output frame ``x`` and ``y`` are taken by position from the first and second
    columns of the corresponding feature table.

    Returns
    -------
    tuple
        ``(model_df, val_df, train_df)``. ``train_df`` and ``val_df`` hold
        ``x``, ``y``, ``phit_pred`` and ``phit_true``; ``model_df`` holds
        ``x``, ``y`` and ``phit_pred`` only.
    """
    pipeline = joblib.load(f'./report/models/tregr_{model_name}.pkl')

    def _predicted_frame(features, **extra_columns):
        # Predict first, then assemble coordinates, prediction and any extra columns.
        predicted = pipeline.predict(features)
        return pd.DataFrame({'x': features.iloc[:, 0],
                             'y': features.iloc[:, 1],
                             'phit_pred': predicted,
                             **extra_columns})

    train_df = _predicted_frame(X_train8, phit_true=y_train8)
    val_df = _predicted_frame(X_val8, phit_true=y_val8)
    model_df = _predicted_frame(X_test)
    return model_df, val_df, train_df
# Score the training wells, the validation wells and the full attribute map with the saved pipeline.
# NOTE(review): the training cell saves its pipeline as 'ex_1_v2', but 'ex_1' is loaded here —
# confirm which model file this exercise is meant to evaluate.
model_df8, val_df8, train_df8 = xtest_xval_prediction(X_test8, X_val8, y_val8, X_train8, y_train8, 'ex_1')

# Half-width of the acceptance band around phit_true used by the QC below.
rng = 0.0115
def validation_qc(val, train, rng):
    """Classify predictions against a ``+/- rng`` band around the true value.

    Both frames are modified in place and gain the columns ``qc_up``,
    ``qc_down``, ``qc`` and ``rng``. ``qc`` is ``'bigger'`` when ``phit_pred`` is
    at or above ``phit_true + rng``, ``'lower'`` when it is at or below
    ``phit_true - rng`` and ``'norm'`` otherwise. The class shares and counts of
    the validation frame are displayed.

    Returns
    -------
    tuple
        ``(val, train)`` — the same objects that were passed in.
    """
    def _flag(frame):
        truth = frame['phit_true']
        predicted = frame['phit_pred']
        frame['qc_up'] = truth + rng
        frame['qc_down'] = truth - rng
        frame['qc'] = 'norm'
        frame.loc[predicted >= frame['qc_up'], 'qc'] = 'bigger'
        frame.loc[predicted <= frame['qc_down'], 'qc'] = 'lower'
        frame['rng'] = rng

    _flag(val)
    for normalised in (True, False):
        display(val.qc.value_counts(normalize=normalised))

    _flag(train)
    return val, train
val_df8, train_df8 = validation_qc(val_df8, train_df8, rng)

def display_xplot_map(val_df, model_df, rng):
    """Draw the validation cross-plot next to the predicted-porosity map.

    Left panel: ``phit_pred`` against ``phit_true`` for ``val_df`` with the 1:1
    line (red) and the ``+/- rng`` band (green). Right panel (two grid columns
    wide): ``model_df`` points coloured by ``phit_pred``, the ``val_df``
    locations overlaid in green, and a colour bar.
    """
    fig = plt.figure(figsize=(16, 4))
    layout = GridSpec(1, 3, figure=fig)

    xplot_ax = fig.add_subplot(layout[0, 0])
    sns.scatterplot(data=val_df, x='phit_true', y='phit_pred', c='#1190e6', s=30, alpha=0.5, ec='black', ax=xplot_ax)
    lo, hi = 0.18, 0.28
    # 1:1 line first, then the upper and lower edges of the tolerance band.
    for shift, colour in ((0, 'r'), (rng, 'g'), (-rng, 'g')):
        sns.lineplot(x=[lo, hi], y=[lo + shift, hi + shift], c=colour, ls='--', ax=xplot_ax)
    xplot_ax.grid()

    map_ax = fig.add_subplot(layout[0, 1:])
    predicted_points = map_ax.scatter(model_df['x'], model_df['y'], c=model_df['phit_pred'], cmap='coolwarm', s=1)
    map_ax.scatter(val_df.x, val_df.y, c='green', s=10)
    plt.colorbar(predicted_points)
display_xplot_map(val_df8,model_df8, rng)

In [None]:
# Training-set cross-plot: predicted vs. true porosity with the 1:1 line (red)
# and the +/- rng band (green).
# The bare name below is not the cell's last statement, so it produces no output.
train_df8
sns.scatterplot(data=train_df8, x='phit_true', y='phit_pred', c='blue', s=30, alpha=0.5, ec='black')
sns.lineplot(x=[0.18, 0.28], y=[0.18, 0.28], c='r', ls='--')
sns.lineplot(x=[0.18, 0.28], y=[0.18+rng, 0.28+rng], c='g', ls='--')
sns.lineplot(x=[0.18, 0.28], y=[0.18-rng, 0.28-rng], c='g', ls='--')
# Share and count of training wells per QC class (norm / bigger / lower).
display(train_df8.qc.value_counts(normalize=True))
display(train_df8.qc.value_counts())
plt.grid()

In [43]:
# Attach well names to the validation results; DataFrame.join aligns on the index here.
well_val = result['well_val']
# Validation wells whose prediction falls outside the +/- rng band (qc is not 'norm').
well_val_out = val_df8[val_df8.qc != 'norm'].join(well_val, how='inner')
# All validation wells with their names.
val_df8_well = val_df8.join(well_val, how='inner')

In [None]:
# Predicted-porosity map with validation wells: in-band wells in green,
# out-of-band wells in red and labelled with their names.
plt.figure(figsize=(10, 6))
plt.scatter(model_df8['x'], model_df8['y'], c=model_df8['phit_pred'], cmap='coolwarm', s=1)
plt.scatter(val_df8_well[~val_df8_well.well.isin(well_val_out.well)].x, 
            val_df8_well[~val_df8_well.well.isin(well_val_out.well)].y, c='green', s=10)
plt.scatter(well_val_out.x, well_val_out.y, c='red', s=10)
# Label each out-of-band well at its map position.
for idx, txt in enumerate(well_val_out.well):
    plt.annotate(txt, (well_val_out.x.iloc[idx], well_val_out.y.iloc[idx]), fontsize=8)

In [None]:
# Validation cross-plot (predicted vs. true) with the 1:1 line (red), the +/- rng band (green)
# and name labels on the wells that fall outside the band.
fig = plt.figure(figsize=(7, 5))
sns.scatterplot(data=val_df8_well, x='phit_true', y='phit_pred', c='#1190e6', s=50, alpha=0.5, ec='black')
sns.lineplot(x=[0.18, 0.28], y=[0.18, 0.28], c='r', ls='--')
sns.lineplot(x=[0.18, 0.28], y=[0.18+rng, 0.28+rng], c='g', ls='--')
sns.lineplot(x=[0.18, 0.28], y=[0.18-rng, 0.28-rng], c='g', ls='--')
# Label each out-of-band well at its (true, predicted) position.
for idx, txt in enumerate(well_val_out.well):
    plt.annotate(txt, (well_val_out.phit_true.iloc[idx], well_val_out.phit_pred.iloc[idx]), fontsize=8)
plt.grid()

In [None]:
# Build a GemPy geomodel from the public gempy_data example files (orientations and surface points).
# NOTE(review): these imports sit mid-notebook rather than in the top import cell, and nothing
# in the surrounding cells uses geo_model — confirm this cell belongs in this notebook.
import gempy as gp
import gempy_viewer as gpv

# Base URL of the example data set; the relative paths below are appended to it.
data_path = 'https://raw.githubusercontent.com/cgre-aachen/gempy_data/master/'

geo_model: gp.data.GeoModel = gp.create_geomodel(
    project_name='Tutorial_ch1_1_Basics',
    extent=[0, 2000, 0, 2000, 0, 750],
    refinement=4,  # * Here we define the number of octree levels. If octree levels are defined, the resolution is ignored.
    importer_helper=gp.data.ImporterHelper(
        path_to_orientations=data_path + "/data/input_data/getting_started/simple_fault_model_orientations.csv",
        path_to_surface_points=data_path + "/data/input_data/getting_started/simple_fault_model_points.csv",
        hash_surface_points="4cdd54cd510cf345a583610585f2206a2936a05faaae05595b61febfc0191563",
        hash_orientations="7ba1de060fc8df668d411d0207a326bc94a6cdca9f5fe2ed511fd4db6b3f3526"
    )
)

## Exercise 2
Prediction is based on a single selected seismic attribute.

In [None]:
def train_test_generation(seismic_map, well_data):
    """Split the well table into training and validation sets for one experiment.

    Wells on the hard-coded outlier and near-neighbour lists are removed first.
    Of the remaining wells, those named on the fixed validation list form the
    validation set and all others form the training set.

    Parameters
    ----------
    seismic_map : pandas.DataFrame
        Attribute map to predict on; a copy is returned as ``X_test``.
    well_data : pandas.DataFrame
        Per-well table with a ``well`` column, the ``phit_net_mean`` target and
        the feature columns.

    Returns
    -------
    dict
        Keys ``X_train``, ``well_train``, ``y_train``, ``X_val``, ``well_val``,
        ``y_val``, ``X_test`` and ``well_data`` (the input table, unchanged).
    """
    map_copy = seismic_map.copy()

    excluded_outliers = ['B39', 'D02Y', 'D34', 'C36', 'B31', 'B20']
    # wells located very close to each other
    excluded_neighbours = ['B01ST1', 'D01', 'C14', 'C01A', 'B06', 'C13Z', 'C06', 'D01Z', 'C07']
    holdout_wells = ["B02Z", "B05", "B08Z", "B12", "B14Z", "B19", "C01", "C03Z",
                     "C10", "C14Z", "C20Z", "D02Z", "D03", "D04Z", "D07"]

    kept = well_data[~well_data.well.isin(excluded_outliers + excluded_neighbours)]
    features = kept.drop('phit_net_mean', axis=1)
    target = kept['phit_net_mean']

    # One boolean mask drives both the feature and the target split.
    is_holdout = features.well.isin(holdout_wells)
    train_part = features[~is_holdout]
    val_part = features[is_holdout]

    return {
        'X_train': train_part.drop('well', axis=1),
        'well_train': train_part['well'],
        'y_train': target[~is_holdout],
        'X_val': val_part.drop('well', axis=1),
        'well_val': val_part['well'],
        'y_val': target[is_holdout],
        'X_test': map_copy,
        'well_data': well_data,
    }
# Exercise 2 inputs, selected by column position. The first five columns of the well table are
# well, phit_net_mean, x, y and ana0_80 (order fixed by feature_list in seism_well_preprocessing).
df_well_attr_final_ex2 = df_well_attr_final.iloc[:,:5]
# assumes the first three map columns are x, y and ana0_80, matching the well table — TODO confirm
seismic_map_v3_ex2 = seismic_map_v3.iloc[:,:3]
result = train_test_generation(seismic_map_v3_ex2, df_well_attr_final_ex2)
X_train8, y_train8, X_val8,y_val8, X_test8 = result['X_train'], result['y_train'], result['X_val'], result['y_val'], result['X_test']

# Quick look at the shape and first rows of the training and map feature tables.
display(f'X_train8 shape: {X_train8.shape}')
display(X_train8.head(3))
display(f'X_test8 shape: {X_test8.shape}') 
display(X_test8.head(3))

In [None]:
# Fit a TPOT regressor on the Exercise 2 training wells (R2 scoring, fixed random_state).
tregr8 = tpot.TPOTRegressor(n_jobs=7, verbosity=2, generations=20, random_state=42, scoring='r2', early_stop=5)
tregr8.fit(X_train8, y_train8)
# Persist the fitted pipeline so the prediction cell can reload it as 'ex_2'.
name = 'ex_2'
joblib.dump(tregr8.fitted_pipeline_, f'./report/models/tregr_{name}.pkl')
# Last expression of the cell: shows the fitted pipeline.
tregr8.fitted_pipeline_

In [None]:
def xtest_xval_prediction(X_test, X_val8, y_val8, X_train8, y_train8, model_name):
    """Apply a saved pipeline to the training, validation and map feature tables.

    The pipeline is read from ``./report/models/tregr_{model_name}.pkl``. In every
    output frame ``x`` and ``y`` are taken by position from the first and second
    columns of the corresponding feature table.

    Returns
    -------
    tuple
        ``(model_df, val_df, train_df)``. ``train_df`` and ``val_df`` hold
        ``x``, ``y``, ``phit_pred`` and ``phit_true``; ``model_df`` holds
        ``x``, ``y`` and ``phit_pred`` only.
    """
    pipeline = joblib.load(f'./report/models/tregr_{model_name}.pkl')

    def _predicted_frame(features, **extra_columns):
        # Predict first, then assemble coordinates, prediction and any extra columns.
        predicted = pipeline.predict(features)
        return pd.DataFrame({'x': features.iloc[:, 0],
                             'y': features.iloc[:, 1],
                             'phit_pred': predicted,
                             **extra_columns})

    train_df = _predicted_frame(X_train8, phit_true=y_train8)
    val_df = _predicted_frame(X_val8, phit_true=y_val8)
    model_df = _predicted_frame(X_test)
    return model_df, val_df, train_df
model_df8, val_df8, train_df8 = xtest_xval_prediction(X_test8, X_val8, y_val8, X_train8, y_train8, 'ex_2')

rng = 0.0115
def validation_qc(val, train, rng):
    """Classify predictions against a ``+/- rng`` band around the true value.

    Both frames are modified in place and gain the columns ``qc_up``,
    ``qc_down``, ``qc`` and ``rng``. ``qc`` is ``'bigger'`` when ``phit_pred`` is
    at or above ``phit_true + rng``, ``'lower'`` when it is at or below
    ``phit_true - rng`` and ``'norm'`` otherwise. The class shares and counts of
    the validation frame are displayed.

    Returns
    -------
    tuple
        ``(val, train)`` — the same objects that were passed in.
    """
    def _flag(frame):
        truth = frame['phit_true']
        predicted = frame['phit_pred']
        frame['qc_up'] = truth + rng
        frame['qc_down'] = truth - rng
        frame['qc'] = 'norm'
        frame.loc[predicted >= frame['qc_up'], 'qc'] = 'bigger'
        frame.loc[predicted <= frame['qc_down'], 'qc'] = 'lower'
        frame['rng'] = rng

    _flag(val)
    for normalised in (True, False):
        display(val.qc.value_counts(normalize=normalised))

    _flag(train)
    return val, train
val_df8, train_df8 = validation_qc(val_df8, train_df8, rng)

def display_xplot_map(val_df, model_df, rng):
    """Draw the validation cross-plot next to the predicted-porosity map.

    Left panel: ``phit_pred`` against ``phit_true`` for ``val_df`` with the 1:1
    line (red) and the ``+/- rng`` band (green). Right panel (two grid columns
    wide): ``model_df`` points coloured by ``phit_pred``, the ``val_df``
    locations overlaid in green, and a colour bar.
    """
    fig = plt.figure(figsize=(16, 4))
    layout = GridSpec(1, 3, figure=fig)

    xplot_ax = fig.add_subplot(layout[0, 0])
    sns.scatterplot(data=val_df, x='phit_true', y='phit_pred', c='#1190e6', s=30, alpha=0.5, ec='black', ax=xplot_ax)
    lo, hi = 0.18, 0.28
    # 1:1 line first, then the upper and lower edges of the tolerance band.
    for shift, colour in ((0, 'r'), (rng, 'g'), (-rng, 'g')):
        sns.lineplot(x=[lo, hi], y=[lo + shift, hi + shift], c=colour, ls='--', ax=xplot_ax)
    xplot_ax.grid()

    map_ax = fig.add_subplot(layout[0, 1:])
    predicted_points = map_ax.scatter(model_df['x'], model_df['y'], c=model_df['phit_pred'], cmap='coolwarm', s=1)
    map_ax.scatter(val_df.x, val_df.y, c='green', s=10)
    plt.colorbar(predicted_points)
display_xplot_map(val_df8,model_df8, rng)

In [None]:
train_df8
sns.scatterplot(data=train_df8, x='phit_true', y='phit_pred', c='blue', s=30, alpha=0.5, ec='black')
sns.lineplot(x=[0.18, 0.28], y=[0.18, 0.28], c='r', ls='--')
sns.lineplot(x=[0.18, 0.28], y=[0.18+rng, 0.28+rng], c='g', ls='--')
sns.lineplot(x=[0.18, 0.28], y=[0.18-rng, 0.28-rng], c='g', ls='--')
display(train_df8.qc.value_counts(normalize=True))
display(train_df8.qc.value_counts())
plt.grid()

## Exercise 3
Only three seismic attributes are used as the basis for the PHIT prediction.

In [None]:
def train_test_generation(seismic_map, well_data):
    """Split the well table into training and validation sets for one experiment.

    Wells on the hard-coded outlier and near-neighbour lists are removed first.
    Of the remaining wells, those named on the fixed validation list form the
    validation set and all others form the training set.

    Parameters
    ----------
    seismic_map : pandas.DataFrame
        Attribute map to predict on; a copy is returned as ``X_test``.
    well_data : pandas.DataFrame
        Per-well table with a ``well`` column, the ``phit_net_mean`` target and
        the feature columns.

    Returns
    -------
    dict
        Keys ``X_train``, ``well_train``, ``y_train``, ``X_val``, ``well_val``,
        ``y_val``, ``X_test`` and ``well_data`` (the input table, unchanged).
    """
    map_copy = seismic_map.copy()

    excluded_outliers = ['B39', 'D02Y', 'D34', 'C36', 'B31', 'B20']
    # wells located very close to each other
    excluded_neighbours = ['B01ST1', 'D01', 'C14', 'C01A', 'B06', 'C13Z', 'C06', 'D01Z', 'C07']
    holdout_wells = ["B02Z", "B05", "B08Z", "B12", "B14Z", "B19", "C01", "C03Z",
                     "C10", "C14Z", "C20Z", "D02Z", "D03", "D04Z", "D07"]

    kept = well_data[~well_data.well.isin(excluded_outliers + excluded_neighbours)]
    features = kept.drop('phit_net_mean', axis=1)
    target = kept['phit_net_mean']

    # One boolean mask drives both the feature and the target split.
    is_holdout = features.well.isin(holdout_wells)
    train_part = features[~is_holdout]
    val_part = features[is_holdout]

    return {
        'X_train': train_part.drop('well', axis=1),
        'well_train': train_part['well'],
        'y_train': target[~is_holdout],
        'X_val': val_part.drop('well', axis=1),
        'well_val': val_part['well'],
        'y_val': target[is_holdout],
        'X_test': map_copy,
        'well_data': well_data,
    }
# Exercise 3 inputs: coordinates plus the three seismic attributes (ana0_80, ma0_80, rms0_80),
# selected by name from both the well table and the map.
df_well_attr_final_ex3 = df_well_attr_final[['well', 'phit_net_mean', 'x', 'y', 'ana0_80', 'ma0_80', 'rms0_80']]
seismic_map_v3_ex3 = seismic_map_v3[['x', 'y', 'ana0_80', 'ma0_80', 'rms0_80']]
result = train_test_generation(seismic_map_v3_ex3, df_well_attr_final_ex3)
X_train8, y_train8, X_val8,y_val8, X_test8 = result['X_train'], result['y_train'], result['X_val'], result['y_val'], result['X_test']

# Quick look at the shape and first rows of the training and map feature tables.
display(f'X_train8 shape: {X_train8.shape}')
display(X_train8.head(3))
display(f'X_test8 shape: {X_test8.shape}') 
display(X_test8.head(3))

In [None]:
# Fit a TPOT regressor on the Exercise 3 training wells (R2 scoring, fixed random_state).
tregr8 = tpot.TPOTRegressor(n_jobs=7, verbosity=2, generations=20, random_state=42, scoring='r2', early_stop=5)
tregr8.fit(X_train8, y_train8)
# Persist the fitted pipeline so the prediction cell can reload it as 'ex_3'.
name = 'ex_3'
joblib.dump(tregr8.fitted_pipeline_, f'./report/models/tregr_{name}.pkl')
# Last expression of the cell: shows the fitted pipeline.
tregr8.fitted_pipeline_

In [None]:
def xtest_xval_prediction(X_test, X_val8, y_val8, X_train8, y_train8, model_name):
    """Apply a saved pipeline to the training, validation and map feature tables.

    The pipeline is read from ``./report/models/tregr_{model_name}.pkl``. In every
    output frame ``x`` and ``y`` are taken by position from the first and second
    columns of the corresponding feature table.

    Returns
    -------
    tuple
        ``(model_df, val_df, train_df)``. ``train_df`` and ``val_df`` hold
        ``x``, ``y``, ``phit_pred`` and ``phit_true``; ``model_df`` holds
        ``x``, ``y`` and ``phit_pred`` only.
    """
    pipeline = joblib.load(f'./report/models/tregr_{model_name}.pkl')

    def _predicted_frame(features, **extra_columns):
        # Predict first, then assemble coordinates, prediction and any extra columns.
        predicted = pipeline.predict(features)
        return pd.DataFrame({'x': features.iloc[:, 0],
                             'y': features.iloc[:, 1],
                             'phit_pred': predicted,
                             **extra_columns})

    train_df = _predicted_frame(X_train8, phit_true=y_train8)
    val_df = _predicted_frame(X_val8, phit_true=y_val8)
    model_df = _predicted_frame(X_test)
    return model_df, val_df, train_df
model_df8, val_df8, train_df8 = xtest_xval_prediction(X_test8, X_val8, y_val8, X_train8, y_train8, 'ex_3')

rng = 0.0115
def validation_qc(val, train, rng):
    """Classify predictions against a ``+/- rng`` band around the true value.

    Both frames are modified in place and gain the columns ``qc_up``,
    ``qc_down``, ``qc`` and ``rng``. ``qc`` is ``'bigger'`` when ``phit_pred`` is
    at or above ``phit_true + rng``, ``'lower'`` when it is at or below
    ``phit_true - rng`` and ``'norm'`` otherwise. The class shares and counts of
    the validation frame are displayed.

    Returns
    -------
    tuple
        ``(val, train)`` — the same objects that were passed in.
    """
    def _flag(frame):
        truth = frame['phit_true']
        predicted = frame['phit_pred']
        frame['qc_up'] = truth + rng
        frame['qc_down'] = truth - rng
        frame['qc'] = 'norm'
        frame.loc[predicted >= frame['qc_up'], 'qc'] = 'bigger'
        frame.loc[predicted <= frame['qc_down'], 'qc'] = 'lower'
        frame['rng'] = rng

    _flag(val)
    for normalised in (True, False):
        display(val.qc.value_counts(normalize=normalised))

    _flag(train)
    return val, train
val_df8, train_df8 = validation_qc(val_df8, train_df8, rng)

def display_xplot_map(val_df, model_df, rng):
    """Draw the validation cross-plot next to the predicted-porosity map.

    Left panel: ``phit_pred`` against ``phit_true`` for ``val_df`` with the 1:1
    line (red) and the ``+/- rng`` band (green). Right panel (two grid columns
    wide): ``model_df`` points coloured by ``phit_pred``, the ``val_df``
    locations overlaid in green, and a colour bar.
    """
    fig = plt.figure(figsize=(16, 4))
    layout = GridSpec(1, 3, figure=fig)

    xplot_ax = fig.add_subplot(layout[0, 0])
    sns.scatterplot(data=val_df, x='phit_true', y='phit_pred', c='#1190e6', s=30, alpha=0.5, ec='black', ax=xplot_ax)
    lo, hi = 0.18, 0.28
    # 1:1 line first, then the upper and lower edges of the tolerance band.
    for shift, colour in ((0, 'r'), (rng, 'g'), (-rng, 'g')):
        sns.lineplot(x=[lo, hi], y=[lo + shift, hi + shift], c=colour, ls='--', ax=xplot_ax)
    xplot_ax.grid()

    map_ax = fig.add_subplot(layout[0, 1:])
    predicted_points = map_ax.scatter(model_df['x'], model_df['y'], c=model_df['phit_pred'], cmap='coolwarm', s=1)
    map_ax.scatter(val_df.x, val_df.y, c='green', s=10)
    plt.colorbar(predicted_points)
display_xplot_map(val_df8,model_df8, rng)

In [None]:
train_df8
sns.scatterplot(data=train_df8, x='phit_true', y='phit_pred', c='blue', s=30, alpha=0.5, ec='black')
sns.lineplot(x=[0.18, 0.28], y=[0.18, 0.28], c='r', ls='--')
sns.lineplot(x=[0.18, 0.28], y=[0.18+rng, 0.28+rng], c='g', ls='--')
sns.lineplot(x=[0.18, 0.28], y=[0.18-rng, 0.28-rng], c='g', ls='--')
display(train_df8.qc.value_counts(normalize=True))
display(train_df8.qc.value_counts())
plt.grid()

## Exercise 4

In [None]:
def train_test_generation(seismic_map, well_data):
    """Split the well table into training and validation sets for one experiment.

    Wells on the hard-coded outlier and near-neighbour lists are removed first.
    Of the remaining wells, those named on the fixed validation list form the
    validation set and all others form the training set.

    Parameters
    ----------
    seismic_map : pandas.DataFrame
        Attribute map to predict on; a copy is returned as ``X_test``.
    well_data : pandas.DataFrame
        Per-well table with a ``well`` column, the ``phit_net_mean`` target and
        the feature columns.

    Returns
    -------
    dict
        Keys ``X_train``, ``well_train``, ``y_train``, ``X_val``, ``well_val``,
        ``y_val``, ``X_test`` and ``well_data`` (the input table, unchanged).
    """
    map_copy = seismic_map.copy()

    excluded_outliers = ['B39', 'D02Y', 'D34', 'C36', 'B31', 'B20']
    # wells located very close to each other
    excluded_neighbours = ['B01ST1', 'D01', 'C14', 'C01A', 'B06', 'C13Z', 'C06', 'D01Z', 'C07']
    holdout_wells = ["B02Z", "B05", "B08Z", "B12", "B14Z", "B19", "C01", "C03Z",
                     "C10", "C14Z", "C20Z", "D02Z", "D03", "D04Z", "D07"]

    kept = well_data[~well_data.well.isin(excluded_outliers + excluded_neighbours)]
    features = kept.drop('phit_net_mean', axis=1)
    target = kept['phit_net_mean']

    # One boolean mask drives both the feature and the target split.
    is_holdout = features.well.isin(holdout_wells)
    train_part = features[~is_holdout]
    val_part = features[is_holdout]

    return {
        'X_train': train_part.drop('well', axis=1),
        'well_train': train_part['well'],
        'y_train': target[~is_holdout],
        'X_val': val_part.drop('well', axis=1),
        'well_val': val_part['well'],
        'y_val': target[is_holdout],
        'X_test': map_copy,
        'well_data': well_data,
    }
# Exercise 4 inputs: coordinates plus the three non-amplitude columns (top_tvd_scs, bal8_thick, dip8),
# selected by name from both the well table and the map.
df_well_attr_final_ex4 = df_well_attr_final[['well', 'phit_net_mean', 'x', 'y', 'top_tvd_scs', 'bal8_thick', 'dip8']]
seismic_map_v3_ex4 = seismic_map_v3[['x', 'y', 'top_tvd_scs', 'bal8_thick', 'dip8']]
result = train_test_generation(seismic_map_v3_ex4, df_well_attr_final_ex4)
X_train8, y_train8, X_val8,y_val8, X_test8 = result['X_train'], result['y_train'], result['X_val'], result['y_val'], result['X_test']

# Quick look at the shape and first rows of the training and map feature tables.
display(f'X_train8 shape: {X_train8.shape}')
display(X_train8.head(3))
display(f'X_test8 shape: {X_test8.shape}') 
display(X_test8.head(3))

In [None]:
# Fit a TPOT regressor on the Exercise 4 training wells (R2 scoring, fixed random_state).
tregr8 = tpot.TPOTRegressor(n_jobs=7, verbosity=2, generations=20, random_state=42, scoring='r2', early_stop=5)
tregr8.fit(X_train8, y_train8)
# Persist the fitted pipeline so the prediction cell can reload it as 'ex_4'.
name = 'ex_4'
joblib.dump(tregr8.fitted_pipeline_, f'./report/models/tregr_{name}.pkl')
# Last expression of the cell: shows the fitted pipeline.
tregr8.fitted_pipeline_

In [None]:
def xtest_xval_prediction(X_test, X_val8, y_val8, X_train8, y_train8, model_name):
    """Apply a saved pipeline to the training, validation and map feature tables.

    The pipeline is read from ``./report/models/tregr_{model_name}.pkl``. In every
    output frame ``x`` and ``y`` are taken by position from the first and second
    columns of the corresponding feature table.

    Returns
    -------
    tuple
        ``(model_df, val_df, train_df)``. ``train_df`` and ``val_df`` hold
        ``x``, ``y``, ``phit_pred`` and ``phit_true``; ``model_df`` holds
        ``x``, ``y`` and ``phit_pred`` only.
    """
    pipeline = joblib.load(f'./report/models/tregr_{model_name}.pkl')

    def _predicted_frame(features, **extra_columns):
        # Predict first, then assemble coordinates, prediction and any extra columns.
        predicted = pipeline.predict(features)
        return pd.DataFrame({'x': features.iloc[:, 0],
                             'y': features.iloc[:, 1],
                             'phit_pred': predicted,
                             **extra_columns})

    train_df = _predicted_frame(X_train8, phit_true=y_train8)
    val_df = _predicted_frame(X_val8, phit_true=y_val8)
    model_df = _predicted_frame(X_test)
    return model_df, val_df, train_df
model_df8, val_df8, train_df8 = xtest_xval_prediction(X_test8, X_val8, y_val8, X_train8, y_train8, 'ex_4')

rng = 0.0115
def validation_qc(val, train, rng):
    """Classify predictions against a ``+/- rng`` band around the true value.

    Both frames are modified in place and gain the columns ``qc_up``,
    ``qc_down``, ``qc`` and ``rng``. ``qc`` is ``'bigger'`` when ``phit_pred`` is
    at or above ``phit_true + rng``, ``'lower'`` when it is at or below
    ``phit_true - rng`` and ``'norm'`` otherwise. The class shares and counts of
    the validation frame are displayed.

    Returns
    -------
    tuple
        ``(val, train)`` — the same objects that were passed in.
    """
    def _flag(frame):
        truth = frame['phit_true']
        predicted = frame['phit_pred']
        frame['qc_up'] = truth + rng
        frame['qc_down'] = truth - rng
        frame['qc'] = 'norm'
        frame.loc[predicted >= frame['qc_up'], 'qc'] = 'bigger'
        frame.loc[predicted <= frame['qc_down'], 'qc'] = 'lower'
        frame['rng'] = rng

    _flag(val)
    for normalised in (True, False):
        display(val.qc.value_counts(normalize=normalised))

    _flag(train)
    return val, train
val_df8, train_df8 = validation_qc(val_df8, train_df8, rng)

def display_xplot_map(val_df, model_df, rng):
    """Draw the validation cross-plot next to the predicted-porosity map.

    Left panel: ``phit_pred`` against ``phit_true`` for ``val_df`` with the 1:1
    line (red) and the ``+/- rng`` band (green). Right panel (two grid columns
    wide): ``model_df`` points coloured by ``phit_pred``, the ``val_df``
    locations overlaid in green, and a colour bar.
    """
    fig = plt.figure(figsize=(16, 4))
    layout = GridSpec(1, 3, figure=fig)

    xplot_ax = fig.add_subplot(layout[0, 0])
    sns.scatterplot(data=val_df, x='phit_true', y='phit_pred', c='#1190e6', s=30, alpha=0.5, ec='black', ax=xplot_ax)
    lo, hi = 0.18, 0.28
    # 1:1 line first, then the upper and lower edges of the tolerance band.
    for shift, colour in ((0, 'r'), (rng, 'g'), (-rng, 'g')):
        sns.lineplot(x=[lo, hi], y=[lo + shift, hi + shift], c=colour, ls='--', ax=xplot_ax)
    xplot_ax.grid()

    map_ax = fig.add_subplot(layout[0, 1:])
    predicted_points = map_ax.scatter(model_df['x'], model_df['y'], c=model_df['phit_pred'], cmap='coolwarm', s=1)
    map_ax.scatter(val_df.x, val_df.y, c='green', s=10)
    plt.colorbar(predicted_points)
display_xplot_map(val_df8,model_df8, rng)

In [None]:
sns.scatterplot(data=train_df8, x='phit_true', y='phit_pred', c='blue', s=30, alpha=0.5, ec='black')
sns.lineplot(x=[0.18, 0.28], y=[0.18, 0.28], c='r', ls='--')
sns.lineplot(x=[0.18, 0.28], y=[0.18+rng, 0.28+rng], c='g', ls='--')
sns.lineplot(x=[0.18, 0.28], y=[0.18-rng, 0.28-rng], c='g', ls='--')
display(train_df8.qc.value_counts(normalize=True))
display(train_df8.qc.value_counts())
plt.grid()

## cross-validation

In [None]:
# Preview of one random split (random_state=8) used to set up the cross-validation below.
well_data = df_well_attr_final
outliers = ['B39', 'D02Y', 'D34', 'C36', 'B31', 'B20']
near_neighbor = ['B01ST1', 'D01', 'C14', 'C01A', 'B06', 'C13Z', 'C06', 'D01Z','C07']
# Wells kept out of the random split and plotted separately as blue crosses below.
contouring_wells = [    "B01Y",	"B02",	"B10",	"B21",	"B27Z",	"B34",	"B34Z",	
                        "B37",	"B39",	"B40",	"B42Z",	"B43",	"B44",	"C02",	
                        "C08Z",	"C21Z",	"C27Y",	"C32",	"C33Z",	"C36",	"C39",	
                        "C40",	"C42",	"C43",	"D09Z",	"D12Z",	"D13Y",	"D15",	
                        "D19Z",	"D20",	"D23X",	"D27",	"D34",	"D37"]
# Every well on any of the three lists is excluded from the pool that gets split.
drop_wells_list = set(outliers + near_neighbor + contouring_wells)
well_data_v2 = well_data[~well_data.well.isin(drop_wells_list)]
well_data_cont_wells = well_data[well_data.well.isin(contouring_wells)]

# X still contains the 'well' column here; only the target column is removed.
X = well_data_v2.drop('phit_net_mean', axis=1)
y = well_data_v2['phit_net_mean']
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.27, random_state=8, shuffle=True)
# Map of the split: training wells (red), contouring wells (blue crosses), validation wells (green diamonds).
plt.scatter(X_train.x, X_train.y, c='red',  s=20)
plt.scatter(well_data_cont_wells.x, well_data_cont_wells.y, c='blue',  s=20, marker='x')
plt.scatter(X_val.x, X_val.y, c='green',  s=20, marker='d')

In [None]:
def validation_wells_selection(well_data, num):
    """Draw one random train/validation split of the wells for seed ``num``.

    Wells on the outlier, near-neighbour and contouring lists are removed from
    the pool before splitting. The contouring wells are then appended to the
    training part, so they are never used for validation.

    Parameters
    ----------
    well_data : pandas.DataFrame
        Per-well table with a ``well`` column plus target and feature columns.
    num : int
        ``random_state`` passed to ``train_test_split``.

    Returns
    -------
    tuple
        ``(data_train_final, data_val)`` — full rows (all columns) of the
        training wells plus contouring wells, and of the validation wells.
    """
    outliers = ['B39', 'D02Y', 'D34', 'C36', 'B31', 'B20']
    near_neighbor = ['B01ST1', 'D01', 'C14', 'C01A', 'B06', 'C13Z', 'C06', 'D01Z', 'C07']
    # NOTE(review): B39, C36 and D34 are on both the outlier and the contouring list, so they
    # are excluded from the split pool but still enter the training set through the
    # contouring wells — confirm this is intended.
    contouring_wells = ["B01Y", "B02", "B10", "B21", "B27Z", "B34", "B34Z",
                        "B37", "B39", "B40", "B42Z", "B43", "B44", "C02",
                        "C08Z", "C21Z", "C27Y", "C32", "C33Z", "C36", "C39",
                        "C40", "C42", "C43", "D09Z", "D12Z", "D13Y", "D15",
                        "D19Z", "D20", "D23X", "D27", "D34", "D37"]
    drop_wells_list = set(outliers + near_neighbor + contouring_wells)

    well_data_v2 = well_data[~well_data.well.isin(drop_wells_list)]
    data_cont_wells = well_data[well_data.well.isin(contouring_wells)]

    # Only the row split is needed; the target stays inside the returned frames.
    data_train, data_val = train_test_split(well_data_v2, test_size=0.27, random_state=num, shuffle=True)
    data_train_final = pd.concat([data_train, data_cont_wells])

    return data_train_final, data_val


# One split, one TPOT search and one saved pipeline per seed.
for num in [97, 6, 82, 79, 52, 19]:
    data_train_final, data_val = validation_wells_selection(df_well_attr_final, num)
    # Persist the split so the evaluation cell can reload exactly the same wells.
    data_train_final.to_csv(f'./report/models_cv/data_train_{num}.csv', index=False)
    data_val.to_csv(f'./report/models_cv/data_val_{num}.csv', index=False)

    X_train8 = data_train_final.drop(['well','phit_net_mean'], axis=1)
    y_train8 = data_train_final['phit_net_mean']
    tregr8 = tpot.TPOTRegressor(n_jobs=7, verbosity=2, generations=20, random_state=42, scoring='r2', early_stop=5)
    tregr8.fit(X_train8, y_train8)
    joblib.dump(tregr8.fitted_pipeline_, f'./report/models_cv/tregr_{num}.pkl')


In [None]:
train_val_lst = []
model_df_lst = []
map_phit_lst = []
for num in [97, 6, 82, 79, 52, 19]:
    print('*'*25)
    print(f'model num: {num}')
    print('*'*25)    
    X_test8 = seismic_map_v3
    data_train = pd.read_csv(f'./report/models_cv/data_train_{num}.csv')
    data_val = pd.read_csv(f'./report/models_cv/data_val_{num}.csv')
    X_train8 = data_train.drop(['well','phit_net_mean'], axis=1)
    y_train8 = data_train['phit_net_mean']
    X_val8 = data_val.drop(['well','phit_net_mean'], axis=1)
    y_val8 = data_val['phit_net_mean']

    def xtest_xval_prediction(X_test, X_val8, y_val8, X_train8, y_train8, num):
        """Apply the pipeline saved for seed ``num`` to train, validation and map features.

        The pipeline is read from ``./report/models_cv/tregr_{num}.pkl``. ``x`` and
        ``y`` in each output frame come, by position, from the first and second
        columns of the corresponding feature table.

        Returns ``(model_df, val_df, train_df)``; only ``val_df`` and ``train_df``
        carry a ``phit_true`` column.
        """
        pipeline = joblib.load(f'./report/models_cv/tregr_{num}.pkl')

        def _predicted_frame(features, **extra_columns):
            # Predict first, then assemble coordinates, prediction and any extra columns.
            predicted = pipeline.predict(features)
            return pd.DataFrame({'x': features.iloc[:, 0],
                                 'y': features.iloc[:, 1],
                                 'phit_pred': predicted,
                                 **extra_columns})

        train_df = _predicted_frame(X_train8, phit_true=y_train8)
        val_df = _predicted_frame(X_val8, phit_true=y_val8)
        model_df = _predicted_frame(X_test)
        return model_df, val_df, train_df
    model_df8, val_df8, train_df8 = xtest_xval_prediction(X_test8, X_val8, y_val8, X_train8, y_train8, f'{num}')
    model_df8['num'] = num
    map_phit_lst.append(model_df8)

    rng = 0.0115
    val_df8_v2 = val_df8.set_index(['x','y']).join(data_val[['well','x','y']].set_index(['x','y'])).reset_index()
    train_df8_v2 = train_df8.set_index(['x','y']).join(data_train[['well','x','y']].set_index(['x','y'])).reset_index()
    def validation_qc(val, train, rng):
        """Classify predictions against a ``+/- rng`` band around the true value.

        Both frames are modified in place and gain ``qc_up``, ``qc_down``, ``qc``
        and ``rng``. ``qc`` is ``'bigger'`` when ``phit_pred`` is at or above
        ``phit_true + rng``, ``'lower'`` when it is at or below
        ``phit_true - rng`` and ``'norm'`` otherwise. Class shares and counts of
        the validation frame are displayed. Returns ``(val, train)``.
        """
        def _flag(frame):
            truth = frame['phit_true']
            predicted = frame['phit_pred']
            frame['qc_up'] = truth + rng
            frame['qc_down'] = truth - rng
            frame['qc'] = 'norm'
            frame.loc[predicted >= frame['qc_up'], 'qc'] = 'bigger'
            frame.loc[predicted <= frame['qc_down'], 'qc'] = 'lower'
            frame['rng'] = rng

        _flag(val)
        for normalised in (True, False):
            display(val.qc.value_counts(normalize=normalised))

        _flag(train)
        return val, train
    val_df8_v2, train_df8_v2 = validation_qc(val_df8_v2, train_df8_v2, rng)

    model = joblib.load(f'./report/models_cv/tregr_{num}.pkl')
    model8 = pd.DataFrame({'num': num, 'model': model}, index=[0])
    model_df_lst.append(model8)

    val_df8_v2['num'] = num
    val_df8_v2['type'] = 'val'
    train_df8_v2['num'] = num
    train_df8_v2['type'] = 'train'
    train_val_df8 = pd.concat([train_df8_v2, val_df8_v2])
    train_val_lst.append(train_val_df8)

    def display_xplot_map(val_df, model_df, rng):
        """Draw the labelled validation cross-plot next to the predicted-porosity map.

        Left panel: ``phit_pred`` against ``phit_true`` for ``val_df`` with the
        1:1 line (red), the ``+/- rng`` band (green) and a well-name label on
        every point. Right panel (two grid columns wide): ``model_df`` points
        coloured by ``phit_pred``, validation wells in green with their names,
        and a colour bar.

        ``val_df`` must provide ``well``, ``x``, ``y``, ``phit_true`` and
        ``phit_pred``; ``model_df`` must provide ``x``, ``y`` and ``phit_pred``.
        """
        fig = plt.figure(figsize=(16, 4))
        gs = GridSpec(1, 3, figure=fig)
        ax1 = fig.add_subplot(gs[0, 0])
        sns.scatterplot(data=val_df, x='phit_true', y='phit_pred', c='#1190e6', s=30, alpha=0.5, ec='black', ax=ax1)
        sns.lineplot(x=[0.18, 0.28], y=[0.18, 0.28], c='r', ls='--', ax=ax1)
        sns.lineplot(x=[0.18, 0.28], y=[0.18+rng, 0.28+rng], c='g', ls='--', ax=ax1)
        sns.lineplot(x=[0.18, 0.28], y=[0.18-rng, 0.28-rng], c='g', ls='--', ax=ax1)
        # Cross-plot labels live in (phit_true, phit_pred) space on the left axes.
        for idx, txt in enumerate(val_df.well):
            ax1.annotate(txt, (val_df.phit_true.iloc[idx], val_df.phit_pred.iloc[idx]), fontsize=8)
        ax1.grid()
        ax2 = fig.add_subplot(gs[0, 1:])
        cb1 = ax2.scatter(model_df['x'], model_df['y'], c=model_df['phit_pred'], cmap='coolwarm', s=1)
        ax2.scatter(val_df.x, val_df.y, c='green', s=10)
        # Map labels use (x, y) coordinates, so they must be drawn on the map axes;
        # drawing them on ax1 would place them far outside the cross-plot's data range.
        for idx, txt in enumerate(val_df.well):
            ax2.annotate(txt, (val_df.x.iloc[idx], val_df.y.iloc[idx]), fontsize=8)
        plt.colorbar(cb1)
        plt.show()
    display_xplot_map(val_df8_v2, model_df8, rng)

map_phit = pd.concat(map_phit_lst).reset_index(drop=True)
map_phit.to_csv('./report/models_cv/map_phit.csv', index=False)

def averaging_phit_map(map_phit_lst):
    """Average the per-seed prediction maps into one map.

    Each frame in ``map_phit_lst`` must carry ``x``, ``y``, ``phit_pred`` and a
    constant ``num`` column identifying the seed. The frames are placed side by
    side (aligned on their index) and the ``phit_pred`` columns are averaged
    row-wise. Seed identifiers are read from the data, so any set of seeds
    works; for the seeds 97, 6, 82, 79, 52, 19 the output is unchanged.

    Parameters
    ----------
    map_phit_lst : list of pandas.DataFrame
        One prediction map per seed, all describing the same points in the
        same row order.

    Returns
    -------
    pandas.DataFrame
        Columns ``x``, ``y`` (taken from the first seed's map), one
        ``phit_pred_{num}`` column per seed in order of appearance, and
        ``phit_pred_mean``.

    Raises
    ------
    ValueError
        If ``map_phit_lst`` is empty (raised by ``pandas.concat``).
    """
    stacked = pd.concat(map_phit_lst)
    seeds = list(stacked.num.unique())  # order of first appearance

    # Suffix every column with its seed so the frames can sit side by side.
    per_seed = [
        stacked[stacked.num == seed].rename(columns={'phit_pred': f'phit_pred_{seed}',
                                                     'x': f'x_{seed}',
                                                     'y': f'y_{seed}',
                                                     'num': f'num_{seed}'})
        for seed in seeds
    ]
    wide = pd.concat(per_seed, axis=1)

    first = seeds[0]
    pred_columns = [f'phit_pred_{seed}' for seed in seeds]
    averaged = wide[[f'x_{first}', f'y_{first}'] + pred_columns].rename(
        columns={f'x_{first}': 'x', f'y_{first}': 'y'})
    averaged['phit_pred_mean'] = averaged[pred_columns].mean(axis=1)
    return averaged
# Average the per-seed prediction maps, save the result and plot the mean porosity map.
map_phit_avg = averaging_phit_map(map_phit_lst)
map_phit_avg.to_csv('./report/models_cv/map_phit_avg.csv', index=False)
plt.scatter(map_phit_avg.x, map_phit_avg.y, c=map_phit_avg.phit_pred_mean, cmap='coolwarm', s=1)

# Stack the per-seed train/validation QC tables, attach the pipeline loaded for each seed
# (joined on 'num') and export everything to a single CSV.
model_df = pd.concat(model_df_lst).reset_index(drop=True)
train_val = pd.concat(train_val_lst).reset_index(drop=True)
train_val_v2 = train_val.set_index('num').join(model_df.set_index('num'), on='num', how='left').reset_index()
# Fix the column order of the exported table.
train_val_v2 = train_val_v2[['well', 'x', 'y', 'phit_pred', 'phit_true',  'qc_up', 'qc', 'qc_down', 'rng', 'model', 'num', 'type']]
train_val_v2.to_csv('./report/models_cv/train_val.csv', index=False)

## map_phit_avg vs v2

In [None]:
mdl_lst = [97, 6, 82, 79, 52, 19]
model_print = joblib.load(f'./report/models_cv/tregr_{mdl_lst[1]}.pkl')
model_print

In [None]:
map_phit_avg = pd.read_csv('./report/models_cv/map_phit_avg.csv')

def seism_well_correl_v2(seism_map, wells_df, margin, value, file):
    """Summarise a map attribute inside a square window around every well.

    For each row of ``wells_df`` the map points strictly inside the square of
    side ``margin`` centred on (``xmean``, ``ymean``) are selected, and the mean
    and the 25th/50th/75th percentiles of ``value`` over those points are stored.

    The caller's ``wells_df`` is not modified: the window bounds are computed
    on an internal copy.

    Parameters
    ----------
    seism_map : pandas.DataFrame
        Map with ``x``, ``y`` and the ``value`` column.
    wells_df : pandas.DataFrame
        One row per well with ``well``, ``phit_net_mean``, ``xmean``, ``ymean``
        and ``field``.
    margin : float
        Side length of the square window, in map coordinate units.
    value : str
        Name of the map column to summarise.
    file : str
        Label written to the ``seism_att`` column of both outputs.

    Returns
    -------
    tuple of pandas.DataFrame
        ``result`` has one row per well with the statistics
        (``{value}_avg``, ``{value}_p50``, ``{value}_p25``, ``{value}_p75``),
        the window bounds, ``margin``, ``field``, ``seism_att`` and the
        bottom-left window corner. ``seism_map_zone`` holds the selected map
        points of all wells, each tagged with its ``well`` and ``seism_att``.
        A well with no map points in its window gets NaN statistics; an empty
        ``wells_df`` yields two empty frames.
    """
    half = margin / 2

    # Work on a copy so the window columns never leak into the caller's frame.
    wells = wells_df.copy()
    wells['xmean_min'] = wells['xmean'] - half
    wells['xmean_max'] = wells['xmean'] + half
    wells['ymean_min'] = wells['ymean'] - half
    wells['ymean_max'] = wells['ymean'] + half
    seism_map_short = seism_map[['x', 'y', value]]

    summary_columns = ['well', 'phit_net_mean',
                       value + '_avg', value + '_p50', value + '_p25', value + '_p75',
                       'xmean', 'ymean', 'xmean_min', 'xmean_max', 'ymean_min', 'ymean_max',
                       'margin', 'field', 'seism_att']
    summary_rows = []
    zones = []
    for _, row in wells.iterrows():
        inside = ((seism_map_short['x'] > row['xmean_min']) &
                  (seism_map_short['x'] < row['xmean_max']) &
                  (seism_map_short['y'] > row['ymean_min']) &
                  (seism_map_short['y'] < row['ymean_max']))
        # Explicit copy: the zone gets its own 'well' column below.
        zone = seism_map_short[inside].copy()
        attribute = zone[value]

        summary_rows.append({'well': row['well'],
                             'phit_net_mean': row['phit_net_mean'],
                             value + '_avg': attribute.mean(),
                             value + '_p50': attribute.quantile(0.5),
                             value + '_p25': attribute.quantile(0.25),
                             value + '_p75': attribute.quantile(0.75),
                             'xmean': row['xmean'],
                             'ymean': row['ymean'],
                             'xmean_min': row['xmean_min'],
                             'xmean_max': row['xmean_max'],
                             'ymean_min': row['ymean_min'],
                             'ymean_max': row['ymean_max'],
                             'margin': margin,
                             'field': row['field'],
                             'seism_att': file})

        zone['well'] = row['well']
        zones.append(zone)

    # Build the summary table once instead of concatenating one-row frames.
    result = pd.DataFrame(summary_rows, columns=summary_columns)
    result['bottom_left_x'] = result['xmean'] - half
    result['bottom_left_y'] = result['ymean'] - half

    if zones:
        seism_map_zone = pd.concat(zones).reset_index(drop=True)
    else:
        # No wells: return an empty frame with the usual columns instead of failing in concat.
        seism_map_zone = seism_map_short.iloc[0:0].assign(well=pd.Series(dtype=object))
    seism_map_zone['seism_att'] = file
    return result, seism_map_zone
# Compare the averaged prediction map with the wells: mean predicted porosity in a 300-unit
# square window around each well against the well's phit_net_mean.
wells_df = df_attr_well_full[['well', 'phit_net_mean', 'xmean', 'ymean', 'margin', 'field']].drop_duplicates()
phit_wells_init, phit_square = seism_well_correl_v2(map_phit_avg, wells_df, 300, 'phit_pred_mean', 'phit_pred_mean')
phit_wells = phit_wells_init[['well',	'phit_net_mean', 'phit_pred_mean_avg']]
display(phit_wells.head(3))

# Same band QC as validation_qc, applied to the window-averaged map value.
rng = 0.0115*1
# NOTE(review): phit_wells is a column subset of phit_wells_init; the assignments below can
# trigger SettingWithCopyWarning (warnings are silenced at the top of the notebook) —
# consider an explicit .copy() when creating phit_wells.
phit_wells['qc_up'] = phit_wells['phit_net_mean']  + rng
phit_wells['qc_down'] = phit_wells['phit_net_mean'] - rng
phit_wells['qc'] = 'norm'
phit_wells.loc[phit_wells['phit_pred_mean_avg'] >= phit_wells['qc_up'], 'qc'] = 'bigger'
phit_wells.loc[phit_wells['phit_pred_mean_avg'] <= phit_wells['qc_down'], 'qc'] = 'lower'
phit_wells['rng'] = rng
# Share and count of wells per QC class.
display(phit_wells.qc.value_counts(normalize=True).round(2))
display(phit_wells.qc.value_counts())
# Cross-plot coloured by QC class with the 1:1 line (red) and the +/- rng band (green).
sns.scatterplot(data=phit_wells, x='phit_net_mean', y='phit_pred_mean_avg', hue='qc', s=30, alpha=0.75, ec='black')
sns.lineplot(x=[0.18, 0.28], y=[0.18, 0.28], c='r', ls='--')
sns.lineplot(x=[0.18, 0.28], y=[0.18+rng, 0.28+rng], c='g', ls='--')
sns.lineplot(x=[0.18, 0.28], y=[0.18-rng, 0.28-rng], c='g', ls='--')
plt.grid()
plt.title('map_phit_avg')

In [None]:
map_phit_avg_v2 = pd.read_csv('./report/models_cv/map_phit_avg_v2.csv')

def seism_well_correl_v2(seism_map, wells_df, margin, value, file):
    """Summarise a map attribute inside a square window around every well.

    For each row of ``wells_df`` the map points whose ``x``/``y`` lie strictly
    inside the ``margin`` x ``margin`` square centred on (``xmean``, ``ymean``)
    are selected, and the mean and the 25/50/75 % quantiles of ``value`` are
    computed over them.

    Parameters
    ----------
    seism_map : pandas.DataFrame
        Map points; must contain the columns ``x``, ``y`` and ``value``.
    wells_df : pandas.DataFrame
        One row per well with the columns ``well``, ``phit_net_mean``,
        ``xmean``, ``ymean`` and ``field``.  The window bounds ``xmean_min``,
        ``xmean_max``, ``ymean_min`` and ``ymean_max`` are added to this frame
        in place.
    margin : number
        Side length of the square window.
    value : str
        Name of the map column to summarise; also the prefix of the
        ``<value>_avg`` / ``_p50`` / ``_p25`` / ``_p75`` output columns.
    file : str
        Label stored in the ``seism_att`` column of both outputs.

    Returns
    -------
    tuple of pandas.DataFrame
        ``(result, seism_map_zone)``: one summary row per well, and the map
        points that fell inside the windows tagged with ``well``.  Both frames
        are empty (but carry their columns) when ``wells_df`` has no rows.
    """
    half_margin = margin/2
    # Side effect kept from the original implementation: the bounds are written
    # into the caller's frame.
    wells_df['xmean_min'] = wells_df['xmean'] - half_margin
    wells_df['xmean_max'] = wells_df['xmean'] + half_margin
    wells_df['ymean_min'] = wells_df['ymean'] - half_margin
    wells_df['ymean_max'] = wells_df['ymean'] + half_margin
    seism_map_short = seism_map[['x', 'y', value]]

    records = []
    zones = []
    for _, row in wells_df.iterrows():
        inside = ((seism_map_short['x'] > row['xmean_min']) &
                  (seism_map_short['x'] < row['xmean_max']) &
                  (seism_map_short['y'] > row['ymean_min']) &
                  (seism_map_short['y'] < row['ymean_max']))
        # Independent copy, so tagging it with the well name below never writes
        # into a slice of the map.
        zone = seism_map_short[inside].copy()
        records.append({'well': row['well'],
                        'phit_net_mean': row['phit_net_mean'],
                        value+'_avg': zone[value].mean(),
                        value+'_p50': zone[value].quantile(0.5),
                        value+'_p25': zone[value].quantile(0.25),
                        value+'_p75': zone[value].quantile(0.75),
                        'xmean': row['xmean'],
                        'ymean': row['ymean'],
                        'xmean_min': row['xmean_min'],
                        'xmean_max': row['xmean_max'],
                        'ymean_min': row['ymean_min'],
                        'ymean_max': row['ymean_max'],
                        'margin': margin,
                        'field': row['field'],
                        'seism_att': file})
        zone['well'] = row['well']
        zones.append(zone)

    if not records:
        # pd.concat([]) raises, so an empty wells table gets empty, labelled outputs.
        result_columns = ['well', 'phit_net_mean',
                          value+'_avg', value+'_p50', value+'_p25', value+'_p75',
                          'xmean', 'ymean', 'xmean_min', 'xmean_max', 'ymean_min', 'ymean_max',
                          'margin', 'field', 'seism_att', 'bottom_left_x', 'bottom_left_y']
        return pd.DataFrame(columns=result_columns), pd.DataFrame(columns=['x', 'y', value, 'well', 'seism_att'])

    # One DataFrame from all records instead of concatenating one-row frames.
    result = pd.DataFrame(records)
    # Lower-left corner of each window.
    result['bottom_left_x'] = result['xmean'] - half_margin
    result['bottom_left_y'] = result['ymean'] - half_margin
    seism_map_zone = pd.concat(zones).reset_index(drop=True)
    seism_map_zone['seism_att'] = file
    return result, seism_map_zone
# Same well-vs-map QC as for map_phit_avg, applied to the v2 averaged map.
wells_df = df_attr_well_full[['well', 'phit_net_mean', 'xmean', 'ymean', 'margin', 'field']].drop_duplicates()
phit_wells_init_v2, phit_square_v2 = seism_well_correl_v2(map_phit_avg_v2, wells_df, 300, 'phit_pred_mean', 'phit_pred_mean')
# Explicit copy: the QC columns below are written to an independent frame
# instead of a slice of phit_wells_init_v2 (avoids chained assignment).
phit_wells_v2 = phit_wells_init_v2[['well', 'phit_net_mean', 'phit_pred_mean_avg']].copy()
display(phit_wells_v2.head(3))

rng = 0.0115*1.  # half-width of the tolerance band around the 1:1 line
phit_wells_v2['qc_up'] = phit_wells_v2['phit_net_mean']  + rng
phit_wells_v2['qc_down'] = phit_wells_v2['phit_net_mean'] - rng
# Classify every well: map value inside the band ('norm'), above it or below it.
phit_wells_v2['qc'] = 'norm'
phit_wells_v2.loc[phit_wells_v2['phit_pred_mean_avg'] >= phit_wells_v2['qc_up'], 'qc'] = 'bigger'
phit_wells_v2.loc[phit_wells_v2['phit_pred_mean_avg'] <= phit_wells_v2['qc_down'], 'qc'] = 'lower'
phit_wells_v2['rng'] = rng
display(phit_wells_v2.qc.value_counts(normalize=True).round(2))
display(phit_wells_v2.qc.value_counts())
sns.scatterplot(data=phit_wells_v2, x='phit_net_mean', y='phit_pred_mean_avg', hue='qc', s=30, alpha=0.75, ec='black')
# Red: 1:1 line; green: +/- rng tolerance band.
sns.lineplot(x=[0.18, 0.28], y=[0.18, 0.28], c='r', ls='--')
sns.lineplot(x=[0.18, 0.28], y=[0.18+rng, 0.28+rng], c='g', ls='--')
sns.lineplot(x=[0.18, 0.28], y=[0.18-rng, 0.28-rng], c='g', ls='--')
plt.grid()
plt.title('map_phit_avg_v2')

## train vs val ds

In [None]:
def train_val_qcxplots(train_val_v2, rng, num):
    """Draw train (left) and validation (right) QC cross-plots for one model.

    Rows of ``train_val_v2`` with ``num == num`` are split by their ``type``
    column ('train' / 'val') and plotted as ``phit_pred`` against
    ``phit_true``, coloured by the ``qc`` label.  Each panel also gets the 1:1
    line (red) and the 1:1 line shifted by +/- ``rng`` (green).
    """
    model_rows = train_val_v2[train_val_v2.num == num]
    fig, ax = plt.subplots(1, 2, figsize=(10, 4))
    qc_colors = {'norm':'#1190e6', 'bigger':'#ff7f0e', 'lower':'#2ca02c'}
    # Offsets of the three dashed guide lines relative to the 1:1 line.
    guide_lines = ((0, 'r'), (rng, 'g'), (-rng, 'g'))

    for panel, subset in zip(ax, ('train', 'val')):
        sns.scatterplot(data=model_rows[model_rows.type == subset], x='phit_true', y='phit_pred',
                        hue='qc', s=30, alpha=0.75, ec='black', ax=panel, palette=qc_colors)
        for offset, colour in guide_lines:
            if colour == 'r':
                sns.lineplot(x=[0.15, 0.28], y=[0.15, 0.28], c='r', ls='--', ax=panel)
            else:
                sns.lineplot(x=[0.15, 0.28], y=[0.15+offset, 0.28+offset], c=colour, ls='--', ax=panel)

    # Same axis window and grid on both panels.
    for panel in ax:
        panel.set_xlim(0.16, 0.28)
        panel.set_ylim(0.16, 0.28)
    for panel in ax:
        panel.grid()
    plt.suptitle(f'Model {num}')
    plt.show()

# Draw the train/validation QC cross-plots for the selected models.
# NOTE(review): model 82 is plotted here but is not among the phit_pred_* columns
# averaged in averaging_phit_map — confirm this is intended.
for num in [97, 6, 82, 79, 52, 19]:
    train_val_qcxplots(train_val_v2, rng, num)

In [None]:
def averaging_phit_map(map_phit_lst, model_nums=(97, 6, 79, 52, 19)):
    """Average the PHIT predictions of several models into one map.

    Parameters
    ----------
    map_phit_lst : list of pandas.DataFrame
        Per-model maps with the columns ``x``, ``y``, ``phit_pred`` and ``num``
        (the model number).  The per-model frames are placed side by side on
        their index, so rows of different models must share index labels.
    model_nums : sequence of int, optional
        Model numbers whose predictions are averaged.  The ``x``/``y``
        coordinates are taken from the first one.  The default reproduces the
        previously hard-coded selection.

    Returns
    -------
    pandas.DataFrame
        Columns ``x``, ``y``, one ``phit_pred_<num>`` per selected model and
        ``phit_pred_mean`` (row-wise mean of the selected predictions).

    Raises
    ------
    ValueError
        If ``model_nums`` is empty.
    KeyError
        If a requested model is not present in ``map_phit_lst``.
    """
    model_nums = list(model_nums)
    if not model_nums:
        raise ValueError('model_nums must contain at least one model number')

    map_phit_all = pd.concat(map_phit_lst)
    per_model = []
    for num in map_phit_all.num.unique():
        data = map_phit_all[map_phit_all.num == num]
        # Suffix the columns with the model number so they stay distinct
        # after the side-by-side concatenation.
        data = data.rename(columns={'phit_pred':f'phit_pred_{num}',
                                    'x':f'x_{num}',
                                    'y':f'y_{num}',
                                    'num':f'num_{num}'})
        per_model.append(data)
    wide = pd.concat(per_model, axis=1)

    x_col, y_col = f'x_{model_nums[0]}', f'y_{model_nums[0]}'
    pred_cols = [f'phit_pred_{num}' for num in model_nums]
    missing = [col for col in [x_col, y_col] + pred_cols if col not in wide.columns]
    if missing:
        raise KeyError(f'models missing from map_phit_lst: {missing}')

    # .copy() so the mean column is added to an independent frame, not a slice.
    selected = wide[[x_col, y_col] + pred_cols].copy()
    selected['phit_pred_mean'] = selected[pred_cols].mean(axis=1)
    return selected.rename(columns={x_col:'x', y_col:'y'})
# Build the averaged PHIT map from the per-model maps.
map_phit_avg_v2 = averaging_phit_map(map_phit_lst)
# Persist it; the CSV is reloaded elsewhere in the notebook with pd.read_csv.
map_phit_avg_v2.to_csv('./report/models_cv/map_phit_avg_v2.csv', index=False)
# Quick-look map of the averaged prediction.
plt.scatter(map_phit_avg_v2.x, map_phit_avg_v2.y, c=map_phit_avg_v2.phit_pred_mean, cmap='coolwarm', s=1)

# Perm calculation based on phit_map

In [None]:
def perm_ds_us_calc(dataset, value):
    """Add porosity-derived permeability columns to a copy of ``dataset``.

    Three columns are added, each from piecewise laws of the porosity column
    ``value``:

    * ``perm_us`` - ``perm_us_1`` on [0.13, 0.2], ``perm_us_2`` on [0.2, 1];
    * ``perm``    - ``perm_15_16`` on [0.15, 0.16), ``perm_16_20`` on
      [0.16, 0.2), ``perm_20`` on [0.2, 1];
    * ``perm_ds`` - ``perm_ds_1`` on [0.13, 0.2], ``perm_ds_2`` on [0.2, 1].

    At exactly 0.2 the upper-range law wins, because it is assigned last.
    Rows outside every range get 0 in ``perm_us`` / ``perm_ds`` and stay NaN
    in ``perm``.

    Each law is evaluated only on the rows inside its own range.  Evaluating
    the low-range polynomials on every row made ``10**x`` overflow (and raise)
    as soon as the data contained a porosity far above 0.2.

    Parameters
    ----------
    dataset : pandas.DataFrame
        Input table; it is not modified.
    value : str
        Name of the porosity column (fraction).

    Returns
    -------
    pandas.DataFrame
        Copy of ``dataset`` with ``perm_us``, ``perm`` and ``perm_ds`` added.
    """
    def perm_ds_1(x):
        return 10**(39139.46 * pow(x, 5) - 11140.04 * pow(x, 4) + 855.2176 * pow(x, 3) + 7.04505 * pow(x, 2) + 5.750233 * x - 1.997085)
    def perm_us_1(x):
        return 10**(1638286 * pow(x, 5) - 1396883 * pow(x, 4) + 468324.8 * pow(x, 3) - 76974.79 * pow(x, 2) + 6217.262 * x - 198.5042)
    def perm_ds_2(x):
        return 10**(+ 5675.143 * pow(x, 5) - 11106.91 * pow(x, 4) + 8608.366 * pow(x, 3) - 3318.893 * pow(x, 2) + 644.7713 * x - 48.16968)
    def perm_us_2(x):
        return 10**(+ 81.59968 * pow(x, 5) - 275.5442 * pow(x, 4) + 364.9522 * pow(x, 3) - 238.8838 * pow(x, 2) + 79.97139 * x - 7.15)

    def perm_20(x):
        return (7.7925*((x*100)**2))-(29881.0*x)+2891.8

    def perm_16_20(x):
        return 0.00000002*(np.exp(x*105.56))

    def perm_15_16(x):
        return 0.0159*(np.exp(x*21.27))

    df = dataset.copy()

    def apply_in_range(in_range, column, law):
        # Create the column on first use so it exists even if no row qualifies.
        if column not in df.columns:
            df[column] = np.nan
        if in_range.any():
            # Positional assignment: the values come back in the same row
            # order as the mask selects them.
            df.loc[in_range, column] = df.loc[in_range, value].apply(law).to_numpy()

    phi = df[value]
    low_range = (phi >= 0.13) & (phi <= 0.2)
    high_range = (phi >= 0.2) & (phi <= 1)

    apply_in_range(low_range, 'perm_us', perm_us_1)
    apply_in_range(high_range, 'perm_us', perm_us_2)

    apply_in_range(high_range, 'perm', perm_20)
    apply_in_range((phi >= 0.16) & (phi < 0.2), 'perm', perm_16_20)
    apply_in_range((phi >= 0.15) & (phi < 0.16), 'perm', perm_15_16)

    apply_in_range(low_range, 'perm_ds', perm_ds_1)
    apply_in_range(high_range, 'perm_ds', perm_ds_2)

    df['perm_ds'] = df['perm_ds'].fillna(0)
    df['perm_us'] = df['perm_us'].fillna(0)
    return df
# Per-well summary over net samples (net == 1): mean PHIT, summed net flag
# scaled by 0.1, first KHTST / coordinates / field, then the PHIT-based
# permeability columns renamed with the *_phit_avg suffix.
htst_phit = (
    df_bal8_v4_flag.loc[df_bal8_v4_flag.net == 1]
    .groupby('well')[['phit', 'xmean', 'ymean', 'net', 'khtst', 'field']]
    .agg({'phit': 'mean',
          'net': lambda x: x.sum()*0.1,
          'khtst': 'first',
          'xmean': 'first', 'ymean': 'first',
          'field': 'first'})
    .reset_index()
    .pipe(perm_ds_us_calc, 'phit')
    .rename(columns={'perm':'perm_phit_avg', 'perm_ds':'perm_ds_phit_avg', 'perm_us':'perm_us_phit_avg'})
)
htst_phit

In [None]:
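# Reference: PHI -> PERM transform implemented by perm_20 / perm_16_20 / perm_15_16 in perm_ds_us_calc
# if PHI >=0.20: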
# PERM = (7.7925*((PHI*100)**2))-(29881.0*PHI)+2891.8

# elif PHI >= 0.16 and PHI < 0.20:
# PERM = 0.00000002*(exp(PHI*105.56))

# elif PHI >= 0.15 and PHI < 0.16:
# PERM = 0.0159*(exp(PHI*21.27))

In [None]:
# Reload the averaged PHIT map and convert it to permeability.
map_phit_avg = pd.read_csv('./report/models_cv/map_phit_avg.csv')
perm_map = perm_ds_us_calc(map_phit_avg, 'phit_pred_mean')
# Map view coloured by the 'perm' column.
fig = plt.figure(figsize=(16, 7))
cb = plt.scatter(perm_map.x, perm_map.y, c=perm_map.perm, cmap='coolwarm', s=1)
plt.colorbar(cb)
plt.show()

In [None]:
def seism_well_correl_v3(seism_map, wells_df, margin, value, file):
    """Summarise a map attribute around every well and carry the well KH inputs.

    For each row of ``wells_df`` the map points whose ``x``/``y`` lie strictly
    inside the ``margin`` x ``margin`` square centred on (``xmean``, ``ymean``)
    are selected, and the mean and the 25/50/75 % quantiles of ``value`` are
    computed over them.

    Parameters
    ----------
    seism_map : pandas.DataFrame
        Map points; must contain the columns ``x``, ``y`` and ``value``.
    wells_df : pandas.DataFrame
        One row per well with the columns ``well``, ``phit``, ``xmean``,
        ``ymean``, ``field``, ``net``, ``khtst``, ``perm_us_phit_avg``,
        ``perm_phit_avg`` and ``perm_ds_phit_avg``.  The window bounds
        ``xmean_min``, ``xmean_max``, ``ymean_min`` and ``ymean_max`` are added
        to this frame in place.
    margin : number
        Side length of the square window.
    value : str
        Name of the map column to summarise; also the prefix of the
        ``<value>_avg`` / ``_p50`` / ``_p25`` / ``_p75`` output columns.
    file : str
        Label stored in the ``seism_att`` column of both outputs.

    Returns
    -------
    tuple of pandas.DataFrame
        ``(result, seism_map_zone)``: one summary row per well (the well's
        ``net`` is reported as ``htst``), and the map points that fell inside
        the windows tagged with ``well``.  Both frames are empty (but carry
        their columns) when ``wells_df`` has no rows.
    """
    half_margin = margin/2
    # Side effect kept from the original implementation: the bounds are written
    # into the caller's frame.
    wells_df['xmean_min'] = wells_df['xmean'] - half_margin
    wells_df['xmean_max'] = wells_df['xmean'] + half_margin
    wells_df['ymean_min'] = wells_df['ymean'] - half_margin
    wells_df['ymean_max'] = wells_df['ymean'] + half_margin
    seism_map_short = seism_map[['x', 'y', value]]

    records = []
    zones = []
    for _, row in wells_df.iterrows():
        inside = ((seism_map_short['x'] > row['xmean_min']) &
                  (seism_map_short['x'] < row['xmean_max']) &
                  (seism_map_short['y'] > row['ymean_min']) &
                  (seism_map_short['y'] < row['ymean_max']))
        # Independent copy, so tagging it with the well name below never writes
        # into a slice of the map.
        zone = seism_map_short[inside].copy()
        records.append({'well': row['well'],
                        'phit': row['phit'],
                        value+'_avg': zone[value].mean(),
                        value+'_p50': zone[value].quantile(0.5),
                        value+'_p25': zone[value].quantile(0.25),
                        value+'_p75': zone[value].quantile(0.75),
                        'xmean': row['xmean'],
                        'ymean': row['ymean'],
                        'xmean_min': row['xmean_min'],
                        'xmean_max': row['xmean_max'],
                        'ymean_min': row['ymean_min'],
                        'ymean_max': row['ymean_max'],
                        'margin': margin,
                        'field': row['field'],
                        'htst': row['net'],
                        'khtst': row['khtst'],
                        'perm_us_phit_avg': row['perm_us_phit_avg'],
                        'perm_phit_avg': row['perm_phit_avg'],
                        'perm_ds_phit_avg': row['perm_ds_phit_avg'],
                        'seism_att': file})
        zone['well'] = row['well']
        zones.append(zone)

    if not records:
        # pd.concat([]) raises, so an empty wells table gets empty, labelled outputs.
        result_columns = ['well', 'phit',
                          value+'_avg', value+'_p50', value+'_p25', value+'_p75',
                          'xmean', 'ymean', 'xmean_min', 'xmean_max', 'ymean_min', 'ymean_max',
                          'margin', 'field', 'htst', 'khtst',
                          'perm_us_phit_avg', 'perm_phit_avg', 'perm_ds_phit_avg',
                          'seism_att', 'bottom_left_x', 'bottom_left_y']
        return pd.DataFrame(columns=result_columns), pd.DataFrame(columns=['x', 'y', value, 'well', 'seism_att'])

    # One DataFrame from all records instead of concatenating one-row frames.
    result = pd.DataFrame(records)
    # Lower-left corner of each window.
    result['bottom_left_x'] = result['xmean'] - half_margin
    result['bottom_left_y'] = result['ymean'] - half_margin
    seism_map_zone = pd.concat(zones).reset_index(drop=True)
    seism_map_zone['seism_att'] = file
    return result, seism_map_zone
# Map permeability inside a 300-wide window around every well, then KH from
# the map permeability (khtst_map) and from the log-PHIT permeability (khtst_log).
perm_map_wells_init, perm_map_square = seism_well_correl_v3(perm_map, htst_phit, 300, 'perm', 'perm')
kept_columns = ['well', 'phit', 'perm_us_phit_avg', 'perm_phit_avg', 'perm_ds_phit_avg', 'htst', 'perm_avg', 'khtst']
perm_map_wells = (
    perm_map_wells_init[kept_columns]
    .rename(columns={'perm_avg':'perm_map_avg'})
    .assign(khtst_map=lambda d: d['perm_map_avg'] * d['htst'],
            khtst_log=lambda d: d['perm_phit_avg'] * d['htst'])
)
perm_map_wells

In [None]:
# Three KH cross-plots: khtst vs khtst_log, khtst_log vs khtst_map, khtst_map vs khtst.
fig, ax = plt.subplots(1, 3, figsize=(18, 5))
sns.scatterplot(data=perm_map_wells, x='khtst', y='khtst_log',  c='#32aa22', s=30, alpha=0.75, ec='black', ax=ax[0])
sns.scatterplot(data=perm_map_wells, x='khtst_log', y='khtst_map',  c ='#eea506' ,s=30, alpha=0.75, ec='black', ax=ax[1])
# Dashed red: 1:1 reference line.
sns.lineplot(x=[0, 20000], y=[0, 20000], c='r', ls='--', ax=ax[1])
sns.scatterplot(data=perm_map_wells, x='khtst_map', y='khtst',  s=30, alpha=0.75, ec='black', ax=ax[2])
sns.lineplot(x=[0, 20000], y=[0, 20000], c='r', ls='--', ax=ax[2])
# Dotted red: second reference line from (0, 0) to (8000, 30000).
sns.lineplot(x=[0, 8000], y=[0, 30000], c='r', ls=':', ax=ax[2])
# NOTE(review): only the third panel gets a grid — confirm this is intended.
ax[2].grid()

In [None]:
# Calculate the distance between the two dashed lines on the 3rd diagram
def calculate_distance(x1, y1, x2, y2):
    """Return the straight-line distance between (x1, y1) and (x2, y2), rounded to 0 decimals."""
    dx = x2 - x1
    dy = y2 - y1
    squared_length = dx**2 + dy**2
    return round(squared_length**(1/2), 0)

# End points used for the measurement.
# NOTE(review): the dotted line on the plot ends at (8000, 30000); (30000, 8000)
# is its mirror image and lies at the same distance from (20000, 20000).
x1, y1 = 20000, 20000
x2, y2 = 30000, 8000
distance = calculate_distance(x1, y1, x2, y2)
distance

# New wells blind test

In [None]:
# Load the new-well export and drop the columns that are not used below.
# NOTE(review): [1:] discards the first data row — presumably a units row; confirm.
new_wells = pd.read_csv(r'C:\Petrel\techlog_data\k01k02d31c36x.csv')[1:].drop(['datasetName','TVDSS', 'BADPORLOG', 'FLANK', 'FLANK2', 
                                                                               'FLUIDCODE', 'Fluidcode', 'Fluidcode_mod', 'FLUIDCODE_PP',
                                                                               'Labelling', 'AREA'], axis=1)
new_wells = new_wells.rename(columns={'DEPT':'MD', 'wellName':'well'})
# Keep only Balakhany VIII samples and tag them with the parent formation name.
new_wells8 = new_wells[new_wells.FORMATION.str.contains('Balakhany VIII')].reset_index(drop=True)
new_wells8['FORMATION_up'] = 'Balakhany VIII'
# Field is derived from the first letter of the well name.
new_wells8.loc[new_wells8.well.str.startswith('K'), 'field'] = 'ACE'
new_wells8.loc[new_wells8.well.str.startswith('D'), 'field'] = 'EAST AZERI'
new_wells8.loc[new_wells8.well.str.startswith('C'), 'field'] = 'WEST AZERI'
# Drop rows where TST or MD equals -9999 (presumably the null marker of the export).
new_wells8 = new_wells8[new_wells8.TST != -9999.]
new_wells8 = new_wells8[new_wells8.MD != -9999.]
# Force numeric dtypes; values that cannot be parsed become NaN.
for col in  [   'MD', 'GR_N', 'GRMATRIX', 'GRSHALE',
                'LPERM', 'LPERM_DS_Bal', 'LPERM_US_Bal', 'NET', 'NPSS', 'PHIT', 'RDEEP',
                'RHOB', 'RHOF', 'RHOMA', 'TST', 'TVD_SCS', 'X', 'Y']:
    new_wells8[col] = pd.to_numeric(new_wells8[col], errors='coerce')
# Label columns use the pandas 'string' dtype.
for col in  [ 'well', 'FORMATION', 'FORMATION_up', 'field']:
    new_wells8[col] = new_wells8[col].astype('string')

def interpolate_by_depth_fm_run_v3(df, step):
    """Resample the logs of every well onto a regular TST grid.

    Steps:

    1. Rows without ``TST`` are dropped and ``MD`` is rounded to 1 decimal.
    2. Per well, every non-label column is linearly interpolated (with
       extrapolation) against ``TST`` on a grid with spacing ``step``.
    3. ``FORMATION_up`` / ``FORMATION`` / ``field`` of the original samples are
       joined back on (``well``, ``MD``); gaps in the label columns are filled
       forward, then backward, within each well.
    4. ``NET`` values above 0 are set to 1.

    Parameters
    ----------
    df : pandas.DataFrame
        Log samples with the columns ``well``, ``MD``, ``TST``, ``FORMATION``,
        ``FORMATION_up``, ``field`` and the log curves selected below.
    step : float
        Spacing of the new TST grid.  It was previously ignored in favour of a
        hard-coded 0.1, so a call with ``step=0.1`` is unchanged.
        NOTE(review): MD / TVD_SCS / TST are still rounded to 1 decimal after
        interpolation, which suits a 0.1 grid — revisit for finer steps.

    Returns
    -------
    pandas.DataFrame
        The resampled samples of all wells.
    """
    label_cols = ['well', 'FORMATION_up', 'FORMATION', 'field']
    df_tst = df[df.TST.notna()].round({'MD':1})

    def interpolate_by_depth_fm_v2(one_well, step):
        # Interpolate all non-label columns of a single well onto the TST grid.
        one_well = one_well.sort_values(by='TST')
        well_name = one_well["well"].iloc[0]
        formation = one_well["FORMATION"].iloc[0]
        formation_up = one_well["FORMATION_up"].iloc[0]
        field = one_well["field"].iloc[0]
        data_range = np.floor((one_well["TST"].max() - one_well["TST"].min())/step)
        starting_tst = one_well["TST"].iloc[0]
        # The grid starts one step after the first sample and stops before the
        # last full step (range(1, n)), as in the original implementation.
        new_TST_values = [starting_tst + i*step for i in range(1,int(data_range))]
        col_lst = []
        for col in one_well.columns:
            if col not in label_cols:
                interp = interp1d(one_well['TST'], one_well[col], kind='linear', fill_value="extrapolate")
                col_lst.append(pd.DataFrame({col: interp(new_TST_values)}))
        new_df = pd.concat(col_lst, axis=1)
        new_df['well'] = well_name
        new_df['TST'] = new_TST_values
        new_df['FORMATION'] = formation
        new_df['FORMATION_up'] = formation_up
        new_df['field'] = field
        new_df = new_df[[   'well', 'MD', 'FORMATION', 'GR_N', 'GRMATRIX', 'GRSHALE', 'LPERM',
                            'LPERM_DS_Bal', 'LPERM_US_Bal', 'NET', 'NPSS', 'PHIT', 'RDEEP', 'RHOB',
                            'RHOF', 'RHOMA', 'TST', 'TVD_SCS', 'X', 'Y', 'FORMATION_up', 'field']]
        return new_df

    df_lst = []
    for well in tqdm(df_tst.well.unique()):
        well_data = df_tst[df_tst.well == well]
        df_lst.append(interpolate_by_depth_fm_v2(well_data, step))
    df_interp = pd.concat(df_lst)
    df_interp = df_interp.round({'MD':1, 'TVD_SCS':1, 'TST':1})
    print('Start joining')

    def well_bal_interp_join(dataset):
        # Join the labels of the original samples back on (well, MD); columns
        # that already exist in `dataset` arrive with the '_r' suffix.
        df_tst = df[(df.TST.notna()) & (df.FORMATION_up.notna())].round({'MD':1})
        data_fu = df_tst[['well','MD','FORMATION_up', 'FORMATION', 'field']]
        well_join = dataset.set_index(['well','MD']).join(data_fu.set_index(['well','MD']), rsuffix='_r').reset_index()
        well_join.insert(3, 'FORMATION_up', well_join.pop('FORMATION_up'))
        well_join.insert(4, 'FORMATION', well_join.pop('FORMATION'))
        return well_join

    well_interp_v2 = well_bal_interp_join(df_interp)
    # Interpolation turns the 0/1 net flag into fractions; any positive value counts as net.
    well_interp_v2.loc[well_interp_v2.NET > 0, 'NET'] = 1

    df_lst_2 = []
    for well in well_interp_v2.well.unique():
        # Independent copy: the label columns are filled on this frame, not on
        # a slice of well_interp_v2.
        field_data = well_interp_v2[well_interp_v2.well == well].copy()
        for col in ('field', 'FORMATION_up', 'FORMATION'):
            field_data[col] = field_data[col].ffill().bfill()
        df_lst_2.append(field_data)
    well_interp_v3 = pd.concat(df_lst_2)

    return well_interp_v3
# Resample the Balakhany VIII logs of the new wells with step 0.1.
new_wells8_v2 = interpolate_by_depth_fm_run_v3(new_wells8, 0.1)

In [None]:
# Lower-case the column names in place (safe to re-run).
new_wells8_v2.columns = new_wells8_v2.columns.str.lower()
# Per-well summary over net samples: mean PHIT, mean x/y rounded to 0 decimals,
# first field; renamed to the column names the map-correlation functions expect.
new_wells_phit = new_wells8_v2[new_wells8_v2.net==1].groupby('well')[['phit', 'x', 'y', 'field']].agg(
    {'phit':'mean', 'x':'mean', 'y':'mean', 'field':'first'}).round({'x':0, 'y':0}).rename(
        columns={'phit':'phit_net_mean', 'x':'xmean','y':'ymean'}).reset_index()
new_wells_phit

In [None]:
# Averaged PHIT map with the new wells posted on top and labelled by name.
fig = plt.figure(figsize=(16, 7))
map_phit_avg = pd.read_csv('./report/models_cv/map_phit_avg.csv')
cb = plt.scatter(map_phit_avg.x, map_phit_avg.y, c=map_phit_avg.phit_pred_mean, cmap='coolwarm', s=1)
plt.colorbar(cb)
plt.scatter(new_wells_phit.xmean, new_wells_phit.ymean, c='black', s=10)
for idx, txt in enumerate(new_wells_phit.well):
    plt.annotate(txt, (new_wells_phit.xmean.iloc[idx], new_wells_phit.ymean.iloc[idx]), fontsize=8)

In [None]:
def seism_well_correl_new_wells(seism_map, wells_df, margin, value, file):
    """Summarise a map attribute inside a square window around every new well.

    For each row of ``wells_df`` the map points whose ``x``/``y`` lie strictly
    inside the ``margin`` x ``margin`` square centred on (``xmean``, ``ymean``)
    are selected, and the mean and the 10/50/90 % quantiles of ``value`` are
    computed over them.

    Parameters
    ----------
    seism_map : pandas.DataFrame
        Map points; must contain the columns ``x``, ``y`` and ``value``.
    wells_df : pandas.DataFrame
        One row per well with the columns ``well``, ``phit_net_mean``,
        ``xmean``, ``ymean`` and ``field``.  The window bounds ``xmean_min``,
        ``xmean_max``, ``ymean_min`` and ``ymean_max`` are added to this frame
        in place.
    margin : number
        Side length of the square window.
    value : str
        Name of the map column to summarise; also the prefix of the
        ``<value>_avg`` / ``_p50`` / ``_p10`` / ``_p90`` output columns.
    file : str
        Label stored in the ``seism_att`` column of both outputs.

    Returns
    -------
    tuple of pandas.DataFrame
        ``(result, seism_map_zone)``: one summary row per well, and the map
        points that fell inside the windows tagged with ``well``.  Both frames
        are empty (but carry their columns) when ``wells_df`` has no rows.
    """
    half_margin = margin/2
    # Side effect kept from the original implementation: the bounds are written
    # into the caller's frame.
    wells_df['xmean_min'] = wells_df['xmean'] - half_margin
    wells_df['xmean_max'] = wells_df['xmean'] + half_margin
    wells_df['ymean_min'] = wells_df['ymean'] - half_margin
    wells_df['ymean_max'] = wells_df['ymean'] + half_margin
    seism_map_short = seism_map[['x', 'y', value]]

    records = []
    zones = []
    for _, row in wells_df.iterrows():
        inside = ((seism_map_short['x'] > row['xmean_min']) &
                  (seism_map_short['x'] < row['xmean_max']) &
                  (seism_map_short['y'] > row['ymean_min']) &
                  (seism_map_short['y'] < row['ymean_max']))
        # Independent copy, so tagging it with the well name below never writes
        # into a slice of the map.
        zone = seism_map_short[inside].copy()
        records.append({'well': row['well'],
                        'phit_net_mean': row['phit_net_mean'],
                        value+'_avg': zone[value].mean(),
                        value+'_p50': zone[value].quantile(0.5),
                        value+'_p10': zone[value].quantile(0.1),
                        value+'_p90': zone[value].quantile(0.9),
                        'xmean': row['xmean'],
                        'ymean': row['ymean'],
                        'xmean_min': row['xmean_min'],
                        'xmean_max': row['xmean_max'],
                        'ymean_min': row['ymean_min'],
                        'ymean_max': row['ymean_max'],
                        'margin': margin,
                        'field': row['field'],
                        'seism_att': file})
        zone['well'] = row['well']
        zones.append(zone)

    if not records:
        # pd.concat([]) raises, so an empty wells table gets empty, labelled outputs.
        result_columns = ['well', 'phit_net_mean',
                          value+'_avg', value+'_p50', value+'_p10', value+'_p90',
                          'xmean', 'ymean', 'xmean_min', 'xmean_max', 'ymean_min', 'ymean_max',
                          'margin', 'field', 'seism_att', 'bottom_left_x', 'bottom_left_y']
        return pd.DataFrame(columns=result_columns), pd.DataFrame(columns=['x', 'y', value, 'well', 'seism_att'])

    # One DataFrame from all records instead of concatenating one-row frames.
    result = pd.DataFrame(records)
    # Lower-left corner of each window.
    result['bottom_left_x'] = result['xmean'] - half_margin
    result['bottom_left_y'] = result['ymean'] - half_margin
    seism_map_zone = pd.concat(zones).reset_index(drop=True)
    seism_map_zone['seism_att'] = file
    return result, seism_map_zone
# NOTE: new_wells_phit is replaced by the per-well summary returned here, so
# re-running this cell requires rebuilding new_wells_phit from the logs first.
new_wells_phit, new_wells_square = seism_well_correl_new_wells(map_phit_avg, new_wells_phit, 300, 'phit_pred_mean', 'new_wells_phit')
new_wells_phit

In [None]:
# Blind test: well PHIT vs map PHIT (window mean, P10 and P90) for the new wells.
rng = 0.0115*1
sns.scatterplot(data=new_wells_phit, x='phit_net_mean', y='phit_pred_mean_avg', 
                s=30, alpha=0.5, ec='black', marker='o')
sns.scatterplot(data=new_wells_phit, x='phit_net_mean', y='phit_pred_mean_p10', 
                s=30, alpha=0.5, ec='black', marker='v')
sns.scatterplot(data=new_wells_phit, x='phit_net_mean', y='phit_pred_mean_p90', 
                s=30, alpha=0.5, ec='black', marker='^')
# Red: 1:1 line; green: +/- rng tolerance band.
sns.lineplot(x=[0.18, 0.26], y=[0.18, 0.26], c='r', ls='--')
sns.lineplot(x=[0.18, 0.26], y=[0.18+rng, 0.26+rng], c='g', ls='--')
sns.lineplot(x=[0.18, 0.26], y=[0.18-rng, 0.26-rng], c='g', ls='--')
plt.xlim(0.18, 0.26)
plt.ylim(0.18, 0.26)
plt.grid()
# Label every well at its window-mean point.
for idx, txt in enumerate(new_wells_phit.well):
    plt.annotate(txt, (new_wells_phit.phit_net_mean.iloc[idx], new_wells_phit.phit_pred_mean_avg.iloc[idx]), fontsize=8)

# Density_Bal8 uploading

In [None]:
path = 'attr/Den8/'
def list_files_by_mask(directory, mask, exclude_mask=None):
    """List the entries of ``directory`` that match a glob mask.

    Parameters
    ----------
    directory : str
        Directory to search.
    mask : str
        Glob pattern, relative to ``directory``, of the entries to keep.
    exclude_mask : str, optional
        Glob pattern, relative to ``directory``, of entries to leave out.

    Returns
    -------
    list of str
        Paths relative to ``directory``, sorted alphabetically.  ``glob``
        returns matches in arbitrary order, and callers pick entries by
        position, so the order is fixed here.
    """
    matched = glob.glob(os.path.join(directory, mask))
    if exclude_mask:
        # A set makes the membership test O(1) per file.
        excluded = set(glob.glob(os.path.join(directory, exclude_mask)))
        matched = [file for file in matched if file not in excluded]
    return sorted(os.path.relpath(file, directory) for file in matched)
# All entries under `path` except *.xml, and the subset whose name ends in ' 0-80'.
files = list_files_by_mask(path, '*', '*.xml')
full_files = list_files_by_mask(path, '* 0-80', '*.xml')
full_files, files

In [None]:
# Side length of the square window around each well.
margin = 300
# NOTE(review): the map is picked by position in full_files, so the choice
# depends on the order in which the files were listed — confirm the intended file.
file = full_files[1]
def seism_upload(file, delimiter):
    """Read a delimited seismic map export into a DataFrame.

    The first 20 lines of ``file`` are skipped and the remaining columns are
    named ``x``, ``y``, ``value``, ``column`` and ``row``; ``x`` and ``y`` are
    rounded to 0 decimals.
    """
    column_names = ['x', 'y', 'value', 'column', 'row']
    raw_map = pd.read_csv(file, delimiter=delimiter, skiprows=20, names=column_names)
    return raw_map.round({'x':0, 'y':0})
# Load the selected map (space-delimited) and tag it with its source.
seismic_map = seism_upload(path + file, ' ')
seismic_map['path'] = path
seismic_map['attr'] = file
print(f"seismic map <{path + file}> is uploaded")

def intersection_maps(map, wells_df, buffer):
    """Keep the map points that fall inside the buffered convex hull of the wells.

    ``map`` needs ``x`` / ``y`` columns and ``wells_df`` needs ``xmean`` /
    ``ymean`` columns.  The hull is built from the well points and grown by
    ``buffer``; the rows of ``map`` whose point intersects it are returned as a
    GeoDataFrame.
    """
    # Area of interest: convex hull of all well locations, enlarged by `buffer`.
    well_points = [Point(pair) for pair in zip(wells_df['xmean'], wells_df['ymean'])]
    wells_gdf = gpd.GeoDataFrame(wells_df, geometry=well_points)
    area_of_interest = wells_gdf.unary_union.convex_hull.buffer(buffer)

    # Map points as geometries, filtered by the area of interest.
    map_points = [Point(pair) for pair in zip(map['x'], map['y'])]
    map_gdf = gpd.GeoDataFrame(map, geometry=map_points)
    return map_gdf[map_gdf.intersects(area_of_interest)]
# Clip the map to the buffered (1500) convex hull of the well locations.
seismic_map_intersect = intersection_maps(seismic_map, df_bal8_v4_flag, 1500)
print(f'seismic map <{path + file}> is intersected with wells')

def seism_well_correl(seism_map, wells_df, margin, file):
    """Summarise the map ``value`` inside a square window around every well.

    For each row of ``wells_df`` the map points whose ``x``/``y`` lie strictly
    inside the ``margin`` x ``margin`` square centred on (``xmean``, ``ymean``)
    are selected, and the mean and the 25/50/75 % quantiles of the ``value``
    column are computed over them.

    Parameters
    ----------
    seism_map : pandas.DataFrame
        Map points; must contain the columns ``x``, ``y`` and ``value``.
    wells_df : pandas.DataFrame
        One row per well with the columns ``well``, ``phit_net_mean``,
        ``xmean`` and ``ymean``.  The window bounds ``xmean_min``,
        ``xmean_max``, ``ymean_min`` and ``ymean_max`` are added to this frame
        in place.
    margin : number
        Side length of the square window.
    file : str
        Label stored in the ``seism_att`` column of the summary.

    Returns
    -------
    tuple of pandas.DataFrame
        ``(result, seism_map_zone)``: one summary row per well (``mean``,
        ``p50``, ``p25``, ``p75``), and the map points that fell inside the
        windows tagged with ``well``.  Both frames are empty (but carry their
        columns) when ``wells_df`` has no rows.
    """
    half_margin = margin/2
    # Side effect kept from the original implementation: the bounds are written
    # into the caller's frame.
    wells_df['xmean_min'] = wells_df['xmean'] - half_margin
    wells_df['xmean_max'] = wells_df['xmean'] + half_margin
    wells_df['ymean_min'] = wells_df['ymean'] - half_margin
    wells_df['ymean_max'] = wells_df['ymean'] + half_margin
    seism_map_short = seism_map[['x', 'y', 'value']]

    records = []
    zones = []
    for _, row in wells_df.iterrows():
        inside = ((seism_map_short['x'] > row['xmean_min']) &
                  (seism_map_short['x'] < row['xmean_max']) &
                  (seism_map_short['y'] > row['ymean_min']) &
                  (seism_map_short['y'] < row['ymean_max']))
        # Independent copy, so tagging it with the well name never writes into
        # a slice of the map.
        zone = seism_map_short[inside].copy()
        zone['well'] = row['well']
        records.append({'well': row['well'],
                        'phit_net_mean': row['phit_net_mean'],
                        'mean': zone['value'].mean(),
                        'p50': zone['value'].quantile(0.5),
                        'p25': zone['value'].quantile(0.25),
                        'p75': zone['value'].quantile(0.75),
                        'xmean': row['xmean'],
                        'ymean': row['ymean'],
                        'xmean_min': row['xmean_min'],
                        'xmean_max': row['xmean_max'],
                        'ymean_min': row['ymean_min'],
                        'ymean_max': row['ymean_max'],
                        'margin': margin,
                        'seism_att': file})
        zones.append(zone)

    if not records:
        # pd.concat([]) raises, so an empty wells table gets empty, labelled outputs.
        result_columns = ['well', 'phit_net_mean', 'mean', 'p50', 'p25', 'p75',
                          'xmean', 'ymean', 'xmean_min', 'xmean_max', 'ymean_min', 'ymean_max',
                          'margin', 'seism_att', 'bottom_left_x', 'bottom_left_y']
        return pd.DataFrame(columns=result_columns), pd.DataFrame(columns=['x', 'y', 'value', 'well'])

    # One DataFrame from all records instead of concatenating one-row frames.
    result = pd.DataFrame(records)
    # Lower-left corner of each window.
    result['bottom_left_x'] = result['xmean'] - half_margin
    result['bottom_left_y'] = result['ymean'] - half_margin
    seism_map_zone = pd.concat(zones).reset_index(drop=True)

    return result, seism_map_zone
# Window statistics of the map around every well.
seismic_wells, seism_map_zone = seism_well_correl(seismic_map_intersect, xy8, margin, file)
# Values <= 1.65 are blanked only after the statistics above were computed, so
# this affects the plot below and later uses of seismic_map_intersect.
seismic_map_intersect.loc[seismic_map_intersect.value <= 1.65, 'value'] = np.nan
print(f"seismic map <{path + file}> to wells dataset is done")

# Map view with the wells labelled and their sampling windows outlined.
fig, ax = plt.subplots(figsize=(15, 7))
cb1 = plt.scatter(seismic_map_intersect['x'], seismic_map_intersect['y'], c=seismic_map_intersect['value'], cmap='coolwarm', s=1)
plt.colorbar(cb1)
plt.scatter(xy8['xmean'], xy8['ymean'], c='black', s=1)
for idx, txt in enumerate(xy8['well']):
    ax.annotate(txt, (xy8['xmean'].iloc[idx], xy8['ymean'].iloc[idx]), fontsize=8)
for idx, row in seismic_wells.iterrows():
    rect = plt.Rectangle((row['bottom_left_x'], row['bottom_left_y']), row['margin'], row['margin'], linewidth=1, edgecolor='b', facecolor='none')
    ax.add_patch(rect)
plt.title(path + file);

## Analysis of rhob outliers

In [None]:
# Tolerance between the well-average RHOB and the map RHOB.
cutoff = 0.05
seismic_wells = seismic_wells.rename(columns={'mean':'rhob_avg_map', 'p50':'rhob_p50_map', 'p25':'rhob_p25_map', 'p75':'rhob_p75_map'})
# Well-average RHOB (and field) per well, joined to the map statistics.
rhob_mean = df_bal8_v4_flag.groupby('well')[['rhob', 'field']].agg({'rhob':'mean', 'field':'first'}).reset_index()
rhob_mean = rhob_mean.rename(columns={'rhob':'rhob_avg_well'})
seismic_wells_v2 = seismic_wells.set_index('well').join(rhob_mean.set_index('well')).reset_index()
seismic_wells_v2['up'] = seismic_wells_v2['rhob_avg_well'] + cutoff
seismic_wells_v2['down'] = seismic_wells_v2['rhob_avg_well'] - cutoff
# qc_up / qc_down hold string labels: 'bigger' / 'lower' when the map value is
# outside the tolerance, otherwise 'norm'; qc is 'yes' only when both are 'norm'.
seismic_wells_v2['qc_up'] = np.where((seismic_wells_v2['rhob_avg_map'] >= seismic_wells_v2['up']), 'bigger', 'norm')
seismic_wells_v2['qc_down'] = np.where((seismic_wells_v2['rhob_avg_map'] <= seismic_wells_v2['down']), 'lower', 'norm')
seismic_wells_v2['qc'] = np.where((seismic_wells_v2['qc_up'] == 'norm') & (seismic_wells_v2['qc_down'] == 'norm'), 'yes', 'no')
print('up',seismic_wells_v2.qc_up.value_counts(),
      '\ndown',seismic_wells_v2.qc_down.value_counts(),
      '\ntotal qc', seismic_wells_v2.qc.value_counts(normalize=True))

In [None]:
# Well-average RHOB vs map RHOB, with the 1:1 line (red) and the +/- cutoff band (green).
fig, ax = plt.subplots(figsize=(7, 5))
sns.scatterplot(data=seismic_wells_v2, x='rhob_avg_well', y='rhob_avg_map', hue='field')
sns.lineplot(x = [1.65, 2.65], y = [1.65, 2.65], color='red', ls='--')
sns.lineplot(x = [1.65, 2.65], y = [1.65+cutoff, 2.65+cutoff], color='green', ls='--')
sns.lineplot(x = [1.65, 2.65], y = [1.65-cutoff, 2.65-cutoff], color='green', ls='--')
# qc_down / qc_up hold the string labels 'lower' / 'bigger' / 'norm', so the
# outliers are selected by label; the previous comparison with 0 never matched
# and no well was annotated.
down = seismic_wells_v2[seismic_wells_v2.qc_down == 'lower']
up = seismic_wells_v2[seismic_wells_v2.qc_up == 'bigger']
# Label the outliers: below the band in red, above the band in blue.
for idx, txt in enumerate(down['well']):
    plt.annotate(txt, (down['rhob_avg_well'].iloc[idx], down['rhob_avg_map'].iloc[idx]), fontsize=6, c='r')
for idx, txt in enumerate(up['well']):
    plt.annotate(txt, (up['rhob_avg_well'].iloc[idx], up['rhob_avg_map'].iloc[idx]), fontsize=6, c='b')

In [None]:
# Well locations: all wells in green, then the 'bigger' (blue) and 'lower' (red)
# outliers drawn on top.
sns.scatterplot(data=seismic_wells_v2, x='xmean', y='ymean', c='green', label='norm')
sns.scatterplot(data=seismic_wells_v2[seismic_wells_v2.qc_up == 'bigger'], x='xmean', y='ymean', c='b', label = 'bigger')
sns.scatterplot(data=seismic_wells_v2[seismic_wells_v2.qc_down == 'lower'], x='xmean', y='ymean', c='r', label = 'lower')
plt.legend()

## Density_Bal8 layers

In [None]:
for file in files:
    if ' 0-80' not in file and 'Mean amplitude' in file:
        def seism_upload(file, delimiter):
            seismic = pd.read_csv(file, delimiter=delimiter, skiprows=20, names=['x', 'y', 'value', 'column', 'row'])
            seismic = seismic.round({'x':0, 'y':0})
            return seismic
        seismic_map = seism_upload(path + file, ' ')
        seismic_map['path'] = path
        seismic_map['attr'] = file
        print(f"seismic map <{path + file}> is uploaded")
        
        def intersection_maps(map, wells_df, buffer):
            geometry_map = [Point(xy) for xy in zip(map['x'], map['y'])]
            gdf_map = gpd.GeoDataFrame(map, geometry=geometry_map)

            geometry_points = [Point(xy) for xy in zip(wells_df['xmean'], wells_df['ymean'])]
            gdf_points = gpd.GeoDataFrame(wells_df, geometry=geometry_points)
            convex_hull = gdf_points.unary_union.convex_hull.buffer(buffer)
            intersection = gdf_map[gdf_map.intersects(convex_hull)]
            return intersection
        seismic_map_intersect = intersection_maps(seismic_map, df_bal8_v4_flag, 1500)
        print(f'seismic map <{path + file}> is intersected with wells')

        def seism_well_correl(seism_map, wells_df, margin, file):
            """Summarise seismic-map values inside a square window around each well.

            Parameters
            ----------
            seism_map : DataFrame with at least the columns 'x', 'y' and 'value'.
            wells_df : DataFrame with the columns 'well', 'xmean', 'ymean' and
                'phit_net_mean'. The window bounds ('xmean_min', 'xmean_max',
                'ymean_min', 'ymean_max') are written onto this frame as a side
                effect, which existing callers may rely on.
            margin : full side length of the square window centred on each well.
            file : label stored in the 'seism_att' column of the summary.

            Returns
            -------
            (result, seism_map_zone)
                result : one row per well with mean / p50 / p25 / p75 of 'value'
                    inside the window (NaN when no map point falls inside), the
                    window bounds, 'margin', 'seism_att' and the window's
                    bottom-left corner.
                seism_map_zone : the map points found inside any window, tagged
                    with the owning 'well'. Both frames are empty (with their
                    columns) when wells_df has no rows.
            """
            half_margin = margin / 2
            wells_df['xmean_min'] = wells_df['xmean'] - half_margin
            wells_df['xmean_max'] = wells_df['xmean'] + half_margin
            wells_df['ymean_min'] = wells_df['ymean'] - half_margin
            wells_df['ymean_max'] = wells_df['ymean'] + half_margin
            seism_map_short = seism_map[['x', 'y', 'value']]

            records = []
            zone_frames = []
            for _, row in wells_df.iterrows():
                # Strict inequalities: points lying exactly on the window edge are excluded.
                in_window = (
                    (seism_map_short['x'] > row['xmean_min'])
                    & (seism_map_short['x'] < row['xmean_max'])
                    & (seism_map_short['y'] > row['ymean_min'])
                    & (seism_map_short['y'] < row['ymean_max'])
                )
                # Explicit copy so that tagging the well never writes into a slice of the map.
                zone = seism_map_short[in_window].copy()
                zone['well'] = row['well']
                values = zone['value']
                records.append({
                    'well': row['well'],
                    'phit_net_mean': row['phit_net_mean'],
                    'mean': values.mean(),
                    'p50': values.quantile(0.5),
                    'p25': values.quantile(0.25),
                    'p75': values.quantile(0.75),
                    'xmean': row['xmean'],
                    'ymean': row['ymean'],
                    'xmean_min': row['xmean_min'],
                    'xmean_max': row['xmean_max'],
                    'ymean_min': row['ymean_min'],
                    'ymean_max': row['ymean_max'],
                    'margin': margin,
                    'seism_att': file,
                    # Bottom-left corner of the window, used to draw it as a rectangle.
                    'bottom_left_x': row['xmean'] - half_margin,
                    'bottom_left_y': row['ymean'] - half_margin,
                })
                zone_frames.append(zone)

            result_columns = [
                'well', 'phit_net_mean', 'mean', 'p50', 'p25', 'p75',
                'xmean', 'ymean', 'xmean_min', 'xmean_max', 'ymean_min', 'ymean_max',
                'margin', 'seism_att', 'bottom_left_x', 'bottom_left_y',
            ]
            # One DataFrame built from plain records instead of concatenating one-row frames.
            result = pd.DataFrame(records, columns=result_columns)

            if zone_frames:
                seism_map_zone = pd.concat(zone_frames).reset_index(drop=True)
            else:
                # No wells: pd.concat([]) would raise, so return an empty frame with the same columns.
                seism_map_zone = seism_map_short.iloc[0:0].assign(well=pd.Series(dtype=object))

            return result, seism_map_zone
        # Window statistics of the intersected map around every well in xy8.
        seismic_wells, seism_map_zone = seism_well_correl(seismic_map_intersect, xy8, margin, file)
        # Blank map values at or below 1.65 so they are not coloured in the figure;
        # this happens after the statistics above were computed.
        low_value_mask = seismic_map_intersect.value <= 1.65
        seismic_map_intersect.loc[low_value_mask, 'value'] = np.nan
        print(f"seismic map <{path + file}> to wells dataset is done")

        # Map coloured by value, wells as black dots with their names, and one
        # blue square per well showing the window used for the statistics.
        fig, ax = plt.subplots(figsize=(15, 7))
        cb1 = plt.scatter(
            seismic_map_intersect['x'],
            seismic_map_intersect['y'],
            c=seismic_map_intersect['value'],
            cmap='coolwarm',
            s=1,
        )
        plt.colorbar(cb1)
        plt.scatter(xy8['xmean'], xy8['ymean'], c='black', s=1)
        for well_name, well_x, well_y in zip(xy8['well'], xy8['xmean'], xy8['ymean']):
            ax.annotate(well_name, (well_x, well_y), fontsize=8)
        for _, window in seismic_wells.iterrows():
            corner = (window['bottom_left_x'], window['bottom_left_y'])
            rect = plt.Rectangle(corner, window['margin'], window['margin'],
                                 linewidth=1, edgecolor='b', facecolor='none')
            ax.add_patch(rect)
        plt.title(path + file);

# Density cube

In [None]:
# Open the density SEG-Y cube. The handle is deliberately left open: later cells
# read dens8.samples, dens8.xlines and dens8.iline from it.
dens8 = segyio.open(r"C:\Petrel\seismic_data_wv\dens_cube\DEN_cube_comb_cut_trend_comp.sgy")
# Cell output: inline numbers, crossline numbers, sample axis, and the length of each.
dens8.ilines, dens8.xlines, dens8.samples, len(dens8.ilines), len(dens8.xlines), len(dens8.samples)
# Header scrape that produced the parquet file read by the next cell (kept commented out):
# df_dens_cube = segy_header_scrape(r"C:\Petrel\seismic_data_wv\dens_cube\DEN_cube_comb_cut_trend_comp.sgy", chunk=10000)
# df_dens_cube.to_parquet(r"C:\Petrel\seismic_data_wv\dens_cube\dens_cube8.gzip")

In [109]:
# Trace-header table of the density cube, cached as parquet.
df_dens_cube = pd.read_parquet(r"C:\Petrel\seismic_data_wv\dens_cube\dens_cube8.gzip")

# Coordinate <-> line-number lookup built as one chain, so no column is ever
# assigned into a slice of df_dens_cube.
# The header coordinates are divided by 100 (presumably the SEG-Y coordinate
# scalar - TODO confirm) and then rounded to whole units.
df_xy_lines = (
    df_dens_cube[['CDP_X', 'CDP_Y', 'INLINE_3D', 'CROSSLINE_3D']]
    .rename(columns={'CDP_X': 'x', 'CDP_Y': 'y', 'INLINE_3D': 'iline', 'CROSSLINE_3D': 'xline'})
    .assign(x=lambda frame: frame['x'] / 100,
            y=lambda frame: frame['y'] / 100)
    .round({'x': 0, 'y': 0})
)

In [None]:
# Log samples of well B02; its mean location and TVD range define the sub-cube to load.
b02 = df_bal8_v4_flag[df_bal8_v4_flag['well'] == 'B02']
if b02.empty:
    raise ValueError("well 'B02' not found in df_bal8_v4_flag")

# NOTE(review): the filter used to apply this same iline range twice. The second
# clause was probably meant to restrict xline, but its intended bounds cannot be
# recovered from this notebook, so only the effective iline restriction is kept.
df_xy_lines_v2 = df_xy_lines[df_xy_lines.iline.between(3900, 6900)]
# df_xy_lines_v2 = df_xy_lines
# NOTE(review): the seismic-map code earlier in the notebook also reads a variable
# named `margin`; if that is the same global, this assignment changes it.
margin = 100
well_xmax = b02['xmean']+margin
well_xmin = b02['xmean']-margin
well_ymax = b02['ymean']+margin
well_ymin = b02['ymean']-margin

# Traces strictly inside the +/- margin square around the first B02 row's location.
in_window = (
    (df_xy_lines_v2['x'] > well_xmin.values[0]) & (df_xy_lines_v2['x'] < well_xmax.values[0]) &
    (df_xy_lines_v2['y'] > well_ymin.values[0]) & (df_xy_lines_v2['y'] < well_ymax.values[0])
)
df_xy_lines_v3 = df_xy_lines_v2[in_window]
if df_xy_lines_v3.empty:
    raise ValueError("no cube traces fall inside the window around well 'B02'")
in_max, xn_max, in_min, xn_min = df_xy_lines_v3.iline.max(), df_xy_lines_v3.xline.max(), df_xy_lines_v3.iline.min(), df_xy_lines_v3.xline.min()

tvd_max = round(b02['tvd_scs'].max(),0)
tvd_min = round(b02['tvd_scs'].min(),0)

# Index of the first cube sample whose rounded value equals the rounded well TVD limits.
sample_depths = dens8.samples.round(0)
max_hits = np.flatnonzero(sample_depths == tvd_max)
min_hits = np.flatnonzero(sample_depths == tvd_min)
if max_hits.size == 0 or min_hits.size == 0:
    raise ValueError(f"well TVD range {tvd_min}-{tvd_max} has no matching sample in the density cube")
tvd_max_idx = max_hits[0]
tvd_min_idx = min_hits[0]
print(in_max, xn_max, in_min, xn_min, tvd_min_idx,  tvd_max_idx)

## lexcube

In [None]:
iline_min = in_min
iline_max = in_max
xline_min = xn_min
xline_max = xn_max
# Start 350 samples above the shallowest log sample (manual adjustment to drop
# values outside Balakhany VIII). Clamped at 0 so a small tvd_min_idx cannot turn
# into a negative, wrap-around slice start.
sample_min = max(tvd_min_idx - 350, 0)
sample_max = tvd_max_idx

offset_val = 0

# Line NUMBERS covered by the sub-cube (np.arange excludes the upper bound).
ils = np.arange(iline_min - offset_val, iline_max + offset_val)
xline_numbers = np.arange(xline_min - offset_val, xline_max + offset_val)
# Positional crossline indices inside an inline section.
# Assumes crosslines are numbered with a step of 1 - TODO confirm against dens8.xlines.
xls = xline_numbers - dens8.xlines.min()
samples_dim = dens8.samples[sample_min:sample_max]

three_d_array = np.empty((len(ils), len(xls), len(samples_dim)))

for il_idx, il in enumerate(ils):
    # dens8.iline[il] loads the whole inline section, so read it once per inline
    # and take all requested crosslines / samples from it in a single indexing step.
    inline_section = dens8.iline[il]
    three_d_array[il_idx] = inline_section[xls, sample_min:sample_max]

# Explicit dims: relying on the key order of a dict of coords to name the axes is
# not supported by every xarray version.
data_array = xr.DataArray(
    three_d_array,
    dims=('iline', 'xline', 'z'),
    coords={
        'iline': ils,
        'xline': xline_numbers,
        'z': samples_dim,
    },
)

data_array = data_array.transpose('xline','z','iline')

w = lexcube.Cube3DWidget(data_array,cmap="viridis_r", vmin=1.95, vmax=2.95)
w

## Nadir algo

In [77]:
# SEGY file
# Read handle on the density cube; GR_extract reads samples through it by default.
v = segyio.open(r"C:\Petrel\seismic_data_wv\dens_cube\DEN_cube_comb_cut_trend_comp.sgy")

# SEGY coordinates
# Trace coordinates and line numbers from the cached header table. The scaling is
# done with assign() so nothing is written into a column-subset slice.
# The division by 100 presumably applies the SEG-Y coordinate scalar - TODO confirm.
df_cube = (
    pd.read_parquet(r"C:\Petrel\seismic_data_wv\dens_cube\dens_cube8.gzip")
    [['CDP_X', 'CDP_Y', 'INLINE_3D', 'CROSSLINE_3D']]
    .assign(CDP_X=lambda frame: frame['CDP_X'] / 100,
            CDP_Y=lambda frame: frame['CDP_Y'] / 100)
)

In [None]:
# Directory searched for the per-well '*.las' trajectory files.
# NOTE(review): the seismic-map code earlier in the notebook also reads a variable
# named `path` (`path + file`); if that is the same global, this cell rebinds it.
path = "./support/well_traj"

def list_files_by_mask(directory, mask, exclude_mask=None):
    """List files in ``directory`` matching a glob ``mask``, as paths relative to it.

    Parameters
    ----------
    directory : directory to search (the glob patterns are joined onto it).
    mask : glob pattern of the files to include, e.g. ``'*.las'``.
    exclude_mask : optional glob pattern of files to drop from the result;
        ``None`` or an empty string disables the exclusion.

    Returns
    -------
    list of str
        Paths relative to ``directory``, sorted so the order does not depend on
        the order in which the operating system lists the directory.
    """
    matched = glob.glob(os.path.join(directory, mask))

    if exclude_mask:
        # Set membership keeps the filtering linear in the number of files.
        excluded = set(glob.glob(os.path.join(directory, exclude_mask)))
        matched = [file for file in matched if file not in excluded]

    return sorted(os.path.relpath(file, directory) for file in matched)
# Every LAS file in the trajectory directory (the empty exclude mask disables exclusion).
files = list_files_by_mask(path, '*.las', '')

# One frame per well: complete X / Y / TVD_SCS / DEPTH rows, tagged with the
# part of the file name before the first dot.
df_lst = []
for well_name in tqdm(files):
    las = lasio.read(f"{path}/{well_name}")
    well_log = las.df().reset_index()[['X', 'Y', 'TVD_SCS', 'DEPTH']]
    df = well_log.dropna().assign(WELL=well_name.split('.')[0])
    df_lst.append(df)

well_traj = pd.concat(df_lst).reset_index(drop=True)
# Keep only the trajectories of wells that are present in df_bal8_v4_flag.
flagged_wells = df_bal8_v4_flag.well.unique()
well_traj_v2 = well_traj[well_traj.WELL.isin(flagged_wells)]

In [98]:
# NOTE(review): this expression is neither assigned nor the last line of the cell,
# so the list of wells is computed and then discarded.
well_traj_v2.WELL.unique()
# Trajectory of well C18, selected from well_traj (not from the filtered well_traj_v2).
df=well_traj[well_traj['WELL']=='C18']

In [99]:
# Z interpolation
# Resample the trajectory in `df` onto a TVD_SCS grid with a step of 1, starting at 1380.
# NOTE(review): CubicSpline requires strictly increasing TVD_SCS and extrapolates
# wherever the grid lies outside the trajectory's TVD range - confirm for the selected well.
new_TVD_SCS = np.arange(1380, df['TVD_SCS'].max() + 1)

cs_x = CubicSpline(df['TVD_SCS'], df['X'])
cs_y = CubicSpline(df['TVD_SCS'], df['Y'])
cs_depth = CubicSpline(df['TVD_SCS'], df['DEPTH'])

new_x = cs_x(new_TVD_SCS)
new_y = cs_y(new_TVD_SCS)
new_depth = cs_depth(new_TVD_SCS)

interpolated_df = pd.DataFrame({'TVD_SCS': new_TVD_SCS, 'X': new_x, 'Y': new_y, 'DEPTH':new_depth})

# Cleaning for extraction
# One inclusive mask (1380 <= TVD_SCS <= 4250) instead of chained boolean
# selections, so the mask always matches the frame it filters; the rename returns
# a new frame rather than mutating in place.
interpolated_df = (
    interpolated_df[interpolated_df['TVD_SCS'].between(1380, 4250)]
    .rename(columns={'X': 'CDP_X', 'Y': 'CDP_Y'})
)

# Closest 4 points
# For every resampled trajectory point, find the k nearest cube traces in map view.
tree = cKDTree(df_cube[['CDP_X', 'CDP_Y']])

k = 4
distances, indices = tree.query(interpolated_df[['CDP_X', 'CDP_Y']], k=k)

# Side-by-side copies of the df_cube rows of the 1st..k-th neighbour; the columns are
# suffixed _1.._k in the same order in which the frames are concatenated.
closest_points = pd.concat([df_cube.iloc[indices[:, i]].reset_index(drop=True) for i in range(k)], axis=1)
closest_points.columns = [f'{col}_{i+1}' for i in range(k) for col in df_cube.columns]
distances_df = pd.DataFrame(distances, columns=[f'distance_{i+1}' for i in range(k)])

result = pd.concat([interpolated_df.reset_index(drop=True), closest_points, distances_df], axis=1)

# Cube sample extract (the function name GR_extract is kept for existing callers)
def GR_extract(iline, xline, sample, cube=None):
    """Return one sample of a SEG-Y cube addressed by line numbers and sample value.

    Despite the name, the returned value is whatever the cube stores.

    Parameters
    ----------
    iline : inline NUMBER, used as the key of ``cube.iline``.
    xline : crossline NUMBER; converted to a position by subtracting the smallest
        crossline number (assumes a crossline step of 1 - TODO confirm).
    sample : value on the cube's sample axis; converted to a position by
        subtracting the integer part of the first sample (assumes a sample
        interval of 1 - TODO confirm).
    cube : object exposing ``xlines``, ``samples`` and ``iline`` the way a segyio
        file handle does. Defaults to the notebook-level handle ``v``.
    """
    if cube is None:
        cube = v
    xline_idx = xline - cube.xlines.min()
    sample_idx = sample - int(cube.samples.min())

    return cube.iline[iline][xline_idx][sample_idx]

def FourPointAvg(row):
    """Inverse-distance weighted mean of RHOB1..RHOB4 using distance_1..distance_4.

    ``row`` is any mapping-like object (e.g. a DataFrame row) holding those eight
    keys. When one or more distances are exactly zero, the point coincides with a
    trace, so the mean of the coincident values is returned instead of dividing
    by zero.
    """
    values = [row[f'RHOB{i}'] for i in range(1, 5)]
    dists = [row[f'distance_{i}'] for i in range(1, 5)]

    coincident = [val for val, dist in zip(values, dists) if dist == 0]
    if coincident:
        return sum(coincident) / len(coincident)

    nom = sum(val / dist for val, dist in zip(values, dists))
    de = sum(1 / dist for dist in dists)

    return nom / de

# Cube value at each of the 4 nearest traces, read at the integer TVD_SCS of the row.
for i in range(4):
    inline_col = f'INLINE_3D_{i+1}'
    xline_col = f'CROSSLINE_3D_{i+1}'
    result[f'RHOB{i+1}'] = result.apply(
        lambda row: GR_extract(
            row[inline_col].astype(int),
            row[xline_col].astype(int),
            row['TVD_SCS'].astype(int),
        ),
        axis=1,
    )

# RHOB 4 point average (inverse-distance weighted)
result['RHOB_avg'] = result.apply(FourPointAvg, axis=1)

result_mod = result[['DEPTH', 'TVD_SCS', 'RHOB_avg']]

# MD interpolation
# Resample onto a 0.1 step in measured depth, from the rounded minimum DEPTH
# up to (roughly) the maximum DEPTH.
md_start = round(result_mod['DEPTH'].min())
md_stop = result_mod['DEPTH'].max() + 0.1
new_md = np.arange(md_start, md_stop, 0.1)

# The *_gravg names are kept; the curve they hold is RHOB_avg.
cs_gravg = CubicSpline(result_mod['DEPTH'], result_mod['RHOB_avg'])
cs_tvd = CubicSpline(result_mod['DEPTH'], result_mod['TVD_SCS'])

new_tvd = cs_tvd(new_md)
new_gravg = cs_gravg(new_md)

res_interp = pd.DataFrame({'DEPTH': new_md, 'TVD_SCS': new_tvd, 'RHOB_avg': new_gravg})

In [100]:
# Log samples of well C18; the plot below draws their rhob vs tvd_scs as the 'log' curve.
logs = df_bal8_v4_flag[df_bal8_v4_flag.well == 'C18']

In [None]:
# Display the cube-derived RHOB_avg resampled on the 0.1 DEPTH grid.
res_interp

In [None]:
# Cube-derived density along the well path against the logged density of well C18.
fig, ax = plt.subplots(figsize=(3, 7))
ax.plot(res_interp['RHOB_avg'], res_interp['TVD_SCS'], label='cube')
ax.plot(logs['rhob'], logs['tvd_scs'], label='log')
ax.invert_yaxis()  # depth increases downwards
ax.set_xlim(1.95, 2.95)
# ax.set_ylim(2450, 1900)
# Labels and title so the figure can be read on its own.
ax.set_xlabel('RHOB')
ax.set_ylabel('TVD_SCS')
ax.set_title('C18: cube vs log')
ax.legend();