# Libs

In [2]:
import warnings
warnings.filterwarnings('ignore')

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from tqdm import tqdm
from sklearn.model_selection import train_test_split

import glob
import os
import tpot
import joblib
import ast

import geopandas as gpd
from shapely.geometry import Point
from scipy.interpolate import interp1d

# Notebook-wide pandas display settings.
pd.set_option('display.precision', 3)
pd.set_option('display.float_format', lambda x: '%.5f' % x)
pd.set_option('display.max_columns', 15)
pd.set_option('display.max_rows', 6)

# Data upload

In [4]:
# net = df_bal8_v4_flag[df_bal8_v4_flag.net == 1].groupby('well')['tst'].count().reset_index()
# total = df_bal8_v4_flag.groupby('well')['tst'].count().reset_index()
# final = net.merge(total, left_index=True, right_index=True, suffixes=('_net', '_total'))
# final['ntg'] = final['tst_net'] / final['tst_total']
# final = final.drop(['tst_net', 'tst_total', 'well_total'], axis=1).rename(columns={'well_net': 'well'})
# final['fm'] = 'bal8'
# final.to_csv('ntg_bal8.csv', index=False)

In [3]:
# --- Balakhany VIII logs -----------------------------------------------------
# Raw string literal: the Windows path contains backslashes that must stay
# literal instead of being parsed as (invalid) escape sequences.
df_bal8_v4 = pd.read_csv(r'C:\jupyter\SPP\inputoutput\general_logs\df_bal8_azr_v4.csv')
df_bal8_v4.columns = df_bal8_v4.columns.str.lower()
# Replace the long formation names with short, numbered codes.
for long_name, short_code in {
    'Balakhany VIII sand': '1_bal8_sand',
    'Balakhany VIII 25': '2_bal8_25',
    'Balakhany VIII 20': '3_bal8_20',
    'Balakhany VIII 15': '4_bal8_15',
    'Balakhany VIII 10': '5_bal8_10',
    'Balakhany VIII 5': '6_bal8_5',
}.items():
    df_bal8_v4.loc[df_bal8_v4.formation == long_name, 'formation'] = short_code
# Wells that have at least one row with phit_flag == 1; only those are kept.
well_phit_flag8 = (
    df_bal8_v4[df_bal8_v4.phit_flag == 1]
    .groupby('well')['phit_flag']
    .apply(lambda x: x.iloc[0])
    .reset_index()
    .well.unique()
)
df_bal8_v4_flag = df_bal8_v4[df_bal8_v4.well.isin(well_phit_flag8)]

ntd_top_phi_bot8_bp_v4 = pd.read_csv(r'C:\jupyter\SPP\inputoutput\layers\ntd_top_phi_bot8_bp_v4.csv').drop('Unnamed: 0', axis=1)
ntd_top_phi_bot8_bp_v4.columns = ntd_top_phi_bot8_bp_v4.columns.str.lower()

# --- Balakhany X logs --------------------------------------------------------
df_bal10_v4 = pd.read_csv(r'C:\jupyter\SPP\inputoutput\general_logs\df_bal10_vshclp2_v4.csv')
df_bal10_v4.columns = df_bal10_v4.columns.str.lower()
# NOTE(review): 'Balakhany X 50' and 'Balakhany X 40' both map to '2_bal10_40'
# (kept as in the original) - confirm that merging the two units is intended.
for long_name, short_code in {
    'Balakhany X sand': '1_bal10_sand',
    'Balakhany X 50': '2_bal10_40',
    'Balakhany X 40': '2_bal10_40',
    'Balakhany X 20': '3_bal10_20',
}.items():
    df_bal10_v4.loc[df_bal10_v4.formation == long_name, 'formation'] = short_code
well_phit_flag10 = (
    df_bal10_v4[df_bal10_v4.phit_flag == 1]
    .groupby('well')['phit_flag']
    .apply(lambda x: x.iloc[0])
    .reset_index()
    .well.unique()
)
df_bal10_v4_flag = df_bal10_v4[df_bal10_v4.well.isin(well_phit_flag10)]

In [4]:
# Balakhany VIII: first xmean/ymean/field record per well (coordinates rounded
# to 0 decimals), left-joined with the mean phit over rows where net == 1.
xy8 = (
    df_bal8_v4_flag
    .groupby('well')[['xmean', 'ymean', 'field']]
    .first()
    .reset_index()
    .round({'xmean': 0, 'ymean': 0})
)
phit8 = (
    df_bal8_v4_flag.loc[df_bal8_v4_flag.net == 1]
    .groupby('well')['phit']
    .mean()
    .reset_index()
)
xy8_phit = xy8.merge(phit8, on='well', how='left').rename(columns={'phit': 'phit_net_mean'})

# Balakhany X: same construction.
xy10 = (
    df_bal10_v4_flag
    .groupby('well')[['xmean', 'ymean', 'field']]
    .first()
    .reset_index()
    .round({'xmean': 0, 'ymean': 0})
)
phit10 = (
    df_bal10_v4_flag.loc[df_bal10_v4_flag.net == 1]
    .groupby('well')['phit']
    .mean()
    .reset_index()
)
xy10_phit = xy10.merge(phit10, on='well', how='left').rename(columns={'phit': 'phit_net_mean'})

# Seismic data analysis

In [None]:
def list_files_by_mask(directory, mask):
    """Return the paths in ``directory`` that match the glob pattern ``mask``.

    Parameters
    ----------
    directory : str
        Folder to search.
    mask : str
        Glob pattern applied inside ``directory`` (e.g. ``'Bal8*'``).

    Returns
    -------
    list of str
        Matching paths relative to ``directory``, sorted alphabetically so the
        processing order does not depend on the file system.
    """
    pattern = os.path.join(directory, mask)
    return sorted(os.path.relpath(path, directory) for path in glob.glob(pattern))
files = list_files_by_mask('input/', 'Bal8*')
files

In [None]:
def seis_well_run(file, wells_df, buffer, margin, disp_map=1, disp_xplots=1):
    """Load one seismic attribute map and summarise it in a window around each well.

    Parameters
    ----------
    file : str
        Path of a space-delimited file read with ``pd.read_csv``; its ``x``,
        ``y`` and ``value`` columns are used.
    wells_df : DataFrame
        One row per well with columns ``well``, ``xmean``, ``ymean``,
        ``field`` and ``phit_net_mean``. It is not modified.
    buffer : number
        Distance passed to ``.buffer()`` on the convex hull of the well points;
        only map points intersecting that buffered hull are kept.
    margin : number
        Half-width of the square window around each well; the comparison is
        strict, so points exactly on the window edge are excluded.
    disp_map, disp_xplots : int
        Pass 1 to draw the map / the cross-plots.

    Returns
    -------
    dict
        ``'seism_map'``: the clipped map; ``'seism_wells'``: one row per well
        with mean, p25, p50 and p75 of ``value`` inside the window.
    """
    def seism_upload(file, delimiter):
        """Read the map file and round x/y to 0 decimals."""
        seismic = pd.read_csv(file, delimiter=delimiter)
        return seismic.round({'x': 0, 'y': 0})
    seism = seism_upload(file, ' ')
    print(f"seismic map {file} is uploaded")

    def intersection_maps(map, wells_df, buffer):
        """Keep the map points intersecting the buffered convex hull of the wells."""
        geometry_map = [Point(xy) for xy in zip(map['x'], map['y'])]
        gdf_map = gpd.GeoDataFrame(map, geometry=geometry_map)

        geometry_points = [Point(xy) for xy in zip(wells_df['xmean'], wells_df['ymean'])]
        gdf_points = gpd.GeoDataFrame(wells_df, geometry=geometry_points)
        convex_hull = gdf_points.unary_union.convex_hull.buffer(buffer)
        return gdf_map[gdf_map.intersects(convex_hull)]
    seism_intersect = intersection_maps(seism, wells_df, buffer)
    print('seismic map is intersected with wells')

    def display_map(seism_map, wells_df, file):
        """Scatter the clipped map with the wells (coloured by porosity) on top."""
        plt.subplots(figsize=(14, 6))
        # NOTE(review): the map is drawn twice (opaque, then alpha=0.5); kept
        # as in the original, one of the two calls is probably redundant.
        plt.scatter(seism_map['x'], seism_map['y'], c=seism_map['value'], cmap='coolwarm')
        sc = plt.scatter(seism_map['x'], seism_map['y'], c=seism_map['value'], cmap='coolwarm', alpha=0.5)
        plt.colorbar(sc)
        plt.scatter(wells_df['xmean'], wells_df['ymean'], c=wells_df['phit_net_mean'], s=50, ec='black', lw=0.5, alpha=0.5)
        for i, txt in enumerate(wells_df['well']):
            plt.annotate(txt, (wells_df['xmean'].iloc[i], wells_df['ymean'].iloc[i]), fontsize=6)
        # The title has to be set before show(); afterwards it no longer
        # reaches the figure that was displayed.
        plt.title(f'Seismic {file} map with wells')
        plt.show()
    if disp_map == 1:
        display_map(seism_intersect, wells_df, file)

    def seism_well_correl(seism_map, wells_df, margin, file):
        """Window statistics of the map value around every well (one row per well)."""
        seism_map_short = seism_map[['x', 'y', 'value']]
        columns = ['well', 'phit_net_mean', 'field', 'mean', 'p50', 'p25', 'p75',
                   'xmean_min', 'xmean_max', 'ymean_min', 'ymean_max', 'margin', 'seism_att']
        records = []
        for _, row in wells_df.iterrows():
            # Window bounds are kept local so the caller's frame is not altered.
            x_min, x_max = row['xmean'] - margin, row['xmean'] + margin
            y_min, y_max = row['ymean'] - margin, row['ymean'] + margin
            in_window = ((seism_map_short['x'] > x_min) & (seism_map_short['x'] < x_max) &
                         (seism_map_short['y'] > y_min) & (seism_map_short['y'] < y_max))
            values = seism_map_short.loc[in_window, 'value']
            records.append({'well': row['well'],
                            'phit_net_mean': row['phit_net_mean'],
                            'field': row['field'],
                            'mean': values.mean(),
                            'p50': values.quantile(0.5),
                            'p25': values.quantile(0.25),
                            'p75': values.quantile(0.75),
                            'xmean_min': x_min,
                            'xmean_max': x_max,
                            'ymean_min': y_min,
                            'ymean_max': y_max,
                            'margin': margin,
                            'seism_att': file})
        # Built once from records; also yields an empty, correctly-labelled
        # frame when there are no wells.
        return pd.DataFrame(records, columns=columns)
    seism_wells = seism_well_correl(seism_intersect, wells_df, margin, file)
    print(f"map {file} to wells dataset is done")

    def seism_well_xplots(seism_well):
        """Cross-plot well porosity against each window statistic."""
        fig, ax = plt.subplots(1, 4, figsize=(18, 4))
        # Panel order left to right: mean, p50, p25, p75 (as in the original).
        for stat, panel in (('mean', 0), ('p25', 2), ('p50', 1), ('p75', 3)):
            sns.scatterplot(data=seism_well, x='phit_net_mean', y=stat, ax=ax[panel])
        # Title first, then layout and show, so the title is part of the output.
        plt.suptitle(f'Seismic {file} vs wells')
        plt.tight_layout()
        plt.show()
    if disp_xplots == 1:
        seism_well_xplots(seism_wells)

    resulting_dict = {'seism_map': seism_intersect, 'seism_wells': seism_wells}

    return resulting_dict

# Run every Bal8 attribute map through seis_well_run (plots switched off),
# stack the per-well tables and save them as CSV.
seism_lst = []
for file in files:
    run_output = seis_well_run('input/'+file, xy8_phit, buffer=1500, margin=100,
                               disp_map=0, disp_xplots=0)
    seism_lst.append(run_output['seism_wells'])

final = pd.concat(seism_lst, ignore_index=True)
final.to_csv('io/seism_wells_bal8.csv', index=False)

In [11]:
# Read the per-well seismic statistics back from the CSV written with
# `final.to_csv` in the previous cell.
final = pd.read_csv('io/seism_wells_bal8.csv')

In [None]:
# One figure per seismic attribute: well porosity against each window
# statistic, coloured by field. Panels left to right: mean, p50, p25, p75.
panel_of_stat = {'mean': 0, 'p25': 2, 'p50': 1, 'p75': 3}
for att in final.seism_att.unique():
    data = final[final.seism_att == att]
    fig, ax = plt.subplots(1, 4, figsize=(18, 4))
    for stat, panel in panel_of_stat.items():
        sns.scatterplot(data=data, x='phit_net_mean', y=stat, hue='field', ax=ax[panel])
    plt.suptitle(f'Seismic {att} vs wells')
    plt.tight_layout()
    plt.show()

In [None]:
# Cross-plot the per-well window mean of every attribute against the window
# mean of the reference attribute (the first attribute found in `final`).
for seism in final.seism_att.unique():
    ref = final.seism_att.unique()[0]
    data1 = final[final.seism_att == ref]
    data2 = final[final.seism_att == seism]
    siesm_name = data2.seism_att.unique()[0]
    # Merge on well; overlapping column names get the _ref / _seism suffixes.
    data_join = data1.merge(data2, on='well', suffixes=('_ref', '_seism'))
    plt.figure(figsize=(6, 4))
    sns.scatterplot(data=data_join, x='mean_ref', y='mean_seism', hue='field_ref')
    plt.title(f'{ref} vs {siesm_name}')

# Porosity prediction

In [None]:
def seism_upload(file, delimiter):
    """Read a delimited seismic map file and round its x/y columns to 0 decimals."""
    return pd.read_csv(file, delimiter=delimiter).round({'x':0, 'y':0})
seism = seism_upload('Bal8_AV', ' ')

def intersection_maps(map, wells_df, buffer):
    """Return the map points that intersect the buffered convex hull of the wells.

    ``map`` needs ``x``/``y`` columns, ``wells_df`` needs ``xmean``/``ymean``;
    ``buffer`` is the distance passed to ``.buffer()`` on the hull.
    """
    map_points = [Point(xy) for xy in zip(map['x'], map['y'])]
    well_points = [Point(xy) for xy in zip(wells_df['xmean'], wells_df['ymean'])]

    gdf_map = gpd.GeoDataFrame(map, geometry=map_points)
    gdf_points = gpd.GeoDataFrame(wells_df, geometry=well_points)

    hull = gdf_points.unary_union.convex_hull.buffer(buffer)
    inside_hull = gdf_map.intersects(hull)
    return gdf_map[inside_hull]
seism_intersect = intersection_maps(seism, xy8_phit, 1500)
print('seismic map is intersected with wells')

In [None]:
# Rows of `final` for the 'Bal8_AV' attribute: window mean vs. net porosity,
# coloured by field, with every point labelled by its well name.
# NOTE(review): the loop that builds `final` passes 'input/'+file as the
# attribute name, so seism_att may read 'input/Bal8_AV' rather than 'Bal8_AV'
# - confirm against the contents of io/seism_wells_bal8.csv.
df_av = final[final.seism_att == 'Bal8_AV']
# df_av = df_av.rename(columns={'mean':'mean_av', 'p25':'p25_av', 'p50':'p50_av', 'p75':'p75_av'})
sns.scatterplot(data=df_av, x='phit_net_mean', y='mean', hue='field')
plt.legend(loc='upper right')
for idx, txt in enumerate(df_av['well']):
    plt.annotate(txt, (df_av['phit_net_mean'].iloc[idx], df_av['mean'].iloc[idx]), fontsize=6)

In [None]:
# Drop a first hand-picked list of wells, attach each well's first xmean/ymean
# record (renamed to x/y), then drop a second hand-picked list of wells.
# NOTE(review): a later cell labels the same two lists "outliers" and
# "very close to each other wells".
df_av_v2 = df_av[~df_av.well.isin(['B39', 'D02Y', 'D34', 'B31', 'B20'])]
xy = df_bal8_v4_flag.groupby('well')[['xmean','ymean']].first().reset_index()
df_av_v2 = df_av_v2.merge(xy, on='well')
df_av_v2 = df_av_v2.rename(columns = {'xmean':'x', 'ymean':'y'})
df_av_v2 = df_av_v2[~df_av_v2.well.isin(['B01ST1', 'D01', 'C14', 'C01A', 'B06', 'C13Z', 'C06', 'D01Z','C07'])]
# Same scatter as before, on the reduced well set.
sns.scatterplot(data=df_av_v2, x='phit_net_mean', y='mean', hue='field')
plt.legend(loc='upper right')
for idx, txt in enumerate(df_av_v2['well']):
    plt.annotate(txt, (df_av_v2['phit_net_mean'].iloc[idx], df_av_v2['mean'].iloc[idx]), fontsize=6)

## model fit

In [211]:
# Features: well x, y and the window mean of the attribute (renamed to 'value'
# so the column names match the seismic map); target: net mean porosity.
X = df_av_v2[['x', 'y', 'mean']].rename(columns={'mean':'value'})
y = df_av_v2['phit_net_mean']

# All selected wells are used for fitting; no rows are held out.
X_train, y_train = X, y
# The model is later applied to every point of the clipped seismic map.
X_test = seism_intersect[['x', 'y', 'value']]

In [227]:
# TPOT regressor configuration: 20 generations, 7 parallel jobs, fixed seed.
tregr = tpot.TPOTRegressor(n_jobs=7, verbosity=2, generations=20, 
                           warm_start=True, random_state=42, memory='auto')

In [None]:
# Fit on the wells and display the fitted pipeline.
tregr.fit(X_train, y_train)
tregr.fitted_pipeline_

In [None]:
# Persist the fitted pipeline to disk.
joblib.dump(tregr.fitted_pipeline_, 'tregr_v6.pkl')

In [None]:
# Show the steps of the fitted pipeline.
model_descr = tregr.fitted_pipeline_
model_descr.steps

## prediction

In [271]:
# Load a saved pipeline and predict porosity for every point of the map.
# NOTE(review): the fit cell above saves 'tregr_v6.pkl' while 'tregr_v2.pkl'
# is loaded here - confirm which model version is intended.
# model = joblib.load('tregr_v1.pkl')
model = joblib.load('tregr_v2.pkl')
# NOTE(review): the training frame built above names this feature 'value';
# here it is renamed to 'mean_av' - presumably to match the column names
# 'tregr_v2.pkl' was fitted with; verify.
X_test = X_test.rename(columns={'value':'mean_av'})
y_pred = model.predict(X_test)
# Columns 0 and 1 of X_test are x and y.
model_df = pd.DataFrame({'x': X_test.iloc[:,0], 'y': X_test.iloc[:,1],
                          'phit_pred': y_pred})

In [None]:
# Show the steps of the loaded pipeline.
model.steps

In [None]:
# Predicted porosity map with the training wells (coloured by their net mean
# porosity) on top; each scatter layer gets its own colorbar.
fig, ax = plt.subplots(figsize=(15, 7))
cb1 = plt.scatter(model_df['x'], model_df['y'], c=model_df['phit_pred'], cmap='coolwarm')
plt.colorbar(cb1)
cb2 = plt.scatter(df_av_v2['x'], df_av_v2['y'], c=df_av_v2['phit_net_mean'], s=50, ec='black', lw=0.5, alpha=0.5, cmap='coolwarm')
plt.colorbar(cb2)

In [None]:
def seism_well_correl_pred(seism_map, wells_df, margin, file):
    """Average a gridded map in a square window around each well.

    Parameters
    ----------
    seism_map : DataFrame
        Map points with columns ``x``, ``y`` and ``value``.
    wells_df : DataFrame
        One row per well with columns ``well``, ``xmean``, ``ymean``,
        ``field`` and ``phit_net_mean``. It is not modified.
    margin : number
        Half-width of the window; the comparison is strict, so points exactly
        on the window edge are excluded.
    file : str
        Label stored in the ``seism_att`` column of the result.

    Returns
    -------
    DataFrame
        One row per well with columns ``well``, ``phit_net_mean``, ``field``,
        ``mean`` (NaN when no map point falls inside the window), ``margin``
        and ``seism_att``. Empty, with the same columns, when ``wells_df`` has
        no rows.
    """
    seism_map_short = seism_map[['x', 'y', 'value']]
    columns = ['well', 'phit_net_mean', 'field', 'mean', 'margin', 'seism_att']

    records = []
    for _, row in wells_df.iterrows():
        # Window bounds are computed locally instead of being written into
        # wells_df, so the caller's frame keeps its original columns.
        in_window = ((seism_map_short['x'] > row['xmean'] - margin) &
                     (seism_map_short['x'] < row['xmean'] + margin) &
                     (seism_map_short['y'] > row['ymean'] - margin) &
                     (seism_map_short['y'] < row['ymean'] + margin))
        records.append({'well': row['well'],
                        'phit_net_mean': row['phit_net_mean'],
                        'field': row['field'],
                        'mean': seism_map_short.loc[in_window, 'value'].mean(),
                        'margin': margin,
                        'seism_att': file})
    # Build the frame once from the records rather than concatenating
    # one-row frames.
    return pd.DataFrame(records, columns=columns)

# Average the predicted-porosity map around each well. The rename is applied to
# a temporary frame so that `model_df` keeps its 'phit_pred' column and the map
# plot cell above can still be re-run after this cell.
pred_map = model_df.rename(columns={'phit_pred':'value'})
wells_pred = seism_well_correl_pred(pred_map, xy8_phit, 100, 'Bal8_AV')
wells_pred = wells_pred[['well', 'mean']].rename(columns={'mean':'phit_pred'})

# Keep only the wells that were used for fitting and compare with the truth.
wells_true = df_av_v2[['well','phit_net_mean','field']]
wells_pred_true = wells_pred.set_index('well').join(wells_true.set_index('well'), how='inner').reset_index()

# QC tolerance: +/- 0.0115 around the measured value (the "1.15pu" band).
qc_tol = 0.0115
wells_pred_true['up_1.15pu'] = wells_pred_true.phit_net_mean + qc_tol
wells_pred_true['down_1.15pu'] = wells_pred_true.phit_net_mean - qc_tol
wells_pred_true['qc'] = np.where((wells_pred_true.phit_pred >= wells_pred_true['down_1.15pu'])
                                 & (wells_pred_true.phit_pred <= wells_pred_true['up_1.15pu']), 1, 0)
# Share of wells inside (1) / outside (0) the tolerance band.
display(wells_pred_true.qc.value_counts(normalize=True))

# Predicted vs. measured with the 1:1 line (red) and the tolerance band (black).
sns.scatterplot(data=wells_pred_true, x='phit_net_mean', y='phit_pred', hue='field')
sns.lineplot(x=[0.13, 0.29], y=[0.13, 0.29], color='red', ls='--')
sns.lineplot(x=[0.13, 0.29], y=[0.13+qc_tol, 0.29+qc_tol], color='black', ls='--')
sns.lineplot(x=[0.13, 0.29], y=[0.13-qc_tol, 0.29-qc_tol], color='black', ls='--')

# Final script Bal8

In [None]:
def list_files_by_mask(directory, mask):
    """Return the paths in ``directory`` that match the glob pattern ``mask``.

    Parameters
    ----------
    directory : str
        Folder to search.
    mask : str
        Glob pattern applied inside ``directory`` (e.g. ``'Bal8*'``).

    Returns
    -------
    list of str
        Matching paths relative to ``directory``, sorted alphabetically so that
        slices such as ``files8[:1]`` select the same file on every machine.
    """
    pattern = os.path.join(directory, mask)
    return sorted(os.path.relpath(path, directory) for path in glob.glob(pattern))
files8 = list_files_by_mask('', 'Bal8*')
files8

In [None]:
def seism_well_run_v2(file, wells_df, buffer, margin):
    """Load one seismic map, clip it to the well area and summarise it per well.

    Parameters
    ----------
    file : str
        Path of a space-delimited file with ``x``, ``y`` and ``value`` columns.
    wells_df : DataFrame
        One row per well with columns ``well``, ``xmean``, ``ymean``,
        ``field`` and ``phit_net_mean``. It is not modified.
    buffer : number
        Distance passed to ``.buffer()`` on the convex hull of the well points.
    margin : number
        Half-width of the square window around each well (strict comparison).

    Returns
    -------
    dict
        ``'seismic_map_intersect'``: the clipped map;
        ``'well_data_from_seismic_map'``: one row per well with the window
        statistics, the well position (``x``/``y``) and the window bounds.
    """
    def seism_upload(file, delimiter):
        """Read the map file and round x/y to 0 decimals."""
        seismic = pd.read_csv(file, delimiter=delimiter)
        return seismic.round({'x': 0, 'y': 0})
    seismic_map = seism_upload(file, ' ')
    print(f"seismic map {file} is uploaded")

    def intersection_maps(map, wells_df, buffer):
        """Keep the map points intersecting the buffered convex hull of the wells."""
        geometry_map = [Point(xy) for xy in zip(map['x'], map['y'])]
        gdf_map = gpd.GeoDataFrame(map, geometry=geometry_map)

        geometry_points = [Point(xy) for xy in zip(wells_df['xmean'], wells_df['ymean'])]
        gdf_points = gpd.GeoDataFrame(wells_df, geometry=geometry_points)
        convex_hull = gdf_points.unary_union.convex_hull.buffer(buffer)
        return gdf_map[gdf_map.intersects(convex_hull)]
    seismic_map_intersect = intersection_maps(seismic_map, wells_df, buffer)
    print('seismic map is intersected with wells')

    def seism_well_correl_init(seism_map, wells_df, margin, file):
        """Window statistics of the map value around every well (one row per well)."""
        seism_map_short = seism_map[['x', 'y', 'value']]
        columns = ['well', 'phit_net_mean', 'field', 'mean', 'p50', 'p25', 'p75', 'x', 'y',
                   'xmean_min', 'xmean_max', 'ymean_min', 'ymean_max', 'margin_init', 'seism_att']
        records = []
        for _, row in wells_df.iterrows():
            # Bounds are kept local so the caller's frame is not altered.
            x_min, x_max = row['xmean'] - margin, row['xmean'] + margin
            y_min, y_max = row['ymean'] - margin, row['ymean'] + margin
            in_window = ((seism_map_short['x'] > x_min) & (seism_map_short['x'] < x_max) &
                         (seism_map_short['y'] > y_min) & (seism_map_short['y'] < y_max))
            values = seism_map_short.loc[in_window, 'value']
            records.append({'well': row['well'],
                            'phit_net_mean': row['phit_net_mean'],
                            'field': row['field'],
                            'mean': values.mean(),
                            'p50': values.quantile(0.5),
                            'p25': values.quantile(0.25),
                            'p75': values.quantile(0.75),
                            'x': row['xmean'],
                            'y': row['ymean'],
                            'xmean_min': x_min,
                            'xmean_max': x_max,
                            'ymean_min': y_min,
                            'ymean_max': y_max,
                            'margin_init': margin,
                            'seism_att': file})
        return pd.DataFrame(records, columns=columns)
    well_data_from_seismic_map = seism_well_correl_init(seismic_map_intersect, wells_df, margin, file)
    print(f"map {file} to wells dataset is recalculated")

    dict_with_results = {'seismic_map_intersect': seismic_map_intersect,
                         'well_data_from_seismic_map': well_data_from_seismic_map}

    return dict_with_results


def preprocessing_data(seismic_map, well_data):
    """Drop the excluded wells and build the fitting / prediction frames.

    Returns ``(X_train, y_train, X_test, well_data_v3)`` where the features are
    ``x``, ``y`` and ``value`` (the window mean for wells, the map value for
    map points), the target is ``phit_net_mean`` and ``well_data_v3`` is the
    filtered well table.
    """
    well_data_v2 = well_data[~well_data.well.isin(['B39', 'D02Y', 'D34', 'B31', 'B20'])] #outliers
    well_data_v3 = well_data_v2[~well_data_v2.well.isin(['B01ST1', 'D01', 'C14', 'C01A', 'B06', 'C13Z', 'C06', 'D01Z','C07'])] #very close to each other wells

    X = well_data_v3[['x', 'y', 'mean']].rename(columns={'mean':'value'})
    y = well_data_v3['phit_net_mean']
    # All remaining wells are used for fitting; no rows are held out.
    X_train, y_train = X, y
    X_test = seismic_map[['x', 'y', 'value']]
    return X_train, y_train, X_test, well_data_v3


def model_postprocessing8(model_df, well_data_v3, file, margin):
    """Compare the predicted map with the wells and attach QC columns.

    ``model_df`` holds ``x``, ``y`` and ``phit_pred`` for every map point. The
    prediction is averaged in a +/- ``margin`` window around each well and
    joined to ``well_data_v3``; a well passes QC (``qc == 1``) when that
    average is within +/- 0.0115 of ``phit_net_mean``. ``well_data_v3`` itself
    is not modified.
    """
    def seism_well_correl_pred(seism_map, wells_df, margin, file):
        """Mean of the map value inside each well's window (one row per well)."""
        seism_map_short = seism_map[['x', 'y', 'value']]
        columns = ['well', 'phit_net_mean', 'field', 'mean', 'margin_pred', 'seism_att']
        records = []
        for _, row in wells_df.iterrows():
            in_window = ((seism_map_short['x'] > row['xmean_min']) &
                         (seism_map_short['x'] < row['xmean_max']) &
                         (seism_map_short['y'] > row['ymean_min']) &
                         (seism_map_short['y'] < row['ymean_max']))
            records.append({'well': row['well'],
                            'phit_net_mean': row['phit_net_mean'],
                            'field': row['field'],
                            'mean': seism_map_short.loc[in_window, 'value'].mean(),
                            'margin_pred': margin,
                            'seism_att': file})
        return pd.DataFrame(records, columns=columns)

    # Window bounds for this margin, written into a new frame (assign returns a
    # copy) so the caller's table is left untouched.
    wells = well_data_v3.assign(xmean_min=well_data_v3['x'] - margin,
                                xmean_max=well_data_v3['x'] + margin,
                                ymean_min=well_data_v3['y'] - margin,
                                ymean_max=well_data_v3['y'] + margin)

    pred_map = model_df.rename(columns={'phit_pred':'value'})
    wells_pred = seism_well_correl_pred(pred_map, wells, margin, file)
    wells_pred = wells_pred[['well', 'mean']].rename(columns={'mean':'phit_pred'})

    wells_pred_true = wells.set_index('well').join(wells_pred.set_index('well'), how='inner').reset_index()

    # Reads the module-level `tregr8` fitted in the loop below.
    wells_pred_true['algorithm'] = tregr8.fitted_pipeline_
    wells_pred_true['up_1.15pu'] = wells_pred_true.phit_net_mean+0.0115
    wells_pred_true['down_1.15pu'] = wells_pred_true.phit_net_mean-0.0115

    wells_pred_true['qc'] = np.where((wells_pred_true.phit_pred >= wells_pred_true['down_1.15pu'])
                                     & (wells_pred_true.phit_pred <= wells_pred_true['up_1.15pu']), 1, 0)

    # .get() keeps this working when every well passes (or every well fails)
    # and one class is missing from value_counts. Plain floats keep the dict
    # repr a Python literal, which the plotting cell parses back with
    # ast.literal_eval after a CSV round trip.
    qc_share = wells_pred_true.qc.value_counts(normalize=True)
    dict_qc = {'1': float(qc_share.get(1, 0.0)), '0': float(qc_share.get(0, 0.0))}

    wells_pred_true['qc_result'] = [dict_qc] * len(wells_pred_true)
    return wells_pred_true


df_lst8 = []
# NOTE(review): only the first file is processed (`files8[:1]`) - presumably a
# leftover from testing; confirm before a full run.
for file in tqdm(files8[:1]):
    print(f'siesmic maps {file} and well data preparation is started')
    data_bal8 = seism_well_run_v2(file, xy8_phit, buffer=1500, margin=100)
    seismic_map8 = data_bal8['seismic_map_intersect']
    well_data8 = data_bal8['well_data_from_seismic_map']

    X_train8, y_train8, X_test8, well_data8_v3 = preprocessing_data(seismic_map8, well_data8)
    print('data is preprocessed')

    tregr8 = tpot.TPOTRegressor(n_jobs=7, verbosity=1, generations=20, random_state=42, scoring='r2', early_stop=5)
    tregr8.fit(X_train8, y_train8)
    joblib.dump(tregr8.fitted_pipeline_, f'tregr_{file}.pkl')
    print('model is trained')

    # Predict with the pipeline as stored on disk.
    model8 = joblib.load(f'tregr_{file}.pkl')
    y_pred8 = model8.predict(X_test8)
    model_df8 = pd.DataFrame({'x': X_test8.iloc[:,0], 'y': X_test8.iloc[:,1],'phit_pred': y_pred8})

    wells_pred_true8 = model_postprocessing8(model_df8, well_data8_v3, file, margin=100)
    print('model is postprocessed')
    wells_pred_true8['scoring'] = 'r2'
    print(f'model based on {file} is ready:', wells_pred_true8.qc_result.iloc[0])
    # Append this iteration's table (the original appended the unrelated
    # global `wells_pred_true` left over from an earlier cell).
    df_lst8.append(wells_pred_true8)

final_pred8 = pd.concat(df_lst8).reset_index(drop=True)

In [354]:
# final_pred8.to_csv('final_pred_bal8_scoring_r2.csv', index=False)

# Visualization best result bal8

In [None]:
# Load the K01 log table; the first data row is skipped and the 'datasetName'
# column dropped.
# NOTE(review): the skipped row is presumably a units row - confirm.
k01 = pd.read_csv(r'C:\jupyter\SPP\input\ACG_k01.csv')[1:].drop('datasetName', axis=1)
k01 = k01.rename(columns={'wellName':'well'})
# Keep only the listed Balakhany VIII units.
k01 = k01[k01.FORMATION.isin(['Balakhany VIII sand', 'Balakhany VIII 20','Balakhany VIII 15', 'Balakhany VIII 10', 'Balakhany VIII 5'])]
k01['FORMATION_up'] = 'Balakhany VIII'
k01['field'] = 'ACE'
# Convert the log curves to numbers; values that cannot be parsed become NaN.
for col in [    'MD', 'AREA', 'BADPORLOG', 'FLANK', 'Fluidcode',
                'FLUIDCODE_PP', 'GR_N', 'GRMATRIX', 'GRSHALE', 'LPERM',
                'LPERM_DS_Bal', 'LPERM_US_Bal', 'NET', 'NPSS', 'PHIT', 'RDEEP', 'RHOB',
                'RHOF', 'RHOMA', 'TST', 'TVD_SCS', 'X', 'Y']:
    k01[col] = pd.to_numeric(k01[col], errors='coerce')
# Keep rows with positive porosity only.
k01 = k01[(k01.PHIT > 0)]
k01 = k01.round({'MD':1, 'TVD_SCS':1, 'TST':1, 'X':0, 'Y':0})
for col in ['well', 'FORMATION', 'FORMATION_up', 'field']:
    k01[col] = k01[col].astype('string')

def interpolate_by_depth_fm_run_k01(df, step):
    """Resample every well's curves onto a regular TST grid.

    Parameters
    ----------
    df : DataFrame
        Log table with at least ``well``, ``TST``, ``MD``, ``FORMATION``,
        ``FORMATION_up`` and ``field``; every other column is interpolated
        linearly against ``TST``.
    step : float
        Spacing of the new TST grid. It is now honoured throughout (the
        original hard-coded 0.1 in two places).
        NOTE(review): ``MD`` and ``TST`` are still rounded to 1 decimal before
        the label join, which suits a step of 0.1 - revisit for other steps.

    Returns
    -------
    DataFrame
        The interpolated curves, grouped well by well, with ``FORMATION``,
        ``FORMATION_up`` and ``field`` joined back on (``well``, ``MD``) and
        forward/backward filled inside each well.
    """
    label_cols = ['well', 'FORMATION_up', 'FORMATION', 'field']
    df_tst = df[df.TST.notna()].round({'MD':1})

    def interpolate_by_depth_fm_v2(one_well, step):
        """Interpolate all non-label columns of one well onto the TST grid."""
        one_well = one_well.sort_values(by='TST')
        well_name = one_well["well"].iloc[0]
        data_range = np.floor((one_well["TST"].max() - one_well["TST"].min())/step)
        starting_tst = one_well["TST"].iloc[0]
        # Grid starts one step below the shallowest sample and stops before
        # the deepest one (range(1, n), as in the original).
        new_TST_values = [starting_tst + i*step for i in range(1, int(data_range))]
        new_df = pd.DataFrame({
            col: interp1d(one_well['TST'], one_well[col], kind='linear',
                          fill_value="extrapolate")(new_TST_values)
            for col in one_well.columns if col not in label_cols
        })
        new_df['well'] = well_name
        new_df['TST'] = new_TST_values
        return new_df

    df_lst = []
    for well in tqdm(df_tst.well.unique()):
        well_data = df_tst[df_tst.well == well]
        df_lst.append(interpolate_by_depth_fm_v2(well_data, step))
    df_interp = pd.concat(df_lst)
    df_interp = df_interp.round({'MD':1, 'TVD_SCS':1, 'TST':1})
    print('Start joining')

    def well_bal_interp_join(dataset):
        """Attach the formation/field labels of the original samples by (well, MD)."""
        df_tst = df[(df.TST.notna()) & (df.FORMATION_up.notna())].round({'MD':1})
        data_fu = df_tst[['well','MD','FORMATION_up', 'FORMATION', 'field']]
        well_join = dataset.set_index(['well','MD']).join(data_fu.set_index(['well','MD'])).reset_index()
        well_join.insert(3, 'FORMATION_up', well_join.pop('FORMATION_up'))
        well_join.insert(4, 'FORMATION', well_join.pop('FORMATION'))
        return well_join
    well_interp_v2 = well_bal_interp_join(df_interp)

    # Interpolated rows whose MD matched no original sample have no labels:
    # fill them from the neighbouring rows of the same well. Explicit copy plus
    # ffill()/bfill() replaces assignment on a slice and the deprecated
    # fillna(method=...).
    fill_cols = ['field', 'FORMATION_up', 'FORMATION']
    df_lst_2 = []
    for well in well_interp_v2.well.unique():
        field_data = well_interp_v2[well_interp_v2.well == well].copy()
        field_data[fill_cols] = field_data[fill_cols].ffill().bfill()
        df_lst_2.append(field_data)
    well_interp_v3 = pd.concat(df_lst_2)

    return well_interp_v3

# Resample K01 onto a 0.1 TST grid and bring the column names in line with the
# other (lower-case) log tables.
k01_intepr = interpolate_by_depth_fm_run_k01(k01, 0.1)
k01_intepr.columns = k01_intepr.columns.str.lower()
k01_intepr['son'] = 0
k01_intepr['sonsh'] = 0
# Mean position per well: identical to the overall mean when the table holds
# a single well, and still correct if it ever holds several.
k01_intepr['xmean'] = k01_intepr.groupby('well')['x'].transform('mean')
k01_intepr['ymean'] = k01_intepr.groupby('well')['y'].transform('mean')
# Per-well mean porosity over the rows where net == 1, with the well position.
k01_gb = k01_intepr[k01_intepr.net==1].groupby('well')[['phit', 'xmean', 'ymean']].mean().reset_index()
k01_gb = k01_gb.rename(columns={'phit':'phit_w_avg'})

In [None]:
file = 'Bal8_SNA'
def seism_well_run_v2(file, wells_df, buffer, margin):
    """Load one seismic map, clip it to the well area and summarise it per well.

    Parameters
    ----------
    file : str
        Path of a space-delimited file with ``x``, ``y`` and ``value`` columns.
    wells_df : DataFrame
        One row per well with columns ``well``, ``xmean``, ``ymean``,
        ``field`` and ``phit_net_mean``. It is not modified.
    buffer : number
        Distance passed to ``.buffer()`` on the convex hull of the well points.
    margin : number
        Half-width of the square window around each well (strict comparison).

    Returns
    -------
    dict
        ``'seismic_map_intersect'``: the clipped map;
        ``'well_data_from_seismic_map'``: one row per well with the window
        statistics, the well position (``x``/``y``) and the window bounds.
    """
    def seism_upload(file, delimiter):
        """Read the map file and round x/y to 0 decimals."""
        seismic = pd.read_csv(file, delimiter=delimiter)
        return seismic.round({'x': 0, 'y': 0})
    seismic_map = seism_upload(file, ' ')
    print(f"seismic map {file} is uploaded")

    def intersection_maps(map, wells_df, buffer):
        """Keep the map points intersecting the buffered convex hull of the wells."""
        geometry_map = [Point(xy) for xy in zip(map['x'], map['y'])]
        gdf_map = gpd.GeoDataFrame(map, geometry=geometry_map)

        geometry_points = [Point(xy) for xy in zip(wells_df['xmean'], wells_df['ymean'])]
        gdf_points = gpd.GeoDataFrame(wells_df, geometry=geometry_points)
        convex_hull = gdf_points.unary_union.convex_hull.buffer(buffer)
        return gdf_map[gdf_map.intersects(convex_hull)]
    seismic_map_intersect = intersection_maps(seismic_map, wells_df, buffer)
    print('seismic map is intersected with wells')

    def seism_well_correl_init(seism_map, wells_df, margin, file):
        """Window statistics of the map value around every well (one row per well)."""
        seism_map_short = seism_map[['x', 'y', 'value']]
        columns = ['well', 'phit_net_mean', 'field', 'mean', 'p50', 'p25', 'p75', 'x', 'y',
                   'xmean_min', 'xmean_max', 'ymean_min', 'ymean_max', 'margin_init', 'seism_att']
        records = []
        for _, row in wells_df.iterrows():
            # Bounds are kept local so the caller's frame is not altered.
            x_min, x_max = row['xmean'] - margin, row['xmean'] + margin
            y_min, y_max = row['ymean'] - margin, row['ymean'] + margin
            in_window = ((seism_map_short['x'] > x_min) & (seism_map_short['x'] < x_max) &
                         (seism_map_short['y'] > y_min) & (seism_map_short['y'] < y_max))
            values = seism_map_short.loc[in_window, 'value']
            records.append({'well': row['well'],
                            'phit_net_mean': row['phit_net_mean'],
                            'field': row['field'],
                            'mean': values.mean(),
                            'p50': values.quantile(0.5),
                            'p25': values.quantile(0.25),
                            'p75': values.quantile(0.75),
                            'x': row['xmean'],
                            'y': row['ymean'],
                            'xmean_min': x_min,
                            'xmean_max': x_max,
                            'ymean_min': y_min,
                            'ymean_max': y_max,
                            'margin_init': margin,
                            'seism_att': file})
        # Built once from records instead of concatenating one-row frames.
        return pd.DataFrame(records, columns=columns)
    well_data_from_seismic_map = seism_well_correl_init(seismic_map_intersect, wells_df, margin, file)
    print(f"map {file} to wells dataset is recalculated")

    dict_with_results = {'seismic_map_intersect': seismic_map_intersect,
                         'well_data_from_seismic_map': well_data_from_seismic_map}

    return dict_with_results
print(f'siesmic maps {file} and well data preparation is started')
data_bal8 = seism_well_run_v2(file, xy8_phit, buffer=1500, margin=100)
seismic_map = data_bal8['seismic_map_intersect']
well_data = data_bal8['well_data_from_seismic_map']
X_test = seismic_map[['x', 'y', 'value']]

In [None]:
# Load the saved QC table and the pipeline stored for `file`, then predict
# porosity for every point of the clipped map.
final_pred = pd.read_csv('final_pred_bal8_scoring_r2.csv')
model = joblib.load(f'tregr_{file}.pkl')
# NOTE(review): X_test is built from the columns x, y, value, so this rename
# looks like a no-op - confirm it can be dropped.
X_test = X_test.rename(columns={'mean':'value'})
y_pred = model.predict(X_test)
# Columns 0 and 1 of X_test are x and y.
model_df = pd.DataFrame({'x': X_test.iloc[:,0], 'y': X_test.iloc[:,1],
                          'phit_pred': y_pred})
# Show the stored QC result for this attribute.
final_pred[final_pred.seism_att == file].qc_result.iloc[0]

In [None]:
# Display the loaded pipeline.
model

In [None]:
# Predicted porosity map with the wells used for fitting and the K01 test
# well(s) on top, all drawn with one shared colour scale.
fig, ax = plt.subplots(figsize=(14, 7))
# Same well exclusions as in the model-fitting step.
xy8_phit_preproc = xy8_phit[~xy8_phit.well.isin(['B39', 'D02Y', 'D34', 'B31', 'B20',
                                                 'B01ST1', 'D01', 'C14', 'C01A', 'B06', 'C13Z', 'C06', 'D01Z','C07'])]
# Common normalisation over measured, predicted and K01 values so the three
# scatter layers share one colour mapping.
all_values = pd.concat([xy8_phit_preproc.phit_net_mean, model_df.phit_pred, k01_gb.phit_w_avg]).reset_index(drop=True)
norm = plt.Normalize(all_values.min(), all_values.max())
cb1 = plt.scatter(model_df['x'], model_df['y'], c=model_df['phit_pred'], cmap='coolwarm', norm=norm, s=1)
# colorbar1 = plt.colorbar(cb1)
# colorbar1.set_label('phit_model')
cb2 = plt.scatter(  xy8_phit_preproc['xmean'], 
                    xy8_phit_preproc['ymean'], 
                    c=xy8_phit_preproc['phit_net_mean'], 
                    s=50, ec='black', lw=0.5, alpha=1, norm=norm, cmap='coolwarm')
# colorbar2 = plt.colorbar(cb2)
# colorbar2.set_label('phit_wells')
# NOTE(review): this reads k01_gb['xmean'] / ['phit_w_avg']; the K01 test cell
# further down renames those columns, so this cell fails if re-run afterwards.
plt.scatter(k01_gb['xmean'], k01_gb['ymean'], c=k01_gb['phit_w_avg'], norm=norm, 
            s=50, ec='black', lw=0.5, alpha=0.75, cmap='coolwarm')
plt.title(f'phit prediction based on {file}');
# plt.tight_layout();

In [None]:
# Measured vs. predicted porosity per well for the selected attribute, with
# the +/- 0.0115 tolerance band around the measured values.
# The rows come from `final_pred`, the table read from CSV two cells above
# (the in-memory `final_pred8` only holds the files processed in the training
# loop, and its qc_result values are dicts rather than strings).
phit_pred_sna = final_pred[final_pred.seism_att == file].sort_values('phit_net_mean')
# qc_result is the text repr of a dict after the CSV round trip; accept an
# actual dict as well so the cell also works on an in-memory table.
qc_raw = phit_pred_sna.qc_result.iloc[0]
dict_obj = ast.literal_eval(qc_raw) if isinstance(qc_raw, str) else qc_raw
fig, ax = plt.subplots(figsize=(16,4))
sns.lineplot(data=phit_pred_sna, x='well', y='phit_net_mean', ax=ax, label = 'phit_net_mean')
sns.lineplot(data=phit_pred_sna, x='well', y='phit_pred', ax=ax, label = 'phit_pred')
ax.fill_between( phit_pred_sna['well'],
                 phit_pred_sna['phit_net_mean'] - 0.0115,
                 phit_pred_sna['phit_net_mean'] + 0.0115,
                 color='b', alpha=0.2)
# Style the existing tick labels instead of re-setting their text.
ax.tick_params(axis='x', labelrotation=45, labelsize=6)
plt.title(f"phit_net_mean vs phit_pred based on {phit_pred_sna.seism_att.iloc[0]} with 1:{dict_obj['1']:.3f}")
plt.legend()
plt.grid()

# Test K01 bal8


In [None]:
def seism_well_correl_test(seism_map, wells_df, margin, file):
    """
    Average the predicted map inside a square window around every well.

    Expected input columns:
        wells_df  : 'well', 'phit_net_mean', 'x', 'y'
        seism_map : 'x', 'y', 'phit_pred'

    `margin` is the half-width of the window (bounds are exclusive) and `file`
    is copied into the 'seism_att' column. The result has one row per well with
    the columns 'well', 'phit_net_mean', 'mean', 'margin_pred' and 'seism_att';
    'mean' is NaN when no map node falls inside the window.

    Side effect: the window bounds are written into `wells_df` itself as
    'xmean_min', 'xmean_max', 'ymean_min' and 'ymean_max'.
    """
    for axis in ('x', 'y'):
        wells_df[f'{axis}mean_min'] = wells_df[axis] - margin
        wells_df[f'{axis}mean_max'] = wells_df[axis] + margin
    nodes = seism_map[['x', 'y', 'phit_pred']]

    def window_row(well):
        # Nodes strictly inside the window of this well.
        in_x = (nodes['x'] > well['xmean_min']) & (nodes['x'] < well['xmean_max'])
        in_y = (nodes['y'] > well['ymean_min']) & (nodes['y'] < well['ymean_max'])
        return pd.DataFrame({'well': well['well'],
                             'phit_net_mean': well['phit_net_mean'],
                             'mean': nodes.loc[in_x & in_y, 'phit_pred'].mean(),
                             'margin_pred': margin,
                             'seism_att': file}, index=[0])

    per_well = [window_row(well) for _, well in wells_df.iterrows()]
    return pd.concat(per_well).reset_index(drop=True)
# Rename the K01 columns to the names seism_well_correl_test() reads ('x', 'y', 'phit_net_mean').
k01_gb = k01_gb.rename(columns = {'xmean':'x', 'ymean':'y', 'phit_w_avg':'phit_net_mean'})
# Average the predicted map in a +/- 30 window around each K01 well.
test_k01 = seism_well_correl_test(model_df, k01_gb, 30, 'Bal8_SNA')
test_k01['diff'] = test_k01.phit_net_mean - test_k01['mean']
# Acceptance band of +/- 0.0115 around the measured value.
test_k01['up_1.15pu'] = test_k01.phit_net_mean+0.0115
test_k01['down_1.15pu'] = test_k01.phit_net_mean-0.0115
# qc = 1 when the prediction lies inside the band. Both bounds must come from test_k01
# itself (the upper bound used to be read from an unrelated name, `test`).
test_k01['qc'] = np.where((test_k01['mean'] >= test_k01['down_1.15pu']) & (test_k01['mean'] <= test_k01['up_1.15pu']), 1, 0)
test_k01

# Displaying phit_pred maps

In [None]:
def list_files_by_mask(directory, mask):
    """Return the paths in `directory` that match the glob `mask`, sorted by name.

    Parameters
    ----------
    directory : str
        Folder to search; '' means the current working directory.
    mask : str
        Glob pattern, e.g. 'Bal8_*'.

    Returns
    -------
    list of str
        Matching paths expressed relative to `directory`. The list is sorted
        because glob gives no ordering guarantee and callers combine several
        result lists position by position.
    """
    pattern = os.path.join(directory, mask)
    return sorted(os.path.relpath(path, directory) for path in glob.glob(pattern))
tregr = list_files_by_mask('', 'tregr_*')
files = list_files_by_mask('', 'Bal8_*')
# Pair every seismic map with the model trained on it by NAME rather than by list position:
# the two globs are independent, their order is not guaranteed, and 'tregr_*' also matches
# models of other formations / experiments.
# NOTE(review): assumes models are stored as 'tregr_<seismic file name>.<ext>' - confirm
# that this is how the Bal8 models were saved.
model_by_map = {os.path.splitext(name)[0][len('tregr_'):]: name for name in tregr}
df_files_tregr = pd.DataFrame(
    [(model_by_map[seismic], seismic) for seismic in sorted(files) if seismic in model_by_map],
    columns=['model','seismic'])
df_files_tregr

In [None]:
def seism_well_run_v2(file, wells_df, buffer, margin):
    """Load a seismic attribute map, clip it to the well area and sample it at every well.

    The work is done in three steps:
      1. `file` is read as a space-delimited table and its 'x' / 'y' columns are
         rounded to 0 decimals.
      2. Map nodes are kept only if they intersect the convex hull of the well
         locations ('xmean', 'ymean') expanded by `buffer`.
      3. For every well, the 'value' column of the kept nodes is summarised
         (mean, p25, p50, p75) inside a square window of half-width `margin`;
         the window bounds are exclusive.

    Side effect: the window bounds are written into `wells_df` itself as the
    columns 'xmean_min', 'xmean_max', 'ymean_min' and 'ymean_max'.

    Returns a dict with the keys 'seismic_map_intersect' (the clipped map) and
    'well_data_from_seismic_map' (one row per well with the window statistics).
    """
    # 1. load the map
    seismic_map = pd.read_csv(file, delimiter=' ').round({'x': 0, 'y': 0})
    print(f"seismic map {file} is uploaded")

    # 2. clip it to the buffered convex hull of the wells
    map_points = [Point(xy) for xy in zip(seismic_map['x'], seismic_map['y'])]
    map_gdf = gpd.GeoDataFrame(seismic_map, geometry=map_points)
    well_points = [Point(xy) for xy in zip(wells_df['xmean'], wells_df['ymean'])]
    wells_gdf = gpd.GeoDataFrame(wells_df, geometry=well_points)
    hull = wells_gdf.unary_union.convex_hull.buffer(buffer)
    seismic_map_intersect = map_gdf[map_gdf.intersects(hull)]
    print('seismic map is intersected with wells')

    # 3. summarise the map inside a window around each well
    for axis in ('x', 'y'):
        wells_df[f'{axis}mean_min'] = wells_df[f'{axis}mean'] - margin
        wells_df[f'{axis}mean_max'] = wells_df[f'{axis}mean'] + margin
    nodes = seismic_map_intersect[['x', 'y', 'value']]

    def window_stats(well):
        inside = ((nodes['x'] > well['xmean_min']) & (nodes['x'] < well['xmean_max']) &
                  (nodes['y'] > well['ymean_min']) & (nodes['y'] < well['ymean_max']))
        values = nodes.loc[inside, 'value']
        return pd.DataFrame({'well': well['well'],
                             'phit_net_mean': well['phit_net_mean'],
                             'field': well['field'],
                             'mean': values.mean(),
                             'p50': values.quantile(0.5),
                             'p25': values.quantile(0.25),
                             'p75': values.quantile(0.75),
                             'x': well['xmean'],
                             'y': well['ymean'],
                             'xmean_min': well['xmean_min'],
                             'xmean_max': well['xmean_max'],
                             'ymean_min': well['ymean_min'],
                             'ymean_max': well['ymean_max'],
                             'margin_init': margin,
                             'seism_att': file}, index=[0])

    well_data_from_seismic_map = pd.concat(
        [window_stats(well) for _, well in wells_df.iterrows()]).reset_index(drop=True)
    print(f"map {file} to wells dataset is recalculated")

    return {'seismic_map_intersect': seismic_map_intersect,
            'well_data_from_seismic_map': well_data_from_seismic_map}

# One predicted-porosity map per (model, seismic attribute) pair.
# `final_pred` and the colour scale `norm` are expected to exist from earlier cells.
for idx, row in df_files_tregr.iterrows():
    print(f"siesmic maps {row['seismic']} and model {row['model']} are started")
    data_bal8 = seism_well_run_v2(row['seismic'], xy8_phit, buffer=1500, margin=100)
    seismic_map = data_bal8['seismic_map_intersect']

    # Predict for every node of the clipped map with the stored pipeline.
    model = joblib.load(row['model'])
    X_test = seismic_map[['x', 'y', 'value']].rename(columns={'mean':'value'})
    y_pred = model.predict(X_test)
    model_df = pd.DataFrame({'x': X_test['x'], 'y': X_test['y'], 'phit_pred': y_pred})

    # The stored QC summary is the text of a dict; key '1' is shown as the score.
    phit_pred_sna = final_pred.loc[final_pred['seism_att'] == row['seismic']]
    dict_obj = ast.literal_eval(phit_pred_sna['qc_result'].iloc[0])

    fig, ax = plt.subplots(figsize=(14, 7))
    cb1 = ax.scatter(model_df['x'], model_df['y'], c=model_df['phit_pred'],
                     s=1, cmap='coolwarm', norm=norm)
    colorbar1 = fig.colorbar(cb1, ax=ax)
    ax.set_title(f"phit prediction based on {row['seismic']} and model {row['model']} and score {dict_obj['1']:.3f}")
    plt.show();
    

# Experiments with Bal8

In [None]:
# Hold out 30 % of the rows of well_data8 for validation; every column except the target
# 'phit_net_mean' goes to the feature side of the split.
X_train, X_val, y_train, y_val = train_test_split(
    well_data8.drop(columns='phit_net_mean'),
    well_data8['phit_net_mean'],
    test_size=0.3,
    random_state=42,
)
y_train

In [None]:
file = 'Bal8_SNA'


def seism_well_run_v2(file, wells_df, buffer, margin):
    """Load a seismic attribute map, clip it to the well area and sample it at every well.

    The work is done in three steps:
      1. `file` is read as a space-delimited table and its 'x' / 'y' columns are
         rounded to 0 decimals.
      2. Map nodes are kept only if they intersect the convex hull of the well
         locations ('xmean', 'ymean') expanded by `buffer`.
      3. For every well, the 'value' column of the kept nodes is summarised
         (mean, p25, p50, p75) inside a square window of half-width `margin`;
         the window bounds are exclusive.

    Side effect: the window bounds are written into `wells_df` itself as the
    columns 'xmean_min', 'xmean_max', 'ymean_min' and 'ymean_max'.

    Returns a dict with the keys 'seismic_map_intersect' (the clipped map) and
    'well_data_from_seismic_map' (one row per well with the window statistics).
    """
    # 1. load the map
    seismic_map = pd.read_csv(file, delimiter=' ').round({'x': 0, 'y': 0})
    print(f"seismic map {file} is uploaded")

    # 2. clip it to the buffered convex hull of the wells
    map_points = [Point(xy) for xy in zip(seismic_map['x'], seismic_map['y'])]
    map_gdf = gpd.GeoDataFrame(seismic_map, geometry=map_points)
    well_points = [Point(xy) for xy in zip(wells_df['xmean'], wells_df['ymean'])]
    wells_gdf = gpd.GeoDataFrame(wells_df, geometry=well_points)
    hull = wells_gdf.unary_union.convex_hull.buffer(buffer)
    seismic_map_intersect = map_gdf[map_gdf.intersects(hull)]
    print('seismic map is intersected with wells')

    # 3. summarise the map inside a window around each well
    for axis in ('x', 'y'):
        wells_df[f'{axis}mean_min'] = wells_df[f'{axis}mean'] - margin
        wells_df[f'{axis}mean_max'] = wells_df[f'{axis}mean'] + margin
    nodes = seismic_map_intersect[['x', 'y', 'value']]

    def window_stats(well):
        inside = ((nodes['x'] > well['xmean_min']) & (nodes['x'] < well['xmean_max']) &
                  (nodes['y'] > well['ymean_min']) & (nodes['y'] < well['ymean_max']))
        values = nodes.loc[inside, 'value']
        return pd.DataFrame({'well': well['well'],
                             'phit_net_mean': well['phit_net_mean'],
                             'field': well['field'],
                             'mean': values.mean(),
                             'p50': values.quantile(0.5),
                             'p25': values.quantile(0.25),
                             'p75': values.quantile(0.75),
                             'x': well['xmean'],
                             'y': well['ymean'],
                             'xmean_min': well['xmean_min'],
                             'xmean_max': well['xmean_max'],
                             'ymean_min': well['ymean_min'],
                             'ymean_max': well['ymean_max'],
                             'margin_init': margin,
                             'seism_att': file}, index=[0])

    well_data_from_seismic_map = pd.concat(
        [window_stats(well) for _, well in wells_df.iterrows()]).reset_index(drop=True)
    print(f"map {file} to wells dataset is recalculated")

    return {'seismic_map_intersect': seismic_map_intersect,
            'well_data_from_seismic_map': well_data_from_seismic_map}
# Build the Bal8 inputs for the attribute named in `file`: the seismic map clipped to the
# well area (hull buffer 1500) and the per-well window statistics (window half-width 100).
print(f'siesmic maps {file} and well data preparation is started')
data_bal8 = seism_well_run_v2(file, xy8_phit, buffer=1500, margin=100)
seismic_map8 = data_bal8['seismic_map_intersect']
well_data8 = data_bal8['well_data_from_seismic_map']

def preprocessing_data(seismic_map, well_data):
    """Filter the wells and split them into training / validation sets.

    Parameters
    ----------
    seismic_map : DataFrame with at least the columns 'x', 'y', 'value'.
    well_data : DataFrame with at least 'well', 'x', 'y', 'mean', 'phit_net_mean'.

    Returns
    -------
    X_train, y_train, X_val, y_val : 70 / 30 split (random_state=42) of the kept wells.
        Features are 'x', 'y' and 'value' (the per-well 'mean' renamed), i.e. the same
        columns as X_test; the target is 'phit_net_mean'.
    X_test : the 'x', 'y', 'value' columns of `seismic_map`.
    well_data_v3 : `well_data` without the excluded wells.
    """
    outliers = ['B39', 'D02Y', 'D34', 'B31', 'B20']
    # wells located very close to another well
    too_close = ['B01ST1', 'D01', 'C14', 'C01A', 'B06', 'C13Z', 'C06', 'D01Z', 'C07']
    well_data_v3 = well_data[~well_data.well.isin(outliers + too_close)]

    X = well_data_v3[['x', 'y', 'mean']].rename(columns={'mean':'value'})
    y = well_data_v3['phit_net_mean']
    # Split the model features. Splitting `well_data_v3.drop(['phit_net_mean'])` instead
    # fails: without axis/columns, drop() looks for a ROW labelled 'phit_net_mean'.
    X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.3, random_state=42)

    X_test = seismic_map[['x', 'y', 'value']]
    return X_train, y_train, X_val, y_val, X_test, well_data_v3
X_train8, y_train8, X_val8, y_val8, X_test8, well_data8_v3 = preprocessing_data(seismic_map8, well_data8)
# Report completion only after the step has actually run.
print('data is preprocessed')

# TPOT pipeline search on the training split (up to 20 generations, 'r2' scoring,
# early stop after 5, fixed random_state).
tregr8 = tpot.TPOTRegressor(n_jobs=7, verbosity=1, generations=20, random_state=42, scoring='r2', early_stop=5)
tregr8.fit(X_train8, y_train8)
# Bare expression: not the last line of the cell, so nothing is displayed.
tregr8.fitted_pipeline_
# Store the fitted pipeline under an experiment-specific name ('tregr_test_' prefix).
joblib.dump(tregr8.fitted_pipeline_, f'tregr_test_{file}.pkl')
print('model is trained')

# Reload the stored pipeline and predict for every node of the clipped seismic map.
model8 = joblib.load(f'tregr_test_{file}.pkl')
y_pred8 = model8.predict(X_test8)
model_df8 = pd.DataFrame({'x': X_test8.iloc[:,0], 'y': X_test8.iloc[:,1],'phit_pred': y_pred8})

def model_postprocessing8(model_df, well_data_v3, file, margin):
    """Compare the predicted map with the well values and score the match.

    Parameters
    ----------
    model_df : DataFrame with 'x', 'y' and 'phit_pred' columns (predicted map).
    well_data_v3 : DataFrame with at least 'well', 'phit_net_mean', 'x', 'y'.
        It is not modified; the work is done on a copy.
    file : kept for interface compatibility; it does not influence the result.
    margin : half-width of the square window (exclusive bounds) in which the
        predicted map is averaged around each well.

    Returns
    -------
    DataFrame : `well_data_v3` (window bounds recomputed for `margin`) joined with
        the per-well 'phit_pred', plus
        'algorithm'                 - the fitted pipeline read from the global `tregr8`
        'up_1.15pu' / 'down_1.15pu' - phit_net_mean +/- 0.0115
        'qc'                        - 1 when phit_pred lies inside that band, else 0
        'qc_result'                 - dict {'1': share of qc == 1, '0': share of qc == 0}
    """
    # Work on a copy so the caller's frame (often a filtered slice) is left untouched.
    wells = well_data_v3.copy()
    wells['xmean_min'] = wells['x'] - margin
    wells['xmean_max'] = wells['x'] + margin
    wells['ymean_min'] = wells['y'] - margin
    wells['ymean_max'] = wells['y'] + margin

    # Mean predicted value inside each well's window (NaN when the window is empty).
    pred_map = model_df[['x', 'y', 'phit_pred']]
    records = []
    for _, row in wells.iterrows():
        inside = ((pred_map['x'] > row['xmean_min']) & (pred_map['x'] < row['xmean_max']) &
                  (pred_map['y'] > row['ymean_min']) & (pred_map['y'] < row['ymean_max']))
        records.append({'well': row['well'], 'phit_pred': pred_map.loc[inside, 'phit_pred'].mean()})
    wells_pred = pd.DataFrame(records, columns=['well', 'phit_pred'])

    wells_pred_true = wells.set_index('well').join(wells_pred.set_index('well'), how='inner').reset_index()

    wells_pred_true['algorithm'] = tregr8.fitted_pipeline_
    wells_pred_true['up_1.15pu'] = wells_pred_true.phit_net_mean+0.0115
    wells_pred_true['down_1.15pu'] = wells_pred_true.phit_net_mean-0.0115

    wells_pred_true['qc'] = np.where((wells_pred_true.phit_pred >= wells_pred_true['down_1.15pu'])
                                     & (wells_pred_true.phit_pred <= wells_pred_true['up_1.15pu']), 1, 0)

    # Shares computed as means of boolean masks: no KeyError when every well passes or
    # every well fails. Plain Python floats keep the dict's text form parseable by
    # ast.literal_eval after a CSV round trip.
    dict_qc = {'1': float((wells_pred_true.qc == 1).mean()),
               '0': float((wells_pred_true.qc == 0).mean())}

    wells_pred_true['qc_result'] = [dict_qc] * len(wells_pred_true)
    return wells_pred_true
wells_pred_true8 = model_postprocessing8(model_df8, well_data8_v3, file, margin=100)
# Report completion only after the step has actually run.
print('model is postprocessed')
wells_pred_true8['scoring'] = 'r2'
print(f'model based on {file} is ready:', wells_pred_true8.qc_result.iloc[0])

final_pred8_test1 = wells_pred_true8
# assign() returns a new frame, so the target column is not written into X_val8 itself.
val_test1 = X_val8.assign(phit_net_mean=y_val8)
# Attach the well names by index.
# NOTE(review): assumes the index of X_val8 still matches the index of xy8_phit - confirm.
val_test1_v2 = val_test1.join(xy8_phit['well'])

In [None]:
def seism_well_correl_test(seism_map, wells_df, margin, file):
    """
    Average the predicted map inside a square window around every well.

    Expected input columns:
        wells_df  : 'well', 'phit_net_mean', 'x', 'y'
        seism_map : 'x', 'y', 'phit_pred'

    `margin` is the half-width of the window (bounds are exclusive) and `file`
    is copied into the 'seism_att' column. The result has one row per well with
    the columns 'well', 'phit_net_mean', 'mean', 'margin_pred' and 'seism_att';
    'mean' is NaN when no map node falls inside the window.

    Side effect: the window bounds are written into `wells_df` itself as
    'xmean_min', 'xmean_max', 'ymean_min' and 'ymean_max'.
    """
    for axis in ('x', 'y'):
        wells_df[f'{axis}mean_min'] = wells_df[axis] - margin
        wells_df[f'{axis}mean_max'] = wells_df[axis] + margin
    nodes = seism_map[['x', 'y', 'phit_pred']]

    def window_row(well):
        # Nodes strictly inside the window of this well.
        in_x = (nodes['x'] > well['xmean_min']) & (nodes['x'] < well['xmean_max'])
        in_y = (nodes['y'] > well['ymean_min']) & (nodes['y'] < well['ymean_max'])
        return pd.DataFrame({'well': well['well'],
                             'phit_net_mean': well['phit_net_mean'],
                             'mean': nodes.loc[in_x & in_y, 'phit_pred'].mean(),
                             'margin_pred': margin,
                             'seism_att': file}, index=[0])

    per_well = [window_row(well) for _, well in wells_df.iterrows()]
    return pd.concat(per_well).reset_index(drop=True)
# Make sure the target column carries the name seism_well_correl_test() reads.
val_test1_v2 = val_test1_v2.rename(columns={'phit_val':'phit_net_mean'})
# Average the predicted map in a +/- 100 window around every validation well.
test_val = seism_well_correl_test(model_df8, val_test1_v2, 100, 'Bal8_SNA')
test_val['diff'] = test_val.phit_net_mean - test_val['mean']
# Acceptance band of +/- 0.0115 around the measured value.
test_val['up_1.15pu'] = test_val.phit_net_mean+0.0115
test_val['down_1.15pu'] = test_val.phit_net_mean-0.0115
test_val['qc'] = np.where((test_val['mean'] >= test_val['down_1.15pu']) & (test_val['mean'] <= test_val['up_1.15pu']), 1, 0)
# Share of wells inside the band. The mean of the 0/1 flag equals
# value_counts(normalize=True)[1] but does not raise when no well passes.
test_val.qc.mean()

In [None]:
# Show the per-well validation table built in the previous cell.
test_val

In [None]:
# The window mean of the predicted map is the prediction for the well.
test_val = test_val.rename(columns={'mean':'phit_pred'})
test_val = test_val.sort_values('phit_net_mean')

fig, ax = plt.subplots(figsize=(16,4))
sns.lineplot(data=test_val, x='well', y='phit_net_mean', ax=ax, label = 'phit_net_mean')
sns.lineplot(data=test_val, x='well', y='phit_pred', ax=ax, label = 'phit_pred')
# Shaded band of +/- 0.0115 around the measured value.
ax.fill_between(test_val['well'],
                test_val['phit_net_mean'] - 0.0115,
                test_val['phit_net_mean'] + 0.0115,
                color='b', alpha=0.2)
ax.set_xticklabels(ax.get_xticklabels(), rotation=45, fontsize=6)
# Share of wells with qc == 1, taken as the mean of the 0/1 flag so the title can still be
# built when no well passes (value_counts(normalize=True)[1] raises KeyError in that case).
ax.set_title(f"phit_net_mean vs phit_pred based on {test_val.seism_att.iloc[0]} with 1:{test_val.qc.mean():.3f}")
ax.legend()
ax.grid()

# Final script Bal10

In [None]:
def list_files_by_mask(directory, mask):
    """Return the paths in `directory` that match the glob `mask`, sorted by name.

    Parameters
    ----------
    directory : str
        Folder to search; '' means the current working directory.
    mask : str
        Glob pattern, e.g. 'Bal10*'.

    Returns
    -------
    list of str
        Matching paths expressed relative to `directory`, sorted so that the
        processing order is reproducible (glob gives no ordering guarantee).
    """
    pattern = os.path.join(directory, mask)
    return sorted(os.path.relpath(path, directory) for path in glob.glob(pattern))
# Every path in the current working directory whose name starts with 'Bal10'.
files10 = list_files_by_mask('', 'Bal10*')
files10

In [None]:
df_lst10 = []
for file in tqdm(files10):
    def seism_well_run_v2(file, wells_df, buffer, margin):
        """Load a seismic attribute map, clip it to the well area and sample it at every well.

        The work is done in three steps:
          1. `file` is read as a space-delimited table and its 'x' / 'y' columns are
             rounded to 0 decimals.
          2. Map nodes are kept only if they intersect the convex hull of the well
             locations ('xmean', 'ymean') expanded by `buffer`.
          3. For every well, the 'value' column of the kept nodes is summarised
             (mean, p25, p50, p75) inside a square window of half-width `margin`;
             the window bounds are exclusive.

        Side effect: the window bounds are written into `wells_df` itself as the
        columns 'xmean_min', 'xmean_max', 'ymean_min' and 'ymean_max'.

        Returns a dict with the keys 'seismic_map_intersect' (the clipped map) and
        'well_data_from_seismic_map' (one row per well with the window statistics).
        """
        # 1. load the map
        seismic_map = pd.read_csv(file, delimiter=' ').round({'x': 0, 'y': 0})
        print(f"seismic map {file} is uploaded")

        # 2. clip it to the buffered convex hull of the wells
        map_points = [Point(xy) for xy in zip(seismic_map['x'], seismic_map['y'])]
        map_gdf = gpd.GeoDataFrame(seismic_map, geometry=map_points)
        well_points = [Point(xy) for xy in zip(wells_df['xmean'], wells_df['ymean'])]
        wells_gdf = gpd.GeoDataFrame(wells_df, geometry=well_points)
        hull = wells_gdf.unary_union.convex_hull.buffer(buffer)
        seismic_map_intersect = map_gdf[map_gdf.intersects(hull)]
        print('seismic map is intersected with wells')

        # 3. summarise the map inside a window around each well
        for axis in ('x', 'y'):
            wells_df[f'{axis}mean_min'] = wells_df[f'{axis}mean'] - margin
            wells_df[f'{axis}mean_max'] = wells_df[f'{axis}mean'] + margin
        nodes = seismic_map_intersect[['x', 'y', 'value']]

        def window_stats(well):
            inside = ((nodes['x'] > well['xmean_min']) & (nodes['x'] < well['xmean_max']) &
                      (nodes['y'] > well['ymean_min']) & (nodes['y'] < well['ymean_max']))
            values = nodes.loc[inside, 'value']
            return pd.DataFrame({'well': well['well'],
                                 'phit_net_mean': well['phit_net_mean'],
                                 'field': well['field'],
                                 'mean': values.mean(),
                                 'p50': values.quantile(0.5),
                                 'p25': values.quantile(0.25),
                                 'p75': values.quantile(0.75),
                                 'x': well['xmean'],
                                 'y': well['ymean'],
                                 'xmean_min': well['xmean_min'],
                                 'xmean_max': well['xmean_max'],
                                 'ymean_min': well['ymean_min'],
                                 'ymean_max': well['ymean_max'],
                                 'margin_init': margin,
                                 'seism_att': file}, index=[0])

        well_data_from_seismic_map = pd.concat(
            [window_stats(well) for _, well in wells_df.iterrows()]).reset_index(drop=True)
        print(f"map {file} to wells dataset is recalculated")

        return {'seismic_map_intersect': seismic_map_intersect,
                'well_data_from_seismic_map': well_data_from_seismic_map}
    # Build the Bal10 inputs for the current attribute file: the seismic map clipped to the
    # well area (hull buffer 1500) and the per-well window statistics (half-width 100).
    print(f'siesmic maps {file} and well data preparation is started')
    data_bal10 = seism_well_run_v2(file, xy10_phit, buffer=1500, margin=100)
    seismic_map10 = data_bal10['seismic_map_intersect']
    well_data10 = data_bal10['well_data_from_seismic_map']

    def preprocessing_data(seismic_map, well_data):
        """Drop the excluded wells and build the model inputs.

        Returns (X_train, y_train, X_test, well_data_v3):
          X_train      - 'x', 'y' and 'value' (the per-well 'mean' renamed) of the kept wells
          y_train      - 'phit_net_mean' of the kept wells
          X_test       - 'x', 'y', 'value' columns of `seismic_map`
          well_data_v3 - `well_data` without the excluded wells
        All kept wells are used for training; nothing is held out here.
        """
        outliers = ['B39', 'D02Y', 'D34', 'B31', 'B20']
        # wells located very close to another well
        too_close = ['B01ST1', 'D01', 'C14', 'C01A', 'B06', 'C13Z', 'C06', 'D01Z', 'C07']
        keep = ~well_data['well'].isin(outliers + too_close)
        well_data_v3 = well_data[keep]

        X_train = well_data_v3[['x', 'y', 'mean']].rename(columns={'mean':'value'})
        y_train = well_data_v3['phit_net_mean']
        X_test = seismic_map[['x', 'y', 'value']]
        return X_train, y_train, X_test, well_data_v3
    X_train10, y_train10, X_test10, well_data10_v3 = preprocessing_data(seismic_map10, well_data10)
    # Report completion only after the step has actually run.
    print('data is preprocessed')

    # TPOT pipeline search on all kept wells (up to 20 generations, 'r2' scoring,
    # early stop after 5, fixed random_state).
    tregr10 = tpot.TPOTRegressor(n_jobs=7, verbosity=1, generations=20, random_state=42, scoring='r2', early_stop=5)
    tregr10.fit(X_train10, y_train10)
    # Bare expression inside the loop body: nothing is displayed.
    tregr10.fitted_pipeline_
    # One stored pipeline per seismic attribute file.
    joblib.dump(tregr10.fitted_pipeline_, f'tregr_{file}.pkl')
    print('model is trained')

    # Reload the stored pipeline and predict for every node of the clipped seismic map.
    model10 = joblib.load(f'tregr_{file}.pkl')
    y_pred10 = model10.predict(X_test10)
    model_df10 = pd.DataFrame({'x': X_test10.iloc[:,0], 'y': X_test10.iloc[:,1],'phit_pred': y_pred10})

    def model_postprocessing(model_df, well_data_v3, file, margin):
        """Compare the predicted map with the well values and score the match.

        Parameters
        ----------
        model_df : DataFrame with 'x', 'y' and 'phit_pred' columns (predicted map).
        well_data_v3 : DataFrame with at least 'well', 'phit_net_mean', 'x', 'y'.
            It is not modified; the work is done on a copy.
        file : kept for interface compatibility; it does not influence the result.
        margin : half-width of the square window (exclusive bounds) in which the
            predicted map is averaged around each well.

        Returns
        -------
        DataFrame : `well_data_v3` (window bounds recomputed for `margin`) joined with
            the per-well 'phit_pred', plus
            'algorithm'                 - the fitted pipeline read from the global `tregr10`
            'up_1.15pu' / 'down_1.15pu' - phit_net_mean +/- 0.0115
            'qc'                        - 1 when phit_pred lies inside that band, else 0
            'qc_result'                 - dict {'1': share of qc == 1, '0': share of qc == 0}
        """
        # Work on a copy so the caller's frame (often a filtered slice) is left untouched.
        wells = well_data_v3.copy()
        wells['xmean_min'] = wells['x'] - margin
        wells['xmean_max'] = wells['x'] + margin
        wells['ymean_min'] = wells['y'] - margin
        wells['ymean_max'] = wells['y'] + margin

        # Mean predicted value inside each well's window (NaN when the window is empty).
        pred_map = model_df[['x', 'y', 'phit_pred']]
        records = []
        for _, row in wells.iterrows():
            inside = ((pred_map['x'] > row['xmean_min']) & (pred_map['x'] < row['xmean_max']) &
                      (pred_map['y'] > row['ymean_min']) & (pred_map['y'] < row['ymean_max']))
            records.append({'well': row['well'], 'phit_pred': pred_map.loc[inside, 'phit_pred'].mean()})
        wells_pred = pd.DataFrame(records, columns=['well', 'phit_pred'])

        wells_pred_true = wells.set_index('well').join(wells_pred.set_index('well'), how='inner').reset_index()

        wells_pred_true['algorithm'] = tregr10.fitted_pipeline_
        wells_pred_true['up_1.15pu'] = wells_pred_true.phit_net_mean+0.0115
        wells_pred_true['down_1.15pu'] = wells_pred_true.phit_net_mean-0.0115

        wells_pred_true['qc'] = np.where((wells_pred_true.phit_pred >= wells_pred_true['down_1.15pu'])
                                         & (wells_pred_true.phit_pred <= wells_pred_true['up_1.15pu']), 1, 0)

        # Shares computed as means of boolean masks: no KeyError when every well passes or
        # every well fails. Plain Python floats keep the dict's text form parseable by
        # ast.literal_eval once the table has been written to and read back from CSV.
        dict_qc = {'1': float((wells_pred_true.qc == 1).mean()),
                   '0': float((wells_pred_true.qc == 0).mean())}

        wells_pred_true['qc_result'] = [dict_qc] * len(wells_pred_true)

        return wells_pred_true
    wells_pred_true10 = model_postprocessing(model_df10, well_data10_v3, file, margin=100)
    # Report completion only after the step has actually run.
    print('model is postprocessed')
    wells_pred_true10['scoring'] = 'r2'
    print(f'model based on {file} is ready:', wells_pred_true10.qc_result.iloc[0])
    # Collect the per-attribute result; the tables are concatenated after the loop.
    df_lst10.append(wells_pred_true10)

# Stack the per-attribute result tables into one frame with a fresh 0..n-1 index.
final_pred10 = pd.concat(df_lst10, ignore_index=True)

In [22]:
# Persist the Bal10 per-well results. The 'qc_result' dicts end up as text in the CSV and
# are parsed back with ast.literal_eval when the file is reloaded for plotting.
final_pred10.to_csv('final_pred_bal10_scoring_r2.csv', index=False)

# Visualization result bal10

In [None]:
file = 'Bal10_AV'


def seism_well_run_v2(file, wells_df, buffer, margin):
    """Load a seismic attribute map, clip it to the well area and sample it at every well.

    The work is done in three steps:
      1. `file` is read as a space-delimited table and its 'x' / 'y' columns are
         rounded to 0 decimals.
      2. Map nodes are kept only if they intersect the convex hull of the well
         locations ('xmean', 'ymean') expanded by `buffer`.
      3. For every well, the 'value' column of the kept nodes is summarised
         (mean, p25, p50, p75) inside a square window of half-width `margin`;
         the window bounds are exclusive.

    Side effect: the window bounds are written into `wells_df` itself as the
    columns 'xmean_min', 'xmean_max', 'ymean_min' and 'ymean_max'.

    Returns a dict with the keys 'seismic_map_intersect' (the clipped map) and
    'well_data_from_seismic_map' (one row per well with the window statistics).
    """
    # 1. load the map
    seismic_map = pd.read_csv(file, delimiter=' ').round({'x': 0, 'y': 0})
    print(f"seismic map {file} is uploaded")

    # 2. clip it to the buffered convex hull of the wells
    map_points = [Point(xy) for xy in zip(seismic_map['x'], seismic_map['y'])]
    map_gdf = gpd.GeoDataFrame(seismic_map, geometry=map_points)
    well_points = [Point(xy) for xy in zip(wells_df['xmean'], wells_df['ymean'])]
    wells_gdf = gpd.GeoDataFrame(wells_df, geometry=well_points)
    hull = wells_gdf.unary_union.convex_hull.buffer(buffer)
    seismic_map_intersect = map_gdf[map_gdf.intersects(hull)]
    print('seismic map is intersected with wells')

    # 3. summarise the map inside a window around each well
    for axis in ('x', 'y'):
        wells_df[f'{axis}mean_min'] = wells_df[f'{axis}mean'] - margin
        wells_df[f'{axis}mean_max'] = wells_df[f'{axis}mean'] + margin
    nodes = seismic_map_intersect[['x', 'y', 'value']]

    def window_stats(well):
        inside = ((nodes['x'] > well['xmean_min']) & (nodes['x'] < well['xmean_max']) &
                  (nodes['y'] > well['ymean_min']) & (nodes['y'] < well['ymean_max']))
        values = nodes.loc[inside, 'value']
        return pd.DataFrame({'well': well['well'],
                             'phit_net_mean': well['phit_net_mean'],
                             'field': well['field'],
                             'mean': values.mean(),
                             'p50': values.quantile(0.5),
                             'p25': values.quantile(0.25),
                             'p75': values.quantile(0.75),
                             'x': well['xmean'],
                             'y': well['ymean'],
                             'xmean_min': well['xmean_min'],
                             'xmean_max': well['xmean_max'],
                             'ymean_min': well['ymean_min'],
                             'ymean_max': well['ymean_max'],
                             'margin_init': margin,
                             'seism_att': file}, index=[0])

    well_data_from_seismic_map = pd.concat(
        [window_stats(well) for _, well in wells_df.iterrows()]).reset_index(drop=True)
    print(f"map {file} to wells dataset is recalculated")

    return {'seismic_map_intersect': seismic_map_intersect,
            'well_data_from_seismic_map': well_data_from_seismic_map}
# Rebuild the clipped Bal10 map for the attribute named in `file` (hull buffer 1500,
# window half-width 100) and take its 'x', 'y', 'value' columns as model input.
print(f'siesmic maps {file} and well data preparation is started')
data_bal10 = seism_well_run_v2(file, xy10_phit, buffer=1500, margin=100)
seismic_map10 = data_bal10['seismic_map_intersect']
well_data10 = data_bal10['well_data_from_seismic_map']
X_test10 = seismic_map10[['x', 'y', 'value']]

# Reload the stored per-well results and the pipeline stored for this attribute.
final_pred10 = pd.read_csv('final_pred_bal10_scoring_r2.csv')
model10 = joblib.load(f'tregr_{file}.pkl')
# X_test10 was selected with the columns 'x', 'y', 'value', so this rename finds no 'mean'
# column to change.
X_test10 = X_test10.rename(columns={'mean':'value'})
y_pred10 = model10.predict(X_test10)
model_df10 = pd.DataFrame({'x': X_test10.iloc[:,0], 'y': X_test10.iloc[:,1], 'phit_pred': y_pred10})
# Show the stored QC summary of the first row for this attribute.
display(final_pred10[final_pred10.seism_att == file].qc_result.iloc[0])

fig, ax = plt.subplots(figsize=(14, 7))

# Wells hidden from the Bal10 overlay.
xy10_phit_preproc = xy10_phit.loc[~xy10_phit['well'].isin([
    'A13Z', 'H01Y', 'A01X', 'A12X', 'D01', 'E31', 'G01Y', 'A09Y',
    'E01X', 'A07Y', 'A01W', 'C27Y', 'C03ST1', 'H01', 'D07',
])]

# One colour scale shared by the predicted map and the wells.
all_values10 = pd.concat(
    [xy10_phit_preproc['phit_net_mean'], model_df10['phit_pred']],
    ignore_index=True,
)
norm10 = plt.Normalize(vmin=all_values10.min(), vmax=all_values10.max())

# Predicted map as a dense cloud of small points.
cb1 = ax.scatter(model_df10['x'], model_df10['y'], c=model_df10['phit_pred'],
                 s=1, cmap='coolwarm', norm=norm10)
# Wells drawn on top as outlined markers coloured by their measured value.
cb2 = ax.scatter(xy10_phit_preproc['xmean'], xy10_phit_preproc['ymean'],
                 c=xy10_phit_preproc['phit_net_mean'],
                 s=50, ec='black', lw=0.5, alpha=1, cmap='coolwarm', norm=norm10)
ax.set_title(f'phit prediction based on {file}');

In [None]:
# Per-well results for the selected attribute, ordered by the measured porosity.
phit_pred10 = final_pred10.loc[final_pred10['seism_att'] == file].sort_values('phit_net_mean')
# 'qc_result' holds the text of a dict; turn it back into a dict for the title.
dict_obj = ast.literal_eval(phit_pred10['qc_result'].iloc[0])

fig, ax = plt.subplots(figsize=(16,4))
sns.lineplot(data=phit_pred10, x='well', y='phit_net_mean', label='phit_net_mean', ax=ax)
sns.lineplot(data=phit_pred10, x='well', y='phit_pred', label='phit_pred', ax=ax)
# Shaded band of +/- 0.0115 around the measured value.
ax.fill_between(phit_pred10['well'],
                phit_pred10['phit_net_mean'] - 0.0115,
                phit_pred10['phit_net_mean'] + 0.0115,
                color='b', alpha=0.2)
ax.set_xticklabels(ax.get_xticklabels(), rotation=45, fontsize=6)
ax.set_title(f"phit_net_mean vs phit_pred based on {phit_pred10.seism_att.iloc[0]} with 1:{dict_obj['1']:.3f}")
ax.legend()
ax.grid()