In [1]:
import matplotlib.pyplot as plt
from matplotlib.path import Path
import pandas as pd
import os
import sys
import seaborn as sns
sys.path.insert(0, 'C:/Users/gmendoza/OneDrive - UGent/Documentos/PhD/Pedophysics_code')
sys.path.insert(0, 'C:/Users/mendo/OneDrive - UGent/Documentos/PhD/Pedophysics_code')

from pedophysics import predict, Soil
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score
from scipy.optimize import root
from scipy.stats import spearmanr
from scipy.optimize import minimize
from IPython.display import clear_output


from sklearn.preprocessing import PolynomialFeatures
from sklearn.model_selection import train_test_split
from scipy.stats import pearsonr
from PyPDF2 import PdfMerger

# Electromagnetic induction data inversion package
from plots import *
from PM import *
import sys
%matplotlib inline

pd.set_option('display.max_columns', None)
from utils.spatial_utils import get_coincident


In [2]:
# User input
s_site = 'P'; # P = Proefhoeve; M = Middelkerke
# Define input datatype and source folder
datafolder = 'Data' # data folder
em_intype = 'reca'   # 'reca', 'LIN' 
cal = 'calibrated' # 'non_calibrated', 'drift_calibrated'
instrument_code = '21HS' # 421S, '21HS'

In [3]:
if s_site == 'P':
    profile_prefix = 'proefhoeve'
    if instrument_code == '21HS':
        emfile_prefix = 'proefhoeve_21HS'
    else: 
        emfile_prefix = 'proefhoeve_421S'
else:
    profile_prefix = 'middelkerke'
    emfile_prefix = 'middelkerke_421S'
    # check if correct instrument (only 421S data available for Middelkerke)

cal_folder = os.path.join(datafolder, cal)
em_survey = os.path.join(cal_folder, f'{emfile_prefix}_{cal}_{em_intype}.csv')
samplocs = os.path.join(datafolder, f'{profile_prefix}_samps.csv')
inv_s = os.path.join(datafolder, f'{profile_prefix}_inverted_samples_{instrument_code}c.csv')

# Profile smoothing parameters 
window_size = 1 # Define your window size for simple moving average filter (1 = no filtering)

# 1.0 Data import and structuring into dataframe
em_survey = pd.read_csv(em_survey, sep=',', header=0)
inverted = pd.read_csv(inv_s, sep=',', header=0)
samples = pd.read_csv(samplocs, sep=',', header=0)
em_samples = get_coincident(em_survey, samples)

In [4]:
pd.options.future.infer_string = True

ds_c = em_samples.copy()
# Caclculate Bulk EC from HydraProbe data at 50Mhz
offset = 4
water_perm = 80
ds_c['bulk_ec_hp'] = logsdon(50e6, ds_c.rperm, ds_c.iperm)

ds_c['bulk_ec_dc_hp'] = predict.BulkECDC(Soil(frequency_ec = 50e6,
                                              bulk_ec = ds_c.bulk_ec_hp.values))

ds_c['bulk_ec_tc_hp'] = SheetsHendrickxEC( ds_c.bulk_ec_hp, ds_c.temp)
ds_c['bulk_ec_dc_tc_hp'] = predict.BulkECDCTC(Soil(temperature = ds_c.temp.values,
                                                    bulk_ec_dc = ds_c.bulk_ec_dc_hp.values
                                                    ))

# Caclculate Water EC from HydraProbe data at 50Mhz
ds_c['water_ec_hp'] = Hilhorst(ds_c.bulk_ec_hp, ds_c.rperm, water_perm, offset)
ds_c['water_ec_hp_t'] = WraithOr(ds_c.water_ec_hp, ds_c.temp)
ds_c['iperm_water_t'] = ds_c.water_ec_hp_t/(50e6*2*pi*epsilon_0)

  soil.info.loc[i, 'bulk_ec_dc'] = str(soil.info.bulk_ec_dc[i]) + "--> EM frequency shift from actual to zero Hz using LongmireSmithEC function in predict.bulk_ec_dc.non_dc_to_dc"


In [5]:
inv_columns = inverted.columns[3:-1]
ds_c[inv_columns] = np.nan

for idc, c in enumerate(inv_columns):

    for i in range(len(inverted.x)):
        ds_c.loc[ds_c.code == i+1, c] = inverted.loc[i, c]

def closest_ec(row):
    depth = row['depth']
    # Filter columns that start with 'EC_' but not 'EC_end'
    ec_cols = [col for col in row.index if col.startswith('EC_') and col != 'EC_end']
    # Convert the part after 'EC_' to float and calculate the absolute difference with depth
    differences = {col: abs(depth/100 - float(col.split('_')[1])) for col in ec_cols}
    # Find the column name with the minimum difference
    closest_col = min(differences, key=differences.get)
    return row[closest_col]

# Apply the function to each row
ds_c['bulk_ec_inv'] = ds_c.apply(closest_ec, axis=1)

ds_c['bulk_ec_dc_tc_inv'] = predict.BulkECDCTC(Soil(temperature = ds_c.temp.values+273.15,
                                                      frequency_ec = 9e3,
                                                      bulk_ec = ds_c.bulk_ec_inv.values/1000))
    
folder_path = 'output_tables/'
file_name = 'ds_c_'+profile_prefix+'.csv'
ds_c.to_csv(folder_path + file_name, index=False)

print(ds_c)

                 x            y          z         t     HCP0.5     PRP0.6  \
5166   107720.1177  183326.9599  26.190915  40954.67  52.187318  49.052734   
19244  107755.5892  183331.1005  26.895158  42473.18  16.512444  18.074690   
22350  107797.4015  183278.5790  28.470657  42812.37  90.708144  57.723088   
19352  107763.8156  183318.1123  27.344669  42491.37   8.642003  14.845471   
1556   107723.0051  183309.1818  26.586000  40592.63  66.431144  58.135345   
14669  107742.6515  183330.2073  26.664000  41906.95  29.118481  28.924502   
23383  107805.8221  183273.9873  28.650000  42915.67  87.053971  49.010738   
15706  107758.6852  183312.6012  27.350000  42107.27  15.352851  21.686269   
22198  107776.6922  183310.7557  27.551037  42797.16  42.876764  26.556867   
5128   107726.3707  183318.7191  26.563000  40950.86  73.239161  58.994960   
5166   107720.1177  183326.9599  26.190915  40954.67  52.187318  49.052734   
19244  107755.5892  183331.1005  26.895158  42473.18  16.512444 

  soil.info.loc[i, 'bulk_ec_dc'] = str(soil.info.bulk_ec_dc[i]) + "--> EM frequency shift from actual to zero Hz using LongmireSmithEC function in predict.bulk_ec_dc.non_dc_to_dc"


In [6]:
clay_mean = np.mean(ds_c.clay)
bd_mean = np.mean(ds_c.bd)
water_ec_hp_mean = np.mean(ds_c.water_ec_hp)
water_ec_hp_mean_t = np.mean(ds_c.water_ec_hp_t)
temp_mean = np.mean(ds_c.temp)
vwc_mean = np.mean(ds_c.vwc)

f_ec = 9000
t_conv = 273.15
t_mean_conv = temp_mean+t_conv

In [20]:
X = ds_c['bulk_ec_dc_tc_inv'].values
Y = ds_c['vwc'].values
mX = np.mean(X)
VWC_mean = np.mean(Y)
print('VWC_mean', VWC_mean)

from scipy.optimize import minimize, differential_evolution

def deterministic(EC_mean):
    VWC_mean_pred = predict.Water(Soil(
        bulk_ec = EC_mean[0],  # Assuming EC_mean is an array-like structure
        frequency_ec=f_ec,
        clay = clay_mean,
        bulk_density = bd_mean,
        water_ec = water_ec_hp_mean,
        temperature = t_mean_conv
    ))[0]

    diff = VWC_mean_pred - VWC_mean + 6
    return diff**2  # Squaring to ensure the objective function is always positive

# Trying a wider range and a different method
bounds = [(0, 1)]  # Example of wider bounds
res = differential_evolution(deterministic, bounds)
print('res.x[0]', res.x[0])

VWC_mean_p = predict.Water(Soil(
                                bulk_ec = res.x[0],  # Assuming EC_mean is an array-like structure
                                frequency_ec=f_ec,
                                clay = clay_mean,
                                bulk_density = bd_mean,
                                water_ec = water_ec_hp_mean,
                                temperature = t_mean_conv
                            ))[0]

print('VWC_mean_p', VWC_mean_p)

VWC_mean 0.2891096666666666


  soil.info.loc[i, 'bulk_ec_dc'] = str(soil.info.bulk_ec_dc[i]) + "--> EM frequency shift from actual to zero Hz using LongmireSmithEC function in predict.bulk_ec_dc.non_dc_to_dc"
  soil.info.loc[(np.isnan(soil.df['sand'])) & (np.isnan(soil.df['silt'])) & (np.isnan(soil.df['clay'])) & (soil.texture == texture), ['sand', 'silt', 'clay']] = ('Fraction calculated using soil.texture', 'Fraction calculated using soil.texture', 'Fraction calculated using soil.texture')
  soil.info.loc[(np.isnan(soil.df['sand'])) & (np.isnan(soil.df['silt'])) & (np.isnan(soil.df['clay'])) & (soil.texture == texture), ['sand', 'silt', 'clay']] = ('Fraction calculated using soil.texture', 'Fraction calculated using soil.texture', 'Fraction calculated using soil.texture')
  soil.info.loc[i, 'bulk_ec_dc'] = str(soil.info.bulk_ec_dc[i]) + "--> EM frequency shift from actual to zero Hz using LongmireSmithEC function in predict.bulk_ec_dc.non_dc_to_dc"
  soil.info.loc[(np.isnan(soil.df['sand'])) & (np.isnan(soil.df[

res.x[0] 0.00028361088620887376
VWC_mean_p 0.002


  soil.info.loc[(np.isnan(soil.df['sand'])) & (np.isnan(soil.df['silt'])) & (np.isnan(soil.df['clay'])) & (soil.texture == texture), ['sand', 'silt', 'clay']] = ('Fraction calculated using soil.texture', 'Fraction calculated using soil.texture', 'Fraction calculated using soil.texture')
  soil.info.loc[i, 'bulk_ec_dc'] = str(soil.info.bulk_ec_dc[i]) + "--> EM frequency shift from actual to zero Hz using LongmireSmithEC function in predict.bulk_ec_dc.non_dc_to_dc"
  soil.info.loc[(np.isnan(soil.df['sand'])) & (np.isnan(soil.df['silt'])) & (np.isnan(soil.df['clay'])) & (soil.texture == texture), ['sand', 'silt', 'clay']] = ('Fraction calculated using soil.texture', 'Fraction calculated using soil.texture', 'Fraction calculated using soil.texture')
  soil.info.loc[(np.isnan(soil.df['sand'])) & (np.isnan(soil.df['silt'])) & (np.isnan(soil.df['clay'])) & (soil.texture == texture), ['sand', 'silt', 'clay']] = ('Fraction calculated using soil.texture', 'Fraction calculated using soil.texture'