In [1]:
import pandas as pd
import numpy as np
from sklearn.metrics import root_mean_squared_error, mean_absolute_error, r2_score

import warnings
warnings.filterwarnings("ignore", category=UserWarning)

In [2]:
# Reading our test data
density_test_data = pd.read_excel("density_test_data.xlsx")
porosity_test_data = pd.read_excel("porosity_test_data.xlsx")
permeability_test_data = pd.read_excel("permeability_test_data.xlsx")

In [3]:
density_test_data.head()

Unnamed: 0,dt,gr,res,sp,rhob
0,53.008987,55.125946,0.001849,100.76033,2.810103
1,52.09375,41.828125,0.501,-136.8125,2.782104
2,74.625,43.257812,0.138125,-201.84375,2.469727
3,77.109375,54.085938,0.075703,-140.8125,2.460815
4,50.202072,42.700867,0.00129,106.510223,2.854489


In [4]:
density_test_data['vp_ms'] = 304800 / density_test_data['dt']

In [5]:
density_test_data['vp_kms'] = 304.8 / density_test_data['dt']

In [6]:
density_test_data.head()

Unnamed: 0,dt,gr,res,sp,rhob,vp_ms,vp_kms
0,53.008987,55.125946,0.001849,100.76033,2.810103,5749.968351,5.749968
1,52.09375,41.828125,0.501,-136.8125,2.782104,5850.989802,5.85099
2,74.625,43.257812,0.138125,-201.84375,2.469727,4084.422111,4.084422
3,77.109375,54.085938,0.075703,-140.8125,2.460815,3952.826748,3.952827
4,50.202072,42.700867,0.00129,106.510223,2.854489,6071.462531,6.071463


In [7]:
porosity_test_data.head()

Unnamed: 0,Vp,Density,porosity
0,5.758811,2.934398,1.112759
1,5.381853,2.743289,0.994562
2,3.608643,2.175328,17.066241
3,5.628452,2.804748,0.873837
4,5.429688,2.7333,2.494238


In [8]:
porosity_test_data['Vp_ms'] = porosity_test_data['Vp']*1000

In [9]:
porosity_test_data.head()

Unnamed: 0,Vp,Density,porosity,Vp_ms
0,5.758811,2.934398,1.112759,5758.811475
1,5.381853,2.743289,0.994562,5381.853282
2,3.608643,2.175328,17.066241,3608.643381
3,5.628452,2.804748,0.873837,5628.451883
4,5.429688,2.7333,2.494238,5429.6875


In [10]:
permeability_test_data.head()

Unnamed: 0,Density,Vp,Porosity,micro_perm
0,2.367226,4.341611,10.431472,1.0
1,2.521859,4.756958,1.712904,3.472222
2,2.634296,5.141798,2.352289,4.219409
3,2.867079,5.813295,0.809638,1.8
4,2.423011,4.573096,7.105013,1.9


In [11]:
# 1. Density comp.

# xgb_model = r2: 0.91, rmse: 0.06, mae: 0.04
y_true = density_test_data['rhob']

# X. Chen et al. (2015)
# dens = (vp + 2366) / 2598.4
y_chen_dens = (density_test_data['vp_ms']+2366)/2598.4

# Rossetti et al. (2019)
# dens = 0.2652*vp + 1.2905
y_rossetti_dens = 0.2652*density_test_data['vp_kms'] + 1.2905

# Vedanti et al. (2018)
# dens1 = (33.06 + sqrt(28.44*vp -97.038))/14.22
y_vendati_dens = (33.06 + np.sqrt(28.44*density_test_data['vp_kms'] -97.038))/14.22

  result = getattr(ufunc, method)(*inputs, **kwargs)


In [12]:
results_list = []

predictions = {
    'X. Chen et al. (2015)': y_chen_dens,
    'Vedanti et al. (2018)': y_vendati_dens,
    'Rossetti et al. (2019)': y_rossetti_dens
}

In [None]:
# Calc metrics
# Loop throuth each model
for model_name, y_pred in predictions.items():
    
    # y_pred is NaN (e.g., Vedanti case with Vp < 3.412)
    # or where y_true is NaN (if there is missing data)
    temp_df = pd.DataFrame({'true': y_true, 'pred': y_pred}).dropna()
    
    # Get the clean data
    clean_true = temp_df['true']
    clean_pred = temp_df['pred']
    
    # Calculate metrics (only if valid data exists)
    if not temp_df.empty:
        rmse = root_mean_squared_error(clean_true, clean_pred)
        mae = mean_absolute_error(clean_true, clean_pred)
        r2 = r2_score(clean_true, clean_pred)
    else:
        # If there is no valid data (e.g., all NaNs)
        rmse, mae, r2 = np.nan, np.nan, np.nan
    
    # Add to list
    results_list.append({
        'model': model_name,
        'rmse': rmse,
        'mae': mae,
        'r2': r2
    })

# Add our reference model results
results_list.append({
    'model': 'XGBoost Model',
    'rmse': 0.06,
    'mae': 0.04,
    'r2': 0.91
})

# Create the Results DataFrame
df_results = pd.DataFrame(results_list)
df_results = df_results.set_index('model').round(2)

In [14]:
print("\n--- DataFrame to Compare the Models ---")
print(df_results)


--- DataFrame to Compare the Models ---
                        rmse   mae    r2
model                                   
X. Chen et al. (2015)   0.25  0.21 -0.63
Vedanti et al. (2018)   0.12  0.09  0.56
Rossetti et al. (2019)  0.11  0.09  0.65
XGBoost Model           0.06  0.04  0.91


In [15]:
# 2. Porosity comp.

# gb_model = r2: 0.81, rmse: 0.17, mae: 0.12
y_true = porosity_test_data['porosity']

# Al-Harthi et al. (1999)
# poro = ln(vp_ms/6320)/-0.016
y_harthi = np.log(porosity_test_data['Vp_ms']/6320)/-0.016

# X. Chen et al. (2015)
# poro = (18027 +- sqrt(132456*vp_ms-395638067))/66228
# applied the + because it got the best results between +-
y_chen_poro1 = (18027 + np.sqrt(132456*porosity_test_data['Vp_ms']-395638067))/66228

# Vedanti et al. (2018)
# poro1 = (density-2.94)/-0.04
y_vendati_poro1 = (porosity_test_data['Density']-2.94)/-0.04

# Rossetti et al. (2019)
# poro = (vp_kms - 5.3764)/-0.0934
y_rossetti_poro = (porosity_test_data['Vp'] - 5.3764)/-0.0934

# Navarro et al. (2020)
# poro = 54.746*e^(-0.821*(vp_kms-2.369))
y_navarro_poro = 54.746 * np.exp(-0.821 * (porosity_test_data['Vp'] - 2.369))

In [16]:
results_poro_list = []

predictions = {
    'Al-Harthi et al. (1999)': y_harthi,
    'X. Chen et al. (2015)': y_chen_poro1,
    'Vedanti et al. (2018)': y_vendati_poro1,
    'Rossetti et al. (2019)': y_rossetti_poro,
    'Navarro et al. (2020)': y_navarro_poro

}

In [None]:
# Calc metrics
# Loop throuth each model
for model_name, y_pred in predictions.items():
    
    # y_pred is NaN (e.g., Vedanti case with Vp < 3.412)
    # or where y_true is NaN (if there is missing data)
    temp_df = pd.DataFrame({'true': y_true, 'pred': y_pred}).dropna()
    
    # Get the clean data
    clean_true = temp_df['true']
    clean_pred = temp_df['pred']
    
    # Calculate metrics (only if valid data exists)
    if not temp_df.empty:
        rmse = root_mean_squared_error(clean_true, clean_pred)
        mae = mean_absolute_error(clean_true, clean_pred)
        r2 = r2_score(clean_true, clean_pred)
    else:
        # If there is no valid data (e.g., all NaNs)
        rmse, mae, r2 = np.nan, np.nan, np.nan
    
    # Add to list
    results_poro_list.append({
        'model': model_name,
        'rmse': rmse,
        'mae': mae,
        'r2': r2
    })

# Add our reference model results (manually)
results_poro_list.append({
    'model': 'Gradient Boosting Model',
    'rmse': 0.06,
    'mae': 0.04,
    'r2': 0.91
})

# Create the Results DataFrame
df_results_poro = pd.DataFrame(results_poro_list)
df_results_poro = df_results_poro.set_index('model').round(2)

In [18]:
print("\n--- DataFrame to Compare the Models ---")
print(df_results_poro)


--- DataFrame to Compare the Models ---
                          rmse   mae    r2
model                                     
Al-Harthi et al. (1999)  11.36  9.94 -1.74
X. Chen et al. (2015)     8.39  4.81 -0.49
Vedanti et al. (2018)     4.51  3.77  0.57
Rossetti et al. (2019)    5.36  4.02  0.39
Navarro et al. (2020)     4.91  3.86  0.49
Gradient Boosting Model   0.06  0.04  0.91


In [19]:
# 3. Permeability comp.

# rf_model = r2: 0.66, rmse: 0.39, mae: 0.30
y_true = permeability_test_data['micro_perm']

poro_fracao = permeability_test_data['Porosity'] / 100

M2_TO_MICRODARCY = 1 / 9.869233e-19

# Lamur et al. (2017)
# perm1 = 3*10^(-17)*poro^(3.11)
y_lamur_perm1 = (3e-17 * (poro_fracao ** 3.11)) * M2_TO_MICRODARCY

# Mueller et al. (2005)
# perm1 = 1*10^(-17)*poro^(3)
y_muller_perm1 = (1e-17 * (poro_fracao ** 3)) * M2_TO_MICRODARCY

# Navarro et al. (2020)
# perm_m2 = 4*10^(-18)*poro^(3.245)
y_navarro_perm = (4e-18 * (poro_fracao ** 3.245)) * M2_TO_MICRODARCY

# Yokoyama and Takeuchi (2009)
# perm_m2 = 2*10^(-19)*poro^(3.5)
y_yokoyama_perm = (2e-19 * (poro_fracao ** 3.5)) * M2_TO_MICRODARCY

In [20]:
results_perm_list = []

predictions = {
    'Mueller et al. (2005)': y_muller_perm1,
    'Yokoyama and Takeuchi (2009)': y_yokoyama_perm,
    'Lamur et al. (2017)': y_lamur_perm1,
    'Navarro et al. (2020)': y_navarro_perm,
}

In [None]:
# Calc metrics
# Loop throuth each model
for model_name, y_pred in predictions.items():
    
    # y_pred is NaN (e.g., Vedanti case with Vp < 3.412)
    # or where y_true is NaN (if there is missing data)
    temp_df = pd.DataFrame({'true': y_true, 'pred': y_pred}).dropna()
    
    # Get the clean data
    clean_true = temp_df['true']
    clean_pred = temp_df['pred']
    
    # Calculate metrics (only if valid data exists)
    if not temp_df.empty:
        rmse = root_mean_squared_error(clean_true, clean_pred)
        mae = mean_absolute_error(clean_true, clean_pred)
        r2 = r2_score(clean_true, clean_pred)
    else:
        # If there is no valid data (e.g., all NaNs)
        rmse, mae, r2 = np.nan, np.nan, np.nan
    
    # Add to list
    results_perm_list.append({
        'model': model_name,
        'rmse': rmse,
        'mae': mae,
        'r2': r2
    })

# Add our reference model results (manually)
results_perm_list.append({
    'model': 'Random Forest Model',
    'rmse': 0.39,
    'mae': 0.30,
    'r2': 0.66
})

# Create the Results DataFrame
df_results_perm = pd.DataFrame(results_perm_list)
df_results_perm = df_results_perm.set_index('model').round(2)

In [22]:
print("\n--- DataFrame to Compare the Models ---")
print(df_results_perm)


--- DataFrame to Compare the Models ---
                                rmse    mae    r2
model                                            
Mueller et al. (2005)         178.44  39.64 -0.05
Yokoyama and Takeuchi (2009)  178.49  39.66 -0.05
Lamur et al. (2017)           178.37  39.61 -0.05
Navarro et al. (2020)         178.47  39.65 -0.05
Random Forest Model             0.39   0.30  0.66
