In [49]:
# Example applying a multi-sensor spectral soil library (XRF, GRS, vis and NIR) for soil fertility attributes quantification (example for exCa)

# importing the necessary libraries
import numpy as np
import pandas as pd
import os
pd.options.plotting.backend = 'plotly'

# Load the four sensor blocks (GRS, VIS, NIR, XRF) and the reference table Y.
# Every file is a ';'-separated CSV whose first row holds the column names.
XGRS, XVIS, XNIR, XXRF, Y = (
    pd.read_csv(f'datasets/{stem}.csv', header=0, sep=';')
    for stem in ('XGRS', 'XVIS', 'XNIR', 'XXRF', 'Y')
)


## **kennard-stone**

In [50]:
# Applying the Kennard-Stone algorithm to split the data into calibration and prediction sets
import kennard_stone as ks

# The split is computed on the reference columns only: the 'Samples' identifier
# column is dropped before calling Kennard-Stone (test_size=0.30 -> Ypred).
Ycal, Ypred = ks.train_test_split(Y.drop(columns=['Samples']), test_size=0.30)

# Row indices of each subset; they are reused further down to split every sensor block.
# NOTE(review): these are index *labels* but are consumed with .iloc below. That is
# only equivalent because Y keeps the default RangeIndex from read_csv, and it assumes
# ks.train_test_split preserves the original row index -- confirm if either changes.
indices_cal, indices_pred = Ycal.index, Ypred.index

# Put the sample identifiers back as the first column of each subset.
Ycal.insert(loc=0, column='Samples', value=Y['Samples'].iloc[indices_cal])
Ypred.insert(loc=0, column='Samples', value=Y['Samples'].iloc[indices_pred])

# Renumber the rows from 0 so both subsets line up with the sensor blocks split later.
Ycal, Ypred = Ycal.reset_index(drop=True), Ypred.reset_index(drop=True)

Calculating pairwise distances using scikit-learn.
Calculating pairwise distances using scikit-learn.



'force_all_finite' was renamed to 'ensure_all_finite' in 1.6 and will be removed in 1.8.


'force_all_finite' was renamed to 'ensure_all_finite' in 1.6 and will be removed in 1.8.


'force_all_finite' was renamed to 'ensure_all_finite' in 1.6 and will be removed in 1.8.



In [51]:
# Apply the calibration/prediction split obtained above to every sensor block.
# Each pair is (calibration rows, prediction rows), renumbered from 0.

XcalXRF, XpredXRF = (
    XXRF.iloc[rows].reset_index(drop=True) for rows in (indices_cal, indices_pred)
)
XcalNIR, XpredNIR = (
    XNIR.iloc[rows].reset_index(drop=True) for rows in (indices_cal, indices_pred)
)
XcalGRS, XpredGRS = (
    XGRS.iloc[rows].reset_index(drop=True) for rows in (indices_cal, indices_pred)
)
XcalVIS, XpredVIS = (
    XVIS.iloc[rows].reset_index(drop=True) for rows in (indices_cal, indices_pred)
)

# **preprocessings**

In [52]:
import preprocessings as prepr # poisson scaling by particular library

# Poisson scaling of the XRF calibration block. The helper returns the transformed
# block plus two calibration statistics.
# NOTE(review): mc=True presumably also mean-centres the scaled data -- confirm in `preprocessings`.
XcalXRF_pre, meancal_xrf, meancalpoisson_xrf = prepr.poisson(XcalXRF, mc=True)

# The prediction block is transformed with the calibration statistics only:
# divide by sqrt(meancal_xrf), then subtract meancalpoisson_xrf.
XpredXRF_pre = XpredXRF.div(np.sqrt(meancal_xrf)).sub(meancalpoisson_xrf)

In [53]:
from scipy.signal import savgol_filter # SAVGOL smoothing

# Savitzky-Golay first derivative (11-point window, 1st-order polynomial) applied
# with identical settings to the NIR calibration and prediction blocks.
# Wrapping the filtered array in a new DataFrame gives it default integer column labels.
XcalNIR_pre, XpredNIR_pre = (
    pd.DataFrame(savgol_filter(block, window_length=11, polyorder=1, deriv=1))
    for block in (XcalNIR, XpredNIR)
)

# Centre with the calibration statistics: prepr.mc returns the centred calibration
# block and the value that is then subtracted from the prediction block.
XcalNIR_pre, meancal_nir = prepr.mc(XcalNIR_pre)
XpredNIR_pre = XpredNIR_pre - meancal_nir

In [54]:
# Savitzky-Golay first derivative (3-point window, 1st-order polynomial) applied
# with identical settings to the VIS calibration and prediction blocks.
# Wrapping the filtered array in a new DataFrame gives it default integer column labels.
XcalVIS_pre, XpredVIS_pre = (
    pd.DataFrame(savgol_filter(block, window_length=3, polyorder=1, deriv=1))
    for block in (XcalVIS, XpredVIS)
)

# Centre with the calibration statistics: prepr.mc returns the centred calibration
# block and the value that is then subtracted from the prediction block.
XcalVIS_pre, meancal_vis = prepr.mc(XcalVIS_pre)
XpredVIS_pre = XpredVIS_pre - meancal_vis

In [55]:
# Savitzky-Golay first derivative (11-point window, 1st-order polynomial) applied
# with identical settings to the GRS calibration and prediction blocks.
# Wrapping the filtered array in a new DataFrame gives it default integer column labels.
XcalGRS_pre, XpredGRS_pre = (
    pd.DataFrame(savgol_filter(block, window_length=11, polyorder=1, deriv=1))
    for block in (XcalGRS, XpredGRS)
)

# Centre with the calibration statistics: prepr.mc returns the centred calibration
# block and the value that is then subtracted from the prediction block.
XcalGRS_pre, meancal_grs = prepr.mc(XcalGRS_pre)
XpredGRS_pre = XpredGRS_pre - meancal_grs

# **low-level data fusion**
Just like individual modeling, we can use **model='pls'**, **'rf'** or **'svm'**. The only difference is that with **'pls'** all combinations of models and numbers of LVs will be tested. <br>
Dictionaries (**cal** and **pred**) whose keys are the sensor names and whose values hold the preprocessed spectra of each sensor must be provided. Then, low-level fusion models will be built for all combinations of the keys. Let's use 'rf'.

In [56]:
import automated_datafusion as df
# Preprocessed calibration blocks, keyed by sensor name.
spectracal_dict = dict(
    xrf=XcalXRF_pre,
    nir=XcalNIR_pre,
    vis=XcalVIS_pre,
    grs=XcalGRS_pre,
)

# Preprocessed prediction blocks, using the same sensor keys in the same order.
spectrapred_dict = dict(
    xrf=XpredXRF_pre,
    nir=XpredNIR_pre,
    vis=XpredVIS_pre,
    grs=XpredGRS_pre,
)

# Run the automated low-level fusion for the 'exCa' target with the 'rf' model.
# A fixed random_seed is passed so the run can be repeated.
results_low_level = df.low_level_fusion_automatizado(
    spectracal_dict,
    spectrapred_dict,
    Ycal,
    Ypred,
    target='exCa',
    model='rf',
    scale=True,
    random_seed=1,
)

In [57]:
# Flatten the nested results (combination -> model -> info) into one record per
# fitted model: the two identifying fields first, followed by that model's metrics.
# assumes model_info['metrics'] is a plain mapping of metric name -> value -- TODO confirm
rows = [
    {'Combination': combination, 'Model': model_key, **model_info['metrics']}
    for combination, models in results_low_level.items()
    for model_key, model_info in models.items()
]

metrics_low_level = pd.DataFrame(rows)

# Show the table ordered from the lowest to the highest RMSEP.
metrics_low_level.sort_values(by='RMSEP')

Unnamed: 0,Combination,Model,R2 Cal,r2 Cal,RMSEC,R2 Pred,r2 Pred,RMSEP,Bias Pred,tbias Pred,RPD Pred,RPIQ Pred
10,xrf_nir_vis_grs,RF,0.953313,0.963198,0.416388,0.690908,0.76565,0.841466,-0.391533,2.521027,1.798691,2.332239
1,xrf_vis,RF,0.961697,0.969588,0.377151,0.662919,0.772059,0.87874,-0.491629,3.237168,1.722395,2.233312
7,xrf_nir_grs,RF,0.957624,0.967179,0.396695,0.65957,0.736715,0.883094,-0.401421,2.447475,1.713903,2.222301
2,xrf_grs,RF,0.958397,0.967423,0.393064,0.653753,0.730652,0.890607,-0.383992,2.291709,1.699443,2.203552
0,xrf_nir,RF,0.95985,0.967507,0.386137,0.650561,0.778741,0.894702,-0.540629,3.636979,1.691665,2.193467
6,xrf_nir_vis,RF,0.958934,0.96838,0.390515,0.648945,0.776759,0.89677,-0.539296,3.609789,1.687766,2.18841
8,xrf_vis_grs,RF,0.956714,0.965493,0.400933,0.643556,0.72448,0.903626,-0.409254,2.436226,1.674959,2.171805
3,nir_vis,RF,0.86557,0.97228,0.706558,0.289984,0.377028,1.275343,-0.17585,0.667648,1.186768,1.538801
9,nir_vis_grs,RF,0.85535,0.973713,0.732923,0.063342,0.064056,1.464817,-0.036658,0.120057,1.03326,1.339758
4,nir_grs,RF,0.85386,0.976884,0.736689,0.045152,0.048143,1.478972,-0.06455,0.209515,1.023371,1.326935


In [58]:
import plotly.express as px

# Series views of the plotted columns, kept available for any later cell that uses them.
rmseps = metrics_low_level['RMSEP']
combinations = metrics_low_level['Combination']

# One bar per sensor combination, with bar height equal to its RMSEP.
# Columns are referenced by name so the axis titles and the legend title are taken
# directly from the column names ('Combination' and 'RMSEP').
fig = px.bar(
    metrics_low_level,
    x='Combination',
    y='RMSEP',
    color='Combination',  # one colour per combination
    title='Comparison of RMSEPs of All Models',
)

# Display the plot
fig.show()