In [32]:
# Example applying a multi-sensor spectral soil library (XRF, GRS, vis and NIR) for soil fertility attributes quantification (example for exCa)

# importing the necessary libraries
import numpy as np
import pandas as pd
pd.options.plotting.backend = 'plotly'

# Load the data: one ';'-separated CSV (with a header row) per sensor, plus the
# reference values in Y.
XXRF = pd.read_csv('datasets/XXRF.csv', header=0, sep=';')
Y = pd.read_csv('datasets/Y.csv', header=0, sep=';')

# XNIR, XGRS and XNIX are consumed by the calibration/prediction split and the
# preprocessing cells further down, so they must be loaded here for the notebook
# to survive "Restart Kernel -> Run All" (otherwise those cells raise NameError).
# NOTE(review): the file names below are assumed to follow the same naming
# pattern as XXRF.csv -- confirm against the contents of datasets/.
XNIR = pd.read_csv('datasets/XNIR.csv', header=0, sep=';')
XGRS = pd.read_csv('datasets/XGRS.csv', header=0, sep=';')
XNIX = pd.read_csv('datasets/XNIX.csv', header=0, sep=';')


## **Kennard-Stone**

In [33]:
# Kennard-Stone splitting into calibration (70 %) and prediction (30 %) sets
import kennard_stone as ks

# The 'Samples' column is left out of the split and re-attached afterwards
# as the first column of each subset.
Ycal, Ypred = ks.train_test_split(Y.drop(columns=['Samples']), test_size=0.30)

# Keep the row indices selected for each subset: later cells reuse them to
# split every sensor matrix in exactly the same way.
indices_cal, indices_pred = Ycal.index, Ypred.index

Ycal.insert(0, 'Samples', Y['Samples'].iloc[indices_cal])
Ypred.insert(0, 'Samples', Y['Samples'].iloc[indices_pred])

# Renumber the rows from 0 now that the sample labels are back in place
Ycal, Ypred = Ycal.reset_index(drop=True), Ypred.reset_index(drop=True)

Calculating pairwise distances using scikit-learn.
Calculating pairwise distances using scikit-learn.



'force_all_finite' was renamed to 'ensure_all_finite' in 1.6 and will be removed in 1.8.


'force_all_finite' was renamed to 'ensure_all_finite' in 1.6 and will be removed in 1.8.


'force_all_finite' was renamed to 'ensure_all_finite' in 1.6 and will be removed in 1.8.



In [34]:
# Apply the calibration/prediction row selection obtained from Y to every
# sensor matrix, so that all blocks contain the same samples in the same order.
# Each pair is (calibration, prediction); rows are renumbered from 0.

XcalXRF, XpredXRF = (XXRF.iloc[rows].reset_index(drop=True) for rows in (indices_cal, indices_pred))
XcalNIR, XpredNIR = (XNIR.iloc[rows].reset_index(drop=True) for rows in (indices_cal, indices_pred))
XcalGRS, XpredGRS = (XGRS.iloc[rows].reset_index(drop=True) for rows in (indices_cal, indices_pred))
XcalNIX, XpredNIX = (XNIX.iloc[rows].reset_index(drop=True) for rows in (indices_cal, indices_pred))

# **preprocessings**

In [35]:
import preprocessings as prepr  # preprocessing helpers (Poisson scaling used here)

# Poisson scaling (with mc=True) fitted on the XRF calibration set; the call also
# returns the two statistics needed to transform new data.
# NOTE(review): presumably the calibration mean spectrum and the mean of the
# scaled calibration data -- confirm in preprocessings.poisson.
XcalXRF_pre, meancal_xrf, meancalpoisson_xrf = prepr.poisson(XcalXRF, mc=True)

# Transform the prediction set with the calibration statistics only:
# divide by sqrt(meancal_xrf), then subtract meancalpoisson_xrf.
XpredXRF_pre = XpredXRF.div(np.sqrt(meancal_xrf)).sub(meancalpoisson_xrf)

In [36]:
from scipy.signal import savgol_filter  # Savitzky-Golay filter

# NIR: Savitzky-Golay first derivative (deriv=1) with an 11-point window and a
# 1st-order polynomial, applied identically to calibration and prediction sets.
# savgol_filter returns a plain array, so the new DataFrames carry integer
# column labels instead of the original ones.
XcalNIR_pre, XpredNIR_pre = (
    pd.DataFrame(savgol_filter(spectra, window_length=11, polyorder=1, deriv=1))
    for spectra in (XcalNIR, XpredNIR)
)

# Mean centering: fitted on the calibration set, then the returned calibration
# mean is subtracted from the prediction set.
XcalNIR_pre, meancal_nir = prepr.mc(XcalNIR_pre)
XpredNIR_pre = XpredNIR_pre - meancal_nir

In [37]:
# NIX: Savitzky-Golay first derivative (deriv=1) with a 3-point window and a
# 1st-order polynomial, applied identically to calibration and prediction sets.
# The filtered arrays are wrapped in new DataFrames with integer column labels.
XcalNIX_pre, XpredNIX_pre = (
    pd.DataFrame(savgol_filter(spectra, window_length=3, polyorder=1, deriv=1))
    for spectra in (XcalNIX, XpredNIX)
)

# Mean centering: fitted on the calibration set, then the returned calibration
# mean is subtracted from the prediction set.
XcalNIX_pre, meancal_nix = prepr.mc(XcalNIX_pre)
XpredNIX_pre = XpredNIX_pre - meancal_nix

In [38]:
# GRS: Savitzky-Golay first derivative (deriv=1) with an 11-point window and a
# 1st-order polynomial, applied identically to calibration and prediction sets.
# The filtered arrays are wrapped in new DataFrames with integer column labels.
XcalGRS_pre, XpredGRS_pre = (
    pd.DataFrame(savgol_filter(spectra, window_length=11, polyorder=1, deriv=1))
    for spectra in (XcalGRS, XpredGRS)
)

# Mean centering: fitted on the calibration set, then the returned calibration
# mean is subtracted from the prediction set.
XcalGRS_pre, meancal_grs = prepr.mc(XcalGRS_pre)
XpredGRS_pre = XpredGRS_pre - meancal_grs

# **individual models**

Let's test the individual modeling step by comparing models built on the XRF spectral signatures. <br> We can choose between model=**'pls'** (set the number of LVs), **'rf'** (set the random seed) or **'svm'** (set the kernel). Additionally, if **LVscores = True**, the LV scores are also extracted.

In [39]:
import automated_datafusion as df

# XRF-only PLS model for exCa, evaluated with up to 3 latent variables.
# Returns an overview table plus the calibration and prediction results.
overview_xrf_pls, calres_xrf_pls, predres_xrf_pls = df.modelo_individual_otimizado(
    Xcal=XcalXRF_pre,
    Xpred=XpredXRF_pre,
    ycal=Ycal,
    ypred=Ypred,
    target='exCa',
    model='pls',
    maxLV=3,
)

# Display the figures of merit (one row per number of LVs)
overview_xrf_pls

Unnamed: 0,LVs number,R2 Cal,r2 Cal,RMSEC,R2 CV,r2 CV,RMSECV,Bias CV,tbias CV,RPD CV,RPIQ CV,R2 Pred,r2 Pred,RMSEP,Bias Pred,tbias Pred,RPD Pred,RPIQ Pred
0,1,0.597204,0.597204,1.223043,0.494973,0.495112,1.369483,0.022517,0.121954,1.419892,1.911305,0.570256,0.64098,0.992197,-0.03027,0.146381,1.558249,1.977933
1,2,0.626063,0.626063,1.178416,0.311194,0.43525,1.599365,-0.092499,0.429635,1.215806,1.636587,0.65709,0.755785,0.886305,-0.020249,0.109595,1.744421,2.214248
2,3,0.670572,0.670572,1.106062,0.256313,0.423179,1.66186,-0.139772,0.625962,1.170085,1.575043,0.706116,0.841085,0.820505,-0.127243,0.752841,1.884315,2.391819


In [40]:
import automated_datafusion as df

# XRF-only random forest model for exCa; the seed is fixed for reproducibility.
# Returns an overview table plus the calibration and prediction results.
overview_xrf_rf, calres_xrf_rf, predres_xrf_rf = df.modelo_individual_otimizado(
    Xcal=XcalXRF_pre,
    Xpred=XpredXRF_pre,
    ycal=Ycal,
    ypred=Ypred,
    target='exCa',
    model='rf',
    random_seed=1,
)

# Display the figures of merit
overview_xrf_rf

Unnamed: 0,Model,R2 Cal,r2 Cal,RMSEC,R2 Pred,r2 Pred,RMSEP,Bias Pred,tbias Pred,RPD Pred,RPIQ Pred
0,RF,0.962403,0.970683,0.37366,0.650085,0.760432,0.895311,-0.496254,3.193733,1.726874,2.191975


In [41]:
import automated_datafusion as df

# XRF-only SVM model for exCa using a linear kernel.
# Returns an overview table plus the calibration and prediction results.
overview_xrf_svm, calres_xrf_svm, predres_xrf_svm = df.modelo_individual_otimizado(
    Xcal=XcalXRF_pre,
    Xpred=XpredXRF_pre,
    ycal=Ycal,
    ypred=Ypred,
    target='exCa',
    model='svm',
    kern='linear',
)

# Display the figures of merit
overview_xrf_svm

Unnamed: 0,Model,R2 Cal,r2 Cal,RMSEC,R2 Pred,r2 Pred,RMSEP,Bias Pred,tbias Pred,RPD Pred,RPIQ Pred
0,SVM,0.949322,0.949641,0.43382,0.647375,0.69056,0.898772,-0.276005,1.547534,1.720225,2.183535


In [42]:
# Compare the prediction error (RMSEP) of the three XRF models in one figure
import plotly.graph_objects as go

# RF and SVM overviews hold a single row; the PLS overview holds one row per
# number of LVs, so position 2 is the 3-LV model.
rmsep_svm = overview_xrf_svm['RMSEP'].values[0]
rmsep_rf = overview_xrf_rf['RMSEP'].values[0]
rmsep_pls = overview_xrf_pls['RMSEP'].values[2]

# Model label -> RMSEP, in the order the models appear on the x axis
rmsep_by_model = {'SVM': rmsep_svm, 'RF': rmsep_rf, 'PLS': rmsep_pls}

fig = go.Figure(data=[
    # one large marker per model, each with its own legend entry
    *(
        go.Scatter(
            name=label,
            x=[label],
            y=[value],
            mode='lines+markers',
            marker=dict(symbol='circle', size=20),
            line=dict(width=2),
        )
        for label, value in rmsep_by_model.items()
    ),
    # dashed line joining the three models
    go.Scatter(
        name='Line',
        x=list(rmsep_by_model.keys()),
        y=list(rmsep_by_model.values()),
        mode='lines',
        line=dict(dash='dash', width=3),
    ),
])

# Title and axis labels
fig.update_layout(title='Comparison of RMSEP Values', xaxis_title='Model', yaxis_title='RMSEP')

fig.show()