In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from preprocessing.process import derivative
from preprocessing.feature import rfe
from tabpfn import TabPFNRegressor
from sklearn.metrics import mean_squared_error, r2_score

# ---------------------------------------------------------------------------
# End-to-end regression pipeline for one spreadsheet of spectra:
#   load -> train/test split -> preprocessing -> feature selection
#   -> TabPFN regression -> RMSE / R2 report for both partitions.
# ---------------------------------------------------------------------------

# Load the data
data_path = "D:/A/CSU/wheat/Test_ManufacturerB.xlsx"  # B instrument
df = pd.read_excel(data_path)
# Column 1 is used as the regression target; columns 2..1062 are the predictors.
y = df.iloc[:, 1].values.ravel()
spectra = df.iloc[:, 2:1063].values

# Data partitioning: shuffled 75 % / 25 % split with a fixed seed so the
# partition is the same on every run.
X_train, X_test, y_train, y_test = train_test_split(
    spectra,
    y,
    test_size=0.25,
    random_state=42,
    shuffle=True,
)

# Spectral preprocessing: the same project helper is applied to each partition
# separately. NOTE(review): presumably a per-spectrum derivative transform --
# confirm in preprocessing.process.
X_train_de, X_test_de = derivative(X_train), derivative(X_test)

# Feature selection: keep 200 features. The helper receives the training
# targets only. NOTE(review): presumably the selector is fitted on the training
# partition and merely applied to the test partition -- confirm in
# preprocessing.feature.
X_train_rfe, X_test_rfe = rfe(
    X_train_de,
    y_train,
    X_test_de,
    n_features_to_select=200,
)

# Modelling and prediction
# NOTE(review): ignore_pretraining_limits=True presumably lifts TabPFN's
# size checks on the input table -- confirm against the TabPFN docs.
model = TabPFNRegressor(
    random_state=42,
    ignore_pretraining_limits=True,
)
model.fit(X_train_rfe, y_train)

y_train_pred = model.predict(X_train_rfe)
y_test_pred = model.predict(X_test_rfe)

# Evaluation: RMSE is the square root of the mean squared error; R2 is the
# coefficient of determination. Both are computed per partition.
RMSE_train, R2_train = (
    np.sqrt(mean_squared_error(y_train, y_train_pred)),
    r2_score(y_train, y_train_pred),
)
RMSE_test, R2_test = (
    np.sqrt(mean_squared_error(y_test, y_test_pred)),
    r2_score(y_test, y_test_pred),
)

# Report both partitions, training first, with four decimal places.
for template, scores in (
    ('Training RMSE: {:.4f}, R2: {:.4f}', (RMSE_train, R2_train)),
    ('Testing RMSE: {:.4f}, R2: {:.4f}', (RMSE_test, R2_test)),
):
    print(template.format(*scores))