In [8]:
#load in data
import pandas as pd

# Ground-truth CSV; its columns are looked up by name when fitting the regressors.
gt_path = 'D:/datasets/soilHyperspec/train_data/train_data/train_gt.csv'

# Masked/transformed PCA averaged features.
pca_mAv_path = r'C:\Users\oakle\OneDrive - University of Lincoln\uniProjects\soil\remote_sensing_hyperspectral_soil-main\remote_sensing_hyperspectral_soil-main\mAveragedPCAs.csv'

# Masked/transformed TruncatedSVD averaged features.
svd_mAv_path = r'C:\Users\oakle\OneDrive - University of Lincoln\uniProjects\soil\remote_sensing_hyperspectral_soil-main\remote_sensing_hyperspectral_soil-main\mAveragedSVD.csv'

# NOTE(review): these are absolute, machine-specific paths; consider a
# configurable data directory so the notebook runs elsewhere.

gt_df = pd.read_csv(gt_path)

# header=None: the first row of each feature CSV is read as data, not as
# column names. `.values` turns the frames into plain NumPy arrays, so despite
# the `_df` suffix these two names hold ndarrays, not DataFrames.
# Fixed: the PCA file was previously read via the undefined name `mAv_path`,
# which raises NameError on a fresh kernel.
pca_mAv_df = pd.read_csv(pca_mAv_path, header=None).values
svd_mAv_df = pd.read_csv(svd_mAv_path, header=None).values

## Fitting Regressors

In [4]:
import numpy as np
from sklearn.svm import SVR
from sklearn.linear_model import LinearRegression
from sklearn.neighbors import KNeighborsRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from sklearn.gaussian_process import GaussianProcessRegressor

# Names of the ground-truth columns to predict; each one is fitted and
# scored separately.
depVars = [
    'P',
    'K',
    'Mg',
    'pH',
]


#models
def regressors(features, test_size=0.3, random_state=30):
    """Fit several regressors per target variable and print each test-set MSE.

    For every column of ``gt_df`` listed in ``depVars`` the samples are split
    into train/test partitions once, then each model is fitted on the
    training partition and scored on the held-out partition.

    Parameters
    ----------
    features : array-like
        Feature matrix handed to ``train_test_split`` together with the
        target column, so it needs the same number of rows as ``gt_df``.
    test_size : float, default 0.3
        Fraction of samples held out for testing (70/30 split by default).
    random_state : int or None, default 30
        Seed for the split, so every model sees the same partition.

    Returns
    -------
    None
        Results are printed, one line per (target, model) pair, with a blank
        line after each target.
    """
    # Each display name is paired with its estimator so the two cannot drift
    # out of sync. Instances are reused across targets; `fit` is called again
    # for every target before predicting.
    # NOTE(review): GaussianProcessRegressor runs with its defaults here and
    # its recorded MSE is far worse than the other models; the targets are
    # not centred/scaled (normalize_y is left at its default) -- worth
    # checking whether that is intended.
    models = [
        ('Linear Regression', LinearRegression()),
        ('Support Vector Regressor', SVR(C=1.0, epsilon=0.2, kernel='poly')),
        ('Gaussian Process Regressor', GaussianProcessRegressor()),
        ('K-Nearest Regressor', KNeighborsRegressor(weights='distance', n_neighbors=3)),
    ]

    # For each variable to predict
    for target in depVars:
        y = gt_df[target].values

        # The split depends only on the data and the seed, not on the model,
        # so it is computed once per target instead of once per model.
        X_train, X_test, y_train, y_test = train_test_split(
            features, y, test_size=test_size, random_state=random_state
        )

        # Try out each regressor on the same partition
        for label, model in models:
            model.fit(X_train, y_train)
            y_pred = model.predict(X_test)

            # Mean squared error on the held-out samples
            mse = np.mean((y_test - y_pred) ** 2, axis=0)

            print(target, label, 'mse:', mse)
        print()

## Average PCA fit

In [9]:
# Score every regressor on the averaged PCA features; the banner labels the
# block of per-target results printed below it.
print('Average PCA values fitted to regressors:')
regressors(features=pca_mAv_df)

Average PCA values fitted to regressors:
P Linear Regression mse: 901.0208668110034
P Support Vector Regressor mse: 944.6564441485037
P Gaussian Process Regressor mse: 5858.217491250066
P K-Nearest Regressor mse: 1233.078582841769

K Linear Regression mse: 4034.840917149956
K Support Vector Regressor mse: 4149.549363279607
K Gaussian Process Regressor mse: 55781.47357563792
K K-Nearest Regressor mse: 4991.877768174437

Mg Linear Regression mse: 1599.813488098176
Mg Support Vector Regressor mse: 1592.9954452651775
Mg Gaussian Process Regressor mse: 26201.30141708591
Mg K-Nearest Regressor mse: 1899.1377056773729

pH Linear Regression mse: 0.0655481243641423
pH Support Vector Regressor mse: 0.06494162096137615
pH Gaussian Process Regressor mse: 45.656665436568446
pH K-Nearest Regressor mse: 0.07616023235186985



## Average SVD fit

In [10]:
# Score every regressor on the averaged TruncatedSVD features.
# Fixed: the banner previously said "PCA" although this cell evaluates the
# SVD features, which made the two result blocks indistinguishable.
print('Average SVD values fitted to regressors:')
regressors(svd_mAv_df)

Average PCA values fitted to regressors:
P Linear Regression mse: 900.9993201847466
P Support Vector Regressor mse: 947.4834571257445
P Gaussian Process Regressor mse: 5834.984863983891
P K-Nearest Regressor mse: 1210.7723483089835

K Linear Regression mse: 4035.52641329631
K Support Vector Regressor mse: 4161.3436556147135
K Gaussian Process Regressor mse: 55480.02358808652
K K-Nearest Regressor mse: 4888.874124241744

Mg Linear Regression mse: 1599.8110236545153
Mg Support Vector Regressor mse: 1578.2313576849017
Mg Gaussian Process Regressor mse: 26046.651310707042
Mg K-Nearest Regressor mse: 1842.3689680113473

pH Linear Regression mse: 0.06555113087324606
pH Support Vector Regressor mse: 0.06511785874512944
pH Gaussian Process Regressor mse: 45.35907881928534
pH K-Nearest Regressor mse: 0.07636251838893009

