In [30]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.svm import SVR
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error
from sklearn.model_selection import KFold
from sklearn.model_selection import GridSearchCV
from sklearn.pipeline import Pipeline
import numpy as np
# Grain whose processed dataset is analysed in this run. To analyse another
# grain, replace the value with one of the alternatives listed below.
GRAIN_TYPE = 'Wheat'
# Alternatives: 'Oats', 'Barley', 'Sorghum', 'Soybeans', 'Corn'

# Relative path of the processed CSV for the selected grain.
URL = "../../Datasets/processed/" + GRAIN_TYPE + ".csv"

# Empty accumulators for the moisture-content ("mc") metrics. The
# cross-validation loop in a later cell appends one entry per fold to each.
(
    r2_scores_mc,
    mse_scores_mc,
    mae_scores_mc,
    min_abs_errors_mc,
    max_abs_errors_mc,
    max_error_indices_mc,
) = ([] for _ in range(6))

# Read the dataset and replace the 'Variety' labels with integer codes.
# The fitted encoder is kept in `label_encoder` so the codes can be mapped
# back to the original labels if needed.
label_encoder = LabelEncoder()
df = pd.read_csv(URL)
df['Variety'] = label_encoder.fit_transform(df['Variety'])

In [31]:
# Separate the dataset into features (X) and target (y)
X = df[['Freq', 'd(cm)', 'Attn', 'Phase_Corr', 'Permittivity_real', 'Permittivity_imaginary', 'Variety']]  # Features
y = df['M%']  # Target

# Reset the per-fold accumulators here so that re-running this cell on its own
# does not append a second set of folds to results left over from a previous
# run (which would silently skew the averages reported afterwards).
r2_scores_mc = []
mse_scores_mc = []
mae_scores_mc = []
min_abs_errors_mc = []
max_abs_errors_mc = []
max_error_indices_mc = []

# 10-fold cross-validation; shuffling with a fixed seed keeps the folds
# reproducible between runs.
kf = KFold(n_splits=10, shuffle=True, random_state=42)

# NOTE(review): the SVR settings used below (kernel='rbf', C=1, gamma=0.01)
# are one point of the grid covered by the now-removed, disabled GridSearchCV
# code (C in {0.1, 1, 10, 100}; gamma in {'scale', 'auto', 0.1, 0.01, 0.001};
# kernel in {'rbf', 'linear'}). Presumably they were picked from that search;
# confirm before changing them.

for train_index, test_index in kf.split(X):
    # kf.split yields positional row indices, hence .iloc
    X_train, X_test = X.iloc[train_index], X.iloc[test_index]
    y_train, y_test = y.iloc[train_index], y.iloc[test_index]

    # Standardize the features; the scaler is fitted on the training fold only
    # and then applied to the held-out fold.
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)

    # Train the SVM regressor
    svm_model = SVR(kernel='rbf', C=1, gamma=0.01)
    svm_model.fit(X_train_scaled, y_train)

    # Predictions for the held-out fold
    y_pred = svm_model.predict(X_test_scaled)

    # Calculate and store the metrics
    mse_scores_mc.append(mean_squared_error(y_test, y_pred))
    r2_scores_mc.append(r2_score(y_test, y_pred))
    mae_scores_mc.append(mean_absolute_error(y_test, y_pred))

    # Work on a plain NumPy array so that argmax is unambiguously a position
    # within the fold (not a pandas index label), which is what is needed to
    # look the row up in test_index.
    abs_errors = np.abs(np.asarray(y_test) - y_pred)
    min_abs_errors_mc.append(abs_errors.min())
    max_abs_errors_mc.append(abs_errors.max())
    # Positional row index (within X / y) of the worst prediction in this fold
    max_error_indices_mc.append(test_index[abs_errors.argmax()])

In [32]:

# Report every moisture-content metric averaged over the cross-validation folds.
# Note: the "Min"/"Max Absolute Error" rows are the means of the per-fold
# minimum / maximum absolute errors, not the extremes over all folds.
print("Moisture Content Metrics:")
for label, fold_values in (
    ("R^2:", r2_scores_mc),
    ("Mean Squared Error:", mse_scores_mc),
    ("Mean Absolute Error:", mae_scores_mc),
    ("Min Absolute Error:", min_abs_errors_mc),
    ("Max Absolute Error:", max_abs_errors_mc),
):
    print(label, np.mean(fold_values))


Moisture Content Metrics:
R^2: 0.9403215967346545
Mean Squared Error: 0.8534556053401049
Mean Absolute Error: 0.6286997050510679
Min Absolute Error: 0.010471102783853325
Max Absolute Error: 3.6299628185697257
