In [120]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.preprocessing import LabelEncoder

# Dataset selector: this string is used as the CSV file stem when the data is
# loaded (GRAIN_TYPE + ".csv" under the processed-datasets directory).
GRAIN_TYPE = 'Wheat'
# Alternative selections -- to switch, comment out the line above and
# uncomment exactly one of the following (presumably each has a matching
# CSV file; verify before running):
# GRAIN_TYPE = 'newWheatData'
# GRAIN_TYPE = 'CornAdded_Type'
# GRAIN_TYPE = 'Combined_Grains'
# GRAIN_TYPE = 'Oats'
# GRAIN_TYPE = 'Barley'
# GRAIN_TYPE = 'Sorghum'
# GRAIN_TYPE = 'Soybeans'
# GRAIN_TYPE = 'Corn'

Load the dataset and preprocess it

In [121]:
# Location of the processed CSV for the selected grain, relative to the
# notebook's working directory.
URL = "../../Datasets/processed/" + GRAIN_TYPE + ".csv"

# Load the dataset
df = pd.read_csv(URL)
# Optional: restrict the analysis to a single variety.
#df = df[df['Variety'] == 'SOUTH DAKOTA']

# Preprocess the data
# Missing values are currently NOT removed; enable the line below to drop
# incomplete rows before training.
# df.dropna(subset=['Phase', 'Attn', 'Density', 'M%'], inplace=True)

# Encode 'Variety' as integer labels whenever it is not already numeric.
# Testing "not numeric" (instead of dtype == 'object') also covers pandas'
# dedicated string dtypes, which would otherwise skip the encoding and leave
# raw strings in the feature matrix.
if not pd.api.types.is_numeric_dtype(df['Variety']):
    le = LabelEncoder()
    df['Variety'] = le.fit_transform(df['Variety'])

# Model inputs (features) and the two regression targets.
feature_columns = [
    'Freq',
    'd(cm)',
    'Attn',
    'Phase',
    'Phase_Corr',
    'Permittivity_real',
    'Permittivity_imaginary',
    'Variety',
]
target_columns = ['M%', 'Density']

X = df[feature_columns]
y = df[target_columns]

# Split the dataset into training and testing sets: 20% of the rows are held
# out, with a fixed seed so the split is reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


Train the model

In [122]:
# One random forest is fitted on both target columns of y_train at once
# ('M%' and 'Density'); the seed is fixed for reproducibility.
regressor = RandomForestRegressor(
    n_estimators=100,
    min_samples_leaf=4,
    random_state=42,
)
# Kept as the cell's last expression so the fitted estimator is displayed.
regressor.fit(X_train, y_train)


RandomForestRegressor(min_samples_leaf=4, random_state=42)

Predict on the test set

In [123]:
# Predict both targets for the held-out rows. The evaluation cells read
# column 0 as 'M%' and column 1 as 'Density', i.e. the column order of y.
y_pred = regressor.predict(X_test)

Evaluate


In [124]:
from sklearn.metrics import mean_squared_error, mean_absolute_error,max_error, r2_score
# NOTE(review): this import rebinds the builtin `round` for the whole notebook
# and is not used in this cell; kept in case other cells rely on it.
from sigfig import round

# Observed vs. predicted moisture content on the held-out rows.
mc_true = y_test['M%']
mc_pred = y_pred[:, 0]

print("Moisture %")
mc_r2_score = r2_score(mc_true, mc_pred)
print("R^2: {:#.4g}".format(mc_r2_score))
# The `squared` keyword is omitted on purpose: `squared=True` was the default
# (plain MSE), and the keyword was deprecated in scikit-learn 1.4 and removed
# in 1.6, where passing it raises a TypeError.
mse_mc = mean_squared_error(mc_true, mc_pred)
print('Mean Squared Error: ', "{0:.4g}".format(mse_mc))
mae_mc = mean_absolute_error(mc_true, mc_pred)
print('Mean Absolute Error: ', "{0:.4g}".format(mae_mc))

# Per-sample absolute errors, computed positionally and vectorized (this avoids
# a Python loop and no longer rebinds the builtin `sum`).
abs_errors_mc = abs(mc_true.to_numpy() - mc_pred)
print("Min Absolute Error: ",abs_errors_mc.min())
print("Max Absolute Error: ",abs_errors_mc.max())

Moisture %
R^2: 0.9876
Mean Squared Error:  0.1736
Mean Absolute Error:  0.2042
Min Absolute Error:  3.552713678800501e-15
Max Absolute Error:  1.9569657142857224


In [125]:
from sklearn.metrics import mean_squared_error, mean_absolute_error,max_error, r2_score
# NOTE(review): this import rebinds the builtin `round` for the whole notebook
# and is not used in this cell; kept in case other cells rely on it.
from sigfig import round

# Observed vs. predicted density on the held-out rows.
density_true = y_test['Density']
density_pred = y_pred[:, 1]

# Density metrics get their own names so they do not overwrite the moisture
# metrics (mc_r2_score, mse_mc, mae_mc) computed in the previous cell.
print("Density")
density_r2_score = r2_score(density_true, density_pred)
print("R^2: {:#.4g}".format(density_r2_score))
# The `squared` keyword is omitted on purpose: `squared=True` was the default
# (plain MSE), and the keyword was deprecated in scikit-learn 1.4 and removed
# in 1.6, where passing it raises a TypeError.
mse_density = mean_squared_error(density_true, density_pred)
print('Mean Squared Error: ', "{0:.4g}".format(mse_density))
mae_density = mean_absolute_error(density_true, density_pred)
print('Mean Absolute Error: ', "{0:.4g}".format(mae_density))

# Per-sample absolute errors, computed positionally and vectorized (this avoids
# a Python loop and no longer rebinds the builtin `sum`).
abs_errors_density = abs(density_true.to_numpy() - density_pred)
print("Min Absolute Error: ",abs_errors_density.min())
print("Max Absolute Error: ",abs_errors_density.max())

Density
R^2: 0.8664
Mean Squared Error:  0.0006341
Mean Absolute Error:  0.02008
Min Absolute Error:  0.00024003633866132024
Max Absolute Error:  0.07775270169830151
