In [1]:
# Helper libraries
import matplotlib.pyplot as plt
import pandas as pd

# Tensorflow and Scikit
from tensorflow import keras
from sklearn import preprocessing


In [2]:
# Import dataset
# Forward slashes are used so the relative path resolves on Linux/macOS as
# well as on Windows (a literal backslash is only a separator on Windows).
df = pd.read_csv('datasets/peanut_mold_detection_watershed.csv')

# Define Data Columns
# info_col     : identifying columns, kept aside and not fed to the model
# features_col : numeric columns that are scaled and used as model inputs
# target_col   : ground-truth label column
info_col = ['sample_id', 'area']
features_col = ['red', 'green', 'blue', 'hue', 'sat', 'value', 'gray', 'contrast', 'dissimilarity', 'homogeneity', 'energy', 'correlation', 'asm']
target_col = ['class']

# Split info, features and class
# Selecting with a list keeps each of these as a DataFrame (y is a
# single-column DataFrame, not a Series).
sample_info = df[info_col]
X = df[features_col]
y = df[target_col]

max_f = X.shape[1]+1    # max features (number of feature columns + 1)
test_size = 0.3         # test dataset size

In [3]:
# Normalize feature data
# NOTE(review): the scaler is fitted on this dataset itself. If the model
# loaded further down was trained on features scaled with a scaler fitted on
# the training set, the value ranges here may not match what the model saw --
# confirm, and reuse the training scaler if one was saved.
scaler = preprocessing.MinMaxScaler()
d = scaler.fit_transform(X)

# Carry X's row index over to the scaled frame so that a later column-wise
# concat with the info columns aligns row-for-row even if the dataset index
# is not the default 0..n-1 range.
SCALED_X = pd.DataFrame(d, columns=features_col, index=X.index)
SCALED_X.head()

Unnamed: 0,red,green,blue,hue,sat,value,gray,contrast,dissimilarity,homogeneity,energy,correlation,asm
0,0.665454,0.467331,0.143971,0.355984,0.865238,0.665244,0.485194,0.125244,0.116112,0.542372,0.286834,0.879719,0.174404
1,0.792649,0.584239,0.310303,0.35903,0.712812,0.792465,0.615305,0.211435,0.195039,0.530225,0.413116,0.884903,0.279861
2,0.76023,0.574285,0.278962,0.364585,0.740217,0.760057,0.595725,0.155582,0.262068,0.385167,0.404129,0.919319,0.271776
3,0.752523,0.600287,0.384692,0.258084,0.615565,0.752291,0.624045,0.2138,0.233776,0.469578,0.411608,0.883392,0.278498
4,0.689456,0.551259,0.33985,0.281672,0.636426,0.689223,0.570399,0.098627,0.1236,0.664742,0.590508,0.992561,0.457606


In [4]:
# Features to drop, chosen from the correlation matrices.
# The list names suggest one list per segmentation pipeline
# (colour segmentation / watershed) -- confirm before reusing.
colseg_drop = [
    'energy',
    'green',
    'red',
]
watershed_drop = [
    'energy',
    'value',
    'green',
]


In [5]:
# DO NOT RUN IF YOU DO NOT WANT TO REMOVE ANY FEATURES
# Choose features to drop in the `columns` argument
# The result is reassigned (rather than dropped in place) and columns that
# are already gone are ignored, so running this cell a second time is a no-op
# instead of a KeyError. Trade-off: a misspelled column name in the drop list
# is silently skipped, so check SCALED_X.columns after changing the list.
SCALED_X = SCALED_X.drop(columns=watershed_drop, errors='ignore')

In [6]:
# Join the info columns and the scaled features side by side so that every
# feature row is tagged with its sample id
temp_dataset = pd.concat([sample_info, SCALED_X], axis=1)

# Split the joined frame again: DATA_INFO keeps the info columns, while
# temp_dataset is left with only the feature columns passed to the model
DATA_INFO = temp_dataset[info_col]
temp_dataset = temp_dataset.drop(columns=info_col)

In [7]:
# Load prediction model
# Forward slashes keep this relative path valid on Linux/macOS as well as on
# Windows (a literal backslash is only a separator on Windows).
model = keras.models.load_model('models/model_watershed_2500')

In [8]:
# Feed the feature-only frame through the loaded model; the result is
# indexed per sample further down as predict_test[i][0]
predict_test = model.predict(x=temp_dataset)

In [9]:
# Containers used to collate the per-sample results:
# data_columns -> column headers of the results table
# pred_result  -> one row (list) per examined sample
data_columns = [
    'Sample ID',
    'Sample Label',
    'Sample Remark',
    'Prediction Label',
    'Prediction Remark',
]
pred_result = list()

In [10]:
# Validate each data
# Both accumulators are reset here so that re-running this cell does not
# append duplicate rows to pred_result or double-count matches.
match = 0
pred_result = []

# Predictions at or above this value are remarked 'True', below it 'False'.
decision_threshold = 0.5

for i in range(y.shape[0]):
    sample_name = DATA_INFO['sample_id'].iloc[i]
    # Explicit (row, column) positional access; indexing the row Series with
    # an integer key relies on a deprecated positional fallback in pandas.
    sample_label = y.iloc[i, 0]
    pred_label = predict_test[i][0]

    print("Examining: {}".format(sample_name))
    print("Actual: {} | Predict: {}".format(sample_label, pred_label))
    err_diff = abs(sample_label - pred_label)
    print("Error: {}".format(err_diff*100))

    # A label of 0 is remarked 'False'; any other label is remarked 'True'.
    sample_remark = 'False' if sample_label == 0 else 'True'
    pred_remark = 'True' if pred_label >= decision_threshold else 'False'

    # One decision drives both the printed verdict and the match counter, so
    # the log and the reported accuracy cannot disagree (previously they could
    # when the prediction was exactly 0.5).
    is_match = sample_remark == pred_remark
    if is_match:
        print("Correct!")
        match = match + 1
    else:
        print("Incorrect!")

    sample_result = [sample_name, sample_label, sample_remark, pred_label, pred_remark]
    pred_result.append(sample_result)


Examining: samples\Non-Contaminated\Output_Watershed\Test_batch_1_001_001.jpg
Actual: 0 | Predict: 0.31424808502197266
Error: 31.424808502197266
Correct!
Examining: samples\Non-Contaminated\Output_Watershed\Test_batch_1_001_002.jpg
Actual: 0 | Predict: 0.042483747005462646
Error: 4.248374700546265
Correct!
Examining: samples\Non-Contaminated\Output_Watershed\Test_batch_1_001_003.jpg
Actual: 0 | Predict: 0.056844860315322876
Error: 5.684486031532288
Correct!
Examining: samples\Non-Contaminated\Output_Watershed\Test_batch_1_001_004.jpg
Actual: 0 | Predict: 0.3269846439361572
Error: 32.69846439361572
Correct!
Examining: samples\Non-Contaminated\Output_Watershed\Test_batch_1_001_005.jpg
Actual: 0 | Predict: 0.5250355005264282
Error: 52.50355005264282
Incorrect!
Examining: samples\Non-Contaminated\Output_Watershed\Test_batch_1_001_006.jpg
Actual: 0 | Predict: 0.061934202909469604
Error: 6.1934202909469604
Correct!
Examining: samples\Non-Contaminated\Output_Watershed\Test_batch_1_001_007.jpg

In [12]:
# Accuracy = matching remarks / total number of labelled samples
total_samples = len(y)
accuracy = match / total_samples
print(accuracy)

0.888631090487239


In [13]:
# Collect the per-sample results into a table, one row per sample
pred_df = pd.DataFrame(data=pred_result, columns=data_columns)

# Export without the row index. Forward slashes keep the relative path valid
# on Linux/macOS as well as on Windows. The 'output' directory must already
# exist.
pred_df.to_excel('output/results_watershed_validation_1.xlsx', index=False)

# Preview the table as the cell output (a bare expression is only rendered
# when it is the last statement of the cell).
pred_df.head()