In [3]:
import h5py
from sklearn.model_selection import train_test_split
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.svm import SVC
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report, f1_score, roc_auc_score, roc_curve, precision_score, recall_score
from sklearn.model_selection import GridSearchCV

In [32]:
# Path to your HDF5 file
hdf5_file_path = 'preprocessed_images_h5'

# Open the HDF5 file
with h5py.File(hdf5_file_path, 'r') as hf:
    # Access the dataset containing images
    images = hf['preprocessed_images'][:]

# Now 'images' variable contains your array of images
# You can use it as needed
print(f"Loaded HDF5 file with images of shape: {images.shape}")

Loaded HDF5 file with images of shape: (857, 128, 128, 3)


In [34]:
# Path to your HDF5 file
hdf5_file_path = 'labels_h5'

# Open the HDF5 file
with h5py.File(hdf5_file_path, 'r') as hf:
    # Access the dataset containing images
    labels = hf['labels'][:]

# Now 'images' variable contains your array of images
# You can use it as needed
print(f"Loaded HDF5 file with labels of shape: {labels.shape}")

Loaded HDF5 file with labels of shape: (857,)


In [43]:
X_train, X_test, y_train, y_test = train_test_split(images, labels, test_size=0.2, random_state=42)

In [44]:
X_train_flat = X_train.reshape(X_train.shape[0], X_train.shape[1]*X_train.shape[2]*X_train.shape[3])
print(X_train_flat.shape)
X_test_flat = X_test.reshape(X_test.shape[0], X_test.shape[1]*X_test.shape[2]*X_test.shape[3])
print(X_test_flat.shape)

(685, 49152)
(172, 49152)


In [40]:
y_train_decode = [label.decode() for label in y_train]
y_test_decode = [label.decode() for label in y_test]
print(np.unique(y_train_decode))

['France grn' 'Fuji' 'USA Envy' 'USA Koru']


In [9]:
df = pd.DataFrame(X_train_flat)
df

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,49142,49143,49144,49145,49146,49147,49148,49149,49150,49151
0,0.000000,0.000000,0.000000,0.007843,0.078431,0.235294,0.011765,0.125490,0.356863,0.011765,...,0.305882,0.058824,0.109804,0.305882,0.039216,0.070588,0.203922,0.000000,0.000000,0.000000
1,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
2,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
3,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.050980,...,0.266667,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
4,0.000000,0.000000,0.000000,0.011765,0.011765,0.058824,0.043137,0.047059,0.227451,0.047059,...,0.400000,0.054902,0.133333,0.392157,0.011765,0.035294,0.101961,0.000000,0.000000,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
680,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
681,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
682,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
683,0.027451,0.039216,0.058824,0.160784,0.239216,0.352941,0.145098,0.223529,0.337255,0.133333,...,0.349020,0.321569,0.321569,0.352941,0.321569,0.325490,0.356863,0.054902,0.054902,0.058824


In [45]:
scaler_X = StandardScaler()
X_train_scaled = scaler_X.fit_transform(X_train_flat)
X_test_scaled = scaler_X.transform(X_test_flat)

In [46]:
X_train = X_train_scaled
X_test = X_test_scaled
y_train = y_train_decode
y_test = y_test_decode

In [47]:
#linear benchmark model
model = SVC(kernel='linear')
model.fit(X_train, y_train)
y_pred = model.predict(X_test)
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

  France grn       1.00      1.00      1.00        27
        Fuji       0.77      0.89      0.83        54
    USA Envy       0.85      0.76      0.80        59
    USA Koru       0.70      0.66      0.68        32

    accuracy                           0.82       172
   macro avg       0.83      0.83      0.83       172
weighted avg       0.82      0.82      0.82       172



In [30]:
#Fine tune linear SVM model

parameters = {'C': [0.01,0.1, 1, 10, 100, 1000]}
grid_search = GridSearchCV(SVC(kernel='linear'), parameters, cv=5)
grid_search.fit(X_train, y_train)
best_params = grid_search.best_params_


In [31]:
best_params['C']

0.01

In [13]:
optimal_model = SVC(kernel='linear', C=best_params['C'])
optimal_model.fit(X_train, y_train)
y_pred = optimal_model.predict(X_test)
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

  France grn       1.00      1.00      1.00        27
        Fuji       0.77      0.89      0.83        54
    USA Envy       0.85      0.76      0.80        59
    USA Koru       0.70      0.66      0.68        32

    accuracy                           0.82       172
   macro avg       0.83      0.83      0.83       172
weighted avg       0.82      0.82      0.82       172



In [29]:
#rbf
model = SVC(kernel='rbf', C = 0.01)
model.fit(X_train, y_train)
y_pred = model.predict(X_test)
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

  France grn       0.00      0.00      0.00        27
        Fuji       0.00      0.00      0.00        54
    USA Envy       0.34      1.00      0.51        59
    USA Koru       0.00      0.00      0.00        32

    accuracy                           0.34       172
   macro avg       0.09      0.25      0.13       172
weighted avg       0.12      0.34      0.18       172



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [25]:
#Fine tuning rbf kernal model
parameters= {
    'C': [0.01, 0.1, 1, 10, 100],
    'gamma': [1, 0.1, 0.01, 0.001],
}
model = SVC(kernel='rbf')

# Setup GridSearchCV
grid_search = GridSearchCV(model, parameters, cv=5, scoring='accuracy', verbose=1)
grid_search.fit(X_train, y_train)
y_pred = grid_search.predict(X_test)


Fitting 5 folds for each of 20 candidates, totalling 100 fits


In [26]:
grid_search.best_estimator_

In [27]:
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

  France grn       0.00      0.00      0.00        27
        Fuji       0.00      0.00      0.00        54
    USA Envy       0.34      1.00      0.51        59
    USA Koru       0.00      0.00      0.00        32

    accuracy                           0.34       172
   macro avg       0.09      0.25      0.13       172
weighted avg       0.12      0.34      0.18       172



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [17]:
parameters = {
    'C': [0.01, 0.1, 1, 10, 100],             # Regularization parameter
    'degree': [2, 3, 4, 5],              # Degree of the polynomial kernel   # Kernel coefficient
}

model = SVC(kernel='poly')

# Setup GridSearchCV
grid_search = GridSearchCV(model, parameters, cv=5, scoring='accuracy', verbose=1)

# Perform grid search
grid_search.fit(X_train, y_train)

best_svm = grid_search.best_estimator_
y_pred = best_svm.predict(X_test)


Fitting 5 folds for each of 20 candidates, totalling 100 fits


In [18]:
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

  France grn       1.00      0.96      0.98        27
        Fuji       0.65      0.93      0.76        54
    USA Envy       0.88      0.59      0.71        59
    USA Koru       0.72      0.66      0.69        32

    accuracy                           0.77       172
   macro avg       0.81      0.78      0.79       172
weighted avg       0.80      0.77      0.76       172

