In [1]:
import numpy as np
import matplotlib.pyplot as plt
from skimage.feature import hog
from skimage import exposure
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score

In [2]:
# Load the augmented data from the saved file
data = np.load('../data/external/coral_augmented_combined.npz')

# Access the arrays from the loaded data
X_train = data['X_train']
y_train = data['y_train']
X_test = data['X_test']
y_test = data['y_test']
X_valid = data['X_valid']
y_valid = data['y_valid']

# Combine X_train and X_valid
X_train = np.concatenate((X_train, X_valid), axis=0)

# Combine y_train and y_valid
y_train = np.concatenate((y_train, y_valid), axis=0)

# Print the shapes to verify
print(f"X_train shape: {X_train.shape}")
print(f"y_train shape: {y_train.shape}")
print(f"X_test shape: {X_test.shape}")
print(f"y_test shape: {y_test.shape}")

X_train shape: (5752, 224, 224, 3)
y_train shape: (5752,)
X_test shape: (1440, 224, 224, 3)
y_test shape: (1440,)


In [3]:
def extract_hog_features(images):
    features = []
    for image in images:
        # Specify channel_axis=2 to indicate that channels are along the third axis (for color images)
        fd, hog_image = hog(image, pixels_per_cell=(8, 8), cells_per_block=(2, 2), visualize=True, multichannel=True, channel_axis=2)
        features.append(fd)
    return np.array(features)

X_train_features = extract_hog_features(X_train)
X_test_features = extract_hog_features(X_test)
print(f"X_train_features Shape: {X_train_features.shape}")
print(f"X_test_features Shape: {X_test_features.shape}")

  fd, hog_image = hog(image, pixels_per_cell=(8, 8), cells_per_block=(2, 2), visualize=True, multichannel=True, channel_axis=2)


X_train_features Shape: (5752, 26244)
X_test_features Shape: (1440, 26244)


In [5]:
from sklearn.model_selection import RandomizedSearchCV
from sklearn.svm import SVC
import pickle
from sklearn.metrics import classification_report

#defining support vector machine parameters
parameters = {
    'kernel':['linear','poly','rbf','sigmoid'], 
    'gamma':['scale','auto'],
    'C':np.linspace(0.01,.75,20)
    }

#defining randomized grid search cv
clf = RandomizedSearchCV(SVC(),param_distributions=parameters,random_state=99,verbose=3)
#fitting to LDA transformed training data 
search_lda = clf.fit(X_train_features, y_train)

Fitting 5 folds for each of 10 candidates, totalling 50 fits
[CV 1/5] END C=0.438421052631579, gamma=scale, kernel=sigmoid;, score=0.862 total time=35.2min
[CV 2/5] END C=0.438421052631579, gamma=scale, kernel=sigmoid;, score=0.862 total time=34.6min
[CV 3/5] END C=0.438421052631579, gamma=scale, kernel=sigmoid;, score=0.863 total time=35.5min
[CV 4/5] END C=0.438421052631579, gamma=scale, kernel=sigmoid;, score=0.863 total time=35.3min
[CV 5/5] END C=0.438421052631579, gamma=scale, kernel=sigmoid;, score=0.863 total time=19.5min
[CV 1/5] END C=0.20473684210526316, gamma=auto, kernel=rbf;, score=0.862 total time= 1.9min
[CV 2/5] END C=0.20473684210526316, gamma=auto, kernel=rbf;, score=0.862 total time= 1.8min
[CV 3/5] END C=0.20473684210526316, gamma=auto, kernel=rbf;, score=0.863 total time= 1.8min
[CV 4/5] END C=0.20473684210526316, gamma=auto, kernel=rbf;, score=0.863 total time= 1.9min
[CV 5/5] END C=0.20473684210526316, gamma=auto, kernel=rbf;, score=0.863 total time= 1.9min
[CV 

In [6]:
#dumping randomized search cv into pickle file 
with open('..data/exteral/svm_randomized_search_cv.pkl','wb') as f:
  pickle.dump(search_lda,f)

FileNotFoundError: [Errno 2] No such file or directory: '..data/exteral/svm_randomized_search_cv.pkl'

In [7]:
#best model
search_lda.best_params_

{'kernel': 'sigmoid', 'gamma': 'scale', 'C': 0.438421052631579}

In [4]:
# import matplotlib.pyplot as plt

# # Display some sample HOG features from X_train_features
# num_samples = 5  # Number of samples to display

# for i in range(num_samples):
#     plt.figure(figsize=(8, 4))
#     plt.subplot(1, 2, 1)
#     plt.imshow(X_train[i])  # Display the original image
#     plt.title("Original Image")

#     plt.subplot(1, 2, 2)
#     hog_feature = X_train_features[i]
#     plt.plot(hog_feature, color='black')  # Display the HOG feature as a 1D vector
#     plt.title("HOG Feature (1D)")
#     plt.show()

# # Display some sample HOG features from X_test_features
# for i in range(num_samples):
#     plt.figure(figsize=(8, 4))
#     plt.subplot(1, 2, 1)
#     plt.imshow(X_test[i])  # Display the original image
#     plt.title("Original Image")

#     plt.subplot(1, 2, 2)
#     hog_feature = X_test_features[i]
#     plt.plot(hog_feature, color='black')  # Display the HOG feature as a 1D vector
#     plt.title("HOG Feature (1D)")
#     plt.show()


In [9]:
# 3. Training: Train an SVM classifier
svm_classifier = SVC(kernel='sigmoid',gamma='scale', C=0.438421052631579, random_state=42)
svm_classifier.fit(X_train_features, y_train)

In [10]:
# 5. Testing
y_train_pred = svm_classifier.predict(X_train_features)
train_accuracy = accuracy_score(y_train, y_train_pred)
print(f"Train Accuracy: {train_accuracy * 100:.2f}%")

y_test_pred = svm_classifier.predict(X_test_features)
test_accuracy = accuracy_score(y_test, y_test_pred)
print(f"Test Accuracy: {test_accuracy * 100:.2f}%")

Train Accuracy: 86.23%
Test Accuracy: 90.00%


In [15]:
from sklearn.metrics import classification_report, confusion_matrix, cohen_kappa_score

# Make predictions on the test data
y_pred = svm_classifier.predict(X_test_features)

# Get the class labels
labels = list(set(y_train))

# Cohen's Kappa
kappa = cohen_kappa_score(y_test, y_pred)
print(f"Cohen's Kappa: {kappa}")

# Classification report
report = classification_report(y_test, y_pred, target_names=labels)
print("Classification report:")
print(report)

# Confusion Matrix
cm = confusion_matrix(y_test, y_pred)
print("Confusion Matrix:")
print(cm)




Cohen's Kappa: 0.0
Classification report:
                precision    recall  f1-score   support

pseudodiploria       0.00      0.00      0.00       144
          apal       0.90      1.00      0.95      1296

      accuracy                           0.90      1440
     macro avg       0.45      0.50      0.47      1440
  weighted avg       0.81      0.90      0.85      1440

Confusion Matrix:
[[   0  144]
 [   0 1296]]


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [None]:
evaluate_model(y_train, y_pred_rfc_train, letters)

In [13]:
# Evaluate the model on the test data
accuracy = svm_classifier.evaluate(X_test, y_test_onehot)[1]  # Index 1 corresponds to accuracy
print("Accuracy of the model is:", accuracy * 100, "%")

# You can also calculate other evaluation metrics, such as precision, recall, and F1-score.
from sklearn.metrics import classification_report

# Get model predictions
y_pred = model.predict(X_test)

# Convert one-hot encoded labels back to class labels
y_test_class = label_encoder.inverse_transform(np.argmax(y_test_onehot, axis=1))
y_pred_class = label_encoder.inverse_transform(np.argmax(y_pred, axis=1))

cohen_kappa = cohen_kappa_score(y_test_class, y_pred_class)

# Generate a classification report
report = classification_report(y_test_class, y_pred_class)

print("Cohen's Kappa:", cohen_kappa)
print("Classification Report:")
print(report)


AttributeError: 'SVC' object has no attribute 'evaluate'

In [None]:
# Example image (replace with your own image)
image_to_apply_hog = X_train[0]

# Calculate HOG features for the image
fd, hog_image = hog(image_to_apply_hog, pixels_per_cell=(8, 8), cells_per_block=(2, 2), visualize=True, multichannel=True, channel_axis=2)

# Rescale HOG image for better visualization
hog_image_rescaled = exposure.rescale_intensity(hog_image, in_range=(0, 10))

# Display the original image
plt.figure(figsize=(8, 4))
plt.subplot(1, 2, 1)
plt.imshow(image_to_apply_hog)
plt.title("Original Image")

# Display the HOG image
plt.subplot(1, 2, 2)
plt.imshow(hog_image_rescaled, cmap='gray')
plt.title("HOG Features")
plt.show()
