In [1]:
import io_functions
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn import preprocessing
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score

In [2]:
# Load the train and test splits through the project-local io_functions helper.
# Per the original author's note, every image comes back as a flattened RGB
# vector (presumably 128*128*3 values, matching the reshape applied during HOG
# extraction -- confirm against io_functions.read_data, which is not shown here).
X_train, y_train = io_functions.read_data("dataset/train")
X_test, y_test = io_functions.read_data("dataset/test")

#### **Apply HOG Feature Extraction**

**1)** Reshape each flattened image back to its original shape, 128x128x3

**2)** Convert to grayscale

**3)** Compute HOG features

**4)** Apply L2 normalization to each feature vector so that its Euclidean (L2) norm is equal to 1

In [3]:
from skimage import color, feature

# Compute HOG Features
def compute_hog_features(rgb_image_flat, image_shape=(128, 128, 3)):
    """Compute the HOG descriptor of a single flattened RGB image.

    Parameters
    ----------
    rgb_image_flat : array-like
        Flattened RGB image whose number of elements equals the product of
        ``image_shape``.
    image_shape : tuple of int, optional
        Shape to restore before feature extraction; expected to be
        ``(height, width, 3)`` because the image is passed to ``rgb2gray``.
        Defaults to ``(128, 128, 3)``, the size used by this notebook.

    Returns
    -------
    numpy.ndarray
        1-D HOG feature vector returned by ``skimage.feature.hog``.

    Raises
    ------
    ValueError
        If ``rgb_image_flat`` cannot be reshaped to ``image_shape``.
    """
    # Restore the spatial layout; np.asarray lets plain sequences be passed too.
    rgb_image = np.asarray(rgb_image_flat).reshape(image_shape)

    # HOG is computed here on a single-channel image, so drop the colour.
    image_gray = color.rgb2gray(rgb_image)

    # 'L2-Hys' block normalisation; all other HOG parameters keep the
    # library defaults. visualize=False returns only the feature vector.
    hog_features = feature.hog(image_gray, block_norm='L2-Hys', visualize=False)

    return hog_features

In [4]:
# Turn every flattened image into its HOG descriptor and stack the descriptors
# into one array per split.
# NOTE(review): X_train / X_test are overwritten in place, so this cell should
# not be re-run without first reloading the data -- the second pass would try
# to reshape HOG vectors as if they were images.
X_train = np.array(list(map(compute_hog_features, X_train)))
X_test = np.array(list(map(compute_hog_features, X_test)))

# Rescale each HOG descriptor (one row per image) with sklearn's `normalize`,
# whose defaults apply the L2 norm row-wise. These are feature vectors, not
# pixel values.
X_train = preprocessing.normalize(X_train)
X_test = preprocessing.normalize(X_test)

In [5]:
def evaluate(model):
    """Score a fitted model on the notebook's train and test splits.

    Reads the module-level ``X_train``/``y_train`` and ``X_test``/``y_test``
    variables, predicts with ``model.predict`` and measures accuracy with
    ``accuracy_score``. Both scores are printed and then returned.

    Parameters
    ----------
    model : object
        Any fitted estimator exposing ``predict(X)``.

    Returns
    -------
    tuple
        ``(train_accuracy, test_accuracy)``.
    """
    # Train split first, then test split -- the unpacking relies on this order.
    splits = ((X_train, y_train), (X_test, y_test))
    train_accuracy, test_accuracy = (
        accuracy_score(labels, model.predict(features))
        for features, labels in splits
    )

    print("Train Accuracy:", train_accuracy)
    print("Test Accuracy:", test_accuracy)

    return train_accuracy, test_accuracy

#### **Logistic Regression**

In [8]:
from sklearn.linear_model import LogisticRegression

# Multinomial logistic regression on the normalised HOG descriptors.
# max_iter is raised to 1000 to give the solver more iterations to converge.
# `fit` returns the estimator itself, so construction and training are chained.
logreg = LogisticRegression(
    multi_class='multinomial',
    max_iter=1000,
    n_jobs=-1,
).fit(X_train, y_train)

# Print and keep the train/test accuracy for later comparison.
logreg_train_acc, logreg_test_acc = evaluate(logreg)

Train Accuracy: 0.5949661976977891
Test Accuracy: 0.5697589481373265


#### **Random Forest**

In [10]:
from sklearn.ensemble import RandomForestClassifier

# Random forest with default hyperparameters, trained on all available cores.
# A fixed random_state makes the bootstrap samples and feature sub-sampling
# deterministic, so the reported accuracy is reproducible across kernel
# restarts (without it every run yields a slightly different forest).
rf = RandomForestClassifier(n_jobs=-1, random_state=42)
rf.fit(X_train, y_train)

# Print and keep the train/test accuracy for later comparison.
rf_train_acc, rf_test_acc = evaluate(rf)

Train Accuracy: 1.0
Test Accuracy: 0.5441928414901388


#### **Weighted KNN**

In [14]:
from sklearn.neighbors import KNeighborsClassifier

# Candidate neighbourhood sizes: k = 1 .. 20
k_values = np.arange(1, 21)
train_accuracies = []
test_accuracies = []

# Fit one distance-weighted k-NN per candidate k and record both accuracies.
# NOTE(review): k is compared on the test split here; a separate validation
# split would avoid tuning on the data used for the final score.
for k in k_values:
    knn = KNeighborsClassifier(n_neighbors=k, weights='distance', n_jobs=-1)
    knn.fit(X_train, y_train)

    # evaluate() prints the scores for this k and returns them.
    knn_train_acc, knn_test_acc = evaluate(knn)
    train_accuracies.append(knn_train_acc)
    test_accuracies.append(knn_test_acc)

# Accuracy as a function of k, one curve per split.
fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(k_values, train_accuracies, label='Training Accuracy')
ax.plot(k_values, test_accuracies, label='Test Accuracy')
ax.set_xlabel('Number of Neighbors (k)')
ax.set_ylabel('Accuracy')
ax.set_title('k-NN Accuracy Curve')
ax.legend()
plt.show()

Train Accuracy: 1.0
Test Accuracy: 0.5449233016800584


In [7]:
from sklearn.neighbors import KNeighborsClassifier

# Distance-weighted k-NN with k = 2.
# `fit` returns the estimator itself, so construction and training are chained.
knn = KNeighborsClassifier(
    n_neighbors=2,
    weights='distance',
    n_jobs=-1,
).fit(X_train, y_train)

# Print and keep the train/test accuracy for later comparison.
knn_train_acc, knn_test_acc = evaluate(knn)

Train Accuracy: 1.0
Test Accuracy: 0.5449233016800584
