In [1]:
import warnings
warnings.filterwarnings('ignore')

import cv2
import numpy as np
import pickle
from sklearn.cluster import MiniBatchKMeans
from sklearn.neighbors import KNeighborsClassifier
from sklearn.decomposition import PCA
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis

# Parts we added
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import f1_score, classification_report, accuracy_score
import pandas as pd
import matplotlib.pyplot as plt
import random

# Added for week 2
from numpy import linalg as LA
from sklearn.preprocessing import power_transform
from sklearn import svm

In [2]:
def _load_pickle(path):
    """Return the object pickled in `path`, closing the file handle afterwards."""
    # NOTE(review): pickle can execute arbitrary code while loading; only use
    # .dat files that come from a trusted source.
    with open(path, 'rb') as handle:
        return pickle.load(handle)


train_images_filenames = _load_pickle('train_images_filenames.dat')
test_images_filenames = _load_pickle('test_images_filenames.dat')
# Drop the first 15 characters of every stored path and prefix '..' instead
# (presumably this re-roots the dataset paths relative to the notebook -- TODO confirm).
train_images_filenames = ['..' + n[15:] for n in train_images_filenames]
test_images_filenames  = ['..' + n[15:] for n in test_images_filenames]
train_labels = _load_pickle('train_labels.dat')
test_labels = _load_pickle('test_labels.dat')

## We created functions to get SIFT descriptors and calculate Dense SIFT

In [3]:
def create_dense_kp(img_shape, step_size):
    """Build a regular grid of keypoints covering an image.

    Parameters
    ----------
    img_shape : sequence
        Only the first two entries are read: the row extent (y) and the
        column extent (x).
    step_size : int
        Spacing between neighbouring grid points; it is also passed to
        cv2.KeyPoint as the keypoint size.

    Returns
    -------
    list of cv2.KeyPoint
        Keypoints in row-major order (all x for the first y, then the next y, ...).
    """
    grid = []
    for y in range(0, img_shape[0], step_size):
        for x in range(0, img_shape[1], step_size):
            grid.append(cv2.KeyPoint(x, y, step_size))
    return grid

def get_descriptors(dense=True, feat_num=250, step_size=20, mode="train"):
    """Compute SIFT descriptors for every image of the train or test split.

    Parameters
    ----------
    dense : bool
        If True, descriptors are computed on the regular grid produced by
        create_dense_kp (Dense SIFT); otherwise SIFT detects its own keypoints.
    feat_num : int
        Passed to cv2.SIFT_create as its first argument.
    step_size : int
        Grid spacing for the dense keypoints (only used when `dense` is True).
    mode : str
        "train" selects the training filenames/labels; any other value selects
        the test filenames/labels.

    Returns
    -------
    descriptors : list of np.ndarray
        One descriptor matrix per image. An image for which SIFT returns no
        descriptors gets an empty (0, descriptor_size) float32 array instead of None.
    label_per_descriptor : list
        The label of each image, in the same order as `descriptors`.

    Raises
    ------
    FileNotFoundError
        If an image cannot be read by cv2.imread.
    """
    if mode == "train":
        img_filenames = train_images_filenames
        lbl_filenames = train_labels
    else:
        img_filenames = test_images_filenames
        lbl_filenames = test_labels

    Detector = cv2.SIFT_create(feat_num)

    descriptors = []
    label_per_descriptor = []

    for filename, label in zip(img_filenames, lbl_filenames):
        ima = cv2.imread(filename)
        if ima is None:
            # cv2.imread signals failure by returning None instead of raising.
            raise FileNotFoundError("Could not read image: " + str(filename))
        gray = cv2.cvtColor(ima, cv2.COLOR_BGR2GRAY)

        if dense:
            kpt = create_dense_kp(gray.shape, step_size=step_size)
            _, des = Detector.compute(gray, kpt)
        else:
            _, des = Detector.detectAndCompute(gray, None)

        if des is None:
            # No keypoints/descriptors for this image: keep the per-image
            # alignment with the labels but store a well-typed empty matrix.
            des = np.zeros((0, Detector.descriptorSize()), dtype=np.float32)

        descriptors.append(des)
        label_per_descriptor.append(label)

    return descriptors, label_per_descriptor

## This function is used to find visual words

In [4]:
def get_visual_words(descriptors, k=128):
    """Learn a k-word codebook and encode every image as a word histogram.

    Parameters
    ----------
    descriptors : list
        One descriptor matrix per image. Entries that are None or empty are
        tolerated: they do not contribute to the codebook and their histogram
        row stays all-zero.
    k : int
        Number of clusters (visual words) of the codebook.

    Returns
    -------
    codebook : MiniBatchKMeans
        The fitted clustering model.
    visual_words : np.ndarray, shape (len(descriptors), k), float32
        Row i holds the visual-word counts of image i.

    Raises
    ------
    ValueError
        If no image provides any descriptor.
    """
    usable = [des for des in descriptors if des is not None and len(des) > 0]
    if not usable:
        raise ValueError("get_visual_words needs at least one non-empty descriptor matrix")

    D = np.vstack(usable)
    codebook = MiniBatchKMeans(n_clusters=k, verbose=False, batch_size=k*20, compute_labels=False,
                               reassignment_ratio=10**-4, random_state=42)
    codebook.fit(D)

    visual_words = np.zeros((len(descriptors), k), dtype=np.float32)

    for i, des in enumerate(descriptors):
        if des is None or len(des) == 0:
            # Nothing to quantise for this image: leave its histogram at zero.
            continue
        words = codebook.predict(des)
        visual_words[i, :] = np.bincount(words, minlength=k)

    return codebook, visual_words

## We get the most common words for each class and plot them to see if there are any significant differences between them.

In [None]:
Train_descriptors, train_labels = get_descriptors(dense=False, feat_num=500)
codebook, visual_words = get_visual_words(Train_descriptors, 128)

# For every class, gather the indices of the 10 most frequent visual words of
# each of its images into one flat array (one chunk of 10 per image).
class_dict = {}

for lbl, elem in zip(train_labels, visual_words):
    top_ten = np.argsort(elem)[::-1][:10]
    collected = class_dict.get(lbl)
    if collected is None:
        class_dict[lbl] = top_ten
    else:
        class_dict[lbl] = np.concatenate((collected, top_ten))

In [None]:
# One bar chart per class on a 4x2 grid, each drawn in the next colour of the
# default matplotlib colour cycle.
colors = plt.rcParams["axes.prop_cycle"]()

fig, ax = plt.subplots(nrows=4, ncols=2, figsize=(15, 20))

for num, (key, top_words) in enumerate(class_dict.items()):
    bar_color = next(colors)["color"]
    row, col = divmod(num, 2)
    # How often each word index appears among the per-image top-10 lists;
    # keep the 10 most frequent ones.
    word_counts = pd.Series(top_words).value_counts().iloc[:10]
    panel = ax[row][col]
    panel.bar(word_counts.index, word_counts.values, color=bar_color)
    panel.title.set_text("Most common 10 words for " + key)

## Hyperparameter search for SIFT and KNN with cross validation

We tried different numbers of local features, codebook sizes, distance metrics and values of n_neighbors.
Since this takes a lot of time, we ran it once and saved the results to a CSV file.
We ran a grid search with 8-fold cross-validation.

You can get the results from the csv instead of running the cell below

In [None]:
# Load the stored SIFT + KNN search results from knn_hp_results.csv
# (the file the hyperparameter-search cell writes with to_csv).
knn_res_df = pd.read_csv("knn_hp_results.csv")

In [None]:
# Hyperparameter search for SIFT + KNN.
# Outer loops: number of SIFT features and codebook size (both change the
# features, so the bag-of-words histograms must be rebuilt for each pair).
# Inner search: GridSearchCV over n_neighbors and metric with 8-fold CV.
# One row (the best KNN setting) is recorded per (n_features, codebook_size).
knn_res_columns = ["n_features", "codebook_size", "n_neighbors", "dist_metric",
                   "mean_train_acc", "mean_test_acc", "mean_train_f1", "mean_test_f1"]

params = {
    "n_features": np.arange(200, 800, 100),
    #"dense": [True, False],
    #"step_size": np.arange(10, 60, 10),
    "codebook_size": np.arange(32, 256, 32),
    "n_neighbors": np.arange(3, 9, 1),
    "metric": ["euclidean", "manhattan", "chebyshev", "minkowski"]
}

# Rows are collected in a plain list and turned into a DataFrame once at the
# end: DataFrame.append was removed in pandas 2.0 and copied the frame on
# every call.
knn_res_records = []

for n in params["n_features"]:
    Train_descriptors, train_labels = get_descriptors(dense=False, feat_num=n)
    for cs in params["codebook_size"]:
        codebook, visual_words = get_visual_words(Train_descriptors, cs)

        print(n, cs)

        knn = KNeighborsClassifier(n_jobs=-1)
        knn_grid = GridSearchCV(knn, {k:params[k] for k in ("n_neighbors", "metric") if k in params}, cv=8,
                                scoring=["accuracy", "f1_macro"], refit="accuracy", return_train_score=True)
        knn_grid.fit(visual_words, train_labels)

        print("Finished this iteration!")
        # Row of cv_results_ with the best (lowest) accuracy rank.
        best_acc = np.argmin(knn_grid.cv_results_["rank_test_accuracy"])
        knn_res_records.append({
            "n_features": n,
            "codebook_size": cs,
            "n_neighbors": knn_grid.best_params_["n_neighbors"],
            "dist_metric": knn_grid.best_params_["metric"],
            "mean_train_acc": knn_grid.cv_results_["mean_train_accuracy"][best_acc],
            "mean_test_acc": knn_grid.cv_results_["mean_test_accuracy"][best_acc],
            "mean_train_f1": knn_grid.cv_results_["mean_train_f1_macro"][best_acc],
            "mean_test_f1": knn_grid.cv_results_["mean_test_f1_macro"][best_acc]
        })

knn_res_df = pd.DataFrame(knn_res_records, columns=knn_res_columns)
knn_res_df.to_csv("knn_hp_results.csv", index=False)

## Effect of N_features, Codebook size and N_neighbors

In [None]:
# Mean cross-validated accuracy per n_features value. The column is selected
# before aggregating so the string column dist_metric is never averaged
# (groupby(...).mean() over the whole frame raises on it in pandas >= 2.0).
knn_res_df.groupby("n_features")["mean_test_acc"].mean().plot(marker='o', linestyle="None")

In [None]:
# Mean cross-validated accuracy per codebook size. The column is selected
# before aggregating so the string column dist_metric is never averaged
# (groupby(...).mean() over the whole frame raises on it in pandas >= 2.0).
knn_res_df.groupby("codebook_size")["mean_test_acc"].mean().plot(marker='o', linestyle="None")

In [None]:
# Mean cross-validated accuracy per n_neighbors value. The column is selected
# before aggregating so the string column dist_metric is never averaged
# (groupby(...).mean() over the whole frame raises on it in pandas >= 2.0).
knn_res_df.groupby("n_neighbors")["mean_test_acc"].mean().plot(marker='o', linestyle="None")

## For SIFT, best combination is:

<b> N_features </b> = 500  <br>
<b> Codebook_size </b> = 96 <br>
<b> N_neighbors </b> = 8 <br>
<b> Distance_metric </b> = euclidean

<b> We can achieve a test accuracy of %58. </b>

In [None]:
# Show the recorded configurations ordered by mean_test_acc, best first.
knn_res_df.sort_values("mean_test_acc", ascending=False)

## Hyperparameter search for Dense-SIFT and KNN

We saw that the best n_features value for SIFT is 500, so we tuned the step size while keeping n_features at 500.
We ran a grid search with 8-fold cross-validation.

You can get the results from the csv instead of running the cell below.

In [None]:
# Load the stored Dense-SIFT + KNN search results from knn_dSIFT_res.csv
# (the file the Dense-SIFT search cell writes with to_csv).
knn_dSIFT_res_df = pd.read_csv("knn_dSIFT_res.csv")

In [None]:
# Hyperparameter search for Dense SIFT + KNN.
# Outer loops: dense-grid step size and codebook size; inner search:
# GridSearchCV over n_neighbors and metric with 8-fold CV on the training
# set only (the test split is not touched here). One row (the best KNN
# setting) is recorded per (step_size, codebook_size).
knn_dSIFT_res_columns = ["step_size", "codebook_size", "n_neighbors", "dist_metric",
                         "mean_train_acc", "mean_test_acc", "mean_train_f1", "mean_test_f1"]

params = {
    "step_size": np.arange(10, 60, 10),
    "codebook_size": np.arange(64, 256, 32),
    "n_neighbors": np.arange(6, 9, 1),
    "metric": ["euclidean", "manhattan", "chebyshev", "minkowski"]
}

# Rows are collected in a plain list and turned into a DataFrame once at the
# end: DataFrame.append was removed in pandas 2.0 and copied the frame on
# every call.
knn_dSIFT_res_records = []

for ss in params["step_size"]:
    # int(ss): range() inside create_dense_kp gets a plain Python int step.
    Train_descriptors, train_labels = get_descriptors(feat_num=500, step_size=int(ss))

    for cs in params["codebook_size"]:
        codebook, visual_words = get_visual_words(Train_descriptors, cs)

        print(ss, cs)

        knn = KNeighborsClassifier(n_jobs=-1)
        knn_grid = GridSearchCV(knn, {k:params[k] for k in ("n_neighbors", "metric") if k in params}, cv=8,
                                scoring=["accuracy", "f1_macro"], refit="accuracy", return_train_score=True)
        knn_grid.fit(visual_words, train_labels)

        print("Finished this iteration!")
        # Row of cv_results_ with the best (lowest) accuracy rank.
        best_acc = np.argmin(knn_grid.cv_results_["rank_test_accuracy"])
        knn_dSIFT_res_records.append({
            "step_size": ss,
            "codebook_size": cs,
            "n_neighbors": knn_grid.best_params_["n_neighbors"],
            "dist_metric": knn_grid.best_params_["metric"],
            "mean_train_acc": knn_grid.cv_results_["mean_train_accuracy"][best_acc],
            "mean_test_acc": knn_grid.cv_results_["mean_test_accuracy"][best_acc],
            "mean_train_f1": knn_grid.cv_results_["mean_train_f1_macro"][best_acc],
            "mean_test_f1": knn_grid.cv_results_["mean_test_f1_macro"][best_acc]
        })

knn_dSIFT_res_df = pd.DataFrame(knn_dSIFT_res_records, columns=knn_dSIFT_res_columns)
knn_dSIFT_res_df.to_csv("knn_dSIFT_res.csv", index=False)

## Effect of step size. We can see that a smaller step size is better.

In [None]:
# Mean cross-validated accuracy per step size. The column is selected before
# aggregating so the string column dist_metric is never averaged
# (groupby(...).mean() over the whole frame raises on it in pandas >= 2.0).
knn_dSIFT_res_df.groupby("step_size")["mean_test_acc"].mean().plot(marker='o', linestyle="None")

## For Dense-SIFT, best combination is:

<b> Step_size </b> = 10  <br>
<b> Codebook_size </b> = 224 <br>
<b> N_neighbors </b> = 8 <br>
<b> Distance_metric </b> = manhattan

 We can achieve a test accuracy of <b> %79.</b>

## Dense-SIFT clearly outperforms vanilla SIFT.

In [None]:
# Show the recorded Dense-SIFT configurations ordered by mean_test_acc, best first.
knn_dSIFT_res_df.sort_values("mean_test_acc", ascending=False)

## We tried without tuning Logistic Regression with the parameters of Dense-SIFT. 

It achieves <b> %99 </b> accuracy in train set but only <b> %78 </b> in test set, so it suffers from overfitting.

In [None]:
# Dense SIFT (step 10) descriptors for both splits and a 224-word codebook
# learned on the training descriptors only.
Train_descriptors, train_labels = get_descriptors(feat_num=500, step_size=10)
Test_descriptors, test_labels = get_descriptors(feat_num=500, step_size=10, mode="test")
codebook, visual_words = get_visual_words(Train_descriptors, 224)

# Encode each test image as a 224-bin histogram of visual words.
visual_words_test = np.zeros((len(test_labels), 224), dtype=np.float32)
for row, test_des in enumerate(Test_descriptors):
    assigned_words = codebook.predict(test_des)
    visual_words_test[row, :] = np.bincount(assigned_words, minlength=224)

# Logistic Regression with default regularisation (no tuning).
lg = LogisticRegression(solver='liblinear')
lg.fit(visual_words, train_labels)

train_preds = lg.predict(visual_words)
test_preds = lg.predict(visual_words_test)

train_accuracy = accuracy_score(train_labels, train_preds)
test_accuracy = accuracy_score(test_labels, test_preds)
print("Train accuracy for LogReg: ", train_accuracy)
print("Test accuracy for LogReg: ", test_accuracy)

## We did a simple hyperparameter tuning with only 2 parameters for Logistic Regression to see if we can overcome the overfitting problem.

We did a grid search with splitting the data into 8 for cross validation.

In [None]:
# Dense SIFT (step 10) descriptors for both splits and a 224-word codebook
# learned on the training descriptors only.
Train_descriptors, train_labels = get_descriptors(feat_num=500, step_size=10)
Test_descriptors, test_labels = get_descriptors(feat_num=500, step_size=10, mode="test")
codebook, visual_words = get_visual_words(Train_descriptors, 224)

# Encode each test image as a 224-bin histogram of visual words.
visual_words_test = np.zeros((len(test_labels), 224), dtype=np.float32)
for row, test_des in enumerate(Test_descriptors):
    assigned_words = codebook.predict(test_des)
    visual_words_test[row, :] = np.bincount(assigned_words, minlength=224)

# Search grid: regularisation strength C and the solver's iteration cap.
params = {
    "C": np.arange(0.001, 1, 0.01),
    "max_iter": np.arange(50, 550, 50)
}

lg = LogisticRegression(solver='liblinear')

# 8-fold CV scored on accuracy and macro-F1; the final model is refit on accuracy.
lg_grid = GridSearchCV(
    lg,
    params,
    cv=8,
    scoring=["accuracy", "f1_macro"],
    refit="accuracy",
    return_train_score=True,
)
lg_grid.fit(visual_words, train_labels)

Best params for Logistic Regression

In [None]:
# Display the C / max_iter combination selected by the grid search (refit on accuracy).
lg_grid.best_params_

## Now we make predictions on the whole training and test sets to see the difference between performances in KNN and Logistic Regression

We can see that Logistic Regression outperforms KNN, but the gap between its train and test performance is larger.

<h5> Train Accuracy: KNN = %84, Logistic Regression = %92 <br>
Test Accuracy: KNN = %79, Logistic Regression = %84 </h5>

In [None]:
# Dense SIFT (step 10) descriptors for both splits and a 224-word codebook
# learned on the training descriptors only.
Train_descriptors, train_labels = get_descriptors(feat_num=500, step_size=10)
Test_descriptors, test_labels = get_descriptors(feat_num=500, step_size=10, mode="test")
codebook, visual_words = get_visual_words(Train_descriptors, 224)

# Encode each test image as a 224-bin histogram of visual words.
visual_words_test = np.zeros((len(test_labels), 224), dtype=np.float32)
for row, test_des in enumerate(Test_descriptors):
    assigned_words = codebook.predict(test_des)
    visual_words_test[row, :] = np.bincount(assigned_words, minlength=224)

In [None]:
# Fit both classifiers on the bag-of-words histograms and report the
# accuracy on the full training and test sets, KNN first.
knn = KNeighborsClassifier(n_jobs=-1, n_neighbors=8, metric="manhattan")
lg = LogisticRegression(C=0.001, max_iter=50, solver='liblinear')

for model_name, model in (("KNN", knn), ("LogReg", lg)):
    model.fit(visual_words, train_labels)

    train_preds = model.predict(visual_words)
    test_preds = model.predict(visual_words_test)

    print(f"Train accuracy for {model_name}: ", round(accuracy_score(train_labels, train_preds), 3))
    print(f"Test accuracy for {model_name}: ", round(accuracy_score(test_labels, test_preds), 3))

## Now we try PCA with both KNN and Logistic Regression

<h4> Changes in accuracy for training set: </h4>

<b> KNN: </b> %84 to %82 <br>
<b> Logistic Regression: </b> %92 to %81

<h4> Changes in accuracy for test set: </h4>

<b> KNN: </b> %79 to %75 <br>
<b> Logistic Regression: </b> %84 to %75

<h3> We can see that even though Logistic Regression was better before, its performance drops noticeably after PCA and becomes nearly identical to KNN's. </h3>

In [None]:
# Project the histograms onto 64 principal components (fitted on the training
# set only), then train and score both classifiers in the reduced space.
pca = PCA(n_components=64)
VWpca = pca.fit_transform(visual_words)
vwtestpca = pca.transform(visual_words_test)

knnpca = KNeighborsClassifier(n_jobs=-1, n_neighbors=8, metric="manhattan")
lgpca = LogisticRegression(C=0.001, max_iter=50, solver='liblinear')

for model_name, model in (("KNN", knnpca), ("LogReg", lgpca)):
    model.fit(VWpca, train_labels)

    accuracy = round(model.score(VWpca, train_labels), 3)
    print(f"Train Accuracy with PCA for {model_name}:", accuracy)
    accuracy = round(model.score(vwtestpca, test_labels), 3)
    print(f"Test Accuracy with PCA for {model_name}:", accuracy)

## LDA for both KNN and Logistic Regression

<h4> Changes in accuracy for training set: </h4>

<b> KNN: </b> %84 to %92 <br>
<b> Logistic Regression: </b> %92 to %88

<h4> Changes in accuracy for test set: </h4>

<b> KNN: </b> %79 to %84 <br>
<b> Logistic Regression: </b> %84 to %72

<h3> While the performance of Logistic Regression drops, KNN's improves and becomes even better than before. </h3>

In [None]:
# Project the histograms onto 7 LDA components (fitted on the training set
# with its labels), then train and score both classifiers in that space.
lda = LinearDiscriminantAnalysis(n_components=7)
VWlda = lda.fit_transform(visual_words,train_labels)

knnlda = KNeighborsClassifier(n_jobs=-1, n_neighbors=8, metric="manhattan")
lglda = LogisticRegression(C=0.001, max_iter=50, solver='liblinear')

knnlda.fit(VWlda, train_labels)
lglda.fit(VWlda, train_labels)

vwtestlda = lda.transform(visual_words_test)

# The printed labels now say "LDA": they previously said "PCA" although this
# cell evaluates the LDA projection.
accuracy = round(knnlda.score(VWlda, train_labels), 3)
print("Train Accuracy with LDA for KNN:", accuracy)
accuracy = round(lglda.score(VWlda, train_labels), 3)
print("Train Accuracy with LDA for LogReg:", accuracy)

accuracy = round(knnlda.score(vwtestlda, test_labels), 3)
print("Test Accuracy with LDA for KNN:", accuracy)
accuracy = round(lglda.score(vwtestlda, test_labels), 3)
print("Test Accuracy with LDA for LogReg:", accuracy)


## Conclusion:

<h3> Best performing combinations are: </h3>

<b> Descriptor Method: </b> Dense_SIFT <br>
<b> Step_size </b> = 10  <br>
<b> Codebook_size </b> = 224 <br>

<h3> For Predictors: </h3> <br>

Either <b> KNN </b> with given parameters <br>
<b> N_neighbors </b> = 8 <br>
<b> Distance_metric </b> = manhattan <br>
after applying <b> LDA. </b>

Or <b> Logistic Regression </b> with given parameters <br>
<b> C </b> = 0.001 <br>
<b> Max_iter </b> = 50 <br>
without <b> LDA </b> or <b> PCA </b>.


<h3> Best performance achieved with both predictors: </h3>

<b>%92</b> Accuracy in Train Set <br>
<b>%84</b> Accuracy in Test Set

## Examining the performances of each class.

We see that the worst performance is in the <b> Opencountry </b> and <b> coast </b> classes for both of them.

In [None]:
# Per-class precision/recall/F1 on the test set for KNN on the LDA-projected features.
preds = knnlda.predict(vwtestlda)
print(classification_report(test_labels, preds))

In [None]:
# Per-class precision/recall/F1 on the test set for `lg`, applied to the
# un-projected histograms (no PCA / LDA).
preds = lg.predict(visual_words_test)
print(classification_report(test_labels, preds))

In [None]:
# Show four randomly chosen test images of each of the two classes on a 2x2 grid.
# Images are picked by substring match on the file path.
oc_imgs = [path for path in test_images_filenames if "Opencountry" in path]
random.shuffle(oc_imgs)

fig, axes = plt.subplots(2, 2, figsize=(15,15))
fig.suptitle("Some images from Opencountry class")

# axes.flat walks the grid row by row, i.e. panel n sits at (n // 2, n % 2).
for panel, path in zip(axes.flat, oc_imgs[:4]):
    bgr = cv2.imread(path)
    # OpenCV loads BGR; matplotlib expects RGB.
    panel.imshow(cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB))


cst_imgs = [path for path in test_images_filenames if "coast" in path]
random.shuffle(cst_imgs)

fig, axes = plt.subplots(2, 2, figsize=(15,15))
fig.suptitle("Some images from coast class")

for panel, path in zip(axes.flat, cst_imgs[:4]):
    bgr = cv2.imread(path)
    panel.imshow(cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB))

<b>Dense SIFT with tiny steps</b>

In [None]:
# Dense SIFT with small steps on resized images.
# For every target image size the training images are loaded and resized once;
# for every step size dense descriptors are computed on those resized images;
# for every codebook size a KNN grid search (8-fold CV) is run and its best
# setting recorded.
# The earlier version passed the filename lists to cv2.resize (and overwrote
# them); here the global filename lists are only read.
params = {
    "step_size": np.arange(5, 50, 5),
    "codebook_size": np.arange(64, 256, 32),
    "n_neighbors": np.arange(3, 9, 1),
    "metric": ["euclidean", "manhattan", "chebyshev", "minkowski"]
}

image_size = [(64, 64), (128, 128), (256, 256)]


def _load_resized_gray(filenames, dim):
    """Read every image in `filenames`, convert it to grayscale and resize it to `dim`."""
    resized = []
    for filename in filenames:
        ima = cv2.imread(filename)
        if ima is None:
            # cv2.imread signals failure by returning None instead of raising.
            raise FileNotFoundError("Could not read image: " + str(filename))
        gray = cv2.cvtColor(ima, cv2.COLOR_BGR2GRAY)
        resized.append(cv2.resize(gray, dim))
    return resized


def _dense_descriptors(gray_images, step_size, feat_num=500):
    """Compute SIFT descriptors on a regular grid (spacing `step_size`) for each image."""
    detector = cv2.SIFT_create(feat_num)
    descriptors = []
    for gray in gray_images:
        kpt = create_dense_kp(gray.shape, step_size=step_size)
        _, des = detector.compute(gray, kpt)
        descriptors.append(des)
    return descriptors


tiny_dSIFT_records = []

for dim in image_size:
    train_gray_resized = _load_resized_gray(train_images_filenames, dim)

    for ss in params["step_size"]:
        Train_descriptors = _dense_descriptors(train_gray_resized, int(ss))

        for cs in params["codebook_size"]:
            codebook, visual_words = get_visual_words(Train_descriptors, cs)

            knn = KNeighborsClassifier(n_jobs=-1)
            knn_grid = GridSearchCV(knn, {k:params[k] for k in ("n_neighbors", "metric") if k in params}, cv=8,
                                    scoring=["accuracy", "f1_macro"], refit="accuracy", return_train_score=True)
            knn_grid.fit(visual_words, train_labels)

            # Row of cv_results_ with the best (lowest) accuracy rank.
            best_acc = np.argmin(knn_grid.cv_results_["rank_test_accuracy"])
            tiny_dSIFT_records.append({
                "image_size": dim[0],
                "step_size": ss,
                "codebook_size": cs,
                "n_neighbors": knn_grid.best_params_["n_neighbors"],
                "dist_metric": knn_grid.best_params_["metric"],
                "mean_train_acc": knn_grid.cv_results_["mean_train_accuracy"][best_acc],
                "mean_test_acc": knn_grid.cv_results_["mean_test_accuracy"][best_acc],
            })

knn_tiny_dSIFT_res_df = pd.DataFrame(tiny_dSIFT_records)
knn_tiny_dSIFT_res_df.sort_values("mean_test_acc", ascending=False).head(10)

<b>L2 Norm</b>

In [None]:
def get_visual_words_norm(descriptors, k=128, L2norm = True, power_norm_method = 'yeo-johnson'):
    """Normalise the local descriptors, then build a codebook and word histograms.

    Parameters
    ----------
    descriptors : list
        One descriptor matrix per image. Entries that are None or empty are
        tolerated and produce an all-zero histogram row.
    k : int
        Number of clusters (visual words) of the codebook.
    L2norm : bool
        If truthy, every descriptor (row) is divided by its own L2 norm.
        Otherwise the stacked descriptors are passed through
        sklearn.preprocessing.power_transform.
    power_norm_method : str
        `method` argument for power_transform ('yeo-johnson' or 'box-cox').
        NOTE(review): Box-Cox only accepts strictly positive data, so it is
        expected to fail on descriptors containing zeros -- confirm before use.

    Returns
    -------
    codebook : MiniBatchKMeans
        Clustering model fitted on the *normalised* descriptors; descriptors
        of new images must be normalised the same way before `predict`.
    visual_words : np.ndarray, shape (len(descriptors), k), float32
        Row i holds the visual-word counts of image i.

    Raises
    ------
    ValueError
        If no image provides any descriptor.
    """
    # Number of descriptors per image, so the stacked matrix can be split
    # back into per-image chunks after normalisation.
    lengths = [0 if des is None else len(des) for des in descriptors]
    if sum(lengths) == 0:
        raise ValueError("get_visual_words_norm needs at least one non-empty descriptor matrix")

    D = np.vstack([des for des, n_des in zip(descriptors, lengths) if n_des > 0]).astype(np.float64)

    if L2norm:
        # Row-wise L2 normalisation; all-zero rows are left untouched
        # (their norm is replaced by 1 to avoid dividing by zero).
        row_norms = LA.norm(D, ord=2, axis=1, keepdims=True)
        row_norms[row_norms == 0] = 1.0
        D = D / row_norms
    else:
        # The transform is fitted on all stacked descriptors at once.
        D = power_transform(D, method=power_norm_method)

    codebook = MiniBatchKMeans(n_clusters=k, verbose=False, batch_size=k*20, compute_labels=False,
                               reassignment_ratio=10**-4, random_state=42)
    codebook.fit(D)

    # Cut the normalised matrix back into one chunk per image (empty chunks
    # for images without descriptors).
    per_image = np.split(D, np.cumsum(lengths)[:-1])

    visual_words = np.zeros((len(descriptors), k), dtype=np.float32)

    for i, des in enumerate(per_image):
        if len(des) == 0:
            continue
        words = codebook.predict(des)
        visual_words[i, :] = np.bincount(words, minlength=k)

    return codebook, visual_words

<b>SVM Classifier</b>
Get the descriptors and then create the SVM model.