In [None]:
import numpy as np
import cv2
import os
from scipy import ndimage
from scipy.spatial import distance
from sklearn.cluster import KMeans
from sklearn.model_selection import train_test_split
from sklearn.svm import LinearSVC
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report

In [None]:
from google.colab import drive

drive.mount('/content/drive/')

Mounted at /content/drive/


Load grayscale and binary images from 10 different actions:

In [None]:
!unzip -q /content/drive/MyDrive/train_all.zip -d /content/

In [None]:
!unzip -q /content/drive/MyDrive/test_all.zip -d /content/

In [None]:
def load_images_from_folder(folder):
    images = {}
    for filename in os.listdir(folder):
        category = []
        path = folder + "/" + filename
        for cat in os.listdir(path):
            img = cv2.imread(path + "/" + cat,0)
            if img is not None:
                category.append(img)
        images[filename] = category
    return images

train = load_images_from_folder('/content/train_all')
test = load_images_from_folder('/content/test_all')

In [None]:
#number of acion classes
len(train)

10

Use SIFT algorithm to extract features of images:

In [None]:
def sift_features(images):
    sift_vectors = {}
    descriptor_list = []
    sift = cv2.SIFT_create(nfeatures=500)
    for key,value in images.items():
        features = []
        for img in value:
            kp, des = sift.detectAndCompute(img,None)
            descriptor_list.extend(des)
            features.append(des)
        sift_vectors[key] = features
    return [descriptor_list, sift_vectors]

sifts = sift_features(train)

descriptor_list = sifts[0]

all_bovw_feature = sifts[1]

test_bovw_feature = sift_features(test)[1]

Use Kmeans to generate visual words:

In [None]:
def kmeans(k, descriptor_list):
    kmeans = KMeans(n_clusters = k, n_init=10)
    kmeans.fit(descriptor_list)
    visual_words = kmeans.cluster_centers_
    return visual_words

visual_words = kmeans(500, descriptor_list)

In [None]:
def find_index(image, center):
    count = 0
    ind = 0
    for i in range(len(center)):
        if(i == 0):
           count = distance.euclidean(image, center[i])
        else:
            dist = distance.euclidean(image, center[i])
            if(dist < count):
                ind = i
                count = dist
    return ind

In [None]:
def image_class(all_bovw, centers):
    dict_feature = {}
    for key,value in all_bovw.items():
        category = []
        for img in value:
            histogram = np.zeros(len(centers))
            for each_feature in img:
                ind = find_index(each_feature, centers)
                histogram[ind] += 1
            category.append(histogram)
        dict_feature[key] = category
    return dict_feature

bovw_train = image_class(all_bovw_feature, visual_words)

bovw_test = image_class(test_bovw_feature, visual_words)

In [15]:
X_train = []
y_train = []
for key, value in bovw_train.items():
    for img in value:
        X_train.append(img)
        y_train.append(key)

X_test = []
y_test = []
for key, value in bovw_test.items():
    for img in value:
        X_test.append(img)
        y_test.append(key)

X_train = np.array(X_train)
y_train = np.array(y_train)
X_test = np.array(X_test)
y_test = np.array(y_test)

X_train, X_test, y_train, y_test = train_test_split(X_train, y_train, test_size=0.3, random_state=42)

clf = LinearSVC(C=1.0, penalty='l2', loss='squared_hinge', multi_class='ovr')
clf.fit(X_train, y_train)

y_pred = clf.predict(X_test)

accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)

Accuracy: 0.6984615384615385


In [16]:
report = classification_report(y_test, y_pred)

print(report)

              precision    recall  f1-score   support

        bend       0.86      0.81      0.84        81
        jack       0.72      0.65      0.68        77
        jump       0.68      0.61      0.64        56
       pjump       0.71      0.68      0.70        59
         run       0.32      0.32      0.32        38
        side       0.69      0.71      0.70        58
        skip       0.44      0.44      0.44        48
        walk       0.59      0.66      0.62        73
       wave1       0.85      0.89      0.87        97
       wave2       0.78      0.89      0.83        63

    accuracy                           0.70       650
   macro avg       0.67      0.66      0.66       650
weighted avg       0.70      0.70      0.70       650



In this project, we implement the proposed method of ISA article in the title of "Human action recognition with bag of visual words using different machine learning methods and hyperparameter optimization" with DOI: https://doi.org/10.1007/s00521-019-04365-9