In [15]:
import numpy as np
import cv2
import pandas as pd
import matplotlib.pyplot as plt
import scipy.ndimage as ndimage
from sklearn import preprocessing
import os
import seaborn as sns
from skimage.feature import hog
from scipy.cluster.vq import kmeans,vq

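Each row of a feature file starts with 10 columns of per-trajectory information (which is why the trajectory descriptor ends at column 40 below), followed by five descriptors concatenated in this order:

- Trajectory:    2x[trajectory length] (default 30 dimensions)
- HOG:           8x[spatial cells]x[spatial cells]x[temporal cells] (default 96 dimensions)
- HOF:           9x[spatial cells]x[spatial cells]x[temporal cells] (default 108 dimensions)
- MBHx:          8x[spatial cells]x[spatial cells]x[temporal cells] (default 96 dimensions)
- MBHy:          8x[spatial cells]x[spatial cells]x[temporal cells] (default 96 dimensions)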

In [2]:
# Exclusive end-column index of each descriptor inside one feature row. Every
# offset is the previous offset plus the width of the descriptor it closes.
# NOTE(review): the descriptor notes above list the trajectory as 30-dim, yet
# its block ends at column 40 -- presumably 10 leading per-trajectory info
# columns precede it; confirm against the feature extractor's output format.
_hog_width, _hof_width, _mbh_width = 96, 108, 96

trajectory_range = 40
hog_range = trajectory_range + _hog_width
hof_range = hog_range + _hof_width
mbhx_range = hof_range + _mbh_width
mbhy_range = mbhx_range + _mbh_width

### Separating files for training and testing

In [3]:
# Directory holding one feature file per recording.
feature_files_path="./features/"
# os.listdir returns entries in arbitrary order, so sort the joined paths:
# every list derived from `files` is then identical across runs and machines.
files = sorted(
    os.path.join(feature_files_path, entry)
    for entry in os.listdir(feature_files_path)
)

In [4]:
# Filename tags that mark a feature file as part of the training split.
training_files = ["-nm-01-", "-nm-02-", "-nm-03-", "-nm-04-"]
# Keep every path that contains at least one of those tags.
training_set_files = [
    path
    for path in files
    if any(tag in path for tag in training_files)
]

In [5]:
# Filename tags that select the first test set.
test_1_files = ["-nm-05-", "-nm-06-"]
# Keep every path that contains at least one of those tags.
test_set_1_files = [
    path
    for path in files
    if any(tag in path for tag in test_1_files)
]

In [6]:
# Filename tags that select the second test set.
test_2_files = ["-bg-01-", "-bg-02-"]
# Keep every path that contains at least one of those tags.
test_set_2_files = [
    path
    for path in files
    if any(tag in path for tag in test_2_files)
]

In [7]:
# Filename tags that select the third test set.
test_3_files = ["-cl-01-", "-cl-02-"]
# Keep every path that contains at least one of those tags.
test_set_3_files = [
    path
    for path in files
    if any(tag in path for tag in test_3_files)
]

## Codebook for training data

In [8]:
# One-off extraction step, left disabled. For every training feature file it
# reads the tab-separated rows, drops the last column, and collects the HOG,
# MBHx and MBHy column slices (the HOF slice hog_range:hof_range is skipped).
#hog = []
#mbhx = []
#mbhy = []
#for file in training_set_files:
    
    #features = pd.read_csv(file,'\t',header=None)
    #features.drop(features.columns[-1],axis=1,inplace=True)
    
    #hog.append(features.iloc[:,trajectory_range:hog_range])
    #mbhx.append(features.iloc[:,hof_range:mbhx_range])
    #mbhy.append(features.iloc[:,mbhx_range:mbhy_range])

In [9]:
#print(len(hog))
#print(len(mbhx))
#print(len(mbhy))

496
496
496


In [10]:
# Disabled: stacks the per-file slices into one DataFrame per descriptor.
# NOTE: DataFrame.append was removed in pandas 2.0; if this cell is ever
# re-enabled, a single pd.concat over each list replaces the row-by-row loop.
#hog_df = pd.DataFrame()
#mbhx_df = pd.DataFrame()
#mbhy_df = pd.DataFrame()

#for i in range(len(hog)):    
#    hog_df = hog_df.append(hog[i])
    
#for i in range(len(mbhx)):    
#    mbhx_df = mbhx_df.append(mbhx[i])
    
#for i in range(len(mbhy)):    
#    mbhy_df = mbhy_df.append(mbhy[i])

In [15]:
#print(hog_df.shape)
#print(mbhx_df.shape)
#print(mbhy_df.shape)

(2969163, 96)
(2969163, 96)
(2969163, 96)


In [16]:
# Disabled: caches the stacked descriptors as CSV files. The codebook cells
# further down read these same file names back with pd.read_csv.
#hog_df.to_csv('hog_features_training.csv')
#mbhx_df.to_csv('mbhx_features_training.csv')
#mbhy_df.to_csv('mbhy_features_training.csv')

## HOG Codebook

In [40]:
# Load the cached HOG training descriptors and discard the first CSV column
# (presumably the row index written by to_csv -- confirm against the export).
# NOTE(review): rebinding `hog` here shadows the `hog` function imported from
# skimage.feature at the top of the notebook.
hog = pd.read_csv('hog_features_training.csv').iloc[:, 1:]

In [41]:
# Down-sample the HOG descriptors to a fixed-size subset before clustering.
print("Shape of all features {} \n".format(hog.shape))

print("Randomly selecting 100000 features \n")
# A fixed random_state makes the drawn subset identical on every run; without
# it each execution feeds a different sample to the codebook step.
hog = hog.sample(n=100000, random_state=0)

print("Shape after selection {}".format(hog.shape))

Shape of all features (2969163, 96) 

Randomly selecting 100000 features 

Shape after selection (100000, 96)


In [42]:
# Build the HOG codebook: cluster the sampled descriptors into k centroids
# with scipy's kmeans and write the centroids to CSV.
# NOTE(review): kmeans chooses its starting centroids at random, so the saved
# codebook can differ from run to run.
print("creating HOG codebook")

k = 256
# kmeans returns (centroids, distortion); the second value keeps the existing
# name `variance`.
hog_codebook, variance = kmeans(hog, k)

# The centroid array has the same shape the DataFrame wrapper would report.
print("Shape of codebook {} \n".format(hog_codebook.shape))

print("saving hog codebook")
pd.DataFrame(hog_codebook).to_csv('hog_codebook.csv')

creating HOG codebook
Shape of codebook (256, 96) 

saving hog codebook


## MBHX Codebook

In [43]:
# Load the cached MBHx training descriptors and discard the first CSV column
# (presumably the row index written by to_csv -- confirm against the export).
mbhx = pd.read_csv('mbhx_features_training.csv').iloc[:, 1:]

In [44]:
# Down-sample the MBHx descriptors to a fixed-size subset before clustering.
print("Shape of all features {} \n".format(mbhx.shape))

print("Randomly selecting 100000 features \n")
# A fixed random_state makes the drawn subset identical on every run; without
# it each execution feeds a different sample to the codebook step.
mbhx = mbhx.sample(n=100000, random_state=0)

print("Shape after selection {}".format(mbhx.shape))

Shape of all features (2969163, 96) 

Randomly selecting 100000 features 

Shape after selection (100000, 96)


In [46]:
# Build the MBHx codebook: cluster the sampled descriptors into k centroids
# with scipy's kmeans and write the centroids to CSV.
# NOTE(review): kmeans chooses its starting centroids at random, so the saved
# codebook can differ from run to run.
print("creating MBHX codebook")

k = 256
# kmeans returns (centroids, distortion); the second value keeps the existing
# name `variance`.
mbhx_codebook, variance = kmeans(mbhx, k)

# The centroid array has the same shape the DataFrame wrapper would report.
print("Shape of codebook {} \n".format(mbhx_codebook.shape))

print("saving mbhx codebook")
pd.DataFrame(mbhx_codebook).to_csv('mbhx_codebook.csv')

creating MBHX codebook
Shape of codebook (256, 96) 

saving mbhx codebook


## MBHY Codebook

In [47]:
# Load the cached MBHy training descriptors and discard the first CSV column
# (presumably the row index written by to_csv -- confirm against the export).
mbhy = pd.read_csv('mbhy_features_training.csv').iloc[:, 1:]

In [48]:
# Down-sample the MBHy descriptors to a fixed-size subset before clustering.
print("Shape of all features {} \n".format(mbhy.shape))

print("Randomly selecting 100000 features \n")
# A fixed random_state makes the drawn subset identical on every run; without
# it each execution feeds a different sample to the codebook step.
mbhy = mbhy.sample(n=100000, random_state=0)

print("Shape after selection {}".format(mbhy.shape))

Shape of all features (2969163, 96) 

Randomly selecting 100000 features 

Shape after selection (100000, 96)


In [49]:
# Build the MBHy codebook: cluster the sampled descriptors into k centroids
# with scipy's kmeans and write the centroids to CSV.
# NOTE(review): kmeans chooses its starting centroids at random, so the saved
# codebook can differ from run to run.
print("creating MBHY codebook")

k = 256
# kmeans returns (centroids, distortion); the second value keeps the existing
# name `variance`.
mbhy_codebook, variance = kmeans(mbhy, k)

# The centroid array has the same shape the DataFrame wrapper would report.
print("Shape of codebook {} \n".format(mbhy_codebook.shape))

print("saving mbhy codebook")
pd.DataFrame(mbhy_codebook).to_csv('mbhy_codebook.csv')

creating MBHY codebook
Shape of codebook (256, 96) 

saving mbhy codebook
