### Feature_Selection
Author : @leopauly | cnlp@leeds.ac.uk <br>
Description : Selecting the best performing features

In [1]:
import os
from six.moves import xrange  
import PIL.Image as Image
import random
import numpy as np
import cv2
import time
import math
import matplotlib.pyplot as plt
import tensorflow as tf
from keras import backend as K
from sklearn.decomposition import PCA
from sklearn.manifold import TSNE
from skimage import io
import warnings
warnings.filterwarnings('ignore')

# Custom scripts
import lscript as lsp
import modelling as md

Using TensorFlow backend.


In [2]:
# Frame geometry: every frame is height x width pixels with `channel` colour planes.
height, width, channel = 112, 112, 3
crop_size = 112

# Clip / model dimensions.
cluster_length = 16  # number of frames per clip (clip axis used when reshaping videos)
feature_size = 8192
nb_classes = 2       # width of the `y_true` placeholder
# Directory that holds the saved checkpoint; the trailing slash is part of the value.
saved_path = '/nobackup/leopauly/S2l/'

# Batch-related settings.
batch_size = 32
memory_batch_size_train = 266
memory_batch_size_test = 170
next_batch_start = 0
sample_batch_size = 10

####  Loading model 

In [3]:
## Defining placeholders in tf for images and targets
# The clip axis uses `cluster_length` (16) rather than a literal so the graph
# input always agrees with the reshape applied to clips before they are fed.
x_image = tf.placeholder(tf.float32, [None, cluster_length, height, width, channel], name='x')
y_true = tf.placeholder(tf.float32, [None, nb_classes], name='y_true')
y_true_cls = tf.placeholder(tf.int64, [None], name='y_true_cls')

## Model returned by the project helper, applied to the image placeholder
model_keras = md.C3D_ucf101_training_model_tf(summary=False)
out = model_keras(x_image)
y_pred = tf.nn.softmax(out)
# `axis` is the supported keyword; `dimension` is its deprecated alias.
y_pred_cls = tf.argmax(out, axis=1)
correct_prediction = tf.equal(y_pred_cls, y_true_cls)
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

print('Miscellenious items finished..!!',flush=True)

Miscellenious items finished..!!


In [4]:
## Create the variable initialiser and a session with soft placement enabled
## (device-placement logging is switched off).
init_op = tf.global_variables_initializer()
session_config = tf.ConfigProto(allow_soft_placement=True, log_device_placement=False)
sess = tf.Session(config=session_config)
sess.run(init_op)

## Load the saved checkpoint into the session; this runs after the initialiser,
## so restored values replace the initial ones.
checkpoint_file = os.path.join(saved_path, 'activity_model.ckpt-67')
saver = tf.train.Saver()
saver.restore(sess, checkpoint_file)
print("Model restored from file: %s" % saved_path,flush=True)

INFO:tensorflow:Restoring parameters from /nobackup/leopauly/S2l/activity_model.ckpt-67
Model restored from file: /nobackup/leopauly/S2l/


####  Feature extraction

In [5]:
def get_compress_frames_data(filename, num_frames_per_clip=16):
  '''Load evenly spaced PNG frames from a directory of extracted frames.

  The directory tree rooted at `filename` is walked with os.walk. In every
  directory visited, the '.png' files are sorted by name and sampled with a
  stride of len(files) // num_frames_per_clip (at least 1), keeping at most
  `num_frames_per_clip` frames per directory.

  Args:
    filename: path of the directory that contains the extracted frames.
    num_frames_per_clip: maximum number of frames taken from each directory.

  Returns:
    np.ndarray built from the list of loaded frames (one entry per frame);
    an empty array when no PNG frame is found.

  Raises:
    ValueError: if `num_frames_per_clip` is smaller than 1.
  '''
  if num_frames_per_clip < 1:
    raise ValueError('num_frames_per_clip must be >= 1')

  ret_arr = []
  for parent, dirnames, filenames in os.walk(filename):
    # Keep only frames so that stray files cannot occupy a sampling slot.
    frame_names = sorted(name for name in filenames if name.endswith('.png'))
    if not frame_names:
      continue
    # A stride of 0 (fewer frames than requested) would make slicing/range fail.
    jump = max(1, len(frame_names) // num_frames_per_clip)
    for frame_name in frame_names[::jump][:num_frames_per_clip]:
      # Join with `parent`, not `filename`, so frames in nested directories resolve.
      with Image.open(os.path.join(parent, frame_name)) as img:
        ret_arr.append(np.array(img))
  #ret_arr=ret_arr/255
  return np.array(ret_arr)

In [42]:
## Extraction of features 
# Name of the graph tensor whose value is used as the feature vector.
# Alternatives tried previously: 'flatten_1/Reshape:0', 'dropout_2/cond/Merge:0', 'fc8/BiasAdd:0'
layer_name = 'Softmax:0'

def extract_video_features(vid):
    """Evaluate the tensor named `layer_name` for one video and flatten the result.

    `vid` is reshaped to (-1, cluster_length, height, width, channel) and fed to
    both the 'conv1_input:0' tensor and the `x_image` placeholder, with the Keras
    learning phase set to 0. Uses the notebook-level session `sess`.

    Returns:
        1-D np.ndarray holding the flattened value of the fetched tensor.
    """
    clip_batch = vid.reshape(-1, cluster_length, height, width, channel)
    target_tensor = sess.graph.get_tensor_by_name(layer_name)
    feed = {'conv1_input:0': clip_batch, x_image: clip_batch, K.learning_phase(): 0}
    fetched = sess.run([target_tensor], feed_dict=feed)
    return np.reshape(fetched, (-1))

In [43]:
## Feature extraction from each class
def get_features_from_class(class_folder):
    """Extract one feature vector per video sub-directory of `class_folder`.

    Each sub-directory (taken in sorted name order) is loaded with
    get_compress_frames_data and passed to extract_video_features.

    Args:
        class_folder: directory whose sub-directories each hold the frames of
            one video. A trailing slash is optional.

    Returns:
        np.ndarray built from the list of per-video feature vectors; empty when
        the folder has no sub-directories.
    """
    # Only directories are videos; this also skips '.DS_Store' and other stray files.
    sub_dir_a = [
        entry for entry in sorted(os.listdir(class_folder))
        if os.path.isdir(os.path.join(class_folder, entry))
    ]
    print('\nSub directories:\n',sub_dir_a)

    feature_set_a = []
    for sub_dir_a_ in sub_dir_a:
        # os.path.join works whether or not class_folder ends with a separator.
        vid_a = get_compress_frames_data(os.path.join(class_folder, sub_dir_a_))
        feature_set_a.append(extract_video_features(vid_a))
    return np.array(feature_set_a)

In [44]:
## Extracting features
feature_set_a=get_features_from_class('/nobackup/leopauly/S2l/Dataset/Feature_Selection_Dataset1/Move/')
feature_set_b=get_features_from_class('/nobackup/leopauly/S2l/Dataset/Feature_Selection_Dataset1/displace/')
features_all=np.concatenate((feature_set_a,feature_set_b),axis=0)
# Labels follow the concatenation order: 0 for every row of feature_set_a, then
# 1 for every row of feature_set_b. Deriving the counts from the arrays keeps
# labels and features aligned when the number of videos per class changes.
feature_labels_true=np.repeat([0,1],[len(feature_set_a),len(feature_set_b)])
print('\nLabel shape:',feature_labels_true.shape)
print('Feature set shape:',features_all.shape)


Sub directories:
 ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9']

Sub directories:
 ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9']

Label shape: (20,)
Feature set shape: (20, 101)


####  Feature selection

In [45]:
## Selecting top k features
from sklearn.feature_selection import f_classif, chi2, mutual_info_classif
from sklearn.feature_selection import SelectKBest

# Fractions of the feature columns to keep, one selection run per fraction.
percentage_of_features = np.array([.2,.4,.6,.8,1])

# Loop-invariant values: total column count and the layer name without '/'
# (used in the output file names).
total_feature_count = features_all.shape[1]
layer_tag = layer_name.replace('/', '')

for percent in percentage_of_features:
    # int() truncates, so the number of kept columns is rounded down.
    num_selected_features = int(total_feature_count * percent)
    selector = SelectKBest(f_classif, k=num_selected_features)
    features_all_selected = selector.fit_transform(features_all, feature_labels_true)
    select_feature_cols = selector.get_support(indices=True)
    print('\nIndexes of selected features:\n',select_feature_cols)
    print('No of selected features:',select_feature_cols.shape)

    ## Saving array indexes
    file_name = 'sel_feat_idx_' + layer_tag + '_' + str(percent) + '.txt'
    np.savetxt(file_name, select_feature_cols)
    print(file_name)


Indexes of selected features:
 [  2  12  14  15  16  17  35  38  43  45  51  55  57  60  64  72  73  81
  98 100]
No of selected features: (20,)
sel_feat_idx_Softmax:0_0.2.txt

Indexes of selected features:
 [  0   2   6   8  12  14  15  16  17  18  21  22  30  31  32  35  38  43
  45  46  48  51  53  55  57  60  61  63  64  66  71  72  73  74  80  81
  88  89  98 100]
No of selected features: (40,)
sel_feat_idx_Softmax:0_0.4.txt

Indexes of selected features:
 [  0   1   2   5   6   7   8  10  11  12  14  15  16  17  18  19  20  21
  22  26  30  31  32  34  35  38  39  42  43  44  45  46  48  49  50  51
  53  54  55  57  60  61  63  64  65  66  71  72  73  74  80  81  86  87
  88  89  93  94  98 100]
No of selected features: (60,)
sel_feat_idx_Softmax:0_0.6.txt

Indexes of selected features:
 [  0   1   2   4   5   6   7   8   9  10  11  12  13  14  15  16  17  18
  19  20  21  22  24  26  28  30  31  32  34  35  36  38  39  42  43  44
  45  46  48  49  50  51  52  53  54  55  57  58