In [1]:
%matplotlib inline
import pickle
import numpy as np
from caffe2.proto import caffe2_pb2
from matplotlib import pyplot
from io import StringIO
from caffe2.python import core, utils, workspace
from caffe2.python import model_helper, brew

In [2]:
def load_pkl(path):
    """Read the pickle file at ``path`` and return the object stored in it.

    The file is opened in binary mode and is closed before the value is returned.

    NOTE(review): ``pickle.load`` can execute arbitrary code while
    deserializing, so this should only be pointed at trusted files.
    """
    with open(path, 'rb') as pkl_file:
        contents = pickle.load(pkl_file)
    return contents

### Train

##### features

In [3]:
# Train split: pickled text-box and human feature files for the "no" and "yes" classes.
no_feats_txtbox, yes_feats_txtbox = (
    load_pkl("reading_gray/train/no/textbox_features.pkl"),
    load_pkl("reading_gray/train/yes/textbox_features.pkl"),
)
no_feats_human, yes_feats_human = (
    load_pkl("reading_gray/train/no/human_features.pkl"),
    load_pkl("reading_gray/train/yes/human_features.pkl"),
)

##### combined features

In [4]:
# Train split: pre-combined human + text-box feature files, one per class.
no_feats_combined, yes_feats_combined = (
    load_pkl("reading_gray/train/no/human-txtbox-combined.pkl"),
    load_pkl("reading_gray/train/yes/human-txtbox-combined.pkl"),
)

##### keypoints

In [4]:
# Train split: text-box bounding-box files and human keypoint files, one per class.
no_kps_txtbox, yes_kps_txtbox = (
    load_pkl("reading_gray/train/no/textbox_boundingbox.pkl"),
    load_pkl("reading_gray/train/yes/textbox_boundingbox.pkl"),
)
no_kps_human, yes_kps_human = (
    load_pkl("reading_gray/train/no/kps.pkl"),
    load_pkl("reading_gray/train/yes/kps.pkl"),
)

### Test

##### features

In [4]:
# Test split: pickled text-box and human feature files for the "no" and "yes" classes.
te_no_feats_txtbox, te_yes_feats_txtbox = (
    load_pkl("reading_gray/test/no/textbox_features.pkl"),
    load_pkl("reading_gray/test/yes/textbox_features.pkl"),
)
te_no_feats_human, te_yes_feats_human = (
    load_pkl("reading_gray/test/no/human_features.pkl"),
    load_pkl("reading_gray/test/yes/human_features.pkl"),
)

##### combined features

In [5]:
# Test split: pre-combined human + text-box feature files, one per class.
te_no_feats_combined, te_yes_feats_combined = (
    load_pkl("reading_gray/test/no/human-txtbox-combined.pkl"),
    load_pkl("reading_gray/test/yes/human-txtbox-combined.pkl"),
)

##### keypoints

In [5]:
# Test split: text-box bounding-box files and human keypoint files, one per class.
te_no_kps_txtbox, te_yes_kps_txtbox = (
    load_pkl("reading_gray/test/no/textbox_boundingbox.pkl"),
    load_pkl("reading_gray/test/yes/textbox_boundingbox.pkl"),
)
te_no_kps_human, te_yes_kps_human = (
    load_pkl("reading_gray/test/no/kps.pkl"),
    load_pkl("reading_gray/test/yes/kps.pkl"),
)

### combine features

In [6]:
def group_vectors(feats1, feats2, label):
    """Join two per-image vector collections on image name and label the rows.

    Parameters
    ----------
    feats1 : sequence
        Entries indexed as ``(vector, im_name)``: the image name is read
        from ``[1]`` and the vector from ``[0]``.
    feats2 : sequence
        Entries indexed as ``(im_name, vector)``: the image name is read
        from ``[0]`` and the vector from ``[1]``.
    label : int
        Class label repeated once per joined row.

    Returns
    -------
    data : np.ndarray
        One row per matching (feats1 entry, feats2 entry) pair: both vectors
        flattened and concatenated, feats1 vector first. Rows follow feats1
        order; several feats2 matches for one name keep feats2 order.
    labels : np.ndarray
        Integer array of ``len(data)`` copies of ``label``.

    Notes
    -----
    Image names are used as dictionary keys, so they must be hashable
    (e.g. file-name strings). Entries without a partner are dropped.
    """
    # Index feats2 by image name once, so the join is a dictionary lookup per
    # feats1 entry instead of a full scan of feats2 for every entry.
    vectors_by_name = {}
    for entry in feats2:
        vectors_by_name.setdefault(entry[0], []).append(entry[1])

    data = [
        # axis=None flattens both inputs before joining them.
        np.concatenate((entry[0], partner), axis=None)
        for entry in feats1
        for partner in vectors_by_name.get(entry[1], ())
    ]

    labels = np.ones(len(data), dtype=int) * label

    return np.asarray(data), labels

In [8]:
# CORRECT THE IMBALANCE
# Trim both classes to the size of the smaller one rather than to a hard-coded
# count, so the cell stays correct whichever feature set was built.
# Must be run after one of the cells that assigns no_feats / yes_feats.
n_per_class = min(len(no_feats), len(yes_feats))
no_feats, no_labels = no_feats[:n_per_class], no_labels[:n_per_class]
yes_feats, yes_labels = yes_feats[:n_per_class], yes_labels[:n_per_class]
print(len(yes_feats))

801


##### features

In [6]:
# Training split: join text-box and human features by image name (0 = "no", 1 = "yes").
no_feats, no_labels = group_vectors(feats1=no_feats_txtbox, feats2=no_feats_human, label=0)
print(len(no_feats))
yes_feats, yes_labels = group_vectors(feats1=yes_feats_txtbox, feats2=yes_feats_human, label=1)
print(len(yes_feats))

# Test split: same join on the held-out files.
te_no_feats, te_no_labels = group_vectors(
    feats1=te_no_feats_txtbox, feats2=te_no_feats_human, label=0)
print(len(te_no_feats))
te_yes_feats, te_yes_labels = group_vectors(
    feats1=te_yes_feats_txtbox, feats2=te_yes_feats_human, label=1)
print(len(te_yes_feats))

801
824
118
120


##### combined features

In [6]:
# Training split: the combined files are used as-is; labels are all 0 ("no") or all 1 ("yes").
no_feats = no_feats_combined
no_labels = np.zeros(len(no_feats_combined), dtype=int)
print(len(no_feats))
yes_feats = yes_feats_combined
yes_labels = np.ones(len(yes_feats_combined), dtype=int)
print(len(yes_feats))

# Test split: same construction on the held-out files.
te_no_feats = te_no_feats_combined
te_no_labels = np.zeros(len(te_no_feats_combined), dtype=int)
print(len(te_no_feats))
te_yes_feats = te_yes_feats_combined
te_yes_labels = np.ones(len(te_yes_feats_combined), dtype=int)
print(len(te_yes_feats))

869
903
122
122


##### keypoints

In [7]:
# Training split: join text-box bounding boxes and human keypoints by image name
# (0 = "no", 1 = "yes").
no_feats, no_labels = group_vectors(feats1=no_kps_txtbox, feats2=no_kps_human, label=0)
print(len(no_feats))
yes_feats, yes_labels = group_vectors(feats1=yes_kps_txtbox, feats2=yes_kps_human, label=1)
print(len(yes_feats))

# Test split: same join on the held-out files.
te_no_feats, te_no_labels = group_vectors(
    feats1=te_no_kps_txtbox, feats2=te_no_kps_human, label=0)
print(len(te_no_feats))
te_yes_feats, te_yes_labels = group_vectors(
    feats1=te_yes_kps_txtbox, feats2=te_yes_kps_human, label=1)
print(len(te_yes_feats))

801
824
118
120


### randomize

In [9]:
# Fixed seed so the shuffle (and everything derived from it) is reproducible
# across kernel restarts. A private RandomState is used so NumPy's global RNG
# state is left untouched.
SHUFFLE_SEED = 0
shuffle_rng = np.random.RandomState(SHUFFLE_SEED)
train_indices = shuffle_rng.permutation(len(no_feats) + len(yes_feats))
test_indices = shuffle_rng.permutation(len(te_yes_feats) + len(te_no_feats))

In [10]:
# Training split: stack the "no" rows ahead of the "yes" rows, then build shuffled copies.
train_data = np.concatenate([np.asarray(part) for part in (no_feats, yes_feats)])
train_labels = np.concatenate([np.asarray(part) for part in (no_labels, yes_labels)])
rand_train_data, rand_train_labels = train_data[train_indices], train_labels[train_indices]
print(len(train_data))

# Test split: same layout and shuffle for the held-out rows.
test_data = np.concatenate([np.asarray(part) for part in (te_no_feats, te_yes_feats)])
test_labels = np.concatenate([np.asarray(part) for part in (te_no_labels, te_yes_labels)])
rand_test_data, rand_test_labels = test_data[test_indices], test_labels[test_indices]
print(len(test_data))

1602
238


### Save as minidb

In [12]:
# Now, actually write the db.
def write_db(db_type, db_name, features, labels):
    """Serialize (feature, label) pairs into a Caffe2 database.

    Each record is a ``TensorProtos`` message holding two tensors, the
    feature row and its label, stored under a unique zero-padded key
    (``train_000``, ``train_001``, ...). The ``train_`` prefix is used for
    every database written through this function, whatever ``db_name`` is.

    Parameters
    ----------
    db_type : str
        Database backend name passed to ``create_db`` (e.g. ``"minidb"``).
    db_name : str
        Path of the database to create.
    features : np.ndarray
        Array whose first axis indexes samples; row ``i`` is stored as-is.
    labels : np.ndarray
        One label per row of ``features``.

    Raises
    ------
    ValueError
        If ``features`` and ``labels`` do not have the same number of entries.
    """
    if len(labels) != features.shape[0]:
        raise ValueError(
            "features has %d rows but labels has %d entries"
            % (features.shape[0], len(labels)))

    db = core.C.create_db(db_type, db_name, core.C.Mode.write)
    transaction = db.new_transaction()
    for i in range(features.shape[0]):
        feature_and_label = caffe2_pb2.TensorProtos()
        feature_and_label.protos.extend([
            utils.NumpyArrayToCaffe2Tensor(features[i]),
            utils.NumpyArrayToCaffe2Tensor(labels[i])
        ])
        transaction.put(
            # str.format needs a {} field; the previous '%03d' placeholder was
            # never substituted, so every record was written under one key.
            'train_{:03d}'.format(i),
            feature_and_label.SerializeToString())
    # Close the transaction, and then close the db.
    del transaction
    del db

# Persist the shuffled copies built in the "randomize" step. train_data/test_data
# are the plain concatenation (all "no" rows, then all "yes" rows), so writing
# them directly would store the records sorted by class.
write_db("minidb", "train.minidb", rand_train_data, rand_train_labels)
write_db("minidb", "test.minidb", rand_test_data, rand_test_labels)

### SVM

In [11]:
# for the support vector machine classifier
import sklearn
from sklearn import datasets, svm, metrics

In [12]:
# Linear-kernel SVM with C=1; probability=True is passed so the estimator is
# built with probability estimates enabled.
svm_params = dict(kernel="linear", C=1, probability=True)
model = sklearn.svm.SVC(**svm_params)
model.fit(train_data, train_labels)

SVC(C=1, break_ties=False, cache_size=200, class_weight=None, coef0=0.0,
    decision_function_shape='ovr', degree=3, gamma='scale', kernel='linear',
    max_iter=-1, probability=True, random_state=None, shrinking=True, tol=0.001,
    verbose=False)

In [13]:
# Accuracy on the data the model was fitted on.
predicted_tr = model.predict(train_data)
train_accuracy = metrics.accuracy_score(train_labels, predicted_tr)
print("Accuracy training : %s " % train_accuracy)

Accuracy training : 0.6329588014981273 


In [14]:
# Held-out evaluation: per-class report, confusion matrix and overall accuracy.
expected = test_labels
predicted = model.predict(test_data)

class_report = metrics.classification_report(expected, predicted)
print("Classification report for classifier %s:\n%s\n"
      % (model, class_report))
conf_matrix = metrics.confusion_matrix(expected, predicted)
print("Confusion matrix:\n %s \n" % conf_matrix)
test_accuracy = metrics.accuracy_score(expected, predicted)
print("Accuracy : %s " % test_accuracy)

Classification report for classifier SVC(C=1, break_ties=False, cache_size=200, class_weight=None, coef0=0.0,
    decision_function_shape='ovr', degree=3, gamma='scale', kernel='linear',
    max_iter=-1, probability=True, random_state=None, shrinking=True, tol=0.001,
    verbose=False):
              precision    recall  f1-score   support

           0       0.63      0.65      0.64       118
           1       0.64      0.62      0.63       120

    accuracy                           0.63       238
   macro avg       0.63      0.63      0.63       238
weighted avg       0.63      0.63      0.63       238


Confusion matrix:
 [[77 41]
 [46 74]] 

Accuracy : 0.634453781512605 
