In [38]:
%matplotlib inline
import pickle
import numpy as np
from caffe2.proto import caffe2_pb2
from matplotlib import pyplot
from io import StringIO
from caffe2.python import core, utils, workspace
from caffe2.python import model_helper, brew

In [39]:
def load_pkl(path, label):
    """Load a pickled sequence of (image_name, vector) entries and label them.

    Args:
        path: Path of the pickle file. Each stored entry holds the image name
            at index 0 and the data vector (keypoints or features) at index 1.
        label: Class label given to every loaded sample (the notebook uses
            0 for "no" and 1 for "yes").

    Returns:
        A tuple ``(kps, labels)`` where ``kps`` is a list holding item ``[1]``
        of every entry and ``labels`` is an integer array with one ``label``
        per entry.
    """
    # NOTE: pickle.load can execute arbitrary code; only open trusted files.
    with open(path, 'rb') as f:
        kps_w_imname = pickle.load(f)

    # One label per loaded sample, built directly as an integer array.
    labels = np.ones(len(kps_w_imname), dtype=int) * label

    # Keep only the data vectors: entry[0] is the image name, entry[1] the data.
    kps = [entry[1] for entry in kps_w_imname]
    return kps, labels

### Keypoints (KPS)

In [40]:
# Load the "yes" training keypoints; every sample gets label 1.
# NOTE(review): absolute, machine-specific path - adjust before re-running elsewhere.
yes_path = "/home/hercules/Documents/detectron/extract-features/sitting_rgb/train/yes/kps.pkl"
label = 1 # 0 if no, 1 if yes
tr_yes_kps, tr_yes_labels = load_pkl(yes_path, label)
print(len(tr_yes_kps))

825


In [41]:
# Length of the keypoint vector stored for sample 2.
len(tr_yes_kps[2])

35

In [42]:
# Load the "no" training keypoints; every sample gets label 0.
# NOTE(review): absolute, machine-specific path - adjust before re-running elsewhere.
no_path = "/home/hercules/Documents/detectron/extract-features/sitting_rgb/train/no/kps.pkl"
label = 0 # 0 if no, 1 if yes
tr_no_kps, tr_no_labels = load_pkl(no_path, label)
print(len(tr_no_kps))

815


In [43]:
# CORRECT THE IMBALANCE
# Trim the "yes" class to the size of the "no" class. The size is derived from
# the data instead of being hard-coded, so the cell stays correct if the
# pickles change.
n_no = len(tr_no_kps)
tr_yes_kps, tr_yes_labels = tr_yes_kps[:n_no], tr_yes_labels[:n_no]
print(len(tr_yes_kps))

815


In [44]:
# Load the "yes" test keypoints; every sample gets label 1.
# NOTE(review): absolute, machine-specific path - adjust before re-running elsewhere.
yes_path = "/home/hercules/Documents/detectron/extract-features/sitting_rgb/test/yes/kps.pkl"
label = 1 # 0 if no, 1 if yes
te_yes_kps, te_yes_labels = load_pkl(yes_path, label)
print(len(te_yes_kps))

110


In [45]:
# Load the "no" test keypoints; every sample gets label 0.
# NOTE(review): absolute, machine-specific path - adjust before re-running elsewhere.
no_path = "/home/hercules/Documents/detectron/extract-features/sitting_rgb/test/no/kps.pkl"
label = 0 # 0 if no, 1 if yes
te_no_kps, te_no_labels = load_pkl(no_path, label)
print(len(te_no_kps))

109


### FEATURES

In [46]:
# Load the "yes" training feature vectors; every sample gets label 1.
# This re-binds tr_yes_labels, replacing the array produced when the
# keypoints were loaded.
# NOTE(review): absolute, machine-specific path - adjust before re-running elsewhere.
yes_path = "/home/hercules/Documents/detectron/extract-features/sitting_rgb/train/yes/human_features.pkl"
label = 1 # 0 if no, 1 if yes
tr_yes_feats, tr_yes_labels = load_pkl(yes_path, label)
print(len(tr_yes_feats))

825


In [47]:
# Load the "no" training feature vectors; every sample gets label 0.
# This re-binds tr_no_labels, replacing the array produced when the
# keypoints were loaded.
# NOTE(review): absolute, machine-specific path - adjust before re-running elsewhere.
no_path = "/home/hercules/Documents/detectron/extract-features/sitting_rgb/train/no/human_features.pkl"
label = 0 # 0 if no, 1 if yes
tr_no_feats, tr_no_labels = load_pkl(no_path, label)
print(len(tr_no_feats))

815


In [48]:
# CORRECT THE IMBALANCE
# Trim the "yes" features to the size of the "no" class (derived from the data
# rather than hard-coded) and report the length of the list that was actually
# trimmed in this cell.
n_no = len(tr_no_feats)
tr_yes_feats, tr_yes_labels = tr_yes_feats[:n_no], tr_yes_labels[:n_no]
print(len(tr_yes_feats))

815


In [49]:
# Load the "yes" test feature vectors; every sample gets label 1.
# This re-binds te_yes_labels, replacing the array produced when the
# keypoints were loaded.
# NOTE(review): absolute, machine-specific path - adjust before re-running elsewhere.
yes_path = "/home/hercules/Documents/detectron/extract-features/sitting_rgb/test/yes/human_features.pkl"
label = 1 # 0 if no, 1 if yes
te_yes_feats, te_yes_labels = load_pkl(yes_path, label)
print(len(te_yes_feats))

110


In [50]:
# Load the "no" test feature vectors; every sample gets label 0.
# This re-binds te_no_labels, replacing the array produced when the
# keypoints were loaded.
# NOTE(review): absolute, machine-specific path - adjust before re-running elsewhere.
no_path = "/home/hercules/Documents/detectron/extract-features/sitting_rgb/test/no/human_features.pkl"
label = 0 # 0 if no, 1 if yes
te_no_feats, te_no_labels = load_pkl(no_path, label)
print(len(te_no_feats))

109


In [51]:
# Length of the feature vector stored for test sample 3.
len(te_no_feats[3])

12544

### Shuffle the train and test sets

In [52]:
# Draw the shuffling orders from a seeded generator so the split order (and
# everything trained on it) is reproducible across kernel restarts.
SHUFFLE_SEED = 0
_shuffle_rng = np.random.RandomState(SHUFFLE_SEED)

# The same index arrays are applied to both the keypoint and the feature
# arrays below.
# NOTE(review): rows stay paired only if both pickles list the samples in the
# same order - confirm.
train_indices = _shuffle_rng.permutation(len(tr_yes_feats) + len(tr_no_feats))
test_indices = _shuffle_rng.permutation(len(te_yes_feats) + len(te_no_feats))

In [53]:
# keypoints
# Stack the "no" samples first, then the "yes" samples, and reorder both the
# data and the labels with the shared shuffle indices.
train_labels = np.concatenate([tr_no_labels, tr_yes_labels])
train_data = np.concatenate([tr_no_kps, tr_yes_kps])
kps_train_data, kps_train_labels = train_data[train_indices], train_labels[train_indices]

# Same procedure for the test split.
test_labels = np.concatenate([te_no_labels, te_yes_labels])
test_data = np.concatenate([te_no_kps, te_yes_kps])
kps_test_data, kps_test_labels = test_data[test_indices], test_labels[test_indices]

In [54]:
# features
# Stack the "no" samples first, then the "yes" samples, and reorder both the
# data and the labels with the shared shuffle indices.
train_labels = np.concatenate([tr_no_labels, tr_yes_labels])
train_data = np.concatenate([tr_no_feats, tr_yes_feats])
feats_train_data, feats_train_labels = train_data[train_indices], train_labels[train_indices]

# Same procedure for the test split.
test_labels = np.concatenate([te_no_labels, te_yes_labels])
test_data = np.concatenate([te_no_feats, te_yes_feats])
feats_test_data, feats_test_labels = test_data[test_indices], test_labels[test_indices]

### merge features and kps

In [15]:
def group_vectors(features, kps, feature_scale=5.0, kps_offset=0.5):
    """Concatenate each feature vector with its keypoint vector after rescaling.

    Args:
        features: Sequence of numeric arrays, one per sample.
        kps: Sequence of numeric arrays indexed in step with ``features``.
        feature_scale: Divisor applied to every feature vector (default 5.0).
        kps_offset: Value subtracted from every keypoint vector (default 0.5).

    Returns:
        An array whose row ``j`` is ``features[j] / feature_scale`` followed by
        ``kps[j] - kps_offset``.
    """
    data = [
        np.concatenate((feat / feature_scale, kps[j] - kps_offset))
        for j, feat in enumerate(features)
    ]
    return np.asarray(data)

In [17]:
# Build the merged (features + keypoints) vectors for both splits.
# The labels are taken from the keypoint split; the feature labels were
# reordered with the same train_indices / test_indices.
train_data = group_vectors(feats_train_data, kps_train_data)
test_data = group_vectors(feats_test_data, kps_test_data)
train_labels = kps_train_labels
test_labels = kps_test_labels

In [74]:
# Spot check: print entries 30-39 of row 2 of the current train_data.
print(train_data[2][30:40])

[ 0.29057613  0.3654477   0.58189404  0.2767809  -0.18224943 -0.78286034
 -0.57301581 -0.01554741 -0.21917693 -0.29260701]


In [23]:
def new_(data, n_drop=35):
    """Return ``data`` with the last ``n_drop`` entries of every row removed.

    Args:
        data: Sequence of sliceable rows (lists or arrays).
        n_drop: Non-negative number of trailing entries to remove from each
            row (default 35). Rows shorter than ``n_drop`` become empty.

    Returns:
        An array built from the truncated rows.
    """
    # Slice with an explicit end so that n_drop == 0 keeps the whole row
    # (row[:-0] would return an empty row instead).
    newdata = [row[:len(row) - n_drop] for row in data]
    return np.asarray(newdata)

In [24]:
# Remove the last 35 values of every row in both splits.
# NOTE(review): 35 equals the keypoint-vector length printed earlier, so this
# presumably strips the keypoint part appended by group_vectors - confirm.
# The cell is not idempotent: each re-run strips another 35 values.
train_data = new_(train_data)
test_data = new_(test_data)

### Alternative: if the dataset is not already split into train and test...

In [None]:
# Merge both classes into one array and shuffle data and labels together.
# NOTE(review): no_data, yes_data, label_no and adj_label_yes are not defined
# in any visible cell, so this cell fails on a fresh kernel.
all_feats = np.concatenate((np.asarray(no_data),np.asarray(yes_data)))
len(all_feats)  # bare expression in mid-cell: the value is not displayed
all_labels = np.concatenate((label_no, adj_label_yes))
len(all_labels)  # bare expression in mid-cell: the value is not displayed

# One permutation reorders both arrays so data and labels stay aligned.
random_index = np.random.permutation(len(all_feats))
features = all_feats[random_index]
labels = all_labels[random_index]

In [39]:
# split
# percent is the fraction of the shuffled samples kept for training; the
# remainder becomes the test set. Relies on all_feats, features and labels
# from the merge cell above.
percent = 0.90
split_tr_te = int(len(all_feats) * percent)
train_features = features[:split_tr_te]
train_labels = labels[:split_tr_te]
test_features = features[split_tr_te:]
test_labels = labels[split_tr_te:]

In [40]:
# CORRECT THE IMBALANCE
# Append the supplementary "yes" samples and their labels to the test split.
# NOTE(review): supp_yes_data and supp_label_yes are not defined in any
# visible cell. The cell is also not idempotent: every re-run appends again.
test_features = np.concatenate((test_features,supp_yes_data))
test_labels = np.concatenate((test_labels,supp_label_yes))

### Save as minidb

In [71]:
# Option A: use the keypoint arrays as the dataset for the cells below.
# The next cell binds the same names to the feature arrays, so whichever of
# the two cells ran last decides what is saved and what the SVM is trained on.
train_data, train_labels = kps_train_data, kps_train_labels
test_data, test_labels = kps_test_data, kps_test_labels

In [61]:
# Option B: use the feature arrays as the dataset for the cells below.
# The previous cell binds the same names to the keypoint arrays, so whichever
# of the two cells ran last decides what is saved and what the SVM is trained on.
train_data, train_labels = feats_train_data, feats_train_labels
test_data, test_labels = feats_test_data, feats_test_labels

In [62]:
# Now, actually write the db.
def write_db(db_type, db_name, features, labels):
    """Write (feature, label) pairs into a Caffe2 database.

    Args:
        db_type: Database type string passed to ``core.C.create_db``
            (the notebook uses "minidb").
        db_name: Path of the database file to create.
        features: Array of samples; one record is written per index along
            its first axis.
        labels: Array of labels indexed in step with ``features``.

    Each record is a serialized ``TensorProtos`` holding the feature tensor
    followed by the label tensor.
    """
    db = core.C.create_db(db_type, db_name, core.C.Mode.write)
    transaction = db.new_transaction()
    for i in range(features.shape[0]):
        feature_and_label = caffe2_pb2.TensorProtos()
        feature_and_label.protos.extend([
            utils.NumpyArrayToCaffe2Tensor(features[i]),
            utils.NumpyArrayToCaffe2Tensor(labels[i])
        ])
        # Use a {}-style placeholder: str.format ignores printf-style '%03d',
        # which would give every record the same literal key.
        transaction.put(
            'train_{:03d}'.format(i),
            feature_and_label.SerializeToString())
    # Close the transaction, and then close the db.
    del transaction
    del db

# Persist the currently selected train/test arrays as minidb files in the
# working directory.
write_db("minidb", "sit_train.minidb", train_data, train_labels)
write_db("minidb", "sit_test.minidb", test_data, test_labels)

### SVM

In [76]:
# for the support vector machine classifier
import sklearn
from sklearn import datasets, svm, metrics

In [77]:
# Fit a linear-kernel SVM on the currently selected training arrays.
# NOTE(review): probability=True enables predict_proba, which no visible cell
# calls; per the scikit-learn docs it adds an internal cross-validation to
# fit - confirm whether it is needed.
model = sklearn.svm.SVC(kernel="linear", C=1, probability=True)
model.fit(train_data, train_labels)

SVC(C=1, break_ties=False, cache_size=200, class_weight=None, coef0=0.0,
    decision_function_shape='ovr', degree=3, gamma='scale', kernel='linear',
    max_iter=-1, probability=True, random_state=None, shrinking=True, tol=0.001,
    verbose=False)

In [78]:
# Accuracy on the same data the model was fitted on (not a held-out score).
predicted_tr = model.predict(train_data)
print("Accuracy training : %s " % metrics.accuracy_score(train_labels, predicted_tr))

Accuracy training : 0.6208588957055214 


In [79]:
# predictions and stats
# Evaluate on the test split: per-class precision/recall report, confusion
# matrix and overall accuracy.
expected = test_labels
predicted = model.predict(test_data)

print("Classification report for classifier %s:\n%s\n"
      % (model, metrics.classification_report(expected, predicted)))
print("Confusion matrix:\n %s \n" % metrics.confusion_matrix(expected, predicted))
print("Accuracy : %s " % metrics.accuracy_score(expected, predicted))

Classification report for classifier SVC(C=1, break_ties=False, cache_size=200, class_weight=None, coef0=0.0,
    decision_function_shape='ovr', degree=3, gamma='scale', kernel='linear',
    max_iter=-1, probability=True, random_state=None, shrinking=True, tol=0.001,
    verbose=False):
              precision    recall  f1-score   support

           0       0.71      0.40      0.51       109
           1       0.59      0.84      0.69       110

    accuracy                           0.62       219
   macro avg       0.65      0.62      0.60       219
weighted avg       0.65      0.62      0.60       219


Confusion matrix:
 [[44 65]
 [18 92]] 

Accuracy : 0.6210045662100456 
