In [16]:
%load_ext autoreload
%autoreload 2

import importlib
import pickle
from pprint import pprint

import numpy as np

from modules import data, models, descriptor_vector
from modules.negative_set import generate_negative_set, get_box_parameters
from modules.selection import try_classifiers, try_params
from modules.window import multiple_sliding_windows

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [2]:
# Load the complete dataset once; later cells slice these two objects
# instead of reloading them.
full_images, full_labels = data.load_images(), data.load_labels()

In [3]:
# Box size
# Elements 1 and 2 of the box parameters are reported as the average box size.
box_params = get_box_parameters(full_labels)
avg_box_size = box_params[1:3]
print("average box size:", avg_box_size)

# Detection window size actually used by the rest of the notebook
# (hand-picked; note it differs from the average printed above).
box_size = (155, 100)
print("box size:", box_size)

average box size: (167, 110)
box size: (155, 100)


In [4]:
# Stats
# Collect the first two shape entries of every image, then summarise them
# column by column.
shapes = np.array([img.shape[:2] for img in full_images], dtype=int)
reducers = {'min': np.min, 'mean': np.mean, 'max': np.max}
for stat, reduce_fn in reducers.items():
    print(stat, reduce_fn(shapes, axis=0))

min [182 231]
mean [406.767 365.412]
max [450 450]


In [78]:
# Parameters
# OFFSET / LIMIT select the slice of the dataset used by the following cells
# (a falsy LIMIT means "use everything").
OFFSET, LIMIT = 0, 100
# Requested size of the generated negative set.
NEG_SIZE = 150
# Rate handed to data.train_valid_sets when splitting train / validation.
TRAIN_RATE = 0.6

# Echo the selected range as a quick visual check.
range_end = OFFSET + LIMIT
print(OFFSET, range_end)

0 100


In [79]:
# Limit if needed
if not LIMIT:
    # No limit requested: work on the full dataset.
    images, labels = full_images, full_labels
else:
    first, last = OFFSET, OFFSET + LIMIT
    images = full_images[first:last]
    # Keep the label rows whose first column lies in (first, last].
    # NOTE(review): presumably column 0 is a 1-based image number, which is what
    # makes this range line up with the image slice above - confirm.
    image_ids = full_labels[:, 0]
    labels = full_labels[(image_ids > first) & (image_ids <= last)]
print(f"Using {len(images)}/{len(full_images)} images & {len(labels)}/{len(full_labels)} labels")

Using 100/1000 images & 134/1284 labels


In [80]:
# Precompute windows
# NOTE: this cell is currently disabled. Everything between the triple quotes
# is a bare string literal, so none of it executes; the trailing `;` keeps the
# notebook from echoing that string as the cell output.
# If re-enabled, the code would build sliding windows over `images`, compute
# HOG descriptors for element 2 of `windows_sets`, and rebind `windows_sets`
# to the pair (windows_sets[:2], X).
"""
assert LIMIT <= 100
print(OFFSET, LIMIT)

slide_step = (60,50)
downscale_step = 100
windows_sets = multiple_sliding_windows(images, box_size, slide_step, downscale_step)
print(f"Got {len(windows_sets[2])} windows from {len(images)} images")

# Transform windows
X = descriptor_vector.hog(windows_sets[2])
windows_sets = windows_sets[:2], X
print("Done")
""";

In [81]:
# Label sets
# Step 1: generate NEG_SIZE negative labels for the selected images.
# NOTE(review): if generate_negative_set samples randomly, seed it so the
# notebook is reproducible - confirm against the module.
print("Generating negative set...")
negatives = generate_negative_set(images, labels, set_size=NEG_SIZE)

# Step 2: stack the original labels and the negatives, then split the result
# into train / validation sets.
print("Creating train & validation sets with negatives...")
n_images = len(images)
all_labels = np.concatenate((labels, negatives), axis=0)
label_sets = data.train_valid_sets(n_images, all_labels, TRAIN_RATE)

Generating negative set...
Creating train & validation sets with negatives...


In [82]:
# Settings shared by every classifier configuration.
COMMON_PARAMS = dict(
    box_size=box_size,
    # slide_step=(60, 50),
    vectorization_params=dict(
        vectorize=descriptor_vector.hog,
        vectorize_args=[5],
    ),
)

# Classifier-specific fixed settings layered on top of COMMON_PARAMS.
# The merge is shallow, so every entry shares the same
# `vectorization_params` dict object.
_CLASSIFIER_EXTRAS = {
    'svc': {'gamma': 'scale'},
    'linear_svc': {},
    'decision_tree': {},
    'adaboost': {},
    'random_forest': {'n_estimators': 50},
}
GLOBAL_PARAMS = {
    name: {**COMMON_PARAMS, **extras}
    for name, extras in _CLASSIFIER_EXTRAS.items()
}

# Candidate values per parameter, keyed by classifier name; classifiers
# without an entry have nothing listed here.
CHANGING_PARAMS = {
    'linear_svc': {
        # 0.8, 1.0, 1.2
        'C': [tenths / 10 for tenths in (8, 10, 12)],
    },
    'random_forest': {
        'n_estimators': [50, 100],
    },
}

In [73]:
# Sanity check: length of the full (un-limited) label collection.
len(full_labels)

1284

In [84]:
# Evaluate one classifier: its fixed settings come from GLOBAL_PARAMS and the
# values to try come from CHANGING_PARAMS (empty dict if none are listed).
clf_name = "random_forest"
print(f"Trying classifier {clf_name}...")

fixed_params = GLOBAL_PARAMS[clf_name]
swept_params = CHANGING_PARAMS.get(clf_name, {})
results = try_params(images, label_sets, clf_name, fixed_params, swept_params)
print(results)

Trying classifier random_forest...
## Trying parameter `n_estimators`...
### with value `50`
First training...


Predicting windows: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 100/100 [00:37<00:00,  2.11it/s]


Adding 31 false positives / 75 predictions
Second training...


Predicting windows: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 100/100 [00:21<00:00,  4.55it/s]


### with value `100`
First training...


Predicting windows: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 100/100 [00:38<00:00,  1.71it/s]


Adding 25 false positives / 59 predictions
Second training...


Predicting windows: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 100/100 [00:21<00:00,  4.55it/s]


{'classifier': 'random_forest', 'global_params': {'n_estimators': 50}, 'results': [{'name': 'n_estimators', 'value': 50, 'score': array([0.2 , 0.22, 0.26, 0.24, 0.12, 0.06, 0.2 , 0.22, 0.06, 0.08, 0.18,
       0.24, 0.2 , 0.2 , 0.16, 0.14, 0.08, 0.3 , 0.12, 0.1 , 0.24, 0.08,
       0.32, 0.08, 0.22, 0.2 , 0.2 , 0.22, 0.12, 0.18, 0.26, 0.14, 0.02,
       0.2 , 0.48, 0.1 , 0.22, 0.08, 0.22, 0.12, 0.3 , 0.22, 0.1 , 0.26,
       0.22, 0.26, 0.22, 0.06, 0.24, 0.16, 0.2 ]), 'result': {'true_pos': 0, 'true_neg': 0, 'false_pos': 0, 'false_neg': 51}}, {'name': 'n_estimators', 'value': 100, 'score': array([0.13, 0.15, 0.38, 0.12, 0.21, 0.12, 0.25, 0.25, 0.23, 0.21, 0.11,
       0.38, 0.2 , 0.19, 0.19, 0.25, 0.25, 0.35, 0.18, 0.16, 0.15, 0.06,
       0.34, 0.16, 0.12, 0.17, 0.25, 0.22, 0.13, 0.29, 0.18, 0.1 , 0.15,
       0.29, 0.26, 0.09, 0.1 , 0.12, 0.27, 0.13, 0.39, 0.09, 0.15, 0.27,
       0.18, 0.18, 0.26, 0.29, 0.25, 0.18, 0.2 ]), 'result': {'true_pos': 0, 'true_neg': 0, 'false_pos': 0, 'fa

In [20]:
# Compare every configured classifier in one go.
# NOTE: the try_classifiers call below is commented out, so this cell only
# prints the message; `results` is not reassigned here and keeps whatever
# value an earlier cell gave it.
print("Trying classifiers...")
#results = try_classifiers(images, label_sets, GLOBAL_PARAMS, CHANGING_PARAMS)

Trying classifiers...


In [85]:
# Print the run configuration next to the scores so the output is
# self-describing. `pprint` comes from the notebook's import cell; it is no
# longer re-imported in the middle of the notebook.
print(LIMIT, NEG_SIZE, box_size)
pprint(results)

100 150 (155, 100)
{'classifier': 'random_forest',
 'global_params': {'n_estimators': 50},
 'results': [{'name': 'n_estimators',
              'result': {'false_neg': 51,
                         'false_pos': 0,
                         'true_neg': 0,
                         'true_pos': 0},
              'score': array([0.2 , 0.22, 0.26, 0.24, 0.12, 0.06, 0.2 , 0.22, 0.06, 0.08, 0.18,
       0.24, 0.2 , 0.2 , 0.16, 0.14, 0.08, 0.3 , 0.12, 0.1 , 0.24, 0.08,
       0.32, 0.08, 0.22, 0.2 , 0.2 , 0.22, 0.12, 0.18, 0.26, 0.14, 0.02,
       0.2 , 0.48, 0.1 , 0.22, 0.08, 0.22, 0.12, 0.3 , 0.22, 0.1 , 0.26,
       0.22, 0.26, 0.22, 0.06, 0.24, 0.16, 0.2 ]),
              'value': 50},
             {'name': 'n_estimators',
              'result': {'false_neg': 51,
                         'false_pos': 0,
                         'true_neg': 0,
                         'true_pos': 0},
              'score': array([0.13, 0.15, 0.38, 0.12, 0.21, 0.12, 0.25, 0.25, 0.23, 0.21, 0.11,
       0.38, 0.

In [28]:
# Per-classifier summary: show the `result` entry of the first recorded run.
#
# `results` has one of two shapes depending on which cell produced it:
#   * try_classifiers -> {clf_name: {'classifier': ..., 'results': [...]}, ...}
#   * try_params      -> {'classifier': clf_name, 'global_params': ..., 'results': [...]}
# The previous loop assumed the first shape; with the try_params shape it
# iterated over the top-level keys and indexed the classifier-name string,
# raising a TypeError. Normalise to the per-classifier mapping first.
# (`pprint` comes from the notebook's import cell.)
if isinstance(results.get('classifier'), str) and 'results' in results:
    results_by_clf = {results['classifier']: results}
else:
    results_by_clf = results

for clf, clf_results in results_by_clf.items():
    print(clf)
    runs = clf_results['results']
    if runs:
        pprint(runs[0]['result'])
    else:
        # Nothing was recorded for this classifier; say so instead of
        # failing on runs[0].
        print("(no runs recorded)")

svc
{'f-score': 0.8235294117647058,
 'false_neg': 6,
 'false_pos': 0,
 'precision': 1.0,
 'recall': 0.7,
 'true_neg': 13,
 'true_pos': 14}
linear_svc
{'f-score': 0.9189189189189189,
 'false_neg': 3,
 'false_pos': 0,
 'precision': 1.0,
 'recall': 0.85,
 'true_neg': 13,
 'true_pos': 17}
decision_tree
{'f-score': 0.5185185185185185,
 'false_neg': 13,
 'false_pos': 0,
 'precision': 1.0,
 'recall': 0.35,
 'true_neg': 13,
 'true_pos': 7}
adaboost
{'f-score': 0.7499999999999999,
 'false_neg': 8,
 'false_pos': 0,
 'precision': 1.0,
 'recall': 0.6,
 'true_neg': 13,
 'true_pos': 12}
random_forest
{'f-score': 0.888888888888889,
 'false_neg': 4,
 'false_pos': 0,
 'precision': 1.0,
 'recall': 0.8,
 'true_neg': 13,
 'true_pos': 16}


In [32]:
# Save results
# Written with pickle: the output recorded for this cell shows the results
# holding a function object (the `vectorize` callable) and a JSON attempt
# failing on it. Only unpickle this file from a trusted source.
with open('./results_compare.pickle', mode='wb') as pickle_file:
    pickle.dump(results, pickle_file)

{'adaboost': {'classifier': 'adaboost',
              'global_params': {'adaboost': {'box_size': (147, 94),
                                             'vectorization_params': {'vectorize': <function hog at 0x00000202B35BA840>,
                                                                      'vectorize_args': []}},
                                'decision_tree': {'box_size': (147, 94),
                                                  'vectorization_params': {'vectorize': <function hog at 0x00000202B35BA840>,
                                                                           'vectorize_args': []}},
                                'linear_svc': {'box_size': (147, 94),
                                               'vectorization_params': {'vectorize': <function hog at 0x00000202B35BA840>,
                                                                        'vectorize_args': []}},
                                'random_forest': {'box_size': (147, 94),
                 

TypeError: Object of type function is not JSON serializable