In [None]:
from pathlib import Path

from src.train import train_pipeline, load_classifier, validate_pipeline, retrain_with_hard_negatives
from src.evaluation import evaluate_on_dataset
from src.inference import generate_submission

In [2]:
# Root folders: raw data, saved model artefacts, and the feature dataset
# that the training cells save under the data folder.
data_dir = Path("xray_data")
models_dir = Path("models")
dataset_dir = data_dir.joinpath("dataset")

# Image folders, one per split.
train_images, val_images, test_images = (
    data_dir.joinpath("images", split) for split in ("train", "val", "test")
)

# Label folders; no label path is defined for the test split.
train_labels, val_labels = (
    data_dir.joinpath("labels", split) for split in ("train", "val")
)

In [6]:
# Initial training run. Per the recorded log it makes two passes over the
# training images (SIFT descriptors for the BoVW vocabulary, then feature
# extraction), saves the dataset under dataset_dir and the classifier under
# models_dir. The two passes took roughly 30 minutes in the recorded run.
# NOTE(review): dataset_exist=False presumably forces the dataset to be
# rebuilt instead of loaded from dataset_dir -- confirm in src.train.
# The logged Background class holds exactly 6000 samples, matching max_negatives.
train_pipeline(
    train_images,
    train_labels,
    models_dir,
    dataset_dir=dataset_dir,
    dataset_exist=False,
    n_samples=2,
    max_negatives=6000,
    use_sift=True,
)

pass 1/2: collecting SIFT descriptors for vocabulary...


  0%|          | 0/4200 [00:00<?, ?it/s]

100%|██████████| 4200/4200 [08:03<00:00,  8.68it/s]


collected 10582 SIFT descriptors
building BoVW vocabulary from 630704 descriptors...
vocabulary built (with )200 words)
pass 2/2: extracting features...


100%|██████████| 4200/4200 [22:34<00:00,  3.10it/s]


class 0 (Hammer): 857 samples
class 1 (Knife): 1115 samples
class 2 (Gun): 1310 samples
class 3 (Wrench): 1033 samples
class 4 (HandCuffs): 931 samples
class 5 (Bullet): 969 samples
class 6 (Background): 6000 samples
dataset saved to xray_data\dataset
training on 12215 samples...
classifier saved to models


(XGBClassifier(base_score=None, booster=None, callbacks=None,
               colsample_bylevel=None, colsample_bynode=None,
               colsample_bytree=None, device=None, early_stopping_rounds=None,
               enable_categorical=False, eval_metric='mlogloss',
               feature_types=None, feature_weights=None, gamma=None,
               grow_policy=None, importance_type=None,
               interaction_constraints=None, learning_rate=None, max_bin=None,
               max_cat_threshold=None, max_cat_to_onehot=None,
               max_delta_step=None, max_depth=None, max_leaves=None,
               min_child_weight=None, missing=nan, monotone_constraints=None,
               multi_strategy=None, n_estimators=None, n_jobs=None, num_class=7, ...),
 StandardScaler(),
 PCA(n_components=0.95),
 StandardScaler(),
 MiniBatchKMeans(batch_size=1000, n_clusters=200, random_state=42))

In [4]:
# Baseline check of the freshly trained models on the validation split.
# The recorded run prints a per-class precision/recall/F1 table; the value
# returned by validate_pipeline is kept in validate_report.
validate_report = validate_pipeline(
    models_dir,
    images_dir=val_images,
    labels_dir=val_labels,
    nb_samples=1,
    use_sift=True,
)

pass 1/1: extracting features...


100%|██████████| 900/900 [01:11<00:00, 12.56it/s]


class 0 (Hammer): 181 samples
class 1 (Knife): 218 samples
class 2 (Gun): 266 samples
class 3 (Wrench): 220 samples
class 4 (HandCuffs): 213 samples
class 5 (Bullet): 222 samples
class 6 (Background): 900 samples
              precision    recall  f1-score   support

      Hammer       0.71      0.69      0.70       181
       Knife       0.69      0.57      0.63       218
         Gun       0.78      0.73      0.75       266
      Wrench       0.70      0.59      0.64       220
   HandCuffs       0.83      0.91      0.87       213
      Bullet       0.86      0.76      0.81       222
  Background       0.86      0.96      0.91       900

    accuracy                           0.81      2220
   macro avg       0.78      0.74      0.76      2220
weighted avg       0.80      0.81      0.80      2220



In [7]:
# Reload the classifier saved under models_dir and score it on 100 validation
# images (about 12 s per image in the recorded run). The metric itself is
# defined in src.evaluation.
classifier = load_classifier(models_dir)
score = evaluate_on_dataset(
    val_images,
    val_labels,
    classifier,
    nb_of_images=100,
)
print(f"validation score: {score:.4f}")

100%|██████████| 100/100 [20:12<00:00, 12.13s/it]

validation score: 0.2047





NB: The following cell can take several hours to run (roughly 6 h 45 min for 1,000 images in the recorded run), because it runs the entire detection pipeline to find false positives and extract their features for the hard-negative mining step.

In [9]:
# Hard-negative mining followed by retraining. Per the recorded log this loads
# the dataset from dataset_dir, processes max_images training images, saves the
# dataset back to dataset_dir and saves the retrained classifier to models_dir.
# The log shows exactly 10000 Background hard negatives added, matching
# max_total_bg.
# NOTE(review): the dataset is saved to the same folder it was loaded from, so
# re-running this cell presumably stacks new hard negatives on top of the
# previous ones -- confirm in src.train before re-executing.
retrain_with_hard_negatives(
    train_images,
    train_labels,
    models_dir,
    dataset_dir,
    max_images=1000,
    max_per_image_nonbg=None,
    max_total_bg=10000,
)

dataset loaded from xray_data\dataset, 12215 samples


100%|██████████| 1000/1000 [6:44:10<00:00, 24.25s/it] 


created 22537 hard negatives
dataset saved to xray_data\dataset
distribution of classes in the original training set:
  Hammer: 857
  Knife: 1115
  Gun: 1310
  Wrench: 1033
  HandCuffs: 931
  Bullet: 969
  Background: 6000
distribution of classes in the hard negatives that were added:
  Hammer: 3287
  Knife: 2570
  Gun: 1697
  Wrench: 2459
  HandCuffs: 2002
  Bullet: 522
  Background: 10000
distribution of classes in the new training set:
  Hammer: 4144
  Knife: 3685
  Gun: 3007
  Wrench: 3492
  HandCuffs: 2933
  Bullet: 1491
  Background: 16000
retraining on 34752 samples...
classifier saved to models


(XGBClassifier(base_score=None, booster=None, callbacks=None,
               colsample_bylevel=None, colsample_bynode=None,
               colsample_bytree=None, device=None, early_stopping_rounds=None,
               enable_categorical=False, eval_metric='mlogloss',
               feature_types=None, feature_weights=None, gamma=None,
               grow_policy=None, importance_type=None,
               interaction_constraints=None, learning_rate=None, max_bin=None,
               max_cat_threshold=None, max_cat_to_onehot=None,
               max_delta_step=None, max_depth=None, max_leaves=None,
               min_child_weight=None, missing=nan, monotone_constraints=None,
               multi_strategy=None, n_estimators=None, n_jobs=None, num_class=7, ...),
 StandardScaler(),
 PCA(n_components=0.95),
 StandardScaler(),
 MiniBatchKMeans(batch_size=1000, n_clusters=200, random_state=42))

In [10]:
# Same validation run as the baseline cell, now against the models saved by
# the hard-negative retraining step.
# NOTE(review): this name is plural while the baseline cell uses
# `validate_report`; the two results are stored separately, but the naming is
# easy to confuse.
validate_reports = validate_pipeline(
    models_dir,
    images_dir=val_images,
    labels_dir=val_labels,
    nb_samples=1,
    use_sift=True,
)

pass 1/1: extracting features...


100%|██████████| 900/900 [01:39<00:00,  9.00it/s]


class 0 (Hammer): 181 samples
class 1 (Knife): 218 samples
class 2 (Gun): 266 samples
class 3 (Wrench): 220 samples
class 4 (HandCuffs): 213 samples
class 5 (Bullet): 222 samples
class 6 (Background): 900 samples
              precision    recall  f1-score   support

      Hammer       0.63      0.56      0.59       181
       Knife       0.63      0.43      0.51       218
         Gun       0.85      0.52      0.64       266
      Wrench       0.69      0.39      0.49       220
   HandCuffs       0.79      0.85      0.82       213
      Bullet       0.97      0.42      0.58       222
  Background       0.68      0.99      0.81       900

    accuracy                           0.71      2220
   macro avg       0.75      0.59      0.64      2220
weighted avg       0.73      0.71      0.69      2220



In [11]:
# Reload the classifier from models_dir (saved again by the retraining step)
# and repeat the 100-image validation scoring. `classifier` and `score` are
# rebound here, replacing the values from the earlier evaluation cell.
classifier = load_classifier(models_dir)
score = evaluate_on_dataset(
    val_images,
    val_labels,
    classifier,
    nb_of_images=100,
)
print(f"validation score: {score:.4f}")

100%|██████████| 100/100 [16:25<00:00,  9.85s/it]

validation score: 0.2483





NB: The following cell can take a few hours to run (about 2 hours for the 900 test images in the recorded run).

In [12]:
# Run inference over the test images with the models stored in models_dir and
# write the result to submission.csv (path relative to the working directory).
generate_submission(
    test_images,
    models_dir,
    output_csv="submission.csv",
)

loading classifier...
running inference...


inference: 100%|██████████| 900/900 [1:59:39<00:00,  7.98s/it]


submission saved to submission.csv
inference completed.
