In [14]:
import logging
# Fetch the root logger (getLogger() with no name) and lower its threshold to
# DEBUG so that records of every severity pass the root logger's level check.
logger = logging.getLogger()
logger.setLevel(logging.DEBUG)
# Smoke test. This deliberately goes through the module-level logging.debug():
# per the stdlib docs it logs on the root logger and first calls
# logging.basicConfig() when the root logger has no handlers yet, which is what
# produces the "DEBUG:root:test" line in this cell's output.
logging.debug("test")

DEBUG:root:test


In [15]:
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import classification_report
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline
import time

In [16]:
from src.data.make_dataset import make_dataset
from src.data.dataset_loaders import AerialCarsDatasetLoader
from src.data.image_loaders import AerialCarsImageLoader
from src.features.pipelines import RawImageToFeatures
from src.features.processors import RotateToHorizontal, Resize
from src.features.descriptors import HOGDescriptor, LBPDescriptor

## Synchronous approach

In [17]:
# Build the dataset with workers=0 (the synchronous configuration this section
# is about) and time the whole call.
# perf_counter() is a monotonic, high-resolution clock intended for measuring
# durations; unlike time.time() it cannot be skewed by wall-clock adjustments.
start_time = time.perf_counter()

data = make_dataset(
    input_folder_filepath='../data/interim/aerial-cars-dataset/',
    output_filepath='output_path',
    dataset_loader=AerialCarsDatasetLoader(
        image_loader=AerialCarsImageLoader()
    ),
    images_files_types=('jpg', 'JPG', 'png'),
    annotations_files_types=('txt',),
    process_pipeline=RawImageToFeatures(
        processors=[
            RotateToHorizontal(),
            Resize(
                output_size=(32, 64)
            )
        ],
        descriptors=[
            HOGDescriptor(
                orientations=9,
                cells_per_block=(2, 2),
                pixels_per_cell=(4, 4),
                multichannel=True,
                visualize=True
            ),
            LBPDescriptor(
                bins=256,
                range=(0, 256)
            )
        ]
    ),
    # NOTE(review): same (32, 64) as Resize(output_size=...) above; presumably
    # the two must stay in sync -- confirm against make_dataset.
    negative_images_size=(32, 64),
    negative_examples_size=0.5,
    workers=0,
    verbose=True
)

# Take the reading before printing so the output calls are not part of the
# measured interval.
elapsed_seconds = time.perf_counter() - start_time

print()
print("--- %s seconds ---" % elapsed_seconds)

INFO:src.data.make_dataset:Reading images...
100%|██████████| 31/31 [00:00<00:00, 38.63it/s]
INFO:src.data.make_dataset:Croping cars...
100%|██████████| 15/15 [00:00<00:00, 506.57it/s]
INFO:src.data.make_dataset:Cropping random boxes...
100%|██████████| 314/314 [00:00<00:00, 15210.62it/s]
INFO:src.data.make_dataset:Processing images...
100%|██████████| 628/628 [00:26<00:00, 23.90it/s]
--- 27.144628763198853 seconds ---



In [18]:
# Each item of `data` is indexed positionally: slot 1 feeds X, slot 2 feeds y.
X = [sample[1] for sample in data]
y = [sample[2] for sample in data]

# Fixed random_state so the 67/33 split is repeatable for a given `data`.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.33, random_state=42
)

# Fit and report the same SVC twice: first on the features as-is, then behind a
# StandardScaler. The scaled pipeline runs last, so `clf` and `y_pred` refer to
# it once the cell has finished.
for clf in (
    make_pipeline(SVC(gamma='auto')),
    make_pipeline(StandardScaler(), SVC(gamma='auto')),
):
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)
    print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

           0       0.84      0.91      0.87        96
           1       0.91      0.85      0.88       112

    accuracy                           0.88       208
   macro avg       0.88      0.88      0.87       208
weighted avg       0.88      0.88      0.88       208

              precision    recall  f1-score   support

           0       0.95      0.98      0.96        96
           1       0.98      0.96      0.97       112

    accuracy                           0.97       208
   macro avg       0.97      0.97      0.97       208
weighted avg       0.97      0.97      0.97       208



## Parallel approach

In [21]:
# Build the dataset with workers=7 (the parallel configuration this section is
# about) and time the whole call.
# perf_counter() is a monotonic, high-resolution clock intended for measuring
# durations; unlike time.time() it cannot be skewed by wall-clock adjustments.
start_time = time.perf_counter()

data = make_dataset(
    input_folder_filepath='../data/interim/aerial-cars-dataset/',
    output_filepath='output_path',
    dataset_loader=AerialCarsDatasetLoader(
        image_loader=AerialCarsImageLoader()
    ),
    images_files_types=('jpg', 'JPG', 'png'),
    annotations_files_types=('txt',),
    process_pipeline=RawImageToFeatures(
        processors=[
            RotateToHorizontal(),
            Resize(
                output_size=(32, 64)
            )
        ],
        descriptors=[
            HOGDescriptor(
                orientations=9,
                cells_per_block=(2, 2),
                pixels_per_cell=(4, 4),
                multichannel=True,
                visualize=True
            ),
            LBPDescriptor(
                bins=256,
                range=(0, 256)
            )
        ]
    ),
    # NOTE(review): same (32, 64) as Resize(output_size=...) above; presumably
    # the two must stay in sync -- confirm against make_dataset.
    negative_images_size=(32, 64),
    negative_examples_size=0.5,
    workers=7,
    verbose=True
)

# Take the reading before printing so the output calls are not part of the
# measured interval.
elapsed_seconds = time.perf_counter() - start_time

print()
print("--- %s seconds ---" % elapsed_seconds)

INFO:src.data.make_dataset:Reading images...
100%|██████████| 31/31 [00:00<00:00, 41.20it/s]
INFO:src.data.make_dataset:Croping cars...
100%|██████████| 15/15 [00:00<00:00, 589.59it/s]
INFO:src.data.make_dataset:Cropping random boxes...
100%|██████████| 314/314 [00:00<00:00, 14922.46it/s]
INFO:src.data.make_dataset:Processing images...

--- 14.50227427482605 seconds ---


In [22]:
# Each item of `data` is indexed positionally: slot 1 feeds X, slot 2 feeds y.
X = [sample[1] for sample in data]
y = [sample[2] for sample in data]

# Fixed random_state so the 67/33 split is repeatable for a given `data`.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.33, random_state=42
)

# Fit and report the same SVC twice: first on the features as-is, then behind a
# StandardScaler. The scaled pipeline runs last, so `clf` and `y_pred` refer to
# it once the cell has finished.
for clf in (
    make_pipeline(SVC(gamma='auto')),
    make_pipeline(StandardScaler(), SVC(gamma='auto')),
):
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)
    print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

           0       0.77      0.98      0.86        96
           1       0.98      0.75      0.85       112

    accuracy                           0.86       208
   macro avg       0.87      0.86      0.86       208
weighted avg       0.88      0.86      0.85       208

              precision    recall  f1-score   support

           0       0.95      0.98      0.96        96
           1       0.98      0.96      0.97       112

    accuracy                           0.97       208
   macro avg       0.97      0.97      0.97       208
weighted avg       0.97      0.97      0.97       208

