Skip to content

Commit

Permalink
base API: train/test split, classifier (+ others), CLI, tests
Browse files Browse the repository at this point in the history
  • Loading branch information
martibosch committed Nov 14, 2019
1 parent b251aec commit f4fd9e7
Show file tree
Hide file tree
Showing 53 changed files with 2,373 additions and 15 deletions.
4 changes: 4 additions & 0 deletions detectree/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +1,5 @@
from .classifier import Classifier, ClassifierTrainer
from .train_test_split import TrainingSelector
from .utils import split_into_tiles

__version__ = '0.0.1'
414 changes: 414 additions & 0 deletions detectree/classifier.py

Large diffs are not rendered by default.

Empty file added detectree/cli/__init__.py
Empty file.
311 changes: 311 additions & 0 deletions detectree/cli/main.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,311 @@
import logging
from os import path

import click
import joblib
import pandas as pd

import detectree as dtr


# utils for the CLI
class OptionEatAll(click.Option):
# Option that can take an unlimided number of arguments
# Copied from Stephen Rauch's answer in stack overflow.
# https://bit.ly/2kstLhe
def __init__(self, *args, **kwargs):
self.save_other_options = kwargs.pop('save_other_options', True)
nargs = kwargs.pop('nargs', -1)
assert nargs == -1, 'nargs, if set, must be -1 not {}'.format(nargs)
super(OptionEatAll, self).__init__(*args, **kwargs)
self._previous_parser_process = None
self._eat_all_parser = None

def add_to_parser(self, parser, ctx):
def parser_process(value, state):
# method to hook to the parser.process
done = False
value = [value]
if self.save_other_options:
# grab everything up to the next option
while state.rargs and not done:
for prefix in self._eat_all_parser.prefixes:
if state.rargs[0].startswith(prefix):
done = True
if not done:
value.append(state.rargs.pop(0))
else:
# grab everything remaining
value += state.rargs
state.rargs[:] = []
value = tuple(value)

# call the actual process
self._previous_parser_process(value, state)

retval = super(OptionEatAll, self).add_to_parser(parser, ctx)
for name in self.opts:
our_parser = parser._long_opt.get(name) or parser._short_opt.get(
name)
if our_parser:
self._eat_all_parser = our_parser
self._previous_parser_process = our_parser.process
our_parser.process = parser_process
break
return retval


def _dict_from_kws(kws):
# Multiple key:value pair arguments in click, see https://bit.ly/32BaES3
if kws is not None:
kws = dict(kw.split(':') for kw in kws)
else:
kws = {}

return kws


def _init_classifier_trainer(pixel_features_builder_kws,
pixel_response_builder_kws, adaboost_kws,
num_estimators):
pixel_features_builder_kws = _dict_from_kws(pixel_features_builder_kws)
pixel_response_builder_kws = _dict_from_kws(pixel_response_builder_kws)
adaboost_kws = _dict_from_kws(adaboost_kws)

return dtr.ClassifierTrainer(
num_estimators=num_estimators,
pixel_features_builder_kws=pixel_features_builder_kws,
pixel_response_builder_kws=pixel_response_builder_kws,
adaboost_kws=adaboost_kws)


def _dump_clf(clf, output_filepath, logger):
joblib.dump(clf, output_filepath)
logger.info("Dumped trained classifier to %s", output_filepath)


# CLI
@click.group()
@click.version_option(version=dtr.__version__, message="%(version)s")
@click.pass_context
def cli(ctx):
log_fmt = '%(asctime)s - %(name)s - %(levelname)s - %(message)s'
logging.basicConfig(level=logging.INFO, format=log_fmt)
logger = logging.getLogger(__name__)
ctx.ensure_object(dict)
ctx.obj['LOGGER'] = logger


@cli.command()
@click.pass_context
@click.option('--img-filepaths', cls=OptionEatAll)
@click.option('--img-dir', type=click.Path(exists=True))
@click.option('--img-filename-pattern')
@click.option('--gabor-frequencies', cls=OptionEatAll)
@click.option('--gabor-num-orientations', cls=OptionEatAll)
@click.option('--response-bins-per-axis', type=int)
@click.option('--num-color-bins', type=int)
@click.option('--method')
@click.option('--num-components', type=int)
@click.option('--num-img-clusters', type=int)
@click.option('--train-prop', type=float)
@click.option('--output-filepath', type=click.Path())
def train_test_split(ctx, img_filepaths, img_dir, img_filename_pattern,
gabor_frequencies, gabor_num_orientations,
response_bins_per_axis, num_color_bins, method,
num_components, num_img_clusters, train_prop,
output_filepath):
logger = ctx.obj['LOGGER']

ts = dtr.TrainingSelector(img_filepaths=img_filepaths, img_dir=img_dir,
img_filename_pattern=img_filename_pattern,
gabor_frequencies=gabor_frequencies,
gabor_num_orientations=gabor_num_orientations,
response_bins_per_axis=response_bins_per_axis,
num_color_bins=num_color_bins)
num_imgs = len(ts.img_filepaths)
if img_dir is not None:
logger.info("Loaded %d images from %s", num_imgs, img_dir)
else:
logger.info("Loaded %d images", num_imgs)

tts_kws = {}
if method is not None:
tts_kws['method'] = method
if num_components is not None:
tts_kws['num_components'] = num_components
if num_img_clusters is not None:
tts_kws['num_img_clusters'] = num_img_clusters
if train_prop is not None:
tts_kws['train_prop'] = train_prop
df, evr = ts.train_test_split(return_evr=True, **tts_kws)
logger.info("Variance ratio explained by PCA: %f", evr)

if output_filepath is None:
output_filepath = 'split.csv'

df.to_csv(output_filepath)
logger.info("Dumped train/test split to %s", output_filepath)


@cli.command()
@click.pass_context
@click.option('--split-filepath', type=click.Path(exists=True))
@click.option('--response-img-dir', type=click.Path(exists=True))
@click.option('--img-filepaths', cls=OptionEatAll)
@click.option('--response-img-filepaths', cls=OptionEatAll)
@click.option('--img-dir', type=click.Path(exists=True))
@click.option('--img-filename-pattern')
@click.option('--method')
@click.option('--img-cluster', type=int)
@click.option('--num-estimators', type=int)
@click.option('--pixel-features-builder-kws', cls=OptionEatAll)
@click.option('--pixel-response-builder-kws', cls=OptionEatAll)
@click.option('--adaboost-kws', cls=OptionEatAll)
@click.option('--output-filepath', type=click.Path())
def train_classifier(ctx, split_filepath, response_img_dir, img_filepaths,
response_img_filepaths, img_dir, img_filename_pattern,
method, img_cluster, num_estimators,
pixel_features_builder_kws, pixel_response_builder_kws,
adaboost_kws, output_filepath):
logger = ctx.obj['LOGGER']

logger.info("Training classifier")
if split_filepath is not None:
split_df = pd.read_csv(split_filepath)
else:
split_df = None

ct = _init_classifier_trainer(pixel_features_builder_kws,
pixel_response_builder_kws, adaboost_kws,
num_estimators)

clf = ct.train_classifier(split_df=split_df,
response_img_dir=response_img_dir,
img_filepaths=img_filepaths,
response_img_filepaths=response_img_filepaths,
img_dir=img_dir,
img_filename_pattern=img_filename_pattern,
method=method, img_cluster=img_cluster)

if output_filepath is None:
output_filepath = 'clf.joblib'

_dump_clf(clf, output_filepath, logger)


@cli.command()
@click.pass_context
@click.argument('split_filepath', type=click.Path(exists=True))
@click.argument('response_img_dir', type=click.Path(exists=True))
@click.option('--num-estimators', type=int)
@click.option('--pixel-features-builder-kws', cls=OptionEatAll)
@click.option('--pixel-response-builder-kws', cls=OptionEatAll)
@click.option('--adaboost-kws', cls=OptionEatAll)
@click.option('--output-dir', type=click.Path(exists=True))
def train_classifiers(ctx, split_filepath, response_img_dir, num_estimators,
pixel_features_builder_kws, pixel_response_builder_kws,
adaboost_kws, output_dir):
logger = ctx.obj['LOGGER']

logger.info("Training classifiers")
if split_filepath is not None:
split_df = pd.read_csv(split_filepath)
else:
split_df = None

ct = _init_classifier_trainer(pixel_features_builder_kws,
pixel_response_builder_kws, adaboost_kws,
num_estimators)

clfs_dict = ct.train_classifiers(split_df, response_img_dir)

if output_dir is None:
output_dir = ''

for img_cluster in clfs_dict:
_dump_clf(clfs_dict[img_cluster],
path.join(output_dir, f"{img_cluster}.joblib"), logger)


@cli.command()
@click.pass_context
@click.argument('img_filepath', type=click.Path(exists=True))
@click.argument('clf_filepath', type=click.Path(exists=True))
@click.option('--tree-val', type=int)
@click.option('--nontree-val', type=int)
@click.option('--refine', is_flag=True)
@click.option('--refine-beta', type=int)
@click.option('--refine-int-rescale', type=int)
@click.option('--pixel-features-builder-kws', cls=OptionEatAll)
@click.option('--output-filepath', type=click.Path())
def classify_img(ctx, img_filepath, clf_filepath, tree_val, nontree_val,
refine, refine_beta, refine_int_rescale,
pixel_features_builder_kws, output_filepath):
logger = ctx.obj['LOGGER']

logger.info("Classifying %s with classifier of %s", img_filepath,
clf_filepath)

pixel_features_builder_kws = _dict_from_kws(pixel_features_builder_kws)
c = dtr.Classifier(tree_val=tree_val, nontree_val=nontree_val,
refine=refine, refine_beta=refine_beta,
refine_int_rescale=refine_int_rescale,
**pixel_features_builder_kws)

if output_filepath is None:
filename, ext = path.splitext(path.basename(img_filepath))
output_filepath = f"{filename}-pred{ext}"

c.classify_img(img_filepath, joblib.load(clf_filepath), output_filepath)
logger.info("Dumped predicted image to %s", output_filepath)


@cli.command()
@click.pass_context
@click.argument('split_filepath', type=click.Path(exists=True))
@click.option('--clf-filepath', type=click.Path(exists=True))
@click.option('--clf-dir', type=click.Path(exists=True))
@click.option('--method')
@click.option('--img-cluster', type=int)
@click.option('--tree-val', type=int)
@click.option('--nontree-val', type=int)
@click.option('--refine', is_flag=True)
@click.option('--refine-beta', type=int)
@click.option('--refine-int-rescale', type=int)
@click.option('--pixel-features-builder-kws', cls=OptionEatAll)
@click.option('--output-dir', type=click.Path(exists=True))
def classify_imgs(ctx, split_filepath, clf_filepath, clf_dir, method,
img_cluster, tree_val, nontree_val, refine, refine_beta,
refine_int_rescale, pixel_features_builder_kws, output_dir):
logger = ctx.obj['LOGGER']

split_df = pd.read_csv(split_filepath)

if clf_filepath is not None:
clf_dict = None
clf = joblib.load(clf_filepath)
logger.info("Classifying images from %s with classifier of %s",
split_filepath, clf_filepath)

if clf_dir is not None:
clf = None
clf_dict = {}
for img_cluster in split_df['img_cluster'].unique():
clf_dict[img_cluster] = joblib.load(
path.join(clf_dir, f"{img_cluster}.joblib"))

pixel_features_builder_kws = _dict_from_kws(pixel_features_builder_kws)

c = dtr.Classifier(tree_val=tree_val, nontree_val=nontree_val,
refine=refine, refine_beta=refine_beta,
refine_int_rescale=refine_int_rescale,
**pixel_features_builder_kws)

if output_dir is None:
output_dir = ''

pred_imgs = c.classify_imgs(split_df, output_dir, clf=clf,
clf_dict=clf_dict, method=method,
img_cluster=img_cluster)
logger.info("Dumped %d predicted images to %s", len(pred_imgs), output_dir)
59 changes: 59 additions & 0 deletions detectree/filters.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
import numpy as np
from scipy.ndimage.filters import _gaussian_kernel1d
from skimage.filters import gabor_kernel

__all__ = ['get_texture_kernel', 'get_gabor_filter_bank']


def _get_gaussian_kernel1d(sigma, order=0, truncate=4.0):
"""
Based on scipy.ndimage.filters.gaussian_filter1d
"""
truncate = 4
sd = float(sigma)
# make the radius of the filter equal to truncate standard deviations
lw = int(truncate * sd + 0.5)
# # Since we are calling correlate, not convolve, revert the kernel
# weights = _gaussian_kernel1d(sigma, order, lw)[::-1]
weights = _gaussian_kernel1d(sigma, order, lw)
return weights


def get_texture_kernel(sigma):
"""
Parameters
----------
sigma : numeric
Returns
-------
texture_kernel :
"""
g0_kernel_arr = _get_gaussian_kernel1d(sigma, 0)
g2_kernel_arr = _get_gaussian_kernel1d(sigma, 2)

return np.dot(g2_kernel_arr.reshape(1, -1).T, g0_kernel_arr.reshape(1, -1))


def get_gabor_filter_bank(frequencies, num_orientations):
"""
Parameters
----------
frequencies : list-like
num_orientations : int or list-like
Returns
-------
kernels :
"""
kernels = []

for frequency, _num_orientations in zip(frequencies, num_orientations):
for orientation_i in range(_num_orientations):
theta = orientation_i / _num_orientations * np.pi
kernel = np.real(gabor_kernel(frequency, theta=theta))
kernels.append(kernel)

return kernels
Loading

0 comments on commit f4fd9e7

Please sign in to comment.