Skip to content
Permalink
Browse files

Fix #6, a couple of bugs remaining

  • Loading branch information...
sdpython committed Jan 3, 2019
1 parent 0de24c1 commit 3665d99bd7108d8dcba053a2eac3eaf6343587a0
@@ -30,7 +30,7 @@
import src


class TestSearchPredictionsImages(ExtTestCase):
class TestSearchPredictionsImagesKeras(ExtTestCase):

def test_search_predictions_keras(self):
fLOG(
@@ -0,0 +1,110 @@
# -*- coding: utf-8 -*-
"""
@brief test log(time=10s)
"""

import sys
import os
import unittest
import warnings
from contextlib import redirect_stderr
from io import StringIO
import pandas
import numpy
from pyquickhelper.loghelper import fLOG
from pyquickhelper.pycode import ExtTestCase, get_temp_folder
from pyquickhelper.filehelper import unzip_files


try:
import src
except ImportError:
path = os.path.normpath(
os.path.abspath(
os.path.join(
os.path.split(__file__)[0],
"..",
"..")))
if path not in sys.path:
sys.path.append(path)
import src


class TestSearchPredictionsImagesTorch(ExtTestCase):
    """
    Tests :class:`SearchEnginePredictionImages` with a pretrained
    :epkg:`torch` model (squeezenet) on a small image folder.
    """

    def test_search_predictions_torch(self):
        fLOG(
            __file__,
            self._testMethodName,
            OutputPrint=__name__ == "__main__")

        from src.mlinsights.search_rank import SearchEnginePredictionImages

        # We delay the import as keras backend is not necessarily available.
        # stderr is silenced because torchvision may emit warnings on import.
        with redirect_stderr(StringIO()):
            try:
                import torchvision.models as tmodels
            except (SyntaxError, ModuleNotFoundError) as e:
                # torch not installed: skip the test without failing.
                warnings.warn(
                    "torch is not available: {0}".format(e))
                return
            from torchvision import datasets, transforms
            from torch.utils.data import DataLoader

        # deep learning model (pretrained weights are downloaded if needed)
        model = tmodels.squeezenet1_0(pretrained=True)

        # images: unzip the test fixture into a fresh temporary folder
        temp = get_temp_folder(__file__, "temp_search_predictions_torch")
        dest = os.path.join(temp, "simages")
        os.mkdir(dest)
        zipname = os.path.join(
            temp, "..", "..", "..", "_doc", "notebooks", "data", "dog-cat-pixabay.zip")
        files = unzip_files(zipname, where_to=dest)
        self.assertTrue(len(files) > 0)

        # sequence of images, resized/cropped to the 224x224 input
        # squeezenet expects; batch_size must stay 1 (see _prepare_fit)
        trans = transforms.Compose([transforms.Resize((224, 224)),
                                    transforms.CenterCrop(224),
                                    transforms.ToTensor()])
        imgs_ = datasets.ImageFolder(temp, trans)
        dataloader = DataLoader(imgs_, batch_size=1,
                                shuffle=False, num_workers=1)
        img_seq = iter(dataloader)
        imgs = list(img[0] for img in img_seq)

        # search
        se = SearchEnginePredictionImages(model, n_neighbors=5)
        r = repr(se)
        self.assertIn("SearchEnginePredictionImages", r)

        # fit
        se.fit(imgs_, fLOG=fLOG)

        # neighbors of the first image: it must be its own closest
        # neighbor with distance 0
        score, ind, meta = se.kneighbors(imgs[0])

        # assert
        self.assertIsInstance(ind, (list, numpy.ndarray))
        self.assertEqual(len(ind), 5)
        self.assertEqual(ind[0], 0)

        self.assertIsInstance(score, numpy.ndarray)
        self.assertEqual(score.shape, (5,))
        self.assertEqual(score[0], 0)

        self.assertIsInstance(meta, (numpy.ndarray, pandas.DataFrame))
        self.assertEqual(meta.shape, (5, 1))
        # normalize path separators so the test passes on Windows too
        self.assertEqual(meta.iloc[0, 0].replace('\\', '/'),
                         'simages/cat-1151519__480.jpg')

        # neighbors 2: query with the whole batch of images
        score, ind, meta = se.kneighbors(imgs)

        self.assertIsInstance(ind, (list, numpy.ndarray))
        self.assertIsInstance(score, numpy.ndarray)
        self.assertIsInstance(meta, (numpy.ndarray, pandas.DataFrame))


if __name__ == "__main__":
unittest.main()
@@ -39,6 +39,11 @@ def model_featurizer(model, **params):
return model_featurizer_keras(model, **params)
else:
tried.append("Keras")
if hasattr(model, "forward"):
# It should be a torch model.
return model_featurizer_torch(model, **params)
else:
tried.append("torch")
raise FeaturizerTypeError("Unable to process type '{0}', allowed:\n{1}".format(
type(model), "\n".join(sorted(str(_) for _ in tried))))

@@ -176,3 +181,49 @@ def feat(X, model, many, shapes):
return wrap_predict_keras(X, model.predict, many, shapes)

return lambda X, many, model=model, shapes=model._feed_input_shapes[0]: feat(X, model, many, shapes)


def wrap_predict_torch(X, fct, many, shapes):
    """
    Checks types and dimension.
    Calls *fct* and returns the appropriate type:
    a flat :epkg:`numpy` vector if *X* is a single observation,
    a stacked 2D :epkg:`numpy` array otherwise.

    @param      X       tensor (one observation) or indexable batch of tensors
    @param      fct     function (typically a model forward pass),
                        returns a :epkg:`torch` tensor
    @param      many    many observations or just one
    @param      shapes  expected input shapes for the neural network,
                        ``None`` means *X* is already correctly shaped
    @return             :epkg:`numpy` array
    """
    if many:
        # One forward pass per observation. The tensor must be detached
        # and converted to numpy before stacking (it may carry gradient
        # history); the previous code called .ravel() on the raw tensor.
        ys = [fct(X[i]).detach().numpy().ravel() for i in range(X.shape[0])]
        return numpy.stack(ys)
    if shapes is None or len(X.shape) == len(shapes):
        t = fct(X)
    else:
        # Add the missing batch dimension expected by the network.
        t = fct(X[numpy.newaxis, :, :, :])
    return t.detach().numpy().ravel()


def model_featurizer_torch(model, layer=None):
    """
    Builds a featurizer from a :epkg:`torch` model.
    It returns a function which returns the output of the model
    for one observation or a batch of observations.

    @param      model   model to use to featurize a vector
    @param      layer   index of the layer to keep; only ``None``
                        (the output of the full model) is implemented
    @return             function ``f(X, many)``
    @raise              NotImplementedError if *layer* is not ``None``
    """
    if layer is not None:
        # The previous implementation used the keras API here
        # (model.layers[layer].output, model.__class__(model.input, output))
        # which does not exist on a torch module and always failed
        # with AttributeError. Fail explicitly instead.
        raise NotImplementedError(
            "Extracting a specific layer of a torch model "
            "is not implemented.")

    def feat(X, model, many, shapes):
        "wraps torch"
        return wrap_predict_torch(X, model.forward, many, shapes)

    return lambda X, many, model=model, shapes=None: feat(X, model, many, shapes)
@@ -29,7 +29,8 @@ def __init__(self, fct, fct_params=None, **knn):
super().__init__(**knn)
self._fct_params = fct_params
self._fct_init = fct
if callable(fct) and not hasattr(fct, 'predict'):
if callable(fct) and not hasattr(fct, 'predict') and \
not hasattr(fct, 'forward'):
self.fct = fct
else:
if fct_params is None:
@@ -15,7 +15,8 @@ class SearchEnginePredictionImages(SearchEnginePredictions):
:ref:`searchimageskerasrst` or :ref:`searchimagestorchrst`.
"""

def _prepare_fit(self, data=None, features=None, metadata=None, transform=None, n=None, fLOG=None):
def _prepare_fit(self, data=None, features=None, metadata=None,
transform=None, n=None, fLOG=None):
"""
Stores data in the class itself.
@@ -29,36 +30,59 @@ def _prepare_fit(self, data=None, features=None, metadata=None, transform=None,
@param n takes *n* images (or ``len(iter_images)``)
@param fLOG logging function
"""
iter_images = data
# We delay the import as keras backend is not necessarily installed.
from keras.preprocessing.image import Iterator
from keras_preprocessing.image import DirectoryIterator, NumpyArrayIterator
if not isinstance(iter_images, (Iterator, DirectoryIterator, NumpyArrayIterator)):
raise NotImplementedError(
"iter_images must be a keras Iterator. No option implemented for type {0}.".format(type(iter_images)))
if iter_images.batch_size != 1:
raise ValueError("batch_size must be 1 not {0}".format(
iter_images.batch_size))
self.iter_images_ = iter_images
if n is None:
n = len(iter_images)
if not hasattr(iter_images, "filenames"):
raise NotImplementedError(
"Iterator does not iterate on images but numpy arrays (not implemented).")
if "torch" in str(type(data)):
from torch.utils.data import DataLoader
dataloader = DataLoader(
data, batch_size=1, shuffle=False, num_workers=1)
self.iter_images_ = iter_images = iter(
zip(dataloader, data.samples))
if n is None:
n = len(data)
elif "keras" in str(type(data)):
iter_images = data
# We delay the import as keras backend is not necessarily installed.
from keras.preprocessing.image import Iterator
from keras_preprocessing.image import DirectoryIterator, NumpyArrayIterator
if not isinstance(iter_images, (Iterator, DirectoryIterator, NumpyArrayIterator)):
raise NotImplementedError(
"iter_images must be a keras Iterator. No option implemented for type {0}.".format(type(iter_images)))
if iter_images.batch_size != 1:
raise ValueError("batch_size must be 1 not {0}".format(
iter_images.batch_size))
self.iter_images_ = iter_images
if n is None:
n = len(iter_images)
if not hasattr(iter_images, "filenames"):
raise NotImplementedError(
"Iterator does not iterate on images but numpy arrays (not implemented).")
else:
raise TypeError("Unexpected data type {0}.".format(type(data)))

def get_current_index(flow):
"get current index"
return flow.index_array[(flow.batch_index + flow.n - 1) % flow.n]

def iterator_feature_meta():
"iterators on metadaat"
for i, im in zip(range(n), iter_images):
name = iter_images.filenames[get_current_index(iter_images)]
yield im[0], dict(name=name)
"iterators on metadata"
def accessor(iter_images):
if hasattr(iter_images, 'filenames'):
# keras
return (lambda i, ite: (ite, iter_images.filenames[get_current_index(iter_images)]))
else:
# torch
return (lambda i, ite: (ite[0], ite[1][0]))
acc = accessor(iter_images)

for i, it in zip(range(n), iter_images):
im, name = acc(i, it)
if not isinstance(name, str):
print(name)
raise TypeError(
"name should be a string, not {0}".format(type(name)))
yield im[0], dict(name=name, i=i)
if fLOG and i % 10000 == 0:
fLOG(
'[SearchEnginePredictionImages.fit] i={}/{} - {}'.format(i, n, name))

super()._prepare_fit(data=iterator_feature_meta(), transform=transform)

def fit(self, iter_images, n=None, fLOG=None):
@@ -87,15 +111,23 @@ def kneighbors(self, iter_images, n_neighbors=None):
*meta* is the metadata.
"""
# We delay the import as keras backend is not necessarily installed.
from keras.preprocessing.image import Iterator
from keras_preprocessing.image import DirectoryIterator, NumpyArrayIterator
if not isinstance(iter_images, (Iterator, DirectoryIterator, NumpyArrayIterator)):
raise NotImplementedError(
"iter_images must be a keras Iterator. No option implemented for type {0}.".format(type(iter_images)))
if iter_images.batch_size != 1:
raise ValueError("batch_size must be 1 not {0}".format(
iter_images.batch_size))
for img in iter_images:
X = img[0]
break
return super().kneighbors(X, n_neighbors=n_neighbors)
if "keras" in str(iter_images):
# keras, it expects an iterator
from keras.preprocessing.image import Iterator
from keras_preprocessing.image import DirectoryIterator, NumpyArrayIterator
if not isinstance(iter_images, (Iterator, DirectoryIterator, NumpyArrayIterator)):
raise NotImplementedError(
"iter_images must be a keras Iterator. No option implemented for type {0}.".format(type(iter_images)))
if iter_images.batch_size != 1:
raise ValueError("batch_size must be 1 not {0}".format(
iter_images.batch_size))
for img in iter_images:
X = img[0]
break
return super().kneighbors(X, n_neighbors=n_neighbors)
elif "torch" in str(type(iter_images)):
# torch: it expects a tensor
X = iter_images
return super().kneighbors(X, n_neighbors=n_neighbors)
else:
raise TypeError("Unexpected type {0}".format(type(iter_images)))

0 comments on commit 3665d99

Please sign in to comment.
You can’t perform that action at this time.