-
Notifications
You must be signed in to change notification settings - Fork 14
/
_colorhist.py
66 lines (54 loc) · 1.93 KB
/
_colorhist.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
import numpy as np
from embetter.base import EmbetterBase
class ColorHistogramEncoder(EmbetterBase):
    """
    Encoder that generates an embedding based on the color histogram of the image.

    Arguments:
        n_buckets: number of buckets per color

    **Usage**:

    ```python
    import pandas as pd
    from sklearn.pipeline import make_pipeline

    from embetter.grab import ColumnGrabber
    from embetter.vision import ImageLoader, ColorHistogramEncoder

    # Let's say we start with a csv file with filepaths
    data = {"filepaths": ["tests/data/thiscatdoesnotexist.jpeg"]}
    df = pd.DataFrame(data)

    # Let's build a pipeline that grabs the column, turns it
    # into an image and embeds it.
    pipe = make_pipeline(
        ColumnGrabber("filepaths"),
        ImageLoader(),
        ColorHistogramEncoder()
    )

    # This pipeline can now encode each image in the dataframe
    pipe.fit_transform(df)
    ```
    """

    def __init__(self, n_buckets=256):
        self.n_buckets = n_buckets

    def transform(self, X, y=None):
        """
        Takes a sequence of `PIL.Image` and returns a numpy array representing
        a color histogram for each.

        Arguments:
            X: sized sequence of images; each item is converted with
                `np.array` and must yield an array with at least three
                dimensions whose last axis has at least three channels.
            y: ignored.

        Returns:
            Array of shape `(len(X), n_buckets * 3)`. Each row is the
            concatenation of the per-channel histogram counts for channels
            0, 1 and 2, in that order.
        """
        n_channels = 3
        output = np.zeros((len(X), self.n_buckets * n_channels))
        # The bin edges depend only on `n_buckets`, so compute them once per
        # call instead of once per channel per image. They span 0 to 255
        # (presumably 8-bit pixel values); `np.histogram` does not count
        # values that fall outside the outermost edges.
        bin_edges = np.linspace(0, 255, self.n_buckets + 1)
        for i, x in enumerate(X):
            arr = np.array(x)
            # Only the first three channels are used; any further channel
            # (e.g. a fourth one) is ignored.
            output[i, :] = np.concatenate(
                [
                    np.histogram(arr[:, :, channel].ravel(), bins=bin_edges)[0]
                    for channel in range(n_channels)
                ]
            )
        return output