Skip to content
This repository has been archived by the owner on Sep 18, 2024. It is now read-only.

Commit

Permalink
Improve documentation and the checking of parameters
Browse files Browse the repository at this point in the history
  • Loading branch information
rragundez committed Jan 20, 2019
1 parent 427f106 commit 611f605
Showing 1 changed file with 26 additions and 9 deletions.
35 changes: 26 additions & 9 deletions keras_preprocessing/image/dataframe_iterator.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@
from __future__ import print_function

import os
import warnings

import numpy as np
from pandas.api.types import is_numeric_dtype

Expand All @@ -21,9 +23,18 @@ class DataFrameIterator(Iterator):
# Arguments
dataframe: Pandas dataframe containing the filepaths relative to
`directory` or absolute paths if `directory` is None of the images
in a column and classes in another column/s that can be fed as raw
target data.
`directory` (or absolute paths if `directory` is None) of the
images in a string column. It should include other column/s
depending on the `class_mode`:
- if `class_mode` is `"categorical"` (default value) it must
include the `y_col` column with the class/es of each image.
Values in column can be string/list/tuple if a single class
or list/tuple if multiple classes.
- if `class_mode` is `"binary"` or `"sparse"` it must include
the given `y_col` column with class values as strings.
- if `class_mode` is `"other"` it should contain the columns
specified in `y_col`.
- if `class_mode` is `"input"` or `None` no extra column is needed.
directory: string, path to the directory to read images from, i.e. the
directory under which all the images are present. If None, data in `x_col`
column should be absolute paths.
Expand Down Expand Up @@ -159,26 +170,32 @@ def _check_params(self, df, x_col, y_col, classes):
# check labels are strings if class_mode is binary or sparse
# Note: categorical class_mode supports string, list and tuple
if self.class_mode in {'binary', 'sparse'}:
if not (is_numeric_dtype(df[y_col]) or
all(df[y_col].apply(lambda x: isinstance(x, str)))):
if not all(df[y_col].apply(lambda x: isinstance(x, str))):
raise TypeError('If class_mode="{}", y_col="{}" column '
'values must be numeric or string. '
'values must be strings.'
.format(self.class_mode, y_col))
# check that if binary there are only 2 different classes
if self.class_mode == 'binary':
if classes:
classes = set(classes)
if len(classes) != 2:
raise ValueError('If class_mode="binary" there must be 2 '
'classes. {} class(es) were given.'
'classes. {} class/es were given.'
.format(len(classes)))
elif df[y_col].nunique() != 2:
raise ValueError('If class_mode="binary" there must be 2 classes. '
'Found {} classes.'.format(df[y_col].nunique()))

if self.class_mode == 'categorical':
types = (str, list, tuple)
if not all(df[y_col].apply(lambda x: isinstance(x, types))):
raise TypeError('If class_mode="{}", y_col="{}" column '
'values must be type string, list or tuple.'
.format(self.class_mode, y_col))
# check that no classes are given if class_mode other or input
if classes and self.class_mode in {"other", "input", None}:
raise ValueError('classes cannot be set if class_mode'
' is either "other" or "input" or None.')
warnings.warn('`classes` will be ignored given the class_mode="{}"'
.format(self.class_mode))

def get_classes(self, df, y_col):
labels = []
Expand Down

0 comments on commit 611f605

Please sign in to comment.