Skip to content

Commit

Permalink
lint: flake8
Browse files Browse the repository at this point in the history
  • Loading branch information
paxcema committed Dec 22, 2023
1 parent c415465 commit a523ffa
Show file tree
Hide file tree
Showing 4 changed files with 11 additions and 11 deletions.
1 change: 0 additions & 1 deletion tests/integration_tests/test_rule_based.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,6 @@ def test_1_stack_overflow_survey(self):
df = pd.read_csv("tests/data/stack_overflow_survey_sample.csv")
config = {'engine': 'rule_based', 'pct_invalid': 0, 'seed': 420, 'mp_cutoff': 1e4}


expected_types = {
'Respondent': 'integer',
'Professional': 'binary',
Expand Down
1 change: 1 addition & 0 deletions tests/unit_tests/rule_based/test_infer_dtypes.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@

get_column_data_type = RuleBasedEngine.get_column_data_type


class TestInferDtypes(unittest.TestCase):
def test_negative_integers(self):
data = pd.DataFrame([-random.randint(-10, 10) for _ in range(100)], columns=['test_col'])
Expand Down
4 changes: 2 additions & 2 deletions type_infer/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ def __init__(self):


class BaseEngine:
def __init__(self, stable = True):
def __init__(self, stable=True):
self.stable = stable # whether the engine is stable or not (i.e. experimental)

def infer(self, df) -> TypeInformation:
Expand All @@ -36,4 +36,4 @@ def infer(self, df) -> TypeInformation:


class ENGINES:
RULE_BASED = 'rule_based'
RULE_BASED = 'rule_based'
16 changes: 8 additions & 8 deletions type_infer/rule_based/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@


class RuleBasedEngine(BaseEngine):
def __init__(self, config = None):
def __init__(self, config=None):
"""
:param config: a dictionary containing the configuration for the engine
pct_invalid : float
Expand Down Expand Up @@ -96,9 +96,9 @@ def infer(self, data: pd.DataFrame) -> TypeInformation:
# @TODO: Column removal logic (dropping a column if it was an identifier) used to live here; move it elsewhere
return type_information


# @TODO: hardcode for distance, time, subunits of currency (e.g. cents) and other common units
# @TODO: Add tests with plenty of examples

def get_quantity_col_info(self, col_data: pd.Series) -> str:
assert isinstance(col_data, pd.Series)
char_const = None
Expand Down Expand Up @@ -134,7 +134,6 @@ def get_quantity_col_info(self, col_data: pd.Series) -> str:
else:
return False, None


def get_binary_type(self, element: object) -> str:
try:
is_img = imghdr.what(element)
Expand All @@ -153,7 +152,6 @@ def get_binary_type(self, element: object) -> str:
# Not a file or file doesn't exist
return None


def get_numeric_type(self, element: object) -> str:
""" Returns the subtype inferred from a number string, or False if it's not a number"""
string_as_nr = cast_string_to_python_type(str(element))
Expand All @@ -177,7 +175,6 @@ def get_numeric_type(self, element: object) -> str:
except Exception:
return None


def type_check_sequence(self, element: object) -> str:
dtype_guess = None

Expand Down Expand Up @@ -259,7 +256,6 @@ def type_check_date(element: object) -> str:

return None


def count_data_types_in_column(self, data):
dtype_counts = Counter()

Expand All @@ -282,8 +278,12 @@ def count_data_types_in_column(self, data):

return dtype_counts


def get_column_data_type(self, data: Union[pd.Series, np.ndarray, list], full_data: pd.DataFrame, col_name: str, pct_invalid: float):
def get_column_data_type(self,
data: Union[pd.Series, np.ndarray, list],
full_data: pd.DataFrame,
col_name: str,
pct_invalid: float
):
"""
Provided the column data, define its data type and data subtype.
Expand Down

0 comments on commit a523ffa

Please sign in to comment.