Skip to content

Commit

Permalink
Roll-out type checking with mypy (#171)
Browse files Browse the repository at this point in the history
Roll-out mypy ...

Co-authored-by: Amirhessam Tahmassebi <admin@slickml.com>
  • Loading branch information
amirhessam88 and Amirhessam Tahmassebi committed Nov 28, 2022
1 parent 1f91289 commit 664df08
Show file tree
Hide file tree
Showing 33 changed files with 429 additions and 355 deletions.
2 changes: 1 addition & 1 deletion docs/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -187,7 +187,7 @@
autoapi_python_use_implicit_namespaces = False
autoapi_prepare_jinja_env = None
autoapi_keep_files = False
suppress_warnings = []
suppress_warnings = [] # type: ignore

# -- Options for View-Code -------------------------------------------------
viewcode_follow_imported_members = True
Expand Down
8 changes: 3 additions & 5 deletions mypy.ini
Original file line number Diff line number Diff line change
@@ -1,7 +1,5 @@
# More details on how to update this config file: https://mypy.readthedocs.io/en/stable/config_file.html
# TODO(amir): Still a lot of advanced options have not been added here!
# TODO(amir): Currently we use `ignore_errors = True` which would ignore all non-fatal errors
# gradually, we have to turn on `ignore_missing_imports = False` and `strict = True`
# More details on how to update this config file: https://mypy.readthedocs.io/en/stable/config_file.html
# TODO(amir): gradually, we have to turn on `strict = True`
# More details on strategies on how to use `mypy` in large code-base:
# - https://blog.wolt.com/engineering/2021/09/30/professional-grade-mypy-configuration/
# - https://dropbox.tech/application/our-journey-to-type-checking-4-million-lines-of-python
Expand All @@ -18,7 +16,7 @@ disallow_untyped_defs = True
no_implicit_optional = True

# --- errors ---
ignore_errors = True
ignore_errors = False

# --- imports ----
ignore_missing_imports = True
Expand Down
58 changes: 50 additions & 8 deletions poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 4 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -108,7 +108,11 @@ flake8-type-checking = "^2.3"
flake8-typing-imports = "^1.12"
flake8-use-fstring = "^1.4"
pep8-naming = "^0.13"

# --- type-checking ---
mypy = "^0.991"
pandas-stubs = "^1.5"
data-science-types = "^0.2"

# --- unit-testing ---
pytest = "^7.2"
Expand Down
10 changes: 5 additions & 5 deletions src/slickml/base/_estimator.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,11 +66,11 @@ class BaseXGBoostEstimator(ABC, BaseEstimator):
.. [xgboost-api] https://xgboost.readthedocs.io/en/latest/python/python_api.html
"""

num_boost_round: int
sparse_matrix: bool
scale_mean: bool
scale_std: bool
importance_type: str
num_boost_round: Optional[int]
sparse_matrix: Optional[bool]
scale_mean: Optional[bool]
scale_std: Optional[bool]
importance_type: Optional[str]
params: Optional[Dict[str, Union[str, float, int]]] = None

def __post_init__(self) -> None:
Expand Down
6 changes: 3 additions & 3 deletions src/slickml/classification/_xgboost.py
Original file line number Diff line number Diff line change
Expand Up @@ -594,7 +594,7 @@ def plot_shap_waterfall(
return_fig=return_fig,
)

def get_params(self) -> Dict[str, Union[str, float, int]]:
def get_params(self) -> Optional[Dict[str, Union[str, float, int]]]:
"""Returns the final set of train parameters.
The default set of parameters will be updated with the new ones that passed to ``params``.
Expand Down Expand Up @@ -653,7 +653,7 @@ def _model(self) -> xgb.Booster:
return xgb.train(
params=self.params,
dtrain=self.dtrain_,
num_boost_round=self.num_boost_round - 1,
num_boost_round=self.num_boost_round, # type: ignore
)

def _explainer(self) -> None:
Expand Down Expand Up @@ -682,7 +682,7 @@ def _imp_to_df(self) -> pd.DataFrame:
-------
pd.DataFrame
"""
data = {
data: Dict[str, List[float]] = {
"feature": [],
f"{self.importance_type}": [],
}
Expand Down
61 changes: 32 additions & 29 deletions src/slickml/classification/_xgboostcv.py
Original file line number Diff line number Diff line change
Expand Up @@ -194,7 +194,7 @@ class XGBoostCVClassifier(XGBoostClassifier):
scale_mean: Optional[bool] = False
scale_std: Optional[bool] = False
importance_type: Optional[str] = "total_gain"
params: Optional[Dict] = None
params: Optional[Dict[str, Union[str, float, int]]] = None
verbose: Optional[bool] = True
callbacks: Optional[bool] = False

Expand Down Expand Up @@ -414,7 +414,9 @@ def _callbacks(self) -> None:
None
"""
if self.callbacks:
self.callbacks = [
# TODO(amir): we receive bool from user and define callbacks; so mypy complains
# we prolly need to use type overloads here
self.callbacks = [ # type: ignore
xgb.callback.EvaluationMonitor(
rank=0,
period=1,
Expand All @@ -437,30 +439,31 @@ def _verbose_log(self) -> None:
-------
None
"""
print(
str(Colors.BOLD)
+ "*-* "
+ str(Colors.GREEN)
+ f"Best Boosting Round = {len(self.cv_results_) - 1}"
+ str(Colors.END)
+ str(Colors.BOLD)
+ " -*- "
+ str(Colors.F_Red)
+ f"{self.n_splits}-Folds CV {self.metrics.upper()}: "
+ str(Colors.END)
+ str(Colors.BOLD)
+ str(Colors.B_Blue)
+ f"Train = {self.cv_results_.iloc[-1][0]:.3f}"
+ " +/- "
+ f"{self.cv_results_.iloc[-1][1]:.3f}"
+ str(Colors.END)
+ str(Colors.BOLD)
+ " -*- "
+ str(Colors.B_Magenta)
+ f"Test = {self.cv_results_.iloc[-1][2]:.3f}"
+ " +/- "
+ f"{self.cv_results_.iloc[-1][3]:.3f}"
+ str(Colors.END)
+ str(Colors.BOLD)
+ " *-*",
)
if self.metrics is not None:
print(
str(Colors.BOLD)
+ "*-* "
+ str(Colors.GREEN)
+ f"Best Boosting Round = {len(self.cv_results_) - 1}"
+ str(Colors.END)
+ str(Colors.BOLD)
+ " -*- "
+ str(Colors.F_Red)
+ f"{self.n_splits}-Folds CV {self.metrics.upper()}: "
+ str(Colors.END)
+ str(Colors.BOLD)
+ str(Colors.B_Blue)
+ f"Train = {self.cv_results_.iloc[-1][0]:.3f}"
+ " +/- "
+ f"{self.cv_results_.iloc[-1][1]:.3f}"
+ str(Colors.END)
+ str(Colors.BOLD)
+ " -*- "
+ str(Colors.B_Magenta)
+ f"Test = {self.cv_results_.iloc[-1][2]:.3f}"
+ " +/- "
+ f"{self.cv_results_.iloc[-1][3]:.3f}"
+ str(Colors.END)
+ str(Colors.BOLD)
+ " *-*",
)
5 changes: 3 additions & 2 deletions src/slickml/metrics/_classification.py
Original file line number Diff line number Diff line change
Expand Up @@ -196,7 +196,7 @@ class BinaryClassificationMetrics:
precision_digits: Optional[int] = 3
display_df: Optional[bool] = True

def __post_init__(self):
def __post_init__(self) -> None:
"""Post instantiation validations and assignments."""
check_var(
self.y_true,
Expand Down Expand Up @@ -243,7 +243,8 @@ def __post_init__(self):
dtypes=bool,
)
# TODO(amir): add `values_between` option to `check_var()`
if self.threshold < 0.0 or self.threshold > 1.0:

if self.threshold is not None and (self.threshold < 0.0 or self.threshold > 1.0):
raise ValueError("The input threshold must have a value between 0.0 and 1.0.")

# TODO(amir): how can we pull off special cases like this?
Expand Down
11 changes: 6 additions & 5 deletions src/slickml/metrics/_regression.py
Original file line number Diff line number Diff line change
Expand Up @@ -149,7 +149,7 @@ class RegressionMetrics:
precision_digits: Optional[int] = 3
display_df: Optional[bool] = True

def __post_init__(self):
def __post_init__(self) -> None:
"""Post instantiation validations and assignments."""
check_var(
self.y_true,
Expand Down Expand Up @@ -390,10 +390,10 @@ def _rec_curve(self) -> Tuple[np.ndarray, np.ndarray, float]:
interval = 0.01
accuracy = []
deviation = np.arange(begin, end, interval)
# this would prolly break mypy since it cannot understand that the list is alrady cast to
# TODO(amir): this would probably break mypy since it cannot understand that the list is already cast to
# np.ndarray; so np.array() or np.linalg.norm() should be used
norms = np.abs(self.y_true - self.y_pred) / np.sqrt(
self.y_true**2 + self.y_pred**2,
norms = np.abs(self.y_true - self.y_pred) / np.sqrt( # type: ignore
self.y_true**2 + self.y_pred**2, # type: ignore
)

# main loop to count the number of times that the calculated norm is less than deviation
Expand All @@ -417,7 +417,8 @@ def _ratio_hist(self) -> Tuple[np.ndarray, float, float, float]:
-------
Tuple[np.ndarray, float, float, float]
"""
y_ratio = self.y_pred / self.y_true
# TODO(amir): self.y_pred is already np.ndarray and mypy does not infer it
y_ratio = self.y_pred / self.y_true # type: ignore
mean_y_ratio = np.mean(y_ratio)
std_y_ratio = np.std(y_ratio)
cv_y_ratio = std_y_ratio / mean_y_ratio
Expand Down
7 changes: 4 additions & 3 deletions src/slickml/optimization/_bayesianopt.py
Original file line number Diff line number Diff line change
Expand Up @@ -225,7 +225,8 @@ def __post_init__(self) -> None:
var_name="verbose",
dtypes=bool,
)
self.verbose = self._verbose()
# TODO(amir): use type overload
self.verbose = self._verbose() # type: ignore
check_var(
self.objective,
var_name="objective",
Expand Down Expand Up @@ -377,7 +378,7 @@ def _xgb_eval(

return None

def get_params_bounds(self) -> Dict[str, Tuple[Union[int, float], Union[int, float]]]:
def get_params_bounds(self) -> Optional[Dict[str, Tuple[Union[int, float], Union[int, float]]]]:
"""Returns the hyper-parameters boundaries for the tuning process.
Returns
Expand Down Expand Up @@ -488,7 +489,7 @@ def _inner_params(
gamma: float,
reg_alpha: float,
reg_lambda: float,
) -> Dict[str, Union[str, float, int]]:
) -> Dict[str, Union[str, float, int, None]]:
"""Default set of parameters passed in inner evaluation.
Notes
Expand Down

0 comments on commit 664df08

Please sign in to comment.