Skip to content

Commit

Permalink
Merge pull request #45 from serengil/feat-task-2512-more-type-hinting…
Browse files Browse the repository at this point in the history
…-and-docstrings

type hinting
  • Loading branch information
serengil committed Dec 26, 2023
2 parents 0ea6e22 + 5b4a039 commit b54df75
Show file tree
Hide file tree
Showing 16 changed files with 364 additions and 124 deletions.
4 changes: 3 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ __pycache__/*
commons/__pycache__/*
training/__pycache__/*
tuning/__pycache__/*
tests/__pycache__/*
build/
dist/
Pipfile
Expand All @@ -18,4 +19,5 @@ chefboost/tuning/__pycache__/*
.DS_Store
chefboost/.DS_Store
tests/.DS_Store
.pytest_cache
.pytest_cache
*.pyc
33 changes: 33 additions & 0 deletions chefboost/commons/daemon.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
import multiprocessing
import multiprocessing.pool

class NoDaemonProcess(multiprocessing.Process):
    """
    Process whose ``daemon`` flag always reads False.

    multiprocessing does not allow daemonic processes to create children,
    so workers built from this class remain able to start their own
    processes (recursive parallel runs).
    """

    @property
    def daemon(self) -> bool:
        """Report the process as non-daemonic regardless of what was requested."""
        return False

    @daemon.setter
    def daemon(self, value: bool) -> None:
        """Ignore any attempt to change the flag so it stays False."""


class NoDaemonContext(type(multiprocessing.get_context())):
    """
    Multiprocessing context for recursive parallel runs.

    Derives from the type of the context returned by
    multiprocessing.get_context() when this module is imported, and swaps
    its Process class for NoDaemonProcess.
    """

    # pylint: disable=too-few-public-methods
    Process = NoDaemonProcess


class CustomPool(multiprocessing.pool.Pool):
    """
    CustomPool class for recursive parallel runs.

    Behaves like multiprocessing.pool.Pool except that it is always built
    on a NoDaemonContext.
    """

    # pylint: disable=too-few-public-methods, abstract-method
    def __init__(self, *args, **kwargs):
        """
        Build the pool on a NoDaemonContext
        Args:
            *args: positional arguments forwarded to multiprocessing.pool.Pool
            **kwargs: keyword arguments forwarded to multiprocessing.pool.Pool;
                any "context" entry is overwritten
        """
        # always replace the context, even when the caller supplied one
        kwargs["context"] = NoDaemonContext()
        super().__init__(*args, **kwargs)
85 changes: 72 additions & 13 deletions chefboost/commons/functions.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,12 @@
import pathlib
import os
import sys
from os import path
from types import ModuleType
import multiprocessing
from typing import Optional
from typing import Optional, Union
import numpy as np
import pandas as pd
from chefboost import Chefboost as cb
from chefboost.commons.logger import Logger
from chefboost.commons.module import load_module
Expand All @@ -13,7 +16,15 @@
logger = Logger(module="chefboost/commons/functions.py")


def bulk_prediction(df, model):
def bulk_prediction(df: pd.DataFrame, model: dict) -> None:
"""
Perform a bulk prediction on given dataframe
Args:
df (pd.DataFrame): input data frame
model (dict): built model
Returns:
None
"""
predictions = []
for _, instance in df.iterrows():
features = instance.values[0:-1]
Expand All @@ -23,17 +34,35 @@ def bulk_prediction(df, model):
df["Prediction"] = predictions


def restoreTree(module_name):
def restoreTree(module_name: str) -> ModuleType:
    """
    Restore a built tree
    Args:
        module_name (str): name of the module to load; passed unchanged
            to load_module
    Returns:
        module (ModuleType): the result of load_module for that name
    """
    return load_module(module_name)


def softmax(w):
def softmax(w: list) -> np.ndarray:
    """
    Softmax function
    Args:
        w (list): input scores
    Returns:
        result (numpy.ndarray): float32 softmax of the inputs; an empty
            input yields an empty array
    """
    scores = np.array(w, dtype=np.float32)
    if scores.size == 0:
        # nothing to normalise; np.max would raise on an empty array
        return scores

    # softmax is unchanged by subtracting a constant from every input, so
    # shift by the maximum to keep np.exp from overflowing in float32
    e = np.exp(scores - np.max(scores))
    return e / np.sum(e)


def sign(x):
def sign(x: Union[int, float]) -> int:
"""
Sign function
Args:
x (int or float): input
Returns:
result (int): 1 for positive inputs, -1 for negative
inputs, 0 for zero
"""
if x > 0:
return 1
elif x < 0:
Expand All @@ -42,7 +71,14 @@ def sign(x):
return 0


def formatRule(root):
def formatRule(root: int) -> str:
"""
Format a rule in the output file (tree)
Args:
root (int): degree of current rule
Returns:
formatted rule (str)
"""
resp = ""

for _ in range(0, root):
Expand All @@ -51,20 +87,37 @@ def formatRule(root):
return resp


def storeRule(file, content):
def storeRule(file: str, content: str) -> None:
    """
    Append a rule to a file, followed by a newline
    Args:
        file (str): target file; created if it does not exist
        content (str): content to store
    Returns:
        None
    """
    # append-only: the file is never read back here, so "a" is sufficient
    with open(file, "a", encoding="UTF-8") as f:
        f.write(content + "\n")


def createFile(file, content):
def createFile(file: str, content: str) -> None:
    """
    Create a file with given content, overwriting any existing file
    Args:
        file (str): target file
        content (str): content to store
    Returns:
        None
    """
    with open(file, "w", encoding="UTF-8") as f:
        f.write(content)


def initializeFolders():
import sys

def initializeFolders() -> None:
"""
Initialize required folders
"""
sys.path.append("..")
pathlib.Path("outputs").mkdir(parents=True, exist_ok=True)
pathlib.Path("outputs/data").mkdir(parents=True, exist_ok=True)
Expand Down Expand Up @@ -97,8 +150,14 @@ def initializeFolders():
# ------------------------------------


def initializeParams(config: Optional[dict] = None):

def initializeParams(config: Optional[dict] = None) -> dict:
"""
Arrange a chefboost configuration
Args:
config (dict): initial configuration
Returns:
config (dict): final configuration
"""
if config == None:
config = {}

Expand Down
16 changes: 15 additions & 1 deletion chefboost/training/Preprocess.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,26 @@
import math
import numpy as np
import pandas as pd
from chefboost.training import Training
from chefboost.commons.logger import Logger

logger = Logger(module="chefboost/training/Preprocess.py")


def processContinuousFeatures(algorithm, df, column_name, entropy, config):
def processContinuousFeatures(
algorithm: str, df: pd.DataFrame, column_name: str, entropy: float, config: dict
) -> pd.DataFrame:
"""
Find the best split point for numeric features
Args:
df (pd.DataFrame): (sub) training dataframe
column_name (str): current column to process
entropy (float): calculated entropy
config (dict): training configuration
Returns
df (pd.DataFrame): dataframe with numeric columns updated
to nominal (e.g. age >40 or <=40 instead of a continuous age value)
"""
# if True:
if df[column_name].nunique() <= 20:
unique_values = sorted(df[column_name].unique())
Expand Down
Loading

0 comments on commit b54df75

Please sign in to comment.