Skip to content

Commit

Permalink
add support to all python notebooks in report (#352)
Browse files Browse the repository at this point in the history
  • Loading branch information
pplonski committed Apr 7, 2021
1 parent ba571ac commit e3fd801
Show file tree
Hide file tree
Showing 6 changed files with 149 additions and 63 deletions.
6 changes: 3 additions & 3 deletions supervised/automl.py
Expand Up @@ -78,7 +78,7 @@ def __init__(
- Uses 10-fold CV (Cross-Validation).
- It tunes with Optuna the following algorithms: `Random Forest`, `Extra Trees`, `LightGBM`, `XGBoost`, `CatBoost`, `Neural Network`.
- It applies `Ensemble` and `Stacking` for trained models.
- It has only learning curves in the reports.
- It has only learning curves in the reports.
ml_task (str): Can be {"auto", "binary_classification", "multiclass_classification", "regression"}.
Expand Down Expand Up @@ -114,11 +114,11 @@ def __init__(
stack_models (boolean): Whether a models stack gets created at the end of the training. Stack level is 1.
eval_metric (str): The metric to be used in early stopping and to compare models.
- for binary classification: `logloss`, `auc`, `f1`, `average_precision`, `accuracy` - default is logloss (if left "auto")
- for multiclass classification: `logloss`, `f1`, `accuracy` - default is `logloss` (if left "auto")
- for regression: `rmse`, `mse`, `mae`, `r2`, `mape`, `spearman`, `pearson` - default is `rmse` (if left "auto")
validation_strategy (dict): Dictionary with validation type. Right now train/test split and cross-validation are supported.
Example:
Expand Down
194 changes: 140 additions & 54 deletions supervised/base_automl.py
Expand Up @@ -1780,7 +1780,7 @@ def _validate_eval_metric(self):
"auc",
"f1",
"average_precision",
"accuracy"
"accuracy",
]:
raise ValueError(
f"Metric {self.eval_metric} is not allowed in ML task: {self._get_ml_task()}. \
Expand Down Expand Up @@ -1965,57 +1965,7 @@ def from_json(self, json_data):

self._ml_task = json_data.get("ml_task")

def _md_to_html(self, md_fname, page_type):
    """Convert a Markdown README into an HTML page written next to it.

    Arguments:
        md_fname (str): Path to the README.md file to render.
        page_type (str): "main" for the top-level report page, "sub"
            for a nested (per-model) page.

    Returns:
        str: Path of the written HTML file, or None when `md_fname`
        does not exist.
    """
    import markdown

    if not os.path.exists(md_fname):
        return None

    with open(md_fname) as handle:
        text = handle.read()

    # Point intra-report links at the rendered pages instead of the sources.
    text = text.replace("README.md", "README.html")

    html = markdown.markdown(text, extensions=["markdown.extensions.tables"])
    # Apply the report's inline styling tweaks.
    for old, new in (
        ("<img ", '<img style="width:80%" '),
        ("<table>", '<table class="styled-table">'),
        ("<tr>", '<tr style="text-align: right;">'),
    ):
        html = html.replace(old, new)

    # Sub-pages live one directory below the shared stylesheet.
    if page_type == "sub":
        header = '<link rel="stylesheet" href="../style.css">\n\n'
    else:
        header = '<link rel="stylesheet" href="style.css">\n\n'

    if page_type == "main":
        header += """<img src="https://raw.githubusercontent.com/mljar/visual-identity/main/media/mljar_AutomatedML.png" style="height:128px; margin-left: auto;
margin-right: auto;display: block;"/>\n\n"""
        if os.path.exists(os.path.join(self._results_path, "EDA")):
            header += '<a href="EDA/README.html">Automatic Exploratory Data Analysis Report</a>'

    html_fname = md_fname.replace("README.md", "README.html")
    with open(html_fname, "w") as handle:
        handle.write(header + html)

    return html_fname

def _report(self, width=900, height=1200):

from IPython.display import IFrame

main_readme_html = os.path.join(self._results_path, "README.html")
if not os.path.exists(main_readme_html) or 1:
fname = os.path.join(self._results_path, "README.md")
main_readme_html = self._md_to_html(fname, "main")
for f in os.listdir(self._results_path):
fname = os.path.join(self._results_path, f, "README.md")
if os.path.exists(fname):
self._md_to_html(fname, "sub")
with open(os.path.join(self._results_path, "style.css"), "w") as fout:
fout.write(
"""
report_style = """
.styled-table {
border-collapse: collapse;
font-size: 0.9em;
Expand Down Expand Up @@ -2069,10 +2019,146 @@ def _report(self, width=900, height=1200):
}
"""

def _md_to_html(self, md_fname, page_type, dir_path, me=None):
    """Render a Markdown README as a self-contained HTML fragment.

    Figures are inlined (PNG as base64 data URIs, SVG verbatim) so the
    resulting report is a single document, and links between sub-pages
    are rewritten into JavaScript `toggleShow` calls.

    Arguments:
        md_fname (str): Path to the README.md file to render.
        page_type (str): "main" for the top-level page, "sub" for a
            nested (per-model) page.
        dir_path (str): Directory holding the README and its figures.
        me (str): For sub-pages, the id of the page's own <div>, used
            when rewriting the back-link to the main page. Default None.

    Returns:
        str: The HTML fragment, or None when `md_fname` does not exist.
    """
    import base64

    import markdown

    if not os.path.exists(md_fname):
        return None

    with open(md_fname, encoding="utf-8") as fin:
        content = fin.read()

    # Point intra-report links at the rendered pages instead of the sources.
    content = content.replace("README.md", "README.html")
    content_html = markdown.markdown(
        content, extensions=["markdown.extensions.tables"]
    )
    content_html = content_html.replace("<img ", '<img style="width:750px" ')
    content_html = content_html.replace("<table>", '<table class="styled-table">')
    content_html = content_html.replace("<tr>", '<tr style="text-align: right;">')

    # Inline PNG figures as base64 data URIs.
    # endswith() (not substring `in`) so names merely containing ".png"
    # are not mistaken for figures.
    for f in os.listdir(dir_path):
        if f.endswith(".png"):
            with open(os.path.join(dir_path, f), "rb") as image_file:
                encoded_string = base64.b64encode(image_file.read()).decode("utf-8")
            encoded_figure = f"data:image/png;base64, {encoded_string}"
            content_html = content_html.replace(f, encoded_figure)

    # Inline SVG figures: every line referencing the file is replaced
    # with the SVG markup itself.
    for f in os.listdir(dir_path):
        if f.endswith(".svg"):
            with open(os.path.join(dir_path, f), "rb") as image_file:
                svg_plot = image_file.read().decode("utf-8")

            new_content = []
            for line in content_html.split("\n"):
                if f in line:
                    new_content += [f"<p>{svg_plot}</p>"]
                else:
                    new_content += [line]
            content_html = "\n".join(new_content)

    # On the main page, turn links to sub-pages into show/hide toggles
    # so the whole report works as a single page.
    if page_type == "main":
        for f in os.listdir(dir_path):
            if os.path.exists(os.path.join(dir_path, f, "README.md")):
                old = f'href="{f}/README.html"'
                new = f"onclick=\"toggleShow('{f}');toggleShow('main')\" href=\"javascript:void(0);\""
                content_html = content_html.replace(old, new)

    # Back-link from a sub-page to the main page.
    if me is not None:
        old = 'href="../README.html"'
        new = f"onclick=\"toggleShow('{me}');toggleShow('main')\" href=\"javascript:void(0);\""
        content_html = content_html.replace(old, new)

    beginning = ""
    if page_type == "main":
        beginning += """<img src="https://raw.githubusercontent.com/mljar/visual-identity/main/media/mljar_AutomatedML.png" style="height:128px; margin-left: auto;
margin-right: auto;display: block;"/>\n\n"""
        if os.path.exists(os.path.join(self._results_path, "EDA")):
            beginning += '<a href="EDA/README.html">Automatic Exploratory Data Analysis Report</a>'

    return beginning + content_html

def _report(self, width=900, height=1200):
    """Create a single-page HTML report and return it for notebook display.

    The main README and every model sub-README are rendered to HTML,
    concatenated into one document (sub-pages hidden, toggled with the
    embedded `toggleShow` JavaScript) and written to README.html inside
    the results path.

    Arguments:
        width (int): Unused; kept for backward compatibility with the
            previous IFrame-based implementation.
        height (int): Unused; kept for backward compatibility.

    Returns:
        IPython.display.HTML: The rendered report.
    """
    from IPython.display import HTML

    main_readme_html = os.path.join(self._results_path, "README.html")

    # Main page.
    body = ""
    fname = os.path.join(self._results_path, "README.md")
    body += (
        '<div id="main">\n'
        + self._md_to_html(fname, "main", self._results_path)
        + "\n\n</div>\n\n"
    )

    # Sub-pages (one per result directory), hidden until toggled.
    for f in os.listdir(self._results_path):
        fname = os.path.join(self._results_path, f, "README.md")
        if os.path.exists(fname):
            body += (
                f'<div id="{f}" style="display: none">\n'
                + self._md_to_html(
                    fname, "sub", os.path.join(self._results_path, f), f
                )
                + "\n\n</div>\n\n"
            )

    # NOTE(review): the previous version returned
    # `IFrame(main_readme_html, width, height)` here behind an
    # always-true `is not None` check, but `IFrame` was no longer
    # imported (only `HTML` is) and the early return skipped the report
    # generation below — that dead branch and the commented-out old
    # implementation have been removed.

    body += """
<script>
function toggleShow(elementId) {
  var x = document.getElementById(elementId);
  if (x.style.display === "none") {
    x.style.display = "block";
  } else {
    x.style.display = "none";
  }
}
</script>
"""

    report_content = f"""
<!DOCTYPE html>
<html>
<head>
<style>
{self.report_style}
</style>
</head>
<body>
{body}
</body>
</html>
"""
    # Persist the report so it can also be opened directly in a browser.
    with open(main_readme_html, "w", encoding="utf-8") as fout:
        fout.write(report_content)

    return HTML(report_content)

def _need_retrain(self, X, y, sample_weight, decrease):

Expand Down
2 changes: 1 addition & 1 deletion supervised/tuner/optuna/lightgbm.py
Expand Up @@ -9,7 +9,7 @@
lightgbm_eval_metric_pearson,
lightgbm_eval_metric_f1,
lightgbm_eval_metric_average_precision,
lightgbm_eval_metric_accuracy
lightgbm_eval_metric_accuracy,
)
from supervised.algorithms.registry import BINARY_CLASSIFICATION
from supervised.algorithms.registry import MULTICLASS_CLASSIFICATION
Expand Down
2 changes: 1 addition & 1 deletion supervised/tuner/optuna/tuner.py
Expand Up @@ -37,7 +37,7 @@ def __init__(
"pearson",
"f1",
"average_precision",
"accuracy"
"accuracy",
]:
raise AutoMLException(f"Metric {eval_metric.name} is not supported")

Expand Down
2 changes: 1 addition & 1 deletion supervised/tuner/optuna/xgboost.py
Expand Up @@ -9,7 +9,7 @@
xgboost_eval_metric_pearson,
xgboost_eval_metric_f1,
xgboost_eval_metric_average_precision,
xgboost_eval_metric_accuracy
xgboost_eval_metric_accuracy,
)
from supervised.algorithms.registry import BINARY_CLASSIFICATION
from supervised.algorithms.registry import MULTICLASS_CLASSIFICATION
Expand Down
6 changes: 3 additions & 3 deletions supervised/utils/metric.py
Expand Up @@ -83,7 +83,7 @@ def negative_accuracy(y_true, y_predicted, sample_weight=None):
y_predicted = (y_predicted > 0.5).astype(int)
else:
y_predicted = np.argmax(y_predicted, axis=1)

val = accuracy_score(y_true, y_predicted, sample_weight=sample_weight)

return -val
Expand Down Expand Up @@ -195,7 +195,7 @@ def lightgbm_eval_metric_f1(preds, dtrain):
cols = len(unique_targets)
rows = int(preds.shape[0] / len(unique_targets))
preds = np.reshape(preds, (rows, cols), order="F")

return "f1", -negative_f1(target, preds, weight), True


Expand All @@ -215,7 +215,7 @@ def lightgbm_eval_metric_accuracy(preds, dtrain):
cols = len(unique_targets)
rows = int(preds.shape[0] / len(unique_targets))
preds = np.reshape(preds, (rows, cols), order="F")

return "accuracy", -negative_accuracy(target, preds, weight), True


Expand Down

0 comments on commit e3fd801

Please sign in to comment.