Skip to content

Commit

Permalink
fix(eda): fixed bugs that come with randomly generated datasets
Browse files Browse the repository at this point in the history
  • Loading branch information
eutialia authored and jinglinpeng committed Apr 20, 2021
1 parent 56896d0 commit 53ecf76
Show file tree
Hide file tree
Showing 13 changed files with 225 additions and 287 deletions.
18 changes: 8 additions & 10 deletions dataprep/clean/clean_country.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@
from functools import lru_cache
from operator import itemgetter
from os import path
from re import error
from typing import Any, Union

import dask
Expand Down Expand Up @@ -289,14 +288,11 @@ def _check_country(country: str, input_format: str, strict: bool, clean: bool) -

if strict and input_format == "regex":
for form in ("name", "official"):
try:
ind = DATA[
DATA[form].str.contains(f"^{country}$", flags=re.IGNORECASE, na=False)
].index
if np.size(ind) > 0:
return (ind[0], "success") if clean else True
except error:
return (None, "unknown") if clean else False
ind = DATA[
DATA[form].str.contains(f"^{re.escape(country)}$", flags=re.IGNORECASE, na=False)
].index
if np.size(ind) > 0:
return (ind[0], "success") if clean else True

elif not strict and input_format in ("regex", "name", "official"):
for index, country_regex in enumerate(REGEXES):
Expand All @@ -305,7 +301,9 @@ def _check_country(country: str, input_format: str, strict: bool, clean: bool) -

else:
ind = DATA[
DATA[input_format].str.contains(f"^{country}$", flags=re.IGNORECASE, na=False)
DATA[input_format].str.contains(
f"^{re.escape(country)}$", flags=re.IGNORECASE, na=False
)
].index
if np.size(ind) > 0:
return (ind[0], "success") if clean else True
Expand Down
1 change: 0 additions & 1 deletion dataprep/eda/create_report/formatter.py
Original file line number Diff line number Diff line change
Expand Up @@ -217,7 +217,6 @@ def format_basic(df: dd.DataFrame, cfg: Config) -> Dict[str, Any]:
for tab in rndrd.tabs:
fig = tab.child
fig.sizing_mode = "stretch_width"
# fig.title = Title(text=tab.title, align="center", text_font_size="20px")
figs_corr.append(fig)
res["correlation_names"].append(tab.title)
res["correlations"] = components(figs_corr)
Expand Down
6 changes: 3 additions & 3 deletions dataprep/eda/create_report/templates/base.html
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
<a href="#variables">Variables ≡</a>
<div class="subcontent">
{% for var in context.components.variables.keys() %}
<a href="#{{ var }}">{{ var }}</a>
<a href="#{{ var|e }}">{{ var|e }}</a>
{% endfor %}
</div>
{% endif %}
Expand Down Expand Up @@ -92,11 +92,11 @@ <h1 class="tb-title">Missing Values</h1>
{% if context.components.has_interaction and context.components.interactions[0] != 0%}
{{ context.components.interactions[0] }}
{% endif %}

<footer class="footer">
<p>Report generated with <a href="https://dataprep.ai/">DataPrep</a></p>
</footer>

</body>

</html>
</html>
33 changes: 20 additions & 13 deletions dataprep/eda/create_report/templates/correlation.html
Original file line number Diff line number Diff line change
@@ -1,20 +1,27 @@
<div class="correlation-container">
<div class="corr-plot">
<div class="report-tab">
<button class="corr-tablinks active" onclick="openTab(event, '{{context.components.correlation_names[0]}}')" >{{context.components.correlation_names[0]}}</button>
<button class="corr-tablinks" onclick="openTab(event, '{{context.components.correlation_names[1]}}')">{{context.components.correlation_names[1]}}</button>
<button class="corr-tablinks" onclick="openTab(event, '{{context.components.correlation_names[2]}}')">{{context.components.correlation_names[2]}}</button>
<div class="vp-switch">
{% for corr_name in context.components.correlation_names %}
{% if loop.index == 1 %}
<input type="radio" name="correlation-names" id="corr-name-{{ corr_name }}" checked />
<label for="corr-name-{{ corr_name }}" onclick="openTab(this)" >{{ corr_name }}</label>
{% else %}
<input type="radio" name="correlation-names" id="corr-name-{{ corr_name }}" />
<label for="corr-name-{{ corr_name }}" onclick="openTab(this)" >{{ corr_name }}</label>
{% endif %}
{% endfor %}
</div>

<div id={{context.components.correlation_names[0]}} class="corr-tabcontent">
{{ context.components.correlations[1][0]}}
{% for plot in context.components.correlations[1] %}
{% if loop.index == 1 %}
<div class="corr-tabcontent" style="display: block;">
{{ plot }}
</div>
<div id={{context.components.correlation_names[1]}} style="display:none" class="corr-tabcontent">
{{ context.components.correlations[1][1]}}
{% else %}
<div class="corr-tabcontent">
{{ plot }}
</div>
<div id={{context.components.correlation_names[2]}} style="display:none" class="corr-tabcontent">
{{ context.components.correlations[1][2]}}
</div>

{% endif %}
{% endfor %}
</div>
</div>
</div>
36 changes: 20 additions & 16 deletions dataprep/eda/create_report/templates/missing.html
Original file line number Diff line number Diff line change
@@ -1,24 +1,28 @@
<div class="missing-container">
<div class="miss-plot">
<div class="report-tab">
<button class="missing-tablinks active" onclick="openTab(event, '{{context.components.missing_tabs[0]}}')" >{{context.components.missing_tabs[0]}}</button>
<button class="missing-tablinks" onclick="openTab(event, '{{context.components.missing_tabs[1]}}')">{{context.components.missing_tabs[1]}}</button>
<button class="missing-tablinks" onclick="openTab(event, '{{context.components.missing_tabs[2]}}')">{{context.components.missing_tabs[2]}}</button>
<button class="missing-tablinks" onclick="openTab(event, '{{context.components.missing_tabs[3]}}')">{{context.components.missing_tabs[3]}}</button>
<div class="vp-switch">
{% for miss_name in context.components.missing_tabs %}
{% if loop.index == 1 %}
<input type="radio" name="miss-names" id="miss-name-{{ miss_name }}" checked />
<label for="miss-name-{{ miss_name }}" onclick="openTab(this)" >{{ miss_name }}</label>
{% else %}
<input type="radio" name="miss-names" id="miss-name-{{ miss_name }}" />
<label for="miss-name-{{ miss_name }}" onclick="openTab(this)" >{{ miss_name }}</label>
{% endif %}
{% endfor %}
</div>

<div id="{{context.components.missing_tabs[0]}}" class="missing-tabcontent">
{{ context.components.missing[1][0]}}
{% for plot in context.components.missing[1] %}
{% if loop.index == 1 %}
<div class="missing-tabcontent" style="display: block;">
{{ plot }}
</div>
<div id="{{context.components.missing_tabs[1]}}" style="display:none" class="missing-tabcontent">
{{ context.components.missing[1][1]}}
</div>
<div id="{{context.components.missing_tabs[2]}}" style="display:none" class="missing-tabcontent">
{{ context.components.missing[1][2]}}
</div>
<div id="{{context.components.missing_tabs[3]}}" style="display:none" class="missing-tabcontent">
{{ context.components.missing[1][3]}}
{% else %}
<div class="missing-tabcontent">
{{ plot }}
</div>
{% endif %}
{% endfor %}

</div>
</div>
</div>
31 changes: 15 additions & 16 deletions dataprep/eda/create_report/templates/overview.html
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
<div class="ov-stats-container">
<div class="tb-container">
<h3 class="tb-title">Dataset Statistics</h3>
<table class="rp-table-overview">
<table class="rp-table">
{% for h, d in context.components.overview[0].items() %}
<tr>
<th>{{ h }}</th>
Expand All @@ -12,7 +12,7 @@ <h3 class="tb-title">Dataset Statistics</h3>
<tr>
<th style="vertical-align: top;">Variable Types</th>
<td>
<ul>
<ul style="list-style: none;padding-left: 0;">
{% for tp, num in context.components.overview[1].items() %}
<li>{{ tp }}: {{ num }}</li>
{% endfor %}
Expand All @@ -24,15 +24,15 @@ <h3 class="tb-title">Dataset Statistics</h3>
</div>
{% if context.components.overview_insights %}
<div class="insights">
<h3>Dataset Insights</h3>
{% for page, content in context.components.overview_insights.items() %}
<div class="page-box" id="page-{{ loop.index }}">
<h3>Dataset Insights</h3>
<table>
{% for entry in content %}
{% for ins_type, insight in entry.items() %}
<tr>
<th id="des">
{{ insight.replace('/*', '<span class="col-name' + context.rnd | string() + '">').replace('*/', '</span>') }}
{{ insight|escape|replace('/*start*/', '<span class="col-name">')|replace('/*end*/', '</span>') }}
</th>
<td><span class="ins-type">{{ ins_type }}</span></td>
</tr>
Expand All @@ -41,18 +41,17 @@ <h3>Dataset Insights</h3>
</table>
</div>
{% endfor %}
<div class="page-switch-box">
<ul class="page-switch">
{% for _ in context.components.overview_insights %}
{% if loop.length > 1 %}
<li id="switch-{{ loop.index }}" onclick="switchPage(this)">{{ loop.index }}</li>
{% endif %}
{% endfor %}
</ul>
</div>
</div>
{% endif %}
</div>
{% if context.components.overview_insights %}
<div class="page-switch-box">
<ul class="page-switch">
{% for _ in context.components.overview_insights %}
{% if loop.length > 1 %}
<li id="switch-{{ loop.index }}" onclick="switchPage(this)">{{ loop.index }}</li>
{% endif %}
{% endfor %}
</ul>
</div>
{% endif %}
</div>

</div>
43 changes: 11 additions & 32 deletions dataprep/eda/create_report/templates/scripts.html
Original file line number Diff line number Diff line change
Expand Up @@ -17,19 +17,10 @@
{% endif %}

<script>
setTimeout(init, 100);
function init() {
const tableRows_num = document.querySelectorAll('.tb-numerical tr');
highlightTableValue(tableRows_num, '#f00');
scientificNotationStrip(tableRows_num);

const tableRows_cat = document.querySelectorAll('.tb-categorical tr');
highlightTableValue(tableRows_cat, '#f00');
scientificNotationStrip(tableRows_cat);
}
setTimeout(init, 50);

window.onload = () => {
let tableRows = document.getElementsByTagName('tr');
/**
 * Table post-processing entry point; it is scheduled through the
 * setTimeout(init, ...) call earlier in this script.
 *
 * Collects every <tr> that lives inside an element with class `rp-table`
 * and passes that row list to highlightTableValue and
 * scientificNotationStrip (both defined outside this excerpt).
 */
function init() {
// Only rows of `.rp-table` tables are selected.
const tableRows = document.querySelectorAll('.rp-table tr');
// '#f00' is the color argument handed to the highlighter.
highlightTableValue(tableRows, '#f00');
scientificNotationStrip(tableRows);
}
Expand Down Expand Up @@ -102,25 +93,13 @@
}
}

function openTab(evt, tabName) {
var i, tabcontent, tablinks;

tabcontent = document.getElementsByClassName(evt.currentTarget.parentElement.nextElementSibling.className);
for (i = 0; i < tabcontent.length; i++) {
tabcontent[i].style.display = "none";
}

tablinks = document.getElementsByClassName(evt.currentTarget.className);
for (i = 0; i < tablinks.length; i++) {
tablinks[i].style.background = "transparent";
tablinks[i].className = tablinks[i].className.replace(" active", "");
}
document.getElementById(tabName).style.display = "flex";
for (i = 0; i < tablinks.length; i++){
}
evt.currentTarget.className += " active";

evt.currentTarget.style.background = "#b5d6ea";
/**
 * Show the content panel that matches the clicked tab switch and hide the others.
 *
 * @param {Element} e - The clicked element; the templates invoke this as
 *     `openTab(this)` from a <label>'s onclick attribute.
 */
function openTab(e) {
// Position of the clicked element among its siblings in the switch container.
const btnIndex = [...e.parentElement.children].indexOf(e);
// Children of the grandparent. Index 0 is never hidden below (presumably the
// switch container itself -- confirm against the template); the remaining
// children are treated as content panels.
const allContentEle = e.parentElement.parentElement.children;
// Hide every child except the first one.
for (let i of [...allContentEle].slice(1)) {
i.style.display = 'none';
}
// Math.round rounds .5 upward, so sibling indices 1, 3, 5, ... map to panels
// 1, 2, 3, ...  This assumes <input>/<label> pairs alternate so that labels
// sit at odd indices. The first panel is shown with 'flex', all others with 'block'.
allContentEle[Math.round(btnIndex/2)].style.display = (btnIndex == 1) ? 'flex' : 'block';
}

function switchPage(e) {
Expand All @@ -137,4 +116,4 @@
e.style.backgroundColor = '#5DADE2';
}

</script>
</script>

0 comments on commit 53ecf76

Please sign in to comment.