Skip to content

Commit

Permalink
Remove pi_iteration_chart + tweak axis titles
Browse files Browse the repository at this point in the history
  • Loading branch information
samnlindsay committed Aug 4, 2020
1 parent 55292ea commit e59f56d
Show file tree
Hide file tree
Showing 3 changed files with 30 additions and 113 deletions.
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "splink"
version = "0.2.2"
version = "0.2.3"
description = "[Beta]: Implementation in Apache Spark of the EM algorithm to estimate parameters of Fellegi-Sunter's canonical model of record linkage."
authors = ["Robin Linacre <robinlinacre@hotmail.com>", "Sam Lindsay", "Theodore Manassis"]
license = "MIT"
Expand Down
91 changes: 15 additions & 76 deletions splink/chart_definitions.py
Original file line number Diff line number Diff line change
@@ -1,73 +1,3 @@
# Vega-Lite spec for the "pi" iteration chart: two side-by-side bar charts
# (non-matches on the left, matches on the right). The panels differ only in
# their title and in the filter applied to `datum.match`, so they are built
# from one template. "data.values" is a None placeholder in this literal.
pi_iteration_chart_def = {
    "config": {
        "view": {"width": 400, "height": 300},  # pragma: no cover
        "mark": {"tooltip": None},
        "title": {"anchor": "middle"},
    },
    "hconcat": [
        {
            "mark": "bar",
            "encoding": {
                "color": {"type": "nominal", "field": "value"},
                "row": {
                    "type": "nominal",
                    "field": "column",
                    "sort": {"field": "gamma"},
                },
                "tooltip": [
                    {"type": "quantitative", "field": "probability"},
                    {"type": "ordinal", "field": "iteration"},
                    {"type": "nominal", "field": "column"},
                    {"type": "nominal", "field": "value"},
                ],
                "x": {"type": "ordinal", "field": "iteration"},
                "y": {
                    "type": "quantitative",
                    "aggregate": "sum",
                    "axis": {"title": "𝛾 value"},
                    "field": "probability",
                },
            },
            "height": 150,
            "resolve": {"scale": {"y": "independent"}},
            "title": panel_title,
            "transform": [{"filter": panel_filter}],
        }
        # Each pass evaluates the literal above afresh, so the two panels do
        # not share any nested dict or list objects.
        for panel_title, panel_filter in (
            ("Non Match", "(datum.match === 0)"),
            ("Match", "(datum.match === 1)"),
        )
    ],
    "data": {"values": None},
    "title": "Probability distribution of comparison vector values by iteration number",
    "$schema": "https://vega.github.io/schema/vega-lite/v3.4.0.json",
}


lambda_iteration_chart_def = {
"config": {
"view": {"width": 400, "height": 300},
Expand Down Expand Up @@ -141,7 +71,11 @@
{"type": "quantitative", "field": "probability", "format": ".4f"},
{"type": "ordinal", "field": "value"},
],
"x": {"type": "quantitative", "field": "probability"},
"x": {
"type": "quantitative",
"field": "probability",
"axis": {"title": "proportion of non-matches"}
},
"y": {
"type": "nominal",
"axis": {"title": "𝛾 value"},
Expand Down Expand Up @@ -171,7 +105,11 @@
{"type": "quantitative", "field": "probability", "format": ".4f"},
{"type": "ordinal", "field": "value"},
],
"x": {"type": "quantitative", "field": "probability"},
"x": {
"type": "quantitative",
"field": "probability",
"axis": {"title": "proportion of matches"}
},
"y": {
"type": "nominal",
"axis": {"title": "𝛾 value"},
Expand Down Expand Up @@ -207,7 +145,11 @@
{"type": "quantitative", "field": "level_proportion", "format": ".4f"},
{"type": "ordinal", "field": "level"},
],
"x": {"type": "quantitative", "field": "level_proportion"},
"x": {
"type": "quantitative",
"field": "level_proportion",
"axis": {"title": "proportion of comparisons"}
},
"y": {
"type": "nominal",
"axis": {"title": "𝛾 value"},
Expand Down Expand Up @@ -281,8 +223,6 @@
<div id="vis4"></div><div id="vis5"></div>
<br/>
<div id="vis6"></div>
<br/>
<div id="vis7"></div>
Expand All @@ -293,7 +233,6 @@
vegaEmbed('#vis4', {spec4}).catch(console.error);
vegaEmbed('#vis5', {spec5}).catch(console.error);
vegaEmbed('#vis6', {spec6}).catch(console.error);
vegaEmbed('#vis7', {spec7}).catch(console.error);
</script>
</body>
</html>
Expand Down
50 changes: 14 additions & 36 deletions splink/params.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@
from .validate import _get_default_value
from .chart_definitions import (
lambda_iteration_chart_def,
pi_iteration_chart_def,
probability_distribution_chart,
gamma_distribution_chart_def,
ll_iteration_chart_def,
Expand Down Expand Up @@ -399,23 +398,6 @@ def field_value_to_probs(fv):
print(field)
field_value_to_probs(value)


def pi_iteration_chart(self):  # pragma: no cover
    """Build the chart of comparison-vector probabilities per iteration.

    The gamma iteration history is written into the shared
    ``pi_iteration_chart_def`` spec; when ``self.real_params`` is truthy,
    the rows converted from it are appended after the history rows.

    Returns:
        ``alt.Chart.from_dict(pi_iteration_chart_def)`` when
        ``altair_installed`` is truthy, otherwise the spec dict itself.
    """
    with_real_params = bool(self.real_params)
    rows = self._iteration_history_df_gammas()
    if with_real_params:
        rows.extend(
            self._convert_params_dict_to_dataframe(self.real_params, "real_param")
        )

    # The spec object is shared and filled in place, so each call overwrites
    # the data left there by the previous call.
    pi_iteration_chart_def["data"]["values"] = rows

    if not altair_installed:
        return pi_iteration_chart_def
    return alt.Chart.from_dict(pi_iteration_chart_def)

def lambda_iteration_chart(self): # pragma: no cover
data = self._iteration_history_df_lambdas()
if self.real_params:
Expand Down Expand Up @@ -540,15 +522,14 @@ def all_charts_write_html_file(self, filename="splink_charts.html", overwrite=Fa
c1 = self.probability_distribution_chart().to_json(indent=None)
c2 = self.bayes_factor_chart().to_json(indent=None)
c3 = self.lambda_iteration_chart().to_json(indent=None)
c4 = self.pi_iteration_chart().to_json(indent=None)

if self.log_likelihood_exists:
c5 = self.ll_iteration_chart().to_json(indent=None)
c4 = self.ll_iteration_chart().to_json(indent=None)
else:
c5 = ""
c4 = ""

c6 = self.bayes_factor_history_charts().to_json(indent=None)
c7 = self.gamma_distribution_chart().to_json(indent=None)
c5 = self.bayes_factor_history_charts().to_json(indent=None)
c6 = self.gamma_distribution_chart().to_json(indent=None)

with open(filename, "w") as f:
f.write(
Expand All @@ -557,27 +538,25 @@ def all_charts_write_html_file(self, filename="splink_charts.html", overwrite=Fa
vegalite_version=alt.VEGALITE_VERSION,
vegaembed_version=alt.VEGAEMBED_VERSION,
spec1=c1,
spec2=c7,
spec2=c6,
spec3=c2,
spec4=c3,
spec5=c5,
spec6=c4,
spec7=c6
spec5=c4,
spec6=c5
)
)
else:
c1 = json.dumps(self.probability_distribution_chart())
c2 = json.dumps(self.bayes_factor_chart())
c3 = json.dumps(self.lambda_iteration_chart())
c4 = json.dumps(self.pi_iteration_chart())

if self.log_likelihood_exists:
c5 = json.dumps(self.ll_iteration_chart())
c4 = json.dumps(self.ll_iteration_chart())
else:
c5 = ""
c4 = ""

c6 = json.dumps(self.bayes_factor_history_charts())
c7 = json.dumps(self.gamma_distribution_chart())
c5 = json.dumps(self.bayes_factor_history_charts())
c6 = json.dumps(self.gamma_distribution_chart())

with open(filename, "w") as f:
f.write(
Expand All @@ -586,12 +565,11 @@ def all_charts_write_html_file(self, filename="splink_charts.html", overwrite=Fa
vegalite_version="3.3.0",
vegaembed_version="4",
spec1=c1,
spec2=c7,
spec2=c6,
spec3=c2,
spec4=c3,
spec5=c5,
spec6=c4,
spec7=c6
spec5=c4,
spec6=c5
)
)

Expand Down

0 comments on commit e59f56d

Please sign in to comment.