From fca8213bbf844bf111f8caf0f91944c7338aeaa5 Mon Sep 17 00:00:00 2001 From: Sam Lindsay Date: Fri, 31 Jul 2020 14:32:52 +0000 Subject: [PATCH 1/4] bayes factor charts --- splink/chart_definitions.py | 208 +++++++++++++++++------------------- splink/params.py | 57 +++++----- 2 files changed, 125 insertions(+), 140 deletions(-) diff --git a/splink/chart_definitions.py b/splink/chart_definitions.py index 59ea265e3e..2a8d54d3ea 100644 --- a/splink/chart_definitions.py +++ b/splink/chart_definitions.py @@ -222,68 +222,49 @@ "$schema": "https://vega.github.io/schema/vega-lite/v3.4.0.json", } -adjustment_weight_chart_def = { +bayes_factor_chart_def = { "config": { "view": {"width": 400, "height": 300}, "mark": {"tooltip": None}, "title": {"anchor": "middle"}, }, "data": {"values": None}, - "mark": "bar", + "mark": {"type": "bar", "clip": True}, "encoding": { "color": { "type": "quantitative", - "field": "normalised_adjustment", + "field": "logk", + "title": "log2(K)", "scale": { - "domain": [-0.5, -0.4, 0, 0.4, 0.5], - "range": ["red", "orange", "green", "orange", "red"], + "scheme": "redyellowgreen" + #"domain": [-10, -7, 0, 7, 10], + #"range": ["red", "orange", "green", "orange", "red"], }, }, "row": {"type": "nominal", "field": "column", "sort": {"field": "column"}}, "tooltip": [ {"type": "nominal", "field": "column"}, - {"type": "quantitative", "field": "normalised_adjustment"}, + {"type": "quantitative", "field": "bayes_factor", "title": "Bayes factor, K"}, + {"type": "quantitative", "field": "logk", "title": "log2(K)"} ], "x": { "type": "quantitative", - "axis": {"title": "Influence on match probabiity."}, - "field": "normalised_adjustment", - "scale": {"domain": [-0.5, 0.5]}, + "axis": {"title": "log2(Bayes factor, K = m/u)", + "values": [-10,-5,0,5,10]}, + "field": "logk", + "scale": {"domain": [-10, 10]}, }, "y": {"type": "nominal", "field": "level"}, }, + "transform": [ + {"calculate": "(log(datum.bayes_factor) / log(2))", "as": "logk"} + ], "height": 50, "resolve": {"scale": {"y": "independent"}}, "title": "Influence of comparison vector values on match probability", "$schema": "https://vega.github.io/schema/vega-lite/v3.4.0.json", } -adjustment_factor_chart_def = { - "config": {"view": {"width": 400, "height": 300}, "mark": {"tooltip": None}}, - "data": {"values": None}, - "mark": "bar", - "encoding": { - "color": { - "type": "quantitative", - "field": "normalised", - "scale": { - "domain": [-0.5, -0.4, 0, 0.4, 0.5], - "range": ["red", "orange", "green", "orange", "red"], - }, - }, - "tooltip": [ - {"type": "nominal", "field": "field"}, - {"type": "quantitative", "field": "normalised"}, - ], - "x": { - "type": "quantitative", - "field": "normalised", - "scale": {"domain": [-0.5, 0.5]}, - }, - "y": {"type": "nominal", "field": "column", "sort": {"field": "gamma"}}, - }, - "$schema": "https://vega.github.io/schema/vega-lite/v3.4.0.json", -} multi_chart_template = """ @@ -319,91 +300,98 @@ """ # pragma: no cover -adjustment_history_chart_def = { - 'hconcat': [{ - 'mark': 'bar', - 'encoding': { - 'color': { - 'type': 'quantitative', - 'field': 'level', - 'legend': {}, - 'scale': {'range': ['red', 'orange', 'green']} - }, - 'tooltip': [ - {'type': 'nominal', 'field': 'column'}, - {'type': 'ordinal', 'field': 'level'}, - {'type': 'quantitative', 'field': 'm'}, - {'type': 'quantitative', 'field': 'u'}, - {'type': 'quantitative', 'field': 'normalised_adjustment'}], - 'x': { - 'type': 'ordinal', - 'field': 'level' - }, - 'y': { - 'type': 'quantitative', - 'axis': {'title': 'Influence on match probability'}, - 'field': 'normalised_adjustment', - 'scale': {'domain': [-0.5, 0.5]} - } - }, +bayes_factor_history_chart_def = { + "hconcat": [{ + "mark": "bar", + "encoding": { + "color": { + "type": "quantitative", + "field": "level", + "scale": {"range": ["red", "orange", "green"]} + }, + "tooltip": [ + {"type": "nominal", "field": "column"}, + {"type": "ordinal", "field": "level"}, + {"type": "quantitative", "field": "m"}, + {"type": "quantitative", "field": "u"}, + {"type": "quantitative", "field": "bayes_factor", "title": "Bayes factor, K"}, + {"type": "quantitative", "field": "logk", "title": "log2(K)"} + ], + "x": {"type": "ordinal", "field": "level"}, + "y": { + "type": "quantitative", + "axis": { + "title": "log2(Bayes factor, K = m/u)", + "values": [-10,-5,-2,-1,0,1,2,5,10] + }, + "field": "logk", + #"scale": {"domain": [-10, 10]}, + } + }, + "height": 150, + "selection": { + "selector190": { + "type": "single", + "on": "mouseover", + "fields": ["level", "column"] + } + }, "transform": [ + {"calculate": "(log(datum.bayes_factor) / log(2))", "as": "logk"}, {"filter": "(datum.final === true)"} ], - 'width': 100, - 'height': 150, - 'selection': { - 'selector190': {'type': 'single', 'on': 'mouseover', 'fields': ['level', 'column']} - } + "width": 100 }, { - 'layer': [{ - 'mark': 'line', - 'height': 150, - 'encoding': { - 'color': { - 'type': 'quantitative', - 'field': 'level', - 'legend': {'type': 'symbol', 'tickCount':2}, - 'scale': {'range': ['red', 'orange', 'green']} - }, - 'opacity': { - 'condition': { - 'value': 0.8, - 'selection': {'not': 'selector190'} + "layer": [ + { + "mark": "line", + "encoding": { + "color": { + "type": "quantitative", + "field": "level", + "legend": {"tickCount": 2, "type": "symbol"}, + "scale": {"range": ["red", "orange", "green"]} }, - 'value': 1 - }, - 'size': { - 'condition': { - 'value': 3, - 'selection': {'not': 'selector190'} + "opacity": { + "condition": {"selection": {"not": "selector190"}, "value": 0.8}, + "value": 1 + }, + "size": { + "condition": {"selection": {"not": "selector190"}, "value": 3}, + "value": 5 + }, + "tooltip": [ + {"type": "nominal", "field": "column"}, + {"type": "quantitative", "field": "iteration"}, + {"type": "ordinal", "field": "level"}, + {"type": "quantitative", "field": "m"}, + {"type": "quantitative", "field": "u"}, + {"type": "quantitative", "field": "bayes_factor", "title": "Bayes factor, K"}, + {"type": "quantitative", "field": "logk", "title": "log2(K)"} + ], + "x": { + "type": "ordinal", + "axis": {"title": "Iteration"}, + "field": "iteration" }, - 'value': 5 + "y": { + "type": "quantitative", + "axis": { + "title": "log2(Bayes factor, K = m/u)", + "values": [-10,-5,-2,-1,0,1,2,5,10] + }, + "field": "logk", + #"scale": {"domain": [-10, 10]}, + } }, - 'tooltip': [ - {'type': 'nominal', 'field': 'column'}, - {'type': 'quantitative', 'field': 'iteration'}, - {'type': 'ordinal', 'field': 'level'}, - {'type': 'quantitative', 'field': 'm'}, - {'type': 'quantitative', 'field': 'u'}, - {'type': 'quantitative', 'field': 'normalised_adjustment'} + "transform": [ + {"calculate": "(log(datum.bayes_factor) / log(2))", "as": "logk"} ], - 'x': { - 'type': 'ordinal', - 'axis': {'title': 'Iteration'}, - 'field': 'iteration' - }, - 'y': { - 'type': 'quantitative', - 'axis': {'title': 'Influence on match probability'}, - 'field': 'normalised_adjustment', - 'scale': {'domain': [-0.5, 0.5]} - } + "height": 150 } - } - ] - } - ], + ], + }], 'title': {'text': None, 'orient': 'top', 'dx': 200}, 'data': {'values': None} } \ No newline at end of file diff --git a/splink/params.py b/splink/params.py index 9071943fd8..71afc3346a 100644 --- a/splink/params.py +++ b/splink/params.py @@ -17,8 +17,8 @@ probability_distribution_chart, gamma_distribution_chart_def, ll_iteration_chart_def, - adjustment_weight_chart_def, - adjustment_history_chart_def, + bayes_factor_chart_def, + bayes_factor_history_chart_def, multi_chart_template, ) from .check_types import check_types @@ -173,10 +173,10 @@ def _convert_params_dict_to_dataframe(params, iteration_num=None): data.append(this_row) return data - def _convert_params_dict_to_normalised_adjustment_data(self): + def _convert_params_dict_to_bayes_factor_data(self): """ Get the data needed for a chart that shows which comparison - vector values have the greatest effect on matc probability + vector values have the greatest effect on match probability """ data = [] # Want to compare the u and m probabilities @@ -195,21 +195,19 @@ def _convert_params_dict_to_normalised_adjustment_data(self): row["u"] = this_gamma["prob_dist_non_match"][level]["probability"] row["level_proportion"] = row["m"]*lam + row["u"]*(1-lam) try: - row["adjustment"] = row["m"] / (row["m"] + row["u"]) - row["normalised_adjustment"] = row["adjustment"] - 0.5 + row["bayes_factor"] = row["m"] / row["u"] except ZeroDivisionError: - row["adjustment"] = None - row["normalised_adjustment"] = None + row["bayes_factor"] = None data.append(row) return data - def _convert_params_dict_to_normalised_adjustment_iteration_history(self): + def _convert_params_dict_to_bayes_factor_iteration_history(self): """ Get the data needed for a chart that shows which comparison vector values have the greatest effect on match probability """ - adj_data = [] + data = [] pi = gk = self.params["π"] gk = list(pi.keys()) @@ -229,18 +227,17 @@ def _convert_params_dict_to_normalised_adjustment_iteration_history(self): row["m"] = this_gamma["prob_dist_match"][level]["probability"] row["u"] = this_gamma["prob_dist_non_match"][level]["probability"] try: - row["adjustment"] = row["m"] / (row["m"] + row["u"]) - row["normalised_adjustment"] = row["adjustment"] - 0.5 + row["bayes_factor"] = row["m"] / row["u"] except ZeroDivisionError: - row["adjustment"] = None - row["normalised_adjustment"] = None + row["bayes_factor"] = None + if it_num == len(self.param_history)-1: row["final"]=True else: row["final"]=False - adj_data.append(row) - return adj_data + data.append(row) + return data def _iteration_history_df_gammas(self): data = [] @@ -465,7 +462,7 @@ def gamma_distribution_chart(self): # pragma: no cover If altair is installed, returns the chart Otherwise will return the chart spec as a dictionary """ - data = self._convert_params_dict_to_normalised_adjustment_data() + data = self._convert_params_dict_to_bayes_factor_data() gamma_distribution_chart_def["data"]["values"] = data @@ -474,21 +471,21 @@ def gamma_distribution_chart(self): # pragma: no cover else: return gamma_distribution_chart_def - def adjustment_factor_chart(self): # pragma: no cover + def bayes_factor_chart(self): # pragma: no cover """ If altair is installed, returns the chart Otherwise will return the chart spec as a dictionary """ - data = self._convert_params_dict_to_normalised_adjustment_data() + data = self._convert_params_dict_to_bayes_factor_data() - adjustment_weight_chart_def["data"]["values"] = data + bayes_factor_chart_def["data"]["values"] = data if altair_installed: - return alt.Chart.from_dict(adjustment_weight_chart_def) + return alt.Chart.from_dict(bayes_factor_chart_def) else: - return adjustment_weight_chart_def + return bayes_factor_chart_def - def adjustment_factor_history_charts(self): + def bayes_factor_history_charts(self): """ If altair is installed, returns the chart Otherwise will return the chart spec as a dictionary @@ -497,7 +494,7 @@ def adjustment_factor_history_charts(self): chart_defs = [] # Full iteration history - data = self._convert_params_dict_to_normalised_adjustment_iteration_history() + data = self._convert_params_dict_to_bayes_factor_iteration_history() # Create charts for each column for col_dict in self.settings["comparison_columns"]: @@ -508,7 +505,7 @@ def adjustment_factor_history_charts(self): elif "custom_name" in col_dict: col_name = col_dict["custom_name"] - chart_def = copy.deepcopy(adjustment_history_chart_def) + chart_def = copy.deepcopy(bayes_factor_history_chart_def) # Assign iteration history to values of chart_def chart_def["data"]["values"] = [d for d in data if d['column']==col_name] chart_def["title"]["text"] = col_name @@ -519,7 +516,7 @@ def adjustment_factor_history_charts(self): "config": { "view": {"width": 400, "height": 120}, }, - "title": {"text":"Influence factors iteration history", "anchor": "middle"}, + "title": {"text":"Bayes factor iteration history", "anchor": "middle"}, "vconcat": chart_defs, "resolve": {"scale":{"color": "independent"}}, '$schema': 'https://vega.github.io/schema/vega-lite/v4.8.1.json' @@ -541,7 +538,7 @@ def all_charts_write_html_file(self, filename="splink_charts.html", overwrite=Fa if altair_installed: c1 = self.probability_distribution_chart().to_json(indent=None) - c2 = self.adjustment_factor_chart().to_json(indent=None) + c2 = self.bayes_factor_chart().to_json(indent=None) c3 = self.lambda_iteration_chart().to_json(indent=None) c4 = self.pi_iteration_chart().to_json(indent=None) @@ -550,7 +547,7 @@ def all_charts_write_html_file(self, filename="splink_charts.html", overwrite=Fa else: c5 = "" - c6 = self.adjustment_factor_history_charts().to_json(indent=None) + c6 = self.bayes_factor_history_charts().to_json(indent=None) c7 = self.gamma_distribution_chart().to_json(indent=None) with open(filename, "w") as f: @@ -570,7 +567,7 @@ def all_charts_write_html_file(self, filename="splink_charts.html", overwrite=Fa ) else: c1 = json.dumps(self.probability_distribution_chart()) - c2 = json.dumps(self.adjustment_factor_chart()) + c2 = json.dumps(self.bayes_factor_chart()) c3 = json.dumps(self.lambda_iteration_chart()) c4 = json.dumps(self.pi_iteration_chart()) @@ -579,7 +576,7 @@ def all_charts_write_html_file(self, filename="splink_charts.html", overwrite=Fa else: c5 = "" - c6 = json.dumps(self.adjustment_factor_history_charts()) + c6 = json.dumps(self.bayes_factor_history_charts()) c7 = json.dumps(self.gamma_distribution_chart()) with open(filename, "w") as f: From 1afd6f68cc857b3643ad790f2dcd3985b6558846 Mon Sep 17 00:00:00 2001 From: Sam Lindsay Date: Mon, 3 Aug 2020 10:48:47 +0000 Subject: [PATCH 2/4] Fix intuition report/chart --- splink/chart_definitions.py | 2 -- splink/intuition.py | 43 ++++++++++++++++++++----------------- 2 files changed, 23 insertions(+), 22 deletions(-) diff --git a/splink/chart_definitions.py b/splink/chart_definitions.py index 2a8d54d3ea..d05ae9b7d7 100644 --- a/splink/chart_definitions.py +++ b/splink/chart_definitions.py @@ -265,8 +265,6 @@ "$schema": "https://vega.github.io/schema/vega-lite/v3.4.0.json", } - - multi_chart_template = """ diff --git a/splink/intuition.py b/splink/intuition.py index 9ae353cbb4..8cc8ad94b5 100644 --- a/splink/intuition.py +++ b/splink/intuition.py @@ -1,4 +1,4 @@ -from .chart_definitions import adjustment_factor_chart_def +from .chart_definitions import bayes_factor_chart_def from .params import Params altair_installed = True @@ -18,9 +18,9 @@ {col_name}_r: {value_r} Comparison has {num_levels} levels 𝛾 for this comparison = {gamma_col_name} = {gamma_value} -Amongst matches, P(𝛾 = {prob_m}): -Amongst non matches, P(𝛾 = {prob_nm}): -Adjustment factor = p1/(p1 + p2) = {adj} +Amongst matches, m = P(𝛾|match) = {prob_m} +Amongst non matches, u = P(𝛾|non-match) = {prob_nm} +Bayes factor = m/u = {bf} New probability of match (updated belief): {updated_belief} """ @@ -71,15 +71,14 @@ def intuition_report(row_dict:dict, params:Params): d["prob_m"] = float(row_dict[f"prob_{gk}_match"]) d["prob_nm"] = float(row_dict[f"prob_{gk}_non_match"]) - d["adj"] = d["prob_m"]/(d["prob_m"] + d["prob_nm"]) + d["bf"] = d["prob_m"]/d["prob_nm"] - # Update beleif - adj = d["adj"] + # Update belief + bf = d["bf"] current_prob = d["current_p"] - a = adj*current_prob - b = (1-adj) * (1-current_prob) - new_p = a/(a+b) + a = bf*current_prob + new_p = a/(a + (1-current_prob)) d["updated_belief"] = new_p d["current_p"] = new_p @@ -91,36 +90,40 @@ def intuition_report(row_dict:dict, params:Params): return report -def _get_adjustment_factors(row_dict, params): +def _get_bayes_factors(row_dict, params): pi = params.params["π"] gamma_keys = pi.keys() # gamma_0, gamma_1 etc. - adjustment_factors = [] + bayes_factors = [] for gk in gamma_keys: col_params = pi[gk] - col_name = col_params["column_name"] + column = col_params["column_name"] prob_m = float(row_dict[f"prob_{gk}_match"]) prob_nm = float(row_dict[f"prob_{gk}_non_match"]) - adj = prob_m/(prob_m + prob_nm) + bf = prob_m/prob_nm - adjustment_factors.append({"gamma": gk,"col_name": col_name, "value": adj, "normalised": adj-0.5}) + bayes_factors.append({"gamma": gk,"column": column, "bayes_factor": bf}) - return adjustment_factors + return bayes_factors -def adjustment_factor_chart(row_dict, params): +def bayes_factor_chart(row_dict, params): - adjustment_factor_chart_def["data"]["values"] = _get_adjustment_factors(row_dict, params) + bayes_factor_chart_def["data"]["values"] = _get_bayes_factors(row_dict, params) + bayes_factor_chart_def["encoding"]["y"]["field"] = "column" + bayes_factor_chart_def["encoding"]["color"]["scale"]["domain"] = [-10,10] + del bayes_factor_chart_def["encoding"]["row"] + del bayes_factor_chart_def["height"] if altair_installed: - return alt.Chart.from_dict(adjustment_factor_chart_def) + return alt.Chart.from_dict(bayes_factor_chart_def) else: - return adjustment_factor_chart_def + return bayes_factor_chart_def From 55292ead6d68af3b9bff9bb97a3f8671e4cb22c7 Mon Sep 17 00:00:00 2001 From: Sam Lindsay Date: Mon, 3 Aug 2020 11:13:44 +0000 Subject: [PATCH 3/4] Set midpoint of colour scale --- splink/chart_definitions.py | 3 ++- splink/intuition.py | 1 - 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/splink/chart_definitions.py b/splink/chart_definitions.py index d05ae9b7d7..7eee6dbd7d 100644 --- a/splink/chart_definitions.py +++ b/splink/chart_definitions.py @@ -236,7 +236,8 @@ "field": "logk", "title": "log2(K)", "scale": { - "scheme": "redyellowgreen" + "scheme": "redyellowgreen", + "domainMid": 0.0, #"domain": [-10, -7, 0, 7, 10], #"range": ["red", "orange", "green", "orange", "red"], }, diff --git a/splink/intuition.py b/splink/intuition.py index 8cc8ad94b5..3cf0afbde6 100644 --- a/splink/intuition.py +++ b/splink/intuition.py @@ -118,7 +118,6 @@ def bayes_factor_chart(row_dict, params): bayes_factor_chart_def["data"]["values"] = _get_bayes_factors(row_dict, params) bayes_factor_chart_def["encoding"]["y"]["field"] = "column" - bayes_factor_chart_def["encoding"]["color"]["scale"]["domain"] = [-10,10] del bayes_factor_chart_def["encoding"]["row"] del bayes_factor_chart_def["height"] From e59f56d1a7e8f79b05d786cf1ff31430dea07d1b Mon Sep 17 00:00:00 2001 From: Sam Lindsay Date: Tue, 4 Aug 2020 09:50:37 +0000 Subject: [PATCH 4/4] Remove pi_iteration_chart + tweak axis titles --- pyproject.toml | 2 +- splink/chart_definitions.py | 91 ++++++------------------------------- splink/params.py | 50 ++++++-------------- 3 files changed, 30 insertions(+), 113 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 362a6670b3..d06c372340 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "splink" -version = "0.2.2" +version = "0.2.3" description = "[Beta]: Implementation in Apache Spark of the EM algorithm to estimate parameters of Fellegi-Sunter's canonical model of record linkage." authors = ["Robin Linacre ", "Sam Lindsay", "Theodore Manassis"] license = "MIT" diff --git a/splink/chart_definitions.py b/splink/chart_definitions.py index 7eee6dbd7d..9d54d1800d 100644 --- a/splink/chart_definitions.py +++ b/splink/chart_definitions.py @@ -1,73 +1,3 @@ -pi_iteration_chart_def = { - "config": { - "view": {"width": 400, "height": 300}, # pragma: no cover - "mark": {"tooltip": None}, - "title": {"anchor": "middle"}, - }, - "hconcat": [ - { - "mark": "bar", - "encoding": { - "color": {"type": "nominal", "field": "value"}, - "row": { - "type": "nominal", - "field": "column", - "sort": {"field": "gamma"}, - }, - "tooltip": [ - {"type": "quantitative", "field": "probability"}, - {"type": "ordinal", "field": "iteration"}, - {"type": "nominal", "field": "column"}, - {"type": "nominal", "field": "value"}, - ], - "x": {"type": "ordinal", "field": "iteration"}, - "y": { - "type": "quantitative", - "aggregate": "sum", - "axis": {"title": "𝛾 value"}, - "field": "probability", - }, - }, - "height": 150, - "resolve": {"scale": {"y": "independent"}}, - "title": "Non Match", - "transform": [{"filter": "(datum.match === 0)"}], - }, - { - "mark": "bar", - "encoding": { - "color": {"type": "nominal", "field": "value"}, - "row": { - "type": "nominal", - "field": "column", - "sort": {"field": "gamma"}, - }, - "tooltip": [ - {"type": "quantitative", "field": "probability"}, - {"type": "ordinal", "field": "iteration"}, - {"type": "nominal", "field": "column"}, - {"type": "nominal", "field": "value"}, - ], - "x": {"type": "ordinal", "field": "iteration"}, - "y": { - "type": "quantitative", - "aggregate": "sum", - "axis": {"title": "𝛾 value"}, - "field": "probability", - }, - }, - "height": 150, - "resolve": {"scale": {"y": "independent"}}, - "title": "Match", - "transform": [{"filter": "(datum.match === 1)"}], - }, - ], - "data": {"values": None}, - "title": "Probability distribution of comparison vector values by iteration number", - "$schema": "https://vega.github.io/schema/vega-lite/v3.4.0.json", -} - - lambda_iteration_chart_def = { "config": { "view": {"width": 400, "height": 300}, @@ -141,7 +71,11 @@ {"type": "quantitative", "field": "probability", "format": ".4f"}, {"type": "ordinal", "field": "value"}, ], - "x": {"type": "quantitative", "field": "probability"}, + "x": { + "type": "quantitative", + "field": "probability", + "axis": {"title": "proportion of non-matches"} + }, "y": { "type": "nominal", "axis": {"title": "𝛾 value"}, @@ -171,7 +105,11 @@ {"type": "quantitative", "field": "probability", "format": ".4f"}, {"type": "ordinal", "field": "value"}, ], - "x": {"type": "quantitative", "field": "probability"}, + "x": { + "type": "quantitative", + "field": "probability", + "axis": {"title": "proportion of matches"} + }, "y": { "type": "nominal", "axis": {"title": "𝛾 value"}, @@ -207,7 +145,11 @@ {"type": "quantitative", "field": "level_proportion", "format": ".4f"}, {"type": "ordinal", "field": "level"}, ], - "x": {"type": "quantitative", "field": "level_proportion"}, + "x": { + "type": "quantitative", + "field": "level_proportion", + "axis": {"title": "proportion of comparisons"} + }, "y": { "type": "nominal", "axis": {"title": "𝛾 value"}, @@ -281,8 +223,6 @@

-
-
@@ -293,7 +233,6 @@ vegaEmbed('#vis4', {spec4}).catch(console.error); vegaEmbed('#vis5', {spec5}).catch(console.error); vegaEmbed('#vis6', {spec6}).catch(console.error); - vegaEmbed('#vis7', {spec7}).catch(console.error); diff --git a/splink/params.py b/splink/params.py index 71afc3346a..cc223d9f03 100644 --- a/splink/params.py +++ b/splink/params.py @@ -13,7 +13,6 @@ from .validate import _get_default_value from .chart_definitions import ( lambda_iteration_chart_def, - pi_iteration_chart_def, probability_distribution_chart, gamma_distribution_chart_def, ll_iteration_chart_def, @@ -399,23 +398,6 @@ def field_value_to_probs(fv): print(field) field_value_to_probs(value) - - def pi_iteration_chart(self): # pragma: no cover - - if self.real_params: - data = self._iteration_history_df_gammas() - data_real = self._convert_params_dict_to_dataframe(self.real_params, "real_param") - data.extend(data_real) - else: - data = self._iteration_history_df_gammas() - - pi_iteration_chart_def["data"]["values"] = data - - if altair_installed: - return alt.Chart.from_dict(pi_iteration_chart_def) - else: - return pi_iteration_chart_def - def lambda_iteration_chart(self): # pragma: no cover data = self._iteration_history_df_lambdas() if self.real_params: @@ -540,15 +522,14 @@ def all_charts_write_html_file(self, filename="splink_charts.html", overwrite=Fa c1 = self.probability_distribution_chart().to_json(indent=None) c2 = self.bayes_factor_chart().to_json(indent=None) c3 = self.lambda_iteration_chart().to_json(indent=None) - c4 = self.pi_iteration_chart().to_json(indent=None) if self.log_likelihood_exists: - c5 = self.ll_iteration_chart().to_json(indent=None) + c4 = self.ll_iteration_chart().to_json(indent=None) else: - c5 = "" + c4 = "" - c6 = self.bayes_factor_history_charts().to_json(indent=None) - c7 = self.gamma_distribution_chart().to_json(indent=None) + c5 = self.bayes_factor_history_charts().to_json(indent=None) + c6 = self.gamma_distribution_chart().to_json(indent=None) with open(filename, "w") as f: f.write( @@ -557,27 +538,25 @@ def all_charts_write_html_file(self, filename="splink_charts.html", overwrite=Fa vegalite_version=alt.VEGALITE_VERSION, vegaembed_version=alt.VEGAEMBED_VERSION, spec1=c1, - spec2=c7, + spec2=c6, spec3=c2, spec4=c3, - spec5=c5, - spec6=c4, - spec7=c6 + spec5=c4, + spec6=c5 ) ) else: c1 = json.dumps(self.probability_distribution_chart()) c2 = json.dumps(self.bayes_factor_chart()) c3 = json.dumps(self.lambda_iteration_chart()) - c4 = json.dumps(self.pi_iteration_chart()) if self.log_likelihood_exists: - c5 = json.dumps(self.ll_iteration_chart()) + c4 = json.dumps(self.ll_iteration_chart()) else: - c5 = "" + c4 = "" - c6 = json.dumps(self.bayes_factor_history_charts()) - c7 = json.dumps(self.gamma_distribution_chart()) + c5 = json.dumps(self.bayes_factor_history_charts()) + c6 = json.dumps(self.gamma_distribution_chart()) with open(filename, "w") as f: f.write( @@ -586,12 +565,11 @@ def all_charts_write_html_file(self, filename="splink_charts.html", overwrite=Fa vegalite_version="3.3.0", vegaembed_version="4", spec1=c1, - spec2=c7, + spec2=c6, spec3=c2, spec4=c3, - spec5=c5, - spec6=c4, - spec7=c6 + spec5=c4, + spec7=c5 ) )