From fca8213bbf844bf111f8caf0f91944c7338aeaa5 Mon Sep 17 00:00:00 2001
From: Sam Lindsay <sam.lindsay@digital.justice.gov.uk>
Date: Fri, 31 Jul 2020 14:32:52 +0000
Subject: [PATCH 1/4] Replace adjustment-factor charts with Bayes factor charts

---
 splink/chart_definitions.py | 208 +++++++++++++++++-------------------
 splink/params.py            |  57 +++++-----
 2 files changed, 125 insertions(+), 140 deletions(-)
diff --git a/splink/chart_definitions.py b/splink/chart_definitions.py
index 59ea265e3e..2a8d54d3ea 100644
--- a/splink/chart_definitions.py
+++ b/splink/chart_definitions.py
@@ -222,68 +222,49 @@
     "$schema": "https://vega.github.io/schema/vega-lite/v3.4.0.json",
 }
 
-adjustment_weight_chart_def = {
+bayes_factor_chart_def = {
     "config": {
         "view": {"width": 400, "height": 300},
         "mark": {"tooltip": None},
         "title": {"anchor": "middle"},
     },
     "data": {"values": None},
-    "mark": "bar",
+    "mark": {"type": "bar", "clip": True},
     "encoding": {
         "color": {
             "type": "quantitative",
-            "field": "normalised_adjustment",
+            "field": "logk",
+            "title": "log2(K)",
             "scale": {
-                "domain": [-0.5, -0.4, 0, 0.4, 0.5],
-                "range": ["red", "orange", "green", "orange", "red"],
+                "scheme": "redyellowgreen"
+                #"domain": [-10, -7, 0, 7, 10],
+                #"range": ["red", "orange", "green", "orange", "red"],
             },
         },
         "row": {"type": "nominal", "field": "column", "sort": {"field": "column"}},
         "tooltip": [
             {"type": "nominal", "field": "column"},
-            {"type": "quantitative", "field": "normalised_adjustment"},
+            {"type": "quantitative", "field": "bayes_factor", "title": "Bayes factor, K"},
+            {"type": "quantitative", "field": "logk", "title": "log2(K)"}
         ],
         "x": {
             "type": "quantitative",
-            "axis": {"title": "Influence on match probabiity."},
-            "field": "normalised_adjustment",
-            "scale": {"domain": [-0.5, 0.5]},
+            "axis": {"title": "log2(Bayes factor, K = m/u)", 
+                     "values": [-10,-5,0,5,10]},
+            "field": "logk",
+            "scale": {"domain": [-10, 10]},
         },
         "y": {"type": "nominal", "field": "level"},
     },
+    "transform": [
+        {"calculate": "(log(datum.bayes_factor) / log(2))", "as": "logk"}
+    ],
     "height": 50,
     "resolve": {"scale": {"y": "independent"}},
     "title": "Influence of comparison vector values on match probability",
     "$schema": "https://vega.github.io/schema/vega-lite/v3.4.0.json",
 }
 
-adjustment_factor_chart_def = {
-    "config": {"view": {"width": 400, "height": 300}, "mark": {"tooltip": None}},
-    "data": {"values": None},
-    "mark": "bar",
-    "encoding": {
-        "color": {
-            "type": "quantitative",
-            "field": "normalised",
-            "scale": {
-                "domain": [-0.5, -0.4, 0, 0.4, 0.5],
-                "range": ["red", "orange", "green", "orange", "red"],
-            },
-        },
-        "tooltip": [
-            {"type": "nominal", "field": "field"},
-            {"type": "quantitative", "field": "normalised"},
-        ],
-        "x": {
-            "type": "quantitative",
-            "field": "normalised",
-            "scale": {"domain": [-0.5, 0.5]},
-        },
-        "y": {"type": "nominal", "field": "column", "sort": {"field": "gamma"}},
-    },
-    "$schema": "https://vega.github.io/schema/vega-lite/v3.4.0.json",
-}
 
 
 multi_chart_template = """
@@ -319,91 +300,98 @@
 </html>
 """  # pragma: no cover
 
-adjustment_history_chart_def = {
-    'hconcat': [{
-        'mark': 'bar',
-         'encoding': {
-             'color': {
-                 'type': 'quantitative',
-                 'field': 'level',
-                 'legend': {},
-                 'scale': {'range': ['red', 'orange', 'green']}
-             },
-             'tooltip': [
-                 {'type': 'nominal', 'field': 'column'},
-                 {'type': 'ordinal', 'field': 'level'},
-                 {'type': 'quantitative', 'field': 'm'},
-                 {'type': 'quantitative', 'field': 'u'},
-                 {'type': 'quantitative', 'field': 'normalised_adjustment'}],
-             'x': {
-                 'type': 'ordinal', 
-                 'field': 'level'
-             },
-             'y': {
-                 'type': 'quantitative',
-                 'axis': {'title': 'Influence on match probability'},
-                 'field': 'normalised_adjustment',
-                 'scale': {'domain': [-0.5, 0.5]}
-             }
-         },
+bayes_factor_history_chart_def = {
+    "hconcat": [{
+        "mark": "bar",
+        "encoding": {
+            "color": {
+                "type": "quantitative",
+                "field": "level",
+                "scale": {"range": ["red", "orange", "green"]}
+            },
+            "tooltip": [
+                {"type": "nominal", "field": "column"},
+                {"type": "ordinal", "field": "level"},
+                {"type": "quantitative", "field": "m"},
+                {"type": "quantitative", "field": "u"},
+                {"type": "quantitative", "field": "bayes_factor", "title": "Bayes factor, K"},
+                {"type": "quantitative", "field": "logk", "title": "log2(K)"}
+            ],
+            "x": {"type": "ordinal", "field": "level"},
+            "y": {
+                "type": "quantitative",
+                "axis": {
+                    "title": "log2(Bayes factor, K = m/u)",
+                    "values": [-10,-5,-2,-1,0,1,2,5,10]
+                },
+                "field": "logk",
+                #"scale": {"domain": [-10, 10]},
+            }
+        },
+        "height": 150,
+        "selection": {
+            "selector190": {
+                "type": "single",
+                "on": "mouseover",
+                "fields": ["level", "column"]
+            }
+        },
         "transform": [
+            {"calculate": "(log(datum.bayes_factor) / log(2))", "as": "logk"},
             {"filter": "(datum.final === true)"}
         ],
-        'width': 100,
-        'height': 150,
-        'selection': {
-            'selector190': {'type': 'single', 'on': 'mouseover', 'fields': ['level', 'column']}
-        }
+        "width": 100
     },
         {
-            'layer': [{
-                'mark': 'line',
-                'height': 150,
-                'encoding': {
-                    'color': {
-                        'type': 'quantitative',
-                        'field': 'level',
-                        'legend': {'type': 'symbol', 'tickCount':2},
-                        'scale': {'range': ['red', 'orange', 'green']}
-                    },
-                    'opacity': {
-                        'condition': {
-                            'value': 0.8,
-                            'selection': {'not': 'selector190'}
+            "layer": [
+                {
+                    "mark": "line",
+                    "encoding": {
+                        "color": {
+                            "type": "quantitative",
+                            "field": "level",
+                            "legend": {"tickCount": 2, "type": "symbol"},
+                            "scale": {"range": ["red", "orange", "green"]}
                         },
-                        'value': 1 
-                    },
-                    'size': {
-                        'condition': {
-                            'value': 3, 
-                            'selection': {'not': 'selector190'}
+                        "opacity": {
+                            "condition": {"selection": {"not": "selector190"}, "value": 0.8},
+                            "value": 1
+                        },
+                        "size": {
+                            "condition": {"selection": {"not": "selector190"}, "value": 3},
+                            "value": 5
+                        },
+                        "tooltip": [
+                            {"type": "nominal", "field": "column"},
+                            {"type": "quantitative", "field": "iteration"},
+                            {"type": "ordinal", "field": "level"},
+                            {"type": "quantitative", "field": "m"},
+                            {"type": "quantitative", "field": "u"},
+                            {"type": "quantitative", "field": "bayes_factor", "title": "Bayes factor, K"},
+                            {"type": "quantitative", "field": "logk", "title": "log2(K)"}
+                        ],
+                        "x": {
+                            "type": "ordinal",
+                            "axis": {"title": "Iteration"},
+                            "field": "iteration"
                         },
-                        'value': 5
+                        "y": {
+                            "type": "quantitative",
+                            "axis": {
+                                "title": "log2(Bayes factor, K = m/u)",
+                                "values": [-10,-5,-2,-1,0,1,2,5,10]
+                            },
+                            "field": "logk",
+                            #"scale": {"domain": [-10, 10]},
+                        }
                     },
-                    'tooltip': [
-                        {'type': 'nominal', 'field': 'column'},
-                        {'type': 'quantitative', 'field': 'iteration'},
-                        {'type': 'ordinal', 'field': 'level'},
-                        {'type': 'quantitative', 'field': 'm'},
-                        {'type': 'quantitative', 'field': 'u'},
-                        {'type': 'quantitative', 'field': 'normalised_adjustment'}
+                    "transform": [
+                        {"calculate": "(log(datum.bayes_factor) / log(2))", "as": "logk"}
                     ],
-                    'x': {
-                        'type': 'ordinal',     
-                        'axis': {'title': 'Iteration'},
-                        'field': 'iteration'
-                    },
-                    'y': {
-                        'type': 'quantitative',
-                        'axis': {'title': 'Influence on match probability'},
-                        'field': 'normalised_adjustment',
-                        'scale': {'domain': [-0.5, 0.5]}
-                    }
+                    "height": 150
                 }
-            }
-            ]
-        }
-    ],
+            ],
+        }],
     'title': {'text': None, 'orient': 'top', 'dx': 200},
     'data': {'values': None}
 }
\ No newline at end of file
diff --git a/splink/params.py b/splink/params.py
index 9071943fd8..71afc3346a 100644
--- a/splink/params.py
+++ b/splink/params.py
@@ -17,8 +17,8 @@
     probability_distribution_chart,
     gamma_distribution_chart_def,
     ll_iteration_chart_def,
-    adjustment_weight_chart_def,
-    adjustment_history_chart_def,
+    bayes_factor_chart_def,
+    bayes_factor_history_chart_def,
     multi_chart_template,
 )
 from .check_types import check_types
@@ -173,10 +173,10 @@ def _convert_params_dict_to_dataframe(params, iteration_num=None):
                 data.append(this_row)
         return data
 
-    def _convert_params_dict_to_normalised_adjustment_data(self):
+    def _convert_params_dict_to_bayes_factor_data(self):
         """
         Get the data needed for a chart that shows which comparison
-        vector values have the greatest effect on matc probability
+        vector values have the greatest effect on match probability
         """
         data = []
         # Want to compare the u and m probabilities
@@ -195,21 +195,19 @@ def _convert_params_dict_to_normalised_adjustment_data(self):
                 row["u"] = this_gamma["prob_dist_non_match"][level]["probability"]
                 row["level_proportion"] = row["m"]*lam + row["u"]*(1-lam)
                 try:
-                    row["adjustment"] = row["m"] / (row["m"] + row["u"])
-                    row["normalised_adjustment"] = row["adjustment"] - 0.5
+                    row["bayes_factor"] = row["m"] / row["u"]
                 except ZeroDivisionError:
-                    row["adjustment"] = None
-                    row["normalised_adjustment"] = None
+                    row["bayes_factor"] = None
 
                 data.append(row)
         return data
     
-    def _convert_params_dict_to_normalised_adjustment_iteration_history(self):
+    def _convert_params_dict_to_bayes_factor_iteration_history(self):
         """
         Get the data needed for a chart that shows which comparison
         vector values have the greatest effect on match probability
         """
-        adj_data = []
+        data = []
 
         pi = gk = self.params["π"]
         gk = list(pi.keys())
@@ -229,18 +227,17 @@ def _convert_params_dict_to_normalised_adjustment_iteration_history(self):
                     row["m"] = this_gamma["prob_dist_match"][level]["probability"]
                     row["u"] = this_gamma["prob_dist_non_match"][level]["probability"]
                     try:
-                        row["adjustment"] = row["m"] / (row["m"] + row["u"])
-                        row["normalised_adjustment"] = row["adjustment"] - 0.5
+                        row["bayes_factor"] = row["m"] / row["u"]
                     except ZeroDivisionError: 
-                        row["adjustment"] = None
-                        row["normalised_adjustment"] = None
+                        row["bayes_factor"] = None
+                    
                     if it_num == len(self.param_history)-1:
                         row["final"]=True
                     else:
                         row["final"]=False
 
-                    adj_data.append(row)
-        return adj_data
+                    data.append(row)
+        return data
 
     def _iteration_history_df_gammas(self):
         data = []
@@ -465,7 +462,7 @@ def gamma_distribution_chart(self):  # pragma: no cover
         If altair is installed, returns the chart
         Otherwise will return the chart spec as a dictionary
         """
-        data = self._convert_params_dict_to_normalised_adjustment_data()
+        data = self._convert_params_dict_to_bayes_factor_data()
 
         gamma_distribution_chart_def["data"]["values"] = data
 
@@ -474,21 +471,21 @@ def gamma_distribution_chart(self):  # pragma: no cover
         else:
             return gamma_distribution_chart_def
 
-    def adjustment_factor_chart(self):  # pragma: no cover
+    def bayes_factor_chart(self):  # pragma: no cover
         """
         If altair is installed, returns the chart
         Otherwise will return the chart spec as a dictionary
         """
-        data = self._convert_params_dict_to_normalised_adjustment_data()
+        data = self._convert_params_dict_to_bayes_factor_data()
 
-        adjustment_weight_chart_def["data"]["values"] = data
+        bayes_factor_chart_def["data"]["values"] = data
 
         if altair_installed:
-            return alt.Chart.from_dict(adjustment_weight_chart_def)
+            return alt.Chart.from_dict(bayes_factor_chart_def)
         else:
-            return adjustment_weight_chart_def
+            return bayes_factor_chart_def
         
-    def adjustment_factor_history_charts(self):
+    def bayes_factor_history_charts(self):
         """
         If altair is installed, returns the chart
         Otherwise will return the chart spec as a dictionary
@@ -497,7 +494,7 @@ def adjustment_factor_history_charts(self):
         chart_defs = []
     
         # Full iteration history
-        data = self._convert_params_dict_to_normalised_adjustment_iteration_history()
+        data = self._convert_params_dict_to_bayes_factor_iteration_history()
     
         # Create charts for each column
         for col_dict in self.settings["comparison_columns"]:
@@ -508,7 +505,7 @@ def adjustment_factor_history_charts(self):
             elif "custom_name" in col_dict:
                 col_name = col_dict["custom_name"] 
            
-            chart_def = copy.deepcopy(adjustment_history_chart_def)
+            chart_def = copy.deepcopy(bayes_factor_history_chart_def)
             # Assign iteration history to values of chart_def
             chart_def["data"]["values"] = [d for d in data if d['column']==col_name]
             chart_def["title"]["text"] = col_name
@@ -519,7 +516,7 @@ def adjustment_factor_history_charts(self):
             "config": {
                 "view": {"width": 400, "height": 120},
             },
-            "title": {"text":"Influence factors iteration history", "anchor": "middle"},
+            "title": {"text":"Bayes factor iteration history", "anchor": "middle"},
             "vconcat": chart_defs,
             "resolve": {"scale":{"color": "independent"}},
             '$schema': 'https://vega.github.io/schema/vega-lite/v4.8.1.json'
@@ -541,7 +538,7 @@ def all_charts_write_html_file(self, filename="splink_charts.html", overwrite=Fa
 
         if altair_installed:
             c1 = self.probability_distribution_chart().to_json(indent=None)
-            c2 = self.adjustment_factor_chart().to_json(indent=None)
+            c2 = self.bayes_factor_chart().to_json(indent=None)
             c3 = self.lambda_iteration_chart().to_json(indent=None)
             c4 = self.pi_iteration_chart().to_json(indent=None)
 
@@ -550,7 +547,7 @@ def all_charts_write_html_file(self, filename="splink_charts.html", overwrite=Fa
             else:
                 c5 = ""
 
-            c6 = self.adjustment_factor_history_charts().to_json(indent=None)
+            c6 = self.bayes_factor_history_charts().to_json(indent=None)
             c7 = self.gamma_distribution_chart().to_json(indent=None)
             
             with open(filename, "w") as f:
@@ -570,7 +567,7 @@ def all_charts_write_html_file(self, filename="splink_charts.html", overwrite=Fa
                 )
         else:
             c1 = json.dumps(self.probability_distribution_chart())
-            c2 = json.dumps(self.adjustment_factor_chart())
+            c2 = json.dumps(self.bayes_factor_chart())
             c3 = json.dumps(self.lambda_iteration_chart())
             c4 = json.dumps(self.pi_iteration_chart())
 
@@ -579,7 +576,7 @@ def all_charts_write_html_file(self, filename="splink_charts.html", overwrite=Fa
             else:
                 c5 = ""
             
-            c6 = json.dumps(self.adjustment_factor_history_charts())
+            c6 = json.dumps(self.bayes_factor_history_charts())
             c7 = json.dumps(self.gamma_distribution_chart())
             
             with open(filename, "w") as f:

From 1afd6f68cc857b3643ad790f2dcd3985b6558846 Mon Sep 17 00:00:00 2001
From: Sam Lindsay <sam.lindsay@digital.justice.gov.uk>
Date: Mon, 3 Aug 2020 10:48:47 +0000
Subject: [PATCH 2/4] Fix intuition report/chart

---
 splink/chart_definitions.py |  2 --
 splink/intuition.py         | 43 ++++++++++++++++++++-----------------
 2 files changed, 23 insertions(+), 22 deletions(-)

diff --git a/splink/chart_definitions.py b/splink/chart_definitions.py
index 2a8d54d3ea..d05ae9b7d7 100644
--- a/splink/chart_definitions.py
+++ b/splink/chart_definitions.py
@@ -265,8 +265,6 @@
     "$schema": "https://vega.github.io/schema/vega-lite/v3.4.0.json",
 }
 
-
-
 multi_chart_template = """
 <!DOCTYPE html>
 <html>
diff --git a/splink/intuition.py b/splink/intuition.py
index 9ae353cbb4..8cc8ad94b5 100644
--- a/splink/intuition.py
+++ b/splink/intuition.py
@@ -1,4 +1,4 @@
-from .chart_definitions import adjustment_factor_chart_def
+from .chart_definitions import bayes_factor_chart_def
 from .params import Params
 
 altair_installed = True
@@ -18,9 +18,9 @@
 {col_name}_r: {value_r}
 Comparison has {num_levels} levels
 𝛾 for this comparison = {gamma_col_name} = {gamma_value}
-Amongst matches, P(𝛾 = {prob_m}):
-Amongst non matches, P(𝛾 = {prob_nm}):
-Adjustment factor = p1/(p1 + p2) = {adj}
+Amongst matches, m = P(𝛾|match) = {prob_m}
+Amongst non matches, u = P(𝛾|non-match) = {prob_nm}
+Bayes factor = m/u = {bf}
 New probability of match (updated belief): {updated_belief}
 """
 
@@ -71,15 +71,14 @@ def intuition_report(row_dict:dict, params:Params):
         d["prob_m"] = float(row_dict[f"prob_{gk}_match"])
         d["prob_nm"] = float(row_dict[f"prob_{gk}_non_match"])
 
-        d["adj"] = d["prob_m"]/(d["prob_m"] + d["prob_nm"])
+        d["bf"] = d["prob_m"]/d["prob_nm"]
 
-        # Update beleif
-        adj = d["adj"]
+        # Update belief
+        bf = d["bf"]
         current_prob = d["current_p"]
 
-        a = adj*current_prob
-        b = (1-adj) * (1-current_prob)
-        new_p = a/(a+b)
+        a = bf*current_prob
+        new_p = a/(a + (1-current_prob))
         d["updated_belief"] = new_p
         d["current_p"] = new_p
 
@@ -91,36 +90,40 @@ def intuition_report(row_dict:dict, params:Params):
 
     return report
 
-def _get_adjustment_factors(row_dict, params):
+def _get_bayes_factors(row_dict, params):
 
     pi = params.params["π"]
 
     gamma_keys = pi.keys() # gamma_0, gamma_1 etc.
 
-    adjustment_factors  = []
+    bayes_factors  = []
 
 
     for gk in gamma_keys:
 
         col_params = pi[gk]
 
-        col_name = col_params["column_name"]
+        column = col_params["column_name"]
 
         prob_m = float(row_dict[f"prob_{gk}_match"])
         prob_nm = float(row_dict[f"prob_{gk}_non_match"])
 
-        adj = prob_m/(prob_m + prob_nm)
+        bf = prob_m/prob_nm
 
-        adjustment_factors.append({"gamma": gk,"col_name": col_name, "value": adj, "normalised": adj-0.5})
+        bayes_factors.append({"gamma": gk,"column": column, "bayes_factor": bf})
 
-    return adjustment_factors
+    return bayes_factors
 
-def adjustment_factor_chart(row_dict, params):
+def bayes_factor_chart(row_dict, params):
 
-    adjustment_factor_chart_def["data"]["values"] = _get_adjustment_factors(row_dict, params)
+    bayes_factor_chart_def["data"]["values"] = _get_bayes_factors(row_dict, params)
+    bayes_factor_chart_def["encoding"]["y"]["field"] = "column"
+    bayes_factor_chart_def["encoding"]["color"]["scale"]["domain"] = [-10,10]
+    del bayes_factor_chart_def["encoding"]["row"]
+    del bayes_factor_chart_def["height"]
 
     if altair_installed:
-        return alt.Chart.from_dict(adjustment_factor_chart_def)
+        return alt.Chart.from_dict(bayes_factor_chart_def)
     else:
-        return adjustment_factor_chart_def
+        return bayes_factor_chart_def
 

From 55292ead6d68af3b9bff9bb97a3f8671e4cb22c7 Mon Sep 17 00:00:00 2001
From: Sam Lindsay <sam.lindsay@digital.justice.gov.uk>
Date: Mon, 3 Aug 2020 11:13:44 +0000
Subject: [PATCH 3/4] Set midpoint of colour scale

Also remove the "row" and "height" keys from the shared chart
definition with dict.pop(key, None) instead of del, so that
intuition.bayes_factor_chart can be called more than once without
raising KeyError on the already-removed keys.

---
 splink/chart_definitions.py | 3 ++-
 splink/intuition.py         | 5 ++---
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/splink/chart_definitions.py b/splink/chart_definitions.py
index d05ae9b7d7..7eee6dbd7d 100644
--- a/splink/chart_definitions.py
+++ b/splink/chart_definitions.py
@@ -236,7 +236,8 @@
             "field": "logk",
             "title": "log2(K)",
             "scale": {
-                "scheme": "redyellowgreen"
+                "scheme": "redyellowgreen",
+                "domainMid": 0.0,
                 #"domain": [-10, -7, 0, 7, 10],
                 #"range": ["red", "orange", "green", "orange", "red"],
             },
diff --git a/splink/intuition.py b/splink/intuition.py
index 8cc8ad94b5..3cf0afbde6 100644
--- a/splink/intuition.py
+++ b/splink/intuition.py
@@ -118,7 +118,6 @@ def bayes_factor_chart(row_dict, params):
 
     bayes_factor_chart_def["data"]["values"] = _get_bayes_factors(row_dict, params)
     bayes_factor_chart_def["encoding"]["y"]["field"] = "column"
-    bayes_factor_chart_def["encoding"]["color"]["scale"]["domain"] = [-10,10]
-    del bayes_factor_chart_def["encoding"]["row"]
-    del bayes_factor_chart_def["height"]
+    bayes_factor_chart_def["encoding"].pop("row", None)
+    bayes_factor_chart_def.pop("height", None)
 

From e59f56d1a7e8f79b05d786cf1ff31430dea07d1b Mon Sep 17 00:00:00 2001
From: Sam Lindsay <sam.lindsay@digital.justice.gov.uk>
Date: Tue, 4 Aug 2020 09:50:37 +0000
Subject: [PATCH 4/4] Remove pi_iteration_chart + tweak axis titles

---
 pyproject.toml              |  2 +-
 splink/chart_definitions.py | 91 ++++++-------------------------------
 splink/params.py            | 50 ++++++--------------
 3 files changed, 30 insertions(+), 113 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index 362a6670b3..d06c372340 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "splink"
-version = "0.2.2"
+version = "0.2.3"
 description = "[Beta]: Implementation in Apache Spark of the EM algorithm to estimate parameters of Fellegi-Sunter's canonical model of record linkage."
 authors = ["Robin Linacre <robinlinacre@hotmail.com>", "Sam Lindsay", "Theodore Manassis"]
 license = "MIT"
diff --git a/splink/chart_definitions.py b/splink/chart_definitions.py
index 7eee6dbd7d..9d54d1800d 100644
--- a/splink/chart_definitions.py
+++ b/splink/chart_definitions.py
@@ -1,73 +1,3 @@
-pi_iteration_chart_def = {
-    "config": {
-        "view": {"width": 400, "height": 300},  # pragma: no cover
-        "mark": {"tooltip": None},
-        "title": {"anchor": "middle"},
-    },
-    "hconcat": [
-        {
-            "mark": "bar",
-            "encoding": {
-                "color": {"type": "nominal", "field": "value"},
-                "row": {
-                    "type": "nominal",
-                    "field": "column",
-                    "sort": {"field": "gamma"},
-                },
-                "tooltip": [
-                    {"type": "quantitative", "field": "probability"},
-                    {"type": "ordinal", "field": "iteration"},
-                    {"type": "nominal", "field": "column"},
-                    {"type": "nominal", "field": "value"},
-                ],
-                "x": {"type": "ordinal", "field": "iteration"},
-                "y": {
-                    "type": "quantitative",
-                    "aggregate": "sum",
-                    "axis": {"title": "𝛾 value"},
-                    "field": "probability",
-                },
-            },
-            "height": 150,
-            "resolve": {"scale": {"y": "independent"}},
-            "title": "Non Match",
-            "transform": [{"filter": "(datum.match === 0)"}],
-        },
-        {
-            "mark": "bar",
-            "encoding": {
-                "color": {"type": "nominal", "field": "value"},
-                "row": {
-                    "type": "nominal",
-                    "field": "column",
-                    "sort": {"field": "gamma"},
-                },
-                "tooltip": [
-                    {"type": "quantitative", "field": "probability"},
-                    {"type": "ordinal", "field": "iteration"},
-                    {"type": "nominal", "field": "column"},
-                    {"type": "nominal", "field": "value"},
-                ],
-                "x": {"type": "ordinal", "field": "iteration"},
-                "y": {
-                    "type": "quantitative",
-                    "aggregate": "sum",
-                    "axis": {"title": "𝛾 value"},
-                    "field": "probability",
-                },
-            },
-            "height": 150,
-            "resolve": {"scale": {"y": "independent"}},
-            "title": "Match",
-            "transform": [{"filter": "(datum.match === 1)"}],
-        },
-    ],
-    "data": {"values": None},
-    "title": "Probability distribution of comparison vector values by iteration number",
-    "$schema": "https://vega.github.io/schema/vega-lite/v3.4.0.json",
-}
-
-
 lambda_iteration_chart_def = {
     "config": {
         "view": {"width": 400, "height": 300},
@@ -141,7 +71,11 @@
                     {"type": "quantitative", "field": "probability", "format": ".4f"},
                     {"type": "ordinal", "field": "value"},
                 ],
-                "x": {"type": "quantitative", "field": "probability"},
+                "x": {
+                    "type": "quantitative", 
+                    "field": "probability",
+                    "axis": {"title": "proportion of non-matches"}
+                },
                 "y": {
                     "type": "nominal",
                     "axis": {"title": "𝛾 value"},
@@ -171,7 +105,11 @@
                     {"type": "quantitative", "field": "probability", "format": ".4f"},
                     {"type": "ordinal", "field": "value"},
                 ],
-                "x": {"type": "quantitative", "field": "probability"},
+                "x": {
+                    "type": "quantitative", 
+                    "field": "probability",
+                    "axis": {"title": "proportion of matches"}
+                },
                 "y": {
                     "type": "nominal",
                     "axis": {"title": "𝛾 value"},
@@ -207,7 +145,11 @@
                     {"type": "quantitative", "field": "level_proportion", "format": ".4f"},
                     {"type": "ordinal", "field": "level"},
                 ],
-                "x": {"type": "quantitative", "field": "level_proportion"},
+                "x": {
+                    "type": "quantitative", 
+                    "field": "level_proportion", 
+                    "axis": {"title": "proportion of comparisons"}
+                },
                 "y": {
                     "type": "nominal",
                     "axis": {"title": "𝛾 value"},
@@ -281,8 +223,6 @@
 <div id="vis4"></div><div id="vis5"></div>
 <br/>
 <div id="vis6"></div>
-<br/>
-<div id="vis7"></div>
 
 
 
@@ -293,7 +233,6 @@
   vegaEmbed('#vis4', {spec4}).catch(console.error);
   vegaEmbed('#vis5', {spec5}).catch(console.error);
   vegaEmbed('#vis6', {spec6}).catch(console.error);
-  vegaEmbed('#vis7', {spec7}).catch(console.error);
 </script>
 </body>
 </html>
diff --git a/splink/params.py b/splink/params.py
index 71afc3346a..cc223d9f03 100644
--- a/splink/params.py
+++ b/splink/params.py
@@ -13,7 +13,6 @@
 from .validate import _get_default_value
 from .chart_definitions import (
     lambda_iteration_chart_def,
-    pi_iteration_chart_def,
     probability_distribution_chart,
     gamma_distribution_chart_def,
     ll_iteration_chart_def,
@@ -399,23 +398,6 @@ def field_value_to_probs(fv):
             print(field)
             field_value_to_probs(value)
 
-
-    def pi_iteration_chart(self):  # pragma: no cover
-
-        if self.real_params:
-            data = self._iteration_history_df_gammas()
-            data_real = self._convert_params_dict_to_dataframe(self.real_params, "real_param")
-            data.extend(data_real)
-        else:
-            data = self._iteration_history_df_gammas()
-
-        pi_iteration_chart_def["data"]["values"] = data
-
-        if altair_installed:
-            return alt.Chart.from_dict(pi_iteration_chart_def)
-        else:
-            return pi_iteration_chart_def
-
     def lambda_iteration_chart(self):  # pragma: no cover
         data = self._iteration_history_df_lambdas()
         if self.real_params:
@@ -540,15 +522,14 @@ def all_charts_write_html_file(self, filename="splink_charts.html", overwrite=Fa
             c1 = self.probability_distribution_chart().to_json(indent=None)
             c2 = self.bayes_factor_chart().to_json(indent=None)
             c3 = self.lambda_iteration_chart().to_json(indent=None)
-            c4 = self.pi_iteration_chart().to_json(indent=None)
 
             if self.log_likelihood_exists:
-                c5 = self.ll_iteration_chart().to_json(indent=None)
+                c4 = self.ll_iteration_chart().to_json(indent=None)
             else:
-                c5 = ""
+                c4 = ""
 
-            c6 = self.bayes_factor_history_charts().to_json(indent=None)
-            c7 = self.gamma_distribution_chart().to_json(indent=None)
+            c5 = self.bayes_factor_history_charts().to_json(indent=None)
+            c6 = self.gamma_distribution_chart().to_json(indent=None)
             
             with open(filename, "w") as f:
                 f.write(
@@ -557,27 +538,25 @@ def all_charts_write_html_file(self, filename="splink_charts.html", overwrite=Fa
                         vegalite_version=alt.VEGALITE_VERSION,
                         vegaembed_version=alt.VEGAEMBED_VERSION,
                         spec1=c1,
-                        spec2=c7,
+                        spec2=c6,
                         spec3=c2,
                         spec4=c3,
-                        spec5=c5,
-                        spec6=c4,
-                        spec7=c6
+                        spec5=c4,
+                        spec6=c5
                     )
                 )
         else:
             c1 = json.dumps(self.probability_distribution_chart())
             c2 = json.dumps(self.bayes_factor_chart())
             c3 = json.dumps(self.lambda_iteration_chart())
-            c4 = json.dumps(self.pi_iteration_chart())
 
             if self.log_likelihood_exists:
-                c5 = json.dumps(self.ll_iteration_chart())
+                c4 = json.dumps(self.ll_iteration_chart())
             else:
-                c5 = ""
+                c4 = ""
             
-            c6 = json.dumps(self.bayes_factor_history_charts())
-            c7 = json.dumps(self.gamma_distribution_chart())
+            c5 = json.dumps(self.bayes_factor_history_charts())
+            c6 = json.dumps(self.gamma_distribution_chart())
             
             with open(filename, "w") as f:
                 f.write(
@@ -586,12 +565,11 @@ def all_charts_write_html_file(self, filename="splink_charts.html", overwrite=Fa
                         vegalite_version="3.3.0",
                         vegaembed_version="4",
                         spec1=c1,
-                        spec2=c7,
+                        spec2=c6,
                         spec3=c2,
                         spec4=c3,
-                        spec5=c5,
-                        spec6=c4,
-                        spec7=c6
+                        spec5=c4,
+                        spec6=c5
                     )
                 )