# Confusion Matrices Plotted as Sankey Diagrams

Because Sankey may cause less Confusion, for some.

In [1]:
from sklearn.metrics import confusion_matrix
import plotly.graph_objects as go

## Evaluating Classification

Generate a Sankey Diagram from a list of true labels and corresponding predicted labels. This is predictive classification evaluation data that would typically be rendered as a [Confusion Matrix](https://scikit-learn.org/stable/modules/generated/sklearn.metrics.confusion_matrix.html).

In [2]:
def confusion_sankey(y_true, y_pred, labels=(1, 0), descriptors=('True', 'Pred')):
  """Render a confusion matrix as a two-column Sankey diagram.

  Every class appears once as a source node (labelled with
  ``descriptors[0]``) and once as a target node (labelled with
  ``descriptors[1]``). The link from source class ``s`` to target class
  ``t`` carries the count found at row ``s``, column ``t`` of
  ``confusion_matrix(y_true, y_pred, labels=labels)``.

  Args:
    y_true: Reference labels, passed to ``confusion_matrix`` as its first
      argument.
    y_pred: Labels compared against ``y_true``, passed to
      ``confusion_matrix`` as its second argument.
    labels: Iterable of class labels to include. Its order determines the
      row/column order of the confusion matrix and the order in which node
      labels are generated. Defaults to ``(1, 0)``.
    descriptors: Names for the source and target sides; the first two
      entries are used in the figure title and every entry is used as a
      prefix when generating node labels.

  Returns:
    A ``go.Figure`` containing a single ``go.Sankey`` trace.
  """
  # Materialise once: the default is immutable (no shared mutable default
  # argument) and any iterable of labels can be measured and re-iterated.
  labels = list(labels)
  n_classes = len(labels)
  cm = confusion_matrix(y_true, y_pred, labels=labels)

  # One link per (source class, target class) cell of the matrix.
  cells = [(s, t) for s in range(n_classes) for t in range(n_classes)]

  # Node labels are laid out descriptor by descriptor, so source nodes take
  # indices 0..n_classes-1 and target nodes follow at an offset of n_classes.
  node_labels = ['{} {}'.format(d, c) for d in descriptors for c in labels]

  fig = go.Figure(data=[go.Sankey(
    node=dict(label=node_labels),
    link=dict(
      source=[s for s, _ in cells],
      target=[n_classes + t for _, t in cells],
      value=[cm[s][t] for s, t in cells]
    )
  )])
  fig.update_layout(
    title_text='Flows {} to {}'.format(descriptors[0], descriptors[1]),
    font_size=12
  )
  return fig

**Binary Classification (Defaults) Example**

In [3]:
# Ground truth: four samples of class 1 followed by four of class 0.
y_true_b = [1, 1, 1, 1, 0, 0, 0, 0]
# Predictions: two class-1 samples are predicted as 0 and one class-0 sample
# is predicted as 1; the remaining five agree with the ground truth.
y_pred_b = [1, 1, 0, 0, 1, 0, 0, 0]

# No labels or descriptors are passed, so the function's defaults apply.
confusion_sankey(y_true_b, y_pred_b).show()

**Multi-Class Classification (Customised) Example**

In [4]:
# Ground truth over three classes: four samples each of class 1, 0 and 2.
y_true_m = [1, 1, 1, 1, 0, 0, 0, 0, 2, 2, 2, 2]
# Second labelling of the same twelve samples, compared against the first.
y_pred_m = [1, 1, 0, 0, 1, 0, 0, 0, 2, 2, 1, 0]

# Explicit label order and custom descriptors replace the binary defaults;
# the descriptors name the source and target sides of the diagram.
confusion_sankey(y_true_m,
                 y_pred_m,
                 labels=[0, 1, 2],
                 descriptors=('Champion', 'Challenger')).show()

## Comparing Multiple Classifications

This format is slightly more speculative, prompted primarily by the Sankey paradigm.

This might be useful if there is some conceptual sequence linking multiple classifiers, such as different versions of a classifier over time, or a parameter sweep preferring precision or recall.

In [5]:
def confusion_sankey_stack(ys, labels=(1, 0), descriptors=None):
  """Chain the confusion matrices of consecutive labellings into one Sankey.

  For every adjacent pair ``ys[i]``, ``ys[i + 1]`` a confusion matrix is
  computed, and its cells become the links between column ``i`` and column
  ``i + 1`` of the diagram. Each column holds one node per class.

  Args:
    ys: Sequence of label sequences. It must support ``len()`` and integer
      indexing. With fewer than two entries no links are produced.
    labels: Iterable of class labels to include. Its order determines the
      row/column order of each confusion matrix and the order of the nodes
      within each column's block of node labels. Defaults to ``(1, 0)``.
    descriptors: One name (string) per entry of ``ys``, used in node labels
      and joined with ``' -> '`` for the title. When ``None``, the names
      ``'Model 0'``, ``'Model 1'``, ... are generated.

  Returns:
    A ``go.Figure`` containing a single ``go.Sankey`` trace.
  """
  # Materialise once: the default is immutable (no shared mutable default
  # argument) and any iterable of labels can be measured and re-iterated.
  labels = list(labels)
  n_classes = len(labels)

  # Flow i connects ys[i] (sources) to ys[i + 1] (targets).
  flows = range(len(ys) - 1)
  cms = [confusion_matrix(ys[i], ys[i + 1], labels=labels) for i in flows]

  if descriptors is None:
    descriptors = ['Model {}'.format(i) for i in range(len(ys))]

  # Node labels are laid out descriptor by descriptor, so column i owns the
  # node indices n_classes * i .. n_classes * (i + 1) - 1.
  node_labels = [
    '{} class {}'.format(d, c) for d in descriptors for c in labels
  ]

  # Build the three parallel link lists in a single pass. The nesting
  # (source class, target class, flow) fixes the order of the emitted links.
  sources, targets, values = [], [], []
  for s in range(n_classes):
    for t in range(n_classes):
      for i in flows:
        sources.append(n_classes * i + s)
        targets.append(n_classes * (i + 1) + t)
        values.append(cms[i][s][t])

  fig = go.Figure(data=[go.Sankey(
    node=dict(label=node_labels),
    link=dict(source=sources, target=targets, value=values)
  )])
  fig.update_layout(
    title_text='Flows {}'.format(' -> '.join(descriptors)),
    font_size=12
  )
  return fig

**Binary Classification (Defaults) Example**

In [6]:
# Five labellings of the same eight samples, using classes 1 and 0.
# Each row is compared with the row that follows it.
ys_b = [[1, 1, 1, 1, 0, 0, 0, 0],
        [1, 1, 0, 0, 1, 0, 0, 0],
        [1, 1, 0, 0, 0, 0, 0, 0],
        [1, 0, 0, 0, 1, 0, 0, 0],
        [1, 0, 0, 0, 0, 0, 0, 0]]

# No labels or descriptors are passed, so the function's defaults apply.
confusion_sankey_stack(ys_b).show()

**Multi-Class Classification (Customised) Example**

In [7]:
# Five labellings of the same twelve samples, using classes 0, 1 and 2.
# Each row is compared with the row that follows it.
ys_m = [[1, 1, 1, 1, 0, 0, 0, 0, 2, 2, 2, 2],
        [1, 1, 0, 0, 1, 0, 0, 0, 2, 2, 1, 0],
        [1, 1, 0, 0, 0, 0, 0, 0, 2, 2, 2, 1],
        [1, 0, 0, 0, 1, 0, 0, 0, 2, 2, 2, 0],
        [1, 0, 0, 0, 0, 0, 0, 0, 2, 1, 0, 2]]

# Explicit label order, and one descriptor per row of ys_m.
confusion_sankey_stack(ys_m,
                       labels=[0, 1, 2],
                       descriptors=['a', 'b', 'c', 'd', 'e']).show()