# Parallel Categories Diagram in Python

Visualization of multi-dimensional categorical data sets

In [1]:
import pandas as pd
import numpy as np
import plotly.express as px
import plotly.graph_objects as go
from ipywidgets import widgets

## Basic Parallel Categories Diagram with plotly.express

In [2]:
# Load the `tips` sample dataset through plotly.express and preview the
# first rows.
df = px.data.tips()
df.head()

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size
0,16.99,1.01,Female,No,Sun,Dinner,2
1,10.34,1.66,Male,No,Sun,Dinner,3
2,21.01,3.5,Male,No,Sun,Dinner,3
3,23.68,3.31,Male,No,Sun,Dinner,2
4,24.59,3.61,Female,No,Sun,Dinner,4


In [3]:
# Draw a parallel categories diagram from the whole DataFrame, leaving the
# choice of dimensions and colors to plotly.express.
fig = px.parallel_categories(df)
fig.show()

## Style Diagram

In [4]:
# Reload the `tips` dataset so this section starts from its own DataFrame.
df = px.data.tips()
df.head()

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size
0,16.99,1.01,Female,No,Sun,Dinner,2
1,10.34,1.66,Male,No,Sun,Dinner,3
2,21.01,3.5,Male,No,Sun,Dinner,3
3,23.68,3.31,Male,No,Sun,Dinner,2
4,24.59,3.61,Female,No,Sun,Dinner,4


In [5]:
# Restrict the diagram to three columns, color the ribbons by the `size`
# column on the Inferno scale, and replace the raw column names with
# readable axis labels.
fig = px.parallel_categories(
    df,
    dimensions=["sex", "smoker", "day"],
    color="size",
    color_continuous_scale=px.colors.sequential.Inferno,
    labels={
        "sex": "Payer sex",
        "smoker": "Smokers at the table",
        "day": "Day of week",
    },
)
fig.show()

## Basic Parallel Categories Diagram with graph_objects

In [6]:
# One dict per axis. Position i in every `values` list refers to the same
# observation, so all three lists have the same length (8 observations).
fig = go.Figure(
    go.Parcats(
        dimensions=[
            dict(
                label='Hair',
                values=['Black', 'Black', 'Black', 'Brown', 'Brown', 'Brown', 'Red', 'Brown'],
            ),
            dict(
                label='Eye',
                values=['Brown', 'Brown', 'Brown', 'Brown', 'Brown', 'Blue', 'Blue', 'Blue'],
            ),
            dict(
                label='Sex',
                values=['Female', 'Female', 'Female', 'Male', 'Female', 'Male', 'Male', 'Male'],
            ),
        ]
    )
)

fig.show()

## Basic Parallel Categories Diagram with Counts

In [7]:
# Same idea as listing every observation, but pre-aggregated: each position
# is one distinct Hair/Eye/Sex combination and `counts` gives the number of
# observations that combination represents.
fig = go.Figure(
    go.Parcats(
        dimensions=[
            dict(label='Hair', values=['Black', 'Brown', 'Brown', 'Brown', 'Red']),
            dict(label='Eye', values=['Brown', 'Brown', 'Brown', 'Blue', 'Blue']),
            dict(label='Sex', values=['Female', 'Male', 'Female', 'Male', 'Male']),
        ],
        counts=[6, 10, 40, 23, 7],
    )
)


fig.show()

## Multi-Color Parallel Categories Diagram

In [8]:
# Read the Titanic sample CSV from the plotly datasets repository on GitHub
# (requires network access) and preview the first rows.
titanic_df = pd.read_csv("https://raw.githubusercontent.com/plotly/datasets/master/titanic.csv")
titanic_df.head()

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1,C123,S
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.05,,S


In [9]:
# "Class" axis built from the Pclass column, with its categories sorted in
# ascending order.
class_dim = go.parcats.Dimension(
    label="Class",
    values=titanic_df.Pclass,
    categoryorder="category ascending",
)
class_dim

parcats.Dimension({
    'categoryorder': 'category ascending',
    'label': 'Class',
    'values': array([3, 1, 3, 1, 3, 3, 1, 3, 3, 2, 3, 1, 3, 3, 3, 2, 3, 2, 3, 3, 2, 2, 3, 1,
                     3, 3, 3, 1, 3, 3, 1, 1, 3, 2, 1, 1, 3, 3, 3, 3, 3, 2, 3, 2, 3, 3, 3, 3,
                     3, 3, 3, 3, 1, 2, 1, 1, 2, 3, 2, 3, 3, 1, 1, 3, 1, 3, 2, 3, 3, 3, 2, 3,
                     2, 3, 3, 3, 3, 3, 2, 3, 3, 3, 3, 1, 2, 3, 3, 3, 1, 3, 3, 3, 1, 3, 3, 3,
                     1, 1, 2, 2, 3, 3, 1, 3, 3, 3, 3, 3, 3, 3, 1, 3, 3, 3, 3, 3, 3, 2, 1, 3,
                     2, 3, 2, 2, 1, 3, 3, 3, 3, 3, 3, 3, 3, 2, 2, 2, 1, 1, 3, 1, 3, 3, 3, 3,
                     2, 2, 3, 3, 2, 2, 2, 1, 3, 3, 3, 1], dtype=int64)
})

In [10]:
# "Gender" axis built from the Sex column; no explicit category order is set.
gender_dim = go.parcats.Dimension(
    label="Gender",
    values=titanic_df.Sex,
)
gender_dim

parcats.Dimension({
    'label': 'Gender',
    'values': array(['male', 'female', 'female', 'female', 'male', 'male', 'male', 'male',
                     'female', 'female', 'female', 'female', 'male', 'male', 'female',
                     'female', 'male', 'male', 'female', 'female', 'male', 'male', 'female',
                     'male', 'female', 'female', 'male', 'male', 'female', 'male', 'male',
                     'female', 'female', 'male', 'male', 'male', 'male', 'male', 'female',
                     'female', 'female', 'female', 'male', 'female', 'female', 'male',
                     'male', 'female', 'male', 'female', 'male', 'male', 'female', 'female',
                     'male', 'male', 'female', 'male', 'female', 'male', 'male', 'female',
                     'male', 'male', 'male', 'male', 'female', 'male', 'female', 'male',
                     'male', 'female', 'male', 'male', 'male', 'male', 'male', 'male',
                     'male', 'female', 'male', 'male', 'f

In [11]:
# "Outcome" axis built from the Survived column. `categoryarray` lists the
# raw category values (0, 1) and `ticktext` supplies the text displayed for
# each of them, in the same order.
survival_dim = go.parcats.Dimension(
    label="Outcome",
    values=titanic_df.Survived,
    categoryarray=[0, 1],
    ticktext=["perished", "survived"],
)
survival_dim

parcats.Dimension({
    'categoryarray': [0, 1],
    'label': 'Outcome',
    'ticktext': [perished, survived],
    'values': array([0, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1,
                     0, 1, 0, 0, 1, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 1,
                     0, 0, 0, 0, 1, 1, 0, 1, 1, 0, 1, 0, 0, 1, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0,
                     0, 0, 1, 0, 0, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0,
                     0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                     0, 0, 0, 1, 0, 1, 0, 1, 1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 1, 0,
                     0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0], dtype=int64)
})

In [12]:
# Per-passenger color values: the Survived column (0 or 1). This Series is
# passed as the line color of the parcats trace built further down.
color = titanic_df["Survived"]
color

0      0
1      1
2      1
3      1
4      0
      ..
151    1
152    0
153    0
154    0
155    0
Name: Survived, Length: 156, dtype: int64

In [13]:
# Two-stop colorscale: the low end of the color range is light steel blue,
# the high end is medium sea green.
colorscale = [
    [0, 'lightsteelblue'],
    [1, 'mediumseagreen'],
]
colorscale

[[0, 'lightsteelblue'], [1, 'mediumseagreen']]

In [14]:
# Assemble the parcats trace from the three dimensions defined above and
# color each ribbon by survival using the two-stop colorscale.
fig = go.Figure(
    data=[
        go.Parcats(
            dimensions=[class_dim, gender_dim, survival_dim],
            line=dict(color=color, colorscale=colorscale),
            hoveron="color",
            hoverinfo="count+probability",
            labelfont=dict(size=18, family="Times"),
            tickfont=dict(size=16, family="Times"),
            arrangement="freeform",
        )
    ]
)

fig.show()

## Parallel Categories Linked Brushing

In [None]:
# Read the imports-85 cars CSV from the plotly datasets repository on GitHub
# (requires network access) and preview the first rows.
cars_df = pd.read_csv('https://raw.githubusercontent.com/plotly/datasets/master/imports-85.csv')
cars_df.head()

Unnamed: 0,symboling,normalized-losses,make,fuel-type,aspiration,num-of-doors,body-style,drive-wheels,engine-location,wheel-base,...,engine-size,fuel-system,bore,stroke,compression-ratio,horsepower,peak-rpm,city-mpg,highway-mpg,price
0,3,,alfa-romero,gas,std,two,convertible,rwd,front,88.6,...,130,mpfi,3.47,2.68,9.0,111.0,5000.0,21,27,13495.0
1,3,,alfa-romero,gas,std,two,convertible,rwd,front,88.6,...,130,mpfi,3.47,2.68,9.0,111.0,5000.0,21,27,16500.0
2,1,,alfa-romero,gas,std,two,hatchback,rwd,front,94.5,...,152,mpfi,2.68,3.47,9.0,154.0,5000.0,19,26,16500.0
3,2,164.0,audi,gas,std,four,sedan,fwd,front,99.8,...,109,mpfi,3.19,3.4,10.0,102.0,5500.0,24,30,13950.0
4,2,164.0,audi,gas,std,four,sedan,4wd,front,99.4,...,136,mpfi,3.19,3.4,8.0,115.0,5500.0,18,22,17450.0


In [None]:
# Names of the cars_df columns that become the parallel-categories axes,
# in the order they are listed here.
categorical_dimensions = [
    "body-style",
    "drive-wheels",
    "fuel-type",
]
categorical_dimensions

['body-style', 'drive-wheels', 'fuel-type']

In [None]:
# One dimension spec per selected column: the column's values plus the
# column name reused as the axis label.
dimensions = [
    {'values': cars_df[column], 'label': column}
    for column in categorical_dimensions
]
dimensions

[{'values': 0      convertible
  1      convertible
  2        hatchback
  3            sedan
  4            sedan
            ...     
  200          sedan
  201          sedan
  202          sedan
  203          sedan
  204          sedan
  Name: body-style, Length: 205, dtype: object,
  'label': 'body-style'},
 {'values': 0      rwd
  1      rwd
  2      rwd
  3      fwd
  4      4wd
        ... 
  200    rwd
  201    rwd
  202    rwd
  203    rwd
  204    rwd
  Name: drive-wheels, Length: 205, dtype: object,
  'label': 'drive-wheels'},
 {'values': 0         gas
  1         gas
  2         gas
  3         gas
  4         gas
          ...  
  200       gas
  201       gas
  202       gas
  203    diesel
  204       gas
  Name: fuel-type, Length: 205, dtype: object,
  'label': 'fuel-type'}]

In [None]:
# Per-row color index for the parcats lines: one uint8 entry per row of
# cars_df, all starting at 0.
color = np.zeros(cars_df.shape[0], dtype=np.uint8)
color

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0], dtype=uint8)

In [None]:
# Two-stop colorscale: gray at the low end of the color range, firebrick at
# the high end.
colorscale = [
    [0, 'gray'],
    [1, 'firebrick'],
]
colorscale

[[0, 'gray'], [1, 'firebrick']]

In [None]:
# Build the figure as a FigureWidget holding two traces:
#   data[0] - scatter of horsepower vs. highway MPG (gray markers; selected
#             points turn firebrick, unselected ones fade to 0.3 opacity)
#   data[1] - parcats trace whose line colors come from the `color` array
fig = go.FigureWidget(
    data=[
        go.Scatter(
            x=cars_df.horsepower,
            y=cars_df['highway-mpg'],
            mode='markers',
            marker=dict(color='gray'),
            selected=dict(marker=dict(color='firebrick')),
            unselected=dict(marker=dict(opacity=0.3)),
        ),
        go.Parcats(
            domain=dict(y=[0, 0.4]),
            dimensions=dimensions,
            line=dict(
                colorscale=colorscale,
                cmin=0,
                cmax=1,
                color=color,
                shape='hspline',
            ),
        ),
    ]
)

# The scatter's y-axis takes the upper part of the figure (0.6-1) while the
# parcats domain above keeps the lower part (0-0.4).
fig.update_layout(
    height=800,
    xaxis=dict(title='Horsepower'),
    yaxis=dict(title='MPG', domain=[0.6, 1]),
    dragmode='lasso',
    hovermode='closest',
)


# Update color callback
def update_color(trace, points, state):
    """Highlight the picked rows in both the scatter and the parcats trace.

    Used as the callback for scatter selection and parcats click events.
    Only ``points.point_inds`` is read; ``trace`` and ``state`` are accepted
    to match the callback signature but are not used.
    """
    picked = points.point_inds

    # Mirror the picked rows as the scatter's selection
    fig.data[0].selectedpoints = picked

    # Rebuild the parcats color array from scratch: 0 everywhere, 1 for the
    # picked rows, so earlier highlights are cleared
    flags = np.zeros(len(cars_df), dtype='uint8')
    flags[picked] = 1
    fig.data[1].line.color = flags

# Register callback on scatter selection...
fig.data[0].on_selection(update_color)
# and parcats click
fig.data[1].on_click(update_color)

# Leave the widget as the cell's last expression so the notebook displays it
fig

FigureWidget({
    'data': [{'marker': {'color': 'gray'},
              'mode': 'markers',
              'selected': {'marker': {'color': 'firebrick'}},
              'type': 'scatter',
              'uid': 'e2d97bcc-b1aa-466f-9c50-268beb1eb8d5',
              'unselected': {'marker': {'opacity': 0.3}},
              'x': array([111., 111., 154., ..., 134., 106., 114.]),
              'y': array([27, 27, 26, ..., 23, 27, 25], dtype=int64)},
             {'dimensions': [{'label': 'body-style',
                              'values': array(['convertible', 'convertible', 'hatchback', ..., 'sedan', 'sedan',
                                               'sedan'], dtype=object)},
                             {'label': 'drive-wheels',
                              'values': array(['rwd', 'rwd', 'rwd', ..., 'rwd', 'rwd', 'rwd'], dtype=object)},
                             {'label': 'fuel-type',
                              'values': array(['gas', 'gas', 'gas', ..., 'gas', 'diesel', 'gas'], 

## Parallel Categories with Multi-Color Linked Brushing

In [15]:
# Read the imports-85 cars CSV from the plotly datasets repository on GitHub
# (requires network access) and preview the first rows.
cars_df = pd.read_csv('https://raw.githubusercontent.com/plotly/datasets/master/imports-85.csv')
cars_df.head()

Unnamed: 0,symboling,normalized-losses,make,fuel-type,aspiration,num-of-doors,body-style,drive-wheels,engine-location,wheel-base,...,engine-size,fuel-system,bore,stroke,compression-ratio,horsepower,peak-rpm,city-mpg,highway-mpg,price
0,3,,alfa-romero,gas,std,two,convertible,rwd,front,88.6,...,130,mpfi,3.47,2.68,9.0,111.0,5000.0,21,27,13495.0
1,3,,alfa-romero,gas,std,two,convertible,rwd,front,88.6,...,130,mpfi,3.47,2.68,9.0,111.0,5000.0,21,27,16500.0
2,1,,alfa-romero,gas,std,two,hatchback,rwd,front,94.5,...,152,mpfi,2.68,3.47,9.0,154.0,5000.0,19,26,16500.0
3,2,164.0,audi,gas,std,four,sedan,fwd,front,99.8,...,109,mpfi,3.19,3.4,10.0,102.0,5500.0,24,30,13950.0
4,2,164.0,audi,gas,std,four,sedan,4wd,front,99.4,...,136,mpfi,3.19,3.4,8.0,115.0,5500.0,18,22,17450.0


In [16]:
# Names of the cars_df columns that become the parallel-categories axes,
# in the order they are listed here.
categorical_dimensions = [
    "body-style",
    "drive-wheels",
    "fuel-type",
]
categorical_dimensions

['body-style', 'drive-wheels', 'fuel-type']

In [17]:
# One dimension spec per selected column: the column's values plus the
# column name reused as the axis label.
dimensions = [
    {'values': cars_df[column], 'label': column}
    for column in categorical_dimensions
]
dimensions

[{'values': 0      convertible
  1      convertible
  2        hatchback
  3            sedan
  4            sedan
            ...     
  200          sedan
  201          sedan
  202          sedan
  203          sedan
  204          sedan
  Name: body-style, Length: 205, dtype: object,
  'label': 'body-style'},
 {'values': 0      rwd
  1      rwd
  2      rwd
  3      fwd
  4      4wd
        ... 
  200    rwd
  201    rwd
  202    rwd
  203    rwd
  204    rwd
  Name: drive-wheels, Length: 205, dtype: object,
  'label': 'drive-wheels'},
 {'values': 0         gas
  1         gas
  2         gas
  3         gas
  4         gas
          ...  
  200       gas
  201       gas
  202       gas
  203    diesel
  204       gas
  Name: fuel-type, Length: 205, dtype: object,
  'label': 'fuel-type'}]

In [18]:
# Per-row color index shared by the scatter markers and the parcats lines:
# one uint8 entry per row of cars_df, all starting at 0.
color = np.zeros(cars_df.shape[0], dtype=np.uint8)
color

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0], dtype=uint8)

In [19]:
# Discrete three-band colorscale. Each color is given at both ends of its
# interval, so the scale is flat inside a band and switches abruptly at
# 0.33 and 0.66 instead of blending.
colorscale = [
    [0, 'gray'],
    [0.33, 'gray'],
    [0.33, 'firebrick'],
    [0.66, 'firebrick'],
    [0.66, 'blue'],
    [1.0, 'blue'],
]
colorscale

[[0, 'gray'],
 [0.33, 'gray'],
 [0.33, 'firebrick'],
 [0.66, 'firebrick'],
 [0.66, 'blue'],
 [1.0, 'blue']]

In [20]:
# Color range for the integer color indices 0, 1 and 2. With these bounds
# the indices normalize to roughly 0.17, 0.5 and 0.83, i.e. the middle of
# the gray, firebrick and blue bands of the colorscale.
cmin, cmax = -0.5, 2.5

In [21]:
# Build the figure as a FigureWidget holding two traces that share the same
# color array, colorscale and color range:
#   data[0] - scatter of horsepower vs. highway MPG, with a colorbar whose
#             ticks 0/1/2 are labelled None/Red/Blue
#   data[1] - parcats trace over the categorical dimensions
fig = go.FigureWidget(
    data=[
        go.Scatter(
            x=cars_df.horsepower,
            y=cars_df['highway-mpg'],
            mode='markers',
            marker=dict(
                color=color,
                cmin=cmin,
                cmax=cmax,
                colorscale=colorscale,
                showscale=True,
                colorbar=dict(
                    tickvals=[0, 1, 2],
                    ticktext=['None', 'Red', 'Blue'],
                ),
            ),
        ),
        go.Parcats(
            domain=dict(y=[0, 0.4]),
            dimensions=dimensions,
            line=dict(
                colorscale=colorscale,
                cmin=cmin,
                cmax=cmax,
                color=color,
                shape='hspline',
            ),
        ),
    ]
)

# The scatter's y-axis takes the upper part of the figure (0.6-1) while the
# parcats domain above keeps the lower part (0-0.4).
fig.update_layout(
    height=800,
    xaxis=dict(title='Horsepower'),
    yaxis=dict(title='MPG', domain=[0.6, 1]),
    dragmode='lasso',
    hovermode='closest',
)

# Brush-color selector. The position of the chosen option (0, 1 or 2) is the
# value written into the color array by the callback; 'Red' (index 1) is the
# initial choice.
color_toggle = widgets.ToggleButtons(
    description='Brush Color:',
    options=['None', 'Red', 'Blue'],
    index=1,
    disabled=False,
)

# Update color callback
def update_color(trace, points, state):
    """Paint the picked rows with the currently chosen brush color.

    Used as the callback for scatter selection and parcats click events.
    Only ``points.point_inds`` is read; ``trace`` and ``state`` are accepted
    to match the callback signature but are not used.
    """
    # Start from a copy of the scatter's current colors so rows painted by
    # earlier brushes keep their color
    painted = np.array(fig.data[0].marker.color)
    painted[points.point_inds] = color_toggle.index

    # Apply both trace updates inside one batch_update block
    with fig.batch_update():
        scatter_trace = fig.data[0]
        parcats_trace = fig.data[1]
        scatter_trace.marker.color = painted
        parcats_trace.line.color = painted

# Register callback on scatter selection...
fig.data[0].on_selection(update_color)
# and parcats click
fig.data[1].on_click(update_color)

# Display the brush-color toggle and the figure together in a VBox container
# (toggle listed first, then the figure)
widgets.VBox([color_toggle, fig])

VBox(children=(ToggleButtons(description='Brush Color:', index=1, options=('None', 'Red', 'Blue'), value='Red'…

In [22]:
# NOTE(review): presumably a marker that the notebook executed to the end —
# confirm before removing.
print('ok_')

ok_
