**NOTE:** The visualizations in this notebook will not render in JupyterLab. Run the notebook in the older, classic Jupyter Notebook interface instead.

In [1]:
from ipysankeywidget import SankeyWidget
from ipywidgets import Layout

In [2]:
import pandas as pd

In [3]:
from floweaver import *

# Default widget dimensions: wide enough for this notebook, with some room to spare
size = {'width': 1000, 'height': 500}


In [5]:
# Read the tab-separated edge list; the three column names are supplied here
# rather than taken from the file.
cs_ns_edgelist_path = "~/src/Growth-editorjourney-2018/cs_ns_edgelist.txt"
cs_ns_data = pd.read_csv(cs_ns_edgelist_path, sep='\t', names=['source', 'target', 'value'])

In [6]:
# Show the edges that end in the 'save survey' node
cs_ns_data[cs_ns_data['target'] == 'save survey']

Unnamed: 0,source,target,value
157,registration,save survey,2642


In [7]:
## Take pre-registration out of it: drop every edge that points at
## 'registration' and every edge that starts from 'left Wikipedia'
is_excluded = (cs_ns_data['target'] == "registration") | (cs_ns_data['source'] == 'left Wikipedia')
cs_ns_data = cs_ns_data.loc[~is_excluded]

In [8]:
# Show the edges that end in the 'Step 3: Special' node
cs_ns_data[cs_ns_data['target'] == 'Step 3: Special']

Unnamed: 0,source,target,value
96,Step 2: Special,Step 3: Special,319
108,Step 2: Talk,Step 3: Special,7
134,Step 2: Help/Wikipedia,Step 3: Special,93
142,Step 2: Main page,Step 3: Special,374
160,Step 2: Other,Step 3: Special,2
178,Step 2: Article,Step 3: Special,299
195,Step 2: User,Step 3: Special,35


In [9]:
## Help the layout by explicitly defining the nodes.
## `ordering` and `bundles` are defined in the next cell, which also adds the
## "left Wikipedia" waypoint.

# One partition per step. Each entry is a label shown in the diagram; a
# (label, [processes]) tuple groups several processes under a single label.
step1_with_other = Partition.Simple('process', [
    'save survey', 'skip survey', 'abandon survey', 'Step 1: Special', 'Step 1: Article', 'Step 1: Main page',
    'left Wikipedia',
    ('Step 1: other', ['Step 1: Help/Wikipedia', 'Step 1: Other', 'Step 1: Talk', 'Step 1: User'])
])
step2_with_other = Partition.Simple('process', [
    'Step 2: Main page', 'Step 2: Article', 'Step 2: Special', 'Step 2: Help/Wikipedia', 'left Wikipedia',
    ('Step 2: other', ['Step 2: Other', 'Step 2: User', 'Step 2: Talk'])
])
step3_with_other = Partition.Simple('process', [
    'Step 3: Main page', 'Step 3: Article', 'Step 3: Special', 'Step 3: Help/Wikipedia', 'left Wikipedia',
    ('Step 3: other', ['Step 3: Other', 'Step 3: User', 'Step 3: Talk'])
])
step4_with_other = Partition.Simple('process', [
    'Step 4: Main page', 'Step 4: Article', 'Step 4: Special', 'Step 4: Help/Wikipedia', 'left Wikipedia',
    ('Step 4: other', ['Step 4: Other', 'Step 4: User', 'Step 4: Talk'])
])
step5_with_other = Partition.Simple('process', [
    'Step 5: Main page', 'Step 5: Article', 'Step 5: Special', 'Step 5: Help/Wikipedia', 'left Wikipedia',
    ('Step 5: other', ['Step 5: Other', 'Step 5: User', 'Step 5: Talk'])
])

# One process group per column of the diagram. Each group selects the processes
# that belong to that step and is split up using the matching partition above.
nodes = {
    'Registration': ProcessGroup(['registration']),
    'step1': ProcessGroup(['save survey', 'skip survey', 'abandon survey', 'Step 1: Article',
                            'Step 1: Help/Wikipedia', 'Step 1: Other', 'Step 1: Main page',
                            'Step 1: Special', 'Step 1: Talk', 'Step 1: User'],
                          partition = step1_with_other),
    'step2': ProcessGroup(['Step 2: Main page', 'Step 2: Article', 'Step 2: Special', 'Step 2: Help/Wikipedia',
                            'Step 2: Other', 'Step 2: User', 'Step 2: Talk'],
                          partition = step2_with_other),
    'step3': ProcessGroup(['Step 3: Main page', 'Step 3: Article', 'Step 3: Special', 'Step 3: Help/Wikipedia',
                            'Step 3: Other', 'Step 3: User', 'Step 3: Talk'],
                          partition = step3_with_other),
    'step4': ProcessGroup(['Step 4: Main page', 'Step 4: Article', 'Step 4: Special', 'Step 4: Help/Wikipedia',
                            'Step 4: Other', 'Step 4: User', 'Step 4: Talk'],
                          partition = step4_with_other),
    'step5': ProcessGroup(['Step 5: Main page', 'Step 5: Article', 'Step 5: Special', 'Step 5: Help/Wikipedia',
                            'Step 5: Other', 'Step 5: User', 'Step 5: Talk'],
                          partition = step5_with_other),
}

In [10]:
## Define a "left Wikipedia" waypoint

nodes['left'] = Waypoint(title='left Wikipedia')

# The process groups in left-to-right order; registration comes first.
stage_order = ['Registration', 'step1', 'step2', 'step3', 'step4', 'step5']

# One layer per stage; the waypoint shares the last layer with step5.
ordering = [[stage] for stage in stage_order[:-1]] + [['left', stage_order[-1]]]

# Flows between consecutive stages first, then the flows that go from every
# stage but the last to Elsewhere, routed through the waypoint.
bundles = [Bundle(source, target) for source, target in zip(stage_order, stage_order[1:])]
bundles += [Bundle(stage, Elsewhere, waypoints=['left']) for stage in stage_order[:-1]]

In [11]:
# Combine nodes, bundles and ordering into a definition, weave it with the
# edge list and show the result as a widget of the default size.
sdd = SankeyDefinition(nodes, bundles, ordering)
cs_ns_sankey = weave(sdd, cs_ns_data)
cs_ns_sankey.to_widget(**size)

SankeyWidget(groups=[{'nodes': ['__Registration_left_1^*'], 'title': '', 'id': '__Registration_left_1', 'type'…

# Real diagrams

The code below is the part that is actually useful and works properly; it creates the coloured diagram used by the Growth Team.

In [12]:
# Load the cswiki transitions
cs_transitions_path = '~/src/Growth-editorjourney-2018/cswiki_transitions.csv'
cs_transitions = pd.read_csv(cs_transitions_path)

In [13]:
# Copy 'count' into 'value' and 'action' into 'type'; 'type' is the dimension
# the flow partition further down is built on.
for new_column, source_column in (('value', 'count'), ('type', 'action')):
    cs_transitions[new_column] = cs_transitions[source_column]

In [14]:
# Show the transitions that start from the 'save survey' node
cs_transitions[cs_transitions['source'] == 'save survey']

Unnamed: 0,source,target,action,count,value,type
1,save survey,Step 2: Wikipedia,view,305,305,view
9,save survey,Step 2: Special,view,340,340,view
12,save survey,Step 2: Article,edit,409,409,edit
25,save survey,Step 2: Article,view,825,825,view
32,save survey,Step 2: Main page,view,496,496,view
43,save survey,Step 2: User,view,45,45,view
87,save survey,Step 2: Help,view,109,109,view
96,save survey,Step 2: Article talk,edit,7,7,edit
146,save survey,Step 2: Wikipedia,edit,5,5,edit
181,save survey,Step 2: Other,view,8,8,view


Import the Korean Wikipedia transitions and add the same `value` and `type` columns to them.

In [15]:
# Load the kowiki transitions
ko_transitions_path = '~/src/Growth-editorjourney-2018/kowiki_transitions.csv'
ko_transitions = pd.read_csv(ko_transitions_path)

In [16]:
# Copy 'count' into 'value' and 'action' into 'type'; 'type' is the dimension
# the flow partition further down is built on.
for new_column, source_column in (('value', 'count'), ('type', 'action')):
    ko_transitions[new_column] = ko_transitions[source_column]

In [17]:
# Show the transitions that start from the 'save survey' node
ko_transitions[ko_transitions['source'] == 'save survey']

Unnamed: 0,source,target,action,count,value,type
1,save survey,Step 2: Wikipedia,view,245,245,view
6,save survey,Step 2: Other,view,12,12,view
10,save survey,Step 2: Special,view,331,331,view
13,save survey,Step 2: Article,view,827,827,view
17,save survey,Step 2: Article,edit,545,545,edit
24,save survey,Step 2: User talk,view,149,149,view
42,save survey,Step 2: Main page,view,612,612,view
47,save survey,Step 2: Wikipedia,edit,20,20,edit
105,save survey,Step 2: Article talk,view,4,4,view
142,save survey,Step 2: User,view,30,30,view


In [18]:
## Help the layout by explicitly defining the nodes and the ordering

# Page types that get their own process at each of the steps 2-5, in the order
# they are handed to the ProcessGroup of that step.
STEP_PAGE_TYPES = ['Main page', 'Article', 'Special', 'Homepage', 'Wikipedia', 'Help', 'Other',
                   'Article talk', 'User', 'User talk', 'Wikipedia talk', 'Help talk']


def _step_processes(step):
    """Return the process names selected for step number `step`."""
    return ['Step {}: {}'.format(step, page) for page in STEP_PAGE_TYPES]


def _step_partition(step):
    """Build the partition for step number `step`.

    Main page, Article, Special, Homepage and 'left Wikipedia' keep their own
    label, the Wikipedia and Help pages share the 'Help/Wikipedia' label, and
    the remaining page types are grouped under 'other'.
    """
    def label(page):
        return 'Step {}: {}'.format(step, page)

    return Partition.Simple('process', [
        label('Main page'), label('Article'), label('Special'), label('Homepage'),
        'left Wikipedia',
        (label('Help/Wikipedia'), [label('Wikipedia'), label('Help')]),
        (label('other'), [label('Other'), label('Article talk'), label('User'), label('User talk'),
                          label('Wikipedia talk'), label('Help talk')])
    ])


# Step 1 only consists of the three survey outcomes.
survey_outcomes = ['save survey', 'skip survey', 'abandon survey']
step1 = Partition.Simple('process', list(survey_outcomes))

step2_with_other = _step_partition(2)
step3_with_other = _step_partition(3)
step4_with_other = _step_partition(4)
step5_with_other = _step_partition(5)

## Define the nodes, including a "left Wikipedia" waypoint
nodes = {
    'Registration': ProcessGroup(['registration']),
    'step1': ProcessGroup(list(survey_outcomes), partition=step1),
    'step2': ProcessGroup(_step_processes(2), partition=step2_with_other),
    'step3': ProcessGroup(_step_processes(3), partition=step3_with_other),
    'step4': ProcessGroup(_step_processes(4), partition=step4_with_other),
    'step5': ProcessGroup(_step_processes(5), partition=step5_with_other),
    'left': Waypoint(title='left Wikipedia'),
}

## Define the order: one layer per stage, registration on the left, and the
## waypoint sharing the last layer with step5
stage_order = ['Registration', 'step1', 'step2', 'step3', 'step4', 'step5']
ordering = [[stage] for stage in stage_order[:-1]] + [['left', stage_order[-1]]]

## Define the bundles: flows between consecutive stages first, then the flows
## from every stage but the last to Elsewhere, routed through the waypoint
bundles = [Bundle(source, target) for source, target in zip(stage_order, stage_order[1:])]
bundles += [Bundle(stage, Elsewhere, waypoints=['left']) for stage in stage_order[:-1]]

In [19]:
## Make a partition on what type of action the user took

action_types = ['view', 'edit', 'survey', 'left']
actions_by_type = Partition.Simple('type', list(action_types))

# Set the colours for the labels in the partition, one colour per action type.
action_colours = ['#FF9933', '#003399', '#99CCCC', '#CCCCCC']
palette = dict(zip(action_types, action_colours))

In [20]:
# cswiki diagram: flows are split and coloured by the type of action.
sdd = SankeyDefinition(nodes, bundles, ordering, flow_partition=actions_by_type)
cs_sankey = weave(sdd, cs_transitions, palette=palette)
cs_sankey.to_widget(**size)

SankeyWidget(groups=[{'nodes': ['step1^save survey', 'step1^skip survey', 'step1^abandon survey'], 'title': ''…

In [21]:
# kowiki diagram: same definition and palette, woven with the Korean transitions.
sdd = SankeyDefinition(nodes, bundles, ordering, flow_partition=actions_by_type)
ko_sankey = weave(sdd, ko_transitions, palette=palette)
ko_sankey.to_widget(**size)

SankeyWidget(groups=[{'nodes': ['step1^save survey', 'step1^skip survey', 'step1^abandon survey'], 'title': ''…