In [1]:
import numpy as np
import pandas as pd

# Load some data
This data is derived from the VAST Challenge 2014 dataset. It contains some simulated patterns of life. From this data we know what activity each agent is doing at each hour.

In [2]:
# Read the activity log: the first CSV column becomes the index and the
# 'Timestamp' column is parsed into datetimes so the `.dt` accessor works.
data = pd.read_csv(
    './vast2014geo.csv',
    index_col=0,
    parse_dates=['Timestamp'],
)
data.head()

Unnamed: 0,FullName,Timestamp,name,id,time
0,"Alcazar, Lucas",2014-01-06 07:00:00,Hallowed Grounds,"Alcazar, Lucas@2014-01-06",7
1,"Alcazar, Lucas",2014-01-06 08:00:00,GAStech-Kronos,"Alcazar, Lucas@2014-01-06",8
2,"Alcazar, Lucas",2014-01-06 09:00:00,GAStech-Kronos,"Alcazar, Lucas@2014-01-06",9
3,"Alcazar, Lucas",2014-01-06 10:00:00,GAStech-Kronos,"Alcazar, Lucas@2014-01-06",10
4,"Alcazar, Lucas",2014-01-06 11:00:00,GAStech-Kronos,"Alcazar, Lucas@2014-01-06",11


# Filter down
Visualizing all this data at once with storylines wouldn't make sense, so we need to pick a subset. We can do this by picking an arbitrary person and a few dates. The code written below is flexible enough to handle multiple people too.

In [3]:
people = ['Alcazar, Lucas']
dates = ['2014-01-9', '2014-01-10', '2014-01-11']

# Keep only the rows that belong to one of the selected people.
people_mask = data.FullName.isin(people)

# Keep only the rows whose calendar day is one of the selected dates.
# Both sides are compared as midnight-normalized timestamps: comparing the
# datetime.date objects returned by `.dt.date` against pd.Timestamp is
# deprecated and, in recent pandas releases (2.0+), is never equal, which
# would silently yield an empty subset.
selected_days = [pd.Timestamp(d).normalize() for d in dates]
date_mask = data.Timestamp.dt.normalize().isin(selected_days)

data_subset = data[people_mask & date_mask]

data_subset.head()

Unnamed: 0,FullName,Timestamp,name,id,time
65,"Alcazar, Lucas",2014-01-09 00:00:00,Lucas Alcazar home,"Alcazar, Lucas@2014-01-09",0
66,"Alcazar, Lucas",2014-01-09 01:00:00,Lucas Alcazar home,"Alcazar, Lucas@2014-01-09",1
67,"Alcazar, Lucas",2014-01-09 02:00:00,Lucas Alcazar home,"Alcazar, Lucas@2014-01-09",2
68,"Alcazar, Lucas",2014-01-09 03:00:00,Lucas Alcazar home,"Alcazar, Lucas@2014-01-09",3
69,"Alcazar, Lucas",2014-01-09 04:00:00,Lucas Alcazar home,"Alcazar, Lucas@2014-01-09",4


# A Basic Storyline
The storyline visualization requires one parameter, **data**, which must be a pandas DataFrame with the following three columns:
* id -- a variable uniquely identifying each storyline
* group -- the 'state' of each storyline over time (the y-axis)
* time -- the timestep of each storyline (the x-axis)

The interface doesn't require that these exact column names exist. If your DataFrame uses different column names, you can use the parameters id, group, and time to specify which columns to use, without renaming your DataFrame.

In [4]:
from sven import StorylineChart

# Only the group column needs to be mapped here: the 'name' column is the one
# that identifies each storyline's group. No other column mapping is passed.
StorylineChart(data=data_subset, group='name')

StorylineChart(component='StorylineChart', props={'data': [{'FullName': 'Alcazar, Lucas', 'Timestamp': Timesta…

# Additional Customization
The widget allows for additional customization of storyline labels and colors. We'll use a more complex example to motivate this. Suppose we want to focus on one person and compare actual behavior to their typical behavior. We'd want to change the line colors to accentuate the typical behavior, and we don't need all the repeated names in the labels.

In [5]:
person = 'Alcazar, Lucas'
dates = ['2014-01-9', '2014-01-10', '2014-01-11']

# Every observation for the chosen person, across all recorded days.
data_subset = data[data.FullName == person]

# "Typical" behavior: for each value of `time`, the most frequently observed
# `name`. The result gets its own storyline id so it is drawn as a separate
# line next to the real days.
typical = (
    data_subset.groupby('time').name
    .apply(lambda ser: ser.value_counts().idxmax())
    .to_frame()
    .assign(id=f'{person}@Typical')
    .reset_index()
)

# Select the requested days by comparing midnight-normalized timestamps.
# Comparing the datetime.date objects from `.dt.date` against pd.Timestamp is
# deprecated and, in recent pandas releases (2.0+), is never equal, which
# would silently drop every real day from the chart.
selected_days = [pd.Timestamp(d).normalize() for d in dates]
date_mask = data_subset.Timestamp.dt.normalize().isin(selected_days)

# Stack the selected days on top of the typical day. Columns that the typical
# rows lack are NaN after the concat, so dropna(axis=1) removes them and
# leaves only the columns both parts share.
data_subset = pd.concat((data_subset[date_mask], typical), axis=0).dropna(axis=1)
data_subset

Unnamed: 0,name,id,time
65,Lucas Alcazar home,"Alcazar, Lucas@2014-01-09",0
66,Lucas Alcazar home,"Alcazar, Lucas@2014-01-09",1
67,Lucas Alcazar home,"Alcazar, Lucas@2014-01-09",2
68,Lucas Alcazar home,"Alcazar, Lucas@2014-01-09",3
69,Lucas Alcazar home,"Alcazar, Lucas@2014-01-09",4
...,...,...,...
19,Ouzeri Elian,"Alcazar, Lucas@Typical",19
20,Ouzeri Elian,"Alcazar, Lucas@Typical",20
21,Lucas Alcazar home,"Alcazar, Lucas@Typical",21
22,Lucas Alcazar home,"Alcazar, Lucas@Typical",22


In [6]:
lines = data_subset.id.unique()

# Line colors: start with every storyline in black, then switch the ones whose
# id mentions 'Typical' to red so they stand out.
color = dict.fromkeys(lines, 'black')
color.update({line_id: 'red' for line_id in lines if 'Typical' in line_id})

# Line labels: every id is about the same person, so keep only the text after
# the last '@'.
lineLabel = {line_id: line_id.rpartition('@')[2] for line_id in lines}

# Same chart as before, plus the per-line color and label overrides.
StorylineChart(data=data_subset, group='name', color=color, lineLabel=lineLabel)

StorylineChart(component='StorylineChart', props={'data': [{'name': 'Lucas Alcazar home', 'id': 'Alcazar, Luca…