In [1]:
import sys
sys.path.insert(1, '..')

import numpy as np
from pandas import isnull
from matplotlib import cm
from matplotlib.colors import to_hex
import seaborn as sns

from drilldown import Renderer, Page, Header, Table, Cell, PageLinkCell

In [2]:
# Load the 'titanic' example dataset through seaborn.
data = sns.load_dataset('titanic')

# Renderer that collects the report pages; 'titanic.xlsx' is presumably the
# output workbook path -- confirm against drilldown.Renderer.
renderer = Renderer('titanic.xlsx')

# Preview the first three rows.
data.head(n=3)

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,class,who,adult_male,deck,embark_town,alive,alone
0,0,3,male,22.0,1,0,7.25,S,Third,man,True,,Southampton,no,False
1,1,1,female,38.0,1,0,71.2833,C,First,woman,False,C,Cherbourg,yes,False
2,1,3,female,26.0,0,0,7.925,S,Third,woman,False,,Southampton,yes,True


In [3]:
# Custom cell type with richer behaviour: it derives its colour from its
# value and formats the value for display.
class ColoredCell(Cell):
    """Cell whose text and colour are both computed from one numeric value.

    Parameters
    ----------
    value
        Quantity to show; a missing value (as detected by ``pandas.isnull``)
        yields an empty string and no colour.
    max_value
        Divisor used to scale ``value`` before the colormap lookup.
    cmap
        Callable taking the scaled value and returning a colour accepted by
        ``matplotlib.colors.to_hex`` (defaults to ``cm.viridis``).
    fmt
        %-style format applied to ``value``.

    NOTE(review): ``Cell.__init__`` is not invoked here, and ``_get_string`` /
    ``_get_color`` are presumably hooks called by drilldown -- confirm
    against the ``Cell`` base class.
    """

    def __init__(self, value, max_value=1.0, cmap=cm.viridis, fmt='%.2f'):
        self._value, self._max_value = value, max_value
        self._cmap, self._fmt = cmap, fmt

    def _get_string(self):
        """Return the formatted value, or '' when the value is missing."""
        return '' if isnull(self._value) else self._fmt % self._value

    def _get_color(self):
        """Return the hex colour for the value, or None when it is missing."""
        if not isnull(self._value):
            ratio = self._value / self._max_value
            # Clamp into [0, 1 - 1e-9] before the colormap lookup.
            clamped = max(0, min(1-1e-9, ratio))
            return to_hex(self._cmap(clamped))
        return None

In [4]:
# Preprocess data: assign age bins.
# Bin edges in years; each label names the inclusive upper edge of its bin.
bins = np.array([0, 5, 10, 15, 20, 25, 30, 
                 35, 40, 45, 50, 55, 60, 65, 70])
# '?' marks an unknown age; the last label covers everything above the top edge.
bin_keys = ['?'] + [str(b) for b in bins[1:]] + [f'>{bins[-1]}']

def quantize_age(age):
    """Map an age to the label of its bin.

    Bins are right-inclusive: (0, 5] -> '5', (5, 10] -> '10', ...,
    (65, 70] -> '70'; anything above the last edge -> '>70'.

    Parameters
    ----------
    age : number, None or NaN

    Returns
    -------
    str
        One of ``bin_keys``. Missing ages return '?'. Ages below the first
        edge (negative) are also returned as '?'.
    """
    if isnull(age) or age < bins[0]:
        return bin_keys[0]
    # Index of the first edge that is >= age (len(bins) when age exceeds all).
    edge_idx = int(np.searchsorted(bins, age, side='left'))
    # An age exactly on the lowest edge gives index 0, which is the '?' slot;
    # it belongs to the first real bin instead.
    return bin_keys[max(edge_idx, 1)]

# Label every passenger with an age bin (missing ages get the '?' label).
data['age_str'] = data['age'].map(quantize_age)

In [5]:
# Assemble the frame with ratios of survivors.
# The aggregation is named by string: passing the np.mean callable triggers a
# FutureWarning on recent pandas.
frame = data.pivot_table(index=['pclass', 'sex'], 
                         columns=['age_str'], 
                         values='survived', 
                         aggfunc='mean')
# Sort columns. reindex (rather than frame[bin_keys]) keeps a bin that has no
# passengers as an all-NaN column instead of raising KeyError.
frame = frame.reindex(columns=bin_keys)
# Make colored cells. DataFrame.applymap is deprecated in favour of
# DataFrame.map; use whichever this pandas version provides.
elementwise = frame.map if hasattr(type(frame), 'map') else frame.applymap
frame = elementwise(lambda x: ColoredCell(100*x, 
                                          max_value=100, 
                                          fmt='%i%%'))

# Make links out of the first level of indices: each class value becomes a
# link to the page named pclass_<value>.
frame = frame.reset_index()
frame['pclass'] = [PageLinkCell(str(x), page_name=f"pclass_{x}") 
                   for x in frame['pclass'].values]
frame = frame.set_index(['pclass', 'sex'])

frame

Unnamed: 0_level_0,age_str,?,5,10,15,20,25,30,35,40,45,50,55,60,65,70,>70
pclass,sex,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
1,female,100%,0%,,100%,100%,92%,100%,100%,100%,100%,85%,100%,100%,100%,,
1,male,23%,100%,,100%,25%,42%,50%,66%,43%,37%,37%,28%,28%,0%,0%,33%
2,female,100%,100%,100%,100%,100%,92%,86%,100%,85%,83%,100%,100%,0%,,,
2,male,22%,100%,100%,,10%,0%,0%,17%,0%,20%,0%,0%,0%,100%,0%,
3,female,59%,73%,0%,62%,47%,50%,50%,50%,33%,0%,0%,,,100%,,
3,male,9%,38%,33%,16%,12%,9%,20%,18%,5%,12%,0%,0%,0%,0%,,0%


In [6]:
# Make top page: the survival-rate table, registered under the name 'index'.
top_page = Page(
    'index',
    Header('Titanic dataset exploration', 
           'Percent of survivors by categories.'),
    Table(frame, 
          # Two leading widths, presumably for the two index levels
          # (pclass, sex), then one width per age-bin column.
          column_widths=[10, 20] + [8] * len(frame.columns),
          group_level=0))
renderer.add_page(top_page)

In [7]:
# Make other pages: one detail page per passenger class.
# Iterate over the raw class values in `data` rather than over frame's index:
# that index level holds PageLinkCell objects, so the page names and the row
# filter would depend on how those objects convert to strings, and a quoted
# value would be compared against the integer pclass column.
for pclass in sorted(data['pclass'].unique()):
    pclass_frame = (data[data['pclass'] == pclass]
                    .set_index(['embark_town', 'who', 'alive'])
                    .sort_index())
    # The page name must match the page_name given to the PageLinkCell links
    # on the top page (pclass_<value>).
    page = Page(f'pclass_{pclass}',
                Header(f'Passengers with class {pclass} tickets', ''),
                Table(pclass_frame,
                      group_level=1,
                      # Three widths for the index levels, then one per column.
                      column_widths=([15, 7, 7] 
                                     + [12] * len(pclass_frame.columns)),
                      hidden_columns=[4, 11]),
                parent=top_page)
    renderer.add_page(page)

In [8]:
# Render the pages registered with renderer.add_page in the cells above
# (presumably into the workbook named when the Renderer was created -- confirm).
renderer.render_pages()

  warn("Can't merge single cell")
