### 1. Initial Configuration

#### 1.1 Imports & Config

In [2]:
# data analysis
import numpy as np
import pandas as pd
import xlwings as xw

# plotly
import plotly.plotly as py
import plotly.graph_objs as go
import plotly.offline as po
import plotly.figure_factory as ff

# colors
import colorlover as cl
from IPython.display import HTML

import os

In [3]:
# Render matplotlib figures inline in the notebook output area.
%matplotlib inline

In [4]:
# Enable plotly's offline mode for this notebook. With connected=True the
# plotly.js library is loaded from the online CDN rather than embedded in the
# notebook file, so viewing the figures needs network access.
po.init_notebook_mode(connected=True)

#### 1.2 Code Examples

In [106]:
# Preview colorlover's 10-colour qualitative 'Paired' scale as HTML swatches.
HTML(cl.to_html(cl.scales['10']['qual']['Paired']))

In [31]:
# Preview colorlover's 5-colour sequential 'GnBu' scale as HTML swatches.
HTML(cl.to_html(cl.scales['5']['seq']['GnBu']))

#### 1.3 Global Functions

In [5]:
def strip_totals(df):
    """Return ``df`` without the rows that mention 'Total' in any text column.

    Every column that supports the pandas ``.str`` accessor is searched for
    the case-sensitive substring ``'Total'``; a row is dropped as soon as one
    of its text cells matches. Columns without string values (numeric, dates,
    ...) are ignored. The input frame is not modified.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to filter.

    Returns
    -------
    pandas.DataFrame
        The rows of ``df`` where no text column contains 'Total', with the
        original index and columns preserved.
    """
    # Start from an explicit all-False boolean mask aligned to df's index.
    rows_to_remove = pd.Series(False, index=df.index, dtype=bool)

    for col in df.columns:
        try:
            # na=False: missing cells count as "no match" instead of yielding
            # NaN, which would turn the mask into a non-boolean Series.
            has_total = df[col].str.contains('Total', na=False)
        except AttributeError:
            # Column has no string values, so there is nothing to search.
            continue
        rows_to_remove |= has_total.astype(bool)

    # `~` is the boolean inverse; unary minus is not reliable on boolean data.
    return df[~rows_to_remove]

### My Work

In [15]:
# Load the raw expenditure table. Forward slashes are used so the relative
# path resolves on Windows as well as on macOS/Linux; the backslash form only
# works on Windows.
data = pd.read_csv(
    './Healthcare Data/Health Expenditure by Age and Gender/age and gender.csv'
)

In [16]:
# Reshape
hlth_spend = (
    data
    # Reshape: keep the first four columns as identifiers and stack every
    # remaining column into ('Year', 'Spending(M)') pairs.
    .melt(
        id_vars=data.columns[:4],
        value_vars=data.columns[4:],
        var_name='Year',
        value_name='Spending(M)',
    )
    # Rename: strip spaces out of every column label.
    .rename(columns=lambda col_nm: col_nm.replace(' ', ''))
)

In [78]:
# Reads the same CSV as the earlier load cell. Forward slashes keep the
# relative path portable across Windows and macOS/Linux.
data = pd.read_csv(
    './Healthcare Data/Health Expenditure by Age and Gender/age and gender.csv'
)

In [82]:
# Wide -> long: the first four columns identify a row, every later column
# becomes one ('Year', 'Spend') observation.
hlth_spend = pd.melt(
    data,
    id_vars=data.columns[:4],
    value_vars=data.columns[4:],
    var_name='Year',
    value_name='Spend',
)

In [101]:
# Sum 'Spend' per service (rows) and year (columns), after dropping every row
# whose text fields contain 'Total'.
spend_by_service = strip_totals(hlth_spend).pivot_table(
    values='Spend',
    index='Service',
    columns='Year',
    # The string alias selects the same groupby sum as np.sum, without the
    # FutureWarning recent pandas emits when a NumPy callable is passed.
    aggfunc='sum',
)
spend_by_service

Year,2002,2004,2006,2008,2010,2012
Service,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Dental Services,73445,81751,91144,101939,105033,108855
Durable Medical Equipment,27130,30371,34412,37681,39924,43677
Home Health Care,36464,44639,52054,62321,71055,76874
Hospital Care,486480,565378,651218,727631,817617,902675
Nursing Care Facilities and Continuing Care Retirement Communities,94500,105436,116870,131518,140903,148308
Other Health Residential and Personal Care,76708,89343,101084,114521,129009,137937
Other Nondurable Medical Products,33293,38088,43732,49473,51245,53733
Other Professional Services,43341,49887,54540,63681,69894,77572
Physician and Clinical Services,338339,390435,435919,483745,516364,563043
Prescription Drugs,157917,192826,224074,241422,253035,259120


In [111]:
# Service labels ordered by their value in the last column, largest first.
service_order = (
    spend_by_service
    .iloc[:, -1]
    .sort_values(ascending=False)
    .index
)
service_order

Index(['Hospital Care', 'Physician and Clinical Services',
       'Prescription Drugs',
       'Nursing Care Facilities and Continuing Care Retirement Communities',
       'Other Health Residential and Personal Care', 'Dental Services',
       'Other Professional Services', 'Home Health Care',
       'Other Nondurable Medical Products', 'Durable Medical Equipment'],
      dtype='object', name='Service')

In [113]:
def stacked_line_plot(df, x, y_labels, color_scale, nbr_form='{:.1f}%', normalize=True,
                      text_scale=100, filename='my-stacked-area-plot-hover.html'):
    """Write a stacked, filled line chart of the rows of ``df`` to an HTML file.

    Each label in ``y_labels`` selects one row of ``df`` (via ``df.loc``) and
    becomes one trace. Traces are stacked in the order given: the plotted y
    values are the running sum of the rows, while the hover text shows each
    row's own (un-stacked) value.

    Parameters
    ----------
    df : pandas.DataFrame
        One row per series, one column per x position.
    x : sequence
        X coordinates shared by every trace (typically ``df.columns``).
    y_labels : iterable
        Row labels of ``df`` to plot, bottom of the stack first.
    color_scale : iterable of str
        Line colours. When there are fewer colours than labels the colours
        are reused from the start instead of silently dropping the extra
        series.
    nbr_form : str
        ``str.format`` template for the hover text of a single value.
    normalize : bool
        If True, divide every column by its column total so the stack sums
        to 1 at each x position.
    text_scale : number
        Multiplier applied to a value before it is formatted for the hover
        text. The default of 100 turns fractions into percentages; pass 1 to
        show values as they are.
    filename : str
        Path of the HTML file written by ``plotly.offline.plot``.

    Returns
    -------
    None
    """
    # Local import keeps the function self-contained; itertools is stdlib.
    import itertools

    # Normalized data: each column is rescaled to shares of its own total.
    if normalize:
        df = df / df.sum(axis=0)

    # Original (un-stacked) rows, in stacking order.
    rows = [df.loc[label] for label in y_labels]

    # Running sums give the upper edge of each stacked band.
    stacked_rows = list(itertools.accumulate(rows))

    # Hover text is built from the un-stacked values.
    hover_rows = [
        [nbr_form.format(value * text_scale) for value in row]
        for row in rows
    ]

    # Cycle colours so a short palette never truncates the zip below.
    palette = itertools.cycle(color_scale)

    traces = [
        go.Scatter(
            x=x,
            y=stacked,
            text=hover,
            hoverinfo='x+text',
            mode='lines',
            line=dict(width=3, color=color),
            # Fill down to the previous trace so the bands stack visually.
            fill='tonexty',
            name=label,
        )
        for stacked, hover, color, label in zip(stacked_rows, hover_rows, palette, y_labels)
    ]

    fig = go.Figure(data=traces)
    po.plot(fig, filename=filename)

In [114]:
# Stacked chart of the service rows across the year columns, ordered by
# service_order and coloured with the 10-colour 'Paired' scale.
stacked_line_plot(
    spend_by_service,
    spend_by_service.columns,
    service_order,
    cl.scales['10']['qual']['Paired'],
)


Your filename `my-stacked-area-plot-hover` didn't end with .html. Adding .html to the end of your file.

