# Interactive Visualization Lab

Complete the following set of exercises to solidify your knowledge of interactive visualization using Plotly, Cufflinks, and IPyWidgets.

In [1]:
import pandas as pd
import plotly.offline as py;
import cufflinks as cf
from ipywidgets import interact
from plotly import tools
import plotly.graph_objs as go

# Put cufflinks in offline mode before any DataFrame/Series `.iplot()` call below.
cf.go_offline()

In [2]:
# Load the retail transactions workbook (path is relative to the notebook's
# working directory). The cells below read the InvoiceNo, InvoiceDate,
# Description, Quantity, UnitPrice, Revenue, CustomerID and Country columns.
data = pd.read_excel('../data/Online Retail.xlsx')

## 1. Create an interactive bar chart showing total quantity and revenue by country (excluding United Kingdom) for the month of April 2011.

In [3]:
# Transform the data
# Half-open window [2011-04-01, 2011-05-01) covers the whole of April; UK rows are excluded.
mask = (
    (data['InvoiceDate'] >= '2011-04-01')
    & (data['InvoiceDate'] < '2011-05-01')
    & (data['Country'] != 'United Kingdom')
)

april = data[mask]

# Pick the two measures *before* aggregating. Summing every column and selecting
# afterwards also tries to reduce the text and datetime columns, which is wasted
# work and can raise on pandas 2.x (non-numeric columns are no longer dropped silently).
total_qty_rvn = april.groupby('Country')[['Quantity', 'Revenue']].sum()
total_qty_rvn.head()

Unnamed: 0_level_0,Quantity,Revenue
Country,Unnamed: 1_level_1,Unnamed: 2_level_1
Australia,224,421.6
Austria,308,584.78
Belgium,1170,1788.48
Brazil,356,1143.6
Channel Islands,96,243.0


In [4]:
# Title and axis labels let the chart be read on its own when the notebook is skimmed.
total_qty_rvn.iplot(kind='bar',
                    title='Total Quantity and Revenue per Country (April 2011, excl. United Kingdom)',
                    xTitle='Country',
                    yTitle='Total')

In [5]:
# Method 2: the same grouped bar chart, assembled from plotly graph objects

# Traces -- one bar series per measure column
traces = [
    go.Bar(x=total_qty_rvn.index, y=total_qty_rvn[column], name=column)
    for column in total_qty_rvn.columns
]

# Layout -- axis and title objects are built first, then handed to the layout
x_axis = go.layout.XAxis(title=go.layout.xaxis.Title(text='Country'))
y_axis = go.layout.YAxis(title=go.layout.yaxis.Title(text='Total'))
chart_title = go.layout.Title(text='Total Quantity and Revenue per Country')
layout = go.Layout(barmode='group', width=1000, height=500,
                   xaxis=x_axis, yaxis=y_axis, title=chart_title)

# Plot
figure = go.Figure(data=traces, layout=layout)
py.iplot(figure)

## 2. Create an interactive line chart showing quantity and revenue sold to France between January 1st and May 31st 2011.

In [6]:
# Transform the data
# Half-open window [2011-01-01, 2011-06-01), matching the April filter used earlier.
# A string bound of '2011-05-31' is compared as midnight of that day, so
# `<= '2011-05-31'` would drop any May 31st row that carries a time of day.
mask = (
    (data['InvoiceDate'] >= '2011-01-01')
    & (data['InvoiceDate'] < '2011-06-01')
    & (data['Country'] == 'France')
)

# .assign builds a new frame, so nothing is written onto a filtered slice of `data`
# (the original column assignment triggers SettingWithCopyWarning). The vectorised
# .dt accessor replaces the per-row strftime lambda.
france_5m_2011 = data[mask].assign(
    InvoiceDate=lambda frame: frame['InvoiceDate'].dt.strftime('%m/%d')
)

# Select the measures before summing so only numeric columns are aggregated.
france_qty_rvn = france_5m_2011.groupby('InvoiceDate')[['Quantity', 'Revenue']].sum()

france_qty_rvn.head()

Unnamed: 0_level_0,Quantity,Revenue
InvoiceDate,Unnamed: 1_level_1,Unnamed: 2_level_1
01/05,728,1265.18
01/06,438,709.02
01/07,591,975.09
01/09,78,114.0
01/10,628,1112.06


In [7]:
# Title and axis labels let the chart be read on its own when the notebook is skimmed.
france_qty_rvn.iplot(kind='line',
                     title='Total Quantity and Revenue in France Over Time (Jan-May 2011)',
                     xTitle='Date',
                     yTitle='Total')

In [8]:
# Method 2: the same line chart, assembled from plotly graph objects

# Traces -- one line per measure column
traces = [
    go.Scatter(x=france_qty_rvn.index, y=france_qty_rvn[column], name=column)
    for column in france_qty_rvn.columns
]

# Layout -- axis and title objects are built first, then handed to the layout
x_axis = go.layout.XAxis(title=go.layout.xaxis.Title(text='Date'))
y_axis = go.layout.YAxis(title=go.layout.yaxis.Title(text='Total'))
chart_title = go.layout.Title(text='Total Quantity and Revenue in France Over Time (Jan-May 2011)')
layout = go.Layout(width=1000, height=500,
                   xaxis=x_axis, yaxis=y_axis, title=chart_title)

# Plot
figure = go.Figure(data=traces, layout=layout)
py.iplot(figure)

## 3. Create an interactive scatter plot showing the relationship between average quantity (x-axis) and average unit price (y-axis) for the product PARTY BUNTING with the plot points color-coded by country (categories).

In [9]:
# Transform the data
mask = (data['Description'] == 'PARTY BUNTING')

party_bunting = data[mask]

# Select the two measures before averaging: a mean over every column also hits
# the text columns, which pandas 2.x rejects instead of dropping silently.
# reset_index turns Country back into a regular column for the categorical scatter.
pb_country_qty_up = (
    party_bunting
    .groupby('Country')[['Quantity', 'UnitPrice']]
    .mean()
    .reset_index()
)

pb_country_qty_up.head()

Unnamed: 0,Country,Quantity,UnitPrice
0,Australia,33.125,4.7125
1,Austria,8.0,4.95
2,Belgium,4.0,4.95
3,Channel Islands,13.333333,4.95
4,Cyprus,2.333333,4.75


In [10]:
# Title and axis labels let the chart be read on its own when the notebook is skimmed.
pb_country_qty_up.iplot(kind='scatter', x='Quantity', y='UnitPrice', categories='Country',
                        title='Average Quantity VS Average Unit Price',
                        xTitle='Average Quantity',
                        yTitle='Average Unit Price')


The pandas.np module is deprecated and will be removed from pandas in a future version. Import numpy directly instead


The pandas.np module is deprecated and will be removed from pandas in a future version. Import numpy directly instead



In [11]:
# Method 2: the same scatter plot, assembled from plotly graph objects

# Country stays in the index here (no reset_index). The result gets its own name
# so the Country-as-column frame `pb_country_qty_up` is not overwritten; otherwise
# re-running the cufflinks scatter cell after this one would fail on
# categories='Country'.
# The measures are selected before averaging so only numeric columns are reduced.
pb_by_country = party_bunting.groupby('Country')[['Quantity', 'UnitPrice']].mean()

# Traces -- one single-point marker trace per country so each gets its own
# colour and legend entry
traces = [
    go.Scatter(x=[averages['Quantity']],
               y=[averages['UnitPrice']],
               mode='markers',
               name=country)
    for country, averages in pb_by_country.iterrows()
]

# Layout
layout = go.Layout(width=700, height=600,
                   xaxis=go.layout.XAxis(title=go.layout.xaxis.Title(text='Average Quantity')),
                   yaxis=go.layout.YAxis(title=go.layout.yaxis.Title(text='Average Unit Price')),
                   title=go.layout.Title(text='Average Quantity VS Average Unit Price'))
# Plot
py.iplot(go.Figure(data=traces, layout=layout))

## 4. Create a set of interactive histograms showing the distributions of quantity per invoice for the following countries: EIRE, Germany, France, and Netherlands.

In [12]:
# Transform the data
countries = ['EIRE', 'Germany', 'France', 'Netherlands']

mask = data['Country'].isin(countries)

# Rows for the selected countries, restricted to the columns the pivot needs
selected_rows = data.loc[mask, ['InvoiceNo', 'Quantity', 'Country']]

# One row per invoice, one column per country, summed quantity in the cells
qty_invc = selected_rows.pivot_table(values='Quantity',
                                     index='InvoiceNo',
                                     columns='Country',
                                     aggfunc='sum')

qty_invc.head()

Country,EIRE,France,Germany,Netherlands
InvoiceNo,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
536370,,446.0,,
536403,,,,96.0
536527,,,156.0,
536540,230.0,,,
536541,12.0,,,


In [13]:
fig = tools.make_subplots(2, 2, subplot_titles=tuple(countries))

# Grid cells in row-major order, the same order in which subplot_titles are placed.
# Driving both titles and traces from `countries` keeps them in step and removes
# the hand-typed trace names (one of which was misspelled 'Frande').
grid_cells = [(1, 1), (1, 2), (2, 1), (2, 2)]
for country, (row, col) in zip(countries, grid_cells):
    fig.add_histogram(x=qty_invc[country], row=row, col=col, name=country)
py.iplot(fig);

This is the format of your plot grid:
[ (1,1) x1,y1 ]  [ (1,2) x2,y2 ]
[ (2,1) x3,y3 ]  [ (2,2) x4,y4 ]



In [14]:
# Method 2

# Traces
# Build the histograms in the order of `countries`, because that list also supplies
# the subplot titles. The pivot's columns are sorted alphabetically
# (EIRE, France, Germany, Netherlands), so iterating over qty_invc.columns placed
# the France histogram under the "Germany" title and vice versa.
traces = [go.Histogram(x=qty_invc[country], name=country) for country in countries]

# Subplots
fig = tools.make_subplots(rows=2, cols=2, subplot_titles=tuple(countries), print_grid=False)
# (row, col) pairs in row-major order: (1,1), (1,2), (2,1), (2,2)
subplots = [(row, col) for row in range(1, 3) for col in range(1, 3)]
for trace, (row, col) in zip(traces, subplots):
    fig.append_trace(trace, row, col)

# Layout
fig['layout'].update(width=1000, height=600, title='Histogram (Quantity per Invoice)')

# Plot
py.iplot(fig)

## 5. Create an interactive side-by-side bar chart showing the revenue by country listed below (bars) for each of the products listed below.

In [15]:
# Product descriptions to compare; matched against data['Description'] in the next cell.
product_list = ['JUMBO BAG RED RETROSPOT', 
                'CREAM HANGING HEART T-LIGHT HOLDER',
                'REGENCY CAKESTAND 3 TIER']

# Countries to compare; matched against data['Country'] in the next cell.
country_list = ['EIRE', 'Germany', 'France', 'Netherlands']

In [16]:
# Transform the data
in_selected_country = data['Country'].isin(country_list)
is_selected_product = data['Description'].isin(product_list)
mask = in_selected_country & is_selected_product

# Countries down the rows, products across the columns, summed revenue in the cells
rvn_country = data[mask].pivot_table(values='Revenue',
                                     index='Country',
                                     columns='Description',
                                     aggfunc='sum')

rvn_country

Description,CREAM HANGING HEART T-LIGHT HOLDER,JUMBO BAG RED RETROSPOT,REGENCY CAKESTAND 3 TIER
Country,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
EIRE,2740.8,278.72,7388.55
France,131.75,903.37,2816.85
Germany,35.4,1072.76,9061.95
Netherlands,1167.0,3468.0,3166.35


In [17]:
# Title and axis labels let the chart be read on its own when the notebook is skimmed.
rvn_country.iplot(kind='bar',
                  title='Total Revenue per Country for given products',
                  xTitle='Country',
                  yTitle='Total Revenue')

In [18]:
# Alternative: the same grouped bar chart, assembled from plotly graph objects

# Traces -- one bar series per product column
traces = [
    go.Bar(x=rvn_country.index, y=rvn_country[column], name=column)
    for column in rvn_country.columns
]

# Layout -- axis and title objects are built first, then handed to the layout
x_axis = go.layout.XAxis(title=go.layout.xaxis.Title(text='Country'))
y_axis = go.layout.YAxis(title=go.layout.yaxis.Title(text='Total Revenue'))
chart_title = go.layout.Title(text='Total Revenue per Country for given products')
layout = go.Layout(barmode='group', width=1000, height=500,
                   xaxis=x_axis, yaxis=y_axis, title=chart_title)

# Plot
figure = go.Figure(data=traces, layout=layout)
py.iplot(figure)

## 6. Create an interactive line chart showing quantity sold by day for the United Kingdom. Add drop-down boxes for Year and Month that allow you to filter the date range that appears in the chart.

In [19]:
# Parse the invoice timestamps once and derive the calendar parts used by the
# drop-down filters below.
invoice_dates = pd.DatetimeIndex(data['InvoiceDate'])
data['Year'] = invoice_dates.year
data['Month'] = invoice_dates.month
data['Day'] = invoice_dates.day

# United Kingdom rows only
is_uk = data['Country'] == 'United Kingdom'
uk = data.loc[is_uk]

In [20]:
# Variables
# Distinct years in order of first appearance; distinct months in ascending order
years = uk['Year'].unique()
months = uk['Month'].drop_duplicates().sort_values().to_numpy()

In [21]:
# Interactive menu: one drop-down per keyword, filled from `years` and `months`
@interact(year=years,
          month=months)
def linechart(year=2011, month=4):
    """Plot the total quantity sold per day in the United Kingdom for one month.

    Parameters
    ----------
    year : int
        Year chosen in the `year` drop-down; compared against uk['Year'].
    month : int
        Month number chosen in the `month` drop-down; compared against uk['Month'].
    """
    # Transform the data
    mask = (uk['Year'] == year) & (uk['Month'] == month)

    sales_date = uk[mask]

    # Select Quantity before summing so the text and datetime columns are never
    # aggregated (wasted work, and an error on pandas 2.x).
    sales_day = sales_date.groupby('Day')['Quantity'].sum()

    # Plot
    sales_day.iplot(title=f'Total Quantity sold in UK ({month}-{year})',
                    xTitle='Day',
                    yTitle='Quantity')

interactive(children=(Dropdown(description='year', index=1, options=(2010, 2011), value=2011), Dropdown(descri…

In [22]:
# Alternative: the same interactive line chart, assembled from plotly graph objects

# Interactive menu: one drop-down per keyword, filled from `years` and `months`
@interact(year=years,
          month=months)
def linechart(year=2011, month=4):
    """Plot the total quantity sold per day in the United Kingdom for one month.

    Parameters
    ----------
    year : int
        Year chosen in the `year` drop-down; compared against uk['Year'].
    month : int
        Month number chosen in the `month` drop-down; compared against uk['Month'].
    """
    # Transform the data
    sales_date = uk[(uk['Year'] == year) & (uk['Month'] == month)]
    # Select Quantity before summing so the text and datetime columns are never
    # aggregated (wasted work, and an error on pandas 2.x).
    sales_day = sales_date.groupby('Day')['Quantity'].sum()
    # Traces
    traces = [go.Scatter(x=sales_day.index, y=sales_day)]
    # Layout
    layout = go.Layout(width=1000, height=500,
                       xaxis=go.layout.XAxis(title=go.layout.xaxis.Title(text='Day')),
                       yaxis=go.layout.YAxis(title=go.layout.yaxis.Title(text='Quantity')),
                       title=go.layout.Title(text=f'Total Quantity sold in UK ({month}-{year})'))
    # Plot
    py.iplot(go.Figure(data=traces, layout=layout))

interactive(children=(Dropdown(description='year', index=1, options=(2010, 2011), value=2011), Dropdown(descri…

## 7. Create an interactive scatter plot that plots number of invoices (x-axis) vs. number of customers (y-axis) and the plot points represent individual products. Add two sliders that control the x and y axis ranges.

In [23]:
# Per-product summary of UK sales: how each column is reduced within a product
agg_func = dict(
    InvoiceNo='nunique',    # number of distinct invoices
    Quantity='sum',
    UnitPrice='mean',
    Revenue='sum',
    CustomerID='nunique',   # number of distinct customers
)

by_description = uk.groupby('Description')
products = by_description.agg(agg_func)

In [24]:
# Preview the first rows of the per-product aggregates.
products.head()

Unnamed: 0_level_0,InvoiceNo,Quantity,UnitPrice,Revenue,CustomerID
Description,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
4 PURPLE FLOCK DINNER CANDLES,35,134,2.318421,255.46,30
50'S CHRISTMAS GIFT BAG LARGE,100,1721,1.2479,2067.25,98
DOLLY GIRL BEAKER,100,661,1.25,826.25,77
I LOVE LONDON MINI BACKPACK,55,181,4.15,751.15,46
NINE DRAWER OFFICE TIDY,25,44,14.761538,628.4,24


In [25]:
# Interactive menu: one integer slider per keyword, spanning the observed
# min..max of the corresponding column in steps of 10
@interact(invoices=(products['InvoiceNo'].min(), products['InvoiceNo'].max(), 10),
          customers=(products['CustomerID'].min(), products['CustomerID'].max(), 10))
def scatter(invoices=1, customers=1):
    """Scatter number of invoices against number of customers, one point per product.

    Parameters
    ----------
    invoices : int
        Slider value; only products with strictly more invoices are plotted.
    customers : int
        Slider value; only products with strictly more customers are plotted.
    """
    # Transform the data
    above_thresholds = (products['CustomerID'] > customers) & (products['InvoiceNo'] > invoices)
    # reset_index turns Description into a column so it can be used as hover text
    filtered = products[above_thresholds].reset_index()

    # Plot -- the hover text names the product each point stands for
    filtered.iplot(kind='scatter', x='InvoiceNo', y='CustomerID', mode='markers',
                   text='Description',
                   title='Number of Invoices VS Number of Customers',
                   xTitle='#Invoices',
                   yTitle='#Customers')

interactive(children=(IntSlider(value=1, description='invoices', max=1891, min=1, step=10), IntSlider(value=1,…

In [26]:
# Alternative: the same interactive scatter plot, assembled from plotly graph objects

# Interactive menu: one integer slider per keyword, spanning the observed
# min..max of the corresponding column in steps of 10
@interact(invoices=(products['InvoiceNo'].min(), products['InvoiceNo'].max(), 10),
          customers=(products['CustomerID'].min(), products['CustomerID'].max(), 10))
def scatter(invoices=1, customers=1):
    """Scatter number of invoices against number of customers, one point per product.

    Parameters
    ----------
    invoices : int
        Slider value; only products with strictly more invoices are plotted.
    customers : int
        Slider value; only products with strictly more customers are plotted.
    """
    # Transform the data
    filtered = products[(products['CustomerID'] > customers) &
                        (products['InvoiceNo'] > invoices)]
    # Traces -- the index of `products` is the product description, used as
    # hover text so each point can be identified
    traces = [go.Scatter(x=filtered['InvoiceNo'],
                         y=filtered['CustomerID'],
                         text=filtered.index,
                         mode='markers')]
    # Layout
    layout = go.Layout(width=700, height=600,
                       xaxis=go.layout.XAxis(title=go.layout.xaxis.Title(text='#Invoices')),
                       yaxis=go.layout.YAxis(title=go.layout.yaxis.Title(text='#Customers')),
                       title=go.layout.Title(text='Number of Invoices VS Number of Customers'))
    # Plot
    py.iplot(go.Figure(data=traces, layout=layout))

interactive(children=(IntSlider(value=1, description='invoices', max=1891, min=1, step=10), IntSlider(value=1,…

## 8. Create an interactive bar chart that shows revenue by product description. Add a text field widget that filters the results to show the products that contain the text entered in their description.

In [27]:
# Interactive menu: a string default makes `interact` render a text box
@interact(product='')
def chart(product):
    """Plot total revenue per product for descriptions containing the typed text.

    Parameters
    ----------
    product : str
        Text typed into the widget. It is upper-cased and matched as a literal
        substring of data['Description'].
    """
    # Transform the data
    # regex=False: the box holds plain text, and half-typed input such as "(" or
    #   "[" is not a valid regular expression and would raise on every keystroke.
    # na=False: rows without a description (if any) count as non-matching instead
    #   of putting NaN into the boolean mask.
    mask = data['Description'].str.contains(product.upper(), regex=False, na=False)

    filtered = data[mask]

    prdct_rvn = filtered.groupby('Description')['Revenue'].sum()

    # Plot
    prdct_rvn.iplot(kind='bar',
                    title='Total Revenue per Product',
                    yTitle='Total Revenue')

interactive(children=(Text(value='', description='product'), Output()), _dom_classes=('widget-interact',))

In [28]:
# Alternative: the same interactive bar chart, assembled from plotly graph objects

# Interactive menu: a string default makes `interact` render a text box
@interact(product='')
def chart(product):
    """Plot total revenue per product for descriptions containing the typed text.

    Parameters
    ----------
    product : str
        Text typed into the widget. It is upper-cased and matched as a literal
        substring of data['Description'].
    """
    # Transform the data
    # regex=False: the box holds plain text, and half-typed input such as "(" or
    #   "[" is not a valid regular expression and would raise on every keystroke.
    # na=False: rows without a description (if any) count as non-matching instead
    #   of putting NaN into the boolean mask.
    matches = data['Description'].str.contains(product.upper(), regex=False, na=False)
    filtered = data[matches]
    prdct_rvn = filtered.groupby('Description')['Revenue'].sum()
    # Traces
    traces = [go.Bar(x=prdct_rvn.index, y=prdct_rvn)]
    # Layout
    layout = go.Layout(width=1000, height=500,
                       yaxis=go.layout.YAxis(title=go.layout.yaxis.Title(text='Total Revenue')),
                       title=go.layout.Title(text='Total Revenue per Product'))
    # Plot
    py.iplot(go.Figure(data=traces, layout=layout))

interactive(children=(Text(value='', description='product'), Output()), _dom_classes=('widget-interact',))