# Interactive Visualization Lab

Complete the following set of exercises to solidify your knowledge of interactive visualization using Plotly, Cufflinks, and IPyWidgets.

In [1]:
import pandas as pd
import plotly.plotly as py
import cufflinks as cf
import plotly.graph_objs as go
from ipywidgets import interact
from ipywidgets import widgets

# Put cufflinks in offline mode so the `.iplot()` calls below render inside the notebook
cf.go_offline()

In [2]:
# Online Retail transactions workbook; the path is relative to this notebook
retail_path = '../data/Online Retail.xlsx'
data = pd.read_excel(retail_path)

In [3]:
# Preview the first rows to check the columns that were loaded
data.head()

Unnamed: 0,InvoiceNo,InvoiceDate,StockCode,Description,Quantity,UnitPrice,Revenue,CustomerID,Country
0,536365,2010-12-01 08:26:00,85123A,CREAM HANGING HEART T-LIGHT HOLDER,6,2.55,15.3,17850,United Kingdom
1,536373,2010-12-01 09:02:00,85123A,CREAM HANGING HEART T-LIGHT HOLDER,6,2.55,15.3,17850,United Kingdom
2,536375,2010-12-01 09:32:00,85123A,CREAM HANGING HEART T-LIGHT HOLDER,6,2.55,15.3,17850,United Kingdom
3,536390,2010-12-01 10:19:00,85123A,CREAM HANGING HEART T-LIGHT HOLDER,64,2.55,163.2,17511,United Kingdom
4,536394,2010-12-01 10:39:00,85123A,CREAM HANGING HEART T-LIGHT HOLDER,32,2.55,81.6,13408,United Kingdom


## 1. Create an interactive bar chart showing total quantity and revenue by country (excluding United Kingdom) for the month of April 2011.

In [4]:
# Derive zero-padded string date parts ('04', '2011') from the invoice timestamp.
# The vectorised `.dt.strftime` replaces a per-row Python lambda and yields a
# missing value (instead of raising) for any missing timestamp.
# NOTE: the filters in the following cells compare against these *strings*.
data['Month'] = data['InvoiceDate'].dt.strftime('%m')
data['Year'] = data['InvoiceDate'].dt.strftime('%Y')
data.head()

Unnamed: 0,InvoiceNo,InvoiceDate,StockCode,Description,Quantity,UnitPrice,Revenue,CustomerID,Country,Month,Year
0,536365,2010-12-01 08:26:00,85123A,CREAM HANGING HEART T-LIGHT HOLDER,6,2.55,15.3,17850,United Kingdom,12,2010
1,536373,2010-12-01 09:02:00,85123A,CREAM HANGING HEART T-LIGHT HOLDER,6,2.55,15.3,17850,United Kingdom,12,2010
2,536375,2010-12-01 09:32:00,85123A,CREAM HANGING HEART T-LIGHT HOLDER,6,2.55,15.3,17850,United Kingdom,12,2010
3,536390,2010-12-01 10:19:00,85123A,CREAM HANGING HEART T-LIGHT HOLDER,64,2.55,163.2,17511,United Kingdom,12,2010
4,536394,2010-12-01 10:39:00,85123A,CREAM HANGING HEART T-LIGHT HOLDER,32,2.55,81.6,13408,United Kingdom,12,2010


In [5]:
# Total Quantity and Revenue for every (Country, Year, Month) combination
monthly_totals = (
    data.groupby(by=['Country', 'Year', 'Month'])[['Quantity', 'Revenue']]
        .sum()
        .reset_index()
)

# Keep April 2011 and leave out the United Kingdom.
# Year and Month are compared as strings, matching how those columns were built.
not_uk = monthly_totals['Country'] != 'United Kingdom'
is_april_2011 = (monthly_totals['Year'] == '2011') & (monthly_totals['Month'] == '04')

# Plot one bar group per country
df = monthly_totals.loc[not_uk & is_april_2011, ['Country', 'Quantity', 'Revenue']]
df.iplot(kind='bar', x='Country')

## 2. Create an interactive line chart showing quantity and revenue sold to France between January 1st and May 31st 2011.

In [8]:
# Monthly Quantity and Revenue totals for every country
df = data.groupby(by=['Country', 'Year', 'Month'])[['Quantity', 'Revenue']].sum().reset_index()

# Keep France, January through May 2011.
# Year/Month are zero-padded strings here, so the lexicographic comparison done
# by `.between('01', '05')` (inclusive on both ends) matches the numeric order.
mask1 = df['Country'] == 'France'
mask2 = df['Year'] == '2011'
mask3 = df['Month'].between('01', '05')
df = df.loc[mask1 & mask2 & mask3, ['Month', 'Quantity', 'Revenue']]

df.iplot(kind='line', x='Month',
         title='France: Quantity and Revenue, January-May 2011',
         xTitle='Month (2011)', yTitle='Monthly Total')

## 3. Create an interactive scatter plot showing the relationship between average quantity (x-axis) and average unit price (y-axis) for the product PARTY BUNTING with the plot points color-coded by country (categories).

In [9]:
# Average quantity and unit price of PARTY BUNTING, one row per country
is_party_bunting = data['Description'] == 'PARTY BUNTING'
df = (
    data[is_party_bunting]
        .groupby(by='Country')[['Quantity', 'UnitPrice']]
        .mean()
        .reset_index()
)

# One marker per country, coloured by country
df.iplot(
    kind='scatter',
    mode='markers',
    x='Quantity',
    y='UnitPrice',
    categories='Country',
    xTitle='Quantity',
    yTitle='UnitPrice',
    title='PARTY BUNTING',
)


The pandas.np module is deprecated and will be removed from pandas in a future version. Import numpy directly instead


The pandas.np module is deprecated and will be removed from pandas in a future version. Import numpy directly instead



## 4. Create a set of interactive histograms showing the distributions of quantity per invoice for the following countries: EIRE, Germany, France, and Netherlands.

In [10]:
lst_country = ['EIRE', 'Germany', 'France', 'Netherlands']

# Total quantity on each invoice, with one column per country.
# Grouping must be by InvoiceNo (not InvoiceDate) and must SUM Quantity (not
# count rows) to get "quantity per invoice". Splitting the countries into
# columns gives one histogram trace per country instead of a single pooled one.
# Cells are NaN where an invoice has no rows for that country.
df = (
    data.loc[data['Country'].isin(lst_country)]
        .pivot_table(index='InvoiceNo', columns='Country', values='Quantity', aggfunc='sum')
)

df.iplot(kind='histogram', title='Distribution of Quantity per Invoice',
         xTitle='Quantity per Invoice', yTitle='Frequency', bins=20)

## 5. Create an interactive side-by-side bar chart showing the revenue by country listed below (bars) for each of the products listed below.

In [11]:
# Products compared in the chart below
product_list = [
    'JUMBO BAG RED RETROSPOT',
    'CREAM HANGING HEART T-LIGHT HOLDER',
    'REGENCY CAKESTAND 3 TIER',
]

# Countries compared in the chart below
country_list = [
    'EIRE',
    'Germany',
    'France',
    'Netherlands',
]

In [12]:
# Rows for the selected countries and products only
mask = data['Country'].isin(country_list) & data['Description'].isin(product_list)

# Reshape to one row per product and one column per country. cufflinks draws
# one bar series per column, so each product gets a group of side-by-side
# country bars. (Plotting the long-format rows with x='Description' would put
# all countries in a single series with repeated x values.)
df = data.loc[mask].pivot_table(index='Description', columns='Country',
                                values='Revenue', aggfunc='sum')

df.iplot(kind='bar', barmode='group', title='Revenue by Country for Selected Products',
         xTitle='Product', yTitle='Revenue')

## 6. Create an interactive line chart showing quantity sold by day for the United Kingdom. Add drop-down boxes for Year and Month that allow you to filter the date range that appears in the chart.

In [13]:
# United Kingdom rows with integer Year / Month / Day parts for the dropdown filters.
# The parts are added to the UK copy only. `data['Year']` and `data['Month']`
# hold the zero-padded strings that earlier cells filter on ('2011', '04');
# overwriting them with integers would make those cells return nothing when re-run.
uk = data.loc[data['Country'] == 'United Kingdom'].assign(
    Year=lambda frame: pd.to_datetime(frame['InvoiceDate']).dt.year,
    Month=lambda frame: pd.to_datetime(frame['InvoiceDate']).dt.month,
    Day=lambda frame: pd.to_datetime(frame['InvoiceDate']).dt.day,
)

In [14]:
@interact(Year=sorted(uk['Year'].unique()),
          Month=sorted(uk['Month'].unique()))
def plot(Year, Month):
    """Plot the total quantity sold per day in the United Kingdom.

    Year, Month: values chosen in the dropdowns; they are matched against the
    `Year` and `Month` columns of `uk`.
    """
    selected = uk.loc[(uk['Year'] == Year) & (uk['Month'] == Month)]

    # Not every Year/Month pair offered by the dropdowns has sales
    if selected.empty:
        print(f'No United Kingdom sales recorded for {int(Year)}-{int(Month):02d}.')
        return

    # Sum Quantity only: a blanket .sum() would also try to add up the text and
    # datetime columns, which is wasted work and fails on recent pandas versions.
    daily = selected.groupby(by='Day')['Quantity'].sum().reset_index()

    daily.iplot(kind='line', x='Day', y='Quantity', title='Quantity Sold By Day',
                xTitle='Day', yTitle='Quantity')

interactive(children=(Dropdown(description='Year', options=(2010, 2011), value=2010), Dropdown(description='Mo…

## 7. Create an interactive scatter plot that plots number of invoices (x-axis) vs. number of customers (y-axis) and the plot points represent individual products. Add two sliders that control the x and y axis ranges.

In [15]:
# How each column is summarised for a product
agg_func = dict(
    InvoiceNo='nunique',    # distinct invoices
    Quantity='sum',
    UnitPrice='mean',
    Revenue='sum',
    CustomerID='nunique',   # distinct customers
)

# One row per product description (UK sales only)
by_description = uk.groupby('Description')
products = by_description.agg(agg_func)

In [16]:
# Slider limits come from the data so that the initial view shows every product
max_invoices = int(products['InvoiceNo'].max())
max_customers = int(products['CustomerID'].max())

my_slider = widgets.IntSlider(min=1, max=max_invoices, value=max_invoices)
my_slider2 = widgets.IntSlider(min=1, max=max_customers, value=max_customers)

@interact(n_invoices = my_slider, n_customers = my_slider2)
def plot(n_invoices, n_customers):
    """Scatter of invoices (x) vs customers (y), one point per product.

    n_invoices: upper bound of the x-axis range, from the first slider.
    n_customers: upper bound of the y-axis range, from the second slider.
    """
    # mode='markers' draws individual points instead of a connected line;
    # xrange / yrange apply the slider values to the axes.
    products.iplot(kind='scatter', mode='markers', x='InvoiceNo', y='CustomerID',
                   xrange=[0, n_invoices], yrange=[0, n_customers],
                   xTitle='Number of Invoices', yTitle='Number of Customers',
                   title='Invoices vs. Customers by Product')


interactive(children=(IntSlider(value=0, description='n_invoices'), IntSlider(value=0, description='n_customer…

## 8. Create an interactive bar chart that shows revenue by product description. Add a text field widget that filters the results to show the products that contain the entered text in their description.

In [17]:
# Total revenue per product description.
# Only Revenue is selected: also selecting the grouping key would try to "sum"
# the text column and clash with the key when the index is reset.
df = data.groupby(by='Description')['Revenue'].sum().reset_index()


@interact(Description='')
def chart(Description):
    """Bar chart of revenue for the products whose description contains the text.

    Description: text typed in the widget. It is matched as a literal,
    case-insensitive substring; an empty string matches every product.
    """
    # regex=False: characters such as '(' or '+' are matched literally instead
    # of raising a regex error. na=False: non-text descriptions never match.
    is_match = df['Description'].str.contains(Description, case=False, regex=False, na=False)
    matches = df.loc[is_match]

    if matches.empty:
        print(f'No product description contains {Description!r}.')
        return

    # `df` already holds one row per description, so no further grouping is needed
    matches.iplot(kind='bar', x='Description', y='Revenue', xTitle='Product', yTitle='Revenue')

interactive(children=(Text(value='', description='Description'), Output()), _dom_classes=('widget-interact',))