# Interactive Visualization Lab

Complete the following set of exercises to solidify your knowledge of interactive visualization using Plotly, Cufflinks, and IPyWidgets.

In [1]:
import pandas as pd
import plotly.plotly as py
import cufflinks as cf
from ipywidgets import interact

# Put cufflinks in offline mode before any .iplot() call below.
# NOTE(review): presumably this makes charts render inside the notebook without
# Plotly cloud credentials — confirm against the installed cufflinks version.
cf.go_offline()

In [2]:
# Load the retail workbook into the DataFrame that every later cell builds on.
# The path is relative, so the file must sit in the notebook's working directory.
data = pd.read_excel('Online Retail.xlsx')


In [3]:
# Preview the first rows to check the load and see the available columns.
data.head()

Unnamed: 0,InvoiceNo,InvoiceDate,StockCode,Description,Quantity,UnitPrice,Revenue,CustomerID,Country
0,536365,2010-12-01 08:26:00,85123A,CREAM HANGING HEART T-LIGHT HOLDER,6,2.55,15.3,17850,United Kingdom
1,536373,2010-12-01 09:02:00,85123A,CREAM HANGING HEART T-LIGHT HOLDER,6,2.55,15.3,17850,United Kingdom
2,536375,2010-12-01 09:32:00,85123A,CREAM HANGING HEART T-LIGHT HOLDER,6,2.55,15.3,17850,United Kingdom
3,536390,2010-12-01 10:19:00,85123A,CREAM HANGING HEART T-LIGHT HOLDER,64,2.55,163.2,17511,United Kingdom
4,536394,2010-12-01 10:39:00,85123A,CREAM HANGING HEART T-LIGHT HOLDER,32,2.55,81.6,13408,United Kingdom


In [4]:
# Check column types; the later cells rely on InvoiceDate supporting the .dt accessor.
data.dtypes

InvoiceNo               int64
InvoiceDate    datetime64[ns]
StockCode              object
Description            object
Quantity                int64
UnitPrice             float64
Revenue               float64
CustomerID              int64
Country                object
dtype: object

## 1. Create an interactive bar chart showing total quantity and revenue by country (excluding United Kingdom) for the month of April 2011.

In [5]:
# Derive text columns from the invoice timestamp:
#   Date -> day-month-year (e.g. 11-04-2011), Time -> 24-hour HH:MM.

data['Date']=data['InvoiceDate'].dt.strftime('%d-%m-%Y')
data['Time']=data['InvoiceDate'].dt.strftime('%H:%M')

In [6]:
# Select non-UK rows invoiced in April 2011.
# Filter on the real datetime column rather than substring-matching the
# formatted 'Date' text: the text match silently returns an empty frame once
# 'Date' is re-formatted (a later cell rewrites it as year-month-day), whereas
# this condition gives the same result every time the cell is re-run.
invoice_date = data['InvoiceDate']
is_april_2011 = (invoice_date.dt.year == 2011) & (invoice_date.dt.month == 4)
data_April_2011 = data[(data['Country'] != 'United Kingdom') & is_april_2011]
data_April_2011.head()

Unnamed: 0,InvoiceNo,InvoiceDate,StockCode,Description,Quantity,UnitPrice,Revenue,CustomerID,Country,Date,Time
724,549667,2011-04-11 12:20:00,85123A,CREAM HANGING HEART T-LIGHT HOLDER,6,2.95,17.7,14911,EIRE,11-04-2011,12:20
807,551163,2011-04-26 15:52:00,85123A,CREAM HANGING HEART T-LIGHT HOLDER,9,2.95,26.55,12573,France,26-04-2011,15:52
3320,550899,2011-04-21 12:07:00,22752,SET 7 BABUSHKA NESTING BOXES,2,8.5,17.0,13505,Switzerland,21-04-2011,12:07
4880,550527,2011-04-19 10:48:00,84879,ASSORTED COLOUR BIRD ORNAMENT,40,1.69,67.6,12476,Germany,19-04-2011,10:48
4882,550620,2011-04-19 13:39:00,84879,ASSORTED COLOUR BIRD ORNAMENT,32,1.69,54.08,12585,Germany,19-04-2011,13:39


In [7]:
# Total the numeric columns per country for the April 2011 subset.
# numeric_only=True keeps the text and datetime columns (StockCode, Description,
# InvoiceDate, Date, Time) out of the aggregation; recent pandas versions no
# longer drop them silently and fail when asked to sum them.
# Only the Quantity and Revenue totals are meaningful and plotted below.
data_April_2011_plot = data_April_2011.groupby('Country', as_index=False).sum(numeric_only=True)
data_April_2011_plot.head()

Unnamed: 0,Country,InvoiceNo,Quantity,UnitPrice,Revenue,CustomerID
0,Australia,9338321,224,40.65,421.6,210681
1,Austria,13225072,308,95.18,584.78,298236
2,Belgium,60518251,1170,273.38,1788.48,1365606
3,Brazil,17606432,356,142.6,1143.6,408608
4,Channel Islands,4399312,96,28.1,243.0,119448


In [8]:
# Bar chart with Country on the x-axis and the Quantity and Revenue totals as series.
data_April_2011_plot.iplot(
    kind='bar',
    x='Country',
    y=['Quantity', 'Revenue'],
    xTitle='Country',
    title='Total quantity and revenue by country (excluding United Kingdom) for the month of April 2011',
)

## 2. Create an interactive line chart showing quantity and revenue sold to France between January 1st and May 31st 2011.

In [9]:
# Re-derive 'Date' as year-month-day text so the values order chronologically
# when sorted as strings. NOTE: this overwrites the day-month-year 'Date'
# column created earlier in the notebook.
data['Date']=data['InvoiceDate'].dt.strftime('%Y-%m-%d')
data.head()

Unnamed: 0,InvoiceNo,InvoiceDate,StockCode,Description,Quantity,UnitPrice,Revenue,CustomerID,Country,Date,Time
0,536365,2010-12-01 08:26:00,85123A,CREAM HANGING HEART T-LIGHT HOLDER,6,2.55,15.3,17850,United Kingdom,2010-12-01,08:26
1,536373,2010-12-01 09:02:00,85123A,CREAM HANGING HEART T-LIGHT HOLDER,6,2.55,15.3,17850,United Kingdom,2010-12-01,09:02
2,536375,2010-12-01 09:32:00,85123A,CREAM HANGING HEART T-LIGHT HOLDER,6,2.55,15.3,17850,United Kingdom,2010-12-01,09:32
3,536390,2010-12-01 10:19:00,85123A,CREAM HANGING HEART T-LIGHT HOLDER,64,2.55,163.2,17511,United Kingdom,2010-12-01,10:19
4,536394,2010-12-01 10:39:00,85123A,CREAM HANGING HEART T-LIGHT HOLDER,32,2.55,81.6,13408,United Kingdom,2010-12-01,10:39


In [10]:
# Rows sold to France between January 1st and May 31st 2011, both days included.
# InvoiceDate carries a time of day, so the upper bound must be "before June 1st":
# comparing against '2011-05-31' stops at midnight and drops every invoice
# issued during May 31st.
period_start = pd.Timestamp('2011-01-01')
period_end_exclusive = pd.Timestamp('2011-06-01')
in_period = (data['InvoiceDate'] >= period_start) & (data['InvoiceDate'] < period_end_exclusive)
data_France = data[(data['Country'] == 'France') & in_period].sort_values(by='Date')

# Daily totals. numeric_only=True keeps text/datetime columns out of the sum,
# which recent pandas versions would otherwise refuse to aggregate.
data_France_group = data_France.groupby('Date', as_index=False).sum(numeric_only=True)

data_France_group.head()

Unnamed: 0,Date,InvoiceNo,Quantity,UnitPrice,Revenue,CustomerID
0,2011-01-05,38895195,728,197.38,1265.18,913071
1,2011-01-06,23775766,438,181.55,709.02,552934
2,2011-01-07,18916037,591,119.81,975.09,439089
3,2011-01-09,4864689,78,20.2,114.0,113859
4,2011-01-10,26491458,628,130.73,1112.06,621369


In [11]:
# Line chart of the daily Quantity and Revenue totals for France, Date on the x-axis.
(
    data_France_group
    .loc[:, ['Quantity', 'Revenue', 'Date']]
    .iplot(
        kind='line',
        x='Date',
        xTitle='Date',
        title='Quantity and revenue sold to France',
    )
)


## 3. Create an interactive scatter plot showing the relationship between average quantity (x-axis) and average unit price (y-axis) for the product PARTY BUNTING with the plot points color-coded by country (categories).

In [16]:
# Keep only the rows whose description is exactly PARTY BUNTING.
data_Party_bunting = data.loc[data['Description'].eq('PARTY BUNTING')]
data_Party_bunting.head()

Unnamed: 0,InvoiceNo,InvoiceDate,StockCode,Description,Quantity,UnitPrice,Revenue,CustomerID,Country,Date,Time
252864,536956,2010-12-03 12:43:00,47566,PARTY BUNTING,5,4.65,23.25,14210,United Kingdom,2010-12-03,12:43
252865,537065,2010-12-05 11:57:00,47566,PARTY BUNTING,5,4.65,23.25,12567,France,2010-12-05,11:57
252866,537128,2010-12-05 12:15:00,47566,PARTY BUNTING,2,4.65,9.3,12841,United Kingdom,2010-12-05,12:15
252867,537142,2010-12-05 12:57:00,47566,PARTY BUNTING,1,4.65,4.65,12748,United Kingdom,2010-12-05,12:57
252868,537420,2010-12-06 15:18:00,47566,PARTY BUNTING,5,4.65,23.25,17519,United Kingdom,2010-12-06,15:18


In [17]:
# Average the numeric columns per country (Quantity and UnitPrice feed the
# scatter plot below). numeric_only=True keeps text/datetime columns out of
# the mean, which recent pandas versions would otherwise fail on.
data_Party_bunting = data_Party_bunting.groupby('Country', as_index=False).mean(numeric_only=True)
data_Party_bunting.head()

Unnamed: 0,Country,InvoiceNo,Quantity,UnitPrice,Revenue,CustomerID
0,Australia,554329.625,33.125,4.7125,143.78125,12399.25
1,Austria,552202.0,8.0,4.95,39.6,12414.0
2,Belgium,557600.0,4.0,4.95,19.8,12363.0
3,Channel Islands,561821.666667,13.333333,4.95,66.0,14934.0
4,Cyprus,553141.333333,2.333333,4.75,10.95,12373.333333


In [20]:
# Average quantity (x) against average unit price (y), one category per country.
data_Party_bunting.iplot(
    x='Quantity',
    y='UnitPrice',
    categories='Country',
    xTitle='Avr Quantity',
    yTitle='Avr Unit Price',
    title='Quantity vs. Price',
)

## 4. Create a set of interactive histograms showing the distributions of quantity per invoice for the following countries: EIRE, Germany, France, and Netherlands.

In [35]:
# Countries whose per-invoice quantity distributions are compared below
# ("pays" is French for "countries").
pays = ['EIRE', 'Germany', 'France', 'Netherlands']
country = data[data['Country'].isin(pays)]
# Show a bounded preview instead of dumping the whole filtered frame.
country.head(10)



Unnamed: 0,InvoiceNo,InvoiceDate,StockCode,Description,Quantity,UnitPrice,Revenue,CustomerID,Country,Date,Time
179,539320,2010-12-16 19:16:00,85123A,CREAM HANGING HEART T-LIGHT HOLDER,24,2.95,70.80,14911,EIRE,2010-12-16,19:16
198,539722,2010-12-21 13:45:00,85123A,CREAM HANGING HEART T-LIGHT HOLDER,24,2.95,70.80,14911,EIRE,2010-12-21,13:45
304,541570,2011-01-19 12:34:00,85123A,CREAM HANGING HEART T-LIGHT HOLDER,256,2.55,652.80,14646,Netherlands,2011-01-19,12:34
322,541979,2011-01-24 14:54:00,85123A,CREAM HANGING HEART T-LIGHT HOLDER,6,2.95,17.70,14911,EIRE,2011-01-24,14:54
367,542777,2011-02-01 08:31:00,85123A,CREAM HANGING HEART T-LIGHT HOLDER,6,2.95,17.70,14911,EIRE,2011-02-01,08:31
393,543114,2011-02-03 13:26:00,85123A,CREAM HANGING HEART T-LIGHT HOLDER,32,2.55,81.60,14156,EIRE,2011-02-03,13:26
447,544210,2011-02-17 11:01:00,85123A,CREAM HANGING HEART T-LIGHT HOLDER,6,2.95,17.70,14911,EIRE,2011-02-17,11:01
464,544690,2011-02-23 08:48:00,85123A,CREAM HANGING HEART T-LIGHT HOLDER,32,2.55,81.60,14156,EIRE,2011-02-23,08:48
514,545657,2011-03-04 14:02:00,85123A,CREAM HANGING HEART T-LIGHT HOLDER,6,2.95,17.70,14911,EIRE,2011-03-04,14:02
534,546027,2011-03-09 08:09:00,85123A,CREAM HANGING HEART T-LIGHT HOLDER,12,2.95,35.40,12759,Netherlands,2011-03-09,08:09


In [37]:
# One row per invoice, one column per country, cells hold the summed quantity.
data_country = country.pivot_table(
    index='InvoiceNo',
    columns='Country',
    values='Quantity',
    aggfunc='sum',
)
data_country.head()

Country,EIRE,France,Germany,Netherlands
InvoiceNo,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
536370,,446.0,,
536403,,,,96.0
536527,,,156.0,
536540,230.0,,,
536541,12.0,,,


In [39]:
# One histogram per country column. The binned values are the per-invoice
# quantities, so the x-axis is quantity and the y-axis is how many invoices
# fall in each bin (the previous axis titles described the opposite).
data_country.iplot(kind='hist', subplots=True,
                   xTitle='Quantity per invoice', yTitle='Number of invoices',
                   title='Distributions of quantity per invoice')

## 5. Create an interactive side-by-side bar chart showing the revenue by country listed below (bars) for each of the products listed below.

In [41]:
# Products compared in the side-by-side revenue chart.
product_list = [
    'JUMBO BAG RED RETROSPOT',
    'CREAM HANGING HEART T-LIGHT HOLDER',
    'REGENCY CAKESTAND 3 TIER',
]

# Countries compared in the side-by-side revenue chart.
country_list = [
    'EIRE',
    'Germany',
    'France',
    'Netherlands',
]

In [43]:
# Rows that match both the selected countries and the selected products.
side = data.loc[
    data['Description'].isin(product_list) & data['Country'].isin(country_list)
]
side.head()

Unnamed: 0,InvoiceNo,InvoiceDate,StockCode,Description,Quantity,UnitPrice,Revenue,CustomerID,Country,Date,Time
179,539320,2010-12-16 19:16:00,85123A,CREAM HANGING HEART T-LIGHT HOLDER,24,2.95,70.8,14911,EIRE,2010-12-16,19:16
198,539722,2010-12-21 13:45:00,85123A,CREAM HANGING HEART T-LIGHT HOLDER,24,2.95,70.8,14911,EIRE,2010-12-21,13:45
304,541570,2011-01-19 12:34:00,85123A,CREAM HANGING HEART T-LIGHT HOLDER,256,2.55,652.8,14646,Netherlands,2011-01-19,12:34
322,541979,2011-01-24 14:54:00,85123A,CREAM HANGING HEART T-LIGHT HOLDER,6,2.95,17.7,14911,EIRE,2011-01-24,14:54
367,542777,2011-02-01 08:31:00,85123A,CREAM HANGING HEART T-LIGHT HOLDER,6,2.95,17.7,14911,EIRE,2011-02-01,08:31


In [50]:
# Revenue totals with one row per country and one column per product.
revenue = side.pivot_table(
    index='Country',
    columns='Description',
    values='Revenue',
    aggfunc='sum',
)
# Copy the index into a regular column so the chart can reference it as x='Country'.
revenue['Country'] = revenue.index

revenue.head()

Description,CREAM HANGING HEART T-LIGHT HOLDER,JUMBO BAG RED RETROSPOT,REGENCY CAKESTAND 3 TIER,Country
Country,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
EIRE,2740.8,278.72,7388.55,EIRE
France,131.75,903.37,2816.85,France
Germany,35.4,1072.76,9061.95,Germany
Netherlands,1167.0,3468.0,3166.35,Netherlands


In [52]:
# Bar chart with Country on the x-axis; the remaining (product) columns are the series.
revenue.iplot(
    kind='bar',
    x='Country',
    xTitle='Country',
    yTitle='Revenue',
    title='Revenue by country',
)

## 6. Create an interactive line chart showing quantity sold by day for the United Kingdom. Add drop-down boxes for Year and Month that allow you to filter the date range that appears in the chart.

In [53]:
# Split the invoice timestamp into calendar parts for year/month filtering.
data['Year'] = data['InvoiceDate'].dt.year
data['Month'] = data['InvoiceDate'].dt.month
data['Day'] = data['InvoiceDate'].dt.day
# United Kingdom rows only, taken after the new columns exist so they carry over.
uk = data.loc[data['Country'].eq('United Kingdom')]

In [54]:
# Preview the UK subset, including the Year/Month/Day columns added above.
uk.head()

Unnamed: 0,InvoiceNo,InvoiceDate,StockCode,Description,Quantity,UnitPrice,Revenue,CustomerID,Country,Date,Time,Year,Month,Day
0,536365,2010-12-01 08:26:00,85123A,CREAM HANGING HEART T-LIGHT HOLDER,6,2.55,15.3,17850,United Kingdom,2010-12-01,08:26,2010,12,1
1,536373,2010-12-01 09:02:00,85123A,CREAM HANGING HEART T-LIGHT HOLDER,6,2.55,15.3,17850,United Kingdom,2010-12-01,09:02,2010,12,1
2,536375,2010-12-01 09:32:00,85123A,CREAM HANGING HEART T-LIGHT HOLDER,6,2.55,15.3,17850,United Kingdom,2010-12-01,09:32,2010,12,1
3,536390,2010-12-01 10:19:00,85123A,CREAM HANGING HEART T-LIGHT HOLDER,64,2.55,163.2,17511,United Kingdom,2010-12-01,10:19,2010,12,1
4,536394,2010-12-01 10:39:00,85123A,CREAM HANGING HEART T-LIGHT HOLDER,32,2.55,81.6,13408,United Kingdom,2010-12-01,10:39,2010,12,1


## 7. Create an interactive scatter plot that plots number of invoices (x-axis) vs. number of customers (y-axis) and the plot points represent individual products. Add two sliders that control the x and y axis ranges.

In [None]:
# Per-product aggregation: distinct invoices and customers, summed quantity
# and revenue, and the mean unit price.
agg_func = dict(
    InvoiceNo='nunique',
    Quantity='sum',
    UnitPrice='mean',
    Revenue='sum',
    CustomerID='nunique',
)

# One row per product description, built from the UK rows.
products = uk.groupby('Description').agg(agg_func)

## 8. Create an interactive bar chart that shows revenue by product description. Add a text field widget that filters the results to show only the products that contain the entered text in their description.