# Interactive Visualization Lab

Complete the following set of exercises to solidify your knowledge of interactive visualization using Plotly, Cufflinks, and IPyWidgets.

In [1]:
import pandas as pd
import numpy as np
import plotly.express as px
import cufflinks as cf
from ipywidgets import interact
import seaborn as sns

# Put Cufflinks in offline mode so the `.iplot()` calls below render inside the notebook.
cf.go_offline()

In [2]:
# Load the retail transactions from the Excel workbook (relative path) into `data`,
# the frame every exercise below filters and aggregates.
data = pd.read_excel('../data/Online Retail.xlsx')

In [3]:
# Preview the first rows to confirm the columns loaded as expected.
data.head()

Unnamed: 0,InvoiceNo,InvoiceDate,StockCode,Description,Quantity,UnitPrice,Revenue,CustomerID,Country
0,536365,2010-12-01 08:26:00,85123A,CREAM HANGING HEART T-LIGHT HOLDER,6,2.55,15.3,17850,United Kingdom
1,536373,2010-12-01 09:02:00,85123A,CREAM HANGING HEART T-LIGHT HOLDER,6,2.55,15.3,17850,United Kingdom
2,536375,2010-12-01 09:32:00,85123A,CREAM HANGING HEART T-LIGHT HOLDER,6,2.55,15.3,17850,United Kingdom
3,536390,2010-12-01 10:19:00,85123A,CREAM HANGING HEART T-LIGHT HOLDER,64,2.55,163.2,17511,United Kingdom
4,536394,2010-12-01 10:39:00,85123A,CREAM HANGING HEART T-LIGHT HOLDER,32,2.55,81.6,13408,United Kingdom


In [4]:
# Check dtypes and non-null counts; the exercises rely on InvoiceDate being datetime.
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 396034 entries, 0 to 396033
Data columns (total 9 columns):
 #   Column       Non-Null Count   Dtype         
---  ------       --------------   -----         
 0   InvoiceNo    396034 non-null  int64         
 1   InvoiceDate  396034 non-null  datetime64[ns]
 2   StockCode    396034 non-null  object        
 3   Description  396034 non-null  object        
 4   Quantity     396034 non-null  int64         
 5   UnitPrice    396034 non-null  float64       
 6   Revenue      396034 non-null  float64       
 7   CustomerID   396034 non-null  int64         
 8   Country      396034 non-null  object        
dtypes: datetime64[ns](1), float64(2), int64(3), object(3)
memory usage: 27.2+ MB


## 1. Create an interactive bar chart showing total quantity and revenue by country (excluding United Kingdom) for the month of April 2011.

In [5]:
# Exercise 1: total quantity and revenue per country for April 2011, United Kingdom excluded.
april_2011 = (data['InvoiceDate'].dt.year == 2011) & (data['InvoiceDate'].dt.month == 4)
outside_uk = data['Country'] != 'United Kingdom'

data_1 = data[april_2011 & outside_uk].pivot_table(
    index=['Country'],
    values=['Quantity', 'Revenue'],
    # Use the string alias rather than the builtin `sum`: recent pandas versions
    # emit a FutureWarning when a builtin callable is passed as the aggregation.
    aggfunc='sum',
)
data_1.head()

Unnamed: 0_level_0,Quantity,Revenue
Country,Unnamed: 1_level_1,Unnamed: 2_level_1
Australia,224,421.6
Austria,308,584.78
Belgium,1170,1788.48
Brazil,356,1143.6
Channel Islands,96,243.0


In [6]:
# Grouped bars (one per metric) for each country; title and axis labels let the
# figure stand on its own when the notebook is skimmed.
data_1.iplot(
    kind='bar',
    title='Quantity and revenue by country (excluding United Kingdom), April 2011',
    xTitle='Country',
    yTitle='Total',
)

## 2. Create an interactive line chart showing quantity and revenue sold to France between January 1st and May 31st 2011.

In [7]:
# Exercise 2: daily quantity and revenue sold to France, January 1st through May 31st 2011.
# The upper bound is the exclusive start of June: comparing with '2011-05-31' would mean
# midnight of that day and silently drop every invoice issued during May 31st.
in_window = (data['InvoiceDate'] >= '2011-01-01') & (data['InvoiceDate'] < '2011-06-01')
france_jan_may = data[in_window & (data['Country'] == 'France')]

# Group on the calendar date taken from the filtered rows themselves, so the key
# and the rows being aggregated always line up.
data_2 = france_jan_may.groupby(france_jan_may['InvoiceDate'].dt.date)[['Quantity', 'Revenue']].sum()

In [8]:
# One line per metric over the invoice dates; title and axis labels let the
# figure stand on its own when the notebook is skimmed.
data_2.iplot(
    kind='line',
    title='Quantity and revenue sold to France, January-May 2011',
    xTitle='Invoice date',
    yTitle='Daily total',
)

## 3. Create an interactive scatter plot showing the relationship between average quantity (x-axis) and average unit price (y-axis) for the product PARTY BUNTING with the plot points color-coded by country (categories).

In [9]:
# Exercise 3: per-country average quantity and unit price for the PARTY BUNTING product.
data_3 = (
    data.loc[data['Description'].eq('PARTY BUNTING')]
    .groupby('Country', as_index=False)[['Quantity', 'UnitPrice']]
    .mean()
)

In [10]:
# One point per country: average quantity on x, average unit price on y.
px.scatter(
    data_frame=data_3,
    x='Quantity',
    y='UnitPrice',
    color='Country',
)

## 4. Create a set of interactive histograms showing the distributions of quantity per invoice for the following countries: EIRE, Germany, France, and Netherlands.

In [11]:
# Exercise 4: one histogram per country of the total quantity on each invoice.
countries = ['EIRE', 'Germany', 'France', 'Netherlands']
for country in countries:
    country_rows = data.loc[data['Country'] == country]
    # Sum the line items of each invoice to get the quantity per invoice.
    quantity_per_invoice = country_rows.groupby('InvoiceNo')['Quantity'].sum()
    quantity_per_invoice.iplot(kind='hist', title=f'Quantity per invoice in {country}')

## 5. Create an interactive side-by-side bar chart showing the revenue by country listed below (bars) for each of the products listed below.

In [12]:
# Products compared in the side-by-side revenue chart (one group of bars each).
product_list = [
    'JUMBO BAG RED RETROSPOT',
    'CREAM HANGING HEART T-LIGHT HOLDER',
    'REGENCY CAKESTAND 3 TIER',
]

# Countries compared within each product group (one bar each).
country_list = [
    'EIRE',
    'Germany',
    'France',
    'Netherlands',
]

In [13]:
# Exercise 5: total revenue for every (country, product) pair drawn from the two lists.
data_5 = (
    data.loc[data['Description'].isin(product_list) & data['Country'].isin(country_list)]
    .groupby(['Country', 'Description'], as_index=False)['Revenue']
    .sum()
)

In [14]:
# Products along the x-axis, one bar per country placed side by side within each product.
(
    px.bar(
        data_frame=data_5,
        x='Description',
        y='Revenue',
        color='Country',
        barmode='group',
    )
    .update_layout(xaxis_tickangle=5)
)

## 6. Create an interactive line chart showing quantity sold by day for the United Kingdom. Add drop-down boxes for Year and Month that allow you to filter the date range that appears in the chart.

In [15]:
# Exercise 6 setup: break the invoice timestamp into calendar parts for the drop-down filters.
# NOTE: this adds three columns to `data` in place, so earlier previews of `data` no longer
# show its full set of columns.
data['Year'] = data['InvoiceDate'].dt.year
data['Month'] = data['InvoiceDate'].dt.month
data['Day'] = data['InvoiceDate'].dt.day

# United Kingdom rows only; reused by exercises 6, 7 and 8.
uk = data.loc[data['Country'].eq('United Kingdom')]

In [23]:
@interact(Year=uk['Year'].sort_values(ascending=False).unique(), Month=uk['Month'].sort_values(ascending=False).unique())
def explore(Year, Month):
    """Plot the total quantity sold per day in the United Kingdom for one month.

    Parameters
    ----------
    Year : int
        Calendar year chosen in the drop-down.
    Month : int
        Calendar month (1-12) chosen in the drop-down.

    The rows of `uk` are individual invoice lines, so they are summed per day
    before plotting. Plotting the raw rows as a line would show the average
    quantity per invoice line rather than the quantity sold that day.
    """
    selected = uk[(uk['Year'] == Year) & (uk['Month'] == Month)]

    # Not every Year/Month pair offered by the drop-downs has sales behind it.
    if selected.empty:
        print(f'No United Kingdom sales recorded for {int(Year)}-{int(Month):02d}.')
        return

    data_6 = selected.groupby('Day')['Quantity'].sum()
    data_6.iplot(
        kind='line',
        title=f'United Kingdom - quantity sold by day, {int(Year)}-{int(Month):02d}',
        xTitle='Day of month',
        yTitle='Quantity sold',
    )

interactive(children=(Dropdown(description='Year', options=(2011, 2010), value=2011), Dropdown(description='Mo…

## 7. Create an interactive scatter plot that plots number of invoices (x-axis) vs. number of customers (y-axis) and the plot points represent individual products. Add two sliders that control the x and y axis ranges.

In [17]:
# Exercise 7 setup: per-product summary of United Kingdom sales.
agg_func = dict(
    InvoiceNo='nunique',   # number of distinct invoices containing the product
    Quantity='sum',
    UnitPrice='mean',
    Revenue='sum',
    CustomerID='nunique',  # number of distinct customers who bought the product
)

# One row per product description, one column per aggregated metric.
products = uk.groupby('Description').agg(agg_func)

In [18]:
# Preview the per-product summary (index is the product description).
products.head()

Unnamed: 0_level_0,InvoiceNo,Quantity,UnitPrice,Revenue,CustomerID
Description,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
4 PURPLE FLOCK DINNER CANDLES,35,134,2.318421,255.46,30
50'S CHRISTMAS GIFT BAG LARGE,100,1721,1.2479,2067.25,98
DOLLY GIRL BEAKER,100,661,1.25,826.25,77
I LOVE LONDON MINI BACKPACK,55,181,4.15,751.15,46
NINE DRAWER OFFICE TIDY,25,44,14.761538,628.4,24


In [19]:
@interact(
    invoices=(0, products['InvoiceNo'].max()),
    customers=(0, products['CustomerID'].max()),
)
def scatter_plot(invoices, customers):
    """Scatter each product by invoice count (x) and customer count (y).

    The two slider values set the upper limits of the x and y axes; both axes
    start at 0.
    """
    products.iplot(
        kind='scatter',
        x='InvoiceNo',
        y='CustomerID',
        mode='markers',
        xrange=(0, invoices),
        yrange=(0, customers),
    )

interactive(children=(IntSlider(value=945, description='invoices', max=1891), IntSlider(value=410, description…

## 8. Create an interactive bar chart that shows revenue by product description. Add a text field widget that filters the results to show the products that contain the text entered in their description.

In [20]:
# Exercise 8 setup: total United Kingdom revenue per product description.
# NOTE: this rebinds `agg_func`, replacing the mapping defined for exercise 7.
agg_func = dict(Revenue='sum')

# `as_index=False` keeps Description as an ordinary column for plotting and filtering.
data_8 = uk.groupby('Description', as_index=False).agg(agg_func)
data_8.head()

Unnamed: 0,Description,Revenue
0,4 PURPLE FLOCK DINNER CANDLES,255.46
1,50'S CHRISTMAS GIFT BAG LARGE,2067.25
2,DOLLY GIRL BEAKER,826.25
3,I LOVE LONDON MINI BACKPACK,751.15
4,NINE DRAWER OFFICE TIDY,628.4


In [21]:
@interact(Product='')
def bar(Product):
    """Plot revenue for the products whose description contains the typed text.

    Parameters
    ----------
    Product : str
        Text entered in the widget. An empty string matches every product.

    The text is matched literally and case-insensitively. Treating it as a
    regular expression would raise on partially typed input such as "(" or "["
    and would let characters like "." or "+" match unintentionally.
    """
    matches = data_8[data_8['Description'].str.contains(Product, case=False, regex=False)]

    # Nothing to draw when the filter removes every row; report it instead.
    if matches.empty:
        print(f'No product description contains "{Product}".')
        return

    matches.iplot(kind='bar', x='Description', y='Revenue')

interactive(children=(Text(value='', description='Product'), Output()), _dom_classes=('widget-interact',))