# Interactive Visualization Lab

Complete the following set of exercises to solidify your knowledge of interactive visualization using Plotly, Cufflinks, and IPyWidgets.

In [110]:
import pandas as pd
import plotly.plotly as py
import cufflinks as cf
from ipywidgets import interact
import numpy as np
import re

# Called once, before any .iplot() call in the notebook.
# NOTE(review): presumably this makes cufflinks render charts locally instead of
# through the Plotly cloud service -- confirm against the cufflinks docs.
cf.go_offline()

In [3]:
# Load the retail workbook into the `data` frame used by every exercise below.
# The path is relative, so it resolves against the notebook's working directory.
data = pd.read_excel('data/Online Retail.xlsx')

In [4]:
# Preview the first five rows to check the columns that were loaded.
data.head(5)

Unnamed: 0,InvoiceNo,InvoiceDate,StockCode,Description,Quantity,UnitPrice,Revenue,CustomerID,Country
0,536365,2010-12-01 08:26:00,85123A,CREAM HANGING HEART T-LIGHT HOLDER,6,2.55,15.3,17850,United Kingdom
1,536373,2010-12-01 09:02:00,85123A,CREAM HANGING HEART T-LIGHT HOLDER,6,2.55,15.3,17850,United Kingdom
2,536375,2010-12-01 09:32:00,85123A,CREAM HANGING HEART T-LIGHT HOLDER,6,2.55,15.3,17850,United Kingdom
3,536390,2010-12-01 10:19:00,85123A,CREAM HANGING HEART T-LIGHT HOLDER,64,2.55,163.2,17511,United Kingdom
4,536394,2010-12-01 10:39:00,85123A,CREAM HANGING HEART T-LIGHT HOLDER,32,2.55,81.6,13408,United Kingdom


In [31]:
# (number of rows, number of columns) of the loaded frame.
data.shape

(396034, 9)

## 1. Create an interactive bar chart showing total quantity and revenue by country (excluding United Kingdom) for the month of April 2011.

In [17]:
# Total quantity and revenue per country for April 2011, United Kingdom excluded.
exclude_uk = data["Country"] != "United Kingdom"

# Vectorised year/month test instead of stringifying every timestamp row by row.
invoice_dates = pd.to_datetime(data["InvoiceDate"])
month_mask = (invoice_dates.dt.year == 2011) & (invoice_dates.dt.month == 4)

# Pick the two numeric columns *before* summing: aggregating the whole frame also
# tries to sum text columns such as Description, which is wasteful and can fail.
by_country_201104 = (
    data.loc[exclude_uk & month_mask]
    .groupby("Country")[["Quantity", "Revenue"]]
    .sum()
)

In [18]:
# Interactive bar chart of the per-country totals computed above.
by_country_201104.iplot(
    kind="bar",
    title="Quantity and Revenue per country, excluding UK, in April 2011",
)

## 2. Create an interactive line chart showing quantity and revenue sold to France between January 1st and May 31st 2011.

In [24]:
# Quantity and revenue sold to France between 2011-01-01 and 2011-05-31 (inclusive),
# summed per invoice timestamp.
france = data["Country"] == "France"

# One half-open date range replaces five separate month masks; the upper bound is
# exclusive so every timestamp on 31 May is still included.
invoice_dates = pd.to_datetime(data["InvoiceDate"])
jan_to_may_2011 = (
    (invoice_dates >= pd.Timestamp("2011-01-01"))
    & (invoice_dates < pd.Timestamp("2011-06-01"))
)

# Select the numeric columns before summing so text columns are never aggregated.
france_20110105 = (
    data.loc[france & jan_to_may_2011]
    .groupby("InvoiceDate")[["Quantity", "Revenue"]]
    .sum()
)

In [25]:
# Line chart (iplot's default kind) of the France series; the title and x-axis
# label let the figure be read without the surrounding cells.
france_20110105.iplot(
    title="Quantity and Revenue sold to France, January to May 2011",
    xTitle="Invoice date",
)

## 3. Create an interactive scatter plot showing the relationship between average quantity (x-axis) and average unit price (y-axis) for the product PARTY BUNTING with the plot points color-coded by country (categories).

In [29]:
# Per-country averages of the numeric columns for the PARTY BUNTING product.
# Built in this cell so the preview works on a fresh kernel instead of relying on
# the variable being left over from a cell further down the notebook.
groupby_countr_avgs = (
    data.loc[data["Description"] == "PARTY BUNTING"]
    .groupby("Country")
    .mean(numeric_only=True)
    .reset_index()
)
groupby_countr_avgs

Unnamed: 0,Country,InvoiceNo,Quantity,UnitPrice,Revenue,CustomerID
0,Australia,554329.625,33.125,4.7125,143.78125,12399.25
1,Austria,552202.0,8.0,4.95,39.6,12414.0
2,Belgium,557600.0,4.0,4.95,19.8,12363.0
3,Channel Islands,561821.666667,13.333333,4.95,66.0,14934.0
4,Cyprus,553141.333333,2.333333,4.75,10.95,12373.333333
5,Denmark,557427.0,12.0,4.95,59.4,12429.0
6,EIRE,564075.0,21.210526,4.739474,90.676316,14665.210526
7,Finland,552115.0,6.0,4.95,29.7,12428.0
8,France,560401.590909,5.727273,4.922727,28.213636,12640.863636
9,Germany,554094.4,6.8,4.89,33.06,12616.6


In [30]:
# Average quantity vs. average unit price of PARTY BUNTING, one point per country.
partybunting = data["Description"] == "PARTY BUNTING"

# numeric_only=True keeps the averages to numeric columns; averaging text columns
# such as Description or StockCode is meaningless and raises in current pandas.
groupby_countr_avgs = (
    data.loc[partybunting]
    .groupby("Country")
    .mean(numeric_only=True)
    .reset_index()
)

groupby_countr_avgs.iplot(
    kind="scatter",
    x="Quantity",
    y="UnitPrice",
    categories="Country",
    xTitle="Average quantity",
    yTitle="Average unit price",
    title="PARTY BUNTING: average quantity vs. average unit price by country",
)


The pandas.np module is deprecated and will be removed from pandas in a future version. Import numpy directly instead


The pandas.np module is deprecated and will be removed from pandas in a future version. Import numpy directly instead



## 4. Create a set of interactive histograms showing the distributions of quantity per invoice for the following countries: EIRE, Germany, France, and Netherlands.

In [38]:
# One histogram per country showing how the total quantity per invoice is distributed.
countries = ["EIRE", "Germany", "France", "Netherlands"]
for c in countries:
    # Collapse the line items to one total per invoice first; a histogram of the
    # raw InvoiceNo column would only show how the invoice identifiers are spread.
    quantity_per_invoice = (
        data.loc[data["Country"] == c]
        .groupby("InvoiceNo")["Quantity"]
        .sum()
    )
    quantity_per_invoice.iplot(
        kind="histogram",
        title=f"Quantity per invoice in {c}",
        xTitle="Quantity per invoice",
        yTitle="Number of invoices",
    )

## 5. Create an interactive side-by-side bar chart showing the revenue by country listed below (bars) for each of the products listed below.

In [40]:
# Product descriptions to compare.
product_list = [
    'JUMBO BAG RED RETROSPOT',
    'CREAM HANGING HEART T-LIGHT HOLDER',
    'REGENCY CAKESTAND 3 TIER',
]

# Countries to include in the comparison.
country_list = [
    'EIRE',
    'Germany',
    'France',
    'Netherlands',
]

In [55]:
# Boolean row selectors: True where the row's country / product is one of the chosen ones.
country_mask = data["Country"].isin(country_list)
product_mask = data["Description"].isin(product_list)

# Only the last expression of a cell is rendered, so just the country mask is shown.
country_mask

0         False
1         False
2         False
3         False
4         False
          ...  
396029    False
396030    False
396031    False
396032    False
396033    False
Name: Country, Length: 396034, dtype: bool

In [56]:
# Summed revenue with one row per country and one column per product description,
# restricted to the rows that match both masks.
revenue_per_country_product = (
    data.loc[product_mask & country_mask]
    .pivot_table(
        values="Revenue",
        index="Country",
        columns="Description",
        aggfunc="sum",
    )
)

In [61]:
# Interactive bar chart of the country-by-product revenue table.
revenue_per_country_product.iplot(
    kind="bar",
    title="Revenue per Country",
)

## 6. Create an interactive line chart showing quantity sold by day for the United Kingdom. Add drop-down boxes for Year and Month that allow you to filter the date range that appears in the chart.

In [58]:
# Convert the invoice timestamps once and reuse the result for all three date
# parts, instead of rebuilding a DatetimeIndex over the full column three times.
invoice_dates = pd.DatetimeIndex(data['InvoiceDate'])
data['Year'] = invoice_dates.year
data['Month'] = invoice_dates.month
data['Day'] = invoice_dates.day

# Taken after the assignments above so the UK subset carries Year/Month/Day too.
uk = data[data['Country'] == 'United Kingdom']

In [67]:
# Distinct years present in the data, in ascending order.
np.unique(data["Year"])

array([2010, 2011], dtype=int64)

In [83]:
# Quantity sold per calendar day in the United Kingdom over the whole dataset.
# Timestamps are truncated to midnight so all invoices of one day fall into the
# same group, and only Quantity is summed instead of every column.
(
    uk.assign(InvoiceDate=pd.to_datetime(uk["InvoiceDate"]).dt.normalize())
    .groupby("InvoiceDate")["Quantity"]
    .sum()
    .reset_index()
    .iplot(
        kind="line",
        x="InvoiceDate",
        y="Quantity",
        title="Quantity sold per day in the United Kingdom",
    )
)

In [80]:
@interact(Year=sorted(data["Year"].unique().tolist()),
          Month=sorted(data["Month"].unique().tolist()))
def linechart(Year, Month):
    """Plot the quantity sold per day in the United Kingdom for one month.

    Parameters
    ----------
    Year : int
        Calendar year chosen in the drop-down.
    Month : int
        Calendar month (1-12) chosen in the drop-down.

    The drop-downs list every year and every month found in the data, so some
    combinations have no sales; a short message is printed for those instead
    of attempting to draw an empty chart.
    """
    selected = (
        (data["Year"] == Year)
        & (data["Month"] == Month)
        & (data["Country"] == "United Kingdom")
    )
    interactive_data = data.loc[selected]

    if interactive_data.empty:
        print(f"No United Kingdom sales recorded for {Year}-{Month:02d}.")
        return

    # Truncate the timestamps to the day so the chart has one point per day, and
    # sum only Quantity rather than every column of the frame.
    daily_quantity = (
        interactive_data
        .assign(InvoiceDate=pd.to_datetime(interactive_data["InvoiceDate"]).dt.normalize())
        .groupby("InvoiceDate")["Quantity"]
        .sum()
        .reset_index()
    )
    daily_quantity.iplot(kind="line", x="InvoiceDate", y="Quantity")

interactive(children=(Dropdown(description='Year', options=(2010, 2011), value=2010), Dropdown(description='Mo…

## 7. Create an interactive scatter plot that plots number of invoices (x-axis) vs. number of customers (y-axis) and the plot points represent individual products. Add two sliders that control the x and y axis ranges.

In [104]:
# How each column is summarised per product: distinct counts for invoices and
# customers, totals for quantity and revenue, and the mean unit price.
agg_func = dict(
    InvoiceNo='nunique',
    Quantity='sum',
    UnitPrice='mean',
    Revenue='sum',
    CustomerID='nunique',
)

# One row per product description, built from the United Kingdom subset.
products = uk.groupby('Description').agg(agg_func)

# Number of distinct invoice numbers in the full dataset (all countries).
len(np.unique(data["InvoiceNo"]))

18389

In [109]:
@interact(customers=(1, int(products["CustomerID"].max())),
          invoices=(1, int(products["InvoiceNo"].max())))
def scatter(customers, invoices):
    """Scatter plot of invoices (x) against customers (y), one point per product.

    Parameters
    ----------
    customers : int
        Upper bound of the y-axis (number of distinct customers), set by a slider.
    invoices : int
        Upper bound of the x-axis (number of distinct invoices), set by a slider.

    Each point is one row of `products`; the product description is shown on
    hover rather than used as a category, which would create one trace per product.
    """
    products.reset_index().iplot(
        kind="scatter",
        mode="markers",
        x="InvoiceNo",
        y="CustomerID",
        text="Description",
        xrange=[0, invoices],
        yrange=[0, customers],
        xTitle="Number of invoices",
        yTitle="Number of customers",
        title="Invoices vs. customers per product",
    )

## 8. Create an interactive bar chart that shows revenue by product description. Add a text field widget that filters the results to show the products that contain the entered text in their description.

In [142]:
# Total revenue per product description, as a two-column frame
# (Description, Revenue). Revenue is selected before summing so the other
# columns of the frame are never aggregated.
revenue_per_description = (
    data.groupby("Description")["Revenue"]
    .sum()
    .reset_index()
)

In [141]:
@interact(Product_Description="")
def barchart(Product_Description):
    """Bar chart of revenue for the products whose description contains the text.

    Parameters
    ----------
    Product_Description : str
        Text typed into the widget. It is matched case-insensitively as a plain
        substring; an empty string matches every product.

    The text is deliberately not treated as a regular expression: the callback
    runs on every keystroke, and partial input such as "(" is not a valid
    pattern and would raise.
    """
    matches = revenue_per_description["Description"].str.contains(
        Product_Description, case=False, regex=False, na=False
    )

    if not matches.any():
        print(f"No product description contains {Product_Description!r}.")
        return

    revenue_per_description.loc[matches].iplot(kind="bar", x="Description", title="Revenue per Product",
                                               margin=(50,45,250,50))
    

interactive(children=(Text(value='', description='Product_Description'), Output()), _dom_classes=('widget-inte…