# Interactive Visualization Lab

Complete the following set of exercises to solidify your knowledge of interactive visualization using Plotly, Cufflinks, and IPyWidgets.

In [None]:
import pandas as pd
import chart_studio.plotly as py
import cufflinks as cf
from ipywidgets import interact

import sys

# Make this site-packages directory importable so that numpy can be found.
# The membership check keeps sys.path free of duplicate entries when the cell
# is executed more than once.
_site_packages = '/usr/local/lib/python3.7/site-packages'
if _site_packages not in sys.path:
    sys.path.append(_site_packages)
import numpy as np

# Put cufflinks in offline mode before any .iplot() call is made.
cf.go_offline()

Please download the Online_retail dataset from [here](https://drive.google.com/file/d/1jD7eQYnfcvbm1ksDbD12U22JtbKItU7E/view?usp=sharing) and place it in the data folder.<br>
Load the data:

In [None]:
# Read the retail workbook from the data folder into a DataFrame and preview it.
data = pd.read_excel(io='../data/Online_Retail.xlsx')
data.head()

In [None]:
# Parse the invoice timestamps once, then split them into calendar parts.
invoice_dates = pd.to_datetime(data['InvoiceDate'])
data['Year'] = invoice_dates.dt.year
data['Month'] = invoice_dates.dt.month
data['Day'] = invoice_dates.dt.day

data.head()

## 1. Create an interactive bar chart showing total quantity and revenue by country (excluding United Kingdom) for the month of April 2011.

In [None]:
# Distinct country values present in the dataset.
data.Country.unique()

In [None]:
# Keep every row whose country is not the United Kingdom.
# The boolean mask is negated with ~ rather than compared against False.
data_noUK = data[~data['Country'].isin(['United Kingdom'])]

In [None]:
# Restrict to April 2011. Year and Month come from .dt.year / .dt.month and
# therefore hold integers, so they are compared against integers; matching
# them against the strings '2011' and '4' depends on implicit coercion and
# can select no rows at all.
data_noUK = data_noUK[(data_noUK['Year'] == 2011) & (data_noUK['Month'] == 4)]


In [None]:
# Sum revenue and quantity per country, then turn the Country index back into
# an ordinary column so it can be used as the x-axis.
data_noUK = (
    data_noUK
    .groupby('Country', as_index=True)
    .agg(Revenue=('Revenue', 'sum'), Quantity=('Quantity', 'sum'))
    .reset_index()
)

In [None]:
# data_noUK.info()

In [None]:
# Bar chart with one bar per measure (Revenue, Quantity) for each country.
# The y-axis carries both a currency amount and a unit count, so it is
# labelled the same way as the France chart, and the title names both measures.
data_noUK.iplot(
    kind='bar',
    x='Country',
    xTitle='Country',
    yTitle='$ or #',
    title='Total Quantity and Revenue by Country (April 2011)',
    color={'Revenue': 'Red', 'Quantity': 'blue'},
)

## 2. Create an interactive line chart showing quantity and revenue sold to France between January 1st and May 31st 2011.

In [None]:
# Sales to France between January and May 2011.
# Year is an integer column (taken from .dt.year), so it is compared with the
# integer 2011; matching it against the string '2011' depends on implicit
# coercion and can select no rows at all.
data_France = data[data['Country'] == 'France']
data_France = data_France[data_France['Year'] == 2011]
data_France = data_France[data_France['Month'].isin([1, 2, 3, 4, 5])]

data_France


In [None]:
# Assemble a single datetime per row from the Year / Month / Day columns.
date = pd.to_datetime(data_France[['Year', 'Month', 'Day']])
# assign() returns a new DataFrame, so the column is not written into a
# filtered slice of `data` (which would raise SettingWithCopyWarning).
data_France = data_France.assign(Date=date)

In [None]:
# Daily totals of revenue and quantity, with Date restored as a column.
data_France = (
    data_France
    .groupby(['Date'], as_index=True)
    .agg(Revenue=('Revenue', 'sum'), Quantity=('Quantity', 'sum'))
    .reset_index()
)

data_France


In [None]:
# Line chart of the remaining columns of data_France against Date.
data_France.iplot(
    kind='line',
    x='Date',
    xTitle='Jan to May 2011',
    yTitle='$ or #',
    title='France: Revenue and Quantity',
)

## 3. Create an interactive scatter plot showing the relationship between average quantity (x-axis) and average unit price (y-axis) for the product PARTY BUNTING with the plot points color-coded by country (categories).

In [None]:
# Average quantity and average unit price of PARTY BUNTING per country,
# with Country kept as a regular column for use as the plot category.
data_PB = (
    data[data['Description'].isin(['PARTY BUNTING'])]
    .groupby('Country', as_index=True)
    .agg(Quantity=('Quantity', 'mean'), Price=('UnitPrice', 'mean'))
    .reset_index()
)

data_PB

In [None]:
# Plot average price against average quantity, grouped by country.
data_PB.iplot(
    x='Quantity',
    y='Price',
    categories='Country',
    xTitle='Average Quantity',
    yTitle='Average Price',
    title='Quantity vs Price',
)

## 4. Create a set of interactive histograms showing the distributions of quantity per invoice for the following countries: EIRE, Germany, France, and Netherlands.

In [None]:

# Rows belonging to the four countries of interest.
data_EGFN = data[data['Country'].isin(['EIRE', 'Germany', 'France', 'Netherlands'])]

# One row per invoice, one column per country, holding the invoice's total
# quantity. Missing invoice/country combinations are left as NaN rather than
# filled with 0: a fill value of 0 would add an artificial zero observation to
# a country's column for every invoice that appears only under other countries.
data_EGFN_Q = data_EGFN.pivot_table(values='Quantity', columns='Country',
                                    index='InvoiceNo', aggfunc='sum')

# One histogram per country (subplots), normalised to percentages.
data_EGFN_Q.iplot(kind='hist', histnorm='percent', xTitle='Quantity per Invoice',
                  yTitle='Percent', subplots=True,
                  title='Quantity per Invoice per Country')

## 5. Create an interactive side-by-side bar chart showing the revenue by country listed below (bars) for each of the products listed below.

In [None]:
# Products whose revenue is compared in the side-by-side bar chart.
product_list = [
    'JUMBO BAG RED RETROSPOT',
    'CREAM HANGING HEART T-LIGHT HOLDER',
    'REGENCY CAKESTAND 3 TIER',
]

# Countries shown on the x-axis of that chart.
country_list = [
    'EIRE',
    'Germany',
    'France',
    'Netherlands',
]

In [None]:
# Select the listed products sold to the listed countries directly from the
# full dataset, using both lists defined for this exercise. Filtering from
# `data` instead of the data_EGFN left over from the histogram exercise makes
# the cell independent of that earlier cell and safe to re-run.
data_EGFN = data[data['Country'].isin(country_list)
                 & data['Description'].isin(product_list)]
data_EGFN

In [None]:
# Total revenue per country (rows) and product description (columns);
# Country is reset to a column so it can serve as the x-axis.
revenue_pivot = data_EGFN.pivot_table(
    values='Revenue',
    columns='Description',
    index='Country',
    aggfunc='sum',
)
data_EGFN_P = revenue_pivot.reset_index()

# One group of bars per country, one bar per product.
data_EGFN_P.iplot(
    kind='bar',
    x='Country',
    xTitle='Country',
    yTitle='Revenue',
    title='Revenue by Country',
)

## 6. Create an interactive line chart showing quantity sold by day for the United Kingdom. Add drop-down boxes for Year and Month that allow you to filter the date range that appears in the chart.

In [None]:
# Build the DatetimeIndex once and read the calendar parts off it.
invoice_index = pd.DatetimeIndex(data['InvoiceDate'])
data['Year'] = invoice_index.year
data['Month'] = invoice_index.month
data['Day'] = invoice_index.day

# United Kingdom rows only.
uk = data[data['Country'] == 'United Kingdom']

In [None]:
@interact(Selection=['Year', 'Month'])
def linechart(Selection):
    """Plot UK quantity by day of month, one line per value of ``Selection``.

    Parameters
    ----------
    Selection : str
        Either 'Year' or 'Month'; the column of ``uk`` whose distinct values
        become the individual lines of the chart.
    """
    # NOTE(review): the exercise asks for separate Year and Month drop-downs
    # that filter the date range, and for quantity *sold* (a total); this
    # keeps the existing single selector and the mean aggregation. Confirm
    # which is intended.
    # The pivot is stored under its own name so the global `data` frame is
    # not shadowed inside the function.
    daily = uk.pivot_table(values='Quantity', columns=Selection,
                           index='Day', aggfunc='mean').reset_index()

    # A space separates the country from the selector name ("UK Year").
    daily.iplot(kind='line', x='Day', xTitle='Day',
                yTitle='Quantity sold by Day', title='UK ' + Selection.title())

# Bonus challenges

## 7. Create an interactive scatter plot that plots number of invoices (x-axis) vs. number of customers (y-axis) and the plot points represent individual products. Add two sliders that control the x and y axis ranges.

In [None]:
# Aggregation applied to each column when UK sales are summarised per product.
agg_func = dict(
    InvoiceNo='nunique',
    Quantity='sum',
    UnitPrice='mean',
    Revenue='sum',
    CustomerID='nunique',
)

# One row per product description.
products = uk.groupby('Description').agg(agg_func)

## 8. Create an interactive bar chart that shows revenue by product description. Add a text field widget that filters the results to show the products that contain the text entered in their description.