# Interactive Visualization Lab

Complete the following set of exercises to solidify your knowledge of interactive visualization using Plotly, Cufflinks, and IPyWidgets.

In [None]:
import pandas as pd
import chart_studio.plotly as py
import cufflinks as cf
from ipywidgets import interact

# Put cufflinks into offline mode up front; every .iplot() call below relies on it.
cf.go_offline()

Please download the Online_Retail dataset from [here](https://drive.google.com/file/d/1jD7eQYnfcvbm1ksDbD12U22JtbKItU7E/view?usp=sharing) and place it in the data folder.<br>
Load the data:

In [None]:
# Read the workbook from the data folder (path is relative to this notebook)
# and preview the first rows to check that the load worked.
online_retail = pd.read_excel('../data/Online_Retail.xlsx')
online_retail.head()

## 1. Create an interactive bar chart showing total quantity and revenue by country (excluding United Kingdom) for the month of April 2011.

In [None]:
# Total quantity and revenue per country for April 2011, United Kingdom excluded.
#
# Rows are picked with an explicit boolean mask rather than a label slice on a
# DatetimeIndex, so the selection does not depend on the rows being sorted by
# InvoiceDate. Filtering the UK out *before* grouping also means nothing breaks
# if the UK happens to have no rows in the period.
# (Assumes InvoiceDate is a datetime column, as the rest of the notebook does.)
april_2011_non_uk = (
    (online_retail['InvoiceDate'] >= pd.Timestamp('2011-04-01'))
    & (online_retail['InvoiceDate'] < pd.Timestamp('2011-05-01'))  # half-open: keeps all of 30 April
    & (online_retail['Country'] != 'United Kingdom')
)

#grouping: aggfunc is the string 'sum' (pandas' own reduction) instead of the Python builtin
revenue_per_country = (
    online_retail.loc[april_2011_non_uk, ['Country', 'Quantity', 'Revenue']]
    .pivot_table(index='Country', values=['Quantity', 'Revenue'], aggfunc='sum')
    .reset_index()
)

#plotting
revenue_per_country.iplot(kind='bar', x='Country', xTitle='Country', yTitle='Total', title='quantity and revenue by country')


## 2. Create an interactive line chart showing quantity and revenue sold to France between January 1st and May 31st 2011.

In [None]:
# Daily quantity and revenue sold to France between 1 January and 31 May 2011.
#
# The period and country are selected with one boolean mask; this avoids the
# set_index / label-slice / reset_index round trip and does not rely on the rows
# being sorted by InvoiceDate.
# (Assumes InvoiceDate is a datetime column, as the rest of the notebook does.)
france_jan_to_may = (
    (online_retail['Country'] == 'France')
    & (online_retail['InvoiceDate'] >= pd.Timestamp('2011-01-01'))
    & (online_retail['InvoiceDate'] < pd.Timestamp('2011-06-01'))  # half-open: keeps all of 31 May
)

#group by day: strip the time of day, then total both measures per calendar date
revenue_france = (
    online_retail.loc[france_jan_to_may, ['InvoiceDate', 'Quantity', 'Revenue']]
    .assign(day=lambda frame: frame['InvoiceDate'].dt.date)
    .pivot_table(index='day', values=['Quantity', 'Revenue'], aggfunc='sum')
    .reset_index()
)

#plotting
revenue_france.iplot(kind='line', x='day', xTitle='Date', yTitle='Total', title='Quantity and Revenue for France')

## 3. Create an interactive scatter plot showing the relationship between average quantity (x-axis) and average unit price (y-axis) for the product PARTY BUNTING with the plot points color-coded by country (categories).

In [None]:
#selecting all rows with the product, then averaging per country.
# The exercise (and the chart title) ask for the *average* quantity and unit
# price, so each country is reduced to a single point instead of plotting every
# invoice line.
party_bunting = (
    online_retail.loc[online_retail['Description'] == 'PARTY BUNTING']
    .groupby('Country', as_index=False)[['Quantity', 'UnitPrice']]
    .mean()
)

# kind/mode are spelled out so the figure is a marker-only scatter plot,
# with one colour per country via `categories`.
party_bunting.iplot(kind='scatter', mode='markers', x='Quantity', y='UnitPrice', categories='Country',
                    xTitle='Quantity', yTitle='Unit Price',
                    title='avg. Quantity and Price per Country for party buntings')


## 4. Create a set of interactive histograms showing the distributions of quantity per invoice for the following countries: EIRE, Germany, France, and Netherlands.

In [None]:
#selecting countries
invoice_country = online_retail[online_retail['Country'].isin(['EIRE', 'Germany', 'France', 'Netherlands'])]

#pivot
invoice_country = invoice_country.pivot_table(index='InvoiceNo',columns='Country',values='Quantity', aggfunc=sum)
invoice_country #= invoice_country.reset_index()

#plot (not really sure what is on the X axis and what is on Y)
invoice_country.iplot(kind='hist',xTitle='Invoice#', yTitle='Quantity',subplots=True)


# Hi Paulo, do you happen to know why, when I was pivoting as below (with square brackets) it gives an error?
# I've always pivoted like this and it always seemed to work fine? It took me forever to find the error and fix it
# Thanks! 

# Original code that throws error:

# #pivot
# invoice_country = invoice_country.pivot_table(index=['InvoiceNo'],columns=['Country'],values=['Quantity'], aggfunc=sum)
# invoice_country #= invoice_country.reset_index()

# #plot
# invoice_country.iplot(kind='hist',subplots=True)


## 5. Create an interactive side-by-side bar chart showing the revenue by country listed below (bars) for each of the products listed below.

In [None]:
# Products compared in exercise 5 (matched against the Description column).
product_list = [
    'JUMBO BAG RED RETROSPOT',
    'CREAM HANGING HEART T-LIGHT HOLDER',
    'REGENCY CAKESTAND 3 TIER',
]

# Countries compared in exercise 5 (matched against the Country column).
country_list = [
    'EIRE',
    'Germany',
    'France',
    'Netherlands',
]

In [None]:
# selecting products and countries
# Both conditions go into ONE mask. Filtering `online_retail` twice in a row and
# assigning to the same name would throw the country filter away, because the
# second statement starts again from the unfiltered frame.
wanted_rows = (
    online_retail['Country'].isin(country_list)
    & online_retail['Description'].isin(product_list)
)
revenue_country = online_retail.loc[wanted_rows, ['Country', 'Description', 'Revenue']]

# total revenue per (country, product) pair, kept in long format
revenue_country = revenue_country.groupby(by=['Country', 'Description'], as_index=False).agg({"Revenue": 'sum'})

# NOTE(review): x is given as a list of two column names -- confirm that
# cufflinks renders this the way it is intended.
revenue_country.iplot(kind='bar', x=['Country','Description'], xTitle='Country', yTitle='Revenue', title='revenue per country')

In [None]:
# Version 2 with Pivot (much nicer)
# selecting products and countries
# Both conditions go into ONE mask. Filtering `online_retail` twice in a row and
# assigning to the same name would throw the country filter away, because the
# second statement starts again from the unfiltered frame.
wanted_rows = (
    online_retail['Country'].isin(country_list)
    & online_retail['Description'].isin(product_list)
)
revenue_country = online_retail.loc[wanted_rows, ['Country', 'Description', 'Revenue']]

# pivot: one row per country, one column per product, each cell the summed revenue.
# With this shape, a bar chart draws one bar per product next to each other for every country.
revenue_country = revenue_country.pivot_table(index='Country', columns='Description', values='Revenue', aggfunc='sum')

revenue_country.iplot(kind='bar', xTitle='Country', yTitle='Revenue', title='revenue per country')

## 6. Create an interactive line chart showing quantity sold by day for the United Kingdom. Add drop-down boxes for Year and Month that allow you to filter the date range that appears in the chart.

In [None]:
# Break InvoiceDate into calendar parts. The DatetimeIndex is built once and
# reused for all three columns.
invoice_dates = pd.DatetimeIndex(online_retail['InvoiceDate'])
online_retail['Year'] = invoice_dates.year
online_retail['Month'] = invoice_dates.month
online_retail['Day'] = invoice_dates.day  # day of the month

# Keep only United Kingdom rows (including the new Year/Month/Day columns).
is_uk = online_retail['Country'] == 'United Kingdom'
uk = online_retail[is_uk]
uk

In [None]:
@interact(Selection=['Year', 'Month'])
def linechart(Selection):
    """Plot total UK quantity per day of the month, one line per Year or per Month.

    Selection is the name of the `uk` column chosen in the drop-down
    ('Year' or 'Month'); its distinct values become the lines of the chart.
    """
    # NOTE(review): the exercise asks for separate Year and Month drop-downs that
    # filter the date range; this widget instead picks which of the two columns
    # splits the lines -- confirm that this is the intended reading.
    daily_totals = uk.pivot_table(index='Day', columns=Selection,
                                  values='Quantity', aggfunc='sum')
    daily_totals = daily_totals.reset_index()  # 'Day' must be a column to be used as x

    chart_title = 'total Quantity by day by ' + Selection.title()
    daily_totals.iplot(kind='line', x='Day', xTitle='Day',
                       yTitle='total Quantity', title=chart_title)

# Bonus challenges

## 7. Create an interactive scatter plot that plots number of invoices (x-axis) vs. number of customers (y-axis) and the plot points represent individual products. Add two sliders that control the x and y axis ranges.

In [None]:
# Per-column reductions for the product summary: distinct invoices and
# customers, summed quantity and revenue, mean unit price.
agg_func = dict(
    InvoiceNo='nunique',
    Quantity='sum',
    UnitPrice='mean',
    Revenue='sum',
    CustomerID='nunique',
)

# One row per product description, computed from the UK rows only.
by_description = uk.groupby('Description')
products = by_description.agg(agg_func)

## 8. Create an interactive bar chart that shows revenue by product description. Add a text field widget that filters the results to show only the products that contain the entered text in their description.