# Interactive Visualization Lab

Complete the following set of exercises to solidify your knowledge of interactive visualization using Plotly, Cufflinks, and IPyWidgets.

In [None]:
import chart_studio.plotly as py

import cufflinks as cf

import pandas as pd

import numpy as np

%matplotlib inline

cf.go_offline()

import ipywidgets as widgets

from ipywidgets import interact

from ipywidgets import interact, interactive, fixed, interact_manual

Please download the Online_Retail dataset from [here](https://drive.google.com/file/d/1jD7eQYnfcvbm1ksDbD12U22JtbKItU7E/view?usp=sharing) and place it in the data folder.<br>
Load the data:

In [None]:
# Read the Online Retail workbook from the data folder and preview the
# first rows of the resulting frame.
retail_workbook = '../data/Online_Retail.xlsx'
data = pd.read_excel(retail_workbook)
data.head()

## 1. Create an interactive bar chart showing total quantity and revenue by country (excluding United Kingdom) for the month of April 2011.

In [None]:
# Exercise 1: total quantity and revenue per country (United Kingdom
# excluded) for April 2011, shown as an interactive bar chart.

# Filter Date:
start_date = "2011-04-01"
end_date = "2011-04-30"

# A bare date string is compared as midnight of that day, so
# `InvoiceDate <= end_date` would drop every invoice issued during the last
# day of the range. Compare against the start of the following day instead.
# (Assumes InvoiceDate is a datetime column - confirm after loading.)
after_start_date = data["InvoiceDate"] >= start_date
before_end_date = data["InvoiceDate"] < pd.Timestamp(end_date) + pd.Timedelta(days=1)
between_two_dates = after_start_date & before_end_date
filtered_dates = data.loc[between_two_dates]

# Filter Country:
filtered_dates = filtered_dates.loc[filtered_dates["Country"] != "United Kingdom"]

# Interactive Plot:
# Select the needed columns before aggregating so only they are summed.
sales = filtered_dates[["Country", "Quantity", "Revenue"]].groupby(["Country"]).agg("sum")

sales.iplot(kind='bar')


## 2. Create an interactive line chart showing quantity and revenue sold to France between January 1st and May 31st 2011.

In [None]:
# Exercise 2 (first attempt): quantity and revenue sold to France between
# January 1st and May 31st 2011.

# Filter Date:
start_date = "2011-01-01"
end_date = "2011-05-31"

# A bare date string is compared as midnight of that day, so
# `InvoiceDate <= end_date` would drop every invoice issued on May 31st.
# Compare against the start of the following day instead.
after_start_date = data["InvoiceDate"] >= start_date
before_end_date = data["InvoiceDate"] < pd.Timestamp(end_date) + pd.Timedelta(days=1)
between_two_dates = after_start_date & before_end_date
filtered_dates = data.loc[between_two_dates]

# Filter France:
filtered_dates = filtered_dates.loc[filtered_dates["Country"] == "France"]

# Interactive Plot:
# NOTE: grouping by Country leaves a single row (France), so this line chart
# has only one point per measure. The following cell groups by InvoiceDate
# to obtain a time axis.
sales = filtered_dates[["Country", "Quantity", "Revenue"]].groupby(["Country"]).agg("sum")

sales.iplot(kind = "line")


# Not sure why it's showing a straight line that cannot be read visually.
# Paolo: good effort, but the plot does not show here. You should plot as a function of time, so group by InvoiceDate
# instead of Country; this gives you the time variable you can use to plot. See the cell below for an example.

In [None]:
# Exercise 2 (reviewed version): quantity and revenue sold to France between
# January 1st and May 31st 2011, plotted against the invoice timestamp.

# Filter Date:
start_date = "2011-01-01"
end_date = "2011-05-31"

# A bare date string is compared as midnight of that day, so
# `InvoiceDate <= end_date` would drop every invoice issued on May 31st.
# Compare against the start of the following day instead.
after_start_date = data["InvoiceDate"] >= start_date
before_end_date = data["InvoiceDate"] < pd.Timestamp(end_date) + pd.Timedelta(days=1)
between_two_dates = after_start_date & before_end_date
filtered_dates = data.loc[between_two_dates]

# Filter France:
filtered_dates = filtered_dates.loc[filtered_dates["Country"] == "France"]

# Interactive Plot:
# Pick the two measures before summing: summing every column first also
# tries to add up the text columns, which recent pandas versions reject.
sales = filtered_dates.groupby('InvoiceDate')[['Quantity', 'Revenue']].sum()

sales.iplot(kind = "scatter")




## 3. Create an interactive scatter plot showing the relationship between average quantity (x-axis) and average unit price (y-axis) for the product PARTY BUNTING with the plot points color-coded by country (categories).

In [None]:
# Exercise 3: average quantity (x) against average unit price (y) for the
# product PARTY BUNTING, one colour-coded point per country.
filter_product = data.loc[data["Description"] == "PARTY BUNTING"]

# Paolo's review: the task asks about average values, so quantity and unit
# price are averaged per country before plotting instead of plotting every
# invoice line.
country_averages = filter_product.groupby("Country", as_index=False)[["Quantity", "UnitPrice"]].mean()

country_averages.iplot(x='Quantity', y='UnitPrice', categories='Country',
                       xTitle='Average Quantity', yTitle='Average Unit Price')

## 4. Create a set of interactive histograms showing the distributions of quantity per invoice for the following countries: EIRE, Germany, France, and Netherlands.

In [None]:
# Exercise 4: distribution of the quantity per invoice for four countries,
# one histogram per country.
selected_countries = data[data["Country"].isin(['EIRE', 'Germany', 'France', 'Netherlands'])]

# One row per invoice, one column per country; each cell is the total
# quantity on that invoice.
x = selected_countries.pivot_table(values='Quantity', columns='Country', index='InvoiceNo', aggfunc='sum')

# In a histogram the x-axis carries the binned value (quantity per invoice)
# and the y-axis counts how many invoices fall in each bin, so the axis
# titles are labelled that way round.
x.iplot(kind = 'hist', subplots=True, xTitle='Quantity per Invoice', yTitle='Number of Invoices')

# Not sure if this is correct as I do not know if the x-axis is number of invoices. It's very confusing.
#Paolo:Yes it is correct!


## 5. Create an interactive side-by-side bar chart showing the revenue by country listed below (bars) for each of the products listed below.

In [None]:
# Products named by the exercise (left commented out):
# product_list = ['JUMBO BAG RED RETROSPOT',
#                 'CREAM HANGING HEART T-LIGHT HOLDER',
#                 'REGENCY CAKESTAND 3 TIER']

# Countries named by the exercise.
country_list = [
    'EIRE',
    'Germany',
    'France',
    'Netherlands',
]

In [None]:
# Exercise 5: revenue per product with one bar per country.
product_names = ['JUMBO BAG RED RETROSPOT',
                 'CREAM HANGING HEART T-LIGHT HOLDER',
                 'REGENCY CAKESTAND 3 TIER']

# Filter on country_list directly instead of reusing selected_countries from
# exercise 4, so this cell does not depend on that cell having been run and
# the countries shown always match country_list.
# NOTE: despite its name, product_list holds the filtered rows (a DataFrame).
product_list = data[data["Country"].isin(country_list) & data["Description"].isin(product_names)]

# Rows are products, columns are countries, values are summed revenue.
combined = product_list.pivot_table(values='Revenue', columns='Country', index='Description', aggfunc='sum')

combined.iplot(kind='bar', xTitle='Type of Product per Country', yTitle='Revenue', title='Revenue per country per product')


#Paolo: yes!

## 6. Create an interactive line chart showing quantity sold by day for the United Kingdom. Add drop-down boxes for Year and Month that allow you to filter the date range that appears in the chart.

In [None]:
# Split the invoice timestamp into Year, Month and Day columns, building the
# DatetimeIndex once and reading each calendar field from it.
invoice_index = pd.DatetimeIndex(data['InvoiceDate'])
for column, field in (('Year', 'year'), ('Month', 'month'), ('Day', 'day')):
    data[column] = getattr(invoice_index, field)

# Keep only the United Kingdom rows for the charts that follow.
uk = data.loc[data['Country'] == 'United Kingdom']

In [None]:
# Display the United Kingdom subset as the cell output.
uk

In [None]:

@interact(Selection=['Year', 'Month'])
def linechart(Selection):
    """Plot the mean UK quantity per day of month, split by year or by month.

    Selection: name of the ``uk`` column ('Year' or 'Month') whose distinct
    values become the separate lines of the chart.
    """
    # Local name chosen so the notebook-level ``data`` frame is not shadowed.
    # NOTE(review): aggfunc='mean' averages the Quantity of individual rows;
    # confirm that this, rather than a sum, is the intended "quantity sold".
    daily = uk.pivot_table(values='Quantity', columns=Selection,
                           index='Day', aggfunc='mean').reset_index()

    # Separate the selection from the country name; plain concatenation
    # rendered the title as "...United KingdomYear".
    daily.iplot(kind='line', x='Day', xTitle='Day',
                yTitle='Quantity sold by day',
                title='Quantity sold by day in the United Kingdom by ' + Selection.title())

#Paolo: good effort, you could also have tried per year and then per month within the year. See cell below.

In [None]:
@interact(Year=uk['Year'].unique(),
          Month=uk['Month'].unique())
def linechart(Year, Month):
    """Plot the total UK quantity sold per day for the chosen year and month.

    Year, Month: values picked from the drop-downs, matched against the
    ``Year`` and ``Month`` columns of ``uk``.
    """
    df = uk[(uk['Year'] == Year) & (uk['Month'] == Month)]

    # Not every Year/Month pairing offered by the two drop-downs has rows;
    # report that instead of handing an empty series to the plotting call.
    if df.empty:
        print(f'No United Kingdom invoices for {Year}-{int(Month):02d}.')
        return

    # Select Quantity before summing: summing every column first also tries
    # to add up the datetime and text columns, which recent pandas rejects.
    grouped = df.groupby('Day')['Quantity'].sum()
    grouped.iplot(kind='line', title='UK Sales Quantity by Day')

# Bonus challenges

## 7. Create an interactive scatter plot that plots number of invoices (x-axis) vs. number of customers (y-axis) and the plot points represent individual products. Add two sliders that control the x and y axis ranges.

In [None]:
# Per-column aggregation applied to each product: distinct invoices, summed
# quantity, mean unit price, summed revenue and distinct customers.
agg_func = dict(
    InvoiceNo='nunique',
    Quantity='sum',
    UnitPrice='mean',
    Revenue='sum',
    CustomerID='nunique',
)

# One row per product description, built from the United Kingdom rows.
products = uk.groupby('Description').agg(agg_func)

## 8. Create an interactive bar chart that shows revenue by product description. Add a text field widget that filters the results to show the products that contain the text entered in their description.