# Interactive Time Series Visualization Assignment

In [1]:
#!pip install plotly_express

In [27]:
import pandas as pd
import plotly.express as px
from ipywidgets import interact
import matplotlib as plt

import warnings
warnings.filterwarnings("ignore")

### Load the Online Retail.xlsx Excel file into a Pandas dataframe.

In [3]:
retail = pd.read_excel('../data/Online Retail.xlsx')

### Create an interactive histogram to explore the distributions of daily (by date) Quantity, UnitPrice, and Revenue. Include a dropdown widget where you can switch between the three fields.

**Hint:** You will need to create a Date field and then aggregate on it. The Quantity and Revenue fields should be summed and the UnitPrice field should be averaged (mean).

In [4]:
retail.head()

Unnamed: 0,InvoiceNo,InvoiceDate,StockCode,Description,Quantity,UnitPrice,Revenue,CustomerID,Country
0,536365,2010-12-01 08:26:00,85123A,CREAM HANGING HEART T-LIGHT HOLDER,6,2.55,15.3,17850,United Kingdom
1,536373,2010-12-01 09:02:00,85123A,CREAM HANGING HEART T-LIGHT HOLDER,6,2.55,15.3,17850,United Kingdom
2,536375,2010-12-01 09:32:00,85123A,CREAM HANGING HEART T-LIGHT HOLDER,6,2.55,15.3,17850,United Kingdom
3,536390,2010-12-01 10:19:00,85123A,CREAM HANGING HEART T-LIGHT HOLDER,64,2.55,163.2,17511,United Kingdom
4,536394,2010-12-01 10:39:00,85123A,CREAM HANGING HEART T-LIGHT HOLDER,32,2.55,81.6,13408,United Kingdom


In [5]:
retail['date'] = retail['InvoiceDate'].dt.date

In [16]:
grp1_df = retail.groupby('date').agg({'Quantity': 'sum', 'Revenue': 'sum', 'UnitPrice': 'mean'}).reset_index()
grp1_df

Unnamed: 0,date,Quantity,Revenue,UnitPrice
0,2010-12-01,24207,46219.29,3.109995
1,2010-12-02,31140,47283.53,3.184610
2,2010-12-03,11822,23576.01,3.166670
3,2010-12-05,16370,31315.64,2.840382
4,2010-12-06,16267,31014.21,2.878689
...,...,...,...,...
300,2011-12-05,37609,55920.60,2.581249
301,2011-12-06,27798,45584.19,2.318553
302,2011-12-07,41080,68699.21,2.397698
303,2011-12-08,27536,50214.15,2.572785


In [18]:
@interact(sel = ['Quantity', 'UnitPrice', 'Revenue'])
def viz(sel):
    fig = px.histogram(grp1_df, x=sel, nbins=30, title='Distribution of Quantities')
    fig.show()

interactive(children=(Dropdown(description='sel', options=('Quantity', 'UnitPrice', 'Revenue'), value='Quantit…

### Create an interactive line chart where you can explore Quantity and Revenue by day (date) for each country. Include two dropdown widgets - one to choose between the Quantity and Revenue fields and the other to choose the Country.

In [8]:
grp2_df = retail.groupby(['date', 'Country']).agg({'Quantity': 'sum', 'Revenue': 'sum'}).reset_index()
grp2_df

Unnamed: 0,date,Country,Quantity,Revenue
0,2010-12-01,Australia,107,358.25
1,2010-12-01,EIRE,242,505.38
2,2010-12-01,France,446,801.86
3,2010-12-01,Germany,156,243.48
4,2010-12-01,Netherlands,96,177.60
...,...,...,...,...
1485,2011-12-09,Belgium,202,193.64
1486,2011-12-09,France,105,249.45
1487,2011-12-09,Germany,872,1545.72
1488,2011-12-09,Norway,2227,2638.69


In [19]:
@interact(sel = ['Quantity', 'Revenue'], sel_country = retail['Country'].unique())
def viz(sel, sel_country):
    fig = px.line(grp2_df[grp2_df['Country'] == sel_country], x="date", y=sel,
                       title=sel + ' By Date for ' + sel_country)
    fig.show()

interactive(children=(Dropdown(description='sel', options=('Quantity', 'Revenue'), value='Quantity'), Dropdown…

### Create an interactive scatter plot showing the relationships between daily Quantity, UnitPrice, and Revenue for the United Kingdom. Include two dropdown boxes that let you choose between the 3 fields - one for the x axis and one for the y axis of your scatter plot. Size the data points according to Revenue.

In [10]:
uk = retail[retail['Country'] == 'United Kingdom']
grp3_df = uk.groupby(['date', 'Country']).agg({'Quantity': 'sum', 'Revenue': 'sum', 'UnitPrice': 'mean'}).reset_index()
grp3_df

Unnamed: 0,date,Country,Quantity,Revenue,UnitPrice
0,2010-12-01,United Kingdom,21308,42213.58,3.172830
1,2010-12-02,United Kingdom,30987,47123.68,3.187544
2,2010-12-03,United Kingdom,7646,18833.99,3.143373
3,2010-12-05,United Kingdom,13603,25807.89,2.818036
4,2010-12-06,United Kingdom,15515,29651.87,2.875897
...,...,...,...,...,...
300,2011-12-05,United Kingdom,35874,52893.63,2.571737
301,2011-12-06,United Kingdom,25909,42063.55,2.249006
302,2011-12-07,United Kingdom,29078,49238.47,2.328416
303,2011-12-08,United Kingdom,25441,47146.11,2.565525


In [11]:
@interact(sel1 = ['Quantity', 'Revenue', 'UnitPrice'], sel2 = ['Quantity', 'Revenue', 'UnitPrice'])
def viz(sel1, sel2):
    fig = px.scatter(grp3_df, 
                       x=sel1, y=sel2,
                       size='Revenue',
                       title=sel1 + ' By ' + sel2)
    fig.show()

interactive(children=(Dropdown(description='sel1', options=('Quantity', 'Revenue', 'UnitPrice'), value='Quanti…

### Create a bar chart showing the top X products sold (by quantity) in the United Kingdom in a specific month. Use a dropdown box to select the month name and a slider to show the top X products. The range for X should be from 5 to 25. Make sure the bars are sorted in descending order according to their total quantity sold for the month.

Bonus points if you can figure out how to ensure that the month names in the dropdown appear in their correct order (January, February, March, April, etc.).

In [12]:
retail.head()

Unnamed: 0,InvoiceNo,InvoiceDate,StockCode,Description,Quantity,UnitPrice,Revenue,CustomerID,Country,date
0,536365,2010-12-01 08:26:00,85123A,CREAM HANGING HEART T-LIGHT HOLDER,6,2.55,15.3,17850,United Kingdom,2010-12-01
1,536373,2010-12-01 09:02:00,85123A,CREAM HANGING HEART T-LIGHT HOLDER,6,2.55,15.3,17850,United Kingdom,2010-12-01
2,536375,2010-12-01 09:32:00,85123A,CREAM HANGING HEART T-LIGHT HOLDER,6,2.55,15.3,17850,United Kingdom,2010-12-01
3,536390,2010-12-01 10:19:00,85123A,CREAM HANGING HEART T-LIGHT HOLDER,64,2.55,163.2,17511,United Kingdom,2010-12-01
4,536394,2010-12-01 10:39:00,85123A,CREAM HANGING HEART T-LIGHT HOLDER,32,2.55,81.6,13408,United Kingdom,2010-12-01


In [28]:
uk = retail[retail['Country'] == 'United Kingdom']
uk['month'] = uk['InvoiceDate'].dt.month_name()
uk['month_num'] = uk['InvoiceDate'].dt.month
grp4_df = uk.groupby(['month_num', 'month', 'Description']).agg({'Quantity': 'sum'}).reset_index()
grp4_df

Unnamed: 0,month_num,month,Description,Quantity
0,1,January,4 PURPLE FLOCK DINNER CANDLES,2
1,1,January,OVAL WALL MIRROR DIAMANTE,10
2,1,January,SET 2 TEA TOWELS I LOVE LONDON,397
3,1,January,10 COLOUR SPACEBOY PEN,156
4,1,January,12 COLOURED PARTY BALLOONS,60
...,...,...,...,...
28237,12,December,ZINC T-LIGHT HOLDER STARS SMALL,120
28238,12,December,ZINC TOP 2 DOOR WOODEN SHELF,1
28239,12,December,ZINC WILLIE WINKIE CANDLE STICK,309
28240,12,December,ZINC WIRE KITCHEN ORGANISER,16


In [29]:
@interact(Month=uk['month'].unique(), Top=(5, 25))
def viz(Month, Top):
    plot_df = grp4_df[grp4_df['month'] == Month].sort_values(by=['Quantity'], ascending=False)
    fig = px.bar(plot_df[0 : Top], x='Description', y='Quantity',
                 title='Top Products for ' + Month, height=700)
    fig.show()

interactive(children=(Dropdown(description='Month', options=('December', 'January', 'February', 'March', 'Apri…

In [30]:
grp4_df[grp4_df['Description'] == 'POPCORN HOLDER']

Unnamed: 0,month_num,month,Description,Quantity
1401,1,January,POPCORN HOLDER,1563
3467,2,February,POPCORN HOLDER,2081
5619,3,March,POPCORN HOLDER,1742
7782,4,April,POPCORN HOLDER,1593
9959,5,May,POPCORN HOLDER,6618
12195,6,June,POPCORN HOLDER,1883
14459,7,July,POPCORN HOLDER,1663
16743,8,August,POPCORN HOLDER,5100
19157,9,September,POPCORN HOLDER,3432
21669,10,October,POPCORN HOLDER,4909
