# Interactive Time Series Visualization Assignment

In [15]:
#!pip install plotly_express
!pip install openpyxl

Collecting openpyxl
  Downloading openpyxl-3.0.5-py2.py3-none-any.whl (242 kB)
Collecting et-xmlfile
  Downloading et_xmlfile-1.0.1.tar.gz (8.4 kB)
Collecting jdcal
  Downloading jdcal-1.4.1-py2.py3-none-any.whl (9.5 kB)
Using legacy 'setup.py install' for et-xmlfile, since package 'wheel' is not installed.
Installing collected packages: et-xmlfile, jdcal, openpyxl
    Running setup.py install for et-xmlfile: started
    Running setup.py install for et-xmlfile: finished with status 'done'
Successfully installed et-xmlfile-1.0.1 jdcal-1.4.1 openpyxl-3.0.5


You should consider upgrading via the 'c:\users\juss\appdata\local\programs\python\python38-32\python.exe -m pip install --upgrade pip' command.


In [16]:
import pandas as pd
import plotly_express as px
from ipywidgets import interact
import openpyxl

### Load the Online Retail.xlsx Excel file into a Pandas dataframe.

In [None]:
link = 'https://tf-assets-prod.s3.amazonaws.com/tf-curric/data-science/Data%20Sets%20Time%20Series%20Analysis/Time%20Series%20-%20Day%202/Online%20Retail.xlsx'

In [17]:
data = pd.read_excel('C:/Users/Juss/Downloads/Online_Retail.xlsx', engine='openpyxl')

In [18]:
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 396034 entries, 0 to 396033
Data columns (total 9 columns):
 #   Column       Non-Null Count   Dtype         
---  ------       --------------   -----         
 0   InvoiceNo    396034 non-null  int64         
 1   InvoiceDate  396034 non-null  datetime64[ns]
 2   StockCode    396034 non-null  object        
 3   Description  396034 non-null  object        
 4   Quantity     396034 non-null  int64         
 5   UnitPrice    396034 non-null  float64       
 6   Revenue      396034 non-null  float64       
 7   CustomerID   396034 non-null  int64         
 8   Country      396034 non-null  object        
dtypes: datetime64[ns](1), float64(2), int64(3), object(3)
memory usage: 22.7+ MB


In [19]:
data.head()

Unnamed: 0,InvoiceNo,InvoiceDate,StockCode,Description,Quantity,UnitPrice,Revenue,CustomerID,Country
0,536365,2010-12-01 08:26:00,85123A,CREAM HANGING HEART T-LIGHT HOLDER,6,2.55,15.3,17850,United Kingdom
1,536373,2010-12-01 09:02:00,85123A,CREAM HANGING HEART T-LIGHT HOLDER,6,2.55,15.3,17850,United Kingdom
2,536375,2010-12-01 09:32:00,85123A,CREAM HANGING HEART T-LIGHT HOLDER,6,2.55,15.3,17850,United Kingdom
3,536390,2010-12-01 10:19:00,85123A,CREAM HANGING HEART T-LIGHT HOLDER,64,2.55,163.2,17511,United Kingdom
4,536394,2010-12-01 10:39:00,85123A,CREAM HANGING HEART T-LIGHT HOLDER,32,2.55,81.6,13408,United Kingdom


### Create an interactive histogram to explore the distributions of daily (by date) Quantity, UnitPrice, and Revenue. Include a dropdown widget where you can switch between the three fields.

**Hint:** You will need to create a Date field and then aggregate on it. The Quantity and Revenue fields should be summed and the UnitPrice field should be averaged (mean).

In [20]:
from datetime import datetime as dt

In [21]:
data['Date'] = data['InvoiceDate'].dt.date

In [23]:
datahist = data.groupby('Date').agg({'Quantity':'sum','Revenue':'sum','UnitPrice':'mean'}).reset_index()

In [24]:
datahist

Unnamed: 0,Date,Quantity,Revenue,UnitPrice
0,2010-12-01,24207,46219.29,3.109995
1,2010-12-02,31140,47283.53,3.184610
2,2010-12-03,11822,23576.01,3.166670
3,2010-12-05,16370,31315.64,2.840382
4,2010-12-06,16267,31014.21,2.878689
...,...,...,...,...
300,2011-12-05,37609,55920.60,2.581249
301,2011-12-06,27798,45584.19,2.318553
302,2011-12-07,41080,68699.21,2.397698
303,2011-12-08,27536,50214.15,2.572785


In [25]:
def ihistogram(df, field, title=''):
    fig = px.histogram(df, x=field, title=title, template='none').update(layout=dict(title=dict(x=0.5)))
    fig.update_yaxes(title_text='Number of Records')
    fig.update_traces(marker_color='lightskyblue', marker_line_color='black', marker_line_width=1)
    fig.show()

In [28]:
metric = ['Quantity', 'Revenue', 'UnitPrice']

@interact(Selection=metric)

def plot_hist(Selection):
    ihistogram(datahist, Selection, title='Daily Retail Distribution for '+Selection+'.')

interactive(children=(Dropdown(description='Selection', options=('Quantity', 'Revenue', 'UnitPrice'), value='Q…

### Create an interactive line chart where you can explore Quantity and Revenue by day (date) for each country. Include two dropdown widgets - one to choose between the Quantity and Revenue fields and the other to choose the Country.

In [29]:
def ilinechart(df, x, y, groups=None, title=''):
    fig = px.line(df, x=x, y=y, color=groups, title=title, template='none').update(layout=dict(title=dict(x=0.5)))
    fig.show()

In [31]:
metric = ['Quantity', 'Revenue']
country = data['Country'].unique()

@interact(Metric=metric, Country=country)

def line_plot(Metric, Country):
    filter_ = data.loc[lambda x: x.Country == Country]
    group_ = filter_.groupby(['Date'], as_index=False).sum()
    ilinechart(group_, 'Date', Metric, title='Daily '+Metric+' by '+Country+'.')

interactive(children=(Dropdown(description='Metric', options=('Quantity', 'Revenue'), value='Quantity'), Dropd…

### Create an interactive scatter plot showing the relationships between daily Quantity, UnitPrice, and Revenue for the United Kingdom. Include two dropdown boxes that let you choose between the 3 fields - one for the x axis and one for the y axis of your scatter plot. Size the data points according to Revenue.

In [32]:
def iscatter(df, x, y, color=None, size=None, title=''):
    fig = px.scatter(df, x=x, y=y, color=color, size=size, title=title, template='none')
    fig.update_traces(marker_line_color='black', marker_line_width=1)
    fig.show()

In [36]:
axis = ['Quantity', 'Revenue', 'UnitPrice']

@interact(Horizontal=axis, Vertical=axis)

def scat_plot(Horizontal, Vertical):
    filter_ = data.loc[lambda x: x.Country == 'United Kingdom']
    group_ = filter_.groupby(['Date'], as_index=False).sum()
    iscatter(group_, Horizontal, Vertical, size='Revenue', title='UK Daily Retail ' + Horizontal + ' by ' + Vertical)

interactive(children=(Dropdown(description='Horizontal', options=('Quantity', 'Revenue', 'UnitPrice'), value='…

### Create a bar chart showing the top X products sold (by quantity) in the United Kingdom in a specific month. Use a dropdown box to select the month name and a slider to show the top X products. The range for X should be from 5 to 25. Make sure the bars are sorted in descending order according to their total quantity sold for the month.

Bonus points if you can figure out how to ensure that the month names in the dropdown appear in their correct order (January, February, March, April, etc.).

In [38]:
data['Month'] = data['InvoiceDate'].dt.strftime('%B')

In [43]:
def ibarchart(df, x, y, order=None, title=''):
    fig = px.bar(df, x=x, y=y, title=title, template='none')
    fig.update_traces(marker_color='lightskyblue', marker_line_color='black', marker_line_width=1)
    fig.update_layout(xaxis={'categoryorder':'array', 'categoryarray': order})
    fig.show()

In [47]:
month = data['Month'].unique()

@interact(Month=month, Products=(5,25))

def plot_bar(Month, Products):
    filter_ = data.loc[(lambda x: (x.Country == 'United Kingdom') & (x.Month == Month))]
    group_ = filter_.groupby(['Month','Description'], as_index=False).agg(
        {'Quantity':'sum'}).sort_values(['Month','Quantity'], ascending=[True,False]).head(Products)
    ibarchart(group_, 'Description', 'Quantity', data.Description, title=f'UK Top {Products} product sold by quantity in {Month}')

interactive(children=(Dropdown(description='Month', options=('December', 'January', 'February', 'March', 'Apri…