In [1]:
!pip install plotly
!pip install ipywidgets
import pandas as pd
import plotly.express as px
from ipywidgets import interact



In [2]:
# Location of the Online Retail workbook (an .xlsx file hosted on S3).
ONLINE_RETAIL_URL = (
    'https://tf-assets-prod.s3.amazonaws.com/tf-curric/data-science/'
    'Data%20Sets%20Time%20Series%20Analysis/'
    'Time%20Series%20-%20Day%202/'
    'Online%20Retail.xlsx'
)

# Load the workbook into the frame that every later cell works from.
o_df = pd.read_excel(ONLINE_RETAIL_URL)

In [4]:
# Break the invoice timestamp into calendar parts used by the charts below.
# Keyword order fixes the column order: Date, Month, Day, Weekday, Hour, Minute.
o_df = o_df.assign(
    Date=lambda frame: frame['InvoiceDate'].dt.date,
    Month=lambda frame: frame['InvoiceDate'].dt.strftime('%B'),
    Day=lambda frame: frame['InvoiceDate'].dt.day,
    Weekday=lambda frame: frame['InvoiceDate'].dt.weekday,
    Hour=lambda frame: frame['InvoiceDate'].dt.hour,
    Minute=lambda frame: frame['InvoiceDate'].dt.minute,
)

In [5]:
# Calendar date of each invoice, used as the grouping key below.
o_df['Date'] = o_df['InvoiceDate'].dt.date

# The aggregation below needs a 'Revenue' column, but no visible cell creates it
# (presumably it came from a cell that is no longer in the notebook). Derive it
# as quantity times unit price when it is absent so a fresh kernel run works.
# NOTE(review): confirm this matches the original Revenue definition.
if 'Revenue' not in o_df.columns:
    o_df['Revenue'] = o_df['Quantity'] * o_df['UnitPrice']

# One row per date: total quantity, total revenue and mean unit price.
hist_price = (
    o_df.groupby('Date')
    .agg({'Quantity': 'sum', 'Revenue': 'sum', 'UnitPrice': 'mean'})
    .reset_index()
)
hist_price

Unnamed: 0,Date,Quantity,Revenue,UnitPrice
0,2010-12-01,24207,46219.29,3.109995
1,2010-12-02,31140,47283.53,3.184610
2,2010-12-03,11822,23576.01,3.166670
3,2010-12-05,16370,31315.64,2.840382
4,2010-12-06,16267,31014.21,2.878689
...,...,...,...,...
300,2011-12-05,37609,55920.60,2.581249
301,2011-12-06,27798,45584.19,2.318553
302,2011-12-07,41080,68699.21,2.397698
303,2011-12-08,27536,50214.15,2.572785


In [7]:
#interactive histogram def
def ihistogram(df, field, bins=None, title=''):
    """Build a Plotly Express histogram of one field and display it.

    Parameters
    ----------
    df
        Data passed straight to ``px.histogram``.
    field
        Name of the column plotted on the x axis.
    bins
        Forwarded as ``nbins``; ``None`` is passed through unchanged.
    title
        Chart title; it is positioned at x=0.5 of the layout.

    Returns
    -------
    None
        The figure is shown, not returned.
    """
    bar_style = {
        'marker_color': 'lightskyblue',
        'marker_line_color': 'black',
        'marker_line_width': 1,
    }

    fig = px.histogram(df, x=field, nbins=bins, title=title, template='none')
    # Keep working with the object returned by update(), as the chart is
    # styled and shown through that return value.
    fig = fig.update(layout={'title': {'x': 0.5}})

    fig.update_yaxes(title_text='Number of Records')
    fig.update_traces(**bar_style)

    fig.show()

In [8]:
selection = ['Quantity', 'Revenue', 'UnitPrice']


def plot_hist(Metric):
    """Show the distribution of one daily metric from ``hist_price``.

    ``Metric`` is the column name to plot; it is also embedded in the title.
    """
    chart_title = 'Daily Retail ' + Metric + ' Distribution'
    ihistogram(hist_price, Metric, bins=50, title=chart_title)


# Wire the function to a dropdown over `selection` and render the widget.
# The trailing semicolon keeps the returned function out of the cell output.
interact(plot_hist, Metric=selection);

interactive(children=(Dropdown(description='Metric', options=('Quantity', 'Revenue', 'UnitPrice'), value='Quan…

In [10]:
#Create an interactive line chart where you can explore Quantity and Revenue by day (date) for each country. Include two dropdown widgets - one to choose between the Quantity and Revenue fields and the other to choose the Country

def ilinechart(df, x, y, groups=None, title=''):
    """Build a Plotly Express line chart and display it.

    Parameters
    ----------
    df
        Data passed straight to ``px.line``.
    x, y
        Column names for the horizontal and vertical axes.
    groups
        Forwarded as ``color``; ``None`` is passed through unchanged.
    title
        Chart title.

    Returns
    -------
    None
        The figure is shown, not returned.
    """
    chart_options = {
        'x': x,
        'y': y,
        'color': groups,
        'title': title,
        'template': 'none',
    }
    px.line(df, **chart_options).show()

In [13]:
metrics = ['Quantity', 'Revenue']
countries = o_df['Country'].unique()


@interact(Metric=metrics, Country=countries)
def line_plot(Metric, Country):
    """Plot the daily total of one metric for one country as a line chart.

    Parameters
    ----------
    Metric : str
        Column to total per day; offered as a dropdown over ``metrics``.
    Country : str
        Value of the 'Country' column to keep; offered as a dropdown over
        ``countries``.
    """
    filtered = o_df.loc[o_df['Country'] == Country]
    # Sum only the selected column. A bare .sum() over the whole frame also
    # tries to add up the text and timestamp columns; pandas 2.x no longer
    # drops those silently (numeric_only defaults to False), and summing the
    # InvoiceDate timestamps raises a TypeError.
    grouped = filtered.groupby('Date', as_index=False)[[Metric]].sum()
    ilinechart(grouped, 'Date', Metric, title='Daily Retail ' + Metric + ' by ' + Country)

interactive(children=(Dropdown(description='Metric', options=('Quantity', 'Revenue'), value='Quantity'), Dropd…

In [15]:
#Create an interactive scatter plot showing the relationships between daily Quantity, UnitPrice, and Revenue for the United Kingdom. Include two dropdown boxes that let you choose between the 3 fields - one for the x axis and one for the y axis of your scatter plot. Size the data points according to Revenue.

def iscatter(df, x, y, color=None, size=None, title=''):
    """Build a Plotly Express scatter plot with outlined markers and display it.

    Parameters
    ----------
    df
        Data passed straight to ``px.scatter``.
    x, y
        Column names for the horizontal and vertical axes.
    color, size
        Forwarded to ``px.scatter`` under the same names; ``None`` is passed
        through unchanged.
    title
        Chart title.

    Returns
    -------
    None
        The figure is shown, not returned.
    """
    marker_outline = {'marker_line_color': 'black', 'marker_line_width': 1}

    figure = px.scatter(
        df,
        x=x,
        y=y,
        color=color,
        size=size,
        title=title,
        template='none',
    )
    figure.update_traces(**marker_outline)
    figure.show()

In [16]:
x_axis = ['Quantity', 'Revenue', 'UnitPrice']
y_axis = ['Quantity', 'Revenue', 'UnitPrice']


@interact(X=x_axis, Y=y_axis)
def scat_plot(X, Y):
    """Scatter two daily United Kingdom metrics against each other.

    Parameters
    ----------
    X : str
        Column for the horizontal axis; offered as a dropdown over ``x_axis``.
    Y : str
        Column for the vertical axis; offered as a dropdown over ``y_axis``.

    Marker size is always taken from the daily 'Revenue' column.
    """
    filtered = o_df.loc[o_df['Country'] == 'United Kingdom']
    # Aggregate only the three plotted columns, using the same rules as the
    # daily hist_price table: totals for Quantity and Revenue, mean for
    # UnitPrice. A bare .sum() over the whole frame also tries to add up the
    # text and timestamp columns, which raises a TypeError on pandas 2.x.
    grouped = filtered.groupby('Date', as_index=False).agg(
        {'Quantity': 'sum', 'Revenue': 'sum', 'UnitPrice': 'mean'})
    iscatter(grouped, X, Y, size='Revenue', title='UK Daily Retail ' + X + ' by ' + Y)

interactive(children=(Dropdown(description='X', options=('Quantity', 'Revenue', 'UnitPrice'), value='Quantity'…

In [17]:
#Create a bar chart showing the top X products sold (by quantity) in the United Kingdom in a specific month. Use a dropdown box to select the month name and a slider to show the top X products. The range for X should be from 5 to 25. Make sure the bars are sorted in descending order according to their total quantity sold for the month

def ibarchart(df, x, y, color=None, order=None, title=''):
    """Build a Plotly Express bar chart with outlined bars and display it.

    Parameters
    ----------
    df
        Data passed straight to ``px.bar``.
    x, y
        Column names for the category axis and the bar heights.
    color
        Forwarded to ``px.bar``; ``None`` is passed through unchanged.
    order
        Category sequence for the x axis, set as ``categoryarray`` with
        ``categoryorder='array'``. When left as ``None`` it is still passed
        through as ``None``.
    title
        Chart title.

    Returns
    -------
    None
        The figure is shown, not returned.
    """
    bar_outline = dict(marker_line_color='black', marker_line_width=1)
    x_axis_order = dict(categoryorder='array', categoryarray=order)

    figure = px.bar(df, x=x, y=y, color=color, title=title, template='none')
    figure.update_traces(**bar_outline)
    figure.update_layout(xaxis=x_axis_order)
    figure.show()

In [18]:
# Month of each invoice in two forms: the '%B'-formatted name and the
# numeric month taken from the timestamp.
o_df = o_df.assign(
    Month=lambda frame: frame['InvoiceDate'].dt.strftime('%B'),
    Month_num=lambda frame: frame['InvoiceDate'].dt.month,
)

In [20]:
months = o_df['Month'].unique()


@interact(Month=months, Top_Products=(5, 25))
def plot_bar(Month, Top_Products):
    """Bar chart of the best-selling United Kingdom products in one month.

    Parameters
    ----------
    Month : str
        Value of the 'Month' column to keep; offered as a dropdown over
        ``months``.
    Top_Products : int
        Number of products to show; offered as a slider built from (5, 25).
    """
    uk_month = o_df.loc[(o_df['Country'] == 'United Kingdom') & (o_df['Month'] == Month)]
    grouped = (
        uk_month.groupby(['Month', 'Description'], as_index=False)
        .agg({'Quantity': 'sum'})
        .sort_values(['Month', 'Quantity'], ascending=[True, False])
        .head(Top_Products)
    )
    # Hand the sorted product names to the chart as the explicit category
    # order, so the descending-by-quantity layout does not depend on how the
    # plotting library orders categories when no array is supplied.
    ibarchart(grouped, 'Description', 'Quantity', 'Description',
              order=grouped['Description'].tolist(),
              title=f'UK Top {Top_Products} product sold by quantity in {Month}')

interactive(children=(Dropdown(description='Month', options=('December', 'January', 'February', 'March', 'Apri…