# Interactive Time Series Visualization Assignment

In [1]:
import pandas as pd
import numpy as np

import plotly.express as px
from ipywidgets import interact

### Load the Online Retail.xlsx Excel file into a Pandas dataframe.

In [2]:
# Location of the Online Retail workbook on S3 (spaces in the path are percent-encoded as %20).
url = 'https://tf-assets-prod.s3.amazonaws.com/tf-curric/data-science/Data%20Sets%20Time%20Series%20Analysis/Time%20Series%20-%20Day%202/Online%20Retail.xlsx'
# Download the workbook and parse it into the DataFrame used by the assignment cells.
df = pd.read_excel(url)

In [3]:
# Preview the first rows to confirm which columns were loaded.
df.head()

Unnamed: 0,InvoiceNo,InvoiceDate,StockCode,Description,Quantity,UnitPrice,Revenue,CustomerID,Country
0,536365,2010-12-01 08:26:00,85123A,CREAM HANGING HEART T-LIGHT HOLDER,6,2.55,15.3,17850,United Kingdom
1,536373,2010-12-01 09:02:00,85123A,CREAM HANGING HEART T-LIGHT HOLDER,6,2.55,15.3,17850,United Kingdom
2,536375,2010-12-01 09:32:00,85123A,CREAM HANGING HEART T-LIGHT HOLDER,6,2.55,15.3,17850,United Kingdom
3,536390,2010-12-01 10:19:00,85123A,CREAM HANGING HEART T-LIGHT HOLDER,64,2.55,163.2,17511,United Kingdom
4,536394,2010-12-01 10:39:00,85123A,CREAM HANGING HEART T-LIGHT HOLDER,32,2.55,81.6,13408,United Kingdom


In [4]:
# Make sure InvoiceDate is a datetime column so the .dt accessor can be used on it.
df['InvoiceDate'] = pd.to_datetime(df['InvoiceDate'])

In [5]:
# Calendar day of each invoice; used as the key for the daily aggregations.
df['Date'] = df['InvoiceDate'].dt.date
# Full month name, e.g. 'December'.
df['Month'] = df['InvoiceDate'].dt.strftime('%B')
# Year plus month name, e.g. '2010-December'. These labels are strings, so sorting
# them alphabetically does not give chronological order.
df['YearMonth'] = df['InvoiceDate'].dt.strftime('%Y-%B')

In [6]:
# Earliest and latest invoice timestamps, displayed as a (min, max) tuple.
df['InvoiceDate'].min(), df['InvoiceDate'].max()

(Timestamp('2010-12-01 08:26:00'), Timestamp('2011-12-09 12:50:00'))

### Create an interactive histogram to explore the distributions of daily (by date) Quantity, UnitPrice, and Revenue. Include a dropdown widget where you can switch between the three fields.

**Hint:** You will need to create a Date field and then aggregate on it. The Quantity and Revenue fields should be summed and the UnitPrice field should be averaged (mean).

In [7]:
# One row per calendar day: total Quantity, total Revenue, mean UnitPrice.
agg_field_sum = (
    df
    .groupby('Date', as_index=False)
    .agg(
        Quantity=('Quantity', 'sum'),
        Revenue=('Revenue', 'sum'),
        UnitPrice=('UnitPrice', 'mean'),
    )
)
agg_field_sum

Unnamed: 0,Date,Quantity,Revenue,UnitPrice
0,2010-12-01,24207,46219.29,3.109995
1,2010-12-02,31140,47283.53,3.184610
2,2010-12-03,11822,23576.01,3.166670
3,2010-12-05,16370,31315.64,2.840382
4,2010-12-06,16267,31014.21,2.878689
...,...,...,...,...
300,2011-12-05,37609,55920.60,2.581249
301,2011-12-06,27798,45584.19,2.318553
302,2011-12-07,41080,68699.21,2.397698
303,2011-12-08,27536,50214.15,2.572785


In [8]:
@interact(field=['Quantity', 'UnitPrice', 'Revenue'])
def daily_dist(field):
    """Show a histogram of the daily values of ``field``.

    Parameters
    ----------
    field : str
        Column to plot: 'Quantity' or 'Revenue' (daily sums) or
        'UnitPrice' (daily mean). Chosen through the dropdown widget.

    Notes
    -----
    The daily aggregation does not depend on ``field``, so the function reads
    ``agg_field_sum`` (built once in the previous cell with the same groupby)
    instead of re-aggregating the full frame on every dropdown change. This
    also keeps the widget working if the global ``df`` is rebound later in
    the notebook.
    """
    fig = px.histogram(
        agg_field_sum,
        x=field,
        title=f'Daily distribution of {field}',
        template='none',
    )
    # Outline the bars so adjacent bins stay distinguishable.
    fig.update_traces(marker_color='lightskyblue',
                      marker_line_color='black',
                      marker_line_width=1)
    fig.show()


interactive(children=(Dropdown(description='field', options=('Quantity', 'UnitPrice', 'Revenue'), value='Quant…

### Create an interactive line chart where you can explore Quantity and Revenue by day (date) for each country. Include two dropdown widgets - one to choose between the Quantity and Revenue fields and the other to choose the Country.

In [9]:
@interact(field=['Quantity', 'Revenue'], country=np.sort(df['Country'].unique()))
def daily_line_country(field, country):
    """Plot the daily total of ``field`` for a single country as a line chart.

    Parameters
    ----------
    field : str
        'Quantity' or 'Revenue'; selected through the first dropdown.
    country : str
        Value of the ``Country`` column to filter on; selected through the
        second dropdown.
    """
    country_rows = df.loc[df['Country'] == country]
    daily_totals = (
        country_rows
        .groupby(['Date'], as_index=False)
        .agg({'Quantity': 'sum', 'Revenue': 'sum'})
    )
    fig = px.line(
        daily_totals,
        x='Date',
        y=field,
        title=f'Plot of daily {field} in {country}',
        template='none',
    )
    fig.show()

interactive(children=(Dropdown(description='field', options=('Quantity', 'Revenue'), value='Quantity'), Dropdo…

### Create an interactive scatter plot showing the relationships between daily Quantity, UnitPrice, and Revenue for the United Kingdom. Include two dropdown boxes that let you choose between the 3 fields - one for the x axis and one for the y axis of your scatter plot. Size the data points according to Revenue.

In [10]:
@interact(x_field=['Quantity', 'UnitPrice', 'Revenue'], y_field=['Quantity', 'UnitPrice', 'Revenue'])
def UK_scatter_field(x_field, y_field):
    """Scatter two daily United Kingdom metrics against each other.

    Parameters
    ----------
    x_field : str
        Metric for the x axis: 'Quantity', 'UnitPrice' or 'Revenue'.
    y_field : str
        Metric for the y axis: 'Quantity', 'UnitPrice' or 'Revenue'.

    Each point is one calendar day. Quantity and Revenue are daily sums,
    UnitPrice is the daily mean, and marker size follows daily Revenue.
    """
    uk_rows = df.loc[df['Country'] == 'United Kingdom']
    uk_daily = uk_rows.groupby('Date', as_index=False).agg(
        {'Quantity': 'sum', 'Revenue': 'sum', 'UnitPrice': 'mean'}
    )

    # The title names both axes and the size encoding so the figure can be
    # read on its own, like the other assignment charts.
    fig = px.scatter(
        uk_daily,
        x=x_field,
        y=y_field,
        size='Revenue',
        title=f'United Kingdom daily {y_field} vs {x_field} (marker size = Revenue)',
    )
    fig.update_traces(marker_line_color='black',
                      marker_line_width=1)
    fig.show()

interactive(children=(Dropdown(description='x_field', options=('Quantity', 'UnitPrice', 'Revenue'), value='Qua…

### Create a bar chart showing the top X products sold (by quantity) in the United Kingdom in a specific month. Use a dropdown box to select the month name and a slider to show the top X products. The range for X should be from 5 to 25. Make sure the bars are sorted in descending order according to their total quantity sold for the month.

Bonus points if you can figure out how to ensure that the month names in the dropdown appear in their correct order (January, February, March, April, etc.).

In [11]:
@interact(
    top_products=(5, 25),
    # Sort the 'YYYY-MonthName' labels chronologically; as plain strings they
    # would otherwise appear in first-appearance (or alphabetical) order.
    YearMonth=sorted(
        df['YearMonth'].unique(),
        key=lambda label: pd.to_datetime(label, format='%Y-%B'),
    ),
)
def top_products_UK(top_products, YearMonth):
    """Bar chart of the best-selling UK products (by quantity) in one month.

    Parameters
    ----------
    top_products : int
        How many products to show; the slider ranges from 5 to 25.
    YearMonth : str
        Month to report, formatted like '2010-December'; chosen through the
        dropdown, whose options are listed in calendar order.
    """
    uk_month_rows = df.loc[
        (df['Country'] == 'United Kingdom') & (df['YearMonth'] == YearMonth)
    ]
    # The rows are already restricted to one month, so grouping and sorting
    # by StockCode / Quantity alone is sufficient.
    top_sellers = (
        uk_month_rows
        .groupby('StockCode', as_index=False)
        .agg({'Quantity': 'sum'})
        .sort_values('Quantity', ascending=False)
        .head(top_products)
    )

    fig = px.bar(
        top_sellers,
        x='StockCode',
        y='Quantity',
        color='StockCode',
        title=f"UK's Top {top_products} products sold by Quantity in {YearMonth}",
    )
    fig.update_traces(marker_line_color='black',
                      marker_line_width=1)
    # Treat stock codes as labels (not numbers) and pin the bar order to
    # descending total quantity regardless of trace order.
    fig.update_xaxes(type='category', categoryorder='total descending')
    fig.show()

interactive(children=(IntSlider(value=15, description='top_products', max=25, min=5), Dropdown(description='Ye…

# Lecture Notes

In [12]:
# Load the web-events log from S3, then summarise its columns, dtypes and memory use.
web_events_df = pd.read_csv('https://tf-assets-prod.s3.amazonaws.com/tf-curric/data-science/web_events.csv')
web_events_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2756101 entries, 0 to 2756100
Data columns (total 5 columns):
 #   Column         Dtype  
---  ------         -----  
 0   timestamp      int64  
 1   visitorid      int64  
 2   event          object 
 3   itemid         int64  
 4   transactionid  float64
dtypes: float64(1), int64(3), object(1)
memory usage: 105.1+ MB


In [13]:
# The raw timestamp column is integer-typed; interpret it as milliseconds since the epoch.
web_events_df['timestamp'] = pd.to_datetime(web_events_df['timestamp'], unit='ms')

In [14]:
# Break the event timestamp into the calendar parts used for grouping below.
web_events_df['Date'] = web_events_df['timestamp'].dt.date
# Full month name, e.g. 'June'.
web_events_df['Month'] = web_events_df['timestamp'].dt.strftime('%B')
# Day of the month, hour and minute as integers.
web_events_df['Day'] = web_events_df['timestamp'].dt.day
web_events_df['Hour'] = web_events_df['timestamp'].dt.hour
web_events_df['Minute'] = web_events_df['timestamp'].dt.minute
# Full weekday name, e.g. 'Tuesday'.
web_events_df['Weekday'] = web_events_df['timestamp'].dt.strftime('%A')

In [15]:
# Preview the frame with the derived date/time columns added.
web_events_df.head()

Unnamed: 0,timestamp,visitorid,event,itemid,transactionid,Date,Month,Day,Hour,Minute,Weekday
0,2015-06-02 05:02:12.117,257597,view,355908,,2015-06-02,June,2,5,2,Tuesday
1,2015-06-02 05:50:14.164,992329,view,248676,,2015-06-02,June,2,5,50,Tuesday
2,2015-06-02 05:13:19.827,111016,view,318965,,2015-06-02,June,2,5,13,Tuesday
3,2015-06-02 05:12:35.914,483717,view,253185,,2015-06-02,June,2,5,12,Tuesday
4,2015-06-02 05:02:17.106,951259,view,367447,,2015-06-02,June,2,5,2,Tuesday


In [16]:
# Number of events for each (day-of-month, month) pair.
# NOTE(review): this rebinds `df`, which until now held the Online Retail data.
# The assignment widget callbacks above look up the global `df` when they run,
# so changing any of those widgets after this cell executes will fail on
# missing columns. Consider a distinct name (and updating the cells that plot
# this frame) — TODO.
df = web_events_df.groupby(['Day', 'Month'], as_index=False).agg({'event':'count'})
df

Unnamed: 0,Day,Month,event
0,1,August,15459
1,1,July,20764
2,1,June,22796
3,1,September,18178
4,2,August,15220
...,...,...,...
134,30,June,21448
135,30,May,16030
136,31,August,18627
137,31,July,18942


In [17]:
# One line per month: event count against day of month.
px.line(df, x='Day', y='event', color='Month', title='Number of Events by Day', template='none')

In [18]:
# Distribution of the per-day event counts, in 50 bins.
fig = px.histogram(
    df,
    x='event',
    nbins=50,
    title='Distribution',
    template='none',
)
fig.update_yaxes(title_text='Number of Records')
# Light fill with a thin black outline around each bar.
fig.update_traces(
    marker=dict(
        color='lightskyblue',
        line=dict(color='black', width=1),
    )
)

In [19]:
# Count of events recorded on each weekday (rows come back in alphabetical weekday order).
new_df = (
    web_events_df
    .groupby('Weekday', as_index=False)
    .agg(event=('event', 'count'))
)
new_df

Unnamed: 0,Weekday,event
0,Friday,379699
1,Monday,439813
2,Saturday,305215
3,Sunday,334422
4,Thursday,418761
5,Tuesday,447077
6,Wednesday,431114


In [20]:
# Calendar order for the x axis; the grouped frame itself is alphabetical.
order = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']

fig = px.bar(new_df, x='Weekday', y='event', template='none')
fig.update_yaxes(title_text='Number of Records')
fig.update_traces(
    marker=dict(
        color='lightskyblue',
        line=dict(color='black', width=1),
    )
)
# Force the categorical axis to follow `order` instead of the data order.
fig.update_layout(xaxis=dict(categoryorder='array', categoryarray=order))

In [21]:
# Options offered by the demo dropdown.
my_dropdown = ['apples', 'oranges', 'pizza']


@interact(Selection=my_dropdown)
def viz(Selection):
    """Echo the option currently chosen in the dropdown."""
    message = f'You have selected: {Selection}'
    print(message)

interactive(children=(Dropdown(description='Selection', options=('apples', 'oranges', 'pizza'), value='apples'…

In [22]:
dates = list(web_events_df['Date'].unique())


# firstdate defaults to the earliest day and enddate to the latest day,
# because interact preselects the first option of each list.
@interact(firstdate=sorted(dates), enddate=sorted(dates, reverse=True))
def plot_dates(firstdate, enddate):
    """Plot the number of events per day between two dates, both inclusive.

    Parameters
    ----------
    firstdate : datetime.date
        First day to include; chosen through the first dropdown.
    enddate : datetime.date
        Last day to include; chosen through the second dropdown.

    If ``firstdate`` is later than ``enddate`` no rows match and the chart
    is empty.
    """
    # Use a half-open window [firstdate 00:00, enddate + 1 day 00:00).
    # Comparing against the bare end date would stop at its midnight and
    # silently drop every event on the selected end day.
    window_start = pd.Timestamp(firstdate)
    window_stop = pd.Timestamp(enddate) + pd.Timedelta(days=1)
    event_time = web_events_df['timestamp']
    filter_df = web_events_df[(event_time >= window_start) & (event_time < window_stop)]

    # Only the event count is plotted, so count that column alone.
    group_df = filter_df.groupby('Date', as_index=False).agg({'event': 'count'})
    fig = px.line(group_df, x='Date', y='event', title='Number of Events by Day', template='none')
    fig.show()

interactive(children=(Dropdown(description='firstdate', options=(datetime.date(2015, 5, 3), datetime.date(2015…

In [23]:
@interact(View=True, AddtoCart=False, Transaction=False)
def viz(View, AddtoCart, Transaction):
    """Print the shape of the web-events frame filtered to the ticked event types.

    Parameters
    ----------
    View, AddtoCart, Transaction : bool
        Checkbox states; each ticked box adds the matching value of the
        ``event`` column ('view', 'addtocart', 'transaction') to the filter.
    """
    checkbox_states = (
        ('view', View),
        ('addtocart', AddtoCart),
        ('transaction', Transaction),
    )
    events = [event_name for event_name, ticked in checkbox_states if ticked]

    keep_row = web_events_df['event'].isin(events)
    filtered = web_events_df.loc[keep_row]
    print(filtered.shape)


interactive(children=(Checkbox(value=True, description='View'), Checkbox(value=False, description='AddtoCart')…