In [1]:
import numpy as np
import pandas as pd
import plotly.express as px

In [2]:
events_products = pd.read_csv('data/combined_events_products.csv')

In [3]:
events_products['date'] = pd.to_datetime(events_products['event_time']).dt.date

In [4]:
events_products.head()

Unnamed: 0,user_id,visit_id,event_name,sequence_number,event_time,page_name,product_category,date
0,1,ccf365,Page View,1,2020-02-04 19:16:09.182546,Home Page,,2020-02-04
1,1,ccf365,Page View,2,2020-02-04 19:16:17.358191,All Products,,2020-02-04
2,1,ccf365,Page View,3,2020-02-04 19:16:58.454669,Russian Caviar,Luxury,2020-02-04
3,1,ccf365,Page View,4,2020-02-04 19:16:58.609142,Lobster,Shellfish,2020-02-04
4,1,ccf365,Add to Cart,5,2020-02-04 19:17:51.72942,Lobster,Shellfish,2020-02-04


In [5]:
event_timeline = events_products.groupby(['date', 'event_name']).count()['user_id'].unstack(1).reset_index()

event_timeline.head()

event_name,date,Ad Click,Ad Impression,Add to Cart,Page View,Purchase
0,2020-01-01,2.0,2.0,22.0,47.0,3.0
1,2020-01-02,6.0,8.0,61.0,146.0,14.0
2,2020-01-03,4.0,4.0,54.0,135.0,12.0
3,2020-01-04,2.0,3.0,42.0,96.0,10.0
4,2020-01-05,2.0,2.0,20.0,47.0,5.0


In [6]:
unique_visits = events_products.groupby('date').nunique()['visit_id']

In [7]:
event_timeline = event_timeline.join(unique_visits, on='date')

In [8]:
event_timeline.rename(columns={"visit_id":"Site Visit"}, inplace=True)

In [9]:
fig = px.line(event_timeline, x='date', y=event_timeline.columns[1:])

# Add shading to show when marketing campaigns occur
# Half Off - Treat Your Shelf(ish)
fig.add_vrect(x0='2020-02-01', x1='2020-03-31', line_width=0, fillcolor="red", opacity=0.2)

fig.show()

In [10]:
events_products.groupby('event_name').count()['user_id'].reset_index().rename(columns={'user_id':'Count'}).sort_values('Count', ascending=False)

Unnamed: 0,event_name,Count
3,Page View,20928
2,Add to Cart,8451
4,Purchase,1777
1,Ad Impression,876
0,Ad Click,702


In [11]:
num_unique_visits = sum(unique_visits)

num_purchases = len(events_products[events_products['event_name'] == 'Purchase'])

# Percentage of all visits with a purchase event
percentage_purchase = round(num_purchases / num_unique_visits * 100, 1)
print(percentage_purchase)

49.7


In [12]:
def viewed_checkout(columns):
    event_name = columns[0]
    page_name = columns[1]

    if event_name == 'Page View' and page_name == 'Checkout':
        return 1
    else:
        return 0

def made_purchase(column):
    event_name = column

    if event_name == 'Purchase':
        return 1
    else:
        return 0

In [13]:
events_products['viewed_checkout'] = events_products[['event_name', 'page_name']].apply(viewed_checkout, axis=1)

In [14]:
events_products['made_purchase'] = events_products['event_name'].apply(made_purchase)

In [15]:
view_checkout_no_purchase = events_products.groupby('visit_id').sum()[['viewed_checkout', 'made_purchase']]

In [16]:
num_view_checkout_no_purchase = len(view_checkout_no_purchase[(view_checkout_no_purchase['viewed_checkout'] == 1) & (view_checkout_no_purchase['made_purchase'] == 0)])

# Percentage of visits that have a checkout view, but no purchase

percentage_no_purchase = round(num_view_checkout_no_purchase / num_unique_visits * 100, 1)
print(percentage_no_purchase)

9.1


In [96]:
top_pages = events_products[events_products['event_name'] == 'Page View'].groupby('page_name').count().reset_index()[['page_name', 'user_id']].rename(columns={'user_id':'Views'})

top_pages = top_pages.sort_values('Views', ascending=False)#.iloc[:3]
top_pages

Unnamed: 0,page_name,Views
1,All Products,3174
3,Checkout,2103
5,Home Page,1782
8,Oyster,1568
4,Crab,1564
9,Russian Caviar,1563
6,Kingfish,1559
10,Salmon,1559
7,Lobster,1547
0,Abalone,1525


In [95]:
purchase_visit_id = events_products[events_products['event_name'] == 'Purchase']['visit_id']

purchases = events_products[events_products['visit_id'].isin(purchase_visit_id)]
top_purchases = purchases.groupby('page_name').count().reset_index()[['page_name', 'user_id']].rename(columns={'user_id':'Purchases'}).sort_values('Purchases', ascending=False)
top_purchases = top_purchases[~top_purchases['page_name'].isin(['All Products', 'Checkout', 'Confirmation', 'Home Page'])]#.iloc[:3]
top_purchases

Unnamed: 0,page_name,Purchases
8,Lobster,1867
9,Oyster,1862
5,Crab,1827
7,Kingfish,1813
11,Salmon,1805
10,Russian Caviar,1785
0,Abalone,1784
12,Tuna,1755
2,Black Truffle,1752


In [94]:
product_category_views_cart_adds = events_products[events_products['event_name'].isin(['Page View', 'Add to Cart'])].groupby(['product_category', 'event_name']).count().unstack()['user_id'].reset_index()

product_category_views_cart_adds

event_name,product_category,Add to Cart,Page View
0,Fish,2789,4633
1,Luxury,1870,3032
2,Shellfish,3792,6204
