In [1]:
import pandas as pd

filepath='data_set_da_test.csv'

df=pd.read_csv(filepath)

df.head()

Unnamed: 0,event_date,session,user,page_type,event_type,product
0,2022-10-08 17:02:41,14274187577460658115s,2006979063809820329u,search_listing_page,page_view,0
1,2022-10-08 17:06:19,14274187577460658115s,2006979063809820329u,search_listing_page,page_view,0
2,2022-10-08 22:19:47,2704204808571844605s,2007646148110679693u,listing_page,page_view,0
3,2022-10-08 22:24:30,8970170322512311099s,11839491588321754710u,search_listing_page,page_view,0
4,2022-10-08 21:22:20,16223970371660715740s,11839887495958431209u,product_page,page_view,0


In [2]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 637238 entries, 0 to 637237
Data columns (total 6 columns):
 #   Column      Non-Null Count   Dtype 
---  ------      --------------   ----- 
 0   event_date  637238 non-null  object
 1   session     637238 non-null  object
 2   user        637238 non-null  object
 3   page_type   637238 non-null  object
 4   event_type  637238 non-null  object
 5   product     637238 non-null  int64 
dtypes: int64(1), object(5)
memory usage: 29.2+ MB


## Event and pagetype value counts

In [3]:
event_type_counts = df.groupby('event_type').size().reset_index(name='count')
print(event_type_counts)



    event_type   count
0  add_to_cart   15999
1        order    8741
2    page_view  612498


In [4]:
page_type_counts = df.groupby('page_type').size().reset_index(name='count')
print(page_type_counts)

             page_type   count
0         listing_page  231789
1           order_page    8741
2         product_page  282950
3  search_listing_page  113758


# event and pagetype value counts by session and user id

## event

In [5]:
event_type_user_counts = df.groupby('event_type')['user'].nunique().reset_index(name='unique_user_count')
print(event_type_user_counts)

    event_type  unique_user_count
0  add_to_cart              10060
1        order               7338
2    page_view             287453


In [6]:
event_type_session_counts = df.groupby('event_type')['session'].nunique().reset_index(name='unique_session_count')
print(event_type_session_counts)

    event_type  unique_session_count
0  add_to_cart                 10667
1        order                  7637
2    page_view                339345


## page

In [7]:
page_type_user_counts = df.groupby('page_type')['user'].nunique().reset_index(name='unique_user_count')
print(page_type_user_counts)

             page_type  unique_user_count
0         listing_page             153009
1           order_page               7338
2         product_page             142515
3  search_listing_page              27711


In [8]:
page_type_session_counts = df.groupby('page_type')['session'].nunique().reset_index(name='unique_session_count')
print(page_type_session_counts)

             page_type  unique_session_count
0         listing_page                180930
1           order_page                  7637
2         product_page                162856
3  search_listing_page                 32498


# Funnel definition
Looking at available data schema the Purchase Funnel can be defined by count the number of sessions or users per major step (action or page)

1. Total Traffic
2. Interest demonstration (page visit on listing page, product page or search listing page) - an user can add an item through any of these three page types
3. Add to Cart
4. Purchase


Additionally other important data

- Successfull Search and discovery Rate (search or listing page results in product page view or add to cart)
- breakdown of funnel metrics by product id

## Total Metrics

In [9]:
total_sessions=df['session'].nunique()
print(f'sessions: {total_sessions}')


interested_session_df=df[df['page_type'] != 'order_page']
interested_sessions=interested_session_df['session'].nunique()

print(f'interested sessions: {interested_sessions}')



add_to_cart_df=df[df['event_type'] == 'add_to_cart']
add_to_cart_sessions=add_to_cart_df['session'].nunique()

print(f'add to cart sessions: {add_to_cart_sessions}')


purchase_df=df[df['event_type'] == 'order']
purchase_sessions=purchase_df['session'].nunique()

print(f'purchase sessions: {purchase_sessions}')

sessions: 340443
interested sessions: 339411
add to cart sessions: 10667
purchase sessions: 7637


## dataframe event and pagetype filters

In [10]:

search_listing_df=df[df['page_type'] == 'search_listing_page']
search_listing_sessions=search_listing_df['session'].nunique()
print(f'search_listing_sessions: {search_listing_sessions}')

search_listing_atc_df=df[(df['page_type'] == 'search_listing_page') & (df['event_type']=='add_to_cart')]


listing_df=df[df['page_type'] == 'listing_page']
listing_sessions=listing_df['session'].nunique()
print(f'listing_df: {listing_sessions}')


listing_atc_df=df[(df['page_type'] == 'listing_page') & (df['event_type']=='add_to_cart')]
listing_atc_sessions=listing_atc_df['session'].nunique()
print(f'listing_atc_sessions: {listing_atc_sessions}')


product_page_df=df[df['page_type'] == 'product_page']
product_page_sessions=product_page_df['session'].nunique()
print(f'product_page_sessions: {product_page_sessions}')


product_page_atc_df=df[(df['page_type'] == 'product_page') & (df['event_type']=='add_to_cart')]
product_page_atc_sessions=product_page_atc_df['session'].nunique()
print(f'product_page_atc_sessions: {product_page_atc_sessions}')


search_listing_sessions: 32498
search_listing_atc_sessions: 1992
listing_df: 180930
listing_atc_sessions: 386
product_page_sessions: 162856
product_page_atc_sessions: 8879


In [27]:
def funnel_merge_df_type(df, page_type):
    total_sessions = df['session'].nunique()

    if page_type == 'search_listing_page':
        df_funnel = df.merge(search_listing_df, on='session', how='left', suffixes=('', '_search'))
        df_funnel['purchase'] = df_funnel['session'].isin(search_listing_atc_df['session'])
        atc_type_sessions = search_listing_atc_df['session'].nunique()
        purchase_sessions = search_listing_atc_df.merge(purchase_df, on='session', how='inner')['session'].nunique()


    elif page_type == 'listing_page':
        df_funnel = df.merge(listing_df, on='session', how='left', suffixes=('', '_listing'))
        df_funnel['purchase'] = df_funnel['session'].isin(listing_atc_df['session'])
        atc_type_sessions = listing_atc_df['session'].nunique()
        purchase_sessions = listing_atc_df.merge(purchase_df, on='session', how='inner')['session'].nunique()


    elif page_type == 'product_page':
        df_funnel = df.merge(product_page_df, on='session', how='left', suffixes=('', '_product'))
        df_funnel['purchase'] = df_funnel['session'].isin(product_page_atc_df['session'])
        atc_type_sessions = product_page_atc_df['session'].nunique()
        purchase_sessions = product_page_atc_df.merge(purchase_df, on='session', how='inner')['session'].nunique()


    else:
        raise ValueError(f"Unknown page type: {page_type}")


    return pd.Series({
        'sessions': total_sessions,
        'interested_sessions': interested_sessions,
        'add_to_cart_sessions': atc_type_sessions,
        'purchase_sessions': purchase_sessions
    })

# Example usage:
for page_type in ['search_listing_page', 'listing_page', 'product_page']:
    metrics = funnel_merge_df_type(df, page_type)
    print(f"{page_type} metrics:\n{metrics}\n")


search_listing_page metrics:
sessions                340443
interested_sessions     339411
add_to_cart_sessions      1992
purchase_sessions          844
dtype: int64

listing_page metrics:
sessions                340443
interested_sessions     339411
add_to_cart_sessions       386
purchase_sessions           95
dtype: int64

product_page metrics:
sessions                340443
interested_sessions     339411
add_to_cart_sessions      8879
purchase_sessions         3603
dtype: int64



In [28]:
def funnel_merge_df_type(df, page_type):

    total_sessions = df['session'].nunique()

    if page_type == 'search_listing_page':
        df_funnel = df.merge(search_listing_df, on='session', how='left')
        interested_type_sessions = search_listing_df['session'].nunique()
        atc_type_sessions = search_listing_atc_df['session'].nunique()
        purchase_sessions = search_listing_atc_df.merge(purchase_df, on='session', how='inner')['session'].nunique()

    elif page_type == 'listing_page':
        df_funnel = df.merge(listing_df, on='session', how='left')
        interested_type_sessions = listing_df['session'].nunique()
        atc_type_sessions = listing_atc_df['session'].nunique()
        purchase_sessions = listing_atc_df.merge(purchase_df, on='session', how='inner')['session'].nunique()

    elif page_type == 'product_page':
        df_funnel = df.merge(product_page_df, on='session', how='left')
        interested_type_sessions = product_page_df['session'].nunique()
        atc_type_sessions = product_page_atc_df['session'].nunique()
        purchase_sessions = product_page_atc_df.merge(purchase_df, on='session', how='inner')['session'].nunique()

    else:
        raise ValueError(f"Unknown page type: {page_type}")

    return pd.Series({
        'sessions': total_sessions,
        'interested_sessions': interested_type_sessions,
        'add_to_cart_sessions': atc_type_sessions,
        'purchase_sessions': purchase_sessions
    })

# Example usage:
for page_type in ['search_listing_page', 'listing_page', 'product_page']:
    metrics = funnel_merge_df_type(df, page_type)
    print(f"{page_type} metrics:\n{metrics}\n")


search_listing_page metrics:
sessions                340443
interested_sessions      32498
add_to_cart_sessions      1992
purchase_sessions          844
dtype: int64

listing_page metrics:
sessions                340443
interested_sessions     180930
add_to_cart_sessions       386
purchase_sessions           95
dtype: int64

product_page metrics:
sessions                340443
interested_sessions     162856
add_to_cart_sessions      8879
purchase_sessions         3603
dtype: int64



## Conversion Rate Metrics

In [11]:
session_cr=round(purchase_sessions/total_sessions*100,2)
print(f'session conversion rate: {session_cr} %')

add_to_cart_rate=round(add_to_cart_sessions/total_sessions*100,2)
print(f'add to cart rate: {add_to_cart_rate} %')

cart_abandonment_rate=round((add_to_cart_sessions-purchase_sessions)/add_to_cart_sessions*100,2)
print(f'Cart Abandonment rate: {cart_abandonment_rate} %')


session conversion rate: 2.24 %
add to cart rate: 3.13 %
Cart Abandonment rate: 28.41 %


# Funnel Visualization with Plotly

In [12]:
import plotly.graph_objects as go

stages = ['Total Sessions', 'Interested Sessions', 'Add to Cart Sessions', 'Purchase Sessions']
values = [total_sessions, interested_sessions, add_to_cart_sessions, purchase_sessions]

fig = go.Figure(go.Funnel(
    y = stages,
    x = values,
    textinfo = "value+percent initial"
))

fig.update_layout(title="Funnel Visualization")

fig.show()