# Basic analysis

In [1]:
# import the ObjectivFrame from Bach's open taxonomy package
from bach_open_taxonomy import ObjectivFrame 

In [2]:
# Build the ObjectivFrame on top of the `data` table of the local PostgreSQL
# database. `time_aggregation` is set to a daily format here; `start_date`
# presumably restricts the frame to events from that day onwards (confirm).
of = ObjectivFrame.from_objectiv_data(
    db_url='postgresql://postgres:@localhost:5432/objectiv',
    table_name='data',
    time_aggregation='YYYY-MM-DD',
    start_date='2022-02-02',
)

In [3]:
# Derive convenience columns from the two context columns:
#   application       <- `id` of the ApplicationContext in global_contexts
#   root_location     <- `id` of the RootLocationContext in location_stack
#   referrer          <- `referrer` of the HttpContext in global_contexts
#   feature_nice_name <- `nice_name` of the location stack
# The assignment order determines the column order of the frame.
of['application'] = of.global_contexts.gc.get_from_context_with_type_series(
    type='ApplicationContext',
    key='id',
)
of['root_location'] = of.location_stack.ls.get_from_context_with_type_series(
    type='RootLocationContext',
    key='id',
)
of['referrer'] = of.global_contexts.gc.get_from_context_with_type_series(
    type='HttpContext',
    key='referrer',
)
of['feature_nice_name'] = of.location_stack.ls.nice_name

In [4]:
# Preview the data: first rows when ordered by descending session_id.
(
    of
    .sort_values('session_id', ascending=False)
    .head()
)

Unnamed: 0_level_0,day,moment,user_id,global_contexts,location_stack,event_type,stack_event_types,session_id,session_hit_number,application,root_location,referrer,feature_nice_name
event_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
a0e92481-153e-4673-8991-50028e5b7988,2022-02-14,2022-02-14 09:13:09.907,8299fb69-22da-48a7-9ae4-4de8ddb47068,"[{'id': 'http_context', '_type': 'HttpContext'...","[{'id': 'home', '_type': 'RootLocationContext'...",PressEvent,"[AbstractEvent, InteractiveEvent, PressEvent]",806,1,objectiv-website,home,,Link: objectiv-on-github located at Root Locat...
2926d1f0-05f3-4d92-8f84-95338678e041,2022-02-14,2022-02-14 09:13:07.846,7c406acb-3d85-416f-a515-1caf6bc2a8a1,"[{'id': 'http_context', '_type': 'HttpContext'...","[{'id': 'blog', '_type': 'RootLocationContext'...",PressEvent,"[AbstractEvent, InteractiveEvent, PressEvent]",805,4,objectiv-website,blog,,Link: meet-objectiv-open-source-product-analyt...
cbbbbca4-e076-4a3f-b698-cd9e646c6947,2022-02-14,2022-02-14 09:13:05.155,7c406acb-3d85-416f-a515-1caf6bc2a8a1,"[{'id': 'http_context', '_type': 'HttpContext'...","[{'id': 'home', '_type': 'RootLocationContext'...",ApplicationLoadedEvent,"[AbstractEvent, ApplicationLoadedEvent, NonInt...",805,1,objectiv-website,home,,Root Location: home
ac1075d2-a1a2-4ea6-bd00-22675ffc26e7,2022-02-14,2022-02-14 09:13:06.084,7c406acb-3d85-416f-a515-1caf6bc2a8a1,"[{'id': 'http_context', '_type': 'HttpContext'...","[{'id': 'home', '_type': 'RootLocationContext'...",MediaLoadEvent,"[AbstractEvent, MediaEvent, MediaLoadEvent, No...",805,2,objectiv-website,home,,Media Player: 2-minute-video located at Root L...
248a4052-aa5c-484e-bf66-01e2e7be0f4f,2022-02-14,2022-02-14 09:13:06.741,7c406acb-3d85-416f-a515-1caf6bc2a8a1,"[{'id': 'http_context', '_type': 'HttpContext'...","[{'id': 'home', '_type': 'RootLocationContext'...",PressEvent,"[AbstractEvent, InteractiveEvent, PressEvent]",805,3,objectiv-website,home,,Link: blog located at Root Location: home => N...


In [5]:
# Summary statistics over all column types (include='all'); show the first rows.
(
    of
    .describe(include='all')
    .head()
)

Unnamed: 0_level_0,day,moment,event_type,session_id,session_hit_number,application,root_location,referrer,feature_nice_name
__stat,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
count,6979,6979,6979,6979.0,6979.0,6979,6979,353,6979
mean,,,,298.74,21.07,,,,
std,,,,266.55,38.34,,,,
min,2022-02-02,2022-02-02 07:42:11.957,ApplicationLoadedEvent,1.0,1.0,objectiv-docs,about,,Expandable: Aggregation & windowing located at...
max,2022-02-14,2022-02-14 10:01:08.329,VisibleEvent,806.0,308.0,objectiv-website,tracking,https://www.google.com/,Root Location: tracking


## Users

In [6]:
# Model hub: number of unique users, aggregated per month.
users = of.model_hub.aggregate.unique_users(
    time_aggregation='YYYY-MM',
)
# latest period first
(
    users
    .sort_index(ascending=False)
    .head()
)

moment
2022-02    485
Name: unique_users, dtype: int64

In [7]:
# Model hub: number of unique users per day. No time_aggregation is passed;
# presumably the frame-level 'YYYY-MM-DD' setting is used (confirm).
users = of.model_hub.aggregate.unique_users()
# latest day first, up to 15 rows
(
    users
    .sort_index(ascending=False)
    .head(15)
)

moment
2022-02-14      9
2022-02-13      3
2022-02-12      8
2022-02-11     14
2022-02-10     13
2022-02-09     23
2022-02-08     33
2022-02-07     50
2022-02-06     20
2022-02-05     33
2022-02-04     53
2022-02-03    100
2022-02-02    239
Name: unique_users, dtype: int64

In [8]:
# Unique users per (application, root_location) combination.
users_root = (
    of
    .groupby(['application', 'root_location'])
    .agg({'user_id': 'nunique'})
)
# ten combinations with the largest unique-user counts
(
    users_root
    .sort_values('user_id_nunique', ascending=False)
    .head(10)
)

Unnamed: 0_level_0,Unnamed: 1_level_0,user_id_nunique
application,root_location,Unnamed: 2_level_1
objectiv-website,blog,263
objectiv-website,home,241
objectiv-docs,docs,160
objectiv-website,about,77
objectiv-website,jobs,66
objectiv-docs,home,7
objectiv-docs,tracking,4
objectiv-docs,taxonomy,4
objectiv-website,privacy,2
objectiv-docs,modeling,1


## Time spent

In [9]:
# Model hub: session duration, aggregated per month.
duration = of.model_hub.aggregate.session_duration(
    time_aggregation='YYYY-MM',
)
# latest period first
(
    duration
    .sort_index(ascending=False)
    .head()
)

moment
2022-02   0 days 00:04:09.943920
Name: session_duration, dtype: timedelta64[ns]

In [10]:
# Model hub: session duration, aggregated per day.
duration = of.model_hub.aggregate.session_duration(
    time_aggregation='YYYY-MM-DD',
)
# latest day first
(
    duration
    .sort_index(ascending=False)
    .head()
)

moment
2022-02-14   0 days 00:07:16.427273
2022-02-13   0 days 00:00:24.635000
2022-02-12   0 days 00:02:30.655000
2022-02-11   0 days 00:03:09.692500
2022-02-10   0 days 00:04:31.578263
Name: session_duration, dtype: timedelta64[ns]

In [11]:
# Monthly session duration, restricted to events whose root_location is 'home'.
(
    of[of.root_location == 'home']
    .model_hub.aggregate.session_duration(time_aggregation='YYYY-MM')
    .head()
)

moment
2022-02   0 days 00:05:06.371854
Name: session_duration, dtype: timedelta64[ns]

In [12]:
# Monthly session duration, restricted to events whose root_location is 'blog'.
(
    of[of.root_location == 'blog']
    .model_hub.aggregate.session_duration(time_aggregation='YYYY-MM')
    .head()
)

moment
2022-02   0 days 00:02:26.030345
Name: session_duration, dtype: timedelta64[ns]

In [13]:
# Monthly session duration, restricted to events of the 'objectiv-docs' application.
(
    of[of.application == 'objectiv-docs']
    .model_hub.aggregate.session_duration(time_aggregation='YYYY-MM')
    .head()
)

moment
2022-02   0 days 00:05:16.982017
Name: session_duration, dtype: timedelta64[ns]

## Top user interactions

In [14]:
# Keep only PressEvent and MediaStartEvent rows (to focus on user actions),
# then count unique users per (application, feature, event type).
users_feature = (
    of[(of.event_type == 'PressEvent') | (of.event_type == 'MediaStartEvent')]
    .groupby(['application', 'feature_nice_name', 'event_type'])
    .agg({'user_id': 'nunique'})
)
# features with the largest unique-user counts first
(
    users_feature
    .sort_values('user_id_nunique', ascending=False)
    .head()
)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,user_id_nunique
application,feature_nice_name,event_type,Unnamed: 3_level_1
objectiv-website,Media Player: objectiv-in-2-minutes located at Root Location: blog => Content: post-meet-objectiv-open-source-product-analytics-designed-for-data-sc,MediaStartEvent,96
objectiv-website,Media Player: 2-minute-video located at Root Location: home,MediaStartEvent,69
objectiv-website,Link: about-us located at Root Location: home => Navigation: navbar-top,PressEvent,48
objectiv-website,Pressable: hamburger located at Root Location: home => Navigation: navbar-top,PressEvent,36
objectiv-website,Link: logo located at Root Location: blog => Navigation: navbar-top,PressEvent,35


## What did users do most on home?

In [15]:
# Step 1: events on the 'home' root location.
home_users = of[of.root_location == 'home']

# Step 2: website application only, excluding the docs.
home_users = home_users[home_users.application == 'objectiv-website']

# Step 3: PressEvent and MediaStartEvent only, to focus on user actions.
home_users = home_users[
    (home_users.event_type == 'PressEvent')
    | (home_users.event_type == 'MediaStartEvent')
]

# Step 4: unique users per (feature, event type), largest counts first.
home_users = (
    home_users
    .groupby(['feature_nice_name', 'event_type'])
    .agg({'user_id': 'nunique'})
)
(
    home_users
    .sort_values('user_id_nunique', ascending=False)
    .head()
)

Unnamed: 0_level_0,Unnamed: 1_level_0,user_id_nunique
feature_nice_name,event_type,Unnamed: 2_level_1
Media Player: 2-minute-video located at Root Location: home,MediaStartEvent,69
Link: about-us located at Root Location: home => Navigation: navbar-top,PressEvent,48
Pressable: hamburger located at Root Location: home => Navigation: navbar-top,PressEvent,36
Link: docs located at Root Location: home => Navigation: navbar-top,PressEvent,19
Link: blog located at Root Location: home => Navigation: navbar-top,PressEvent,18


## What did users do most in docs?

In [16]:
# Step 1: events of the 'objectiv-docs' application (filter is on `application`,
# not on root_location).
docs_users = of[of.application == 'objectiv-docs']

# Step 2: PressEvent and MediaStartEvent only, to focus on user actions.
docs_users = docs_users[
    (docs_users.event_type == 'PressEvent')
    | (docs_users.event_type == 'MediaStartEvent')
]

# Step 3: unique users per (feature, event type), largest counts first.
docs_users = (
    docs_users
    .groupby(['feature_nice_name', 'event_type'])
    .agg({'user_id': 'nunique'})
)
(
    docs_users
    .sort_values('user_id_nunique', ascending=False)
    .head()
)

Unnamed: 0_level_0,Unnamed: 1_level_0,user_id_nunique
feature_nice_name,event_type,Unnamed: 2_level_1
Link: Tracking located at Root Location: docs => Navigation: navbar-top,PressEvent,20
Link: logo located at Root Location: docs => Navigation: navbar-top,PressEvent,18
Link: Modeling located at Root Location: docs => Navigation: navbar-top,PressEvent,18
Link: Taxonomy located at Root Location: docs => Navigation: navbar-top,PressEvent,13
Link: Introduction located at Root Location: docs => Navigation: docs-sidebar,PressEvent,9


## Where did users come from?

In [17]:
# Unique users per referrer value.
# NOTE(review): the rendered output lists two separate blank referrer groups,
# presumably empty string vs. missing value - confirm before interpreting.
referrer_users = (
    of
    .groupby(['referrer'])
    .agg({'user_id': 'nunique'})
)
(
    referrer_users
    .sort_values('user_id_nunique', ascending=False)
    .head()
)

Unnamed: 0_level_0,user_id_nunique
referrer,Unnamed: 1_level_1
,467
,24
android-app://com.slack/,1
https://www.google.com,1
https://www.google.com/,1


## Set conversion

In [18]:
# Register the conversion event 'github_press': a PressEvent whose location
# stack matches the given selection. The selection is the location stack
# sliced from the 'objectiv-on-github' LinkContext, with fillna() falling back
# to the slice from the 'github' LinkContext where the first one has no value.
of.add_conversion_event(
    location_stack=of.location_stack.json[
        {'id': 'objectiv-on-github', '_type': 'LinkContext'}:
    ].fillna(
        of.location_stack.json[{'id': 'github', '_type': 'LinkContext'}:]
    ),
    event_type='PressEvent',
    name='github_press',
)

## Number of daily conversions

In [19]:
# Unique users per day among events flagged as the 'github_press' conversion,
# latest day first.
(
    of.model_hub
    .filter(of.model_hub.map.is_conversion_event('github_press'))
    .model_hub.aggregate.unique_users()
    .sort_index(ascending=False)
    .head(10)
)

moment
2022-02-14     1
2022-02-13     1
2022-02-12     1
2022-02-10     2
2022-02-09     3
2022-02-07     3
2022-02-04     5
2022-02-03     7
2022-02-02    16
Name: unique_users, dtype: int64

## From where do users convert most?

In [20]:
# Keep only 'github_press' conversion events, then count unique users per
# (application, feature, event type).
conversion_locations = (
    of.model_hub
    .filter(of.model_hub.map.is_conversion_event('github_press'))
    .groupby(['application', 'feature_nice_name', 'event_type'])
    .agg({'user_id': 'nunique'})
)

# locations with the largest number of converting users first
(
    conversion_locations
    .sort_values('user_id_nunique', ascending=False)
    .head()
)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,user_id_nunique
application,feature_nice_name,event_type,Unnamed: 3_level_1
objectiv-website,Link: objectiv-on-github located at Root Location: home => Content: the-stack,PressEvent,16
objectiv-website,Link: objectiv-on-github located at Root Location: blog => Content: post-meet-objectiv-open-source-product-analytics-designed-for-data-sc => Content: blog-post-try-objectiv,PressEvent,11
objectiv-website,Link: github located at Root Location: home => Navigation: navbar-top,PressEvent,8
objectiv-website,Link: github located at Root Location: blog => Navigation: navbar-top => Overlay: hamburger-menu,PressEvent,2
objectiv-website,Link: github located at Root Location: home => Navigation: navbar-top => Overlay: hamburger-menu,PressEvent,2


## Time spent by converting users

In [21]:
# Use the model hub to keep only rows of converting users and calculate their
# session duration. conversions_counter is partitioned by user_id here, so the
# `>= 1` filter is applied per user rather than per session.
# The `aggregate` accessor is spelled out (instead of the `agg` shorthand) to
# match every other cell in this notebook.
(
    of.model_hub
    .filter(of.model_hub.map.conversions_counter(name='github_press', partition='user_id') >= 1)
    .model_hub.aggregate.session_duration()
    .sort_index(ascending=False)
    .head()
)

moment
2022-02-14   0 days 00:14:37.563250
2022-02-13   0 days 00:00:06.779000
2022-02-12   0 days 00:03:10.018000
2022-02-10   0 days 00:00:50.597000
2022-02-09   0 days 00:05:40.884571
Name: session_duration, dtype: timedelta64[ns]

## Top user interactions before conversions

In [22]:
# Select sessions with a conversion. No partition is passed to
# conversions_counter; presumably it counts per session by default (confirm).
converted_users = of.model_hub.filter(
    of.model_hub.map.conversions_counter(name='github_press') >= 1
)

# From those, select hits where the number of conversions was still 0.
# `model_hub` is spelled out (instead of the `mh` shorthand) to match the rest
# of the notebook.
converted_users = converted_users.model_hub.filter(
    converted_users.model_hub.map.conversions_in_time('github_press') == 0
)

# Select PressEvent and MediaStartEvent, to focus on user actions.
converted_users = converted_users[
    (converted_users.event_type == 'PressEvent')
    | (converted_users.event_type == 'MediaStartEvent')
]

# Unique users per feature. `converted_users` itself is left un-aggregated
# because the "Time spent before conversions" cell reuses it.
(
    converted_users
    .groupby(['application', 'feature_nice_name', 'event_type'])
    .agg({'user_id': 'nunique'})
    .sort_values('user_id_nunique', ascending=False)
    .head(10)
)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,user_id_nunique
application,feature_nice_name,event_type,Unnamed: 3_level_1
objectiv-website,Media Player: objectiv-in-2-minutes located at Root Location: blog => Content: post-meet-objectiv-open-source-product-analytics-designed-for-data-sc,MediaStartEvent,5
objectiv-website,Link: logo located at Root Location: blog => Navigation: navbar-top,PressEvent,4
objectiv-website,Media Player: 2-minute-video located at Root Location: home,MediaStartEvent,4
objectiv-website,Pressable: hamburger located at Root Location: blog => Navigation: navbar-top,PressEvent,3
objectiv-website,Link: blog located at Root Location: home => Navigation: navbar-top,PressEvent,3
objectiv-website,Link: faq located at Root Location: home => Navigation: navbar-top,PressEvent,2
objectiv-website,Link: meet-objectiv-open-source-product-analytics-designed-for-data-sc located at Root Location: blog => Content: post-meet-objectiv-open-source-product-analytics-designed-for-data-sc => Navigation: header,PressEvent,2
objectiv-docs,Link: Modeling located at Root Location: docs => Navigation: navbar-top,PressEvent,2
objectiv-docs,Link: HttpContext located at Root Location: docs => Navigation: docs-sidebar => Expandable: Reference => Expandable: Global Contexts,PressEvent,2
objectiv-docs,Link: Tracking located at Root Location: docs => Navigation: navbar-top,PressEvent,2


## Time spent before conversions

In [23]:
# Model hub: monthly session duration over `converted_users`, which holds the
# events that precede the first conversion.
# NOTE(review): `converted_users` was also restricted to PressEvent and
# MediaStartEvent rows in the previous cell, so the duration is computed over
# those events only - confirm this is intended.
(
    converted_users
    .model_hub.aggregate.session_duration(time_aggregation='YYYY-MM')
    .head()
)

moment
2022-02   0 days 00:05:13.075933
Name: session_duration, dtype: timedelta64[ns]