# Basic analysis

In [1]:
import os

# import Bach
from bach_open_taxonomy import ObjectivFrame

In [2]:
# connect to SQL db
# The connection string can be overridden with the OBJECTIV_DB_URL environment
# variable, so credentials never need to be committed with the notebook. When the
# variable is not set, the original local database URL is used.
DB_URL = os.environ.get('OBJECTIV_DB_URL', 'postgresql://postgres:@localhost:5432/objectiv')

of = ObjectivFrame.from_objectiv_data(db_url=DB_URL,
                                      table_name='data',
                                      time_aggregation='YYYY-MM-DD',
                                      start_date='2022-02-02')

In [3]:
# add columns with values taken from the global contexts and the location stack

# id of the ApplicationContext found in the global contexts
of['application'] = of.global_contexts.gc.get_from_context_with_type_series(
    type='ApplicationContext',
    key='id',
)

# id of the RootLocationContext found in the location stack
of['root_location'] = of.location_stack.ls.get_from_context_with_type_series(
    type='RootLocationContext',
    key='id',
)

# readable name of the location stack, used below to group by feature
of['feature_nice_name'] = of.location_stack.ls.nice_name

In [4]:
# explore the data: first rows after ordering on session_id, highest first
events_by_session = of.sort_values('session_id', ascending=False)
events_by_session.head()

Unnamed: 0_level_0,day,moment,user_id,global_contexts,location_stack,event_type,stack_event_types,session_id,session_hit_number,application,root_location,feature_nice_name
event_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
89fc7524-d960-4dee-94bb-778ab2e82b7a,2022-02-09,2022-02-09 08:42:05.775,7c406acb-3d85-416f-a515-1caf6bc2a8a1,"[{'id': 'objectiv-website', '_type': 'Applicat...","[{'id': 'home', '_type': 'RootLocationContext'...",PressEvent,"[AbstractEvent, InteractiveEvent, PressEvent]",709,3,objectiv-website,home,Link: blog located at Root Location: home => N...
ca6c4155-db00-4ffa-92fa-bb9e7858bd87,2022-02-09,2022-02-09 08:42:06.789,7c406acb-3d85-416f-a515-1caf6bc2a8a1,"[{'id': 'objectiv-website', '_type': 'Applicat...","[{'id': 'blog', '_type': 'RootLocationContext'...",PressEvent,"[AbstractEvent, InteractiveEvent, PressEvent]",709,4,objectiv-website,blog,Link: meet-objectiv-open-source-product-analyt...
c6777b55-c184-490c-bc1f-01ef3f0cafee,2022-02-09,2022-02-09 08:41:55.744,7c406acb-3d85-416f-a515-1caf6bc2a8a1,"[{'id': 'objectiv-website', '_type': 'Applicat...","[{'id': 'home', '_type': 'RootLocationContext'...",ApplicationLoadedEvent,"[AbstractEvent, ApplicationLoadedEvent, NonInt...",709,1,objectiv-website,home,Root Location: home
734bdd87-da54-426a-959c-bc3147e5f691,2022-02-09,2022-02-09 08:41:56.623,7c406acb-3d85-416f-a515-1caf6bc2a8a1,"[{'id': 'objectiv-website', '_type': 'Applicat...","[{'id': 'home', '_type': 'RootLocationContext'...",MediaLoadEvent,"[AbstractEvent, MediaEvent, MediaLoadEvent, No...",709,2,objectiv-website,home,Media Player: 2-minute-video located at Root L...
ed35fd32-f5ad-4fdf-be50-4bd5224acd7e,2022-02-09,2022-02-09 08:42:07.623,7c406acb-3d85-416f-a515-1caf6bc2a8a1,"[{'id': 'objectiv-website', '_type': 'Applicat...","[{'id': 'blog', '_type': 'RootLocationContext'...",MediaLoadEvent,"[AbstractEvent, MediaEvent, MediaLoadEvent, No...",709,5,objectiv-website,blog,Media Player: objectiv-in-2-minutes located at...


In [23]:
# explore the data with describe, covering all columns; show the first rows of the summary
data_summary = of.describe(include='all')
data_summary.head()

Unnamed: 0_level_0,day,moment,event_type,session_id,session_hit_number,application,root_location,feature_nice_name
__stat,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
count,6323,6323,6323,6323.0,6323.0,6323,6323,6323
mean,,,,250.61,22.35,,,
std,,,,231.69,39.97,,,
min,2022-02-02,2022-02-02 07:42:11.957,ApplicationLoadedEvent,1.0,1.0,objectiv-docs,about,Expandable: Aggregation & windowing located at...
max,2022-02-09,2022-02-09 08:42:37.659,VisibleEvent,709.0,308.0,objectiv-website,privacy,Root Location: privacy


## Users

In [6]:
# model hub: unique users, aggregated per month ('YYYY-MM')
users = of.model_hub.aggregate.unique_users(time_aggregation='YYYY-MM')

# most recent period first
users_latest_first = users.sort_index(ascending=False)
users_latest_first.head()

moment
2022-02    449
Name: unique_users, dtype: int64

In [7]:
# model hub: unique users, daily
# no time_aggregation is passed here; presumably the 'YYYY-MM-DD' aggregation
# given when loading the data is used -- confirm against the model hub docs
users = of.model_hub.aggregate.unique_users()

# most recent days first, at most ten of them
users_latest_first = users.sort_index(ascending=False)
users_latest_first.head(10)

moment
2022-02-09      6
2022-02-08     33
2022-02-07     50
2022-02-06     20
2022-02-05     33
2022-02-04     53
2022-02-03    100
2022-02-02    239
Name: unique_users, dtype: int64

In [8]:
# number of distinct user_ids for every root_location
per_root_location = of.groupby(['root_location'])
users_root = per_root_location.agg({'user_id': 'nunique'})

# root locations with the most users first
users_root.sort_values('user_id_nunique', ascending=False).head()

Unnamed: 0_level_0,user_id_nunique
root_location,Unnamed: 1_level_1
blog,253
home,211
docs,153
about,71
jobs,63


## Time spent

In [9]:
# model hub: session duration, aggregated per month ('YYYY-MM')
duration = of.model_hub.aggregate.session_duration(time_aggregation='YYYY-MM')

# most recent period first
duration_latest_first = duration.sort_index(ascending=False)
duration_latest_first.head()

moment
2022-02   0 days 00:04:09.810404
Name: session_duration, dtype: timedelta64[ns]

In [10]:
# model hub: session duration, aggregated per day ('YYYY-MM-DD')
duration = of.model_hub.aggregate.session_duration(time_aggregation='YYYY-MM-DD')

# most recent days first
duration_latest_first = duration.sort_index(ascending=False)
duration_latest_first.head()

moment
2022-02-09   0 days 00:03:30.955000
2022-02-08   0 days 00:02:07.429182
2022-02-07   0 days 00:03:25.079214
2022-02-06   0 days 00:00:34.271120
2022-02-05   0 days 00:01:55.235333
Name: session_duration, dtype: timedelta64[ns]

In [11]:
# duration by RootLocation: monthly session_duration computed on the rows
# whose root_location is 'home'
home_events = of[of.root_location == 'home']
home_events.model_hub.aggregate.session_duration(time_aggregation='YYYY-MM').head()

moment
2022-02   0 days 00:05:33.257737
Name: session_duration, dtype: timedelta64[ns]

In [12]:
# monthly session_duration computed on the rows whose root_location is 'docs'
docs_events = of[of.root_location == 'docs']
docs_events.model_hub.aggregate.session_duration(time_aggregation='YYYY-MM').head()

moment
2022-02   0 days 00:05:24.889905
Name: session_duration, dtype: timedelta64[ns]

In [13]:
# NOTE(review): this cell repeats the 'docs' selection of the preceding cell verbatim
# and produces the same result -- presumably another root_location was intended
# here; confirm the intended value or remove the cell.
docs_events = of[of.root_location == 'docs']
docs_events.model_hub.aggregate.session_duration(time_aggregation='YYYY-MM').head()

moment
2022-02   0 days 00:05:24.889905
Name: session_duration, dtype: timedelta64[ns]

## Top user interactions

In [14]:
# keep only PressEvent and MediaStartEvent rows, to focus on user actions
is_press = of.event_type == 'PressEvent'
is_media_start = of.event_type == 'MediaStartEvent'
users_feature = of[is_press | is_media_start]

# distinct users for every combination of feature and event type
users_feature = (
    users_feature
    .groupby(['feature_nice_name', 'event_type'])
    .agg({'user_id': 'nunique'})
)

# features used by the most users first
users_feature.sort_values('user_id_nunique', ascending=False).head()

Unnamed: 0_level_0,Unnamed: 1_level_0,user_id_nunique
feature_nice_name,event_type,Unnamed: 2_level_1
Media Player: objectiv-in-2-minutes located at Root Location: blog => Content: post-meet-objectiv-open-source-product-analytics-designed-for-data-sc,MediaStartEvent,95
Media Player: 2-minute-video located at Root Location: home,MediaStartEvent,65
Link: about-us located at Root Location: home => Navigation: navbar-top,PressEvent,43
Link: logo located at Root Location: blog => Navigation: navbar-top,PressEvent,35
Pressable: hamburger located at Root Location: home => Navigation: navbar-top,PressEvent,34


## What did users do most on home?

In [15]:
# restrict to rows on the 'home' RootLocation
home_events = of[of.root_location == 'home']

# keep only PressEvent and MediaStartEvent rows, to focus on user actions
is_press = home_events.event_type == 'PressEvent'
is_media_start = home_events.event_type == 'MediaStartEvent'
home_actions = home_events[is_press | is_media_start]

# distinct users on home for every combination of feature and event type
home_users = (
    home_actions
    .groupby(['feature_nice_name', 'event_type'])
    .agg({'user_id': 'nunique'})
)

# features used by the most users first
home_users.sort_values('user_id_nunique', ascending=False).head()

Unnamed: 0_level_0,Unnamed: 1_level_0,user_id_nunique
feature_nice_name,event_type,Unnamed: 2_level_1
Media Player: 2-minute-video located at Root Location: home,MediaStartEvent,65
Link: about-us located at Root Location: home => Navigation: navbar-top,PressEvent,43
Pressable: hamburger located at Root Location: home => Navigation: navbar-top,PressEvent,34
Link: docs located at Root Location: home => Navigation: navbar-top,PressEvent,18
Link: blog located at Root Location: home => Navigation: navbar-top,PressEvent,17


## What did users do most in docs?

In [16]:
# restrict to rows on the 'docs' RootLocation
docs_events = of[of.root_location == 'docs']

# keep only PressEvent and MediaStartEvent rows, to focus on user actions
is_press = docs_events.event_type == 'PressEvent'
is_media_start = docs_events.event_type == 'MediaStartEvent'
docs_actions = docs_events[is_press | is_media_start]

# distinct users in docs for every combination of feature and event type
docs_users = (
    docs_actions
    .groupby(['feature_nice_name', 'event_type'])
    .agg({'user_id': 'nunique'})
)

# features used by the most users first
docs_users.sort_values('user_id_nunique', ascending=False).head()

Unnamed: 0_level_0,Unnamed: 1_level_0,user_id_nunique
feature_nice_name,event_type,Unnamed: 2_level_1
Link: Tracking located at Root Location: docs => Navigation: navbar-top,PressEvent,18
Link: Modeling located at Root Location: docs => Navigation: navbar-top,PressEvent,18
Link: logo located at Root Location: docs => Navigation: navbar-top,PressEvent,16
Link: Taxonomy located at Root Location: docs => Navigation: navbar-top,PressEvent,10
Link: Core Concepts located at Root Location: docs => Navigation: docs-sidebar,PressEvent,8


## Define conversion events

In [17]:
# TODO: can this be done in one go, under a single conversion event name?

# define which events to use as conversion events:
# a PressEvent on the 'objectiv-on-github' link ...
github_cta_stack = of.location_stack.json[{'id': 'objectiv-on-github', '_type': 'LinkContext'}:]
of.add_conversion_event(
    location_stack=github_cta_stack,
    event_type='PressEvent',
    name='github_cta',
)

# ... and a PressEvent on the 'github' link
github_nav_stack = of.location_stack.json[{'id': 'github', '_type': 'LinkContext'}:]
of.add_conversion_event(
    location_stack=github_nav_stack,
    event_type='PressEvent',
    name='github_nav',
)

# combine both conversion definitions into one series; later cells pass it to
# model_hub.filter to keep only the conversion events
is_github_cta = of.model_hub.map.is_conversion_event('github_cta')
is_github_nav = of.model_hub.map.is_conversion_event('github_nav')
conversion_events = is_github_cta | is_github_nav

## Daily number of converting users

In [18]:
# unique users computed on the conversion events only, most recent days first
converting_events = of.model_hub.filter(conversion_events)
converting_users = converting_events.model_hub.aggregate.unique_users()
converting_users.sort_index(ascending=False).head(10)

moment
2022-02-09     2
2022-02-07     3
2022-02-04     5
2022-02-03     7
2022-02-02    16
Name: unique_users, dtype: int64

## From where do users convert most?

In [19]:
# keep the conversion events only, then count distinct users for every
# combination of feature and event type
conversion_locations = (
    of.model_hub.filter(conversion_events)
    .groupby(['feature_nice_name', 'event_type'])
    .agg({'user_id': 'nunique'})
)

# locations with the most converting users first
conversion_locations.sort_values('user_id_nunique', ascending=False).head()

Unnamed: 0_level_0,Unnamed: 1_level_0,user_id_nunique
feature_nice_name,event_type,Unnamed: 2_level_1
Link: objectiv-on-github located at Root Location: blog => Content: post-meet-objectiv-open-source-product-analytics-designed-for-data-sc => Content: blog-post-try-objectiv,PressEvent,11
Link: objectiv-on-github located at Root Location: home => Content: the-stack,PressEvent,11
Link: github located at Root Location: home => Navigation: navbar-top,PressEvent,7
Link: github located at Root Location: blog => Navigation: navbar-top => Overlay: hamburger-menu,PressEvent,2
Link: github located at Root Location: home => Navigation: navbar-top => Overlay: hamburger-menu,PressEvent,2


## How much time do converting users spend?

In [20]:
# TODO: this only takes one of the two conversion events ('github_cta') into account

# use model hub to filter on a conversion count of at least one and calculate duration.
# `aggregate` is used instead of `agg` so this cell matches the accessor used by
# every other model hub call in this notebook.
(
    of.model_hub.filter(of.model_hub.map.conversion_count(name='github_cta') >= 1)
    .model_hub.aggregate.session_duration()
    .head()
)

moment
2022-02-02   0 days 00:46:28.725375
2022-02-03   0 days 00:07:56.027600
2022-02-04   0 days 00:00:54.977500
2022-02-07   0 days 00:00:36.135500
Name: session_duration, dtype: timedelta64[ns]

## Top user interactions before conversions

In [21]:
# TODO: discuss making this a Bach function; otherwise it takes lots of merging and sorting of DataFrames.
# For now the easy way: look at all events of the users that have converted,
# including the events that happened after the conversion.
# Only the 'github_cta' conversion event is considered here.

has_converted = of.model_hub.map.conversion_count(name='github_cta') >= 1
converted_users = of.model_hub.filter(has_converted)

# keep only PressEvent and MediaStartEvent rows, to focus on user actions
is_press = converted_users.event_type == 'PressEvent'
is_media_start = converted_users.event_type == 'MediaStartEvent'
converted_users = converted_users[is_press | is_media_start]

# distinct users for every combination of feature and event type, most used first
users_per_feature = (
    converted_users
    .groupby(['feature_nice_name', 'event_type'])
    .agg({'user_id': 'nunique'})
)
users_per_feature.sort_values('user_id_nunique', ascending=False).head(10)

Unnamed: 0_level_0,Unnamed: 1_level_0,user_id_nunique
feature_nice_name,event_type,Unnamed: 2_level_1
Link: objectiv-on-github located at Root Location: home => Content: the-stack,PressEvent,11
Link: objectiv-on-github located at Root Location: blog => Content: post-meet-objectiv-open-source-product-analytics-designed-for-data-sc => Content: blog-post-try-objectiv,PressEvent,11
Link: logo located at Root Location: docs => Navigation: navbar-top,PressEvent,3
Link: fly-ventures located at Root Location: jobs => Content: what-we-offer,PressEvent,2
Link: docs-open-analytics-taxonomy located at Root Location: home,PressEvent,2
Link: localglobe located at Root Location: jobs => Content: what-we-offer,PressEvent,2
Link: blog located at Root Location: jobs => Navigation: navbar-top,PressEvent,1
Link: blog located at Root Location: home => Navigation: navbar-top,PressEvent,1
Link: about-us located at Root Location: home => Navigation: navbar-top => Overlay: hamburger-menu,PressEvent,1
Link: docs located at Root Location: home => Navigation: navbar-top,PressEvent,1
