# Basic analysis

In [None]:
# import ObjectivFrame from the bach_open_taxonomy package
from bach_open_taxonomy import ObjectivFrame 

In [None]:
# Create the ObjectivFrame from the 'data' table of the local 'objectiv'
# PostgreSQL database. The settings are collected in a dict first so they
# can be read (and tweaked) in one place.
connection_settings = {
    'db_url': 'postgresql://@localhost:5433/objectiv',
    'table_name': 'data',
    # date format used for time-based aggregations when a model call
    # does not pass its own time_aggregation
    'time_aggregation': 'YYYY-MM-DD',
    'start_date': '2022-02-02',
}
of = ObjectivFrame.from_objectiv_data(**connection_settings)

In [None]:
# Add convenience columns pulled out of the context columns.

# id of the ApplicationContext found in the global contexts
of['application'] = of.global_contexts.gc.get_from_context_with_type_series(
    type='ApplicationContext',
    key='id',
)

# id of the RootLocationContext found in the location stack
of['root_location'] = of.location_stack.ls.get_from_context_with_type_series(
    type='RootLocationContext',
    key='id',
)

# referrer stored on the HttpContext in the global contexts
of['referrer'] = of.global_contexts.gc.get_from_context_with_type_series(
    type='HttpContext',
    key='referrer',
)

# readable name for the feature, taken from the location stack
of['feature_nice_name'] = of.location_stack.ls.nice_name

In [None]:
# Preview a few rows, ordered by session_id from high to low.
newest_sessions_first = of.sort_values('session_id', ascending=False)
newest_sessions_first.head()

In [None]:
# Summarise all columns with describe and show the first rows of the summary.
column_summary = of.describe(include='all')
column_summary.head()

## Users

In [None]:
# model hub: unique users per month ('YYYY-MM'), latest months first
users = of.model_hub.aggregate.unique_users(
    time_aggregation='YYYY-MM',
)
(
    users
    .sort_index(ascending=False)
    .head()
)

In [None]:
# model hub: unique users per day. No time_aggregation is passed here, so
# the model falls back on its default; latest 15 entries are shown.
users = of.model_hub.aggregate.unique_users()
(
    users
    .sort_index(ascending=False)
    .head(15)
)

In [None]:
# Count distinct users for every (application, root_location) pair and
# list the ten pairs with the most users.
root_location_keys = ['application', 'root_location']
users_root = (
    of
    .groupby(root_location_keys)
    .agg({'user_id': 'nunique'})
)
users_root.sort_values('user_id_nunique', ascending=False).head(10)

## Time spent

In [None]:
# model hub: session duration aggregated per month ('YYYY-MM'),
# latest months first
duration = of.model_hub.aggregate.session_duration(
    time_aggregation='YYYY-MM',
)
(
    duration
    .sort_index(ascending=False)
    .head()
)

In [None]:
# model hub: session duration aggregated per day ('YYYY-MM-DD'),
# latest days first
duration = of.model_hub.aggregate.session_duration(
    time_aggregation='YYYY-MM-DD',
)
(
    duration
    .sort_index(ascending=False)
    .head()
)

In [None]:
# Monthly session duration, limited to events whose root_location is 'home'.
home_location_events = of[of.root_location == 'home']
home_location_events.model_hub.aggregate.session_duration(time_aggregation='YYYY-MM').head()

In [None]:
# Monthly session duration, limited to events whose root_location is 'blog'.
blog_location_events = of[of.root_location == 'blog']
blog_location_events.model_hub.aggregate.session_duration(time_aggregation='YYYY-MM').head()

In [None]:
# Monthly session duration for the docs as a whole, i.e. all events of the
# 'objectiv-docs' application.
docs_application_events = of[of.application == 'objectiv-docs']
docs_application_events.model_hub.aggregate.session_duration(time_aggregation='YYYY-MM').head()

## Top user interactions

In [None]:
# Focus on explicit user actions: presses and media starts.
is_user_action = (
    (of.event_type == 'PressEvent') |
    (of.event_type == 'MediaStartEvent')
)
user_actions = of[is_user_action]

# Distinct users per feature, split by application and event type;
# the most-used features are shown first.
users_feature = (
    user_actions
    .groupby(['application', 'feature_nice_name', 'event_type'])
    .agg({'user_id': 'nunique'})
)
users_feature.sort_values('user_id_nunique', ascending=False).head()

## What did users do most on home?

In [None]:
# Narrow the data down in three steps:
#   1. events on the 'home' root location,
#   2. of the website application only (this leaves out the docs),
#   3. that are presses or media starts, i.e. explicit user actions.
on_home = of[of.root_location == 'home']
on_website_home = on_home[on_home.application == 'objectiv-website']
home_actions = on_website_home[
    (on_website_home.event_type == 'PressEvent') |
    (on_website_home.event_type == 'MediaStartEvent')
]

# Distinct users per feature and event type, most-used features first.
home_users = (
    home_actions
    .groupby(['feature_nice_name', 'event_type'])
    .agg({'user_id': 'nunique'})
)
home_users.sort_values('user_id_nunique', ascending=False).head()

## What did users do most in docs?

In [None]:
# Select the events of the docs application only. Note that this filters on
# the application column, not on root_location.
in_docs = of[of.application == 'objectiv-docs']

# Keep presses and media starts, i.e. explicit user actions.
docs_actions = in_docs[
    (in_docs.event_type == 'PressEvent') |
    (in_docs.event_type == 'MediaStartEvent')
]

# Distinct users per feature and event type, most-used features first.
docs_users = (
    docs_actions
    .groupby(['feature_nice_name', 'event_type'])
    .agg({'user_id': 'nunique'})
)
docs_users.sort_values('user_id_nunique', ascending=False).head()

## Where did users come from?

In [None]:
# Distinct users per referrer, largest referrers first.
referrer_users = (
    of
    .groupby(['referrer'])
    .agg({'user_id': 'nunique'})
)
referrer_users.sort_values('user_id_nunique', ascending=False).head()

## Set conversion

In [None]:
# Register the 'github_press' conversion event: a PressEvent on one of the
# two GitHub links. Both location stacks are sliced starting at the matching
# LinkContext; the 'github' slice fills the gaps left by the
# 'objectiv-on-github' slice (presumably rows where that link is not in the
# stack - verify against the Bach JSON accessor docs).
objectiv_on_github_link = of.location_stack.json[{'id': 'objectiv-on-github', '_type': 'LinkContext'}:]
github_link = of.location_stack.json[{'id': 'github', '_type': 'LinkContext'}:]

of.add_conversion_event(
    location_stack=objectiv_on_github_link.fillna(github_link),
    event_type='PressEvent',
    name='github_press',
)

## Number of daily conversions

In [None]:
# Unique users among the 'github_press' conversion events, aggregated with
# the default time aggregation; the ten latest entries are shown.
is_github_press = of.model_hub.map.is_conversion_event('github_press')
github_press_events = of.model_hub.filter(is_github_press)
(
    github_press_events
    .model_hub.aggregate.unique_users()
    .sort_index(ascending=False)
    .head(10)
)

## From where do users convert most?

In [None]:
# Keep only the 'github_press' conversion events and count the distinct
# users per application, feature and event type.
conversion_locations = (
    of.model_hub.filter(of.model_hub.map.is_conversion_event('github_press'))
    .groupby(['application', 'feature_nice_name', 'event_type'])
    .agg({'user_id': 'nunique'})
)

# locations with the most converting users first
conversion_locations.sort_values('user_id_nunique', ascending=False).head()

## Time spent by converting users

In [None]:
# Use the model hub to keep the events of converting users and calculate
# their session duration. conversions_counter is partitioned by user_id, so
# the `>= 1` condition selects users with at least one 'github_press'
# conversion.
# The `aggregate` accessor is spelled out (instead of the `agg` shorthand)
# to match every other model hub call in this notebook.
has_converted = of.model_hub.map.conversions_counter(name='github_press', partition='user_id') >= 1
(
    of.model_hub.filter(has_converted)
    .model_hub.aggregate.session_duration()
    .sort_index(ascending=False)
    .head()
)

## Top user interactions before conversions

In [None]:
# select sessions with a conversion (conversions_counter is called without
# an explicit partition here)
in_converting_session = of.model_hub.map.conversions_counter(name='github_press') >= 1
converted_users = of.model_hub.filter(in_converting_session)

# from those, select hits where number of conversions was still 0,
# i.e. the events that happened before the first conversion
before_conversion = converted_users.model_hub.map.conversions_in_time('github_press') == 0
converted_users = converted_users.model_hub.filter(before_conversion)

# select PressEvent and MediaStart event, to focus on user actions.
# This subset gets its own name on purpose: converted_users must keep ALL
# events before the first conversion, because the "Time spent before
# conversions" cell computes session duration on it. Overwriting
# converted_users here would base that duration on interaction events only.
converted_users_filtered = converted_users[
    (converted_users.event_type == 'PressEvent') |
    (converted_users.event_type == 'MediaStartEvent')
]

# distinct users per feature before converting, top ten
(
    converted_users_filtered
    .groupby(['application', 'feature_nice_name', 'event_type'])
    .agg({'user_id': 'nunique'})
    .sort_values('user_id_nunique', ascending=False)
    .head(10)
)

## Time spent before conversions

In [None]:
# Monthly ('YYYY-MM') session duration computed with the model hub on the
# converted_users frame built in the previous cell.
duration_before_conversion = converted_users.model_hub.aggregate.session_duration(
    time_aggregation='YYYY-MM',
)
duration_before_conversion.head()