# Basic product analysis

In this notebook, we briefly demonstrate how you can easily do basic product analysis on your data.

## Getting started

In [None]:
# import Bach
from bach_open_taxonomy import ObjectivFrame 
from bach import display_sql_as_markdown

In [None]:
# connect to the SQL db and build the ObjectivFrame all later cells work on
of = ObjectivFrame.from_objectiv_data(
    time_aggregation='YYYY-MM-DD',  # date format string; the "daily" cells below rely on it (presumably the default grouping)
    start_date='2022-02-02',
)

In [None]:
# add columns taken from the global contexts and the location stack;
# the later groupby cells refer to these column names
of['application'] = of.global_contexts.gc.application
of['root_location'] = of.location_stack.ls.get_from_context_with_type_series(
    type='RootLocationContext',
    key='id',
)
of['referrer'] = of.global_contexts.gc.get_from_context_with_type_series(
    type='HttpContext',
    key='referrer',
)
of['feature_nice_name'] = of.location_stack.ls.nice_name

In [None]:
# preview the data, ordered by descending session_id
(
    of
    .sort_values('session_id', ascending=False)
    .head()
)

In [None]:
# summarise every column with describe and show the top of the summary
(
    of
    .describe(include='all')
    .head()
)

## How many users do we have?

In [None]:
# model hub: unique users, monthly (grouped by the 'YYYY-MM' time aggregation)
monthly_users = of.model_hub.aggregate.unique_users(groupby=of.mh.time_agg('YYYY-MM'))
monthly_users.head()

In [None]:
# model hub: unique users, daily
# no groupby is passed here; presumably the frame's time_aggregation is used -- confirm
daily_users = of.model_hub.aggregate.unique_users()
(
    daily_users
    .sort_index(ascending=False)
    .head(10)
)

In [None]:
# model hub: unique users, grouped by application and root location
users_root = of.model_hub.aggregate.unique_users(
    groupby=['application', 'root_location'],
)
(
    users_root
    .sort_index(ascending=False)
    .head(10)
)

## How much time do they spend?

In [None]:
# model hub: session duration, grouped by month ('YYYY-MM')
duration_monthly = of.model_hub.aggregate.session_duration(
    groupby=of.mh.time_agg('YYYY-MM'),
)
(
    duration_monthly
    .sort_index(ascending=False)
    .head()
)

In [None]:
# model hub: session duration, daily (no explicit groupby is passed)
duration_daily = of.model_hub.aggregate.session_duration()
(
    duration_daily
    .sort_index(ascending=False)
    .head()
)

In [None]:
# model hub: session duration, grouped by root location and month
(
    of.model_hub.aggregate
    .session_duration(groupby=['root_location', of.mh.time_agg('YYYY-MM')])
    .sort_index()
    .head(10)
)

In [None]:
# how is this time spent distributed?
# duration per session_id, with bounces kept in (exclude_bounces=False)
session_duration = of.mh.aggregate.session_duration(
    groupby='session_id',
    exclude_bounces=False,
)
# quartiles of the per-session durations
(
    session_duration
    .to_frame()
    .materialize()['session_duration']
    .quantile(q=[0.25, 0.50, 0.75])
    .head()
)

## What are the top user interactions?

In [None]:
# select only user interactions
is_interactive = of.stack_event_types >= ['InteractiveEvent']
interactive_events = of[is_interactive]

# users by feature: distinct user_ids per application / feature / event type
users_feature = (
    interactive_events
    .groupby(['application', 'feature_nice_name', 'event_type'])
    .agg({'user_id':'nunique'})
)
(
    users_feature
    .sort_values('user_id_nunique', ascending=False)
    .head()
)

## What do users do in each of the main product areas?

In [None]:
# unique users per application / root location / feature / event type,
# turned into a frame with the group keys as regular columns so they can be filtered on
most_interactions = (
    interactive_events.mh.agg
    .unique_users(groupby=['application','root_location','feature_nice_name', 'event_type'])
    .to_frame()
    .reset_index()
)

In [None]:
# keep only the interactions on the website's home section
on_website = most_interactions.application == 'objectiv-website'
on_home = most_interactions.root_location == 'home'
home_users = most_interactions[on_website & on_home]
(
    home_users
    .sort_values('unique_users', ascending=False)
    .head()
)

## What do users do most in docs?

In [None]:
# keep only the interactions that happened in the docs application
in_docs = most_interactions.application == 'objectiv-docs'
docs_users = most_interactions[in_docs]
(
    docs_users
    .sort_values('unique_users', ascending=False)
    .head()
)

## Where are users coming from?

In [None]:
# users by referrer, highest counts first
(
    of.mh.agg
    .unique_users(groupby='referrer')
    .sort_values(ascending=False)
    .head()
)

## How are conversions doing?

In [None]:
# define which events to use as conversion events:
# a PressEvent on the 'objectiv-on-github' link, falling back to the 'github' link
# where the first location stack slice is empty (fillna)
github_link_primary = of.location_stack.json[{'id': 'objectiv-on-github', '_type': 'LinkContext'}:]
github_link_fallback = of.location_stack.json[{'id': 'github', '_type': 'LinkContext'}:]
of.add_conversion_event(
    location_stack=github_link_primary.fillna(github_link_fallback),
    event_type='PressEvent',
    name='github_press',
)

In [None]:
# model hub: calculate conversions
# unique users among the events flagged as the 'github_press' conversion event
conversions = (
    of.model_hub
    .filter(of.model_hub.map.is_conversion_event('github_press'))
    .model_hub.aggregate
    .unique_users()
)

(
    conversions
    .to_frame()
    .sort_index(ascending=False)
    .head(10)
)

In [None]:
# use earlier model hub outputs to calculate conversion rate:
# converting users divided by all unique users (both series come from earlier cells)
conversion_rate = conversions / daily_users
(
    conversion_rate
    .sort_index(ascending=False)
    .head(10)
)

In [None]:
# from where do users convert most?
# unique users on 'github_press' conversion events, per application / feature / event type
conversion_locations = (
    of.model_hub
    .filter(of.model_hub.map.is_conversion_event('github_press'))
    .model_hub.agg
    .unique_users(groupby=['application', 'feature_nice_name', 'event_type'])
)

(
    conversion_locations
    .sort_values(ascending=False)
    .head()
)

In [None]:
# what are users doing before they convert?

# select sessions with a conversion
sessions_with_conversion = of.model_hub.filter(
    of.model_hub.map.conversions_counter(name='github_press') >= 1
)

# from those, select hits where number of conversions was still 0
hits_before_conversion = sessions_with_conversion.model_hub.map.conversions_in_time('github_press') == 0
converted_users = sessions_with_conversion.mh.filter(hits_before_conversion)

# select only user interactions
is_interactive_hit = converted_users.stack_event_types >= ['InteractiveEvent']
converted_users_filtered = converted_users[is_interactive_hit]

# unique users per application / feature / event type within those hits
converted_users_features = converted_users_filtered.model_hub.agg.unique_users(
    groupby=['application', 'feature_nice_name', 'event_type'],
)

(
    converted_users_features
    .sort_values(ascending=False)
    .to_frame()
    .head(10)
)

In [None]:
# how much time do users spend before they convert?
# session duration over the pre-conversion hits, without any grouping (groupby=None)
(
    converted_users.model_hub.aggregate
    .session_duration(groupby=None)
    .to_frame()
    .head()
)

## Get the SQL for any analysis

In [None]:
# show the SQL behind an analysis result; `conversions` (computed in an earlier cell)
# is just one example, this works for anything you do with Objectiv Bach
display_sql_as_markdown(conversions)