# Basic analysis

In [1]:
# import Bach
from bach_open_taxonomy import ObjectivFrame

In [2]:
# connect to SQL db
of = ObjectivFrame.from_objectiv_data(db_url='postgresql://postgres:@localhost:5432/objectiv',
                                      table_name='data',
                                      time_aggregation='YYYY-MM-DD',
                                      start_date='2022-02-02')

In [4]:
# explore the dataframe
of.describe()

KeyError: 'describe'

In [None]:
# adding specific contexts to the data
of['application'] = of.global_contexts.gc.get_from_context_with_type_series(type='ApplicationContext', key='id')
of['root_location'] = of.location_stack.ls.get_from_context_with_type_series(type='RootLocationContext', key='id')
of['feature_nice_name'] = of.location_stack.ls.nice_name

## Users

In [None]:
# model hub: unique users, total month
users = of.model_hub.aggregate.unique_users(time_aggregation='YYYY-MM')
users.sort_index(ascending=False).head()

In [None]:
# model hub: unique users, daily
users = of.model_hub.aggregate.unique_users()
users.sort_index(ascending=False).head(10)

In [None]:
# users by RootLocation
users_root = of.groupby(['root_location']).agg({'user_id':'nunique'})
users_root.sort_values('user_id_nunique', ascending=False).head()

## Time spent

In [None]:
# model hub: duration, total month
duration = of.model_hub.aggregate.session_duration(time_aggregation='YYYY-MM')
duration.sort_index(ascending=False).head()

In [None]:
# model hub: duration, daily
duration = of.model_hub.aggregate.session_duration(time_aggregation='YYYY-MM-DD')
duration.sort_index(ascending=False).head()

In [None]:
# duration by RootLocation
of[(of.root_location == 'home')].model_hub.aggregate.session_duration(time_aggregation='YYYY-MM').head()

In [None]:
of[(of.root_location == 'docs')].model_hub.aggregate.session_duration(time_aggregation='YYYY-MM').head()

In [None]:
of[(of.root_location == 'docs')].model_hub.aggregate.session_duration(time_aggregation='YYYY-MM').head()

## Top user interactions

In [None]:
# select PressEvent and MediaStart event, to focus on user actions
users_feature = of[(of.event_type == 'PressEvent') |
                   (of.event_type == 'MediaStartEvent')]

# users by feature
users_feature = users_feature.groupby(['feature_nice_name', 'event_type']).agg({'user_id':'nunique'})
users_feature.sort_values('user_id_nunique', ascending=False).head()

## What did users do most on home?

In [None]:
# select homepage RootLocation only
home_users = of[(of.root_location == 'home')]

# select PressEvent and MediaStart event, to focus on user actions
home_users = home_users[(home_users.event_type == 'PressEvent') |
                   (home_users.event_type == 'MediaStartEvent')]

# home users by feature
home_users = home_users.groupby(['feature_nice_name']).agg({'user_id':'nunique'})
home_users.sort_values('user_id_nunique', ascending=False).head()

## What did users do most in docs?

In [None]:
# select docs RootLocation only
docs_users = of[(of.root_location == 'docs')]

# select PressEvent and MediaStart event, to focus on user actions
docs_users = docs_users[(docs_users.event_type == 'PressEvent') |
                   (docs_users.event_type == 'MediaStartEvent')]

# docs users by feature
docs_users = docs_users.groupby(['feature_nice_name']).agg({'user_id':'nunique'})
docs_users.sort_values('user_id_nunique', ascending=False).head()

## Set conversion

In [None]:
# NOTE: CAN WE DO THIS IN 1 GO AND DEFINE 1 CONVERSION EVENT NAME?

# define which events to use as conversion events
of.add_conversion_event(location_stack=of.location_stack.json[{'id': 'objectiv-on-github', '_type': 'LinkContext'}:],
                        event_type='PressEvent',
                        name='github_cta')

of.add_conversion_event(location_stack=of.location_stack.json[{'id': 'github', '_type': 'LinkContext'}:],
                        event_type='PressEvent',
                        name='github_nav')

# add conversion events to the dataframe
conversion_events = (of.model_hub.map.is_conversion_event('github_cta')|
                    of.model_hub.map.is_conversion_event('github_nav'))

## Number of daily conversions

In [None]:
of.model_hub.filter(conversion_events).model_hub.aggregate.unique_users().sort_index(ascending=False).head(10)

## From where do users convert most?

In [None]:
# filter conversion events
conversion_locations = of.model_hub.filter(conversion_events).groupby(['feature_nice_name', 'event_type'])\
    .agg({'user_id':'nunique'})

conversion_locations.sort_values('user_id_nunique', ascending=False).head()

## Time spent for converting users?

In [None]:
# NOTE: NOW ONLY TAKES 1 OF THE CONVERSION EVENTS

# use model hub to filter converting users and calculate duration
of.model_hub.filter(of.model_hub.map.conversion_count(name='github_cta')>=1)\
  .model_hub.agg.session_duration()\
  .head()

## Top user interactions before conversions

In [None]:
## LET'S DISCUSS MAKING A BACH FUNCTION, OTHERWISE LOTS OF MERGING AND SORTING DFs
