In [None]:
# default_exp utils.interaction_utils

# Interaction Utils
> Utilities for summarizing user-item interaction data.

In [None]:
#hide
from nbdev.showdoc import *
from fastcore.nb_imports import *
from fastcore.test import *

In [None]:
#export
from typing import List, Optional, Callable, Union, Any, Tuple

import pandas as pd

## user_activity_count

In [None]:
#export
def user_activity_count(df, user_col='userid', event_col='event'):
    """Count how many events of each type every user generated.

    Parameters
    ----------
    df : pandas.DataFrame
        Interaction log with one row per event.
    user_col : str
        Name of the column holding the user identifier.
    event_col : str
        Name of the column holding the event type.

    Returns
    -------
    tuple of (pandas.DataFrame, pandas.DataFrame)
        ``(summary, user_activity)``.

        ``user_activity`` has one row per user and one column per event type,
        both in order of first appearance in ``df``, plus a final ``activity``
        column holding the row total.

        ``summary`` has the columns ``#Interactions`` and ``#Users``: for each
        distinct ``activity`` total, the number of users with that total.

    Notes
    -----
    Counting is done on the columns themselves instead of row by row:
    ``DataFrame.iterrows`` casts every row to a single dtype, which turns
    integer ids into floats in an all-numeric frame and merges distinct large
    ids. Rows with a missing user are skipped and a missing event is never
    counted.
    """
    # factorize numbers each distinct value 0..k-1 in order of first
    # appearance and marks missing values with -1.
    user_codes, user_labels = pd.factorize(df[user_col])
    event_codes, event_labels = pd.factorize(df[event_col])

    pairs = pd.DataFrame({'user': user_codes, 'event': event_codes})
    pairs = pairs[(pairs['user'] >= 0) & (pairs['event'] >= 0)]

    user_activity = (
        pairs.groupby(['user', 'event'])
        .size()
        .unstack(fill_value=0)
        # Bring back users/events whose every row was dropped by the filter.
        .reindex(index=range(len(user_labels)),
                 columns=range(len(event_labels)),
                 fill_value=0)
    )

    # Swap the integer codes back for plain labels; going through a list keeps
    # categorical inputs from producing a CategoricalIndex, which would reject
    # the extra 'activity' column added below.
    user_activity.index = pd.Index(user_labels.tolist())
    user_activity.columns = pd.Index(event_labels.tolist())
    user_activity['activity'] = user_activity.sum(axis=1)

    _df = user_activity['activity'].value_counts().reset_index()
    _df.columns = ['#Interactions', '#Users']
    return _df, user_activity

Example

In [None]:
# Load the raw interaction log (RecoHut `retail_general` dataset) that the
# examples in this notebook run against.
# header=0 discards the file's own header row so the names below are used.
df = pd.read_csv('https://github.com/RecoHut-Datasets/retail_general/raw/v3/rawdata.csv',
                 header = 0,
                 names = ['event','userid','itemid','timestamp'],
                 # NOTE(review): dtype is keyed by integer position rather than
                 # by column name; confirm 0/1/2 resolve to the intended columns.
                 dtype={0:'category', 1:'category', 2:'category'},
                 parse_dates=['timestamp'])
df.head()

Unnamed: 0,event,userid,itemid,timestamp
0,view_item,2763227,11056,2020-01-13 16:05:31.244000+00:00
1,add_to_cart,2828666,14441,2020-01-13 22:36:38.680000+00:00
2,view_item,620225789,14377,2020-01-14 10:54:41.886000+00:00
3,view_item,620225789,14377,2020-01-14 10:54:47.692000+00:00
4,add_to_cart,620225789,14377,2020-01-14 10:54:48.479000+00:00


In [None]:
# Returns the interaction-count summary and the per-user event table; display the latter.
df_user, user_activity = user_activity_count(df)
user_activity

Unnamed: 0,view_item,add_to_cart,begin_checkout,remove_from_cart,purchase,activity
2763227,1,0,0,0,0,1
2828666,0,1,0,0,0,1
0620225789,2,1,0,0,0,3
3390256999,1,0,0,0,0,1
807596,16,2,10,1,0,29
...,...,...,...,...,...,...
2232882,2,1,0,0,0,3
2484429,0,0,13,4,0,17
467977,4,3,0,0,0,7
2280228,0,0,3,0,0,3


## item_activity_count

In [None]:
#export
def item_activity_count(df, item_col='itemid', event_col='event'):
    """Count how many events of each type every item received.

    Parameters
    ----------
    df : pandas.DataFrame
        Interaction log with one row per event.
    item_col : str
        Name of the column holding the item identifier.
    event_col : str
        Name of the column holding the event type.

    Returns
    -------
    tuple of (pandas.DataFrame, pandas.DataFrame)
        ``(summary, item_activity)``.

        ``item_activity`` has one row per item and one column per event type,
        both in order of first appearance in ``df``, plus a final ``activity``
        column holding the row total.

        ``summary`` has the columns ``#Interactions`` and ``#Items``: for each
        distinct ``activity`` total, the number of items with that total.

    Notes
    -----
    Counting is done on the columns themselves instead of row by row:
    ``DataFrame.iterrows`` casts every row to a single dtype, which turns
    integer ids into floats in an all-numeric frame and merges distinct large
    ids. Rows with a missing item are skipped and a missing event is never
    counted.
    """
    # factorize numbers each distinct value 0..k-1 in order of first
    # appearance and marks missing values with -1.
    item_codes, item_labels = pd.factorize(df[item_col])
    event_codes, event_labels = pd.factorize(df[event_col])

    pairs = pd.DataFrame({'item': item_codes, 'event': event_codes})
    pairs = pairs[(pairs['item'] >= 0) & (pairs['event'] >= 0)]

    item_activity = (
        pairs.groupby(['item', 'event'])
        .size()
        .unstack(fill_value=0)
        # Bring back items/events whose every row was dropped by the filter.
        .reindex(index=range(len(item_labels)),
                 columns=range(len(event_labels)),
                 fill_value=0)
    )

    # Swap the integer codes back for plain labels; going through a list keeps
    # categorical inputs from producing a CategoricalIndex, which would reject
    # the extra 'activity' column added below.
    item_activity.index = pd.Index(item_labels.tolist())
    item_activity.columns = pd.Index(event_labels.tolist())
    item_activity['activity'] = item_activity.sum(axis=1)

    _df = item_activity['activity'].value_counts().reset_index()
    _df.columns = ['#Interactions', '#Items']
    return _df, item_activity

Example

In [None]:
# Returns the interaction-count summary and the per-item event table; display the latter.
df_item, item_activity = item_activity_count(df)
item_activity

Unnamed: 0,view_item,add_to_cart,begin_checkout,remove_from_cart,purchase,activity
11056,34,3,16,4,10,67
14441,89,28,145,16,6,284
14377,19,4,3,0,11,37
11211,28,4,44,9,1,86
14442,98,18,58,9,37,220
...,...,...,...,...,...,...
41237,3,1,10,0,0,14
32150,0,0,1,1,0,2
35372,0,0,2,0,0,2
36064,0,0,1,0,0,1
