In [None]:
# default_exp transforms.session

# Session
> Build per-session item sequences (one list of item IDs per session) from an interaction dataframe.

In [None]:
#hide
from nbdev.showdoc import *

In [None]:
#export
import numpy as np

In [None]:
#export
def construct_session_sequences(df, sessionID, itemID):
    """
    Build one list of item values per session from an interaction dataframe.

    Rows are grouped on the `sessionID` column and, for each group, the values
    of the `itemID` column are collected into a list in their original row order.
    The resulting list of lists is the core training data used in the Word2Vec
    algorithm.

    Item values are returned exactly as stored in the dataframe; nothing is cast
    to str here. If the downstream model needs string tokens, cast the item
    column before calling (e.g. `df[itemID].astype(str)`).

    INPUTS
    ------------
    df:                 pandas dataframe
    sessionID: str      column name in the df that represents individual sessions
    itemID: str         column name in the df that represents the items within a session

    RETURNS
    ------------
    list of lists       one sublist per distinct session, ordered by sorted session
                        key (pandas groupby default); an empty list for an empty df
    """
    # Group on the bare column label rather than a one-element list: with a
    # length-1 list grouper, pandas 1.5 emits a FutureWarning and pandas 2.x
    # yields 1-tuples as group keys. The group key itself is not needed here.
    return [
        list(session_rows[itemID].values)
        for _, session_rows in df.groupby(sessionID)
    ]

In [None]:
import pandas as pd

# Toy interaction log: session 1 has three item events, session 2 has five.
df = pd.DataFrame(
    dict(
        SessionID=[1, 1, 1, 2, 2, 2, 2, 2],
        ItemID=[111, 123, 345, 45, 334, 342, 8970, 345],
    )
)
df

Unnamed: 0,SessionID,ItemID
0,1,111
1,1,123
2,1,345
3,2,45
4,2,334
5,2,342
6,2,8970
7,2,345


In [None]:
# Expect one list of item IDs per session, in session-ID order.
construct_session_sequences(
    df,
    sessionID='SessionID',
    itemID='ItemID',
)

[[111, 123, 345], [45, 334, 342, 8970, 345]]

In [None]:
#hide
# Reproducibility stamp: (re)load the watermark extension, then print the author,
# last-updated timestamp, machine details and the versions of recohut and imported packages.
%reload_ext watermark
%watermark -a "Sparsh A." -m -iv -u -t -d -p recohut

Author: Sparsh A.

Last updated: 2021-12-26 08:50:09

recohut: 0.0.7

Compiler    : GCC 7.5.0
OS          : Linux
Release     : 5.4.144+
Machine     : x86_64
Processor   : x86_64
CPU cores   : 2
Architecture: 64bit

IPython: 5.5.0
pandas : 1.1.5
numpy  : 1.19.5

