# Event log analysis 

In [132]:
### IMPORT ###
from pathlib import Path
import pandas as pd


### LOCAL IMPORT ###
from config import config_reader
from utilities import df_read_csv_data

In [133]:
### GLOBALS ###
yaml_config = config_reader.config_read_yaml("config.yml", "config")
# print(yaml_config) # debug
log_dir = str(yaml_config["LOG_DIR"])
stats_dir = str(yaml_config["STATS_DIR"])
csv_sep = str(yaml_config["CSV_SEP"])
# INPUT
level_input = "PARA" # [PAGE, PARA]
log_file = "edu_event_log_LEVEL_raw_filtered.csv"

# FUNCTIONS

In [134]:
def calculate_session_count_and_percentage(df:pd.DataFrame) -> pd.DataFrame:
    """
    Calculate the distinct session counts and their percentage based on QuizSessionCount.

    Parameters:
    df (pd.DataFrame): A pandas DataFrame with at least two columns: 
        - 'QuizSessionCount': the number of quiz sessions
        - 'sessionID': unique identifier for each session

    Returns:
    pd.DataFrame: A new DataFrame with three columns:
        - 'QuizSessionCount': distinct values of QuizSessionCount
        - 'SessionCount': count of distinct sessionIDs for each QuizSessionCount
        - 'SessionPerc': percentage of SessionCount with respect to the total, rounded to 2 decimal places
    """

    # Count distinct sessionID for each QuizSessionCount
    session_counts = df.groupby('QuizSessionCount')['sessionID'].nunique().reset_index(name='SessionCount')
    
    # Calculate the percentage of session counts relative to the total
    total_sessions = session_counts['SessionCount'].sum()
    session_counts['SessionPerc'] = (session_counts['SessionCount'] / total_sessions * 100).round(2)
    
    return session_counts

In [135]:
def calculate_column_statistics(df: pd.DataFrame, id_column: str, calc_column: str) -> pd.DataFrame:
    """
    Calculate statistics on a specific column, considering only distinct IDs, and return the results 
    without grouping by ID.

    Parameters:
    df (pd.DataFrame): The input pandas DataFrame.
    id_column (str): The name of the column representing the unique identifiers (IDs).
    calc_column (str): The name of the column on which to perform the calculations.

    Returns:
    pd.DataFrame: A new DataFrame with the following columns:
        - '{calc_column}_not_na': count of non-empty values in the calc_column
        - '{calc_column}_min': the minimum value in the calc_column
        - '{calc_column}_max': the maximum value in the calc_column
        - '{calc_column}_avg': the average value in the calc_column
        - '{calc_column}_med': the median value in the calc_column
    """
    
    # Remove duplicate IDs
    df_no_duplicates = df.drop_duplicates(subset=[id_column])
    
    # Filter out rows where the calculation column is NaN
    non_empty_df = df_no_duplicates[df_no_duplicates[calc_column].notna()]
    
    # Calculate the statistics for the calc_column
    col_not_na = non_empty_df[calc_column].count()  # Count of non-NaN values
    col_min = non_empty_df[calc_column].min()       # Minimum value
    col_max = non_empty_df[calc_column].max()       # Maximum value
    col_avg = non_empty_df[calc_column].mean().round(2)      # Mean value
    col_med = non_empty_df[calc_column].median()    # Median value
    
    # Create a DataFrame with the results, using calc_column as prefix
    stats_df = pd.DataFrame({
        f'{calc_column}_not_na': [col_not_na],
        f'{calc_column}_min': [col_min],
        f'{calc_column}_max': [col_max],
        f'{calc_column}_avg': [col_avg],
        f'{calc_column}_med': [col_med]
    })
    
    return stats_df

In [136]:
def label_terciles_by_session(df: pd.DataFrame, session_column: str, value_column: str):
    """
    Label rows in terciles based on the value_column, considering all rows with the same session_column value 
    as belonging to the same tercile, and add the 'Tercile' column to the original DataFrame.
    
    Parameters:
    df (pd.DataFrame): The input DataFrame containing the data.
    session_column (str): The column representing session IDs (grouping key).
    value_column (str): The column containing the values to be split into terciles.
    
    Returns:
    pd.DataFrame: The original DataFrame with an additional column 'Tercile' indicating the tercile label.
    string: Name of the new tercile column
    """

    # Define the tercile column name
    col_tercile = f"{value_column}_Tercile"

    # First, remove duplicates based on session_column and value_column, because same session has same SUS
    df_unique = df.drop_duplicates(subset=[session_column, value_column])
    
    # Filter out rows where the value_column is NaN
    df_unique_non_nan = df_unique[df_unique[value_column].notna()]
    
    # Calculate terciles based on the unique non-NaN values
    df_unique_non_nan[col_tercile] = pd.qcut(df_unique_non_nan[value_column], q=3, labels=[1, 2, 3], duplicates='drop')
    
    # Merge the tercile labels back into the original dataframe
    df = df.merge(df_unique_non_nan[[session_column, col_tercile]], on=session_column, how='left')

    # Count the number of empty cells in the specified column
    num_empty = df[col_tercile].isna().sum()
    print(f"Number of empty cells in '{col_tercile}':", num_empty)

    # Add 0 as a category to allow setting empty cells to 0
    df[col_tercile] = df[col_tercile].cat.add_categories([0])

    # Replace empty values with 0 in the specified column
    df[col_tercile].fillna(0, inplace=True)

    return df, col_tercile

# MAIN

In [137]:
print(">> Setings")
log_file_name = log_file.replace("LEVEL", level_input)
print("Input file:", log_file_name)
path_log_file = Path(log_dir) /log_file_name 
print("Path file:", path_log_file)

>> Setings
Input file: edu_event_log_PARA_raw_filtered.csv
Path file: data_log/edu_event_log_PARA_raw_filtered.csv


In [138]:
print(">> Reading")
df_log = df_read_csv_data(path_log_file, None, csv_sep)

>> Reading
Data preview
                                           sessionID pageTitle    menu  \
0  HnFuoWoXedKrtylUCiHL0JMXXZxQS2gsLzadkGR9lus1Lv...     INTRO  menu_1   
1  HnFuoWoXedKrtylUCiHL0JMXXZxQS2gsLzadkGR9lus1Lv...     INTRO  menu_1   
2  HnFuoWoXedKrtylUCiHL0JMXXZxQS2gsLzadkGR9lus1Lv...     INTRO  menu_1   
3  HnFuoWoXedKrtylUCiHL0JMXXZxQS2gsLzadkGR9lus1Lv...     INTRO  menu_1   
4  HnFuoWoXedKrtylUCiHL0JMXXZxQS2gsLzadkGR9lus1Lv...     INTRO  menu_1   

   pageOrder  pagePara eventPage       eventTimestamp         eventPara  \
0          1         0    PageIN  2024-02-26 19:43:18    INTRO_PageIN_0   
1          1         2   MouseIN  2024-02-26 19:43:18   INTRO_MouseIN_2   
2          1         2  MouseOUT  2024-02-26 19:43:19  INTRO_MouseOUT_2   
3          1         1  MouseENT  2024-02-26 19:43:19  INTRO_MouseENT_1   
4          1         1  MouseOUT  2024-02-26 19:43:19  INTRO_MouseOUT_1   

   click_num  dbclick_num  ...  Q_28  SUS  Apprendimento percepito  \
0         

In [139]:
print("Distinct cases:", df_log["sessionID"].nunique())

Distinct cases: 1067


In [140]:
df_log.head(5)

Unnamed: 0,sessionID,pageTitle,menu,pageOrder,pagePara,eventPage,eventTimestamp,eventPara,click_num,dbclick_num,...,Q_28,SUS,Apprendimento percepito,UEQ - Pragmatic,UEQ - Hedonic,UEQ - Overall,TotalTimeHH,TotalTimeDD,CaseLength,Class
0,HnFuoWoXedKrtylUCiHL0JMXXZxQS2gsLzadkGR9lus1Lv...,INTRO,menu_1,1,0,PageIN,2024-02-26 19:43:18,INTRO_PageIN_0,0,0,...,,,,,,,0.0,0.0,6,
1,HnFuoWoXedKrtylUCiHL0JMXXZxQS2gsLzadkGR9lus1Lv...,INTRO,menu_1,1,2,MouseIN,2024-02-26 19:43:18,INTRO_MouseIN_2,0,0,...,,,,,,,0.0,0.0,6,
2,HnFuoWoXedKrtylUCiHL0JMXXZxQS2gsLzadkGR9lus1Lv...,INTRO,menu_1,1,2,MouseOUT,2024-02-26 19:43:19,INTRO_MouseOUT_2,0,0,...,,,,,,,0.0,0.0,6,
3,HnFuoWoXedKrtylUCiHL0JMXXZxQS2gsLzadkGR9lus1Lv...,INTRO,menu_1,1,1,MouseENT,2024-02-26 19:43:19,INTRO_MouseENT_1,0,0,...,,,,,,,0.0,0.0,6,
4,HnFuoWoXedKrtylUCiHL0JMXXZxQS2gsLzadkGR9lus1Lv...,INTRO,menu_1,1,1,MouseOUT,2024-02-26 19:43:19,INTRO_MouseOUT_1,0,0,...,,,,,,,0.0,0.0,6,


In [141]:
df_log.columns

Index(['sessionID', 'pageTitle', 'menu', 'pageOrder', 'pagePara', 'eventPage',
       'eventTimestamp', 'eventPara', 'click_num', 'dbclick_num',
       'QuizSessionCount', 'QuizAnswerCorrectTotal', 'QuizAnswerWrongTotal',
       'QuizAnswerCorrectRatioOverCount', 'QuizAnswerCorrectRatioOverAll',
       'Q_1', 'Q_2', 'Q_3', 'Q_4', 'Q_5', 'Q_6', 'Q_7', 'Q_8', 'Q_9', 'Q_10',
       'Q_11', 'Q_12', 'Q_13', 'Q_14', 'Q_15', 'Q_16', 'Q_17', 'Q_18', 'Q_19',
       'Q_20', 'Q_21', 'Q_22', 'Q_23', 'Q_24', 'Q_25', 'Q_26', 'Q_27', 'Q_28',
       'SUS', 'Apprendimento percepito', 'UEQ - Pragmatic', 'UEQ - Hedonic',
       'UEQ - Overall', 'TotalTimeHH', 'TotalTimeDD', 'CaseLength', 'Class'],
      dtype='object')

In [142]:
df_log.shape

(86855, 52)

## Classes

In [143]:
df_log["Class"].unique()

array([nan, 'ECO', 'SAA', 'SMTO3', 'SMCN1', 'SMCN2', 'SMTO2', 'SMTO1'],
      dtype=object)

## Quizzes

In [144]:
print(">> Checking quiz sessions")
df_log["QuizSessionCount"].unique()

>> Checking quiz sessions


array([ 0,  3,  1,  2,  4, 10,  5,  7,  9,  6,  8, 11])

In [145]:
result_df = calculate_session_count_and_percentage(df_log)

In [146]:
result_df

Unnamed: 0,QuizSessionCount,SessionCount,SessionPerc
0,0,165,15.46
1,1,25,2.34
2,2,21,1.97
3,3,63,5.9
4,4,48,4.5
5,5,46,4.31
6,6,28,2.62
7,7,31,2.91
8,8,29,2.72
9,9,57,5.34


In [147]:
print("> Saving quiz counts")
path_out = Path(stats_dir) / "quiz_count.csv"
print("File out:", path_out)
result_df.to_csv(path_out, index=False, sep=";")

> Saving quiz counts
File out: stats/quiz_count.csv


## Stats on experience

In [148]:
print(">> Stats on experiences")
list_col = ["SUS", "Apprendimento percepito", "UEQ - Pragmatic", "UEQ - Hedonic", "UEQ - Overall"]
print("Columns:", list_col)

>> Stats on experiences
Columns: ['SUS', 'Apprendimento percepito', 'UEQ - Pragmatic', 'UEQ - Hedonic', 'UEQ - Overall']


In [149]:
for col_name in list_col:
    print("Stats on column:", col_name)
    exp_df = calculate_column_statistics(df_log, "sessionID", col_name)
    file_put = f"{col_name}_stats.csv"
    path_out = Path(stats_dir) / file_put
    print("Saving stats to:", path_out)
    exp_df.to_csv(path_out, index=False, sep=csv_sep)
    print()

Stats on column: SUS
Saving stats to: stats/SUS_stats.csv

Stats on column: Apprendimento percepito
Saving stats to: stats/Apprendimento percepito_stats.csv

Stats on column: UEQ - Pragmatic
Saving stats to: stats/UEQ - Pragmatic_stats.csv

Stats on column: UEQ - Hedonic
Saving stats to: stats/UEQ - Hedonic_stats.csv

Stats on column: UEQ - Overall
Saving stats to: stats/UEQ - Overall_stats.csv



## Mergin Stats on a single file

In [150]:
# Directory containing the CSV files
csv_directory = Path(stats_dir)

# Output Excel file
output_excel = Path(stats_dir) / "_all_stats.xlsx"

# Initialise an Excel writer
i = 0
with pd.ExcelWriter(output_excel, engine='openpyxl') as writer:
    # Iterate over all CSV files in the directory
    for csv_file in csv_directory.glob('*.csv'):
        i+=1
        # Read the CSV file
        print(f"[{i}] Adding file: {csv_file}")
        df = pd.read_csv(csv_file, dtype={"sessionID":object}, sep=";", low_memory=False)
    
        # Use the filename without extension for the sheet name
        sheet_name = str(csv_file.stem).replace(" ","_")
        # Ensure the sheet name is at most 30 characters
        if len(sheet_name) > 30:
            sheet_name = sheet_name[:30]
        # Write the DataFrame to a new sheet in the Excel file
        print("Adding sheet name:", sheet_name)
        df.to_excel(writer, sheet_name=sheet_name, index=False)
print()
print(f"Excel file with all stats created successfully: {output_excel}")

[1] Adding file: stats/Apprendimento percepito_stats.csv
Adding sheet name: Apprendimento_percepito_stats
[2] Adding file: stats/SUS_stats.csv
Adding sheet name: SUS_stats
[3] Adding file: stats/UEQ - Hedonic_stats.csv
Adding sheet name: UEQ_-_Hedonic_stats
[4] Adding file: stats/UEQ - Overall_stats.csv
Adding sheet name: UEQ_-_Overall_stats
[5] Adding file: stats/UEQ - Pragmatic_stats.csv
Adding sheet name: UEQ_-_Pragmatic_stats
[6] Adding file: stats/_edu_event_log_PAGE_raw_filtered_stats_attributes.csv
Adding sheet name: _edu_event_log_PAGE_raw_filter
[7] Adding file: stats/class_distinct_session_counts.csv
Adding sheet name: class_distinct_session_counts
[8] Adding file: stats/distinct_event_timestamps_na_class.csv
Adding sheet name: distinct_event_timestamps_na_c
[9] Adding file: stats/edu_event_log_PAGE_raw_total_time.csv
Adding sheet name: edu_event_log_PAGE_raw_total_t
[10] Adding file: stats/edu_event_log_PARA_raw_total_time.csv
Adding sheet name: edu_event_log_PARA_raw_total_

## Terciles

In [151]:
print(">> Terciles")
list_col = ["SUS", "Apprendimento percepito", "UEQ - Overall", "QuizAnswerCorrectRatioOverAll"] # Columns on which to calculate the tertile
print("Columns:", list_col)

>> Terciles
Columns: ['SUS', 'Apprendimento percepito', 'UEQ - Overall', 'QuizAnswerCorrectRatioOverAll']


In [152]:
for col_name in list_col:
    print("Tercile on column:", col_name)
    df_log, col_tercile = label_terciles_by_session(df_log, session_column='sessionID', value_column=col_name)
    print("New tercile column:", col_tercile)
    print("Event log shape:", df_log.shape)
    print("Event log new tercile:", df_log[col_tercile].unique())
    print()

Tercile on column: SUS
Number of empty cells in 'SUS_Tercile': 45793
New tercile column: SUS_Tercile
Event log shape: (87320, 53)
Event log new tercile: [0, 1, 3, 2]
Categories (4, int64): [1 < 2 < 3 < 0]

Tercile on column: Apprendimento percepito
Number of empty cells in 'Apprendimento percepito_Tercile': 45793
New tercile column: Apprendimento percepito_Tercile
Event log shape: (88250, 54)
Event log new tercile: [0, 1, 2, 3]
Categories (4, int64): [1 < 2 < 3 < 0]

Tercile on column: UEQ - Overall
Number of empty cells in 'UEQ - Overall_Tercile': 45793
New tercile column: UEQ - Overall_Tercile
Event log shape: (90110, 55)
Event log new tercile: [0, 1, 2, 3]
Categories (4, int64): [1 < 2 < 3 < 0]

Tercile on column: QuizAnswerCorrectRatioOverAll
Number of empty cells in 'QuizAnswerCorrectRatioOverAll_Tercile': 3590
New tercile column: QuizAnswerCorrectRatioOverAll_Tercile
Event log shape: (90110, 56)
Event log new tercile: [0, 1, 2, 3]
Categories (4, int64): [1 < 2 < 3 < 0]



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_unique_non_nan[col_tercile] = pd.qcut(df_unique_non_nan[value_column], q=3, labels=[1, 2, 3], duplicates='drop')
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df[col_tercile].fillna(0, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-

In [153]:
df_log.head(10)

Unnamed: 0,sessionID,pageTitle,menu,pageOrder,pagePara,eventPage,eventTimestamp,eventPara,click_num,dbclick_num,...,UEQ - Hedonic,UEQ - Overall,TotalTimeHH,TotalTimeDD,CaseLength,Class,SUS_Tercile,Apprendimento percepito_Tercile,UEQ - Overall_Tercile,QuizAnswerCorrectRatioOverAll_Tercile
0,HnFuoWoXedKrtylUCiHL0JMXXZxQS2gsLzadkGR9lus1Lv...,INTRO,menu_1,1,0,PageIN,2024-02-26 19:43:18,INTRO_PageIN_0,0,0,...,,,0.0,0.0,6,,0,0,0,0
1,HnFuoWoXedKrtylUCiHL0JMXXZxQS2gsLzadkGR9lus1Lv...,INTRO,menu_1,1,2,MouseIN,2024-02-26 19:43:18,INTRO_MouseIN_2,0,0,...,,,0.0,0.0,6,,0,0,0,0
2,HnFuoWoXedKrtylUCiHL0JMXXZxQS2gsLzadkGR9lus1Lv...,INTRO,menu_1,1,2,MouseOUT,2024-02-26 19:43:19,INTRO_MouseOUT_2,0,0,...,,,0.0,0.0,6,,0,0,0,0
3,HnFuoWoXedKrtylUCiHL0JMXXZxQS2gsLzadkGR9lus1Lv...,INTRO,menu_1,1,1,MouseENT,2024-02-26 19:43:19,INTRO_MouseENT_1,0,0,...,,,0.0,0.0,6,,0,0,0,0
4,HnFuoWoXedKrtylUCiHL0JMXXZxQS2gsLzadkGR9lus1Lv...,INTRO,menu_1,1,1,MouseOUT,2024-02-26 19:43:19,INTRO_MouseOUT_1,0,0,...,,,0.0,0.0,6,,0,0,0,0
5,HnFuoWoXedKrtylUCiHL0JMXXZxQS2gsLzadkGR9lus1Lv...,INTRO,menu_1,1,1,MouseIN,2024-02-26 19:43:20,INTRO_MouseIN_1,0,0,...,,,0.0,0.0,6,,0,0,0,0
6,hCZyMIgoJw6QSotWnCSMkZNMWyIgmU0W3HtDlqgbkqiETB...,INTRO,menu_1,1,0,PageIN,2024-03-23 22:24:25,INTRO_PageIN_0,0,0,...,,,0.0,0.0,6,,0,0,0,0
7,hCZyMIgoJw6QSotWnCSMkZNMWyIgmU0W3HtDlqgbkqiETB...,INTRO,menu_1,1,0,PageOUT,2024-03-23 22:24:29,INTRO_PageOUT_0,0,0,...,,,0.0,0.0,6,,0,0,0,0
8,hCZyMIgoJw6QSotWnCSMkZNMWyIgmU0W3HtDlqgbkqiETB...,PROG,menu_1,2,0,PageIN,2024-03-23 22:24:30,PROG_PageIN_0,0,0,...,,,0.0,0.0,6,,0,0,0,0
9,hCZyMIgoJw6QSotWnCSMkZNMWyIgmU0W3HtDlqgbkqiETB...,PROG,menu_1,2,0,PageOUT,2024-03-23 22:24:36,PROG_PageOUT_0,0,0,...,,,0.0,0.0,6,,0,0,0,0


## Saving updated event log

In [154]:
print(">> Saving updated event log")
print("Level (PARA / PAGE):", level_input)
file_name = Path(log_file_name).stem
path_out = Path(log_dir) / f"{file_name}_terziles.csv"
print("Saving event log to:", path_out)
df_log.to_csv(path_out, sep=csv_sep, index=False)

>> Saving updated event log
Level (PARA / PAGE): PARA
Saving event log to: data_log/edu_event_log_PARA_raw_filtered_terziles.csv


## Creating a stats file with the attrbutes of the event-log for every CaseID 

In [155]:
# First, add 'TotalTimeMM' to the original DataFrame by converting 'TotalTimeHH' to minutes
df_log['TotalTimeMM'] = round(df_log['TotalTimeHH'] * 60,2)

# Grou by
attributes_log_df = df_log.groupby('sessionID').agg({
    'Class': 'first',
    'TotalTimeHH': 'first',
    'TotalTimeMM': 'first',
    'CaseLength': 'first',
    'SUS': 'first',
    'Apprendimento percepito': 'first',
    'UEQ - Pragmatic': 'first',
    'UEQ - Hedonic': 'first',
    'UEQ - Overall': 'first',
    'SUS_Tercile': 'first',
    'Apprendimento percepito_Tercile': 'first',
    'UEQ - Overall_Tercile': 'first',
    'QuizSessionCount': 'first',
    'QuizAnswerCorrectTotal': 'first',
    'QuizAnswerWrongTotal': 'first',
    'QuizAnswerCorrectRatioOverCount': 'first',
    'QuizAnswerCorrectRatioOverAll': 'first',
    'QuizAnswerCorrectRatioOverAll_Tercile': 'first',
    'menu': lambda x: list(set(x)),  # Creating a list of unique menu values per sessionID,
    'pageTitle': lambda x: list(set(x))
}).reset_index()

In [156]:
attributes_log_df.head(5)

Unnamed: 0,sessionID,Class,TotalTimeHH,TotalTimeMM,CaseLength,SUS,Apprendimento percepito,UEQ - Pragmatic,UEQ - Hedonic,UEQ - Overall,...,Apprendimento percepito_Tercile,UEQ - Overall_Tercile,QuizSessionCount,QuizAnswerCorrectTotal,QuizAnswerWrongTotal,QuizAnswerCorrectRatioOverCount,QuizAnswerCorrectRatioOverAll,QuizAnswerCorrectRatioOverAll_Tercile,menu,pageTitle
0,04aGqNhteyVxO5pHJZTEyMI5qKNw6QqnCx2TEFm4l3SCuA...,,0.49,29.4,115,,,,,,...,0,0,10,5,5,0.5,0.5,1,"[menu_1, menu_4]","[PROG, DICTS, FUNCT, FOR-Q, LISTS, DICTS-Q, PR..."
1,05aSDW4yUKJoCBrk0SrL7SG8le7ygDBtOVVALTrAAk5eUu...,SAA,0.13,7.8,16,,,,,,...,0,0,0,0,0,,,0,"[menu_1, menu_4]","[FUNCT, INTRO, SURVEY-START]"
2,06Y6g4YU1kgv9kNoKw9EaPATAkDc8rG3ZloaE3QWgGWEzp...,,0.08,4.8,10,,,,,,...,0,0,2,2,0,1.0,0.2,1,[menu_1],"[PROG, PROG-Q, INTRO-Q, VARS, INTRO]"
3,09OiPxDbTHeH0nnJo4g3Fa3JbxGjPcWJGbSK3YTZ2ZoFHA...,,2.01,120.6,80,50.0,5.0,1.25,-0.667,0.429,...,3,2,10,9,1,0.9,0.9,3,"[menu_1, menu_4]","[PROG, DICTS, FUNCT, FOR-Q, LISTS, DICTS-Q, PR..."
4,0HcsM5K14bTga4CpYETnQuMBKMrDBCQgHeGk48sRul6Pha...,SMCN2,0.65,39.0,50,42.5,3.0,-1.25,-0.75,-1.0,...,1,1,10,6,4,0.6,0.6,2,"[menu_2, menu_1]","[PROG, DICTS, FUNCT, FOR-Q, LISTS, DICTS-Q, PR..."


In [157]:
# Saving event log attributes
print(">> Saving event log attributes")
file_name = Path(log_file_name).stem
path_out = Path(stats_dir) / f"_{file_name}_stats_attributes.csv"
print("Saving event log attributes to:", path_out)
attributes_log_df.to_csv(path_out, sep=csv_sep, index=False)

>> Saving event log attributes
Saving event log attributes to: stats/_edu_event_log_PARA_raw_filtered_stats_attributes.csv
