# Objective Binge Classifier #

Import dependencies

In [None]:
!pip install openpyxl
import pandas as pd
import json

Set paths and columns of response document

In [None]:
# Load the merged (unfiltered) response export.
path_in = '../resources/data_ignored/Onderzoeksassistente/merged_unfiltered.xlsx'
df = pd.read_excel(path_in)

# Names of the response columns read by the classification functions.
col_name_post_instance_key = 'POST_instance_key'
col_name_post_loc = 'POST_LOC'  # self-reported Loss Of Control
col_name_pre_type = 'PRE_TYPE'
col_post_sub_binge = 'POST_SUB_BINGE'

# Is the data exported in its text representation (instead of numeric codes)?
text_export = False

# Loss Of Control coding: 0 = no, 1 = yes, 2 = n/a.
# Value a row must have to count as "loss of control reported".
loc_true = 1

# Subjective binge coding: 0 = no, 1 = yes, 2 = n/a.
# Value a row must have to count as "subjective binge reported".
sub_binge_true = 1

# Eating-moment type coding: 1 = breakfast, 2 = lunch, 3 = dinner, 4 = other.
# The "other" category is matched by label in a text export, by code otherwise.
pre_type_category_other = ['Anders'] if text_export else [4]

# Quick overview of how often each eating-moment type occurs.
columns_type_counts = df[col_name_pre_type].value_counts()
print('counts', columns_type_counts)

Set the path of the config document, which contains the portion sizes

In [None]:
# Workbook that is later queried (by its '#' column) for per-category values.
path_config = '../resources/config/binge_config.xlsx'
df_config = pd.read_excel(path_config)

# Show the available column labels as a quick sanity check of the workbook.
print(df_config.columns)

Set paths and columns for result document

In [None]:
# Names of the result columns added to the response dataframe.
col_objective_binge = 'objective_binge'
col_subjective_binge = 'subjective_binge'
col_loc_verify = 'loc_verify'
col_no_loc_no_sub_binge_verify = 'no_loc_no_sub_binge_verify'
col_no_binge = 'no_binge'
col_binge_intake = 'binge_intake'

# Output workbooks: the full classified dataset and the subset handed to raters.
path_out = '../resources/data_ignored/Onderzoeksassistente/report.merged-binge.xlsx'
path_out_raters = '../resources/data_ignored/Onderzoeksassistente/report.merged-binge-raters.xlsx'

Classification methods

In [None]:
def isNaN(num):
    """Return the result of comparing *num* for inequality with itself.

    A float NaN is the usual value that is unequal to itself, so this is
    true for missing numeric cells and false for ordinary numbers, strings
    and ``None``.
    """
    differs_from_itself = num != num
    return differs_from_itself

def get_binge_value(row, category_number):
    """Look up the config value that an intake is compared against.

    The config column is ``'other_int'`` when the row's eating-moment type is
    one of ``pre_type_category_other`` and ``'meals_int'`` otherwise. The
    config row is the one whose ``'#'`` column equals *category_number*.

    ``.item()`` requires the selection to contain exactly one value, so a
    missing or duplicated category in the config raises ``ValueError``.
    """
    is_other_moment = row[col_name_pre_type] in pre_type_category_other
    threshold_column = 'other_int' if is_other_moment else 'meals_int'
    category_mask = df_config['#'] == category_number
    return df_config.loc[category_mask, threshold_column].item()

def objective_binge(row):
    """Classify a row as an objective binge.

    A row is only classified when it has a POST instance key, reports loss
    of control and reports a subjective binge; otherwise an ``IGNORE_*``
    marker string names the first condition that failed.

    Returns:
        ``True`` if the intake of at least one of the 24 food categories
        exceeds the value from ``get_binge_value``; ``False`` if portions
        were reported but none exceeds it; ``"NO_PORTION"`` if every
        portion column is NaN.
    """
    if isNaN(row[col_name_post_instance_key]):
        return "IGNORE_POST_INSTANCE_MISSING"
    if row[col_name_post_loc] != loc_true:
        return "IGNORE_POST_LOC_FALSE"
    if row[col_post_sub_binge] != sub_binge_true:
        return "IGNORE_POST_SUB_BINGE_FALSE"

    portion_reported = False
    for category_number in range(1, 25):
        value = row['POST_INTAKE_POR_' + str(category_number)]
        if isNaN(value):
            continue
        portion_reported = True
        # Keep scanning when this category is below the threshold: a later
        # category may still exceed it. Returning False here would let the
        # first reported category alone decide the outcome.
        if value > get_binge_value(row, category_number):
            return True
    return False if portion_reported else "NO_PORTION"

def subjective_binge(row):
    """Classify a row as a subjective binge.

    Returns ``True`` when the row has a POST instance key and both the
    loss-of-control and subjective-binge answers equal their required
    values; otherwise returns the ``IGNORE_*`` marker string of the first
    condition that failed.
    """
    if isNaN(row[col_name_post_instance_key]):
        return "IGNORE_POST_INSTANCE_MISSING"

    # (column, required value, marker returned when the value differs),
    # checked in order so the first failing condition decides the marker.
    required_answers = (
        (col_name_post_loc, loc_true, "IGNORE_POST_LOC_FALSE"),
        (col_post_sub_binge, sub_binge_true, "IGNORE_POST_SUB_BINGE_FALSE"),
    )
    for column, required_value, ignore_marker in required_answers:
        if row[column] != required_value:
            return ignore_marker
    return True

def no_loc_no_sub_binge_verify(row):
    """Check intake for rows reporting neither loss of control nor a binge.

    Rows without a POST instance key, or that do report loss of control or
    a subjective binge, yield an ``IGNORE_*`` marker string naming the first
    condition that excluded them.

    Returns:
        ``True`` if the intake of at least one of the 24 food categories
        exceeds the value from ``get_binge_value``; ``False`` if portions
        were reported but none exceeds it; ``"NO_PORTION"`` if every
        portion column is NaN.
    """
    if isNaN(row[col_name_post_instance_key]):
        return "IGNORE_POST_INSTANCE_MISSING"
    if row[col_name_post_loc] == loc_true:
        return "IGNORE_POST_LOC_TRUE"
    if row[col_post_sub_binge] == sub_binge_true:
        return "IGNORE_POST_SUB_BINGE_TRUE"

    portion_reported = False
    for category_number in range(1, 25):
        value = row['POST_INTAKE_POR_' + str(category_number)]
        if isNaN(value):
            continue
        portion_reported = True
        # Keep scanning when this category is below the threshold: a later
        # category may still exceed it. Returning False here would let the
        # first reported category alone decide the outcome.
        if value > get_binge_value(row, category_number):
            return True
    return False if portion_reported else "NO_PORTION"

def loc_binge_verify(row):
    """Check intake for rows that report loss of control.

    Unlike ``objective_binge`` this does not look at the subjective-binge
    answer. Rows without a POST instance key or without reported loss of
    control yield an ``IGNORE_*`` marker string.

    Returns:
        ``True`` if the intake of at least one of the 24 food categories
        exceeds the value from ``get_binge_value``; ``False`` if portions
        were reported but none exceeds it; ``"NO_PORTION"`` if every
        portion column is NaN.
    """
    if isNaN(row[col_name_post_instance_key]):
        return "IGNORE_POST_INSTANCE_MISSING"
    if row[col_name_post_loc] != loc_true:
        return "IGNORE_POST_LOC_FALSE"

    portion_reported = False
    for category_number in range(1, 25):
        value = row['POST_INTAKE_POR_' + str(category_number)]
        if isNaN(value):
            continue
        portion_reported = True
        # Keep scanning when this category is below the threshold: a later
        # category may still exceed it. Returning False here would let the
        # first reported category alone decide the outcome.
        if value > get_binge_value(row, category_number):
            return True
    return False if portion_reported else "NO_PORTION"

def no_binge(row):
    """Flag rows that report neither loss of control nor a subjective binge.

    Returns ``"IGNORE_POST_INSTANCE_MISSING"`` when the row has no POST
    instance key, ``False`` when either answer equals its required value,
    and ``True`` otherwise.
    """
    if isNaN(row[col_name_post_instance_key]):
        return "IGNORE_POST_INSTANCE_MISSING"
    # Short-circuits like the two separate checks would: the subjective-binge
    # answer is only read when loss of control was not reported.
    binge_signal = (
        row[col_name_post_loc] == loc_true
        or row[col_post_sub_binge] == sub_binge_true
    )
    return not binge_signal

def get_binge_intake(row):
    """List the food categories whose reported intake exceeds the threshold.

    Scans the 24 ``POST_INTAKE_POR_<n>`` columns and, for every non-NaN
    intake greater than the value from ``get_binge_value``, records the
    category number together with the reported intake.

    Returns:
        A JSON string holding a list of
        ``{"category_number": int, "intake_por": float}`` objects
        (``"[]"`` when no category exceeds its threshold).
    """
    intake_list = []
    for category_number in range(1, 25):
        value = row['POST_INTAKE_POR_' + str(category_number)]
        if isNaN(value):
            continue
        if value > get_binge_value(row, category_number):
            intake_list.append({
                "category_number": category_number,
                # Record the reported intake, not the threshold it exceeded.
                # float() keeps numpy scalars JSON-serializable.
                "intake_por": float(value),
            })
    return json.dumps(intake_list)

Perform classification and write results

In [None]:
# Result column -> row classifier, applied in this order so the new columns
# are appended to the dataframe in a fixed sequence.
classifiers = (
    (col_objective_binge, objective_binge),
    (col_subjective_binge, subjective_binge),
    (col_loc_verify, loc_binge_verify),
    (col_no_loc_no_sub_binge_verify, no_loc_no_sub_binge_verify),
    (col_no_binge, no_binge),
    (col_binge_intake, get_binge_intake),
)
for result_column, classify_row in classifiers:
    df[result_column] = df.apply(classify_row, axis=1)

# Frequency of each outcome for the three intake-based classifications.
col_objective_binge_counts = df[col_objective_binge].value_counts()
col_loc_verify_counts = df[col_loc_verify].value_counts()
col_no_loc_no_sub_binge_verify_counts = df[col_no_loc_no_sub_binge_verify].value_counts()

print('total PRE entries', len(df.index))
summaries = (
    (col_objective_binge, col_objective_binge_counts),
    (col_loc_verify, col_loc_verify_counts),
    (col_no_loc_no_sub_binge_verify, col_no_loc_no_sub_binge_verify_counts),
)
for position, (summary_label, summary_counts) in enumerate(summaries):
    if position > 0:
        print("---")  # separator between consecutive summaries
    print(summary_label)
    print(summary_counts)


Prepare dataset for labelling of objective binges and normal eating moments

In [None]:
# The result columns mix booleans with marker strings, so compare against
# True explicitly instead of relying on truthiness.
objective_df = df[df[col_objective_binge] == True]
# Named no_binge_df so it does not rebind the no_binge() classifier function;
# shadowing it would break re-running the classification cell.
no_binge_df = df[df[col_no_binge] == True]

obj_binge_count = len(objective_df.index)
no_binge_count = len(no_binge_df.index)
print('objective binge entries', obj_binge_count)
print('no binge', no_binge_count)

# Aim for equal group sizes, but never request more rows than are available:
# sample() without replacement raises ValueError when n exceeds the row count.
no_binge_sample = no_binge_df.sample(n=min(obj_binge_count, no_binge_count))

df_to_label = pd.concat([objective_df, no_binge_sample])
# Keep only the columns handed to the raters and shuffle the row order so
# the two groups are not listed one after the other.
df_to_label = df_to_label[['instance_key_x', 'PRE_COGN']].sample(frac=1)


Write dataframes to excel

In [None]:
print('writing result')
# Full classified dataset first, then the shuffled subset for the raters.
exports = (
    (df, path_out),
    (df_to_label, path_out_raters),
)
for frame, destination in exports:
    frame.to_excel(destination)
print('done!')