# Objective Binge Classifier #

Import dependencies

In [1]:
!pip install openpyxl
import pandas as pd



Set paths and columns of response document

In [2]:
# Column names read from the response workbook.
col_name_post_instance_key = 'POST_instance_key'
col_name_post_loc = 'POST_LOC'  # self-reported Loss Of Control
col_name_pre_type = 'PRE_TYPE'
col_post_sub_binge = 'POST_SUB_BINGE'

# Values the columns above are compared against during classification.
loc_true = 1  # POST_LOC value that counts as loss of control
sub_binge_true = 1  # POST_SUB_BINGE value that counts as a subjective binge
pre_type_category_other = ['Anders']  # PRE_TYPE values judged against the "other" threshold

# Load the responses and show how many entries each PRE_TYPE value has.
path_in = '../resources/data_ignored/report.merged-vj-unfiltered.xlsx'
df = pd.read_excel(path_in)
columns_type_counts = df[col_name_pre_type].value_counts()
print('counts', columns_type_counts)

counts Anders     572
Ontbijt    442
Diner      437
Lunch      412
Name: PRE_TYPE, dtype: int64


Set paths and columns of config document

In [3]:
# Workbook holding one row per intake category with its binge thresholds.
path_config = '../resources/config/binge_config.xlsx'

# Kept at module level: get_binge_value() looks thresholds up in this frame
# via its '#', 'meals_int' and 'other_int' columns.
df_config = pd.read_excel(path_config)
# Show the available columns as a quick sanity check of the config layout.
print(df_config.columns)

Index(['#', 'Category', '1 portion refers to', 'Std “meals’', 'Std “other”',
       'meals_int', 'other_int'],
      dtype='object')


Set paths and columns for result document

In [4]:
# Name of the result column that receives the output of objective_binge().
column_objective_binge = 'objective_binge'  
# Name of the result column that receives the output of no_loc_no_sub_binge_verify().
col_no_loc_no_sub_binge_verify = 'no_loc_no_sub_binge_verify'
# Workbook the classified data frame is written to.
path_out = '../resources/data_ignored/report.merged-vj-binge.xlsx'

Classification methods

In [5]:
def isNaN(num):
    """Return whether ``num`` compares unequal to itself.

    A float NaN is the usual value for which ``x != x`` holds, so this
    detects missing cells without raising on strings or other non-numeric
    values.
    """
    differs_from_itself = num != num
    return differs_from_itself

def get_binge_value(row, category_number):
    """Look up the binge threshold for one intake category.

    The threshold is read from ``df_config``: the 'other_int' column when the
    row's PRE_TYPE is listed in ``pre_type_category_other``, otherwise the
    'meals_int' column.

    Args:
        row: a response row, indexable by column name.
        category_number: value matched against the '#' column of ``df_config``.

    Returns:
        The single threshold value for that category.

    Raises:
        ValueError: from ``Series.item()`` when the config does not contain
            exactly one row for ``category_number``.
    """
    is_other_type = row[col_name_pre_type] in pre_type_category_other
    threshold_column = 'other_int' if is_other_type else 'meals_int'
    category_mask = df_config['#'] == category_number
    return df_config.loc[category_mask, threshold_column].item()

def _portion_verdict(row, category_count=24):
    """Compare every reported portion in ``row`` with its configured threshold.

    Columns 'POST_INTAKE_POR_1' .. 'POST_INTAKE_POR_<category_count>' are
    inspected; NaN cells are treated as "not reported" and skipped.

    All categories are scanned before answering False. Returning on the first
    reported category (as the earlier implementation did) misses rows where a
    later category exceeds its threshold.

    Returns:
        True when at least one reported portion is greater than its threshold,
        False when portions were reported but none exceeds its threshold,
        "NO_PORTION" when no portion column holds a value.
    """
    portion_reported = False
    for category_number in range(1, category_count + 1):
        value = row['POST_INTAKE_POR_' + str(category_number)]
        if isNaN(value):
            continue
        portion_reported = True
        if value > get_binge_value(row, category_number):
            return True
    return False if portion_reported else "NO_PORTION"


def objective_binge(row):
    """Classify a row that reports both loss of control and a subjective binge.

    Returns:
        "IGNORE_POST_INSTANCE_MISSING" when the POST instance key is NaN,
        "IGNORE_POST_LOC_FALSE" when POST_LOC differs from ``loc_true``,
        "IGNORE_POST_SUB_BINGE_FALSE" when POST_SUB_BINGE differs from
        ``sub_binge_true``; otherwise True / False / "NO_PORTION" depending on
        whether any reported portion exceeds its threshold.
    """
    if isNaN(row[col_name_post_instance_key]):
        return "IGNORE_POST_INSTANCE_MISSING"
    if row[col_name_post_loc] != loc_true:
        return "IGNORE_POST_LOC_FALSE"
    if row[col_post_sub_binge] != sub_binge_true:
        return "IGNORE_POST_SUB_BINGE_FALSE"
    return _portion_verdict(row)


def no_loc_no_sub_binge_verify(row):
    """Apply the portion check to rows reporting neither loss of control nor a subjective binge.

    Returns:
        "IGNORE_POST_INSTANCE_MISSING" when the POST instance key is NaN,
        "IGNORE_POST_LOC_TRUE" when POST_LOC equals ``loc_true``,
        "IGNORE_POST_SUB_BINGE_TRUE" when POST_SUB_BINGE equals
        ``sub_binge_true``; otherwise True / False / "NO_PORTION" depending on
        whether any reported portion exceeds its threshold.
    """
    if isNaN(row[col_name_post_instance_key]):
        return "IGNORE_POST_INSTANCE_MISSING"
    if row[col_name_post_loc] == loc_true:
        return "IGNORE_POST_LOC_TRUE"
    if row[col_post_sub_binge] == sub_binge_true:
        return "IGNORE_POST_SUB_BINGE_TRUE"
    return _portion_verdict(row)


Perform classification and write results

In [None]:
# Classify every row twice: objective_binge handles rows reporting both loss of
# control and a subjective binge, the verify variant handles rows reporting neither.
df[column_objective_binge] = df.apply(objective_binge, axis=1)
df[col_no_loc_no_sub_binge_verify] = df.apply(no_loc_no_sub_binge_verify, axis=1)

# Tally how often each outcome label occurs in the two result columns.
column_objective_binge_counts = df[column_objective_binge].value_counts()
col_no_loc_no_sub_binge_verify_counts = df[col_no_loc_no_sub_binge_verify].value_counts()

# Report: total row count, then each result column name followed by its tally.
print('total PRE entries', len(df))
print(column_objective_binge, column_objective_binge_counts, sep='\n')
print(col_no_loc_no_sub_binge_verify, col_no_loc_no_sub_binge_verify_counts, sep='\n')

# Persist the frame, including the two new result columns.
print('writing result')
df.to_excel(path_out)
print('done!')
