In [1]:
import pandas as pd
from pm4py.objects.conversion.log import converter as log_converter
from pm4py.objects.log.importer.xes import importer as xes_importer

import utils

# Toggle for the data-loading cell: True reads the parquet copy (faster), but
# that file has to be created first by running once with False, which loads
# the CSV and saves it as parquet.
READ_PARQUET = True

In [2]:
# Load the event data into `df`. The parquet branch is the fast path; the CSV
# branch is the one that writes the parquet copy, so it must be run once
# before READ_PARQUET can be switched to True.
if READ_PARQUET:
    print("Reading parquet.")
    df = utils.load_parquet()
else:
    print("Reading csv and saving as parquet.")
    df = utils.load_csv()
    utils.save_parquet(df)

Reading parquet.


In [3]:
# Peek at the first row to see which columns the loaded frame contains.
df.head(1)

Unnamed: 0,Case ID,Activity,Resource,Complete Timestamp,Variant,Variant index,(case) amount_applied0,(case) amount_applied1,(case) amount_applied2,(case) amount_applied3,...,(case) young farmer,activity,concept:name,docid,doctype,eventid,lifecycle:transition,note,subprocess,success
0,8b99873a6136cfa6,Payment application-Application-mail income,0;n/a,2015/05/08 00:00:00.000,Variant 1832,1832,960.35,,,,...,False,mail income,mail income,-18008611495569447,Payment application,,complete,none,Application,True


In [4]:
# Case identifier of the very first event row; used below to pick one case.
first_id = df['Case ID'].iloc[0]
first_id

'8b99873a6136cfa6'

In [5]:
# Restrict the frame to the events of that single case, report how many
# events it has, and show the first one.
sample_df = df.loc[df['Case ID'].eq(first_id)]
print(sample_df.shape[0])
sample_df.head(1)

52


Unnamed: 0,Case ID,Activity,Resource,Complete Timestamp,Variant,Variant index,(case) amount_applied0,(case) amount_applied1,(case) amount_applied2,(case) amount_applied3,...,(case) young farmer,activity,concept:name,docid,doctype,eventid,lifecycle:transition,note,subprocess,success
0,8b99873a6136cfa6,Payment application-Application-mail income,0;n/a,2015/05/08 00:00:00.000,Variant 1832,1832,960.35,,,,...,False,mail income,mail income,-18008611495569447,Payment application,,complete,none,Application,True


In [6]:
# Override the converter's default column conventions: name the column that
# identifies a case and the prefix that marks case-level attribute columns.
to_event_log = log_converter.Variants.TO_EVENT_LOG
converter_params = to_event_log.value.Parameters
parameters = {
    converter_params.CASE_ID_KEY: 'Case ID',
    converter_params.CASE_ATTRIBUTE_PREFIX: '(case) ',
}
log = log_converter.apply(sample_df, variant=to_event_log, parameters=parameters)

In [7]:
# Case-level attributes of the first trace in the converted log; these come
# from the '(case) '-prefixed columns, shown here without that prefix.
log[0].attributes

{'amount_applied0': 960.35,
 'amount_applied1': nan,
 'amount_applied2': nan,
 'amount_applied3': nan,
 'applicant': 'b3b1bafcf8a5c359',
 'application': '8b99873a6136cfa6',
 'area': 2.6994,
 'basic payment': True,
 'cross_compliance': 0.0,
 'department': 'e7',
 'greening': True,
 'number_parcels': 3,
 'payment_actual0': 960.35,
 'payment_actual1': nan,
 'payment_actual2': nan,
 'payment_actual3': nan,
 'penalty_ABP': False,
 'penalty_AGP': False,
 'penalty_AJLP': False,
 'penalty_AUVP': False,
 'penalty_AVBP': False,
 'penalty_AVGP': False,
 'penalty_AVJLP': False,
 'penalty_AVUVP': False,
 'penalty_B16': False,
 'penalty_B2': False,
 'penalty_B3': False,
 'penalty_B4': False,
 'penalty_B5': False,
 'penalty_B5F': False,
 'penalty_B6': False,
 'penalty_BGK': False,
 'penalty_BGKV': False,
 'penalty_BGP': False,
 'penalty_C16': False,
 'penalty_C4': False,
 'penalty_C9': False,
 'penalty_CC': False,
 'penalty_GP1': False,
 'penalty_JLP1': False,
 'penalty_JLP2': False,
 'penalty_JLP3': 

In [8]:
# Reference copy of the first trace's attributes as they appear in Rowan's log.
# It describes the same case as `first_id` (see 'application' / 'concept:name')
# and is compared key-by-key against log[0].attributes in the cells below.
dictionary = {'young farmer': False,
 'selected_random': False,
 'penalty_AJLP': False,
 'application': '8b99873a6136cfa6',
 'penalty_amount0': 0.0,
 'program-id': '215',
 'penalty_BGKV': False,
 'penalty_AUVP': False,
 'applicant': 'b3b1bafcf8a5c359',
 'risk_factor': 1.0,
 'small farmer': True,
 'penalty_BGP': False,
 'department': 'e7',
 'penalty_C16': False,
 'penalty_BGK': False,
 'penalty_AVUVP': False,
 'penalty_CC': False,
 'penalty_AVJLP': False,
 'penalty_C9': False,
 'cross_compliance': 0.0,
 'rejected': False,
 'greening': True,
 'penalty_C4': False,
 'penalty_AVGP': False,
 'penalty_ABP': False,
 'penalty_B6': False,
 'penalty_B4': False,
 'penalty_B5': False,
 'penalty_AVBP': False,
 'penalty_B2': False,
 'selected_risk': False,
 'penalty_B3': False,
 'area': 2.6994,
 'selected_manually': False,
 'penalty_AGP': False,
 'penalty_B16': False,
 'penalty_GP1': False,
 'basic payment': True,
 'penalty_B5F': False,
 'penalty_V5': False,
 'payment_actual0': 960.35,
 'identity:id': 'B15FB890-574E-4117-B66A-BDFBC4FCAA6E',
 'amount_applied0': 960.35,
 'redistribution': True,
 'penalty_JLP6': False,
 'penalty_JLP7': False,
 'year': '2015',
 'penalty_JLP5': False,
 'penalty_JLP2': False,
 'penalty_JLP3': False,
 'number_parcels': 3,
 'penalty_JLP1': False,
 'concept:name': '8b99873a6136cfa6'}

In [9]:
# Attribute names that my converted trace has but Rowan's reference dict lacks.
for attr_name in log[0].attributes.keys():
    if attr_name in dictionary:
        continue
    print(attr_name)

amount_applied1
amount_applied2
amount_applied3
payment_actual1
payment_actual2
payment_actual3
penalty_amount1
penalty_amount2
penalty_amount3


In [10]:
# Attribute names that Rowan's reference dict has but my converted trace lacks.
my_attr_names = log[0].attributes.keys()
for attr_name in dictionary:
    if attr_name not in my_attr_names:
        print(attr_name)

identity:id


In [12]:
# Hand the converted single-case event log to the project's save helper.
# NOTE(review): presumably this writes the log to disk; destination and format
# are decided inside utils.save_log — confirm there.
utils.save_log(log)