In [8]:
import pandas as pd
import random

# To display flow using graphviz
from IPython.display import Image
from graphviz import Digraph


from datetime import datetime, timedelta

In [9]:
flow_data = pd.read_excel('model_in_table.xlsx')

In [10]:
flow_data

Unnamed: 0,Activity,Next Activity,Probability
0,Start,Activity A,1.0
1,Activity A,Activity B,1.0
2,Activity B,Activity C,1.0
3,Activity C,Activity D,0.3
4,Activity C,Activity E,0.7
5,Activity D,Activity E,0.5
6,Activity D,Activity A,0.5


In [4]:
def generate_cases(data):
    
    """
    generate an event log returning cases and activity sequences in a dataframe
    """
    raws = []

    for i in range(CASE_COUNT):
        CaseID = f'C-{i}'
        activity = data.at[0,'Activity']
        raws.append({'CaseID': CaseID, 'ActivityOrder': 1 , 'Activity' : activity})

        for j in range(50):

            next_activity_probailities = data[data['Activity']==activity]

            if not len(next_activity_probailities)>0:
                raws.append({'CaseID': CaseID, 'ActivityOrder': j+2 , 'Activity' : 'End'})
                
                break 

            next_activity = next_activity_probailities.sample(weights=next_activity_probailities['Probability']).iloc[0]['Next Activity']
            raws.append({'CaseID': CaseID, 'ActivityOrder': j+2 , 'Activity' : next_activity})
            activity = next_activity
    
    sequence_log = pd.DataFrame(raws)
    return sequence_log

In [5]:
def flow_chart(flow_data):
    """
    this function create a flowchart using the instructions in flow_data table which include 3 columns "Activity",
    "Next Activity", "probability"
    """
    # Create Digraph object
    flow_chart = Digraph('ProcessFlow', filename='process_flow.gv', format='png')

    # Iterate through DataFrame rows to add nodes and edges
    for _, row in flow_data.iterrows():
        from_activity = row['Activity']
        to_activity = row['Next Activity']
        probability = row['Probability']

        flow_chart.node(from_activity)
        flow_chart.node(to_activity)
        flow_chart.edge(from_activity, to_activity, label=f'Probability: {probability}')

    # Save and render the flowchart
    flow_chart.render(view=False)  # Set view=False to avoid the message

    return Image(filename='process_flow.gv.png')

### Add timestamp to start

In [107]:
def generate_cases_w_starttimestamp(data):
    
    """
    generate an event log returning cases and activity sequences in a dataframe
    """
    
    start_date = datetime(2023, 1, 1, 9, 0, 0)  # January 1, 2023, 09:00:00
    end_date = datetime(2023, 1, 4, 0, 0, 0)    # January 2, 2023, 00:00:00

    start_hour = 9 # i.e. 9am
    end_hour = 17 # i.e. 5pm

    case_interval = 180 #minues

    interval = timedelta(minutes=case_interval)

    raws = []
    
    timestamp = 'test'

    while start_date < end_date:
        if start_date.weekday() < 5:  # Check if it's a weekday (0: Monday, 1: Tuesday, ..., 4: Friday)
            if start_date.hour >= 9 and start_date.hour < 17:
                timestamp = start_date.strftime("%d-%m-%Y %H:%M:%S")
                
                # create i to number the generated cases
                i=1
                CaseID = f'C-{i}'
                activity = data.at[0,'Activity']
                raws.append({'CaseID': CaseID, 'ActivityOrder': 1 , 'Activity' : activity, 'AssignedTimeStamp':timestamp })
                i+=1

                for j in range(50):

                    next_activity_probailities = data[data['Activity']==activity]

                    if not len(next_activity_probailities)>0:
                        raws.append({'CaseID': CaseID, 'ActivityOrder': j+2 , 'Activity' : 'End'})

                        break 

                    next_activity = next_activity_probailities.sample(weights=next_activity_probailities['Probability']).iloc[0]['Next Activity']
                    raws.append({'CaseID': CaseID, 'ActivityOrder': j+2 , 'Activity' : next_activity})
                    activity = next_activity
    
        start_date += interval
    
    sequence_log = pd.DataFrame(raws)
    sequence_log['StartTimeStamp'] = None
    sequence_log['EndTimeStamp'] = None
    
    return sequence_log

In [108]:
generate_cases_w_starttimestamp(flow_data)

Unnamed: 0,CaseID,ActivityOrder,Activity,AssignedTimeStamp,StartTimeStamp,EndTimeStamp
0,C-1,1,Start,02-01-2023 09:00:00,,
1,C-1,2,Activity A,,,
2,C-1,3,Activity B,,,
3,C-1,4,Activity C,,,
4,C-1,5,Activity E,,,
5,C-1,6,End,,,
6,C-1,1,Start,02-01-2023 12:00:00,,
7,C-1,2,Activity A,,,
8,C-1,3,Activity B,,,
9,C-1,4,Activity C,,,


In [104]:
output = generate_cases_w_starttimestamp(flow_data)

In [105]:
output[output['Activity']=='Start']

Unnamed: 0,CaseID,ActivityOrder,Activity,AssignedTimeStamp
0,C-1,1,Start,02-01-2023 09:00:00
6,C-1,1,Start,02-01-2023 12:00:00
12,C-1,1,Start,02-01-2023 15:00:00
18,C-1,1,Start,03-01-2023 09:00:00
24,C-1,1,Start,03-01-2023 12:00:00
31,C-1,1,Start,03-01-2023 15:00:00
